From c425a533fe0aad80ca4e107067dde99cc9f48be2 Mon Sep 17 00:00:00 2001 From: Tarek Date: Wed, 10 Dec 2025 17:21:14 -0800 Subject: [PATCH 01/10] wip: course lab benchmark rework Signed-off-by: Tarek --- .github/workflows/test.yml | 1 + benchmarks/courselab_bench/.env.toml.example | 17 + benchmarks/courselab_bench/.gitignore | 43 + benchmarks/courselab_bench/README.md | 119 +++ .../courselab_bench/__init__.py | 19 + .../courselab_bench/agent/__init__.py | 3 + .../courselab_bench/agent/react.py | 197 ++++ .../courselab_bench/data/__init__.py | 3 + .../courselab_bench/data/loader.py | 23 + .../courselab_bench/environment/__init__.py | 3 + .../courselab_bench/environment/docker.py | 150 +++ .../courselab_bench/evaluation/__init__.py | 3 + .../courselab_bench/evaluation/evaluator.py | 42 + .../courselab_bench/model/__init__.py | 3 + .../courselab_bench/model/litellm.py | 69 ++ .../courselab_bench/runner/__init__.py | 4 + .../courselab_bench/runner/executor.py | 69 ++ .../courselab_bench/runner/logger.py | 43 + .../courselab_bench/utils/env_loader.py | 36 + benchmarks/courselab_bench/data/courses.json | 16 + .../mit_6_5840_2024/1_mapreduce/config.json | 13 + .../mit_6_5840_2024/1_mapreduce/evaluate.sh | 40 + .../mit_6_5840_2024/1_mapreduce/preprocess.sh | 27 + .../data/mit_6_5840_2024/1_mapreduce/task.md | 175 ++++ .../data/mit_6_5840_2024/2a_kvsrv/config.json | 13 + .../data/mit_6_5840_2024/2a_kvsrv/evaluate.sh | 37 + .../mit_6_5840_2024/2a_kvsrv/preprocess.sh | 25 + .../data/mit_6_5840_2024/2a_kvsrv/task.md | 49 + .../data/mit_6_5840_2024/2b_kvsrv/config.json | 14 + .../data/mit_6_5840_2024/2b_kvsrv/evaluate.sh | 37 + .../mit_6_5840_2024/2b_kvsrv/preprocess.sh | 25 + .../data/mit_6_5840_2024/2b_kvsrv/task.md | 65 ++ .../data/mit_6_5840_2024/3a_raft/config.json | 13 + .../data/mit_6_5840_2024/3a_raft/evaluate.sh | 39 + .../mit_6_5840_2024/3a_raft/preprocess.sh | 27 + .../data/mit_6_5840_2024/3a_raft/task.md | 118 +++ .../data/mit_6_5840_2024/3b_raft/config.json | 13 + .../data/mit_6_5840_2024/3b_raft/evaluate.sh | 39 + .../mit_6_5840_2024/3b_raft/preprocess.sh | 27 + .../data/mit_6_5840_2024/3b_raft/task.md | 163 ++++ .../data/mit_6_5840_2024/3c_raft/config.json | 13 + .../data/mit_6_5840_2024/3c_raft/evaluate.sh | 39 + .../mit_6_5840_2024/3c_raft/preprocess.sh | 27 + .../data/mit_6_5840_2024/3c_raft/task.md | 227 +++++ .../data/mit_6_5840_2024/3d_raft/config.json | 13 + .../data/mit_6_5840_2024/3d_raft/evaluate.sh | 39 + .../mit_6_5840_2024/3d_raft/preprocess.sh | 27 + .../data/mit_6_5840_2024/3d_raft/task.md | 279 ++++++ .../mit_6_5840_2024/4a_kvraft/config.json | 14 + .../mit_6_5840_2024/4a_kvraft/evaluate.sh | 37 + .../mit_6_5840_2024/4a_kvraft/preprocess.sh | 915 +++++++++++++++++ .../data/mit_6_5840_2024/4a_kvraft/task.md | 125 +++ .../mit_6_5840_2024/4b_kvraft/config.json | 14 + .../mit_6_5840_2024/4b_kvraft/evaluate.sh | 37 + .../mit_6_5840_2024/4b_kvraft/preprocess.sh | 915 +++++++++++++++++ .../data/mit_6_5840_2024/4b_kvraft/task.md | 169 ++++ .../mit_6_5840_2024/5a_shardkv/config.json | 14 + .../mit_6_5840_2024/5a_shardkv/evaluate.sh | 49 + .../mit_6_5840_2024/5a_shardkv/preprocess.sh | 917 ++++++++++++++++++ .../data/mit_6_5840_2024/5a_shardkv/task.md | 129 +++ .../mit_6_5840_2024/5b_shardkv/config.json | 14 + .../mit_6_5840_2024/5b_shardkv/evaluate.sh | 49 + .../mit_6_5840_2024/5b_shardkv/preprocess.sh | 917 ++++++++++++++++++ .../data/mit_6_5840_2024/5b_shardkv/task.md | 242 +++++ .../test__simple__echo/config.json | 7 + .../test__simple__echo/evaluate.sh | 18 + 
.../test__simple__echo/preprocess.sh | 9 + .../test_course/test__simple__echo/task.md | 8 + benchmarks/courselab_bench/prepare_dataset.py | 103 ++ benchmarks/courselab_bench/pyproject.toml | 43 + benchmarks/courselab_bench/run_benchmark.py | 126 +++ benchmarks/courselab_bench/tests/__init__.py | 0 .../courselab_bench/tests/test_data_schema.py | 107 ++ 73 files changed, 7464 insertions(+) create mode 100644 benchmarks/courselab_bench/.env.toml.example create mode 100644 benchmarks/courselab_bench/.gitignore create mode 100644 benchmarks/courselab_bench/README.md create mode 100644 benchmarks/courselab_bench/courselab_bench/__init__.py create mode 100644 benchmarks/courselab_bench/courselab_bench/agent/__init__.py create mode 100644 benchmarks/courselab_bench/courselab_bench/agent/react.py create mode 100644 benchmarks/courselab_bench/courselab_bench/data/__init__.py create mode 100644 benchmarks/courselab_bench/courselab_bench/data/loader.py create mode 100644 benchmarks/courselab_bench/courselab_bench/environment/__init__.py create mode 100644 benchmarks/courselab_bench/courselab_bench/environment/docker.py create mode 100644 benchmarks/courselab_bench/courselab_bench/evaluation/__init__.py create mode 100644 benchmarks/courselab_bench/courselab_bench/evaluation/evaluator.py create mode 100644 benchmarks/courselab_bench/courselab_bench/model/__init__.py create mode 100644 benchmarks/courselab_bench/courselab_bench/model/litellm.py create mode 100644 benchmarks/courselab_bench/courselab_bench/runner/__init__.py create mode 100644 benchmarks/courselab_bench/courselab_bench/runner/executor.py create mode 100644 benchmarks/courselab_bench/courselab_bench/runner/logger.py create mode 100644 benchmarks/courselab_bench/courselab_bench/utils/env_loader.py create mode 100644 benchmarks/courselab_bench/data/courses.json create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/config.json create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/evaluate.sh create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/preprocess.sh create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/task.md create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/config.json create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/evaluate.sh create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/preprocess.sh create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/task.md create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/config.json create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/evaluate.sh create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/preprocess.sh create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/task.md create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/config.json create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/evaluate.sh create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/preprocess.sh create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/task.md create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/config.json create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/evaluate.sh create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/preprocess.sh create mode 100644 
benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/task.md create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/config.json create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/evaluate.sh create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/preprocess.sh create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/task.md create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/config.json create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/evaluate.sh create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/preprocess.sh create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/task.md create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/config.json create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/evaluate.sh create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/preprocess.sh create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/task.md create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/config.json create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/evaluate.sh create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/preprocess.sh create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/task.md create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/config.json create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/evaluate.sh create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/preprocess.sh create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/task.md create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/config.json create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/evaluate.sh create mode 100755 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/preprocess.sh create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/task.md create mode 100644 benchmarks/courselab_bench/data/test_course/test__simple__echo/config.json create mode 100755 benchmarks/courselab_bench/data/test_course/test__simple__echo/evaluate.sh create mode 100755 benchmarks/courselab_bench/data/test_course/test__simple__echo/preprocess.sh create mode 100644 benchmarks/courselab_bench/data/test_course/test__simple__echo/task.md create mode 100755 benchmarks/courselab_bench/prepare_dataset.py create mode 100644 benchmarks/courselab_bench/pyproject.toml create mode 100644 benchmarks/courselab_bench/run_benchmark.py create mode 100644 benchmarks/courselab_bench/tests/__init__.py create mode 100644 benchmarks/courselab_bench/tests/test_data_schema.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 32dd8b9..1692174 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,6 +20,7 @@ jobs: - example_bench - course_exam_bench - toposense_bench + - courselab_bench # TODO: For now, we comment out other benchmarks as they have no tests # - arteval_bench # - cache_bench diff --git a/benchmarks/courselab_bench/.env.toml.example b/benchmarks/courselab_bench/.env.toml.example new file mode 100644 index 0000000..de517f9 --- /dev/null +++ b/benchmarks/courselab_bench/.env.toml.example @@ -0,0 +1,17 @@ +# LLM API Keys Configuration +# Copy this file to .env.toml and fill in 
your API keys +# LiteLLM will automatically use these environment variables + + +# OpenAI +# OPENAI_API_KEY = "sk-..." +# OPENAI_BASE_URL = "https://api.openai.com/v1" # Optional: custom endpoint + +# Anthropic +# ANTHROPIC_API_KEY = "sk-ant-..." + +# Azure OpenAI +# AZURE_API_KEY = "..." +# AZURE_API_BASE = "https://YOUR_RESOURCE.openai.azure.com" +# AZURE_API_VERSION = "2024-02-15-preview" + diff --git a/benchmarks/courselab_bench/.gitignore b/benchmarks/courselab_bench/.gitignore new file mode 100644 index 0000000..178f3bb --- /dev/null +++ b/benchmarks/courselab_bench/.gitignore @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +dist/ +build/ +.eggs/ + +# Virtual environments +.venv/ +venv/ +ENV/ +env/ + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +*.cover + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Outputs (don't commit results) +outputs/ +*.log + +# Secrets (don't commit API keys) +configs/*secret*.yaml +.env +.env.toml + +# OS +.DS_Store +Thumbs.db +data/tasks.jsonl diff --git a/benchmarks/courselab_bench/README.md b/benchmarks/courselab_bench/README.md new file mode 100644 index 0000000..ec0c305 --- /dev/null +++ b/benchmarks/courselab_bench/README.md @@ -0,0 +1,119 @@ +# Course Lab Benchmark + +A benchmark for evaluating AI agents on systems programming labs. Agents run in Docker containers and are evaluated on their ability to complete course lab assignments. + +We include a simple ReAct agent inspired by [mini-swe-agent](https://github.com/AUTOMATIC/mini-swe-agent). + +## Quick Start + +Make sure to export the appropriate API keys for your chosen model provider (copy `.env.toml.example` to `.env.toml` and fill in your keys). We use litellm for model access. + +```bash +pip install -e . + +# Prepare dataset (This will generate data/tasks.jsonl using the tasks in data/) +python prepare_dataset.py + +# Run all tasks +python run_benchmark.py +``` + +## Usage + +```bash +python run_benchmark.py \ + --tasks data/tasks.jsonl \ + --model anthropic/claude-sonnet-4-5-20250929 \ + --max-steps 50 \ + --max-cost 20.0 +``` + +## Output + +Each run creates a directory with a single `results.json` file: + +```json +{ + "config": { "model": "...", "max_steps": 50, ... }, + "summary": { + "total": 10, + "passed": 8, + "success_rate": 0.8, + "total_cost": 0.234, + "by_course": { "mit_6_5840_2024": { "total": 10, "passed": 8, ... } } + }, + "results": [ + { + "instance_id": "test__simple__echo", + "passed": true, + "agent_status": "completed", + "test_output": "PASS: ...", + "test_exit_code": 0, + "duration_seconds": 12.5, + "model_cost": 0.0033 + } + ] +} +``` + +Detailed agent trajectories are saved in `trajectories/{instance_id}.jsonl`. + +## Task Structure + +Tasks are organized in a folder hierarchy: + +``` +data/ +└── course_id/ + └── task_id/ + ├── config.json # Task metadata + ├── task.md # Problem statement + ├── preprocess.sh # Setup script (runs before agent) + └── evaluate.sh # Evaluation script (determines pass/fail) +``` + +### config.json + +Required fields: + +- `instance_id`: Unique identifier (e.g., `"test__simple__echo"`) +- `course_id`: Course identifier (e.g., `"test_course"`) +- `docker_image`: Docker image to use (e.g., `"xuafeng/swe-go-python:latest"`) + +Optional fields: + +- `timeout_minutes`: Maximum execution time (default: 30) +- `tags`: List of topic tags +- `repo_url`: Git repository to clone +- `base_commit`: Git commit to checkout + +### task.md + +Markdown file containing the problem statement given to the agent. 
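+
+Putting the `config.json` fields above together, a hypothetical task's metadata might look like this (all values are illustrative placeholders, not taken from a real task):
+
+```json
+{
+  "instance_id": "example_course__hello_world",
+  "course_id": "example_course",
+  "docker_image": "ubuntu:22.04",
+  "timeout_minutes": 30,
+  "tags": ["shell", "warmup"],
+  "repo_url": "https://example.com/example-labs.git",
+  "base_commit": "0123abc"
+}
+```
+
+Only the first three fields are required; `repo_url` and `base_commit` matter only when the task starts from an existing repository that should be cloned and checked out before the agent runs.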
+ +### preprocess.sh + +Shell script that runs before the agent starts. Use this to: + +- Set up the environment +- Create checksums of files that shouldn't be modified +- Initialize test data + +Exit with code 0 on success, non-zero on failure. + +### evaluate.sh + +Runs after the agent completes. Exit 0 for PASS, non-zero for FAIL. +Print verbose output for debugging (captured in results). + +### Example Task + +See `data/test_course/test__simple__echo/` for a minimal example. + +## Adding New Tasks + +1. Create a new folder: `data/{course_id}/{task_id}/` +2. Add the 4 required files: `config.json`, `task.md`, `preprocess.sh`, `evaluate.sh` +3. Make scripts executable: `chmod +x data/{course_id}/{task_id}/*.sh` +4. Run `python prepare_dataset.py` to regenerate `tasks.jsonl` +5. Run the benchmark diff --git a/benchmarks/courselab_bench/courselab_bench/__init__.py b/benchmarks/courselab_bench/courselab_bench/__init__.py new file mode 100644 index 0000000..b2b8812 --- /dev/null +++ b/benchmarks/courselab_bench/courselab_bench/__init__.py @@ -0,0 +1,19 @@ +__version__ = "0.1.0" + +from courselab_bench.agent import REACTAgent +from courselab_bench.environment import DockerEnvironment +from courselab_bench.model import LiteLLMModel +from courselab_bench.data import load_tasks +from courselab_bench.runner import execute_task, save_trajectory +from courselab_bench.evaluation import evaluate_task, compute_summary + +__all__ = [ + "REACTAgent", + "DockerEnvironment", + "LiteLLMModel", + "load_tasks", + "execute_task", + "save_trajectory", + "evaluate_task", + "compute_summary", +] diff --git a/benchmarks/courselab_bench/courselab_bench/agent/__init__.py b/benchmarks/courselab_bench/courselab_bench/agent/__init__.py new file mode 100644 index 0000000..98818e3 --- /dev/null +++ b/benchmarks/courselab_bench/courselab_bench/agent/__init__.py @@ -0,0 +1,3 @@ +from courselab_bench.agent.react import REACTAgent + +__all__ = ["REACTAgent"] diff --git a/benchmarks/courselab_bench/courselab_bench/agent/react.py b/benchmarks/courselab_bench/courselab_bench/agent/react.py new file mode 100644 index 0000000..b48cc4d --- /dev/null +++ b/benchmarks/courselab_bench/courselab_bench/agent/react.py @@ -0,0 +1,197 @@ +import re +import subprocess +import time +from typing import Any +from loguru import logger + + +class AgentException(Exception): + pass + + +class FormatError(AgentException): + pass + + +class TimeoutError(AgentException): + pass + + +class LimitReached(AgentException): + pass + + +class TaskCompleted(AgentException): + pass + + +class REACTAgent: + def __init__(self, model: Any, env: Any, config: dict[str, Any]): + self.model = model + self.env = env + self.config = config + self.messages: list[dict[str, Any]] = [] + + def run(self, task: dict[str, Any]) -> dict[str, Any]: + self.messages = [] + self.add_message("system", self._system_prompt()) + self.add_message("user", self._task_prompt(task)) + + try: + while True: + self.step() + except LimitReached as e: + logger.info(f"Limit reached: {e}") + return self._finalize("limit_reached", str(e)) + except TaskCompleted as e: + logger.info("Task completed successfully") + return self._finalize("completed", str(e)) + except Exception as e: + logger.error(f"Unexpected error: {e}") + return self._finalize("error", str(e)) + + def step(self): + response = self.query() + try: + action = self.parse_action(response) + except FormatError as e: + # Format errors are recoverable - give the agent another chance + logger.warning(f"Format error: {e}") + 
self.add_message("user", str(e)) + return + + try: + output = self.execute_action(action) + except TimeoutError as e: + # Timeout errors are recoverable - give the agent another chance + logger.warning(f"Timeout: {e}") + self.add_message("user", str(e)) + return + + self.add_observation(output) + + def query(self) -> dict[str, Any]: + max_steps = self.config.get("max_steps", 50) + max_cost = self.config.get("max_cost", 5.0) # Default $5 cost limit + + num_steps = len([m for m in self.messages if m["role"] == "assistant"]) + if num_steps >= max_steps: + raise LimitReached(f"Step limit reached: {num_steps}/{max_steps}") + + stats = self.model.get_stats() + if stats["cost"] >= max_cost: + raise LimitReached(f"Cost limit reached: ${stats['cost']:.4f} >= ${max_cost}") + + logger.debug(f"Querying model (step {num_steps + 1}/{max_steps})") + try: + response = self.model.query(self.messages) + self.add_message("assistant", response["content"]) + return response + except Exception as e: + logger.error(f"Model query failed: {e}") + raise + + def parse_action(self, response: dict[str, Any]) -> str: + content = response["content"] + action = self._parse_action(content) + + if action is None: + raise FormatError( + "Error: Please provide exactly ONE bash command in triple backticks (```bash ... ```)." + ) + + return action + + def execute_action(self, action: str) -> dict[str, Any]: + if "TASK_COMPLETE" in action: + raise TaskCompleted("Agent marked task as complete") + + display_action = ( + action if len(action) <= 200 else action[:200] + "..." + ) # Truncate long commands + logger.info(f"→ Executing: {display_action}") + try: + output = self.env.execute(action) + output["action"] = action + + if output.get("returncode") == 0: + logger.info(f"✓ Command succeeded (exit code 0)") + else: + logger.warning(f"✗ Command failed (exit code {output.get('returncode')})") + + return output + except subprocess.TimeoutExpired as e: + partial_output = e.stdout.decode("utf-8", errors="replace") if e.stdout else "" + raise TimeoutError( + f"Command timed out after {e.timeout}s.\n" + f"Command: {action}\n" + f"Partial output: {partial_output[:500]}" + ) + except Exception as e: + logger.error(f"Execution failed: {e}") + return {"action": action, "output": f"[ERROR: {e}]", "returncode": 1} + + def add_observation(self, output: dict[str, Any]): + observation = self._format_observation(output) + self.add_message("user", observation) + + def add_message(self, role: str, content: str): + self.messages.append({"role": role, "content": content, "timestamp": time.time()}) + + def _finalize(self, status: str, message: str) -> dict[str, Any]: + num_steps = len([m for m in self.messages if m["role"] == "assistant"]) + + return { + "messages": self.messages, + "cost": self.model.get_stats()["cost"], + "status": status, + "steps": num_steps, + } + + # For now, prompts are not configurable externally + def _system_prompt(self) -> str: + return """You are a systems programming assistant that solves tasks using bash commands. + +Rules: +- Provide EXACTLY ONE bash command per response +- Format your commands in triple backticks: ```bash +command here +``` +- After each command, wait for the output before proceeding +- When you have completed the task, run: echo "TASK_COMPLETE" +- Be concise and focused on solving the task""" + + def _task_prompt(self, task: dict[str, Any]) -> str: + return f"""# Task: {task['instance_id']} + +{task['problem_statement']} + +## Instructions +Solve this task step by step using bash commands. 
Provide one command at a time and wait for the output. +When you are done, echo "TASK_COMPLETE" to signal completion.""" + + def _parse_action(self, content: str) -> str | None: + # Match ```bash ... ``` blocks + pattern = r"```bash\s*\n(.*?)```" + matches = re.findall(pattern, content, re.DOTALL) + + if matches: + return matches[0].strip() + + return None + + def _format_observation(self, output: dict[str, Any]) -> str: + returncode = output.get("returncode", -1) + output_text = output.get("output", "") + + max_output_len = 5000 + if len(output_text) > max_output_len: + output_text = ( + output_text[:max_output_len] + + f"\n... (truncated {len(output_text) - max_output_len} chars)" + ) + + return f"""Exit code: {returncode} + +Output: +{output_text}""" diff --git a/benchmarks/courselab_bench/courselab_bench/data/__init__.py b/benchmarks/courselab_bench/courselab_bench/data/__init__.py new file mode 100644 index 0000000..944fc8d --- /dev/null +++ b/benchmarks/courselab_bench/courselab_bench/data/__init__.py @@ -0,0 +1,3 @@ +from courselab_bench.data.loader import load_tasks + +__all__ = ["load_tasks"] diff --git a/benchmarks/courselab_bench/courselab_bench/data/loader.py b/benchmarks/courselab_bench/courselab_bench/data/loader.py new file mode 100644 index 0000000..da309d0 --- /dev/null +++ b/benchmarks/courselab_bench/courselab_bench/data/loader.py @@ -0,0 +1,23 @@ +import json +from pathlib import Path +from typing import Any + + +def load_tasks(file_path: str | Path) -> list[dict[str, Any]]: + file_path = Path(file_path) + if not file_path.exists(): + raise FileNotFoundError(f"Tasks file not found: {file_path}") + + tasks = [] + with file_path.open("r", encoding="utf-8") as f: + for line_num, line in enumerate(f, 1): + line = line.strip() + if not line: + continue + try: + task = json.loads(line) + tasks.append(task) + except json.JSONDecodeError as e: + print(f"Warning: Skipping invalid JSON on line {line_num}: {e}") + + return tasks diff --git a/benchmarks/courselab_bench/courselab_bench/environment/__init__.py b/benchmarks/courselab_bench/courselab_bench/environment/__init__.py new file mode 100644 index 0000000..915dd52 --- /dev/null +++ b/benchmarks/courselab_bench/courselab_bench/environment/__init__.py @@ -0,0 +1,3 @@ +from courselab_bench.environment.docker import DockerEnvironment + +__all__ = ["DockerEnvironment"] diff --git a/benchmarks/courselab_bench/courselab_bench/environment/docker.py b/benchmarks/courselab_bench/courselab_bench/environment/docker.py new file mode 100644 index 0000000..eab7b47 --- /dev/null +++ b/benchmarks/courselab_bench/courselab_bench/environment/docker.py @@ -0,0 +1,150 @@ +import subprocess +import uuid +from typing import Any +from loguru import logger + + +class DockerEnvironment: + def __init__( + self, + image: str, + timeout: int = 60, + work_dir: str = "/workspace", + ): + self.image = image + self.timeout = timeout + self.work_dir = work_dir + self.container_id: str | None = None + + def setup(self, task: dict[str, Any]) -> None: + self.container_id = self._start_container() + repo_url = task.get("repo_url") + if repo_url: + base_commit = task.get("base_commit") + self._clone_repo(repo_url, base_commit) + + preprocess_script = task.get("preprocess_script") + if preprocess_script: + self._run_preprocess(preprocess_script) + + def execute(self, command: str, timeout: int | None = None) -> dict[str, Any]: + if not self.container_id: + raise RuntimeError("Container not started. 
Call setup() first.") + + cmd = [ + "docker", + "exec", + "-w", + self.work_dir, # Set working directory + self.container_id, + "bash", + "-lc", # Login shell to load environment + command, + ] + + logger.debug(f"Executing: {command[:100]}...") + + try: + result = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, # Combine stdout and stderr + text=True, + encoding="utf-8", + errors="replace", # Replace invalid unicode + timeout=timeout or self.timeout, + ) + + logger.debug(f"Command finished with exit code: {result.returncode}") + + return {"output": result.stdout, "returncode": result.returncode} + + except subprocess.TimeoutExpired as e: + # Re-raise with stdout for agent to handle + logger.error(f"Command timed out after {timeout or self.timeout}s") + if isinstance(e.stdout, str): + e.stdout = e.stdout.encode("utf-8") + elif e.stdout is None: + e.stdout = b"" + raise + except Exception as e: + logger.error(f"Command execution failed: {e}") + return {"output": f"[ERROR: {type(e).__name__}: {str(e)}]", "returncode": 1} + + def cleanup(self) -> None: + if not self.container_id: + return + + # Run cleanup in background with timeout (similar to mini-swe-agent) + cmd = f"(timeout 60 docker stop {self.container_id} || docker rm -f {self.container_id}) >/dev/null 2>&1 &" + + try: + subprocess.Popen(cmd, shell=True) + except Exception: + pass # Ignore cleanup errors + finally: + self.container_id = None + + def __del__(self): + self.cleanup() + + def _start_container(self) -> str: + container_name = f"courselab-{uuid.uuid4().hex[:8]}" + cmd = [ + "docker", + "run", + "-d", # Detached mode + "-it", # Interactive with TTY + "--rm", # Auto-remove when stopped + "--name", + container_name, + "-w", + self.work_dir, # Set working directory + self.image, + "sleep", + "7200", # Keep container alive for 2 hours + ] + + logger.debug(f"Starting container: {' '.join(cmd)}") + + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=300, # 5 minutes to pull image if needed (will we ever need longer?) 
+ check=True, + ) + container_id = result.stdout.strip() + return container_id + except subprocess.TimeoutExpired as e: + raise RuntimeError("Docker container start timed out") from e + except subprocess.CalledProcessError as e: + raise RuntimeError(f"Failed to start Docker container: {e.stderr}") from e + except FileNotFoundError: + raise RuntimeError("Docker is not installed or not in PATH") + + def _clone_repo(self, repo_url: str, base_commit: str | None = None) -> None: + clone_result = self.execute(f"git clone {repo_url} {self.work_dir}", timeout=300) + + if clone_result["returncode"] != 0: + raise RuntimeError(f"Failed to clone repository: {clone_result['output'][:200]}") + + if base_commit: + checkout_result = self.execute(f"cd {self.work_dir} && git checkout {base_commit}") + + if checkout_result["returncode"] != 0: + raise RuntimeError( + f"Failed to checkout commit {base_commit}: {checkout_result['output'][:200]}" + ) + + def _run_preprocess(self, preprocess_script: str) -> None: + script_path = f"{self.work_dir}/preprocess.sh" + self.execute( + f"cat > {script_path} << 'PREPROCESS_EOF'\n{preprocess_script}\nPREPROCESS_EOF" + ) + self.execute(f"chmod +x {script_path}") + result = self.execute(f"cd {self.work_dir} && bash {script_path}") + + if result["returncode"] != 0: + raise RuntimeError(f"Preprocess script failed: {result['output'][:200]}") diff --git a/benchmarks/courselab_bench/courselab_bench/evaluation/__init__.py b/benchmarks/courselab_bench/courselab_bench/evaluation/__init__.py new file mode 100644 index 0000000..ad785e8 --- /dev/null +++ b/benchmarks/courselab_bench/courselab_bench/evaluation/__init__.py @@ -0,0 +1,3 @@ +from courselab_bench.evaluation.evaluator import evaluate_task, compute_summary + +__all__ = ["evaluate_task", "compute_summary"] diff --git a/benchmarks/courselab_bench/courselab_bench/evaluation/evaluator.py b/benchmarks/courselab_bench/courselab_bench/evaluation/evaluator.py new file mode 100644 index 0000000..b29cfdd --- /dev/null +++ b/benchmarks/courselab_bench/courselab_bench/evaluation/evaluator.py @@ -0,0 +1,42 @@ +from collections import defaultdict +from typing import Any + + +def evaluate_task(result: dict[str, Any]) -> bool: + return result.get("test_exit_code") == 0 + + +def compute_summary(results: list[dict[str, Any]]) -> dict[str, Any]: + if not results: + return {"total": 0, "passed": 0, "success_rate": 0.0} + + total = len(results) + passed = sum(1 for r in results if r.get("passed", False)) + + costs = [r["model_cost"] for r in results if "model_cost" in r] + durations = [r["duration_seconds"] for r in results if "duration_seconds" in r] + + summary = { + "total": total, + "passed": passed, + "success_rate": passed / total if total > 0 else 0.0, + "total_cost": round(sum(costs), 4) if costs else 0.0, + "avg_duration": round(sum(durations) / len(durations), 2) if durations else 0.0, + } + + by_course = defaultdict(lambda: {"total": 0, "passed": 0}) + for result in results: + course_id = result.get("course_id", "unknown") + by_course[course_id]["total"] += 1 + if result.get("passed", False): + by_course[course_id]["passed"] += 1 + + summary["by_course"] = { + course_id: { + **stats, + "success_rate": stats["passed"] / stats["total"] if stats["total"] > 0 else 0.0, + } + for course_id, stats in sorted(by_course.items()) + } + + return summary diff --git a/benchmarks/courselab_bench/courselab_bench/model/__init__.py b/benchmarks/courselab_bench/courselab_bench/model/__init__.py new file mode 100644 index 0000000..3331cb3 --- /dev/null 
+++ b/benchmarks/courselab_bench/courselab_bench/model/__init__.py @@ -0,0 +1,3 @@ +from courselab_bench.model.litellm import LiteLLMModel + +__all__ = ["LiteLLMModel"] diff --git a/benchmarks/courselab_bench/courselab_bench/model/litellm.py b/benchmarks/courselab_bench/courselab_bench/model/litellm.py new file mode 100644 index 0000000..b7ebfe6 --- /dev/null +++ b/benchmarks/courselab_bench/courselab_bench/model/litellm.py @@ -0,0 +1,69 @@ +import time +from typing import Any +import litellm +from loguru import logger + + +class LiteLLMModel: + def __init__(self, model_name: str, temperature: float = 0.0, max_tokens: int = 4096, **kwargs): + self.model_name = model_name + self.temperature = temperature + self.max_tokens = max_tokens + self.kwargs = kwargs + self.cost = 0.0 + self.n_calls = 0 + self.total_tokens = 0 + + def query(self, messages: list[dict[str, Any]], max_retries: int = 3) -> dict[str, Any]: + for attempt in range(max_retries): + try: + logger.debug(f"Querying {self.model_name} (attempt {attempt + 1}/{max_retries})") + + response = litellm.completion( + model=self.model_name, + messages=messages, + temperature=self.temperature, + max_tokens=self.max_tokens, + **self.kwargs, + ) + + content = response.choices[0].message.content + self.n_calls += 1 + if hasattr(response, "usage") and response.usage: + tokens = response.usage.total_tokens + self.total_tokens += tokens + + # Try to calculate cost (may not work for all models) + try: + cost = litellm.completion_cost(completion_response=response) + self.cost += cost + logger.debug(f"API call cost: ${cost:.6f}, tokens: {tokens}") + except Exception: + logger.debug(f"Could not calculate cost, tokens: {tokens}") + else: + logger.debug("Token usage info not available") + + return { + "content": content, + "extra": { + "model": response.model if hasattr(response, "model") else None, + "usage": ( + response.usage.model_dump() if hasattr(response, "usage") else None + ), + }, + } + + except Exception as e: + logger.warning(f"API call failed (attempt {attempt + 1}/{max_retries}): {e}") + + if attempt < max_retries - 1: + # Exponential backoff: 2^attempt seconds + wait_time = 2**attempt + logger.info(f"Retrying in {wait_time}s...") + time.sleep(wait_time) + else: + logger.error(f"API call failed after {max_retries} attempts") + raise RuntimeError(f"LiteLLM API call failed: {e}") from e + + def get_stats(self) -> dict[str, Any]: + return {"cost": self.cost, "n_calls": self.n_calls, "tokens": self.total_tokens} diff --git a/benchmarks/courselab_bench/courselab_bench/runner/__init__.py b/benchmarks/courselab_bench/courselab_bench/runner/__init__.py new file mode 100644 index 0000000..aac2b6a --- /dev/null +++ b/benchmarks/courselab_bench/courselab_bench/runner/__init__.py @@ -0,0 +1,4 @@ +from courselab_bench.runner.executor import execute_task +from courselab_bench.runner.logger import save_trajectory + +__all__ = ["execute_task", "save_trajectory"] diff --git a/benchmarks/courselab_bench/courselab_bench/runner/executor.py b/benchmarks/courselab_bench/courselab_bench/runner/executor.py new file mode 100644 index 0000000..e7d3524 --- /dev/null +++ b/benchmarks/courselab_bench/courselab_bench/runner/executor.py @@ -0,0 +1,69 @@ +import time +from datetime import datetime +from typing import Any +from loguru import logger + + +def _run_evaluate_script(env: Any, evaluate_script: str, timeout: int) -> dict[str, Any]: + script_path = f"{env.work_dir}/evaluate.sh" + env.execute(f"cat > {script_path} << 
'EVALUATE_EOF'\n{evaluate_script}\nEVALUATE_EOF") + env.execute(f"chmod +x {script_path}") + result = env.execute(f"cd {env.work_dir} && bash {script_path}", timeout=timeout) + return result + + +def execute_task(task: dict[str, Any], agent: Any, env: Any) -> dict[str, Any]: + instance_id = task["instance_id"] + start_time = time.time() + + try: + env.setup(task) + except Exception as e: + return { + "instance_id": instance_id, + "timestamp": datetime.now().isoformat(), + "duration_seconds": time.time() - start_time, + "error": f"Setup failed: {str(e)}", + "trajectory": [], + "agent_steps": 0, + "agent_status": "setup_error", + "model_cost": 0.0, + "test_output": None, + "test_exit_code": None, + } + + try: + agent_result = agent.run(task) + except Exception as e: + logger.error(f"Agent error: {e}") + agent_result = {"messages": [], "cost": 0.0, "status": "agent_error", "steps": 0} + + logger.info(f"\nRunning evaluation...") + try: + test_timeout = task.get("timeout_minutes", 30) * 60 + test_result = _run_evaluate_script(env, task["evaluate_script"], test_timeout) + except Exception as e: + logger.error(f"Evaluation error: {e}") + test_result = {"output": f"[ERROR: {e}]", "returncode": -1} + + duration = time.time() - start_time + + result = { + "instance_id": instance_id, + "timestamp": datetime.now().isoformat(), + "duration_seconds": round(duration, 2), + "trajectory": agent_result.get("messages", []), + "agent_steps": agent_result.get("steps", 0), + "agent_status": agent_result.get("status", "unknown"), + "model_cost": agent_result.get("cost", 0.0), + "test_output": test_result.get("output"), + "test_exit_code": test_result.get("returncode"), + } + + status_symbol = "✓" if test_result.get("returncode") == 0 else "✗" + logger.info( + f"{status_symbol} Completed in {duration:.1f}s " + f"({agent_result.get('steps', 0)} steps, ${result['model_cost']:.4f})" + ) + + return result diff --git a/benchmarks/courselab_bench/courselab_bench/runner/logger.py b/benchmarks/courselab_bench/courselab_bench/runner/logger.py new file mode 100644 index 0000000..2e4697a --- /dev/null +++ b/benchmarks/courselab_bench/courselab_bench/runner/logger.py @@ -0,0 +1,43 @@ +import json +from pathlib import Path +from typing import Any +from loguru import logger + + +def save_trajectory(messages: list[dict[str, Any]], output_file: Path | str) -> None: + output_path = Path(output_file) + output_path.parent.mkdir(parents=True, exist_ok=True) + + with output_path.open("w", encoding="utf-8") as f: + for idx, message in enumerate(messages): + if "step" not in message: + message = {**message, "step": idx} + json.dump(message, f, ensure_ascii=False) + f.write("\n") + + +def load_trajectory(file_path: Path | str) -> list[dict[str, Any]]: + file_path = Path(file_path) + + if not file_path.exists(): + logger.error(f"Trajectory file not found: {file_path}") + raise FileNotFoundError(f"Trajectory file not found: {file_path}") + + logger.debug(f"Loading trajectory from: {file_path}") + + messages = [] + with file_path.open("r", encoding="utf-8") as f: + for line_num, line in enumerate(f, 1): + line = line.strip() + if not line: # Skip empty lines + continue + + try: + message = json.loads(line) + messages.append(message) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse line {line_num}: {e}") + raise ValueError(f"Invalid JSON on line {line_num}: {e}") from e + + logger.info(f"Loaded trajectory: {len(messages)} messages") + return messages diff --git 
a/benchmarks/courselab_bench/courselab_bench/utils/env_loader.py b/benchmarks/courselab_bench/courselab_bench/utils/env_loader.py new file mode 100644 index 0000000..6dd120c --- /dev/null +++ b/benchmarks/courselab_bench/courselab_bench/utils/env_loader.py @@ -0,0 +1,36 @@ +import os +from pathlib import Path +from loguru import logger + +try: + import tomli +except ImportError: + import tomllib as tomli # Python 3.11+ + + +def load_env_config(config_path: Path | str | None = None) -> dict: + if config_path: + env_file = Path(config_path) + else: + env_file = Path(".env.toml") + if not env_file.exists(): + # Try project root + project_root = Path(__file__).parent.parent.parent + env_file = project_root / ".env.toml" + + if not env_file.exists(): + return {} + + try: + with open(env_file, "rb") as f: + config = tomli.load(f) + + for key, value in config.items(): + if value: # Only set if value is not None/empty + os.environ[key] = str(value) + + return config + + except Exception as e: + logger.warning(f"Failed to load .env.toml: {e}") + return {} diff --git a/benchmarks/courselab_bench/data/courses.json b/benchmarks/courselab_bench/data/courses.json new file mode 100644 index 0000000..a3e2652 --- /dev/null +++ b/benchmarks/courselab_bench/data/courses.json @@ -0,0 +1,16 @@ +{ + "courses": [ + { + "course_id": "test_course", + "name": "Test Course", + "institution": "Benchmark University", + "year": 2024 + }, + { + "course_id": "mit_6_5840_2024", + "name": "6.5840: Distributed Systems", + "institution": "MIT", + "year": 2024 + } + ] +} \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/config.json new file mode 100644 index 0000000..51d3c57 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/config.json @@ -0,0 +1,13 @@ +{ + "instance_id": "mit_6_5840_2024_mapreduce_1", + "course_id": "mit_6_5840_2024", + "docker_image": "xuafeng/swe-go-python:latest", + "timeout_minutes": 30, + "tags": [ + "distributed-systems", + "mapreduce", + "concurrency", + "go" + ], + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" +} \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/evaluate.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/evaluate.sh new file mode 100755 index 0000000..43d516f --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/evaluate.sh @@ -0,0 +1,40 @@ +#!/bin/bash +set -e + +echo "=== Evaluation ===" + +cd /workspace + +export PATH=$PATH:/usr/local/go/bin + +echo "Verifying protected files were not modified" +PROTECTED_FILES=( + "src/main/mrcoordinator.go" + "src/main/mrworker.go" + "src/main/mrsequential.go" + "src/main/test-mr.sh" +) + +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ] && [ -f "/tmp/checksums/$(basename $file).sha256" ]; then + echo " Checking $file" + sha256sum -c "/tmp/checksums/$(basename $file).sha256" || { + echo "FAIL: $file was modified" + exit 1 + } + fi +done +echo "All protected files unchanged" + +echo "Running MapReduce tests" +cd src/main +timeout 600 bash test-mr.sh 2>&1 | tee test_output.txt + +echo "Checking test results" +if grep -q 'PASSED ALL TESTS' test_output.txt; then + echo "PASS: All tests passed" + exit 0 +else + echo "FAIL: Tests did not pass" + exit 1 +fi diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/preprocess.sh 
b/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/preprocess.sh new file mode 100755 index 0000000..7a32e77 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/preprocess.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -e + +echo "=== Preprocessing MapReduce Lab ===" + +cd /workspace + +echo "Creating checksums for protected files" +PROTECTED_FILES=( + "src/main/mrcoordinator.go" + "src/main/mrworker.go" + "src/main/mrsequential.go" + "src/main/test-mr.sh" +) + +mkdir -p /tmp/checksums +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ]; then + sha256sum "$file" > "/tmp/checksums/$(basename $file).sha256" + echo " $file" + fi +done + + + +echo "Preprocessing complete" +exit 0 diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/task.md b/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/task.md new file mode 100644 index 0000000..06f15ff --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/task.md @@ -0,0 +1,175 @@ +# MapReduce + +## Introduction + +Build a MapReduce system. You'll implement a worker process that calls application Map and Reduce functions and handles reading and writing files, and a coordinator process that hands out tasks to workers and copes with failed workers. + +## Getting Started + +We supply you with a simple sequential mapreduce implementation in `src/main/mrsequential.go`. It runs the maps and reduces one at a time, in a single process. We also provide you with a couple of MapReduce applications: word-count in `mrapps/wc.go`, and a text indexer in `mrapps/indexer.go`. You can run word count sequentially as follows: + +``` +$ cd src/main +$ go build -buildmode=plugin ../mrapps/wc.go +$ rm mr-out* +$ go run mrsequential.go wc.so pg*.txt +$ more mr-out-0 +A 509 +ABOUT 2 +ACT 8 +... +``` + +`mrsequential.go` leaves its output in the file `mr-out-0`. The input is from the text files named `pg-xxx.txt`. + +Feel free to borrow code from `mrsequential.go`. You should also have a look at `mrapps/wc.go` to see what MapReduce application code looks like. + +## Your Task + +Your job is to implement a distributed MapReduce, consisting of two programs, the coordinator and the worker. There will be just one coordinator process, and one or more worker processes executing in parallel. In a real system the workers would run on a bunch of different machines, but for this lab you'll run them all on a single machine. The workers will talk to the coordinator via RPC. Each worker process will, in a loop, ask the coordinator for a task, read the task's input from one or more files, execute the task, write the task's output to one or more files, and again ask the coordinator for a new task. The coordinator should notice if a worker hasn't completed its task in a reasonable amount of time (for this lab, use ten seconds), and give the same task to a different worker. + +We have given you a little code to start you off. The "main" routines for the coordinator and worker are in `main/mrcoordinator.go` and `main/mrworker.go`; don't change these files. You should put your implementation in `mr/coordinator.go`, `mr/worker.go`, and `mr/rpc.go`. + +Here's how to run your code on the word-count MapReduce application. First, make sure the word-count plugin is freshly built: + +``` +go build -buildmode=plugin ../mrapps/wc.go +``` + +In the `main` directory, run the coordinator. 
+ +``` +rm mr-out* +go run mrcoordinator.go pg-*.txt +``` + +The `pg-*.txt` arguments to `mrcoordinator.go` are the input files; each file corresponds to one "split", and is the input to one Map task. + +In one or more other windows, run some workers: + +``` +go run mrworker.go wc.so +``` + +When the workers and coordinator have finished, look at the output in `mr-out-*`. When you've completed the lab, the sorted union of the output files should match the sequential output, like this: + +``` +$ cat mr-out-* | sort | more +A 509 +ABOUT 2 +ACT 8 +... +``` + +We supply you with a test script in `main/test-mr.sh`. The tests check that the `wc` and `indexer` MapReduce applications produce the correct output when given the `pg-xxx.txt` files as input. The tests also check that your implementation runs the Map and Reduce tasks in parallel, and that your implementation recovers from workers that crash while running tasks. + +If you run the test script now, it will hang because the coordinator never finishes: + +``` +$ cd src/main +$ bash test-mr.sh +*** Starting wc test. +``` + +You can change `ret := false` to true in the Done function in `mr/coordinator.go` so that the coordinator exits immediately. Then: + +``` +$ bash test-mr.sh +*** Starting wc test. +sort: No such file or directory +cmp: EOF on mr-wc-all +--- wc output is not the same as mr-correct-wc.txt +--- wc test: FAIL +$ +``` + +The test script expects to see output in files named `mr-out-X`, one for each reduce task. The empty implementations of `mr/coordinator.go` and `mr/worker.go` don't produce those files (or do much of anything else), so the test fails. + +When you've finished, the test script output should look like this: + +``` +$ bash test-mr.sh +*** Starting wc test. +--- wc test: PASS +*** Starting indexer test. +--- indexer test: PASS +*** Starting map parallelism test. +--- map parallelism test: PASS +*** Starting reduce parallelism test. +--- reduce parallelism test: PASS +*** Starting job count test. +--- job count test: PASS +*** Starting early exit test. +--- early exit test: PASS +*** Starting crash test. +--- crash test: PASS +*** PASSED ALL TESTS +$ +``` + +## A few rules + +- The map phase should divide the intermediate keys into buckets for `nReduce` reduce tasks, where `nReduce` is the number of reduce tasks -- the argument that `main/mrcoordinator.go` passes to `MakeCoordinator()`. Each mapper should create `nReduce` intermediate files for consumption by the reduce tasks. +- The worker implementation should put the output of the X'th reduce task in the file `mr-out-X`. +- A `mr-out-X` file should contain one line per Reduce function output. The line should be generated with the Go `"%v %v"` format, called with the key and value. Have a look in `main/mrsequential.go` for the line commented "this is the correct format". The test script will fail if your implementation deviates too much from this format. +- You can modify `mr/worker.go`, `mr/coordinator.go`, and `mr/rpc.go`. You can temporarily modify other files for testing, but make sure your code works with the original versions; we'll test with the original versions. +- The worker should put intermediate Map output in files in the current directory, where your worker can later read them as input to Reduce tasks. +- `main/mrcoordinator.go` expects `mr/coordinator.go` to implement a `Done()` method that returns true when the MapReduce job is completely finished; at that point, `mrcoordinator.go` will exit. 
+- When the job is completely finished, the worker processes should exit. A simple way to implement this is to use the return value from `call()`: if the worker fails to contact the coordinator, it can assume that the coordinator has exited because the job is done, so the worker can terminate too. Depending on your design, you might also find it helpful to have a "please exit" pseudo-task that the coordinator can give to workers. + +## Hints + +- One way to get started is to modify `mr/worker.go`'s `Worker()` to send an RPC to the coordinator asking for a task. Then modify the coordinator to respond with the file name of an as-yet-unstarted map task. Then modify the worker to read that file and call the application Map function, as in `mrsequential.go`. +- The application Map and Reduce functions are loaded at run-time using the Go plugin package, from files whose names end in `.so`. +- If you change anything in the `mr/` directory, you will probably have to re-build any MapReduce plugins you use, with something like `go build -buildmode=plugin ../mrapps/wc.go` +- This lab relies on the workers sharing a file system. That's straightforward when all workers run on the same machine, but would require a global filesystem like GFS if the workers ran on different machines. +- A reasonable naming convention for intermediate files is `mr-X-Y`, where X is the Map task number, and Y is the reduce task number. +- The worker's map task code will need a way to store intermediate key/value pairs in files in a way that can be correctly read back during reduce tasks. One possibility is to use Go's encoding/json package. To write key/value pairs in JSON format to an open file: + + ``` + enc := json.NewEncoder(file) + for _, kv ... { + err := enc.Encode(&kv) + ``` + + and to read such a file back: + + ``` + dec := json.NewDecoder(file) + for { + var kv KeyValue + if err := dec.Decode(&kv); err != nil { + break + } + kva = append(kva, kv) + } + ``` + +- The map part of your worker can use the `ihash(key)` function (in `worker.go`) to pick the reduce task for a given key. +- You can steal some code from `mrsequential.go` for reading Map input files, for sorting intermediate key/value pairs between the Map and Reduce, and for storing Reduce output in files. +- The coordinator, as an RPC server, will be concurrent; don't forget to lock shared data. +- Use Go's race detector, with `go run -race`. `test-mr.sh` has a comment at the start that tells you how to run it with `-race`. When we grade your labs, we will **not** use the race detector. Nevertheless, if your code has races, there's a good chance it will fail when we test it even without the race detector. +- Workers will sometimes need to wait, e.g. reduces can't start until the last map has finished. One possibility is for workers to periodically ask the coordinator for work, sleeping with `time.Sleep()` between each request. Another possibility is for the relevant RPC handler in the coordinator to have a loop that waits, either with `time.Sleep()` or `sync.Cond`. Go runs the handler for each RPC in its own thread, so the fact that one handler is waiting needn't prevent the coordinator from processing other RPCs. +- The coordinator can't reliably distinguish between crashed workers, workers that are alive but have stalled for some reason, and workers that are executing but too slowly to be useful. The best you can do is have the coordinator wait for some amount of time, and then give up and re-issue the task to a different worker. 
For this lab, have the coordinator wait for ten seconds; after that the coordinator should assume the worker has died (of course, it might not have). +- To test crash recovery, you can use the `mrapps/crash.go` application plugin. It randomly exits in the Map and Reduce functions. +- To ensure that nobody observes partially written files in the presence of crashes, the MapReduce paper mentions the trick of using a temporary file and atomically renaming it once it is completely written. You can use `ioutil.TempFile` (or `os.CreateTemp` if you are running Go 1.17 or later) to create a temporary file and `os.Rename` to atomically rename it. +- `test-mr.sh` runs all its processes in the sub-directory `mr-tmp`, so if something goes wrong and you want to look at intermediate or output files, look there. Feel free to temporarily modify `test-mr.sh` to `exit` after the failing test, so the script does not continue testing (and overwrite the output files). +- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names. +- When calling the RPC call() function, the reply struct should contain all default values. RPC calls should look like this: + + ``` + reply := SomeType{} + call(..., &reply) + ``` + + without setting any fields of reply before the call. If you pass reply structures that have non-default fields, the RPC system may silently return incorrect values. + +## Files Agent Should Modify + +The agent should implement the solution by modifying the following files: + +- `src/mr/coordinator.go` +- `src/mr/rpc.go` +- `src/mr/worker.go` + +**Important**: Do not modify test files, configuration files, or other infrastructure files. diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/config.json new file mode 100644 index 0000000..03b2dff --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/config.json @@ -0,0 +1,13 @@ +{ + "instance_id": "mit_6_5840_2024_kvsrv_2a", + "course_id": "mit_6_5840_2024", + "docker_image": "xuafeng/swe-go-python:latest", + "timeout_minutes": 30, + "tags": [ + "distributed-systems", + "kv-store", + "rpc", + "go" + ], + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" +} \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/evaluate.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/evaluate.sh new file mode 100755 index 0000000..76f1dcb --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/evaluate.sh @@ -0,0 +1,37 @@ +#!/bin/bash +set -e + +echo "=== Evaluation ===" + +cd /workspace + +export PATH=$PATH:/usr/local/go/bin + +echo "Verifying protected files were not modified" +PROTECTED_FILES=( + "src/kvsrv/config.go" + "src/kvsrv/test_test.go" +) + +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ] && [ -f "/tmp/checksums/$(basename $file).sha256" ]; then + echo " Checking $file" + sha256sum -c "/tmp/checksums/$(basename $file).sha256" || { + echo "FAIL: $file was modified" + exit 1 + } + fi +done +echo "All protected files unchanged" + +echo "Running KVServer TestBasic" +cd src/kvsrv +go test -run TestBasic -race + +if [ $? 
-eq 0 ]; then + echo "PASS: Tests passed" + exit 0 +else + echo "FAIL: Tests failed" + exit 1 +fi diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/preprocess.sh new file mode 100755 index 0000000..12af7bc --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/preprocess.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -e + +echo "=== Preprocessing KVServer Lab 2a ===" + +cd /workspace + +echo "Creating checksums for protected files" +PROTECTED_FILES=( + "src/kvsrv/config.go" + "src/kvsrv/test_test.go" +) + +mkdir -p /tmp/checksums +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ]; then + sha256sum "$file" > "/tmp/checksums/$(basename $file).sha256" + echo " $file" + fi +done + + + +echo "Preprocessing complete" +exit 0 diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/task.md b/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/task.md new file mode 100644 index 0000000..d5d224f --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/task.md @@ -0,0 +1,49 @@ +# Key/Value Server + +## Introduction + +Build a key/value server for a single machine that ensures operations are linearizable. Clients can send three different RPCs to the key/value server: `Put(key, value)`, `Append(key, arg)`, and `Get(key)`. The server maintains an in-memory map of key/value pairs. Keys and values are strings. `Put(key, value)` installs or replaces the value for a particular key in the map, `Append(key, arg)` appends arg to key's value _and_ returns the old value, and `Get(key)` fetches the current value for the key. A `Get` for a non-existent key should return an empty string. An `Append` to a non-existent key should act as if the existing value were a zero-length string. Each client talks to the server through a `Clerk` with Put/Append/Get methods. A `Clerk` manages RPC interactions with the server. + +Your server must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If client requests aren't concurrent, each client Get/Put/Append call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. + +Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server. + +## Getting Started + +We supply you with skeleton code and tests in `src/kvsrv`. You will need to modify `kvsrv/client.go`, `kvsrv/server.go`, and `kvsrv/common.go`. + +## Task: Key/value server with no network failures + +Your task is to implement a solution that works when there are no dropped messages. + +You'll need to add RPC-sending code to the Clerk Put/Append/Get methods in `client.go`, and implement `Put`, `Append()` and `Get()` RPC handlers in `server.go`. 
+ +You have completed this task when you pass the first two tests in the test suite: "one client" and "many clients". + +## Hints + +- Check that your code is race-free using `go test -race`. + +## Testing + +Run: `cd src/kvsrv && go test -run TestBasic -race` + +You should see: + +``` +Test: one client ... + ... Passed +Test: many clients ... + ... Passed +PASS +``` + +## Files Agent Should Modify + +The agent should implement the solution by modifying the following files: + +- `src/kvsrv/client.go` +- `src/kvsrv/common.go` +- `src/kvsrv/server.go` + +**Important**: Do not modify test files, configuration files, or other infrastructure files. diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/config.json new file mode 100644 index 0000000..517fa18 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/config.json @@ -0,0 +1,14 @@ +{ + "instance_id": "mit_6_5840_2024_kvsrv_2b", + "course_id": "mit_6_5840_2024", + "docker_image": "xuafeng/swe-go-python:latest", + "timeout_minutes": 30, + "tags": [ + "distributed-systems", + "kv-store", + "rpc", + "fault-tolerance", + "go" + ], + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" +} \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/evaluate.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/evaluate.sh new file mode 100755 index 0000000..c52024c --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/evaluate.sh @@ -0,0 +1,37 @@ +#!/bin/bash +set -e + +echo "=== Evaluation ===" + +cd /workspace + +export PATH=$PATH:/usr/local/go/bin + +echo "Verifying protected files were not modified" +PROTECTED_FILES=( + "src/kvsrv/config.go" + "src/kvsrv/test_test.go" +) + +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ] && [ -f "/tmp/checksums/$(basename $file).sha256" ]; then + echo " Checking $file" + sha256sum -c "/tmp/checksums/$(basename $file).sha256" || { + echo "FAIL: $file was modified" + exit 1 + } + fi +done +echo "All protected files unchanged" + +echo "Running KVServer all tests" +cd src/kvsrv +go test -race + +if [ $? -eq 0 ]; then + echo "PASS: Tests passed" + exit 0 +else + echo "FAIL: Tests failed" + exit 1 +fi diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/preprocess.sh new file mode 100755 index 0000000..3b68bae --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/preprocess.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -e + +echo "=== Preprocessing KVServer Lab 2b ===" + +cd /workspace + +echo "Creating checksums for protected files" +PROTECTED_FILES=( + "src/kvsrv/config.go" + "src/kvsrv/test_test.go" +) + +mkdir -p /tmp/checksums +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ]; then + sha256sum "$file" > "/tmp/checksums/$(basename $file).sha256" + echo " $file" + fi +done + + + +echo "Preprocessing complete" +exit 0 diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/task.md b/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/task.md new file mode 100644 index 0000000..13adb57 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/task.md @@ -0,0 +1,65 @@ +# Key/Value Server + +## Introduction + +Build a key/value server for a single machine that ensures operations are linearizable and each operation is executed exactly once despite network failures. 
Clients can send three different RPCs to the key/value server: `Put(key, value)`, `Append(key, arg)`, and `Get(key)`. The server maintains an in-memory map of key/value pairs. Keys and values are strings. `Put(key, value)` installs or replaces the value for a particular key in the map, `Append(key, arg)` appends arg to key's value _and_ returns the old value, and `Get(key)` fetches the current value for the key. A `Get` for a non-existent key should return an empty string. An `Append` to a non-existent key should act as if the existing value were a zero-length string. Each client talks to the server through a `Clerk` with Put/Append/Get methods. A `Clerk` manages RPC interactions with the server. + +Your server must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If client requests aren't concurrent, each client Get/Put/Append call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. + +Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server. + +## Getting Started + +We supply you with skeleton code and tests in `src/kvsrv`. You will need to modify `kvsrv/client.go`, `kvsrv/server.go`, and `kvsrv/common.go`. + +## Task: Key/value server with dropped messages + +Your task is to modify your solution to continue in the face of dropped messages (e.g., RPC requests and RPC replies). If a message was lost, then the client's `ck.server.Call()` will return `false` (more precisely, `Call()` waits for a reply message for a timeout interval, and returns false if no reply arrives within that time). One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it succeeds. Each call to `Clerk.Put()` or `Clerk.Append()`, however, should result in just a _single_ execution, so you will have to ensure that the re-send doesn't result in the server executing the request twice. + +Add code to `Clerk` to retry if doesn't receive a reply, and to `server.go` to filter duplicates if the operation requires it. + +## Hints + +- You will need to uniquely identify client operations to ensure that the key/value server executes each one just once. +- You will have to think carefully about what state the server must maintain for handling duplicate `Get()`, `Put()`, and `Append()` requests, if any at all. +- Your scheme for duplicate detection should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. It's OK to assume that a client will make only one call into a Clerk at a time. + +## Testing + +Run: `cd src/kvsrv && go test -race` + +Your code should pass all tests, like this: + +``` +Test: one client ... + ... Passed -- t 3.8 nrpc 31135 ops 31135 +Test: many clients ... + ... 
Passed -- t 4.7 nrpc 102853 ops 102853 +Test: unreliable net, many clients ... + ... Passed -- t 4.1 nrpc 580 ops 496 +Test: concurrent append to same key, unreliable ... + ... Passed -- t 0.6 nrpc 61 ops 52 +Test: memory use get ... + ... Passed -- t 0.4 nrpc 4 ops 0 +Test: memory use put ... + ... Passed -- t 0.2 nrpc 2 ops 0 +Test: memory use append ... + ... Passed -- t 0.4 nrpc 2 ops 0 +Test: memory use many puts ... + ... Passed -- t 11.5 nrpc 100000 ops 0 +Test: memory use many gets ... + ... Passed -- t 12.2 nrpc 100001 ops 0 +PASS +``` + +The numbers after each `Passed` are real time in seconds, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put/Append calls). + +## Files Agent Should Modify + +The agent should implement the solution by modifying the following files: + +- `src/kvsrv/client.go` +- `src/kvsrv/common.go` +- `src/kvsrv/server.go` + +**Important**: Do not modify test files, configuration files, or other infrastructure files. diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/config.json new file mode 100644 index 0000000..a7a1da3 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/config.json @@ -0,0 +1,13 @@ +{ + "instance_id": "mit_6_5840_2024_raft_3a", + "course_id": "mit_6_5840_2024", + "docker_image": "xuafeng/swe-go-python:latest", + "timeout_minutes": 30, + "tags": [ + "distributed-systems", + "raft", + "consensus", + "go" + ], + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" +} \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/evaluate.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/evaluate.sh new file mode 100755 index 0000000..312e553 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/evaluate.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -e + +echo "=== Evaluation ===" + +cd /workspace + +export PATH=$PATH:/usr/local/go/bin + +echo "Verifying protected files were not modified" +PROTECTED_FILES=( + "src/raft/config.go" + "src/raft/persister.go" + "src/raft/test_test.go" + +) + +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ] && [ -f "/tmp/checksums/$(basename $file).sha256" ]; then + echo " Checking $file" + sha256sum -c "/tmp/checksums/$(basename $file).sha256" || { + echo "FAIL: $file was modified" + exit 1 + } + fi +done +echo "All protected files unchanged" + +echo "Running Raft 3A tests" +cd src/raft +go test -run 3A -race + +if [ $? 
-eq 0 ]; then + echo "PASS: Tests passed" + exit 0 +else + echo "FAIL: Tests failed" + exit 1 +fi diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/preprocess.sh new file mode 100755 index 0000000..b9baf40 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/preprocess.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -e + +echo "=== Preprocessing Raft Lab 3A ===" + +cd /workspace + +echo "Creating checksums for protected files" +PROTECTED_FILES=( + "src/raft/config.go" + "src/raft/persister.go" + "src/raft/test_test.go" + +) + +mkdir -p /tmp/checksums +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ]; then + sha256sum "$file" > "/tmp/checksums/$(basename $file).sha256" + echo " $file" + fi +done + + + +echo "Preprocessing complete" +exit 0 diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/task.md b/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/task.md new file mode 100644 index 0000000..b891601 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/task.md @@ -0,0 +1,118 @@ +# Raft + +### Introduction + +This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will "shard" your service over multiple replicated state machines for higher performance. + +A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. + +Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. + +In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with _index numbers_. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. + +You should follow the design in the extended Raft paper, with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). + +This lab is due in four parts. You must submit each part on the corresponding due date. 
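+
+As an illustration of the ticker pattern that the Part 3A hints below describe (a goroutine that sleeps and starts an election when it has not heard from a leader within a randomized timeout), consider the sketch that follows. Every name and constant in it is an assumption for illustration only, not part of the skeleton.
+
+```go
+package raftsketch
+
+import (
+    "math/rand"
+    "sync"
+    "time"
+)
+
+// Sketch only: the real Raft struct in raft.go will carry much more state.
+type raftNode struct {
+    mu            sync.Mutex
+    lastHeartbeat time.Time // reset whenever a valid leader or candidate is heard from
+    dead          bool
+}
+
+// ticker sleeps for a randomized election timeout and starts an election if
+// no contact arrived in the meantime. The 300-600 ms range is an arbitrary
+// example; choose values that satisfy the tester (heartbeats at most ten per
+// second, a new leader within five seconds of a failure).
+func (rf *raftNode) ticker(startElection func()) {
+    for {
+        timeout := time.Duration(300+rand.Intn(300)) * time.Millisecond
+        time.Sleep(timeout)
+
+        rf.mu.Lock()
+        if rf.dead {
+            rf.mu.Unlock()
+            return
+        }
+        expired := time.Since(rf.lastHeartbeat) >= timeout
+        rf.mu.Unlock()
+
+        if expired {
+            startElection()
+        }
+    }
+}
+```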
+ +### Getting Started + +We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. + +When we grade your submissions, we will run the tests without the `-race` flag. However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. + +To get up and running, execute the following commands: + +```sh +$ cd src/raft +$ go test +Test (3A): initial election ... +--- FAIL: TestInitialElection3A (5.04s) +config.go:326: expected one leader, got none +Test (3A): election after network failure ... +--- FAIL: TestReElection3A (5.03s) +config.go:326: expected one leader, got none +... +$ +``` + +### The code + +Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. + +Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. + +```go +// create a new Raft server instance: +rf := Make(peers, me, persister, applyCh) + +// start agreement on a new log entry: +rf.Start(command interface{}) (index, term, isleader) + +// ask a Raft for its current term, and whether it thinks it is leader +rf.GetState() (term, isLeader) + +// each time a new entry is committed to the log, each Raft peer +// should send an ApplyMsg to the service (or tester). +type ApplyMsg +``` + +A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. + +`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. + +Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code. + +### Part 3A: leader election ("moderate") + +#### Task + +Implement Raft leader election and heartbeats (`AppendEntries` RPCs with no log entries). The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run `go test -run 3A` to test your 3A code. + +#### Hints + +- You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. `go test -run 3A` . +- Follow the paper's Figure 2. 
At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election, +- Add the Figure 2 state for leader election to the `Raft` struct in `raft.go`. You'll also need to define a struct to hold information about each log entry. +- Fill in the `RequestVoteArgs` and `RequestVoteReply` structs. Modify `Make()` to create a background goroutine that will kick off leader election periodically by sending out `RequestVote` RPCs when it hasn't heard from another peer for a while. Implement the `RequestVote()` RPC handler so that servers will vote for one another. +- To implement heartbeats, define an `AppendEntries` RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an `AppendEntries` RPC handler method. +- The tester requires that the leader send heartbeat RPCs no more than ten times per second. +- The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate). +- The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds. +- You may find Go's rand useful. +- You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls time.Sleep(); see the ticker() goroutine that Make() creates for this purpose. Don't use Go's `time.Timer` or `time.Ticker`, which are difficult to use correctly. +- If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure. +- Don't forget to implement `GetState()`. +- The tester calls your Raft's `rf.Kill()` when it is permanently shutting down an instance. You can check whether `Kill()` has been called using `rf.killed()`. You may want to do this in all loops, to avoid having dead Raft instances print confusing messages. +- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). The `labgob` package will warn you about this; don't ignore the warnings. +- The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug. + +Be sure you pass the 3A tests before submitting Part 3A, so that you see something like this: + +```sh +$ go test -run 3A +Test (3A): initial election ... +... Passed -- 3.5 3 58 16840 0 +Test (3A): election after network failure ... +... Passed -- 5.4 3 118 25269 0 +Test (3A): multiple elections ... +... Passed -- 7.3 7 624 138014 0 +PASS +ok raft 16.265s +$ +``` + +Each "Passed" line contains five numbers; these are the time that the test took in seconds, the number of Raft peers, the number of RPCs sent during the test, the total number of bytes in the RPC messages, and the number of log entries that Raft reports were committed. Your numbers will differ from those shown here. 
You can ignore the numbers if you like, but they may help you sanity-check the number of RPCs that your implementation sends. For all of labs 3, 4, and 5, the grading script will fail your solution if it takes more than 600 seconds for all of the tests (`go test`), or if any individual test takes more than 120 seconds. + +When we grade your submissions, we will run the tests without the `-race` flag. However, you should make sure that your code consistently passes the tests with the `-race` flag. + +## Files Agent Should Modify + +The agent should implement the solution by modifying the following files: + +- `src/raft/append_entries.go` +- `src/raft/election.go` +- `src/raft/install_snapshot.go` +- `src/raft/raft.go` +- `src/raft/util.go` + +**Important**: Do not modify test files, configuration files, or other infrastructure files. diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/config.json new file mode 100644 index 0000000..39823b9 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/config.json @@ -0,0 +1,13 @@ +{ + "instance_id": "mit_6_5840_2024_raft_3b", + "course_id": "mit_6_5840_2024", + "docker_image": "xuafeng/swe-go-python:latest", + "timeout_minutes": 30, + "tags": [ + "distributed-systems", + "raft", + "consensus", + "go" + ], + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" +} \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/evaluate.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/evaluate.sh new file mode 100755 index 0000000..ecffd3a --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/evaluate.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -e + +echo "=== Evaluation ===" + +cd /workspace + +export PATH=$PATH:/usr/local/go/bin + +echo "Verifying protected files were not modified" +PROTECTED_FILES=( + "src/raft/config.go" + "src/raft/persister.go" + "src/raft/test_test.go" + +) + +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ] && [ -f "/tmp/checksums/$(basename $file).sha256" ]; then + echo " Checking $file" + sha256sum -c "/tmp/checksums/$(basename $file).sha256" || { + echo "FAIL: $file was modified" + exit 1 + } + fi +done +echo "All protected files unchanged" + +echo "Running Raft 3B tests" +cd src/raft +go test -run 3B -race + +if [ $? 
-eq 0 ]; then + echo "PASS: Tests passed" + exit 0 +else + echo "FAIL: Tests failed" + exit 1 +fi diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/preprocess.sh new file mode 100755 index 0000000..ef6a611 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/preprocess.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -e + +echo "=== Preprocessing Raft Lab 3B ===" + +cd /workspace + +echo "Creating checksums for protected files" +PROTECTED_FILES=( + "src/raft/config.go" + "src/raft/persister.go" + "src/raft/test_test.go" + +) + +mkdir -p /tmp/checksums +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ]; then + sha256sum "$file" > "/tmp/checksums/$(basename $file).sha256" + echo " $file" + fi +done + + + +echo "Preprocessing complete" +exit 0 diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/task.md b/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/task.md new file mode 100644 index 0000000..b6c16a4 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/task.md @@ -0,0 +1,163 @@ +# Raft + +### Introduction + +This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will "shard" your service over multiple replicated state machines for higher performance. + +A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. + +Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. + +In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with _index numbers_. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. + +You should follow the design in the extended Raft paper, with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). + +This lab is due in four parts. You must submit each part on the corresponding due date. 
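+
+Since this part of the lab centers on the log of numbered entries described above, here is one possible shape for a log entry, shown only as an illustration. The exported (capitalized) field names matter because labgob/labrpc silently drop unexported fields, as the hints point out; everything else about the struct is up to your design.
+
+```go
+package raftsketch
+
+// Illustrative only. Exported field names are required so that labgob can
+// encode entries carried inside AppendEntries RPCs; which fields you
+// actually need follows from Figure 2 of the Raft paper.
+type LogEntry struct {
+    Term    int         // term in which the leader received the entry
+    Command interface{} // opaque command handed to Start()
+}
+```
+
+With a type like this, the persistent log in the `Raft` struct is simply a `[]LogEntry` slice, indexed as in Figure 2.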
+ +### Getting Started + +We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. + +When we grade your submissions, we will run the tests without the `-race` flag. However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. + +To get up and running, execute the following commands: + +```sh +$ cd src/raft +$ go test +Test (3A): initial election ... +--- FAIL: TestInitialElection3A (5.04s) +config.go:326: expected one leader, got none +Test (3A): election after network failure ... +--- FAIL: TestReElection3A (5.03s) +config.go:326: expected one leader, got none +... +$ +``` + +### The code + +Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. + +Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. + +```go +// create a new Raft server instance: +rf := Make(peers, me, persister, applyCh) + +// start agreement on a new log entry: +rf.Start(command interface{}) (index, term, isleader) + +// ask a Raft for its current term, and whether it thinks it is leader +rf.GetState() (term, isLeader) + +// each time a new entry is committed to the log, each Raft peer +// should send an ApplyMsg to the service (or tester). +type ApplyMsg +``` + +A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. + +`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. + +Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code. + +### Part 3A: leader election ("moderate") + +#### Task + +Implement Raft leader election and heartbeats (`AppendEntries` RPCs with no log entries). The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run `go test -run 3A` to test your 3A code. + +#### Hints + +- You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. `go test -run 3A` . +- Follow the paper's Figure 2. 
At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election, +- Add the Figure 2 state for leader election to the `Raft` struct in `raft.go`. You'll also need to define a struct to hold information about each log entry. +- Fill in the `RequestVoteArgs` and `RequestVoteReply` structs. Modify `Make()` to create a background goroutine that will kick off leader election periodically by sending out `RequestVote` RPCs when it hasn't heard from another peer for a while. Implement the `RequestVote()` RPC handler so that servers will vote for one another. +- To implement heartbeats, define an `AppendEntries` RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an `AppendEntries` RPC handler method. +- The tester requires that the leader send heartbeat RPCs no more than ten times per second. +- The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate). +- The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds. +- You may find Go's rand useful. +- You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls time.Sleep(); see the ticker() goroutine that Make() creates for this purpose. Don't use Go's `time.Timer` or `time.Ticker`, which are difficult to use correctly. +- If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure. +- Don't forget to implement `GetState()`. +- The tester calls your Raft's `rf.Kill()` when it is permanently shutting down an instance. You can check whether `Kill()` has been called using `rf.killed()`. You may want to do this in all loops, to avoid having dead Raft instances print confusing messages. +- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). The `labgob` package will warn you about this; don't ignore the warnings. +- The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug. + +Be sure you pass the 3A tests before submitting Part 3A, so that you see something like this: + +```sh +$ go test -run 3A +Test (3A): initial election ... +... Passed -- 3.5 3 58 16840 0 +Test (3A): election after network failure ... +... Passed -- 5.4 3 118 25269 0 +Test (3A): multiple elections ... +... Passed -- 7.3 7 624 138014 0 +PASS +ok raft 16.265s +$ +``` + +Each "Passed" line contains five numbers; these are the time that the test took in seconds, the number of Raft peers, the number of RPCs sent during the test, the total number of bytes in the RPC messages, and the number of log entries that Raft reports were committed. Your numbers will differ from those shown here. 
You can ignore the numbers if you like, but they may help you sanity-check the number of RPCs that your implementation sends. For all of labs 3, 4, and 5, the grading script will fail your solution if it takes more than 600 seconds for all of the tests (`go test`), or if any individual test takes more than 120 seconds. + +When we grade your submissions, we will run the tests without the `-race` flag. However, you should make sure that your code consistently passes the tests with the `-race` flag. + +### Part 3B: log ("hard") + +#### Task + +Implement the leader and follower code to append new log entries, so that the `go test -run 3B` tests pass. + +#### Hints + +- Your first goal should be to pass `TestBasicAgree3B()`. Start by implementing `Start()`, then write the code to send and receive new log entries via `AppendEntries` RPCs, following Figure 2. Send each newly committed entry on `applyCh` on each peer. +- You will need to implement the election restriction (section 5.4.1 in the paper). +- Your code may have loops that repeatedly check for certain events. Don't have these loops execute continuously without pausing, since that will slow your implementation enough that it fails tests. Use Go's condition variables, or insert a `time.Sleep(10 * time.Millisecond)` in each loop iteration. +- Do yourself a favor for future labs and write (or re-write) code that's clean and clear. +- If you fail a test, look at `test_test.go` and `config.go` to understand what's being tested. `config.go` also illustrates how the tester uses the Raft API. + +The tests for upcoming labs may fail your code if it runs too slowly. You can check how much real time and CPU time your solution uses with the time command. Here's typical output: + +```sh +$ time go test -run 3B +Test (3B): basic agreement ... +... Passed -- 0.9 3 16 4572 3 +Test (3B): RPC byte count ... +... Passed -- 1.7 3 48 114536 11 +Test (3B): agreement after follower reconnects ... +... Passed -- 3.6 3 78 22131 7 +Test (3B): no agreement if too many followers disconnect ... +... Passed -- 3.8 5 172 40935 3 +Test (3B): concurrent Start()s ... +... Passed -- 1.1 3 24 7379 6 +Test (3B): rejoin of partitioned leader ... +... Passed -- 5.1 3 152 37021 4 +Test (3B): leader backs up quickly over incorrect follower logs ... +... Passed -- 17.2 5 2080 1587388 102 +Test (3B): RPC counts aren't too high ... +... Passed -- 2.2 3 60 20119 12 +PASS +ok raft 35.557s + +real 0m35.899s +user 0m2.556s +sys 0m1.458s +$ +``` + +The "ok raft 35.557s" means that Go measured the time taken for the 3B tests to be 35.557 seconds of real (wall-clock) time. The "user 0m2.556s" means that the code consumed 2.556 seconds of CPU time, or time spent actually executing instructions (rather than waiting or sleeping). If your solution uses much more than a minute of real time for the 3B tests, or much more than 5 seconds of CPU time, you may run into trouble later on. Look for time spent sleeping or waiting for RPC timeouts, loops that run without sleeping or waiting for conditions or channel messages, or large numbers of RPCs sent. + +## Files Agent Should Modify + +The agent should implement the solution by modifying the following files: + +- `src/raft/append_entries.go` +- `src/raft/election.go` +- `src/raft/install_snapshot.go` +- `src/raft/raft.go` +- `src/raft/util.go` + +**Important**: Do not modify test files, configuration files, or other infrastructure files. 
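+
+To make the "don't spin without pausing" hint above concrete, here is a hedged sketch of a background applier that delivers newly committed entries without busy-waiting. The struct, field, and channel names are assumptions chosen for the example; a `sync.Cond` signalled whenever `commitIndex` advances works just as well, as the hint notes.
+
+```go
+package raftsketch
+
+import (
+    "sync"
+    "time"
+)
+
+// Assumed message and field names; the skeleton's ApplyMsg in raft.go is
+// what the tester actually expects.
+type applyMsg struct {
+    CommandValid bool
+    Command      interface{}
+    CommandIndex int
+}
+
+// Sketch only: just enough state to show the pattern.
+type raftNode struct {
+    mu          sync.Mutex
+    log         []interface{} // commands; index 0 unused so entries are 1-indexed
+    commitIndex int
+    lastApplied int
+    applyCh     chan applyMsg
+    dead        bool
+}
+
+// applier periodically hands newly committed entries to the service on
+// applyCh, sleeping between checks rather than spinning.
+func (rf *raftNode) applier() {
+    for {
+        rf.mu.Lock()
+        if rf.dead {
+            rf.mu.Unlock()
+            return
+        }
+        var pending []applyMsg
+        for rf.lastApplied < rf.commitIndex {
+            rf.lastApplied++
+            pending = append(pending, applyMsg{
+                CommandValid: true,
+                Command:      rf.log[rf.lastApplied],
+                CommandIndex: rf.lastApplied,
+            })
+        }
+        rf.mu.Unlock()
+
+        for _, m := range pending {
+            rf.applyCh <- m // send without holding the mutex
+        }
+        time.Sleep(10 * time.Millisecond)
+    }
+}
+```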
diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/config.json new file mode 100644 index 0000000..c61e661 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/config.json @@ -0,0 +1,13 @@ +{ + "instance_id": "mit_6_5840_2024_raft_3c", + "course_id": "mit_6_5840_2024", + "docker_image": "xuafeng/swe-go-python:latest", + "timeout_minutes": 30, + "tags": [ + "distributed-systems", + "raft", + "consensus", + "go" + ], + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" +} \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/evaluate.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/evaluate.sh new file mode 100755 index 0000000..3c37d2a --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/evaluate.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -e + +echo "=== Evaluation ===" + +cd /workspace + +export PATH=$PATH:/usr/local/go/bin + +echo "Verifying protected files were not modified" +PROTECTED_FILES=( + "src/raft/config.go" + "src/raft/persister.go" + "src/raft/test_test.go" + +) + +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ] && [ -f "/tmp/checksums/$(basename $file).sha256" ]; then + echo " Checking $file" + sha256sum -c "/tmp/checksums/$(basename $file).sha256" || { + echo "FAIL: $file was modified" + exit 1 + } + fi +done +echo "All protected files unchanged" + +echo "Running Raft 3C tests" +cd src/raft +go test -run 3C -race + +if [ $? -eq 0 ]; then + echo "PASS: Tests passed" + exit 0 +else + echo "FAIL: Tests failed" + exit 1 +fi diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/preprocess.sh new file mode 100755 index 0000000..17ff863 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/preprocess.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -e + +echo "=== Preprocessing Raft Lab 3C ===" + +cd /workspace + +echo "Creating checksums for protected files" +PROTECTED_FILES=( + "src/raft/config.go" + "src/raft/persister.go" + "src/raft/test_test.go" + +) + +mkdir -p /tmp/checksums +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ]; then + sha256sum "$file" > "/tmp/checksums/$(basename $file).sha256" + echo " $file" + fi +done + + + +echo "Preprocessing complete" +exit 0 diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/task.md b/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/task.md new file mode 100644 index 0000000..8834a2e --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/task.md @@ -0,0 +1,227 @@ +# Raft + +### Introduction + +This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will "shard" your service over multiple replicated state machines for higher performance. + +A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. 
+ +Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. + +In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with _index numbers_. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. + +You should follow the design in the extended Raft paper, with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). + +This lab is due in four parts. You must submit each part on the corresponding due date. + +### Getting Started + +We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. + +When we grade your submissions, we will run the tests without the `-race` flag. However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. + +To get up and running, execute the following commands: + +```sh +$ cd src/raft +$ go test +Test (3A): initial election ... +--- FAIL: TestInitialElection3A (5.04s) +config.go:326: expected one leader, got none +Test (3A): election after network failure ... +--- FAIL: TestReElection3A (5.03s) +config.go:326: expected one leader, got none +... +$ +``` + +### The code + +Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. + +Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. + +```go +// create a new Raft server instance: +rf := Make(peers, me, persister, applyCh) + +// start agreement on a new log entry: +rf.Start(command interface{}) (index, term, isleader) + +// ask a Raft for its current term, and whether it thinks it is leader +rf.GetState() (term, isLeader) + +// each time a new entry is committed to the log, each Raft peer +// should send an ApplyMsg to the service (or tester). +type ApplyMsg +``` + +A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. 
`Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. + +`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. + +Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code. + +### Part 3A: leader election ("moderate") + +#### Task + +Implement Raft leader election and heartbeats (`AppendEntries` RPCs with no log entries). The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run `go test -run 3A` to test your 3A code. + +#### Hints + +- You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. `go test -run 3A` . +- Follow the paper's Figure 2. At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election, +- Add the Figure 2 state for leader election to the `Raft` struct in `raft.go`. You'll also need to define a struct to hold information about each log entry. +- Fill in the `RequestVoteArgs` and `RequestVoteReply` structs. Modify `Make()` to create a background goroutine that will kick off leader election periodically by sending out `RequestVote` RPCs when it hasn't heard from another peer for a while. Implement the `RequestVote()` RPC handler so that servers will vote for one another. +- To implement heartbeats, define an `AppendEntries` RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an `AppendEntries` RPC handler method. +- The tester requires that the leader send heartbeat RPCs no more than ten times per second. +- The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate). +- The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds. +- You may find Go's rand useful. +- You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls time.Sleep(); see the ticker() goroutine that Make() creates for this purpose. 
Don't use Go's `time.Timer` or `time.Ticker`, which are difficult to use correctly. +- If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure. +- Don't forget to implement `GetState()`. +- The tester calls your Raft's `rf.Kill()` when it is permanently shutting down an instance. You can check whether `Kill()` has been called using `rf.killed()`. You may want to do this in all loops, to avoid having dead Raft instances print confusing messages. +- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). The `labgob` package will warn you about this; don't ignore the warnings. +- The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug. + +Be sure you pass the 3A tests before submitting Part 3A, so that you see something like this: + +```sh +$ go test -run 3A +Test (3A): initial election ... +... Passed -- 3.5 3 58 16840 0 +Test (3A): election after network failure ... +... Passed -- 5.4 3 118 25269 0 +Test (3A): multiple elections ... +... Passed -- 7.3 7 624 138014 0 +PASS +ok raft 16.265s +$ +``` + +Each "Passed" line contains five numbers; these are the time that the test took in seconds, the number of Raft peers, the number of RPCs sent during the test, the total number of bytes in the RPC messages, and the number of log entries that Raft reports were committed. Your numbers will differ from those shown here. You can ignore the numbers if you like, but they may help you sanity-check the number of RPCs that your implementation sends. For all of labs 3, 4, and 5, the grading script will fail your solution if it takes more than 600 seconds for all of the tests (`go test`), or if any individual test takes more than 120 seconds. + +When we grade your submissions, we will run the tests without the `-race` flag. However, you should make sure that your code consistently passes the tests with the `-race` flag. + +### Part 3B: log ("hard") + +#### Task + +Implement the leader and follower code to append new log entries, so that the `go test -run 3B` tests pass. + +#### Hints + +- Your first goal should be to pass `TestBasicAgree3B()`. Start by implementing `Start()`, then write the code to send and receive new log entries via `AppendEntries` RPCs, following Figure 2. Send each newly committed entry on `applyCh` on each peer. +- You will need to implement the election restriction (section 5.4.1 in the paper). +- Your code may have loops that repeatedly check for certain events. Don't have these loops execute continuously without pausing, since that will slow your implementation enough that it fails tests. Use Go's condition variables, or insert a `time.Sleep(10 * time.Millisecond)` in each loop iteration. +- Do yourself a favor for future labs and write (or re-write) code that's clean and clear. +- If you fail a test, look at `test_test.go` and `config.go` to understand what's being tested. `config.go` also illustrates how the tester uses the Raft API. + +The tests for upcoming labs may fail your code if it runs too slowly. You can check how much real time and CPU time your solution uses with the time command. Here's typical output: + +```sh +$ time go test -run 3B +Test (3B): basic agreement ... +... Passed -- 0.9 3 16 4572 3 +Test (3B): RPC byte count ... +... 
Passed -- 1.7 3 48 114536 11 +Test (3B): agreement after follower reconnects ... +... Passed -- 3.6 3 78 22131 7 +Test (3B): no agreement if too many followers disconnect ... +... Passed -- 3.8 5 172 40935 3 +Test (3B): concurrent Start()s ... +... Passed -- 1.1 3 24 7379 6 +Test (3B): rejoin of partitioned leader ... +... Passed -- 5.1 3 152 37021 4 +Test (3B): leader backs up quickly over incorrect follower logs ... +... Passed -- 17.2 5 2080 1587388 102 +Test (3B): RPC counts aren't too high ... +... Passed -- 2.2 3 60 20119 12 +PASS +ok raft 35.557s + +real 0m35.899s +user 0m2.556s +sys 0m1.458s +$ +``` + +The "ok raft 35.557s" means that Go measured the time taken for the 3B tests to be 35.557 seconds of real (wall-clock) time. The "user 0m2.556s" means that the code consumed 2.556 seconds of CPU time, or time spent actually executing instructions (rather than waiting or sleeping). If your solution uses much more than a minute of real time for the 3B tests, or much more than 5 seconds of CPU time, you may run into trouble later on. Look for time spent sleeping or waiting for RPC timeouts, loops that run without sleeping or waiting for conditions or channel messages, or large numbers of RPCs sent. + +### Part 3C: persistence ("hard") + +If a Raft-based server reboots it should resume service where it left off. This requires that Raft keep persistent state that survives a reboot. The paper's Figure 2 mentions which state should be persistent. + +A real implementation would write Raft's persistent state to disk each time it changed, and would read the state from disk when restarting after a reboot. Your implementation won't use the disk; instead, it will save and restore persistent state from a `Persister` object (see `persister.go`). Whoever calls `Raft.Make()` supplies a `Persister` that initially holds Raft's most recently persisted state (if any). Raft should initialize its state from that `Persister`, and should use it to save its persistent state each time the state changes. Use the `Persister`'s `ReadRaftState()` and `Save()` methods. + +#### Task + +Complete the functions `persist()` and `readPersist()` in raft.go by adding code to save and restore persistent state. You will need to encode (or "serialize") the state as an array of bytes in order to pass it to the Persister. Use the labgob encoder; see the comments in `persist()` and `readPersist()`. `labgob` is like Go's `gob` encoder but prints error messages if you try to encode structures with lower-case field names. For now, pass `nil` as the second argument to `persister.Save()`. Insert calls to `persist()` at the points where your implementation changes persistent state. Once you've done this, and if the rest of your implementation is correct, you should pass all of the 3C tests. + +You will probably need the optimization that backs up nextIndex by more than one entry at a time. Look at the extended Raft paper starting at the bottom of page 7 and top of page 8 (marked by a gray line). The paper is vague about the details; you will need to fill in the gaps. 
One possibility is to have a rejection message include: + +```sh + XTerm: term in the conflicting entry (if any) + XIndex: index of first entry with that term (if any) + XLen: log length +``` + +Then the leader's logic can be something like: + +```sh +Case 1: leader doesn't have XTerm: +nextIndex = XIndex +Case 2: leader has XTerm: +nextIndex = leader's last entry for XTerm +Case 3: follower's log is too short: +nextIndex = XLen +``` + +A few other hints: + +- The 3C tests are more demanding than those for 3A or 3B, and failures may be caused by problems in your code for 3A or 3B. + +Your code should pass all the 3C tests (as shown below), as well as the 3A and 3B tests. + +```sh +$ go test -run 3C +Test (3C): basic persistence ... +... Passed -- 5.0 3 86 22849 6 +Test (3C): more persistence ... +... Passed -- 17.6 5 952 218854 16 +Test (3C): partitioned leader and one follower crash, leader restarts ... +... Passed -- 2.0 3 34 8937 4 +Test (3C): Figure 8 ... +... Passed -- 31.2 5 580 130675 32 +Test (3C): unreliable agreement ... +... Passed -- 1.7 5 1044 366392 246 +Test (3C): Figure 8 (unreliable) ... +... Passed -- 33.6 5 10700 33695245 308 +Test (3C): churn ... +... Passed -- 16.1 5 8864 44771259 1544 +Test (3C): unreliable churn ... +... Passed -- 16.5 5 4220 6414632 906 +PASS +ok raft 123.564s +$ +``` + +It is a good idea to run the tests multiple times before submitting and check that each run prints PASS. + +```sh +$ for i in {0..10}; do go test; done +``` + +## Files Agent Should Modify + +The agent should implement the solution by modifying the following files: + +- `src/raft/append_entries.go` +- `src/raft/election.go` +- `src/raft/install_snapshot.go` +- `src/raft/raft.go` +- `src/raft/util.go` + +**Important**: Do not modify test files, configuration files, or other infrastructure files. diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/config.json new file mode 100644 index 0000000..238f9c7 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/config.json @@ -0,0 +1,13 @@ +{ + "instance_id": "mit_6_5840_2024_raft_3d", + "course_id": "mit_6_5840_2024", + "docker_image": "xuafeng/swe-go-python:latest", + "timeout_minutes": 30, + "tags": [ + "distributed-systems", + "raft", + "consensus", + "go" + ], + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" +} \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/evaluate.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/evaluate.sh new file mode 100755 index 0000000..0e289d4 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/evaluate.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -e + +echo "=== Evaluation ===" + +cd /workspace + +export PATH=$PATH:/usr/local/go/bin + +echo "Verifying protected files were not modified" +PROTECTED_FILES=( + "src/raft/config.go" + "src/raft/persister.go" + "src/raft/test_test.go" + +) + +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ] && [ -f "/tmp/checksums/$(basename $file).sha256" ]; then + echo " Checking $file" + sha256sum -c "/tmp/checksums/$(basename $file).sha256" || { + echo "FAIL: $file was modified" + exit 1 + } + fi +done +echo "All protected files unchanged" + +echo "Running Raft 3D tests" +cd src/raft +go test -run 3D -race + +if [ $? 
-eq 0 ]; then + echo "PASS: Tests passed" + exit 0 +else + echo "FAIL: Tests failed" + exit 1 +fi diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/preprocess.sh new file mode 100755 index 0000000..40403e3 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/preprocess.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -e + +echo "=== Preprocessing Raft Lab 3D ===" + +cd /workspace + +echo "Creating checksums for protected files" +PROTECTED_FILES=( + "src/raft/config.go" + "src/raft/persister.go" + "src/raft/test_test.go" + +) + +mkdir -p /tmp/checksums +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ]; then + sha256sum "$file" > "/tmp/checksums/$(basename $file).sha256" + echo " $file" + fi +done + + + +echo "Preprocessing complete" +exit 0 diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/task.md b/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/task.md new file mode 100644 index 0000000..b49c5be --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/task.md @@ -0,0 +1,279 @@ +# Raft + +### Introduction + +This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will "shard" your service over multiple replicated state machines for higher performance. + +A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. + +Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. + +In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with _index numbers_. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. + +You should follow the design in the extended Raft paper, with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). + +This lab is due in four parts. You must submit each part on the corresponding due date. 
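+
+As an illustration of the `applyCh` contract just described (each committed entry is handed to the larger service exactly once, in log order), a service built on this Raft might drain the channel as sketched below. The `ApplyMsg` field names follow the lab skeleton's usual layout, but `raft.go` is the authoritative definition.
+
+```go
+package raftsketch
+
+// Field names assumed from the usual lab skeleton; raft.go is authoritative.
+type ApplyMsg struct {
+    CommandValid bool
+    Command      interface{}
+    CommandIndex int
+}
+
+// applyLoop runs in the service (or tester) and applies each committed
+// command to the service's local state, in log order.
+func applyLoop(applyCh <-chan ApplyMsg, apply func(index int, cmd interface{})) {
+    for msg := range applyCh {
+        if msg.CommandValid {
+            apply(msg.CommandIndex, msg.Command)
+        }
+    }
+}
+```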
+ +### Getting Started + +We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. + +When we grade your submissions, we will run the tests without the `-race` flag. However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. + +To get up and running, execute the following commands: + +```sh +$ cd src/raft +$ go test +Test (3A): initial election ... +--- FAIL: TestInitialElection3A (5.04s) +config.go:326: expected one leader, got none +Test (3A): election after network failure ... +--- FAIL: TestReElection3A (5.03s) +config.go:326: expected one leader, got none +... +$ +``` + +### The code + +Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. + +Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. + +```go +// create a new Raft server instance: +rf := Make(peers, me, persister, applyCh) + +// start agreement on a new log entry: +rf.Start(command interface{}) (index, term, isleader) + +// ask a Raft for its current term, and whether it thinks it is leader +rf.GetState() (term, isLeader) + +// each time a new entry is committed to the log, each Raft peer +// should send an ApplyMsg to the service (or tester). +type ApplyMsg +``` + +A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. + +`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. + +Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code. + +### Part 3A: leader election ("moderate") + +#### Task + +Implement Raft leader election and heartbeats (`AppendEntries` RPCs with no log entries). The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run `go test -run 3A` to test your 3A code. + +#### Hints + +- You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. `go test -run 3A` . +- Follow the paper's Figure 2. 
At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election.
+- Add the Figure 2 state for leader election to the `Raft` struct in `raft.go`. You'll also need to define a struct to hold information about each log entry.
+- Fill in the `RequestVoteArgs` and `RequestVoteReply` structs. Modify `Make()` to create a background goroutine that will kick off leader election periodically by sending out `RequestVote` RPCs when it hasn't heard from another peer for a while. Implement the `RequestVote()` RPC handler so that servers will vote for one another.
+- To implement heartbeats, define an `AppendEntries` RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an `AppendEntries` RPC handler method.
+- The tester requires that the leader send heartbeat RPCs no more than ten times per second.
+- The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate).
+- The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you to tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds.
+- You may find Go's rand useful.
+- You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls time.Sleep(); see the ticker() goroutine that Make() creates for this purpose. Don't use Go's `time.Timer` or `time.Ticker`, which are difficult to use correctly.
+- If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure.
+- Don't forget to implement `GetState()`.
+- The tester calls your Raft's `rf.Kill()` when it is permanently shutting down an instance. You can check whether `Kill()` has been called using `rf.killed()`. You may want to do this in all loops, to avoid having dead Raft instances print confusing messages.
+- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). The `labgob` package will warn you about this; don't ignore the warnings.
+- The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug.
+
+Be sure you pass the 3A tests before submitting Part 3A, so that you see something like this:
+
+```sh
+$ go test -run 3A
+Test (3A): initial election ...
+... Passed -- 3.5 3 58 16840 0
+Test (3A): election after network failure ...
+... Passed -- 5.4 3 118 25269 0
+Test (3A): multiple elections ...
+... Passed -- 7.3 7 624 138014 0
+PASS
+ok raft 16.265s
+$
+```
+
+Each "Passed" line contains five numbers; these are the time that the test took in seconds, the number of Raft peers, the number of RPCs sent during the test, the total number of bytes in the RPC messages, and the number of log entries that Raft reports were committed. Your numbers will differ from those shown here. 
You can ignore the numbers if you like, but they may help you sanity-check the number of RPCs that your implementation sends. For all of labs 3, 4, and 5, the grading script will fail your solution if it takes more than 600 seconds for all of the tests (`go test`), or if any individual test takes more than 120 seconds. + +When we grade your submissions, we will run the tests without the `-race` flag. However, you should make sure that your code consistently passes the tests with the `-race` flag. + +### Part 3B: log ("hard") + +#### Task + +Implement the leader and follower code to append new log entries, so that the `go test -run 3B` tests pass. + +#### Hints + +- Your first goal should be to pass `TestBasicAgree3B()`. Start by implementing `Start()`, then write the code to send and receive new log entries via `AppendEntries` RPCs, following Figure 2. Send each newly committed entry on `applyCh` on each peer. +- You will need to implement the election restriction (section 5.4.1 in the paper). +- Your code may have loops that repeatedly check for certain events. Don't have these loops execute continuously without pausing, since that will slow your implementation enough that it fails tests. Use Go's condition variables, or insert a `time.Sleep(10 * time.Millisecond)` in each loop iteration. +- Do yourself a favor for future labs and write (or re-write) code that's clean and clear. +- If you fail a test, look at `test_test.go` and `config.go` to understand what's being tested. `config.go` also illustrates how the tester uses the Raft API. + +The tests for upcoming labs may fail your code if it runs too slowly. You can check how much real time and CPU time your solution uses with the time command. Here's typical output: + +```sh +$ time go test -run 3B +Test (3B): basic agreement ... +... Passed -- 0.9 3 16 4572 3 +Test (3B): RPC byte count ... +... Passed -- 1.7 3 48 114536 11 +Test (3B): agreement after follower reconnects ... +... Passed -- 3.6 3 78 22131 7 +Test (3B): no agreement if too many followers disconnect ... +... Passed -- 3.8 5 172 40935 3 +Test (3B): concurrent Start()s ... +... Passed -- 1.1 3 24 7379 6 +Test (3B): rejoin of partitioned leader ... +... Passed -- 5.1 3 152 37021 4 +Test (3B): leader backs up quickly over incorrect follower logs ... +... Passed -- 17.2 5 2080 1587388 102 +Test (3B): RPC counts aren't too high ... +... Passed -- 2.2 3 60 20119 12 +PASS +ok raft 35.557s + +real 0m35.899s +user 0m2.556s +sys 0m1.458s +$ +``` + +The "ok raft 35.557s" means that Go measured the time taken for the 3B tests to be 35.557 seconds of real (wall-clock) time. The "user 0m2.556s" means that the code consumed 2.556 seconds of CPU time, or time spent actually executing instructions (rather than waiting or sleeping). If your solution uses much more than a minute of real time for the 3B tests, or much more than 5 seconds of CPU time, you may run into trouble later on. Look for time spent sleeping or waiting for RPC timeouts, loops that run without sleeping or waiting for conditions or channel messages, or large numbers of RPCs sent. + +### Part 3C: persistence ("hard") + +If a Raft-based server reboots it should resume service where it left off. This requires that Raft keep persistent state that survives a reboot. The paper's Figure 2 mentions which state should be persistent. + +A real implementation would write Raft's persistent state to disk each time it changed, and would read the state from disk when restarting after a reboot. 
Your implementation won't use the disk; instead, it will save and restore persistent state from a `Persister` object (see `persister.go`). Whoever calls `Raft.Make()` supplies a `Persister` that initially holds Raft's most recently persisted state (if any). Raft should initialize its state from that `Persister`, and should use it to save its persistent state each time the state changes. Use the `Persister`'s `ReadRaftState()` and `Save()` methods. + +#### Task + +Complete the functions `persist()` and `readPersist()` in raft.go by adding code to save and restore persistent state. You will need to encode (or "serialize") the state as an array of bytes in order to pass it to the Persister. Use the labgob encoder; see the comments in `persist()` and `readPersist()`. `labgob` is like Go's `gob` encoder but prints error messages if you try to encode structures with lower-case field names. For now, pass `nil` as the second argument to `persister.Save()`. Insert calls to `persist()` at the points where your implementation changes persistent state. Once you've done this, and if the rest of your implementation is correct, you should pass all of the 3C tests. + +You will probably need the optimization that backs up nextIndex by more than one entry at a time. Look at the extended Raft paper starting at the bottom of page 7 and top of page 8 (marked by a gray line). The paper is vague about the details; you will need to fill in the gaps. One possibility is to have a rejection message include: + +```sh + XTerm: term in the conflicting entry (if any) + XIndex: index of first entry with that term (if any) + XLen: log length +``` + +Then the leader's logic can be something like: + +```sh +Case 1: leader doesn't have XTerm: +nextIndex = XIndex +Case 2: leader has XTerm: +nextIndex = leader's last entry for XTerm +Case 3: follower's log is too short: +nextIndex = XLen +``` + +A few other hints: + +- The 3C tests are more demanding than those for 3A or 3B, and failures may be caused by problems in your code for 3A or 3B. + +Your code should pass all the 3C tests (as shown below), as well as the 3A and 3B tests. + +```sh +$ go test -run 3C +Test (3C): basic persistence ... +... Passed -- 5.0 3 86 22849 6 +Test (3C): more persistence ... +... Passed -- 17.6 5 952 218854 16 +Test (3C): partitioned leader and one follower crash, leader restarts ... +... Passed -- 2.0 3 34 8937 4 +Test (3C): Figure 8 ... +... Passed -- 31.2 5 580 130675 32 +Test (3C): unreliable agreement ... +... Passed -- 1.7 5 1044 366392 246 +Test (3C): Figure 8 (unreliable) ... +... Passed -- 33.6 5 10700 33695245 308 +Test (3C): churn ... +... Passed -- 16.1 5 8864 44771259 1544 +Test (3C): unreliable churn ... +... Passed -- 16.5 5 4220 6414632 906 +PASS +ok raft 123.564s +$ +``` + +It is a good idea to run the tests multiple times before submitting and check that each run prints PASS. + +```sh +$ for i in {0..10}; do go test; done +``` + +### Part 3D: log compaction ("hard") + +As things stand now, a rebooting server replays the complete Raft log in order to restore its state. However, it's not practical for a long-running service to remember the complete Raft log forever. Instead, you'll modify Raft to cooperate with services that persistently store a "snapshot" of their state from time to time, at which point Raft discards log entries that precede the snapshot. The result is a smaller amount of persistent data and faster restart. 
However, it's now possible for a follower to fall so far behind that the leader has discarded the log entries it needs to catch up; the leader must then send a snapshot plus the log starting at the time of the snapshot. Section 7 of the extended Raft paper outlines the scheme; you will have to design the details.
+
+Your Raft must provide the following function that the service can call with a serialized snapshot of its state:
+
+`Snapshot(index int, snapshot []byte)`
+
+In Lab 3D, the tester calls `Snapshot()` periodically. In Lab 4, you will write a key/value server that calls `Snapshot()`; the snapshot will contain the complete table of key/value pairs. The service layer calls `Snapshot()` on every peer (not just on the leader).
+
+The `index` argument indicates the highest log entry that's reflected in the snapshot. Raft should discard its log entries before that point. You'll need to revise your Raft code to operate while storing only the tail of the log.
+
+You'll need to implement the `InstallSnapshot` RPC discussed in the paper that allows a Raft leader to tell a lagging Raft peer to replace its state with a snapshot. You will likely need to think through how InstallSnapshot should interact with the state and rules in Figure 2.
+
+When a follower's Raft code receives an InstallSnapshot RPC, it can use the `applyCh` to send the snapshot to the service in an `ApplyMsg`. The `ApplyMsg` struct definition already contains the fields you will need (and which the tester expects). Take care that these snapshots only advance the service's state, and don't cause it to move backwards.
+
+If a server crashes, it must restart from persisted data. Your Raft should persist both Raft state and the corresponding snapshot. Use the second argument to `persister.Save()` to save the snapshot. If there's no snapshot, pass `nil` as the second argument.
+
+When a server restarts, the application layer reads the persisted snapshot and restores its saved state.
+
+#### Task
+
+Implement `Snapshot()` and the InstallSnapshot RPC, as well as the changes to Raft to support these (e.g., operation with a trimmed log). Your solution is complete when it passes the 3D tests (and all the previous Lab 3 tests).
+
+#### Hints
+
+- A good place to start is to modify your code so that it is able to store just the part of the log starting at some index X. Initially you can set X to zero and run the 3B/3C tests. Then make `Snapshot(index)` discard the log before `index`, and set X equal to `index`. If all goes well you should now pass the first 3D test.
+- Next: have the leader send an InstallSnapshot RPC if it doesn't have the log entries required to bring a follower up to date.
+- Send the entire snapshot in a single InstallSnapshot RPC. Don't implement Figure 13's `offset` mechanism for splitting up the snapshot.
+- Raft must discard old log entries in a way that allows the Go garbage collector to free and re-use the memory; this requires that there be no reachable references (pointers) to the discarded log entries.
+- A reasonable amount of time to consume for the full set of Lab 3 tests (3A+3B+3C+3D) without `-race` is 6 minutes of real time and one minute of CPU time. When running with `-race`, it is about 10 minutes of real time and two minutes of CPU time.
+
+Your code should pass all the 3D tests (as shown below), as well as the 3A, 3B, and 3C tests.
+
+```sh
+$ go test -run 3D
+Test (3D): snapshots basic ...
+... Passed -- 11.6 3 176 61716 192
+Test (3D): install snapshots (disconnect) ...
+... 
Passed -- 64.2 3 878 320610 336 +Test (3D): install snapshots (disconnect+unreliable) ... +... Passed -- 81.1 3 1059 375850 341 +Test (3D): install snapshots (crash) ... +... Passed -- 53.5 3 601 256638 339 +Test (3D): install snapshots (unreliable+crash) ... +... Passed -- 63.5 3 687 288294 336 +Test (3D): crash and restart all servers ... +... Passed -- 19.5 3 268 81352 58 +PASS +ok raft 293.456s +``` + +## Files Agent Should Modify + +The agent should implement the solution by modifying the following files: + +- `src/raft/append_entries.go` +- `src/raft/election.go` +- `src/raft/install_snapshot.go` +- `src/raft/raft.go` +- `src/raft/util.go` + +**Important**: Do not modify test files, configuration files, or other infrastructure files. diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/config.json new file mode 100644 index 0000000..025056c --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/config.json @@ -0,0 +1,14 @@ +{ + "instance_id": "mit_6_5840_2024_kvraft_4a", + "course_id": "mit_6_5840_2024", + "docker_image": "xuafeng/swe-go-python:latest", + "timeout_minutes": 30, + "tags": [ + "distributed-systems", + "kvraft", + "raft", + "fault-tolerance", + "go" + ], + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" +} \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/evaluate.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/evaluate.sh new file mode 100755 index 0000000..5a492c3 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/evaluate.sh @@ -0,0 +1,37 @@ +#!/bin/bash +set -e + +echo "=== Evaluation ===" + +cd /workspace + +export PATH=$PATH:/usr/local/go/bin + +echo "Verifying protected files were not modified" +PROTECTED_FILES=( + "src/kvraft/config.go" + "src/kvraft/test_test.go" +) + +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ] && [ -f "/tmp/checksums/$(basename $file).sha256" ]; then + echo " Checking $file" + sha256sum -c "/tmp/checksums/$(basename $file).sha256" || { + echo "FAIL: $file was modified" + exit 1 + } + fi +done +echo "All protected files unchanged" + +echo "Running KVRaft 4A tests" +cd src/kvraft +go test -run 4A -race + +if [ $? -eq 0 ]; then + echo "PASS: Tests passed" + exit 0 +else + echo "FAIL: Tests failed" + exit 1 +fi diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/preprocess.sh new file mode 100755 index 0000000..424c3c6 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/preprocess.sh @@ -0,0 +1,915 @@ +#!/bin/bash +set -e + +echo "=== Preprocessing KVRaft Lab 4A ===" + +cd /workspace + +echo "KVRaft depends on Raft implementation from Lab 3" +echo "Copying reference Raft implementation..." + +echo ' Copying raft.go' +cat > src/raft/raft.go << 'RAFT_EOF' +package raft + +// +// this is an outline of the API that raft must expose to +// the service (or tester). see comments below for +// each of these functions for more details. +// +// rf = Make(...) +// create a new Raft server. 
+// rf.Start(command interface{}) (index, term, isleader) +// start agreement on a new log entry +// rf.GetState() (term, isLeader) +// ask a Raft for its current term, and whether it thinks it is leader +// ApplyMsg +// each time a new entry is committed to the log, each Raft peer +// should send an ApplyMsg to the service (or tester) +// in the same server. +// + +import ( + // "bytes" + + "bytes" + "log" + "sync" + "sync/atomic" + "time" + + // "6.5840/labgob" + "6.5840/labgob" + "6.5840/labrpc" +) + +// as each Raft peer becomes aware that successive log entries are +// committed, the peer should send an ApplyMsg to the service (or +// tester) on the same server, via the applyCh passed to Make(). set +// CommandValid to true to indicate that the ApplyMsg contains a newly +// committed log entry. +// +// in part 3D you'll want to send other kinds of messages (e.g., +// snapshots) on the applyCh, but set CommandValid to false for these +// other uses. +type ApplyMsg struct { + CommandValid bool + Command interface{} + CommandIndex int + CommandTerm int + + // For 3D: + SnapshotValid bool + Snapshot []byte + SnapshotTerm int + SnapshotIndex int +} + +type Entry struct { + Term int + Index int + Command interface{} +} + +// Base struct for common fields +type BaseRPC struct { + Term int +} + +// Implement RaftRPC interface for BaseRPC +func (b *BaseRPC) GetTerm() int { + return b.Term +} + +func (b *BaseRPC) SetTerm(term int) { + b.Term = term +} + +// RaftRPC interface +type RaftRPC interface { + GetTerm() int + SetTerm(int) +} + +type ServerState int + +const ( + FOLLOWER ServerState = iota + CANDIDATE + LEADER +) + +// A Go object implementing a single Raft peer. +type Raft struct { + mu sync.Mutex // Lock to protect shared access to this peer's state + peers []*labrpc.ClientEnd // RPC end points of all peers + persister *Persister // Object to hold this peer's persisted state + me int // this peer's index into peers[] + dead int32 // set by Kill() + heartbeatTimeout time.Duration + electionTimeout time.Duration + electionTimeStamp time.Time + applyCh chan ApplyMsg + + // state a Raft server must maintain. + broadcasterCond []*sync.Cond + applierCond *sync.Cond + + // server state + state ServerState + + // presistent state on all servers + currentTerm int // latest term server has seen (initialized to 0 on first boot, increases monotonically) + votedFor int // candidateId that received vote in current term (or null if none) + logs []Entry // log entries; each entry contains command for state machine, and term when entry was received by leader (first index is 1) + + // volatile state on all servers + commitIndex int // index of highest log entry known to be committed (initialized to 0, increases monotonically) + lastApplied int // index of highest log entry applied to state machine (initialized to 0, increases monotonically) + + // volatile state on leaders (reinitialized after election) + nextIndex []int // for each server, index of the next log entry to send to that server (initialized to leader last log index + 1) + matchIndex []int // for each server, index of highest log entry known to be replicated on server (initialized to 0, increases monotonically) + + // snapshot msg + smsg *ApplyMsg +} + +// return currentTerm and whether this server +// believes it is the leader. 
+func (rf *Raft) GetState() (int, bool) { + rf.mu.Lock() + defer rf.mu.Unlock() + return rf.currentTerm, rf.state == LEADER +} + +func (rf *Raft) encodeState() []byte { + w := new(bytes.Buffer) + e := labgob.NewEncoder(w) + e.Encode(rf.currentTerm) + e.Encode(rf.votedFor) + e.Encode(rf.logs) + return w.Bytes() +} + +// save Raft's persistent state to stable storage, +// where it can later be retrieved after a crash and restart. +// see paper's Figure 2 for a description of what should be persistent. +// before you've implemented snapshots, you should pass nil as the +// second argument to persister.Save(). +// after you've implemented snapshots, pass the current snapshot +// (or nil if there's not yet a snapshot). +func (rf *Raft) persist() { + if rf.persister.ReadSnapshot() != nil { + rf.persister.Save(rf.encodeState(), rf.persister.ReadSnapshot()) + } else { + rf.persister.Save(rf.encodeState(), nil) + } +} + +// restore previously persisted state. +func (rf *Raft) readPersist(data []byte) { + if data == nil || len(data) < 1 { // bootstrap without any state + return + } + r := bytes.NewBuffer(data) + d := labgob.NewDecoder(r) + var currentTerm int + var votedFor int + var logs []Entry + + if d.Decode(¤tTerm) != nil || d.Decode(&votedFor) != nil || d.Decode(&logs) != nil { + log.Fatal("failed to read persist\n") + } else { + DPrintf("[%d]: read persist, currentTerm: %d, votedFor: %d, logs: %v\n", rf.me, currentTerm, votedFor, logs) + rf.currentTerm = currentTerm + rf.votedFor = votedFor + rf.logs = logs + rf.lastApplied = rf.logs[0].Index + rf.commitIndex = rf.logs[0].Index + } +} + +// the service says it has created a snapshot that has +// all info up to and including index. this means the +// service no longer needs the log through (and including) +// that index. Raft should now trim its log as much as possible. +func (rf *Raft) Snapshot(index int, snapshot []byte) { + // Your code here (3D). + rf.mu.Lock() + defer rf.mu.Unlock() + // if the snapshot is outdated, just ignore it + if rf.logs[0].Index >= index { + return + } + firstLogIndex := rf.logs[0].Index + trimLen := index - firstLogIndex + // trim the logs + rf.logs = append([]Entry{}, rf.logs[trimLen:]...) + rf.logs[0].Command = nil + rf.persister.Save(rf.encodeState(), snapshot) +} + +// the service using Raft (e.g. a k/v server) wants to start +// agreement on the next command to be appended to Raft's log. if this +// server isn't the leader, returns false. otherwise start the +// agreement and return immediately. there is no guarantee that this +// command will ever be committed to the Raft log, since the leader +// may fail or lose an election. even if the Raft instance has been killed, +// this function should return gracefully. +// +// the first return value is the index that the command will appear at +// if it's ever committed. the second return value is the current +// term. the third return value is true if this server believes it is +// the leader. +func (rf *Raft) Start(command interface{}) (int, int, bool) { + rf.mu.Lock() + defer rf.mu.Unlock() + if rf.state != LEADER { + return -1, -1, false + } + defer DPrintf("(Start) [%d]: command %+v, index:%d, term: %d\n", rf.me, command, rf.logs[len(rf.logs)-1].Index, rf.currentTerm) + rf.logs = append(rf.logs, Entry{ + Term: rf.currentTerm, + Index: rf.logs[len(rf.logs)-1].Index + 1, + Command: command, + }) + rf.broadcastAppendEntries(false) + // Your code here (3B). 
+ return rf.logs[len(rf.logs)-1].Index, rf.currentTerm, true +} + +// Warning: this function is not thread-safe +func (rf *Raft) resetNewTermState(targetTerm int) { + DPrintf("(ResetTerm)[%d]: received newer term, set term to %d\n", rf.me, targetTerm) + if rf.currentTerm < targetTerm { + rf.votedFor = -1 + } + rf.currentTerm = targetTerm + rf.state = FOLLOWER // reset to follower +} + +// Reply false if term < currentTerm (§5.1) +// If RPC request contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkRequestTerm(args, reply RaftRPC) bool { + term := args.GetTerm() + defer reply.SetTerm(rf.currentTerm) + if term < rf.currentTerm { + return false + } + if term > rf.currentTerm { + rf.resetNewTermState(term) + } + return true +} + +// If RPC request or response contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkResponseTerm(args, reply RaftRPC, isElection bool) bool { + argsTerm := args.GetTerm() + replyTerm := reply.GetTerm() + if replyTerm > argsTerm { + rf.resetNewTermState(replyTerm) + rf.resetElectionTimer() + return false + } + return isElection || (rf.state == LEADER) +} + +// the tester doesn't halt goroutines created by Raft after each test, +// but it does call the Kill() method. your code can use killed() to +// check whether Kill() has been called. the use of atomic avoids the +// need for a lock. +// +// the issue is that long-running goroutines use memory and may chew +// up CPU time, perhaps causing later tests to fail and generating +// confusing debug output. any goroutine with a long-running loop +// should call killed() to check whether it should stop. +func (rf *Raft) Kill() { + atomic.StoreInt32(&rf.dead, 1) + // Your code here, if desired. 
+} + +func (rf *Raft) killed() bool { + z := atomic.LoadInt32(&rf.dead) + return z == 1 +} + +// a dedicated applier goroutine to guarantee that each log will be push into applyCh exactly once, ensuring that service's applying entries and raft's committing entries can be parallel +func (rf *Raft) applier() { + for !rf.killed() { + rf.mu.Lock() + // if there is no need to apply entries, just release CPU and wait other goroutine's signal if they commit new entries + for rf.lastApplied >= rf.commitIndex { + rf.applierCond.Wait() + } + firstLogIndex := rf.logs[0].Index + commitIndex, lastApplied := rf.commitIndex, rf.lastApplied + DPrintf("(applier) [%d]: commitIndex: %d, lastApplied: %d, logFirstIndex: %d, logLastIndex: %d\n", rf.me, commitIndex, lastApplied, firstLogIndex, rf.logs[len(rf.logs)-1].Index) + entries := make([]Entry, commitIndex-lastApplied) + copy(entries, rf.logs[lastApplied+1-firstLogIndex:commitIndex+1-firstLogIndex]) + if rf.smsg != nil { + msg := rf.smsg + rf.smsg = nil + rf.mu.Unlock() + rf.applyCh <- *msg + } else { + rf.mu.Unlock() + } + for _, entry := range entries { + DPrintf("(applier) [%d]: apply entry %+v\n", rf.me, entry) + rf.applyCh <- ApplyMsg{ + CommandValid: true, + Command: entry.Command, + CommandTerm: entry.Term, + CommandIndex: entry.Index, + } + } + rf.mu.Lock() + // use commitIndex rather than rf.commitIndex because rf.commitIndex may change during the Unlock() and Lock() + // use Max(rf.lastApplied, commitIndex) rather than commitIndex directly to avoid concurrently InstallSnapshot rpc causing lastApplied to rollback + if rf.lastApplied < commitIndex { + rf.lastApplied = commitIndex + } + rf.mu.Unlock() + } +} + +/** + * Lets illustrate the time line of the ticker function + * e: election timeout + * h: heartbeat timeout + * + * ---- h ---- h ---- h ---- h ---- h ---- ... + * + * First, the server will wake up each fixed heartbeat timeout. This timeout is + * relatively shorter than the election timeout. If the server is not a leader, + * it basically do nothing about heartbeat. + * + * However, everytime when server wake up, it will check if the election timeout + * is reached. It might start a new election, if it is not a leader. + * + * v election timeout found! + * ---- h1 ---- h2 ---- h3 ---- h ---- h ---- ... + * --------- e1 ------ e2 ------------ e ---- ... + * + * Reseting a new election timeout when the server receives a heartbeat or a + * vote from another server prevents the election. One shortcomming of the + * current implementation is that the election timeout does not trigger a new + * election immediately. It will wait until the next heartbeat timeout. + */ +func (rf *Raft) ticker() { + for !rf.killed() { + rf.mu.Lock() + if rf.state == LEADER { + rf.broadcastAppendEntries(true) + } else if rf.isElectionTimeout() { + rf.startElection() + } + rf.mu.Unlock() + time.Sleep(rf.heartbeatTimeout) + } +} + +// the service or tester wants to create a Raft server. the ports +// of all the Raft servers (including this one) are in peers[]. this +// server's port is peers[me]. all the servers' peers[] arrays +// have the same order. persister is a place for this server to +// save its persistent state, and also initially holds the most +// recent saved state, if any. applyCh is a channel on which the +// tester or service expects Raft to send ApplyMsg messages. +// Make() must return quickly, so it should start goroutines +// for any long-running work. 
+func Make(peers []*labrpc.ClientEnd, me int, + persister *Persister, applyCh chan ApplyMsg) *Raft { + rf := &Raft{} + rf.peers = peers + rf.persister = persister + rf.me = me + rf.applyCh = applyCh + rf.heartbeatTimeout = 125 * time.Millisecond + rf.resetElectionTimer() + rf.state = FOLLOWER + rf.votedFor = -1 + rf.logs = make([]Entry, 0) + + // dummy entry to make the index start from 1 + rf.logs = append(rf.logs, Entry{0, 0, nil}) + + rf.commitIndex = 0 + rf.lastApplied = 0 + + rf.applierCond = sync.NewCond(&rf.mu) + rf.broadcasterCond = make([]*sync.Cond, len(peers)) + + rf.nextIndex = make([]int, len(peers)) + rf.matchIndex = make([]int, len(peers)) + + for id := range peers { + rf.nextIndex[id] = 1 + if id != rf.me { + rf.broadcasterCond[id] = sync.NewCond(&sync.Mutex{}) + go rf.broadcaster(id) + } + } + + rf.smsg = nil + + // initialize from state persisted before a crash + rf.readPersist(persister.ReadRaftState()) + + // start ticker goroutine to start elections + go rf.ticker() + + go rf.applier() + + return rf +} + +RAFT_EOF + +echo ' Copying election.go' +cat > src/raft/election.go << 'RAFT_EOF' +package raft + +import ( + "math/rand" + "sync/atomic" + "time" +) + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type RequestVoteArgs struct { + BaseRPC // candidate's term + CandidateId int // candidate requesting vote + LastLogIndex int // index of candidate's last log entry + LastLogTerm int // term of candidate's last log entry +} + +type RequestVoteReply struct { + BaseRPC // currentTerm, for candidate to update itself + VoteGranted bool // true means candidate received vote +} + +// RequestVote RPC handler +// Restart your election timer if you grant a vote to another peer. +func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + reply.VoteGranted = false + + DPrintf("(RequestVote) [%d]: receive vote request from %d, term %d\n", rf.me, args.CandidateId, args.Term) + + if !rf.checkRequestTerm(args, reply) { + return + } + + if (rf.votedFor == -1 || rf.votedFor == args.CandidateId) && rf.isUpToDate(args) { + reply.VoteGranted = true + rf.votedFor = args.CandidateId + rf.resetElectionTimer() + } +} + +func (rf *Raft) isUpToDate(args *RequestVoteArgs) bool { + lastLog := rf.logs[len(rf.logs)-1] + candidateIndex := args.LastLogIndex + candidateTerm := args.LastLogTerm + return candidateTerm > lastLog.Term || (candidateTerm == lastLog.Term && candidateIndex >= lastLog.Index) +} + +func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, voteCount *int32) { + reply := &RequestVoteReply{} + ok := rf.peers[server].Call("Raft.RequestVote", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, true) { + return + } + + if !reply.VoteGranted { + return + } + + DPrintf("(RequestVote) [%d]: received vote from %d, voteCount: %d\n", rf.me, server, *voteCount) + + // If votes received from majority of servers: become leader + if atomic.AddInt32(voteCount, 1) > int32(len(rf.peers)/2) && + rf.state == CANDIDATE && + rf.currentTerm == args.Term { + rf.state = LEADER + lastLogIndex := rf.logs[len(rf.logs)-1].Index + for i := range rf.peers { + rf.nextIndex[i] = lastLogIndex + 1 + rf.matchIndex[i] = 0 + } + DPrintf("[%d]: become leader to term %d\n", rf.me, rf.currentTerm) + // send initial empty AppendEntries RPCs (heartbeat) to each server immediately + rf.broadcastAppendEntries(true) 
+ } + DPrintf("(RequestVote) [%d]: voteCount: %d\n", rf.me, *voteCount) +} + +func (rf *Raft) startElection() { + rf.currentTerm++ + rf.state = CANDIDATE + rf.votedFor = rf.me + rf.resetElectionTimer() + DPrintf("(RequestVote) [%d]: start election, term %d", rf.me, rf.currentTerm) + lastLog := rf.logs[len(rf.logs)-1] + + voteCount := int32(1) + args := RequestVoteArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + CandidateId: rf.me, + LastLogIndex: lastLog.Index, + LastLogTerm: lastLog.Term, + } + + for id := range rf.peers { + if id == rf.me { + continue + } + go rf.sendRequestVote(id, &args, &voteCount) + } +} + +func (rf *Raft) resetElectionTimer() { + // election timeout range from 350 to 550 + ms := 350 + (rand.Int63() % 200) + rf.electionTimeStamp = time.Now() + rf.electionTimeout = time.Duration(ms) * time.Millisecond +} + +func (rf *Raft) isElectionTimeout() bool { + return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) +} + +RAFT_EOF + +echo ' Copying append_entries.go' +cat > src/raft/append_entries.go << 'RAFT_EOF' +package raft + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type AppendEntriesArgs struct { + BaseRPC // leader's term + LeaderId int // so follower can redirect clients + PrevLogIndex int // index of log entry immediately preceding new ones + PrevLogTerm int // term of prevLogIndex entry + Entries []Entry // log entries to store (empty for heartbeat; may send more than one for efficiency) + CommitIndex int // leader's commitIndex +} + +type AppendEntriesReply struct { + BaseRPC // currentTerm, for leader to update itself + Success bool // true if follower contained entry matching prevLogIndex and prevLogTerm + ConflictIndex int // the index of the first conflicting entry +} + +// AppendEntries RPC handler +// Reset the election timer if you get an AppendEntries RPC from the current leader +// (i.e., if the term of the AppendEntries arguments is outdated, you should not reset your timer); +func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + DPrintf("(AppendEntries) [%d] recieve from %d, Term: %d, PrevLogIndex: %d, PrevLogTerm: %d\n", rf.me, args.LeaderId, args.Term, args.PrevLogIndex, args.PrevLogTerm) + + reply.Success = false + reply.ConflictIndex = -1 + + if !rf.checkRequestTerm(args, reply) { + return + } + + if rf.state == CANDIDATE { + rf.state = FOLLOWER + } + + rf.resetElectionTimer() + + prevLogIndex := args.PrevLogIndex - rf.logs[0].Index + + if prevLogIndex < 0 { + // force to send a snapshot + reply.ConflictIndex = 0 + return + } + + // Reply false if log doesn’t contain an entry at prevLogIndex + // whose term matches prevLogTerm (§5.3) + if prevLogIndex >= len(rf.logs) { + reply.ConflictIndex = rf.logs[len(rf.logs)-1].Index + return + } + + // If an existing entry conflicts with a new one (same index + // but different terms), delete the existing entry and all that + // follow it (§5.3) + if rf.logs[prevLogIndex].Term != args.PrevLogTerm { + // optimization + curTerm := rf.logs[prevLogIndex].Term + var conflictIndex int + for i := prevLogIndex; i > 0; i-- { + if rf.logs[i-1].Term != curTerm { + conflictIndex = i + break + } + } + reply.ConflictIndex = conflictIndex + rf.logs[0].Index + return + } + for idx, entry := range args.Entries { + logIndex := entry.Index - rf.logs[0].Index + if logIndex >= len(rf.logs) || rf.logs[logIndex].Term != entry.Term { + DPrintf("(AppendEntries) [%d] append logs: %v\n", rf.me, 
args.Entries) + rf.logs = append([]Entry{}, append(rf.logs[:logIndex], args.Entries[idx:]...)...) + break + } + } + reply.Success = true + if args.CommitIndex > rf.commitIndex { + rf.commitIndex = args.CommitIndex + if args.CommitIndex-rf.logs[0].Index >= len(rf.logs) { + rf.commitIndex = rf.logs[len(rf.logs)-1].Index + } + } + rf.applierCond.Signal() +} + +func (rf *Raft) sendAppendEntries(server int, args *AppendEntriesArgs) { + reply := &AppendEntriesReply{} + ok := rf.peers[server].Call("Raft.AppendEntries", args, reply) + if !ok { + return + } + + DPrintf("(AppendEntries) [%d] recieve reply from %d, Term: %d, Success: %v, ConflictIndex: %d\n", rf.me, server, reply.Term, reply.Success, reply.ConflictIndex) + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + // If successful: update nextIndex and matchIndex for + // follower (§5.3) + if reply.Success { + if len(args.Entries) > 0 { + rf.nextIndex[server] = args.Entries[len(args.Entries)-1].Index + 1 + } + rf.matchIndex[server] = rf.nextIndex[server] - 1 + for _, log := range rf.logs { + index := log.Index + count := 1 + for peer := range rf.peers { + if peer != rf.me && rf.matchIndex[peer] >= index { + count++ + } + } + // If there exists an N such that N > commitIndex, a majority + // of matchIndex[i] ≥ N, and log[N].term == currentTerm: + // set commitIndex = N (§5.3, §5.4). + if count > len(rf.peers)/2 && index > rf.commitIndex && log.Term == rf.currentTerm { + rf.commitIndex = index + } + } + } else { + if reply.ConflictIndex != -1 { + rf.nextIndex[server] = reply.ConflictIndex - 1 + } else { + rf.nextIndex[server] = rf.nextIndex[server] - 1 + } + if rf.nextIndex[server] < 1 { + rf.nextIndex[server] = 1 + } + } + DPrintf("(AppendEntries) [%d] nextIndex: %v, matchIndex: %v, commitIndex: %d\n", rf.me, rf.nextIndex, rf.matchIndex, rf.commitIndex) + rf.applierCond.Signal() +} + +func (rf *Raft) broadcastAppendEntries(isHeartBeat bool) { + for peer := range rf.peers { + if peer != rf.me { + // if it is a heartbeat we dont care the linearizability of logs append + if isHeartBeat { + args := rf.prepareReplicationArgs(peer) + go rf.sendReplicationRPC(peer, args) + } else { + rf.broadcasterCond[peer].Signal() + } + } + } +} + +func (rf *Raft) prepareReplicationArgs(peer int) interface{} { + if rf.nextIndex[peer] > rf.logs[0].Index { + firstLog := rf.logs[0] + nextIndex := rf.nextIndex[peer] - firstLog.Index + prevLog := rf.logs[nextIndex-1] + logs := make([]Entry, len(rf.logs[nextIndex:])) + copy(logs, rf.logs[nextIndex:]) + return &AppendEntriesArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + PrevLogIndex: prevLog.Index, + PrevLogTerm: prevLog.Term, + Entries: logs, + CommitIndex: rf.commitIndex, + } + } else { + return &InstallSnapshotArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + LastIncludedIndex: rf.logs[0].Index, + LastIncludedTerm: rf.logs[0].Term, + Offset: 0, + Data: rf.persister.ReadSnapshot(), + Done: true, + } + } +} + +func (rf *Raft) sendReplicationRPC(peer int, args interface{}) { + switch v := args.(type) { + case *AppendEntriesArgs: + rf.sendAppendEntries(peer, v) + case *InstallSnapshotArgs: + rf.sendInstallSnapshot(peer, v) + default: + panic("(sendReplicationRPC) SHOULD NOT REACH") + } +} + +func (rf *Raft) isReplicationNeeded(peer int) bool { + return rf.state == LEADER && rf.matchIndex[peer] < rf.logs[len(rf.logs)-1].Index +} + +func (rf *Raft) broadcaster(peer int) { + rf.broadcasterCond[peer].L.Lock() + defer 
rf.broadcasterCond[peer].L.Unlock() + for !rf.killed() { + rf.mu.Lock() + for !rf.isReplicationNeeded(peer) { + rf.mu.Unlock() + rf.broadcasterCond[peer].Wait() + rf.mu.Lock() + } + args := rf.prepareReplicationArgs(peer) + rf.mu.Unlock() + rf.sendReplicationRPC(peer, args) + } +} + +RAFT_EOF + +echo ' Copying install_snapshot.go' +cat > src/raft/install_snapshot.go << 'RAFT_EOF' +package raft + +type InstallSnapshotArgs struct { + BaseRPC + LeaderId int + LastIncludedIndex int + LastIncludedTerm int + Offset int + Data []byte + Done bool +} + +type InstallSnapshotReply struct { + BaseRPC +} + +// InstallSnapshot RPC handler +func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkRequestTerm(args, reply) { + return + } + + if args.LastIncludedIndex <= rf.commitIndex { + return + } + prevCommitIndex := rf.commitIndex + prevLastApplied := rf.lastApplied + defer DPrintf("(InstallSnapshot) [%d]: LastIncludedIndex: %d, LastIncludedTerm: %d, prevCommitIndex: %d, prevLastApplied: %d\n", rf.me, args.LastIncludedIndex, args.LastIncludedTerm, prevCommitIndex, prevLastApplied) + rf.resetElectionTimer() + + rf.commitIndex = args.LastIncludedIndex + rf.lastApplied = args.LastIncludedIndex + // 2. Create new snapshot file if first chunk (offset is 0) + // 3. Write data into snapshot file at given offset + // 4. Reply and wait for more data chunks if done is false + if !args.Done { + return + } + // 5. Save snapshot file, discard any existing or partial snapshot with a + // smaller index + // 6. If existing log entry has same index and term as snapshot’s last + // included entry, retain log entries following it and reply + // 7. Discard the entire log + // 8. Reset state machine using snapshot contents (and load snapshot’s + // cluster configuration) + firstLogIndex := rf.logs[0].Index + if firstLogIndex <= args.LastIncludedIndex { + rf.logs = append([]Entry{}, Entry{ + Index: args.LastIncludedIndex, + Term: args.LastIncludedTerm, + Command: nil, + }) + } else if firstLogIndex < args.LastIncludedIndex { + trimLen := args.LastIncludedIndex - firstLogIndex + rf.logs = append([]Entry{}, rf.logs[trimLen:]...) + rf.logs[0].Command = nil + } + rf.persister.Save(rf.encodeState(), args.Data) + rf.smsg = &ApplyMsg{ + SnapshotValid: true, + Snapshot: args.Data, + SnapshotTerm: args.LastIncludedTerm, + SnapshotIndex: args.LastIncludedIndex, + } +} + +func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { + reply := &InstallSnapshotReply{} + ok := rf.peers[server].Call("Raft.InstallSnapshot", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + + if args.LastIncludedIndex != rf.logs[0].Index { + return + } + + rf.nextIndex[server] = args.LastIncludedIndex + 1 + rf.matchIndex[server] = args.LastIncludedIndex + + rf.persister.Save(rf.encodeState(), args.Data) +} + +RAFT_EOF + +echo ' Copying util.go' +cat > src/raft/util.go << 'RAFT_EOF' +package raft + +import ( + "log" + "os" +) + +// Debugging +var Debug = os.Getenv("DEBUG") == "1" + +func DPrintf(format string, a ...interface{}) { + if !Debug { + return + } + log.Printf(format, a...) 
+} + +RAFT_EOF + + +echo "Creating checksums for protected files" +PROTECTED_FILES=( + "src/kvraft/config.go" + "src/kvraft/test_test.go" +) + +mkdir -p /tmp/checksums +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ]; then + sha256sum "$file" > "/tmp/checksums/$(basename $file).sha256" + echo " $file" + fi +done + + + +echo "Preprocessing complete" +exit 0 diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/task.md b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/task.md new file mode 100644 index 0000000..3d9732a --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/task.md @@ -0,0 +1,125 @@ +# Fault-tolerant Key/Value Service + +### Introduction + +In this lab you will build a fault-tolerant key/value storage service using your Raft library from Lab 3. Your key/value service will be a replicated state machine, consisting of several key/value servers that each maintain a database of key/value pairs, as in Lab 2, but additionally use Raft for replication. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. + +Clients will interact with your key/value service in much the same way as Lab 2. In particular, clients can send three different RPCs to the key/value service: + +- `Put(key, value)`: replaces the value for a particular key in the database +- `Append(key, arg)`: appends arg to key's value (treating the existing value as an empty string if the key is non-existent) +- `Get(key)`: fetches the current value of the key (returning the empty string for non-existent keys) + +Keys and values are strings. Note that unlike in Lab 2, neither `Put` nor `Append` should return a value to the client. Each client talks to the service through a `Clerk` with Put/Append/Get methods. The `Clerk` manages RPC interactions with the servers. + +Your service must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If called one at a time, the Get/Put/Append methods should act as if the system had only one copy of its state, and each call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. + +Providing linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. + +This lab has two parts. In part A, you will implement a replicated key/value service using your Raft implementation, but without using snapshots. In part B, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. + +You should review the extended Raft paper, in particular Sections 7 and 8. For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and Bolosky et al. 
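+
+To make the `Get`/`Put`/`Append` semantics above concrete, here is a minimal single-copy model of the interface. It is only an illustrative sketch (the `model` type is invented for this example); the point of the lab is to provide the same observable behavior from a replicated, fault-tolerant service.
+
+```go
+package main
+
+import "fmt"
+
+// model is a single-copy stand-in for the key/value service. Linearizability
+// means the replicated service must be indistinguishable from this one map,
+// even with concurrent clients, retries, and server failures.
+type model struct{ data map[string]string }
+
+func (m *model) Put(key, value string)  { m.data[key] = value }
+func (m *model) Append(key, arg string) { m.data[key] += arg } // missing key acts as ""
+func (m *model) Get(key string) string  { return m.data[key] } // missing key reads as ""
+
+func main() {
+	m := &model{data: map[string]string{}}
+	m.Put("k", "a")
+	m.Append("k", "b")
+	fmt.Println(m.Get("k")) // "ab": a call observes every call that completed before it
+}
+```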
+
+Start early.
+
+### Getting Started
+
+We supply you with skeleton code and tests in `src/kvraft`. You will need to modify `kvraft/client.go`, `kvraft/server.go`, and perhaps `kvraft/common.go`.
+
+To get up and running, execute the following commands:
+
+```sh
+$ cd src/kvraft
+$ go test
+...
+$
+```
+
+### Part A: Key/value service without snapshots ("moderate/hard")
+
+Each of your key/value servers ("kvservers") will have an associated Raft peer. Clerks send `Put()`, `Append()`, and `Get()` RPCs to the kvserver whose associated Raft is the leader. The kvserver code submits the Put/Append/Get operation to Raft, so that the Raft log holds a sequence of Put/Append/Get operations. All of the kvservers execute operations from the Raft log in order, applying the operations to their key/value databases; the intent is for the servers to maintain identical replicas of the key/value database.
+
+A `Clerk` sometimes doesn't know which kvserver is the Raft leader. If the `Clerk` sends an RPC to the wrong kvserver, or if it cannot reach the kvserver, the `Clerk` should re-try by sending to a different kvserver. If the key/value service commits the operation to its Raft log (and hence applies the operation to the key/value state machine), the leader reports the result to the `Clerk` by responding to its RPC. If the operation failed to commit (for example, if the leader was replaced), the server reports an error, and the `Clerk` retries with a different server.
+
+Your kvservers should not directly communicate; they should only interact with each other through Raft.
+
+#### Task
+
+Your first task is to implement a solution that works when there are no dropped messages, and no failed servers.
+
+Feel free to copy over your client code from Lab 2 (`kvsrv/client.go`) into `kvraft/client.go`. You will need to add logic for deciding which kvserver to send each RPC to. Recall that `Append()` no longer returns a value to the Clerk.
+
+You'll also need to implement `Put()`, `Append()`, and `Get()` RPC handlers in `server.go`. These handlers should enter an `Op` in the Raft log using `Start()`; you should fill in the `Op` struct definition in `server.go` so that it describes a Put/Append/Get operation. Each server should execute `Op` commands as Raft commits them, i.e. as they appear on the `applyCh`. An RPC handler should notice when Raft commits its `Op`, and then reply to the RPC.
+
+You have completed this task when you **reliably** pass the first test in the test suite: "One client".
+
+#### Hints
+
+- After calling `Start()`, your kvservers will need to wait for Raft to complete agreement. Commands that have been agreed upon arrive on the `applyCh`. Your code will need to keep reading `applyCh` while `Put()`, `Append()`, and `Get()` handlers submit commands to the Raft log using `Start()`. Beware of deadlock between the kvserver and its Raft library. One workable structure is sketched after these hints.
+- A kvserver should not complete a `Get()` RPC if it is not part of a majority (so that it does not serve stale data). A simple solution is to enter every `Get()` (as well as each `Put()` and `Append()`) in the Raft log. You don't have to implement the optimization for read-only operations that is described in Section 8.
+- You should not need to add any fields to the Raft `ApplyMsg`, or to Raft RPCs such as `AppendEntries`, but you are allowed to do so.
+- It's best to add locking from the start because the need to avoid deadlocks sometimes affects overall code design. Check that your code is race-free using `go test -race`. 
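+
+The hints above describe a common structure: RPC handlers call `Start()` and then wait, while a separate goroutine drains `applyCh`, applies each committed `Op` at most once per client request, and wakes the waiting handler. The sketch below shows one way to wire that up. Every name in it (`submit`, `applier`, `waiters`, the `Op` fields) is illustrative rather than part of the skeleton code, and the Raft types are stubbed out so the sketch stands alone; adapt the idea to the real `kvraft` skeleton.
+
+```go
+package kvsketch
+
+import (
+	"sync"
+	"time"
+)
+
+// Op is the command entered into the Raft log; the field names are examples.
+type Op struct {
+	Kind     string // "Get", "Put", or "Append"
+	Key      string
+	Value    string
+	ClientId int64
+	Seq      int64 // per-client sequence number for duplicate detection
+}
+
+// applyMsg stands in for raft.ApplyMsg in this self-contained sketch.
+type applyMsg struct {
+	CommandIndex int
+	Command      interface{}
+}
+
+type KVServer struct {
+	mu      sync.Mutex
+	data    map[string]string
+	lastSeq map[int64]int64           // highest Seq applied per client
+	waiters map[int]chan Op           // log index -> channel an RPC handler waits on
+	applyCh chan applyMsg             // fed by the Raft library in the real lab
+	start   func(Op) (int, int, bool) // stand-in for rf.Start()
+}
+
+// submit is the handler side: enter the op in the log, then wait until the
+// applier reports that something was committed at that index.
+func (kv *KVServer) submit(op Op) bool {
+	index, _, isLeader := kv.start(op)
+	if !isLeader {
+		return false
+	}
+	kv.mu.Lock()
+	ch := make(chan Op, 1)
+	kv.waiters[index] = ch
+	kv.mu.Unlock()
+	select {
+	case applied := <-ch:
+		// A different op at this index means this server lost leadership.
+		return applied.ClientId == op.ClientId && applied.Seq == op.Seq
+	case <-time.After(500 * time.Millisecond):
+		return false
+	}
+}
+
+// applier is the other side: apply committed ops in log order, at most once
+// per client sequence number, and wake any handler waiting on that index.
+func (kv *KVServer) applier() {
+	for msg := range kv.applyCh {
+		op := msg.Command.(Op)
+		kv.mu.Lock()
+		if op.Kind != "Get" && op.Seq > kv.lastSeq[op.ClientId] { // duplicate detection
+			if op.Kind == "Put" {
+				kv.data[op.Key] = op.Value
+			} else { // "Append"
+				kv.data[op.Key] += op.Value
+			}
+			kv.lastSeq[op.ClientId] = op.Seq
+		}
+		if ch, ok := kv.waiters[msg.CommandIndex]; ok {
+			ch <- op
+			delete(kv.waiters, msg.CommandIndex)
+		}
+		kv.mu.Unlock()
+	}
+}
+```
+
+A `Get` handler would read `kv.data[op.Key]` under the lock once its op has been applied; entering `Get()`s in the log, as the hints suggest, is what keeps a minority or stale leader from serving old data.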
+ +Now you should modify your solution to continue in the face of network and server failures. One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it finds a kvserver that replies positively. If a leader fails just after committing an entry to the Raft log, the `Clerk` may not receive a reply, and thus may re-send the request to another leader. Each call to `Clerk.Put()` or `Clerk.Append()` should result in just a single execution, so you will have to ensure that the re-send doesn't result in the servers executing the request twice. + +#### Task + +Add code to handle failures, and to cope with duplicate `Clerk` requests, including situations where the `Clerk` sends a request to a kvserver leader in one term, times out waiting for a reply, and re-sends the request to a new leader in another term. The request should execute just once. Your code should pass the `go test -run 4A` tests. + +#### Hints + +- Your solution needs to handle a leader that has called Start() for a Clerk's RPC, but loses its leadership before the request is committed to the log. In this case you should arrange for the Clerk to re-send the request to other servers until it finds the new leader. One way to do this is for the server to detect that it has lost leadership, by noticing that Raft's term has changed or a different request has appeared at the index returned by Start(). If the ex-leader is partitioned by itself, it won't know about new leaders; but any client in the same partition won't be able to talk to a new leader either, so it's OK in this case for the server and client to wait indefinitely until the partition heals. +- You will probably have to modify your Clerk to remember which server turned out to be the leader for the last RPC, and send the next RPC to that server first. This will avoid wasting time searching for the leader on every RPC, which may help you pass some of the tests quickly enough. +- You should use a duplicate detection scheme similar to Lab 2. It should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. It's OK to assume that a client will make only one call into a Clerk at a time. You may find that you need to make changes to what information you store in your duplicate detection table from Lab 2. + +Your code should now pass the Lab 4A tests, like this: + +```sh +$ go test -run 4A +Test: one client (4A) ... +... Passed -- 15.5 5 4576 903 +Test: ops complete fast enough (4A) ... +... Passed -- 15.7 3 3022 0 +Test: many clients (4A) ... +... Passed -- 15.9 5 5884 1160 +Test: unreliable net, many clients (4A) ... +... Passed -- 19.2 5 3083 441 +Test: concurrent append to same key, unreliable (4A) ... +... Passed -- 2.5 3 218 52 +Test: progress in majority (4A) ... +... Passed -- 1.7 5 103 2 +Test: no progress in minority (4A) ... +... Passed -- 1.0 5 102 3 +Test: completion after heal (4A) ... +... Passed -- 1.2 5 70 3 +Test: partitions, one client (4A) ... +... Passed -- 23.8 5 4501 765 +Test: partitions, many clients (4A) ... +... Passed -- 23.5 5 5692 974 +Test: restarts, one client (4A) ... +... Passed -- 22.2 5 4721 908 +Test: restarts, many clients (4A) ... +... Passed -- 22.5 5 5490 1033 +Test: unreliable net, restarts, many clients (4A) ... +... Passed -- 26.5 5 3532 474 +Test: restarts, partitions, many clients (4A) ... +... Passed -- 29.7 5 6122 1060 +Test: unreliable net, restarts, partitions, many clients (4A) ... +... 
Passed -- 32.9 5 2967 317 +Test: unreliable net, restarts, partitions, random keys, many clients (4A) ... +... Passed -- 35.0 7 8249 746 +PASS +ok kvraft 290.184s +``` + +The numbers after each `Passed` are real time in seconds, number of peers, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put/Append calls). + +## Files Agent Should Modify + +The agent should implement the solution by modifying the following files: + +- `src/kvraft/client.go` +- `src/kvraft/common.go` +- `src/kvraft/server.go` + +**Important**: Do not modify test files, configuration files, or other infrastructure files. diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/config.json new file mode 100644 index 0000000..08bc44d --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/config.json @@ -0,0 +1,14 @@ +{ + "instance_id": "mit_6_5840_2024_kvraft_4b", + "course_id": "mit_6_5840_2024", + "docker_image": "xuafeng/swe-go-python:latest", + "timeout_minutes": 30, + "tags": [ + "distributed-systems", + "kvraft", + "raft", + "fault-tolerance", + "go" + ], + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" +} \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/evaluate.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/evaluate.sh new file mode 100755 index 0000000..fca0810 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/evaluate.sh @@ -0,0 +1,37 @@ +#!/bin/bash +set -e + +echo "=== Evaluation ===" + +cd /workspace + +export PATH=$PATH:/usr/local/go/bin + +echo "Verifying protected files were not modified" +PROTECTED_FILES=( + "src/kvraft/config.go" + "src/kvraft/test_test.go" +) + +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ] && [ -f "/tmp/checksums/$(basename $file).sha256" ]; then + echo " Checking $file" + sha256sum -c "/tmp/checksums/$(basename $file).sha256" || { + echo "FAIL: $file was modified" + exit 1 + } + fi +done +echo "All protected files unchanged" + +echo "Running KVRaft 4B tests" +cd src/kvraft +go test -run 4B -race + +if [ $? -eq 0 ]; then + echo "PASS: Tests passed" + exit 0 +else + echo "FAIL: Tests failed" + exit 1 +fi diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/preprocess.sh new file mode 100755 index 0000000..21aba34 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/preprocess.sh @@ -0,0 +1,915 @@ +#!/bin/bash +set -e + +echo "=== Preprocessing KVRaft Lab 4B ===" + +cd /workspace + +echo "KVRaft depends on Raft implementation from Lab 3" +echo "Copying reference Raft implementation..." + +echo ' Copying raft.go' +cat > src/raft/raft.go << 'RAFT_EOF' +package raft + +// +// this is an outline of the API that raft must expose to +// the service (or tester). see comments below for +// each of these functions for more details. +// +// rf = Make(...) +// create a new Raft server. +// rf.Start(command interface{}) (index, term, isleader) +// start agreement on a new log entry +// rf.GetState() (term, isLeader) +// ask a Raft for its current term, and whether it thinks it is leader +// ApplyMsg +// each time a new entry is committed to the log, each Raft peer +// should send an ApplyMsg to the service (or tester) +// in the same server. 
+// + +import ( + // "bytes" + + "bytes" + "log" + "sync" + "sync/atomic" + "time" + + // "6.5840/labgob" + "6.5840/labgob" + "6.5840/labrpc" +) + +// as each Raft peer becomes aware that successive log entries are +// committed, the peer should send an ApplyMsg to the service (or +// tester) on the same server, via the applyCh passed to Make(). set +// CommandValid to true to indicate that the ApplyMsg contains a newly +// committed log entry. +// +// in part 3D you'll want to send other kinds of messages (e.g., +// snapshots) on the applyCh, but set CommandValid to false for these +// other uses. +type ApplyMsg struct { + CommandValid bool + Command interface{} + CommandIndex int + CommandTerm int + + // For 3D: + SnapshotValid bool + Snapshot []byte + SnapshotTerm int + SnapshotIndex int +} + +type Entry struct { + Term int + Index int + Command interface{} +} + +// Base struct for common fields +type BaseRPC struct { + Term int +} + +// Implement RaftRPC interface for BaseRPC +func (b *BaseRPC) GetTerm() int { + return b.Term +} + +func (b *BaseRPC) SetTerm(term int) { + b.Term = term +} + +// RaftRPC interface +type RaftRPC interface { + GetTerm() int + SetTerm(int) +} + +type ServerState int + +const ( + FOLLOWER ServerState = iota + CANDIDATE + LEADER +) + +// A Go object implementing a single Raft peer. +type Raft struct { + mu sync.Mutex // Lock to protect shared access to this peer's state + peers []*labrpc.ClientEnd // RPC end points of all peers + persister *Persister // Object to hold this peer's persisted state + me int // this peer's index into peers[] + dead int32 // set by Kill() + heartbeatTimeout time.Duration + electionTimeout time.Duration + electionTimeStamp time.Time + applyCh chan ApplyMsg + + // state a Raft server must maintain. + broadcasterCond []*sync.Cond + applierCond *sync.Cond + + // server state + state ServerState + + // presistent state on all servers + currentTerm int // latest term server has seen (initialized to 0 on first boot, increases monotonically) + votedFor int // candidateId that received vote in current term (or null if none) + logs []Entry // log entries; each entry contains command for state machine, and term when entry was received by leader (first index is 1) + + // volatile state on all servers + commitIndex int // index of highest log entry known to be committed (initialized to 0, increases monotonically) + lastApplied int // index of highest log entry applied to state machine (initialized to 0, increases monotonically) + + // volatile state on leaders (reinitialized after election) + nextIndex []int // for each server, index of the next log entry to send to that server (initialized to leader last log index + 1) + matchIndex []int // for each server, index of highest log entry known to be replicated on server (initialized to 0, increases monotonically) + + // snapshot msg + smsg *ApplyMsg +} + +// return currentTerm and whether this server +// believes it is the leader. +func (rf *Raft) GetState() (int, bool) { + rf.mu.Lock() + defer rf.mu.Unlock() + return rf.currentTerm, rf.state == LEADER +} + +func (rf *Raft) encodeState() []byte { + w := new(bytes.Buffer) + e := labgob.NewEncoder(w) + e.Encode(rf.currentTerm) + e.Encode(rf.votedFor) + e.Encode(rf.logs) + return w.Bytes() +} + +// save Raft's persistent state to stable storage, +// where it can later be retrieved after a crash and restart. +// see paper's Figure 2 for a description of what should be persistent. 
+// before you've implemented snapshots, you should pass nil as the +// second argument to persister.Save(). +// after you've implemented snapshots, pass the current snapshot +// (or nil if there's not yet a snapshot). +func (rf *Raft) persist() { + if rf.persister.ReadSnapshot() != nil { + rf.persister.Save(rf.encodeState(), rf.persister.ReadSnapshot()) + } else { + rf.persister.Save(rf.encodeState(), nil) + } +} + +// restore previously persisted state. +func (rf *Raft) readPersist(data []byte) { + if data == nil || len(data) < 1 { // bootstrap without any state + return + } + r := bytes.NewBuffer(data) + d := labgob.NewDecoder(r) + var currentTerm int + var votedFor int + var logs []Entry + + if d.Decode(¤tTerm) != nil || d.Decode(&votedFor) != nil || d.Decode(&logs) != nil { + log.Fatal("failed to read persist\n") + } else { + DPrintf("[%d]: read persist, currentTerm: %d, votedFor: %d, logs: %v\n", rf.me, currentTerm, votedFor, logs) + rf.currentTerm = currentTerm + rf.votedFor = votedFor + rf.logs = logs + rf.lastApplied = rf.logs[0].Index + rf.commitIndex = rf.logs[0].Index + } +} + +// the service says it has created a snapshot that has +// all info up to and including index. this means the +// service no longer needs the log through (and including) +// that index. Raft should now trim its log as much as possible. +func (rf *Raft) Snapshot(index int, snapshot []byte) { + // Your code here (3D). + rf.mu.Lock() + defer rf.mu.Unlock() + // if the snapshot is outdated, just ignore it + if rf.logs[0].Index >= index { + return + } + firstLogIndex := rf.logs[0].Index + trimLen := index - firstLogIndex + // trim the logs + rf.logs = append([]Entry{}, rf.logs[trimLen:]...) + rf.logs[0].Command = nil + rf.persister.Save(rf.encodeState(), snapshot) +} + +// the service using Raft (e.g. a k/v server) wants to start +// agreement on the next command to be appended to Raft's log. if this +// server isn't the leader, returns false. otherwise start the +// agreement and return immediately. there is no guarantee that this +// command will ever be committed to the Raft log, since the leader +// may fail or lose an election. even if the Raft instance has been killed, +// this function should return gracefully. +// +// the first return value is the index that the command will appear at +// if it's ever committed. the second return value is the current +// term. the third return value is true if this server believes it is +// the leader. +func (rf *Raft) Start(command interface{}) (int, int, bool) { + rf.mu.Lock() + defer rf.mu.Unlock() + if rf.state != LEADER { + return -1, -1, false + } + defer DPrintf("(Start) [%d]: command %+v, index:%d, term: %d\n", rf.me, command, rf.logs[len(rf.logs)-1].Index, rf.currentTerm) + rf.logs = append(rf.logs, Entry{ + Term: rf.currentTerm, + Index: rf.logs[len(rf.logs)-1].Index + 1, + Command: command, + }) + rf.broadcastAppendEntries(false) + // Your code here (3B). 
+ return rf.logs[len(rf.logs)-1].Index, rf.currentTerm, true +} + +// Warning: this function is not thread-safe +func (rf *Raft) resetNewTermState(targetTerm int) { + DPrintf("(ResetTerm)[%d]: received newer term, set term to %d\n", rf.me, targetTerm) + if rf.currentTerm < targetTerm { + rf.votedFor = -1 + } + rf.currentTerm = targetTerm + rf.state = FOLLOWER // reset to follower +} + +// Reply false if term < currentTerm (§5.1) +// If RPC request contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkRequestTerm(args, reply RaftRPC) bool { + term := args.GetTerm() + defer reply.SetTerm(rf.currentTerm) + if term < rf.currentTerm { + return false + } + if term > rf.currentTerm { + rf.resetNewTermState(term) + } + return true +} + +// If RPC request or response contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkResponseTerm(args, reply RaftRPC, isElection bool) bool { + argsTerm := args.GetTerm() + replyTerm := reply.GetTerm() + if replyTerm > argsTerm { + rf.resetNewTermState(replyTerm) + rf.resetElectionTimer() + return false + } + return isElection || (rf.state == LEADER) +} + +// the tester doesn't halt goroutines created by Raft after each test, +// but it does call the Kill() method. your code can use killed() to +// check whether Kill() has been called. the use of atomic avoids the +// need for a lock. +// +// the issue is that long-running goroutines use memory and may chew +// up CPU time, perhaps causing later tests to fail and generating +// confusing debug output. any goroutine with a long-running loop +// should call killed() to check whether it should stop. +func (rf *Raft) Kill() { + atomic.StoreInt32(&rf.dead, 1) + // Your code here, if desired. 
+} + +func (rf *Raft) killed() bool { + z := atomic.LoadInt32(&rf.dead) + return z == 1 +} + +// a dedicated applier goroutine to guarantee that each log will be push into applyCh exactly once, ensuring that service's applying entries and raft's committing entries can be parallel +func (rf *Raft) applier() { + for !rf.killed() { + rf.mu.Lock() + // if there is no need to apply entries, just release CPU and wait other goroutine's signal if they commit new entries + for rf.lastApplied >= rf.commitIndex { + rf.applierCond.Wait() + } + firstLogIndex := rf.logs[0].Index + commitIndex, lastApplied := rf.commitIndex, rf.lastApplied + DPrintf("(applier) [%d]: commitIndex: %d, lastApplied: %d, logFirstIndex: %d, logLastIndex: %d\n", rf.me, commitIndex, lastApplied, firstLogIndex, rf.logs[len(rf.logs)-1].Index) + entries := make([]Entry, commitIndex-lastApplied) + copy(entries, rf.logs[lastApplied+1-firstLogIndex:commitIndex+1-firstLogIndex]) + if rf.smsg != nil { + msg := rf.smsg + rf.smsg = nil + rf.mu.Unlock() + rf.applyCh <- *msg + } else { + rf.mu.Unlock() + } + for _, entry := range entries { + DPrintf("(applier) [%d]: apply entry %+v\n", rf.me, entry) + rf.applyCh <- ApplyMsg{ + CommandValid: true, + Command: entry.Command, + CommandTerm: entry.Term, + CommandIndex: entry.Index, + } + } + rf.mu.Lock() + // use commitIndex rather than rf.commitIndex because rf.commitIndex may change during the Unlock() and Lock() + // use Max(rf.lastApplied, commitIndex) rather than commitIndex directly to avoid concurrently InstallSnapshot rpc causing lastApplied to rollback + if rf.lastApplied < commitIndex { + rf.lastApplied = commitIndex + } + rf.mu.Unlock() + } +} + +/** + * Lets illustrate the time line of the ticker function + * e: election timeout + * h: heartbeat timeout + * + * ---- h ---- h ---- h ---- h ---- h ---- ... + * + * First, the server will wake up each fixed heartbeat timeout. This timeout is + * relatively shorter than the election timeout. If the server is not a leader, + * it basically do nothing about heartbeat. + * + * However, everytime when server wake up, it will check if the election timeout + * is reached. It might start a new election, if it is not a leader. + * + * v election timeout found! + * ---- h1 ---- h2 ---- h3 ---- h ---- h ---- ... + * --------- e1 ------ e2 ------------ e ---- ... + * + * Reseting a new election timeout when the server receives a heartbeat or a + * vote from another server prevents the election. One shortcomming of the + * current implementation is that the election timeout does not trigger a new + * election immediately. It will wait until the next heartbeat timeout. + */ +func (rf *Raft) ticker() { + for !rf.killed() { + rf.mu.Lock() + if rf.state == LEADER { + rf.broadcastAppendEntries(true) + } else if rf.isElectionTimeout() { + rf.startElection() + } + rf.mu.Unlock() + time.Sleep(rf.heartbeatTimeout) + } +} + +// the service or tester wants to create a Raft server. the ports +// of all the Raft servers (including this one) are in peers[]. this +// server's port is peers[me]. all the servers' peers[] arrays +// have the same order. persister is a place for this server to +// save its persistent state, and also initially holds the most +// recent saved state, if any. applyCh is a channel on which the +// tester or service expects Raft to send ApplyMsg messages. +// Make() must return quickly, so it should start goroutines +// for any long-running work. 
+func Make(peers []*labrpc.ClientEnd, me int, + persister *Persister, applyCh chan ApplyMsg) *Raft { + rf := &Raft{} + rf.peers = peers + rf.persister = persister + rf.me = me + rf.applyCh = applyCh + rf.heartbeatTimeout = 125 * time.Millisecond + rf.resetElectionTimer() + rf.state = FOLLOWER + rf.votedFor = -1 + rf.logs = make([]Entry, 0) + + // dummy entry to make the index start from 1 + rf.logs = append(rf.logs, Entry{0, 0, nil}) + + rf.commitIndex = 0 + rf.lastApplied = 0 + + rf.applierCond = sync.NewCond(&rf.mu) + rf.broadcasterCond = make([]*sync.Cond, len(peers)) + + rf.nextIndex = make([]int, len(peers)) + rf.matchIndex = make([]int, len(peers)) + + for id := range peers { + rf.nextIndex[id] = 1 + if id != rf.me { + rf.broadcasterCond[id] = sync.NewCond(&sync.Mutex{}) + go rf.broadcaster(id) + } + } + + rf.smsg = nil + + // initialize from state persisted before a crash + rf.readPersist(persister.ReadRaftState()) + + // start ticker goroutine to start elections + go rf.ticker() + + go rf.applier() + + return rf +} + +RAFT_EOF + +echo ' Copying election.go' +cat > src/raft/election.go << 'RAFT_EOF' +package raft + +import ( + "math/rand" + "sync/atomic" + "time" +) + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type RequestVoteArgs struct { + BaseRPC // candidate's term + CandidateId int // candidate requesting vote + LastLogIndex int // index of candidate's last log entry + LastLogTerm int // term of candidate's last log entry +} + +type RequestVoteReply struct { + BaseRPC // currentTerm, for candidate to update itself + VoteGranted bool // true means candidate received vote +} + +// RequestVote RPC handler +// Restart your election timer if you grant a vote to another peer. +func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + reply.VoteGranted = false + + DPrintf("(RequestVote) [%d]: receive vote request from %d, term %d\n", rf.me, args.CandidateId, args.Term) + + if !rf.checkRequestTerm(args, reply) { + return + } + + if (rf.votedFor == -1 || rf.votedFor == args.CandidateId) && rf.isUpToDate(args) { + reply.VoteGranted = true + rf.votedFor = args.CandidateId + rf.resetElectionTimer() + } +} + +func (rf *Raft) isUpToDate(args *RequestVoteArgs) bool { + lastLog := rf.logs[len(rf.logs)-1] + candidateIndex := args.LastLogIndex + candidateTerm := args.LastLogTerm + return candidateTerm > lastLog.Term || (candidateTerm == lastLog.Term && candidateIndex >= lastLog.Index) +} + +func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, voteCount *int32) { + reply := &RequestVoteReply{} + ok := rf.peers[server].Call("Raft.RequestVote", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, true) { + return + } + + if !reply.VoteGranted { + return + } + + DPrintf("(RequestVote) [%d]: received vote from %d, voteCount: %d\n", rf.me, server, *voteCount) + + // If votes received from majority of servers: become leader + if atomic.AddInt32(voteCount, 1) > int32(len(rf.peers)/2) && + rf.state == CANDIDATE && + rf.currentTerm == args.Term { + rf.state = LEADER + lastLogIndex := rf.logs[len(rf.logs)-1].Index + for i := range rf.peers { + rf.nextIndex[i] = lastLogIndex + 1 + rf.matchIndex[i] = 0 + } + DPrintf("[%d]: become leader to term %d\n", rf.me, rf.currentTerm) + // send initial empty AppendEntries RPCs (heartbeat) to each server immediately + rf.broadcastAppendEntries(true) 
+ } + DPrintf("(RequestVote) [%d]: voteCount: %d\n", rf.me, *voteCount) +} + +func (rf *Raft) startElection() { + rf.currentTerm++ + rf.state = CANDIDATE + rf.votedFor = rf.me + rf.resetElectionTimer() + DPrintf("(RequestVote) [%d]: start election, term %d", rf.me, rf.currentTerm) + lastLog := rf.logs[len(rf.logs)-1] + + voteCount := int32(1) + args := RequestVoteArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + CandidateId: rf.me, + LastLogIndex: lastLog.Index, + LastLogTerm: lastLog.Term, + } + + for id := range rf.peers { + if id == rf.me { + continue + } + go rf.sendRequestVote(id, &args, &voteCount) + } +} + +func (rf *Raft) resetElectionTimer() { + // election timeout range from 350 to 550 + ms := 350 + (rand.Int63() % 200) + rf.electionTimeStamp = time.Now() + rf.electionTimeout = time.Duration(ms) * time.Millisecond +} + +func (rf *Raft) isElectionTimeout() bool { + return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) +} + +RAFT_EOF + +echo ' Copying append_entries.go' +cat > src/raft/append_entries.go << 'RAFT_EOF' +package raft + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type AppendEntriesArgs struct { + BaseRPC // leader's term + LeaderId int // so follower can redirect clients + PrevLogIndex int // index of log entry immediately preceding new ones + PrevLogTerm int // term of prevLogIndex entry + Entries []Entry // log entries to store (empty for heartbeat; may send more than one for efficiency) + CommitIndex int // leader's commitIndex +} + +type AppendEntriesReply struct { + BaseRPC // currentTerm, for leader to update itself + Success bool // true if follower contained entry matching prevLogIndex and prevLogTerm + ConflictIndex int // the index of the first conflicting entry +} + +// AppendEntries RPC handler +// Reset the election timer if you get an AppendEntries RPC from the current leader +// (i.e., if the term of the AppendEntries arguments is outdated, you should not reset your timer); +func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + DPrintf("(AppendEntries) [%d] recieve from %d, Term: %d, PrevLogIndex: %d, PrevLogTerm: %d\n", rf.me, args.LeaderId, args.Term, args.PrevLogIndex, args.PrevLogTerm) + + reply.Success = false + reply.ConflictIndex = -1 + + if !rf.checkRequestTerm(args, reply) { + return + } + + if rf.state == CANDIDATE { + rf.state = FOLLOWER + } + + rf.resetElectionTimer() + + prevLogIndex := args.PrevLogIndex - rf.logs[0].Index + + if prevLogIndex < 0 { + // force to send a snapshot + reply.ConflictIndex = 0 + return + } + + // Reply false if log doesn’t contain an entry at prevLogIndex + // whose term matches prevLogTerm (§5.3) + if prevLogIndex >= len(rf.logs) { + reply.ConflictIndex = rf.logs[len(rf.logs)-1].Index + return + } + + // If an existing entry conflicts with a new one (same index + // but different terms), delete the existing entry and all that + // follow it (§5.3) + if rf.logs[prevLogIndex].Term != args.PrevLogTerm { + // optimization + curTerm := rf.logs[prevLogIndex].Term + var conflictIndex int + for i := prevLogIndex; i > 0; i-- { + if rf.logs[i-1].Term != curTerm { + conflictIndex = i + break + } + } + reply.ConflictIndex = conflictIndex + rf.logs[0].Index + return + } + for idx, entry := range args.Entries { + logIndex := entry.Index - rf.logs[0].Index + if logIndex >= len(rf.logs) || rf.logs[logIndex].Term != entry.Term { + DPrintf("(AppendEntries) [%d] append logs: %v\n", rf.me, 
args.Entries) + rf.logs = append([]Entry{}, append(rf.logs[:logIndex], args.Entries[idx:]...)...) + break + } + } + reply.Success = true + if args.CommitIndex > rf.commitIndex { + rf.commitIndex = args.CommitIndex + if args.CommitIndex-rf.logs[0].Index >= len(rf.logs) { + rf.commitIndex = rf.logs[len(rf.logs)-1].Index + } + } + rf.applierCond.Signal() +} + +func (rf *Raft) sendAppendEntries(server int, args *AppendEntriesArgs) { + reply := &AppendEntriesReply{} + ok := rf.peers[server].Call("Raft.AppendEntries", args, reply) + if !ok { + return + } + + DPrintf("(AppendEntries) [%d] recieve reply from %d, Term: %d, Success: %v, ConflictIndex: %d\n", rf.me, server, reply.Term, reply.Success, reply.ConflictIndex) + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + // If successful: update nextIndex and matchIndex for + // follower (§5.3) + if reply.Success { + if len(args.Entries) > 0 { + rf.nextIndex[server] = args.Entries[len(args.Entries)-1].Index + 1 + } + rf.matchIndex[server] = rf.nextIndex[server] - 1 + for _, log := range rf.logs { + index := log.Index + count := 1 + for peer := range rf.peers { + if peer != rf.me && rf.matchIndex[peer] >= index { + count++ + } + } + // If there exists an N such that N > commitIndex, a majority + // of matchIndex[i] ≥ N, and log[N].term == currentTerm: + // set commitIndex = N (§5.3, §5.4). + if count > len(rf.peers)/2 && index > rf.commitIndex && log.Term == rf.currentTerm { + rf.commitIndex = index + } + } + } else { + if reply.ConflictIndex != -1 { + rf.nextIndex[server] = reply.ConflictIndex - 1 + } else { + rf.nextIndex[server] = rf.nextIndex[server] - 1 + } + if rf.nextIndex[server] < 1 { + rf.nextIndex[server] = 1 + } + } + DPrintf("(AppendEntries) [%d] nextIndex: %v, matchIndex: %v, commitIndex: %d\n", rf.me, rf.nextIndex, rf.matchIndex, rf.commitIndex) + rf.applierCond.Signal() +} + +func (rf *Raft) broadcastAppendEntries(isHeartBeat bool) { + for peer := range rf.peers { + if peer != rf.me { + // if it is a heartbeat we dont care the linearizability of logs append + if isHeartBeat { + args := rf.prepareReplicationArgs(peer) + go rf.sendReplicationRPC(peer, args) + } else { + rf.broadcasterCond[peer].Signal() + } + } + } +} + +func (rf *Raft) prepareReplicationArgs(peer int) interface{} { + if rf.nextIndex[peer] > rf.logs[0].Index { + firstLog := rf.logs[0] + nextIndex := rf.nextIndex[peer] - firstLog.Index + prevLog := rf.logs[nextIndex-1] + logs := make([]Entry, len(rf.logs[nextIndex:])) + copy(logs, rf.logs[nextIndex:]) + return &AppendEntriesArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + PrevLogIndex: prevLog.Index, + PrevLogTerm: prevLog.Term, + Entries: logs, + CommitIndex: rf.commitIndex, + } + } else { + return &InstallSnapshotArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + LastIncludedIndex: rf.logs[0].Index, + LastIncludedTerm: rf.logs[0].Term, + Offset: 0, + Data: rf.persister.ReadSnapshot(), + Done: true, + } + } +} + +func (rf *Raft) sendReplicationRPC(peer int, args interface{}) { + switch v := args.(type) { + case *AppendEntriesArgs: + rf.sendAppendEntries(peer, v) + case *InstallSnapshotArgs: + rf.sendInstallSnapshot(peer, v) + default: + panic("(sendReplicationRPC) SHOULD NOT REACH") + } +} + +func (rf *Raft) isReplicationNeeded(peer int) bool { + return rf.state == LEADER && rf.matchIndex[peer] < rf.logs[len(rf.logs)-1].Index +} + +func (rf *Raft) broadcaster(peer int) { + rf.broadcasterCond[peer].L.Lock() + defer 
rf.broadcasterCond[peer].L.Unlock() + for !rf.killed() { + rf.mu.Lock() + for !rf.isReplicationNeeded(peer) { + rf.mu.Unlock() + rf.broadcasterCond[peer].Wait() + rf.mu.Lock() + } + args := rf.prepareReplicationArgs(peer) + rf.mu.Unlock() + rf.sendReplicationRPC(peer, args) + } +} + +RAFT_EOF + +echo ' Copying install_snapshot.go' +cat > src/raft/install_snapshot.go << 'RAFT_EOF' +package raft + +type InstallSnapshotArgs struct { + BaseRPC + LeaderId int + LastIncludedIndex int + LastIncludedTerm int + Offset int + Data []byte + Done bool +} + +type InstallSnapshotReply struct { + BaseRPC +} + +// InstallSnapshot RPC handler +func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkRequestTerm(args, reply) { + return + } + + if args.LastIncludedIndex <= rf.commitIndex { + return + } + prevCommitIndex := rf.commitIndex + prevLastApplied := rf.lastApplied + defer DPrintf("(InstallSnapshot) [%d]: LastIncludedIndex: %d, LastIncludedTerm: %d, prevCommitIndex: %d, prevLastApplied: %d\n", rf.me, args.LastIncludedIndex, args.LastIncludedTerm, prevCommitIndex, prevLastApplied) + rf.resetElectionTimer() + + rf.commitIndex = args.LastIncludedIndex + rf.lastApplied = args.LastIncludedIndex + // 2. Create new snapshot file if first chunk (offset is 0) + // 3. Write data into snapshot file at given offset + // 4. Reply and wait for more data chunks if done is false + if !args.Done { + return + } + // 5. Save snapshot file, discard any existing or partial snapshot with a + // smaller index + // 6. If existing log entry has same index and term as snapshot’s last + // included entry, retain log entries following it and reply + // 7. Discard the entire log + // 8. Reset state machine using snapshot contents (and load snapshot’s + // cluster configuration) + firstLogIndex := rf.logs[0].Index + if firstLogIndex <= args.LastIncludedIndex { + rf.logs = append([]Entry{}, Entry{ + Index: args.LastIncludedIndex, + Term: args.LastIncludedTerm, + Command: nil, + }) + } else if firstLogIndex < args.LastIncludedIndex { + trimLen := args.LastIncludedIndex - firstLogIndex + rf.logs = append([]Entry{}, rf.logs[trimLen:]...) + rf.logs[0].Command = nil + } + rf.persister.Save(rf.encodeState(), args.Data) + rf.smsg = &ApplyMsg{ + SnapshotValid: true, + Snapshot: args.Data, + SnapshotTerm: args.LastIncludedTerm, + SnapshotIndex: args.LastIncludedIndex, + } +} + +func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { + reply := &InstallSnapshotReply{} + ok := rf.peers[server].Call("Raft.InstallSnapshot", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + + if args.LastIncludedIndex != rf.logs[0].Index { + return + } + + rf.nextIndex[server] = args.LastIncludedIndex + 1 + rf.matchIndex[server] = args.LastIncludedIndex + + rf.persister.Save(rf.encodeState(), args.Data) +} + +RAFT_EOF + +echo ' Copying util.go' +cat > src/raft/util.go << 'RAFT_EOF' +package raft + +import ( + "log" + "os" +) + +// Debugging +var Debug = os.Getenv("DEBUG") == "1" + +func DPrintf(format string, a ...interface{}) { + if !Debug { + return + } + log.Printf(format, a...) 
+} + +RAFT_EOF + + +echo "Creating checksums for protected files" +PROTECTED_FILES=( + "src/kvraft/config.go" + "src/kvraft/test_test.go" +) + +mkdir -p /tmp/checksums +for file in "${PROTECTED_FILES[@]}"; do + if [ -f "$file" ]; then + sha256sum "$file" > "/tmp/checksums/$(basename $file).sha256" + echo " $file" + fi +done + + + +echo "Preprocessing complete" +exit 0 diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/task.md b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/task.md new file mode 100644 index 0000000..50c5753 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/task.md @@ -0,0 +1,169 @@ +# Fault-tolerant Key/Value Service + +### Introduction + +In this lab you will build a fault-tolerant key/value storage service using your Raft library from Lab 3. Your key/value service will be a replicated state machine, consisting of several key/value servers that each maintain a database of key/value pairs, as in Lab 2, but additionally use Raft for replication. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. + +Clients will interact with your key/value service in much the same way as Lab 2. In particular, clients can send three different RPCs to the key/value service: + +- `Put(key, value)`: replaces the value for a particular key in the database +- `Append(key, arg)`: appends arg to key's value (treating the existing value as an empty string if the key is non-existent) +- `Get(key)`: fetches the current value of the key (returning the empty string for non-existent keys) + +Keys and values are strings. Note that unlike in Lab 2, neither `Put` nor `Append` should return a value to the client. Each client talks to the service through a `Clerk` with Put/Append/Get methods. The `Clerk` manages RPC interactions with the servers. + +Your service must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If called one at a time, the Get/Put/Append methods should act as if the system had only one copy of its state, and each call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. + +Providing linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. + +This lab has two parts. In part A, you will implement a replicated key/value service using your Raft implementation, but without using snapshots. In part B, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. + +You should review the extended Raft paper, in particular Sections 7 and 8. For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and Bolosky et al. 
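+
+As a concrete reading of the Put/Append/Get semantics described above, here is a short, self-contained sketch of just the state-machine apply step on a plain in-memory map. Names are illustrative; it deliberately contains none of the Clerk, RPC, or Raft plumbing that the rest of this lab is about.
+
+```go
+package main
+
+import "fmt"
+
+// store models only the key/value semantics: Put replaces, Append treats a
+// missing key as "", Get returns "" for a missing key.
+type store map[string]string
+
+func (s store) Put(key, value string)  { s[key] = value }
+func (s store) Append(key, arg string) { s[key] += arg }
+func (s store) Get(key string) string  { return s[key] }
+
+func main() {
+    s := store{}
+    s.Append("x", "a") // key absent: treated as "" + "a"
+    s.Put("y", "1")
+    s.Append("y", "2")
+    fmt.Println(s.Get("x"), s.Get("y"), s.Get("z") == "") // a 12 true
+}
+```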
+ +Start early. + +### Getting Started + +We supply you with skeleton code and tests in `src/kvraft`. You will need to modify `kvraft/client.go`, `kvraft/server.go`, and perhaps `kvraft/common.go`. + +To get up and running, execute the following commands: + +```sh +$ cd src/kvraft +$ go test +... +$ +``` + +### Part A: Key/value service without snapshots ("moderate/hard") + +Each of your key/value servers ("kvservers") will have an associated Raft peer. Clerks send `Put()`, `Append()`, and `Get()` RPCs to the kvserver whose associated Raft is the leader. The kvserver code submits the Put/Append/Get operation to Raft, so that the Raft log holds a sequence of Put/Append/Get operations. All of the kvservers execute operations from the Raft log in order, applying the operations to their key/value databases; the intent is for the servers to maintain identical replicas of the key/value database. + +A `Clerk` sometimes doesn't know which kvserver is the Raft leader. If the `Clerk` sends an RPC to the wrong kvserver, or if it cannot reach the kvserver, the `Clerk` should re-try by sending to a different kvserver. If the key/value service commits the operation to its Raft log (and hence applies the operation to the key/value state machine), the leader reports the result to the `Clerk` by responding to its RPC. If the operation failed to commit (for example, if the leader was replaced), the server reports an error, and the `Clerk` retries with a different server. + +Your kvservers should not directly communicate; they should only interact with each other through Raft. + +#### Task + +Your first task is to implement a solution that works when there are no dropped messages, and no failed servers. + +Feel free to copy over your client code from Lab 2 (`kvsrv/client.go`) into `kvraft/client.go`. You will need to add logic for deciding which kvserver to send each RPC to. Recall that `Append()` no longer returns a value to the Clerk. + +You'll also need to implement `Put()`, `Append()`, and `Get()` RPC handlers in `server.go`. These handlers should enter an `Op` in the Raft log using `Start()`; you should fill in the `Op` struct definition in `server.go` so that it describes a Put/Append/Get operation. Each server should execute `Op` commands as Raft commits them, i.e. as they appear on the `applyCh`. An RPC handler should notice when Raft commits its `Op`, and then reply to the RPC. + +You have completed this task when you **reliably** pass the first test in the test suite: "One client". + +#### Hints + +- After calling `Start()`, your kvservers will need to wait for Raft to complete agreement. Commands that have been agreed upon arrive on the `applyCh`. Your code will need to keep reading `applyCh` while `Put()`, `Append()`, and `Get()` handlers submit commands to the Raft log using `Start()`. Beware of deadlock between the kvserver and its Raft library. +- A kvserver should not complete a `Get()` RPC if it is not part of a majority (so that it does not serve stale data). A simple solution is to enter every `Get()` (as well as each `Put()` and `Append()`) in the Raft log. You don't have to implement the optimization for read-only operations that is described in Section 8. +- You should not need to add any fields to to the Raft `ApplyMsg`, or to Raft RPCs such as `AppendEntries`, but you are allowed to do so. +- It's best to add locking from the start because the need to avoid deadlocks sometimes affects overall code design. Check that your code is race-free using `go test -race`. 
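+
+The failure-handling task below requires at-most-once execution of re-sent `Clerk` requests. As a point of reference, here is a minimal, self-contained sketch of one common shape for a duplicate table, keyed by client id and sequence number; the names (`dupTable`, `clientID`, `seq`) are assumptions for illustration, not the lab's API. In a real solution the table would be consulted when an op is applied from `applyCh`, and for Part B it must be included in the snapshot.
+
+```go
+package main
+
+import "fmt"
+
+// dupEntry remembers, per client, the highest sequence number already executed
+// and the result recorded for it, so a re-sent request can be answered without
+// executing again. Keeping one entry per client relies on the assumption that
+// a client makes only one call into a Clerk at a time.
+type dupEntry struct {
+    seq    int64
+    result string
+}
+
+type dupTable struct {
+    seen map[int64]dupEntry // keyed by client id
+}
+
+// apply executes op at most once: a (clientID, seq) pair that was already
+// executed returns the recorded result instead of running execute again.
+func (t *dupTable) apply(clientID, seq int64, execute func() string) string {
+    if e, ok := t.seen[clientID]; ok && seq <= e.seq {
+        return e.result
+    }
+    res := execute()
+    t.seen[clientID] = dupEntry{seq: seq, result: res}
+    return res
+}
+
+func main() {
+    t := &dupTable{seen: map[int64]dupEntry{}}
+    runs := 0
+    exec := func() string { runs++; return fmt.Sprintf("executed %d time(s)", runs) }
+    fmt.Println(t.apply(7, 1, exec)) // executed 1 time(s)
+    fmt.Println(t.apply(7, 1, exec)) // re-send: still "executed 1 time(s)"
+}
+```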
+ +Now you should modify your solution to continue in the face of network and server failures. One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it finds a kvserver that replies positively. If a leader fails just after committing an entry to the Raft log, the `Clerk` may not receive a reply, and thus may re-send the request to another leader. Each call to `Clerk.Put()` or `Clerk.Append()` should result in just a single execution, so you will have to ensure that the re-send doesn't result in the servers executing the request twice. + +#### Task + +Add code to handle failures, and to cope with duplicate `Clerk` requests, including situations where the `Clerk` sends a request to a kvserver leader in one term, times out waiting for a reply, and re-sends the request to a new leader in another term. The request should execute just once. Your code should pass the `go test -run 4A` tests. + +#### Hints + +- Your solution needs to handle a leader that has called Start() for a Clerk's RPC, but loses its leadership before the request is committed to the log. In this case you should arrange for the Clerk to re-send the request to other servers until it finds the new leader. One way to do this is for the server to detect that it has lost leadership, by noticing that Raft's term has changed or a different request has appeared at the index returned by Start(). If the ex-leader is partitioned by itself, it won't know about new leaders; but any client in the same partition won't be able to talk to a new leader either, so it's OK in this case for the server and client to wait indefinitely until the partition heals. +- You will probably have to modify your Clerk to remember which server turned out to be the leader for the last RPC, and send the next RPC to that server first. This will avoid wasting time searching for the leader on every RPC, which may help you pass some of the tests quickly enough. +- You should use a duplicate detection scheme similar to Lab 2. It should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. It's OK to assume that a client will make only one call into a Clerk at a time. You may find that you need to make changes to what information you store in your duplicate detection table from Lab 2. + +Your code should now pass the Lab 4A tests, like this: + +```sh +$ go test -run 4A +Test: one client (4A) ... +... Passed -- 15.5 5 4576 903 +Test: ops complete fast enough (4A) ... +... Passed -- 15.7 3 3022 0 +Test: many clients (4A) ... +... Passed -- 15.9 5 5884 1160 +Test: unreliable net, many clients (4A) ... +... Passed -- 19.2 5 3083 441 +Test: concurrent append to same key, unreliable (4A) ... +... Passed -- 2.5 3 218 52 +Test: progress in majority (4A) ... +... Passed -- 1.7 5 103 2 +Test: no progress in minority (4A) ... +... Passed -- 1.0 5 102 3 +Test: completion after heal (4A) ... +... Passed -- 1.2 5 70 3 +Test: partitions, one client (4A) ... +... Passed -- 23.8 5 4501 765 +Test: partitions, many clients (4A) ... +... Passed -- 23.5 5 5692 974 +Test: restarts, one client (4A) ... +... Passed -- 22.2 5 4721 908 +Test: restarts, many clients (4A) ... +... Passed -- 22.5 5 5490 1033 +Test: unreliable net, restarts, many clients (4A) ... +... Passed -- 26.5 5 3532 474 +Test: restarts, partitions, many clients (4A) ... +... Passed -- 29.7 5 6122 1060 +Test: unreliable net, restarts, partitions, many clients (4A) ... +... 
Passed -- 32.9 5 2967 317 +Test: unreliable net, restarts, partitions, random keys, many clients (4A) ... +... Passed -- 35.0 7 8249 746 +PASS +ok kvraft 290.184s +``` + +The numbers after each `Passed` are real time in seconds, number of peers, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put/Append calls). + +### Part B: Key/value service with snapshots ("hard") + +As things stand now, your key/value server doesn't call your Raft library's `Snapshot()` method, so a rebooting server has to replay the complete persisted Raft log in order to restore its state. Now you'll modify kvserver to cooperate with Raft to save log space, and reduce restart time, using Raft's `Snapshot()` from Lab 3D. + +The tester passes `maxraftstate` to your `StartKVServer()`. `maxraftstate` indicates the maximum allowed size of your persistent Raft state in bytes (including the log, but not including snapshots). You should compare `maxraftstate` to `persister.RaftStateSize()`. Whenever your key/value server detects that the Raft state size is approaching this threshold, it should save a snapshot by calling Raft's `Snapshot`. If `maxraftstate` is -1, you do not have to snapshot. `maxraftstate` applies to the GOB-encoded bytes your Raft passes as the first argument to to `persister.Save()`. + +#### Task + +Modify your kvserver so that it detects when the persisted Raft state grows too large, and then hands a snapshot to Raft. When a kvserver server restarts, it should read the snapshot from `persister` and restore its state from the snapshot. + +#### Hints + +- Think about when a kvserver should snapshot its state and what should be included in the snapshot. Raft stores each snapshot in the persister object using `Save()`, along with corresponding Raft state. You can read the latest stored snapshot using `ReadSnapshot()`. +- Your kvserver must be able to detect duplicated operations in the log across checkpoints, so any state you are using to detect them must be included in the snapshots. +- Capitalize all fields of structures stored in the snapshot. +- You may have bugs in your Raft library that this lab exposes. If you make changes to your Raft implementation make sure it continues to pass all of the Lab 3 tests. +- A reasonable amount of time to take for the Lab 4 tests is 400 seconds of real time and 700 seconds of CPU time. Further, `go test -run TestSnapshotSize` should take less than 20 seconds of real time. + +Your code should pass the 4B tests (as in the example here) as well as the 4A tests (and your Raft must continue to pass the Lab 3 tests). + +```sh +$ go test -run 4B +Test: InstallSnapshot RPC (4B) ... +... Passed -- 4.0 3 289 63 +Test: snapshot size is reasonable (4B) ... +... Passed -- 2.6 3 2418 800 +Test: ops complete fast enough (4B) ... +... Passed -- 3.2 3 3025 0 +Test: restarts, snapshots, one client (4B) ... +... Passed -- 21.9 5 29266 5820 +Test: restarts, snapshots, many clients (4B) ... +... Passed -- 21.5 5 33115 6420 +Test: unreliable net, snapshots, many clients (4B) ... +... Passed -- 17.4 5 3233 482 +Test: unreliable net, restarts, snapshots, many clients (4B) ... +... Passed -- 22.7 5 3337 471 +Test: unreliable net, restarts, partitions, snapshots, many clients (4B) ... +... Passed -- 30.4 5 2725 274 +Test: unreliable net, restarts, partitions, snapshots, random keys, many clients (4B) ... +... 
Passed -- 37.7 7 8378 681 +PASS +ok kvraft 161.538s +``` + +## Files Agent Should Modify + +The agent should implement the solution by modifying the following files: + +- `src/kvraft/client.go` +- `src/kvraft/common.go` +- `src/kvraft/server.go` + +**Important**: Do not modify test files, configuration files, or other infrastructure files. diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/config.json new file mode 100644 index 0000000..2ec4ff3 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/config.json @@ -0,0 +1,14 @@ +{ + "instance_id": "mit_6_5840_2024_shardkv_5a", + "course_id": "mit_6_5840_2024", + "docker_image": "xuafeng/swe-go-python:latest", + "timeout_minutes": 40, + "tags": [ + "distributed-systems", + "sharding", + "raft", + "fault-tolerance", + "go" + ], + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" +} \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/evaluate.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/evaluate.sh new file mode 100755 index 0000000..76d6400 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/evaluate.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -e + +echo "=== Evaluation ===" + +cd /workspace + +export PATH=$PATH:/usr/local/go/bin + +echo "Verifying protected files were not modified" +PROTECTED_FILES=( + "src/shardctrler/config.go:src_shardctrler_config.go" + "src/shardctrler/test_test.go:src_shardctrler_test_test.go" + "src/shardkv/config.go:src_shardkv_config.go" + "src/shardkv/test_test.go:src_shardkv_test_test.go" +) + +for entry in "${PROTECTED_FILES[@]}"; do + file="${entry%%:*}" + checksum_name="${entry##*:}" + if [ -f "$file" ] && [ -f "/tmp/checksums/${checksum_name}.sha256" ]; then + echo " Checking $file" + sha256sum -c "/tmp/checksums/${checksum_name}.sha256" || { + echo "FAIL: $file was modified" + exit 1 + } + fi +done +echo "All protected files unchanged" + +echo "Running ShardCtrler tests" +cd src/shardctrler +go test -race +if [ $? -ne 0 ]; then + echo "FAIL: ShardCtrler tests failed" + exit 1 +fi + +echo "Running ShardKV 5A tests" +cd ../shardkv +go test -run 5A -race + +if [ $? -eq 0 ]; then + echo "PASS: Tests passed" + exit 0 +else + echo "FAIL: Tests failed" + exit 1 +fi diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/preprocess.sh new file mode 100755 index 0000000..0ccad9e --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/preprocess.sh @@ -0,0 +1,917 @@ +#!/bin/bash +set -e + +echo "=== Preprocessing ShardKV Lab 5A ===" + +cd /workspace + +echo "ShardKV depends on Raft implementation from Lab 3" +echo "Copying reference Raft implementation..." + +echo ' Copying raft.go' +cat > src/raft/raft.go << 'RAFT_EOF' +package raft + +// +// this is an outline of the API that raft must expose to +// the service (or tester). see comments below for +// each of these functions for more details. +// +// rf = Make(...) +// create a new Raft server. +// rf.Start(command interface{}) (index, term, isleader) +// start agreement on a new log entry +// rf.GetState() (term, isLeader) +// ask a Raft for its current term, and whether it thinks it is leader +// ApplyMsg +// each time a new entry is committed to the log, each Raft peer +// should send an ApplyMsg to the service (or tester) +// in the same server. 
+// + +import ( + // "bytes" + + "bytes" + "log" + "sync" + "sync/atomic" + "time" + + // "6.5840/labgob" + "6.5840/labgob" + "6.5840/labrpc" +) + +// as each Raft peer becomes aware that successive log entries are +// committed, the peer should send an ApplyMsg to the service (or +// tester) on the same server, via the applyCh passed to Make(). set +// CommandValid to true to indicate that the ApplyMsg contains a newly +// committed log entry. +// +// in part 3D you'll want to send other kinds of messages (e.g., +// snapshots) on the applyCh, but set CommandValid to false for these +// other uses. +type ApplyMsg struct { + CommandValid bool + Command interface{} + CommandIndex int + CommandTerm int + + // For 3D: + SnapshotValid bool + Snapshot []byte + SnapshotTerm int + SnapshotIndex int +} + +type Entry struct { + Term int + Index int + Command interface{} +} + +// Base struct for common fields +type BaseRPC struct { + Term int +} + +// Implement RaftRPC interface for BaseRPC +func (b *BaseRPC) GetTerm() int { + return b.Term +} + +func (b *BaseRPC) SetTerm(term int) { + b.Term = term +} + +// RaftRPC interface +type RaftRPC interface { + GetTerm() int + SetTerm(int) +} + +type ServerState int + +const ( + FOLLOWER ServerState = iota + CANDIDATE + LEADER +) + +// A Go object implementing a single Raft peer. +type Raft struct { + mu sync.Mutex // Lock to protect shared access to this peer's state + peers []*labrpc.ClientEnd // RPC end points of all peers + persister *Persister // Object to hold this peer's persisted state + me int // this peer's index into peers[] + dead int32 // set by Kill() + heartbeatTimeout time.Duration + electionTimeout time.Duration + electionTimeStamp time.Time + applyCh chan ApplyMsg + + // state a Raft server must maintain. + broadcasterCond []*sync.Cond + applierCond *sync.Cond + + // server state + state ServerState + + // presistent state on all servers + currentTerm int // latest term server has seen (initialized to 0 on first boot, increases monotonically) + votedFor int // candidateId that received vote in current term (or null if none) + logs []Entry // log entries; each entry contains command for state machine, and term when entry was received by leader (first index is 1) + + // volatile state on all servers + commitIndex int // index of highest log entry known to be committed (initialized to 0, increases monotonically) + lastApplied int // index of highest log entry applied to state machine (initialized to 0, increases monotonically) + + // volatile state on leaders (reinitialized after election) + nextIndex []int // for each server, index of the next log entry to send to that server (initialized to leader last log index + 1) + matchIndex []int // for each server, index of highest log entry known to be replicated on server (initialized to 0, increases monotonically) + + // snapshot msg + smsg *ApplyMsg +} + +// return currentTerm and whether this server +// believes it is the leader. +func (rf *Raft) GetState() (int, bool) { + rf.mu.Lock() + defer rf.mu.Unlock() + return rf.currentTerm, rf.state == LEADER +} + +func (rf *Raft) encodeState() []byte { + w := new(bytes.Buffer) + e := labgob.NewEncoder(w) + e.Encode(rf.currentTerm) + e.Encode(rf.votedFor) + e.Encode(rf.logs) + return w.Bytes() +} + +// save Raft's persistent state to stable storage, +// where it can later be retrieved after a crash and restart. +// see paper's Figure 2 for a description of what should be persistent. 
+// before you've implemented snapshots, you should pass nil as the +// second argument to persister.Save(). +// after you've implemented snapshots, pass the current snapshot +// (or nil if there's not yet a snapshot). +func (rf *Raft) persist() { + if rf.persister.ReadSnapshot() != nil { + rf.persister.Save(rf.encodeState(), rf.persister.ReadSnapshot()) + } else { + rf.persister.Save(rf.encodeState(), nil) + } +} + +// restore previously persisted state. +func (rf *Raft) readPersist(data []byte) { + if data == nil || len(data) < 1 { // bootstrap without any state + return + } + r := bytes.NewBuffer(data) + d := labgob.NewDecoder(r) + var currentTerm int + var votedFor int + var logs []Entry + + if d.Decode(¤tTerm) != nil || d.Decode(&votedFor) != nil || d.Decode(&logs) != nil { + log.Fatal("failed to read persist\n") + } else { + DPrintf("[%d]: read persist, currentTerm: %d, votedFor: %d, logs: %v\n", rf.me, currentTerm, votedFor, logs) + rf.currentTerm = currentTerm + rf.votedFor = votedFor + rf.logs = logs + rf.lastApplied = rf.logs[0].Index + rf.commitIndex = rf.logs[0].Index + } +} + +// the service says it has created a snapshot that has +// all info up to and including index. this means the +// service no longer needs the log through (and including) +// that index. Raft should now trim its log as much as possible. +func (rf *Raft) Snapshot(index int, snapshot []byte) { + // Your code here (3D). + rf.mu.Lock() + defer rf.mu.Unlock() + // if the snapshot is outdated, just ignore it + if rf.logs[0].Index >= index { + return + } + firstLogIndex := rf.logs[0].Index + trimLen := index - firstLogIndex + // trim the logs + rf.logs = append([]Entry{}, rf.logs[trimLen:]...) + rf.logs[0].Command = nil + rf.persister.Save(rf.encodeState(), snapshot) +} + +// the service using Raft (e.g. a k/v server) wants to start +// agreement on the next command to be appended to Raft's log. if this +// server isn't the leader, returns false. otherwise start the +// agreement and return immediately. there is no guarantee that this +// command will ever be committed to the Raft log, since the leader +// may fail or lose an election. even if the Raft instance has been killed, +// this function should return gracefully. +// +// the first return value is the index that the command will appear at +// if it's ever committed. the second return value is the current +// term. the third return value is true if this server believes it is +// the leader. +func (rf *Raft) Start(command interface{}) (int, int, bool) { + rf.mu.Lock() + defer rf.mu.Unlock() + if rf.state != LEADER { + return -1, -1, false + } + defer DPrintf("(Start) [%d]: command %+v, index:%d, term: %d\n", rf.me, command, rf.logs[len(rf.logs)-1].Index, rf.currentTerm) + rf.logs = append(rf.logs, Entry{ + Term: rf.currentTerm, + Index: rf.logs[len(rf.logs)-1].Index + 1, + Command: command, + }) + rf.broadcastAppendEntries(false) + // Your code here (3B). 
+ return rf.logs[len(rf.logs)-1].Index, rf.currentTerm, true +} + +// Warning: this function is not thread-safe +func (rf *Raft) resetNewTermState(targetTerm int) { + DPrintf("(ResetTerm)[%d]: received newer term, set term to %d\n", rf.me, targetTerm) + if rf.currentTerm < targetTerm { + rf.votedFor = -1 + } + rf.currentTerm = targetTerm + rf.state = FOLLOWER // reset to follower +} + +// Reply false if term < currentTerm (§5.1) +// If RPC request contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkRequestTerm(args, reply RaftRPC) bool { + term := args.GetTerm() + defer reply.SetTerm(rf.currentTerm) + if term < rf.currentTerm { + return false + } + if term > rf.currentTerm { + rf.resetNewTermState(term) + } + return true +} + +// If RPC request or response contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkResponseTerm(args, reply RaftRPC, isElection bool) bool { + argsTerm := args.GetTerm() + replyTerm := reply.GetTerm() + if replyTerm > argsTerm { + rf.resetNewTermState(replyTerm) + rf.resetElectionTimer() + return false + } + return isElection || (rf.state == LEADER) +} + +// the tester doesn't halt goroutines created by Raft after each test, +// but it does call the Kill() method. your code can use killed() to +// check whether Kill() has been called. the use of atomic avoids the +// need for a lock. +// +// the issue is that long-running goroutines use memory and may chew +// up CPU time, perhaps causing later tests to fail and generating +// confusing debug output. any goroutine with a long-running loop +// should call killed() to check whether it should stop. +func (rf *Raft) Kill() { + atomic.StoreInt32(&rf.dead, 1) + // Your code here, if desired. 
+} + +func (rf *Raft) killed() bool { + z := atomic.LoadInt32(&rf.dead) + return z == 1 +} + +// a dedicated applier goroutine to guarantee that each log will be push into applyCh exactly once, ensuring that service's applying entries and raft's committing entries can be parallel +func (rf *Raft) applier() { + for !rf.killed() { + rf.mu.Lock() + // if there is no need to apply entries, just release CPU and wait other goroutine's signal if they commit new entries + for rf.lastApplied >= rf.commitIndex { + rf.applierCond.Wait() + } + firstLogIndex := rf.logs[0].Index + commitIndex, lastApplied := rf.commitIndex, rf.lastApplied + DPrintf("(applier) [%d]: commitIndex: %d, lastApplied: %d, logFirstIndex: %d, logLastIndex: %d\n", rf.me, commitIndex, lastApplied, firstLogIndex, rf.logs[len(rf.logs)-1].Index) + entries := make([]Entry, commitIndex-lastApplied) + copy(entries, rf.logs[lastApplied+1-firstLogIndex:commitIndex+1-firstLogIndex]) + if rf.smsg != nil { + msg := rf.smsg + rf.smsg = nil + rf.mu.Unlock() + rf.applyCh <- *msg + } else { + rf.mu.Unlock() + } + for _, entry := range entries { + DPrintf("(applier) [%d]: apply entry %+v\n", rf.me, entry) + rf.applyCh <- ApplyMsg{ + CommandValid: true, + Command: entry.Command, + CommandTerm: entry.Term, + CommandIndex: entry.Index, + } + } + rf.mu.Lock() + // use commitIndex rather than rf.commitIndex because rf.commitIndex may change during the Unlock() and Lock() + // use Max(rf.lastApplied, commitIndex) rather than commitIndex directly to avoid concurrently InstallSnapshot rpc causing lastApplied to rollback + if rf.lastApplied < commitIndex { + rf.lastApplied = commitIndex + } + rf.mu.Unlock() + } +} + +/** + * Lets illustrate the time line of the ticker function + * e: election timeout + * h: heartbeat timeout + * + * ---- h ---- h ---- h ---- h ---- h ---- ... + * + * First, the server will wake up each fixed heartbeat timeout. This timeout is + * relatively shorter than the election timeout. If the server is not a leader, + * it basically do nothing about heartbeat. + * + * However, everytime when server wake up, it will check if the election timeout + * is reached. It might start a new election, if it is not a leader. + * + * v election timeout found! + * ---- h1 ---- h2 ---- h3 ---- h ---- h ---- ... + * --------- e1 ------ e2 ------------ e ---- ... + * + * Reseting a new election timeout when the server receives a heartbeat or a + * vote from another server prevents the election. One shortcomming of the + * current implementation is that the election timeout does not trigger a new + * election immediately. It will wait until the next heartbeat timeout. + */ +func (rf *Raft) ticker() { + for !rf.killed() { + rf.mu.Lock() + if rf.state == LEADER { + rf.broadcastAppendEntries(true) + } else if rf.isElectionTimeout() { + rf.startElection() + } + rf.mu.Unlock() + time.Sleep(rf.heartbeatTimeout) + } +} + +// the service or tester wants to create a Raft server. the ports +// of all the Raft servers (including this one) are in peers[]. this +// server's port is peers[me]. all the servers' peers[] arrays +// have the same order. persister is a place for this server to +// save its persistent state, and also initially holds the most +// recent saved state, if any. applyCh is a channel on which the +// tester or service expects Raft to send ApplyMsg messages. +// Make() must return quickly, so it should start goroutines +// for any long-running work. 
+func Make(peers []*labrpc.ClientEnd, me int, + persister *Persister, applyCh chan ApplyMsg) *Raft { + rf := &Raft{} + rf.peers = peers + rf.persister = persister + rf.me = me + rf.applyCh = applyCh + rf.heartbeatTimeout = 125 * time.Millisecond + rf.resetElectionTimer() + rf.state = FOLLOWER + rf.votedFor = -1 + rf.logs = make([]Entry, 0) + + // dummy entry to make the index start from 1 + rf.logs = append(rf.logs, Entry{0, 0, nil}) + + rf.commitIndex = 0 + rf.lastApplied = 0 + + rf.applierCond = sync.NewCond(&rf.mu) + rf.broadcasterCond = make([]*sync.Cond, len(peers)) + + rf.nextIndex = make([]int, len(peers)) + rf.matchIndex = make([]int, len(peers)) + + for id := range peers { + rf.nextIndex[id] = 1 + if id != rf.me { + rf.broadcasterCond[id] = sync.NewCond(&sync.Mutex{}) + go rf.broadcaster(id) + } + } + + rf.smsg = nil + + // initialize from state persisted before a crash + rf.readPersist(persister.ReadRaftState()) + + // start ticker goroutine to start elections + go rf.ticker() + + go rf.applier() + + return rf +} + +RAFT_EOF + +echo ' Copying election.go' +cat > src/raft/election.go << 'RAFT_EOF' +package raft + +import ( + "math/rand" + "sync/atomic" + "time" +) + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type RequestVoteArgs struct { + BaseRPC // candidate's term + CandidateId int // candidate requesting vote + LastLogIndex int // index of candidate's last log entry + LastLogTerm int // term of candidate's last log entry +} + +type RequestVoteReply struct { + BaseRPC // currentTerm, for candidate to update itself + VoteGranted bool // true means candidate received vote +} + +// RequestVote RPC handler +// Restart your election timer if you grant a vote to another peer. +func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + reply.VoteGranted = false + + DPrintf("(RequestVote) [%d]: receive vote request from %d, term %d\n", rf.me, args.CandidateId, args.Term) + + if !rf.checkRequestTerm(args, reply) { + return + } + + if (rf.votedFor == -1 || rf.votedFor == args.CandidateId) && rf.isUpToDate(args) { + reply.VoteGranted = true + rf.votedFor = args.CandidateId + rf.resetElectionTimer() + } +} + +func (rf *Raft) isUpToDate(args *RequestVoteArgs) bool { + lastLog := rf.logs[len(rf.logs)-1] + candidateIndex := args.LastLogIndex + candidateTerm := args.LastLogTerm + return candidateTerm > lastLog.Term || (candidateTerm == lastLog.Term && candidateIndex >= lastLog.Index) +} + +func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, voteCount *int32) { + reply := &RequestVoteReply{} + ok := rf.peers[server].Call("Raft.RequestVote", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, true) { + return + } + + if !reply.VoteGranted { + return + } + + DPrintf("(RequestVote) [%d]: received vote from %d, voteCount: %d\n", rf.me, server, *voteCount) + + // If votes received from majority of servers: become leader + if atomic.AddInt32(voteCount, 1) > int32(len(rf.peers)/2) && + rf.state == CANDIDATE && + rf.currentTerm == args.Term { + rf.state = LEADER + lastLogIndex := rf.logs[len(rf.logs)-1].Index + for i := range rf.peers { + rf.nextIndex[i] = lastLogIndex + 1 + rf.matchIndex[i] = 0 + } + DPrintf("[%d]: become leader to term %d\n", rf.me, rf.currentTerm) + // send initial empty AppendEntries RPCs (heartbeat) to each server immediately + rf.broadcastAppendEntries(true) 
+ } + DPrintf("(RequestVote) [%d]: voteCount: %d\n", rf.me, *voteCount) +} + +func (rf *Raft) startElection() { + rf.currentTerm++ + rf.state = CANDIDATE + rf.votedFor = rf.me + rf.resetElectionTimer() + DPrintf("(RequestVote) [%d]: start election, term %d", rf.me, rf.currentTerm) + lastLog := rf.logs[len(rf.logs)-1] + + voteCount := int32(1) + args := RequestVoteArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + CandidateId: rf.me, + LastLogIndex: lastLog.Index, + LastLogTerm: lastLog.Term, + } + + for id := range rf.peers { + if id == rf.me { + continue + } + go rf.sendRequestVote(id, &args, &voteCount) + } +} + +func (rf *Raft) resetElectionTimer() { + // election timeout range from 350 to 550 + ms := 350 + (rand.Int63() % 200) + rf.electionTimeStamp = time.Now() + rf.electionTimeout = time.Duration(ms) * time.Millisecond +} + +func (rf *Raft) isElectionTimeout() bool { + return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) +} + +RAFT_EOF + +echo ' Copying append_entries.go' +cat > src/raft/append_entries.go << 'RAFT_EOF' +package raft + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type AppendEntriesArgs struct { + BaseRPC // leader's term + LeaderId int // so follower can redirect clients + PrevLogIndex int // index of log entry immediately preceding new ones + PrevLogTerm int // term of prevLogIndex entry + Entries []Entry // log entries to store (empty for heartbeat; may send more than one for efficiency) + CommitIndex int // leader's commitIndex +} + +type AppendEntriesReply struct { + BaseRPC // currentTerm, for leader to update itself + Success bool // true if follower contained entry matching prevLogIndex and prevLogTerm + ConflictIndex int // the index of the first conflicting entry +} + +// AppendEntries RPC handler +// Reset the election timer if you get an AppendEntries RPC from the current leader +// (i.e., if the term of the AppendEntries arguments is outdated, you should not reset your timer); +func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + DPrintf("(AppendEntries) [%d] recieve from %d, Term: %d, PrevLogIndex: %d, PrevLogTerm: %d\n", rf.me, args.LeaderId, args.Term, args.PrevLogIndex, args.PrevLogTerm) + + reply.Success = false + reply.ConflictIndex = -1 + + if !rf.checkRequestTerm(args, reply) { + return + } + + if rf.state == CANDIDATE { + rf.state = FOLLOWER + } + + rf.resetElectionTimer() + + prevLogIndex := args.PrevLogIndex - rf.logs[0].Index + + if prevLogIndex < 0 { + // force to send a snapshot + reply.ConflictIndex = 0 + return + } + + // Reply false if log doesn’t contain an entry at prevLogIndex + // whose term matches prevLogTerm (§5.3) + if prevLogIndex >= len(rf.logs) { + reply.ConflictIndex = rf.logs[len(rf.logs)-1].Index + return + } + + // If an existing entry conflicts with a new one (same index + // but different terms), delete the existing entry and all that + // follow it (§5.3) + if rf.logs[prevLogIndex].Term != args.PrevLogTerm { + // optimization + curTerm := rf.logs[prevLogIndex].Term + var conflictIndex int + for i := prevLogIndex; i > 0; i-- { + if rf.logs[i-1].Term != curTerm { + conflictIndex = i + break + } + } + reply.ConflictIndex = conflictIndex + rf.logs[0].Index + return + } + for idx, entry := range args.Entries { + logIndex := entry.Index - rf.logs[0].Index + if logIndex >= len(rf.logs) || rf.logs[logIndex].Term != entry.Term { + DPrintf("(AppendEntries) [%d] append logs: %v\n", rf.me, 
args.Entries) + rf.logs = append([]Entry{}, append(rf.logs[:logIndex], args.Entries[idx:]...)...) + break + } + } + reply.Success = true + if args.CommitIndex > rf.commitIndex { + rf.commitIndex = args.CommitIndex + if args.CommitIndex-rf.logs[0].Index >= len(rf.logs) { + rf.commitIndex = rf.logs[len(rf.logs)-1].Index + } + } + rf.applierCond.Signal() +} + +func (rf *Raft) sendAppendEntries(server int, args *AppendEntriesArgs) { + reply := &AppendEntriesReply{} + ok := rf.peers[server].Call("Raft.AppendEntries", args, reply) + if !ok { + return + } + + DPrintf("(AppendEntries) [%d] recieve reply from %d, Term: %d, Success: %v, ConflictIndex: %d\n", rf.me, server, reply.Term, reply.Success, reply.ConflictIndex) + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + // If successful: update nextIndex and matchIndex for + // follower (§5.3) + if reply.Success { + if len(args.Entries) > 0 { + rf.nextIndex[server] = args.Entries[len(args.Entries)-1].Index + 1 + } + rf.matchIndex[server] = rf.nextIndex[server] - 1 + for _, log := range rf.logs { + index := log.Index + count := 1 + for peer := range rf.peers { + if peer != rf.me && rf.matchIndex[peer] >= index { + count++ + } + } + // If there exists an N such that N > commitIndex, a majority + // of matchIndex[i] ≥ N, and log[N].term == currentTerm: + // set commitIndex = N (§5.3, §5.4). + if count > len(rf.peers)/2 && index > rf.commitIndex && log.Term == rf.currentTerm { + rf.commitIndex = index + } + } + } else { + if reply.ConflictIndex != -1 { + rf.nextIndex[server] = reply.ConflictIndex - 1 + } else { + rf.nextIndex[server] = rf.nextIndex[server] - 1 + } + if rf.nextIndex[server] < 1 { + rf.nextIndex[server] = 1 + } + } + DPrintf("(AppendEntries) [%d] nextIndex: %v, matchIndex: %v, commitIndex: %d\n", rf.me, rf.nextIndex, rf.matchIndex, rf.commitIndex) + rf.applierCond.Signal() +} + +func (rf *Raft) broadcastAppendEntries(isHeartBeat bool) { + for peer := range rf.peers { + if peer != rf.me { + // if it is a heartbeat we dont care the linearizability of logs append + if isHeartBeat { + args := rf.prepareReplicationArgs(peer) + go rf.sendReplicationRPC(peer, args) + } else { + rf.broadcasterCond[peer].Signal() + } + } + } +} + +func (rf *Raft) prepareReplicationArgs(peer int) interface{} { + if rf.nextIndex[peer] > rf.logs[0].Index { + firstLog := rf.logs[0] + nextIndex := rf.nextIndex[peer] - firstLog.Index + prevLog := rf.logs[nextIndex-1] + logs := make([]Entry, len(rf.logs[nextIndex:])) + copy(logs, rf.logs[nextIndex:]) + return &AppendEntriesArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + PrevLogIndex: prevLog.Index, + PrevLogTerm: prevLog.Term, + Entries: logs, + CommitIndex: rf.commitIndex, + } + } else { + return &InstallSnapshotArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + LastIncludedIndex: rf.logs[0].Index, + LastIncludedTerm: rf.logs[0].Term, + Offset: 0, + Data: rf.persister.ReadSnapshot(), + Done: true, + } + } +} + +func (rf *Raft) sendReplicationRPC(peer int, args interface{}) { + switch v := args.(type) { + case *AppendEntriesArgs: + rf.sendAppendEntries(peer, v) + case *InstallSnapshotArgs: + rf.sendInstallSnapshot(peer, v) + default: + panic("(sendReplicationRPC) SHOULD NOT REACH") + } +} + +func (rf *Raft) isReplicationNeeded(peer int) bool { + return rf.state == LEADER && rf.matchIndex[peer] < rf.logs[len(rf.logs)-1].Index +} + +func (rf *Raft) broadcaster(peer int) { + rf.broadcasterCond[peer].L.Lock() + defer 
rf.broadcasterCond[peer].L.Unlock() + for !rf.killed() { + rf.mu.Lock() + for !rf.isReplicationNeeded(peer) { + rf.mu.Unlock() + rf.broadcasterCond[peer].Wait() + rf.mu.Lock() + } + args := rf.prepareReplicationArgs(peer) + rf.mu.Unlock() + rf.sendReplicationRPC(peer, args) + } +} + +RAFT_EOF + +echo ' Copying install_snapshot.go' +cat > src/raft/install_snapshot.go << 'RAFT_EOF' +package raft + +type InstallSnapshotArgs struct { + BaseRPC + LeaderId int + LastIncludedIndex int + LastIncludedTerm int + Offset int + Data []byte + Done bool +} + +type InstallSnapshotReply struct { + BaseRPC +} + +// InstallSnapshot RPC handler +func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkRequestTerm(args, reply) { + return + } + + if args.LastIncludedIndex <= rf.commitIndex { + return + } + prevCommitIndex := rf.commitIndex + prevLastApplied := rf.lastApplied + defer DPrintf("(InstallSnapshot) [%d]: LastIncludedIndex: %d, LastIncludedTerm: %d, prevCommitIndex: %d, prevLastApplied: %d\n", rf.me, args.LastIncludedIndex, args.LastIncludedTerm, prevCommitIndex, prevLastApplied) + rf.resetElectionTimer() + + rf.commitIndex = args.LastIncludedIndex + rf.lastApplied = args.LastIncludedIndex + // 2. Create new snapshot file if first chunk (offset is 0) + // 3. Write data into snapshot file at given offset + // 4. Reply and wait for more data chunks if done is false + if !args.Done { + return + } + // 5. Save snapshot file, discard any existing or partial snapshot with a + // smaller index + // 6. If existing log entry has same index and term as snapshot’s last + // included entry, retain log entries following it and reply + // 7. Discard the entire log + // 8. Reset state machine using snapshot contents (and load snapshot’s + // cluster configuration) + firstLogIndex := rf.logs[0].Index + if firstLogIndex <= args.LastIncludedIndex { + rf.logs = append([]Entry{}, Entry{ + Index: args.LastIncludedIndex, + Term: args.LastIncludedTerm, + Command: nil, + }) + } else if firstLogIndex < args.LastIncludedIndex { + trimLen := args.LastIncludedIndex - firstLogIndex + rf.logs = append([]Entry{}, rf.logs[trimLen:]...) + rf.logs[0].Command = nil + } + rf.persister.Save(rf.encodeState(), args.Data) + rf.smsg = &ApplyMsg{ + SnapshotValid: true, + Snapshot: args.Data, + SnapshotTerm: args.LastIncludedTerm, + SnapshotIndex: args.LastIncludedIndex, + } +} + +func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { + reply := &InstallSnapshotReply{} + ok := rf.peers[server].Call("Raft.InstallSnapshot", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + + if args.LastIncludedIndex != rf.logs[0].Index { + return + } + + rf.nextIndex[server] = args.LastIncludedIndex + 1 + rf.matchIndex[server] = args.LastIncludedIndex + + rf.persister.Save(rf.encodeState(), args.Data) +} + +RAFT_EOF + +echo ' Copying util.go' +cat > src/raft/util.go << 'RAFT_EOF' +package raft + +import ( + "log" + "os" +) + +// Debugging +var Debug = os.Getenv("DEBUG") == "1" + +func DPrintf(format string, a ...interface{}) { + if !Debug { + return + } + log.Printf(format, a...) 
+}
+
+RAFT_EOF
+
+
+echo "Creating checksums for protected files"
+PROTECTED_FILES=(
+  "src/shardctrler/config.go"
+  "src/shardctrler/test_test.go"
+  "src/shardkv/config.go"
+  "src/shardkv/test_test.go"
+)
+
+mkdir -p /tmp/checksums
+for file in "${PROTECTED_FILES[@]}"; do
+  if [ -f "$file" ]; then
+    sha256sum "$file" > "/tmp/checksums/$(echo "$file" | tr '/' '_').sha256"  # filename must match what evaluate.sh looks up
+    echo "  $file"
+  fi
+done
+
+
+
+echo "Preprocessing complete"
+exit 0
diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/task.md b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/task.md
new file mode 100644
index 0000000..47f46af
--- /dev/null
+++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/task.md
@@ -0,0 +1,129 @@
+# Sharded Key/Value Service
+
+### Introduction
+
+In this lab you'll build a key/value storage system that "shards," or partitions, the keys over a set of replica groups. A shard is a subset of the key/value pairs; for example, all the keys starting with "a" might be one shard, all the keys starting with "b" another, etc. The reason for sharding is performance. Each replica group handles puts and gets for just a few of the shards, and the groups operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of groups.
+
+Your sharded key/value store will have two main components. First, a set of replica groups. Each replica group is responsible for a subset of the shards, using Raft replication. The second component is the "shard controller". The shard controller decides which replica group should serve each shard; this information is called the configuration. The configuration changes over time. Clients consult the shard controller in order to find the replica group for a key, and replica groups consult the controller in order to find out what shards to serve. There is a single shard controller for the whole system, implemented as a fault-tolerant service using Raft.
+
+A sharded storage system must be able to shift shards among replica groups. One reason is that some groups may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that replica groups may join and leave the system: new replica groups may be added to increase capacity, or existing replica groups may be taken offline for repair or retirement.
+
+The main challenge in this lab will be handling reconfiguration -- changes in the assignment of shards to groups. Within a single replica group, all group members must agree on when a reconfiguration occurs relative to client Put/Append/Get requests. For example, a Put may arrive at about the same time as a reconfiguration that causes the replica group to stop being responsible for the shard holding the Put's key. All replicas in the group must agree on whether the Put occurred before or after the reconfiguration. If before, the Put should take effect and the new owner of the shard will see its effect; if after, the Put won't take effect and the client must re-try at the new owner. The recommended approach is to have each replica group use Raft to log not just the sequence of Puts, Appends, and Gets but also the sequence of reconfigurations. You will need to ensure that at most one replica group is serving requests for each shard at any one time.
+
+Reconfiguration also requires interaction among the replica groups. For example, in configuration 10 group G1 may be responsible for shard S1. 
In configuration 11, group G2 may be responsible for shard S1. During the reconfiguration from 10 to 11, G1 and G2 must use RPC to move the contents of shard S1 (the key/value pairs) from G1 to G2. + +- Note: Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. +- Note: This lab uses "configuration" to refer to the assignment of shards to replica groups. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. + +This lab's general architecture (a configuration service and a set of replica groups) follows the same general pattern as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are very simple; and handoff of shards is slow and doesn't allow concurrent client access. + +- Note: Your Lab 5 sharded server, Lab 5 shard controller, and Lab 4 kvraft must all use the same Raft implementation. + +### Getting Started + +We supply you with skeleton code and tests in `src/shardctrler` and `src/shardkv`. + +To get up and running, execute the following commands: + +```sh +$ cd src/shardctrler +$ go test +--- FAIL: TestBasic (0.00s) +test_test.go:11: wanted 1 groups, got 0 +FAIL +exit status 1 +FAIL shardctrler 0.008s +$ +``` + +When you're done, your implementation should pass all the tests in the `src/shardctrler` directory, and all the ones in `src/shardkv`. + +### Part A: The Controller and Static Sharding ("easy") + +First you'll implement the shard controller, in `shardctrler/server.go` and `client.go`, and a sharded key/value server that can handle an unchanging (static) configuration. When you're done, your code should pass all the tests in the `shardctrler/` directory, and the `5A` tests in `shardkv/`. + +```sh +$ cd src/shardctrler +$ go test +Test: Basic leave/join ... +... Passed +Test: Historical queries ... +... Passed +Test: Move ... +... Passed +Test: Concurrent leave/join ... +... Passed +Test: Minimal transfers after joins ... +... Passed +Test: Minimal transfers after leaves ... +... Passed +Test: Multi-group join/leave ... +... Passed +Test: Concurrent multi leave/join ... +... Passed +Test: Minimal transfers after multijoins ... +... Passed +Test: Minimal transfers after multileaves ... +... Passed +Test: Check Same config on servers ... +... Passed +PASS +ok shardctrler 5.863s +$ +$ cd ../shardkv +$ go test -run 5A +Test (5A): static shards ... +... Passed +Test (5A): rejection ... +... Passed +PASS +ok shardkv 9.262s +$ +``` + +The shardctrler manages a sequence of numbered configurations. Each configuration describes a set of replica groups and an assignment of shards to replica groups. Whenever this assignment needs to change, the shard controller creates a new configuration with the new assignment. Key/value clients and servers contact the shardctrler when they want to know the current (or a past) configuration. + +Your implementation must support the RPC interface described in `shardctrler/common.go`, which consists of `Join`, `Leave`, `Move`, and `Query` RPCs. 
These RPCs are intended to allow an administrator (and the tests) to control the shardctrler: to add new replica groups, to eliminate replica groups, and to move shards between replica groups. + +The `Join` RPC is used by an administrator to add new replica groups. Its argument is a set of mappings from unique, non-zero replica group identifiers (GIDs) to lists of server names. The shardctrler should react by creating a new configuration that includes the new replica groups. The new configuration should divide the shards as evenly as possible among the full set of groups, and should move as few shards as possible to achieve that goal. The shardctrler should allow re-use of a GID if it's not part of the current configuration (i.e. a GID should be allowed to Join, then Leave, then Join again). + +The `Leave` RPC's argument is a list of GIDs of previously joined groups. The shardctrler should create a new configuration that does not include those groups, and that assigns those groups' shards to the remaining groups. The new configuration should divide the shards as evenly as possible among the groups, and should move as few shards as possible to achieve that goal. + +The `Move` RPC's arguments are a shard number and a GID. The shardctrler should create a new configuration in which the shard is assigned to the group. The purpose of `Move` is to allow us to test your software. A `Join` or `Leave` following a `Move` will likely un-do the `Move`, since `Join` and `Leave` re-balance. + +The `Query` RPC's argument is a configuration number. The shardctrler replies with the configuration that has that number. If the number is -1 or bigger than the biggest known configuration number, the shardctrler should reply with the latest configuration. The result of `Query(-1)` should reflect every `Join`, `Leave`, or `Move` RPC that the shardctrler finished handling before it received the `Query(-1)` RPC. + +The very first configuration should be numbered zero. It should contain no groups, and all shards should be assigned to GID zero (an invalid GID). The next configuration (created in response to a `Join` RPC) should be numbered 1, &c. There will usually be significantly more shards than groups (i.e., each group will serve more than one shard), in order that load can be shifted at a fairly fine granularity. + +#### Task + +You must implement the interface specified above in `client.go` and `server.go` in the `shardctrler/` directory. Your shardctrler must be fault-tolerant, using your Raft library from Lab 3/4. You have completed this task when you pass all the tests in `shardctrler/`. + +#### Hints + +- Start with a stripped-down copy of your kvraft server. +- You should implement duplicate client request detection for RPCs to the shard controller. The shardctrler tests don't test this, but the shardkv tests will later use your shardctrler on an unreliable network; you may have trouble passing the shardkv tests if your shardctrler doesn't filter out duplicate RPCs. +- The code in your state machine that performs the shard rebalancing needs to be deterministic. In Go, map iteration order is not deterministic. +- Go maps are references. If you assign one variable of type map to another, both variables refer to the same map. Thus if you want to create a new `Config` based on a previous one, you need to create a new map object (with `make()`) and copy the keys and values individually. +- The Go race detector (go test -race) may help you find bugs. 
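+
+The map-copying hint above is worth spelling out, since aliasing the old `Groups` map is a common source of bugs. The sketch below shows one way to build a new configuration from the previous one; it assumes the usual `Config` layout from `shardctrler/common.go` (a `Num` field, a `Shards` array, and a `Groups` map from GID to server names), so adjust the names to whatever the skeleton actually defines.
+
+```go
+// copyConfig is an illustrative helper, not part of the skeleton: it builds
+// the next configuration without sharing any maps or slices with the old one.
+func copyConfig(old Config) Config {
+	next := Config{
+		Num:    old.Num + 1,
+		Shards: old.Shards, // Shards is an array, so assignment copies it by value
+		Groups: make(map[int][]string, len(old.Groups)),
+	}
+	for gid, servers := range old.Groups {
+		next.Groups[gid] = append([]string(nil), servers...) // copy the server list too
+	}
+	return next
+}
+```
+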
+ +Next, in the `shardkv/` directory, implement enough of a sharded key/value server to pass the first two tests in `shardkv/`. Again, start by copying code from your existing `kvraft` server. You should be able to get the first test to pass without doing anything special regarding sharding, since the `shardkv/client.go` we give you takes care of sending RPCs to the group that the controller assigns to the key in question. + +For the second `shardkv` test, each k/v replica group must reject requests for keys for shards for which the group is not the assigned group. At this point, it's enough for the k/v servers to periodically ask the controller for the latest configuration, and to check that configuration each time a client Get/Put/Append RPC arrives. Use `key2shard()` (in `client.go`) to find the shard number for a key. + +Your server should respond with an `ErrWrongGroup` error to a client RPC with a key that the server isn't responsible for (i.e. for a key whose shard is not assigned to the server's group). + +Your server should not call the shard controller's `Join()` handler. The tester will call `Join()` when appropriate. + +## Files Agent Should Modify + +The agent should implement the solution by modifying the following files: + +- `src/shardctrler/client.go` +- `src/shardctrler/common.go` +- `src/shardctrler/server.go` +- `src/shardkv/client.go` +- `src/shardkv/common.go` +- `src/shardkv/server.go` + +**Important**: Do not modify test files, configuration files, or other infrastructure files. diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/config.json new file mode 100644 index 0000000..d290052 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/config.json @@ -0,0 +1,14 @@ +{ + "instance_id": "mit_6_5840_2024_shardkv_5b", + "course_id": "mit_6_5840_2024", + "docker_image": "xuafeng/swe-go-python:latest", + "timeout_minutes": 40, + "tags": [ + "distributed-systems", + "sharding", + "raft", + "fault-tolerance", + "go" + ], + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" +} \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/evaluate.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/evaluate.sh new file mode 100755 index 0000000..8e5ee96 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/evaluate.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -e + +echo "=== Evaluation ===" + +cd /workspace + +export PATH=$PATH:/usr/local/go/bin + +echo "Verifying protected files were not modified" +PROTECTED_FILES=( + "src/shardctrler/config.go:src_shardctrler_config.go" + "src/shardctrler/test_test.go:src_shardctrler_test_test.go" + "src/shardkv/config.go:src_shardkv_config.go" + "src/shardkv/test_test.go:src_shardkv_test_test.go" +) + +for entry in "${PROTECTED_FILES[@]}"; do + file="${entry%%:*}" + checksum_name="${entry##*:}" + if [ -f "$file" ] && [ -f "/tmp/checksums/${checksum_name}.sha256" ]; then + echo " Checking $file" + sha256sum -c "/tmp/checksums/${checksum_name}.sha256" || { + echo "FAIL: $file was modified" + exit 1 + } + fi +done +echo "All protected files unchanged" + +echo "Running ShardCtrler tests" +cd src/shardctrler +go test -race +if [ $? -ne 0 ]; then + echo "FAIL: ShardCtrler tests failed" + exit 1 +fi + +echo "Running ShardKV all tests" +cd ../shardkv +go test -race + +if [ $? 
-eq 0 ]; then + echo "PASS: Tests passed" + exit 0 +else + echo "FAIL: Tests failed" + exit 1 +fi diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/preprocess.sh new file mode 100755 index 0000000..e8b7741 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/preprocess.sh @@ -0,0 +1,917 @@ +#!/bin/bash +set -e + +echo "=== Preprocessing ShardKV Lab 5B ===" + +cd /workspace + +echo "ShardKV depends on Raft implementation from Lab 3" +echo "Copying reference Raft implementation..." + +echo ' Copying raft.go' +cat > src/raft/raft.go << 'RAFT_EOF' +package raft + +// +// this is an outline of the API that raft must expose to +// the service (or tester). see comments below for +// each of these functions for more details. +// +// rf = Make(...) +// create a new Raft server. +// rf.Start(command interface{}) (index, term, isleader) +// start agreement on a new log entry +// rf.GetState() (term, isLeader) +// ask a Raft for its current term, and whether it thinks it is leader +// ApplyMsg +// each time a new entry is committed to the log, each Raft peer +// should send an ApplyMsg to the service (or tester) +// in the same server. +// + +import ( + // "bytes" + + "bytes" + "log" + "sync" + "sync/atomic" + "time" + + // "6.5840/labgob" + "6.5840/labgob" + "6.5840/labrpc" +) + +// as each Raft peer becomes aware that successive log entries are +// committed, the peer should send an ApplyMsg to the service (or +// tester) on the same server, via the applyCh passed to Make(). set +// CommandValid to true to indicate that the ApplyMsg contains a newly +// committed log entry. +// +// in part 3D you'll want to send other kinds of messages (e.g., +// snapshots) on the applyCh, but set CommandValid to false for these +// other uses. +type ApplyMsg struct { + CommandValid bool + Command interface{} + CommandIndex int + CommandTerm int + + // For 3D: + SnapshotValid bool + Snapshot []byte + SnapshotTerm int + SnapshotIndex int +} + +type Entry struct { + Term int + Index int + Command interface{} +} + +// Base struct for common fields +type BaseRPC struct { + Term int +} + +// Implement RaftRPC interface for BaseRPC +func (b *BaseRPC) GetTerm() int { + return b.Term +} + +func (b *BaseRPC) SetTerm(term int) { + b.Term = term +} + +// RaftRPC interface +type RaftRPC interface { + GetTerm() int + SetTerm(int) +} + +type ServerState int + +const ( + FOLLOWER ServerState = iota + CANDIDATE + LEADER +) + +// A Go object implementing a single Raft peer. +type Raft struct { + mu sync.Mutex // Lock to protect shared access to this peer's state + peers []*labrpc.ClientEnd // RPC end points of all peers + persister *Persister // Object to hold this peer's persisted state + me int // this peer's index into peers[] + dead int32 // set by Kill() + heartbeatTimeout time.Duration + electionTimeout time.Duration + electionTimeStamp time.Time + applyCh chan ApplyMsg + + // state a Raft server must maintain. 
+ broadcasterCond []*sync.Cond + applierCond *sync.Cond + + // server state + state ServerState + + // presistent state on all servers + currentTerm int // latest term server has seen (initialized to 0 on first boot, increases monotonically) + votedFor int // candidateId that received vote in current term (or null if none) + logs []Entry // log entries; each entry contains command for state machine, and term when entry was received by leader (first index is 1) + + // volatile state on all servers + commitIndex int // index of highest log entry known to be committed (initialized to 0, increases monotonically) + lastApplied int // index of highest log entry applied to state machine (initialized to 0, increases monotonically) + + // volatile state on leaders (reinitialized after election) + nextIndex []int // for each server, index of the next log entry to send to that server (initialized to leader last log index + 1) + matchIndex []int // for each server, index of highest log entry known to be replicated on server (initialized to 0, increases monotonically) + + // snapshot msg + smsg *ApplyMsg +} + +// return currentTerm and whether this server +// believes it is the leader. +func (rf *Raft) GetState() (int, bool) { + rf.mu.Lock() + defer rf.mu.Unlock() + return rf.currentTerm, rf.state == LEADER +} + +func (rf *Raft) encodeState() []byte { + w := new(bytes.Buffer) + e := labgob.NewEncoder(w) + e.Encode(rf.currentTerm) + e.Encode(rf.votedFor) + e.Encode(rf.logs) + return w.Bytes() +} + +// save Raft's persistent state to stable storage, +// where it can later be retrieved after a crash and restart. +// see paper's Figure 2 for a description of what should be persistent. +// before you've implemented snapshots, you should pass nil as the +// second argument to persister.Save(). +// after you've implemented snapshots, pass the current snapshot +// (or nil if there's not yet a snapshot). +func (rf *Raft) persist() { + if rf.persister.ReadSnapshot() != nil { + rf.persister.Save(rf.encodeState(), rf.persister.ReadSnapshot()) + } else { + rf.persister.Save(rf.encodeState(), nil) + } +} + +// restore previously persisted state. +func (rf *Raft) readPersist(data []byte) { + if data == nil || len(data) < 1 { // bootstrap without any state + return + } + r := bytes.NewBuffer(data) + d := labgob.NewDecoder(r) + var currentTerm int + var votedFor int + var logs []Entry + + if d.Decode(¤tTerm) != nil || d.Decode(&votedFor) != nil || d.Decode(&logs) != nil { + log.Fatal("failed to read persist\n") + } else { + DPrintf("[%d]: read persist, currentTerm: %d, votedFor: %d, logs: %v\n", rf.me, currentTerm, votedFor, logs) + rf.currentTerm = currentTerm + rf.votedFor = votedFor + rf.logs = logs + rf.lastApplied = rf.logs[0].Index + rf.commitIndex = rf.logs[0].Index + } +} + +// the service says it has created a snapshot that has +// all info up to and including index. this means the +// service no longer needs the log through (and including) +// that index. Raft should now trim its log as much as possible. +func (rf *Raft) Snapshot(index int, snapshot []byte) { + // Your code here (3D). + rf.mu.Lock() + defer rf.mu.Unlock() + // if the snapshot is outdated, just ignore it + if rf.logs[0].Index >= index { + return + } + firstLogIndex := rf.logs[0].Index + trimLen := index - firstLogIndex + // trim the logs + rf.logs = append([]Entry{}, rf.logs[trimLen:]...) + rf.logs[0].Command = nil + rf.persister.Save(rf.encodeState(), snapshot) +} + +// the service using Raft (e.g. 
a k/v server) wants to start +// agreement on the next command to be appended to Raft's log. if this +// server isn't the leader, returns false. otherwise start the +// agreement and return immediately. there is no guarantee that this +// command will ever be committed to the Raft log, since the leader +// may fail or lose an election. even if the Raft instance has been killed, +// this function should return gracefully. +// +// the first return value is the index that the command will appear at +// if it's ever committed. the second return value is the current +// term. the third return value is true if this server believes it is +// the leader. +func (rf *Raft) Start(command interface{}) (int, int, bool) { + rf.mu.Lock() + defer rf.mu.Unlock() + if rf.state != LEADER { + return -1, -1, false + } + defer DPrintf("(Start) [%d]: command %+v, index:%d, term: %d\n", rf.me, command, rf.logs[len(rf.logs)-1].Index, rf.currentTerm) + rf.logs = append(rf.logs, Entry{ + Term: rf.currentTerm, + Index: rf.logs[len(rf.logs)-1].Index + 1, + Command: command, + }) + rf.broadcastAppendEntries(false) + // Your code here (3B). + return rf.logs[len(rf.logs)-1].Index, rf.currentTerm, true +} + +// Warning: this function is not thread-safe +func (rf *Raft) resetNewTermState(targetTerm int) { + DPrintf("(ResetTerm)[%d]: received newer term, set term to %d\n", rf.me, targetTerm) + if rf.currentTerm < targetTerm { + rf.votedFor = -1 + } + rf.currentTerm = targetTerm + rf.state = FOLLOWER // reset to follower +} + +// Reply false if term < currentTerm (§5.1) +// If RPC request contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkRequestTerm(args, reply RaftRPC) bool { + term := args.GetTerm() + defer reply.SetTerm(rf.currentTerm) + if term < rf.currentTerm { + return false + } + if term > rf.currentTerm { + rf.resetNewTermState(term) + } + return true +} + +// If RPC request or response contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkResponseTerm(args, reply RaftRPC, isElection bool) bool { + argsTerm := args.GetTerm() + replyTerm := reply.GetTerm() + if replyTerm > argsTerm { + rf.resetNewTermState(replyTerm) + rf.resetElectionTimer() + return false + } + return isElection || (rf.state == LEADER) +} + +// the tester doesn't halt goroutines created by Raft after each test, +// but it does call the Kill() method. your code can use killed() to +// check whether Kill() has been called. the use of atomic avoids the +// need for a lock. +// +// the issue is that long-running goroutines use memory and may chew +// up CPU time, perhaps causing later tests to fail and generating +// confusing debug output. any goroutine with a long-running loop +// should call killed() to check whether it should stop. +func (rf *Raft) Kill() { + atomic.StoreInt32(&rf.dead, 1) + // Your code here, if desired. 
+} + +func (rf *Raft) killed() bool { + z := atomic.LoadInt32(&rf.dead) + return z == 1 +} + +// a dedicated applier goroutine to guarantee that each log will be push into applyCh exactly once, ensuring that service's applying entries and raft's committing entries can be parallel +func (rf *Raft) applier() { + for !rf.killed() { + rf.mu.Lock() + // if there is no need to apply entries, just release CPU and wait other goroutine's signal if they commit new entries + for rf.lastApplied >= rf.commitIndex { + rf.applierCond.Wait() + } + firstLogIndex := rf.logs[0].Index + commitIndex, lastApplied := rf.commitIndex, rf.lastApplied + DPrintf("(applier) [%d]: commitIndex: %d, lastApplied: %d, logFirstIndex: %d, logLastIndex: %d\n", rf.me, commitIndex, lastApplied, firstLogIndex, rf.logs[len(rf.logs)-1].Index) + entries := make([]Entry, commitIndex-lastApplied) + copy(entries, rf.logs[lastApplied+1-firstLogIndex:commitIndex+1-firstLogIndex]) + if rf.smsg != nil { + msg := rf.smsg + rf.smsg = nil + rf.mu.Unlock() + rf.applyCh <- *msg + } else { + rf.mu.Unlock() + } + for _, entry := range entries { + DPrintf("(applier) [%d]: apply entry %+v\n", rf.me, entry) + rf.applyCh <- ApplyMsg{ + CommandValid: true, + Command: entry.Command, + CommandTerm: entry.Term, + CommandIndex: entry.Index, + } + } + rf.mu.Lock() + // use commitIndex rather than rf.commitIndex because rf.commitIndex may change during the Unlock() and Lock() + // use Max(rf.lastApplied, commitIndex) rather than commitIndex directly to avoid concurrently InstallSnapshot rpc causing lastApplied to rollback + if rf.lastApplied < commitIndex { + rf.lastApplied = commitIndex + } + rf.mu.Unlock() + } +} + +/** + * Lets illustrate the time line of the ticker function + * e: election timeout + * h: heartbeat timeout + * + * ---- h ---- h ---- h ---- h ---- h ---- ... + * + * First, the server will wake up each fixed heartbeat timeout. This timeout is + * relatively shorter than the election timeout. If the server is not a leader, + * it basically do nothing about heartbeat. + * + * However, everytime when server wake up, it will check if the election timeout + * is reached. It might start a new election, if it is not a leader. + * + * v election timeout found! + * ---- h1 ---- h2 ---- h3 ---- h ---- h ---- ... + * --------- e1 ------ e2 ------------ e ---- ... + * + * Reseting a new election timeout when the server receives a heartbeat or a + * vote from another server prevents the election. One shortcomming of the + * current implementation is that the election timeout does not trigger a new + * election immediately. It will wait until the next heartbeat timeout. + */ +func (rf *Raft) ticker() { + for !rf.killed() { + rf.mu.Lock() + if rf.state == LEADER { + rf.broadcastAppendEntries(true) + } else if rf.isElectionTimeout() { + rf.startElection() + } + rf.mu.Unlock() + time.Sleep(rf.heartbeatTimeout) + } +} + +// the service or tester wants to create a Raft server. the ports +// of all the Raft servers (including this one) are in peers[]. this +// server's port is peers[me]. all the servers' peers[] arrays +// have the same order. persister is a place for this server to +// save its persistent state, and also initially holds the most +// recent saved state, if any. applyCh is a channel on which the +// tester or service expects Raft to send ApplyMsg messages. +// Make() must return quickly, so it should start goroutines +// for any long-running work. 
+func Make(peers []*labrpc.ClientEnd, me int, + persister *Persister, applyCh chan ApplyMsg) *Raft { + rf := &Raft{} + rf.peers = peers + rf.persister = persister + rf.me = me + rf.applyCh = applyCh + rf.heartbeatTimeout = 125 * time.Millisecond + rf.resetElectionTimer() + rf.state = FOLLOWER + rf.votedFor = -1 + rf.logs = make([]Entry, 0) + + // dummy entry to make the index start from 1 + rf.logs = append(rf.logs, Entry{0, 0, nil}) + + rf.commitIndex = 0 + rf.lastApplied = 0 + + rf.applierCond = sync.NewCond(&rf.mu) + rf.broadcasterCond = make([]*sync.Cond, len(peers)) + + rf.nextIndex = make([]int, len(peers)) + rf.matchIndex = make([]int, len(peers)) + + for id := range peers { + rf.nextIndex[id] = 1 + if id != rf.me { + rf.broadcasterCond[id] = sync.NewCond(&sync.Mutex{}) + go rf.broadcaster(id) + } + } + + rf.smsg = nil + + // initialize from state persisted before a crash + rf.readPersist(persister.ReadRaftState()) + + // start ticker goroutine to start elections + go rf.ticker() + + go rf.applier() + + return rf +} + +RAFT_EOF + +echo ' Copying election.go' +cat > src/raft/election.go << 'RAFT_EOF' +package raft + +import ( + "math/rand" + "sync/atomic" + "time" +) + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type RequestVoteArgs struct { + BaseRPC // candidate's term + CandidateId int // candidate requesting vote + LastLogIndex int // index of candidate's last log entry + LastLogTerm int // term of candidate's last log entry +} + +type RequestVoteReply struct { + BaseRPC // currentTerm, for candidate to update itself + VoteGranted bool // true means candidate received vote +} + +// RequestVote RPC handler +// Restart your election timer if you grant a vote to another peer. +func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + reply.VoteGranted = false + + DPrintf("(RequestVote) [%d]: receive vote request from %d, term %d\n", rf.me, args.CandidateId, args.Term) + + if !rf.checkRequestTerm(args, reply) { + return + } + + if (rf.votedFor == -1 || rf.votedFor == args.CandidateId) && rf.isUpToDate(args) { + reply.VoteGranted = true + rf.votedFor = args.CandidateId + rf.resetElectionTimer() + } +} + +func (rf *Raft) isUpToDate(args *RequestVoteArgs) bool { + lastLog := rf.logs[len(rf.logs)-1] + candidateIndex := args.LastLogIndex + candidateTerm := args.LastLogTerm + return candidateTerm > lastLog.Term || (candidateTerm == lastLog.Term && candidateIndex >= lastLog.Index) +} + +func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, voteCount *int32) { + reply := &RequestVoteReply{} + ok := rf.peers[server].Call("Raft.RequestVote", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, true) { + return + } + + if !reply.VoteGranted { + return + } + + DPrintf("(RequestVote) [%d]: received vote from %d, voteCount: %d\n", rf.me, server, *voteCount) + + // If votes received from majority of servers: become leader + if atomic.AddInt32(voteCount, 1) > int32(len(rf.peers)/2) && + rf.state == CANDIDATE && + rf.currentTerm == args.Term { + rf.state = LEADER + lastLogIndex := rf.logs[len(rf.logs)-1].Index + for i := range rf.peers { + rf.nextIndex[i] = lastLogIndex + 1 + rf.matchIndex[i] = 0 + } + DPrintf("[%d]: become leader to term %d\n", rf.me, rf.currentTerm) + // send initial empty AppendEntries RPCs (heartbeat) to each server immediately + rf.broadcastAppendEntries(true) 
+ } + DPrintf("(RequestVote) [%d]: voteCount: %d\n", rf.me, *voteCount) +} + +func (rf *Raft) startElection() { + rf.currentTerm++ + rf.state = CANDIDATE + rf.votedFor = rf.me + rf.resetElectionTimer() + DPrintf("(RequestVote) [%d]: start election, term %d", rf.me, rf.currentTerm) + lastLog := rf.logs[len(rf.logs)-1] + + voteCount := int32(1) + args := RequestVoteArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + CandidateId: rf.me, + LastLogIndex: lastLog.Index, + LastLogTerm: lastLog.Term, + } + + for id := range rf.peers { + if id == rf.me { + continue + } + go rf.sendRequestVote(id, &args, &voteCount) + } +} + +func (rf *Raft) resetElectionTimer() { + // election timeout range from 350 to 550 + ms := 350 + (rand.Int63() % 200) + rf.electionTimeStamp = time.Now() + rf.electionTimeout = time.Duration(ms) * time.Millisecond +} + +func (rf *Raft) isElectionTimeout() bool { + return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) +} + +RAFT_EOF + +echo ' Copying append_entries.go' +cat > src/raft/append_entries.go << 'RAFT_EOF' +package raft + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type AppendEntriesArgs struct { + BaseRPC // leader's term + LeaderId int // so follower can redirect clients + PrevLogIndex int // index of log entry immediately preceding new ones + PrevLogTerm int // term of prevLogIndex entry + Entries []Entry // log entries to store (empty for heartbeat; may send more than one for efficiency) + CommitIndex int // leader's commitIndex +} + +type AppendEntriesReply struct { + BaseRPC // currentTerm, for leader to update itself + Success bool // true if follower contained entry matching prevLogIndex and prevLogTerm + ConflictIndex int // the index of the first conflicting entry +} + +// AppendEntries RPC handler +// Reset the election timer if you get an AppendEntries RPC from the current leader +// (i.e., if the term of the AppendEntries arguments is outdated, you should not reset your timer); +func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + DPrintf("(AppendEntries) [%d] recieve from %d, Term: %d, PrevLogIndex: %d, PrevLogTerm: %d\n", rf.me, args.LeaderId, args.Term, args.PrevLogIndex, args.PrevLogTerm) + + reply.Success = false + reply.ConflictIndex = -1 + + if !rf.checkRequestTerm(args, reply) { + return + } + + if rf.state == CANDIDATE { + rf.state = FOLLOWER + } + + rf.resetElectionTimer() + + prevLogIndex := args.PrevLogIndex - rf.logs[0].Index + + if prevLogIndex < 0 { + // force to send a snapshot + reply.ConflictIndex = 0 + return + } + + // Reply false if log doesn’t contain an entry at prevLogIndex + // whose term matches prevLogTerm (§5.3) + if prevLogIndex >= len(rf.logs) { + reply.ConflictIndex = rf.logs[len(rf.logs)-1].Index + return + } + + // If an existing entry conflicts with a new one (same index + // but different terms), delete the existing entry and all that + // follow it (§5.3) + if rf.logs[prevLogIndex].Term != args.PrevLogTerm { + // optimization + curTerm := rf.logs[prevLogIndex].Term + var conflictIndex int + for i := prevLogIndex; i > 0; i-- { + if rf.logs[i-1].Term != curTerm { + conflictIndex = i + break + } + } + reply.ConflictIndex = conflictIndex + rf.logs[0].Index + return + } + for idx, entry := range args.Entries { + logIndex := entry.Index - rf.logs[0].Index + if logIndex >= len(rf.logs) || rf.logs[logIndex].Term != entry.Term { + DPrintf("(AppendEntries) [%d] append logs: %v\n", rf.me, 
args.Entries) + rf.logs = append([]Entry{}, append(rf.logs[:logIndex], args.Entries[idx:]...)...) + break + } + } + reply.Success = true + if args.CommitIndex > rf.commitIndex { + rf.commitIndex = args.CommitIndex + if args.CommitIndex-rf.logs[0].Index >= len(rf.logs) { + rf.commitIndex = rf.logs[len(rf.logs)-1].Index + } + } + rf.applierCond.Signal() +} + +func (rf *Raft) sendAppendEntries(server int, args *AppendEntriesArgs) { + reply := &AppendEntriesReply{} + ok := rf.peers[server].Call("Raft.AppendEntries", args, reply) + if !ok { + return + } + + DPrintf("(AppendEntries) [%d] recieve reply from %d, Term: %d, Success: %v, ConflictIndex: %d\n", rf.me, server, reply.Term, reply.Success, reply.ConflictIndex) + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + // If successful: update nextIndex and matchIndex for + // follower (§5.3) + if reply.Success { + if len(args.Entries) > 0 { + rf.nextIndex[server] = args.Entries[len(args.Entries)-1].Index + 1 + } + rf.matchIndex[server] = rf.nextIndex[server] - 1 + for _, log := range rf.logs { + index := log.Index + count := 1 + for peer := range rf.peers { + if peer != rf.me && rf.matchIndex[peer] >= index { + count++ + } + } + // If there exists an N such that N > commitIndex, a majority + // of matchIndex[i] ≥ N, and log[N].term == currentTerm: + // set commitIndex = N (§5.3, §5.4). + if count > len(rf.peers)/2 && index > rf.commitIndex && log.Term == rf.currentTerm { + rf.commitIndex = index + } + } + } else { + if reply.ConflictIndex != -1 { + rf.nextIndex[server] = reply.ConflictIndex - 1 + } else { + rf.nextIndex[server] = rf.nextIndex[server] - 1 + } + if rf.nextIndex[server] < 1 { + rf.nextIndex[server] = 1 + } + } + DPrintf("(AppendEntries) [%d] nextIndex: %v, matchIndex: %v, commitIndex: %d\n", rf.me, rf.nextIndex, rf.matchIndex, rf.commitIndex) + rf.applierCond.Signal() +} + +func (rf *Raft) broadcastAppendEntries(isHeartBeat bool) { + for peer := range rf.peers { + if peer != rf.me { + // if it is a heartbeat we dont care the linearizability of logs append + if isHeartBeat { + args := rf.prepareReplicationArgs(peer) + go rf.sendReplicationRPC(peer, args) + } else { + rf.broadcasterCond[peer].Signal() + } + } + } +} + +func (rf *Raft) prepareReplicationArgs(peer int) interface{} { + if rf.nextIndex[peer] > rf.logs[0].Index { + firstLog := rf.logs[0] + nextIndex := rf.nextIndex[peer] - firstLog.Index + prevLog := rf.logs[nextIndex-1] + logs := make([]Entry, len(rf.logs[nextIndex:])) + copy(logs, rf.logs[nextIndex:]) + return &AppendEntriesArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + PrevLogIndex: prevLog.Index, + PrevLogTerm: prevLog.Term, + Entries: logs, + CommitIndex: rf.commitIndex, + } + } else { + return &InstallSnapshotArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + LastIncludedIndex: rf.logs[0].Index, + LastIncludedTerm: rf.logs[0].Term, + Offset: 0, + Data: rf.persister.ReadSnapshot(), + Done: true, + } + } +} + +func (rf *Raft) sendReplicationRPC(peer int, args interface{}) { + switch v := args.(type) { + case *AppendEntriesArgs: + rf.sendAppendEntries(peer, v) + case *InstallSnapshotArgs: + rf.sendInstallSnapshot(peer, v) + default: + panic("(sendReplicationRPC) SHOULD NOT REACH") + } +} + +func (rf *Raft) isReplicationNeeded(peer int) bool { + return rf.state == LEADER && rf.matchIndex[peer] < rf.logs[len(rf.logs)-1].Index +} + +func (rf *Raft) broadcaster(peer int) { + rf.broadcasterCond[peer].L.Lock() + defer 
rf.broadcasterCond[peer].L.Unlock() + for !rf.killed() { + rf.mu.Lock() + for !rf.isReplicationNeeded(peer) { + rf.mu.Unlock() + rf.broadcasterCond[peer].Wait() + rf.mu.Lock() + } + args := rf.prepareReplicationArgs(peer) + rf.mu.Unlock() + rf.sendReplicationRPC(peer, args) + } +} + +RAFT_EOF + +echo ' Copying install_snapshot.go' +cat > src/raft/install_snapshot.go << 'RAFT_EOF' +package raft + +type InstallSnapshotArgs struct { + BaseRPC + LeaderId int + LastIncludedIndex int + LastIncludedTerm int + Offset int + Data []byte + Done bool +} + +type InstallSnapshotReply struct { + BaseRPC +} + +// InstallSnapshot RPC handler +func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkRequestTerm(args, reply) { + return + } + + if args.LastIncludedIndex <= rf.commitIndex { + return + } + prevCommitIndex := rf.commitIndex + prevLastApplied := rf.lastApplied + defer DPrintf("(InstallSnapshot) [%d]: LastIncludedIndex: %d, LastIncludedTerm: %d, prevCommitIndex: %d, prevLastApplied: %d\n", rf.me, args.LastIncludedIndex, args.LastIncludedTerm, prevCommitIndex, prevLastApplied) + rf.resetElectionTimer() + + rf.commitIndex = args.LastIncludedIndex + rf.lastApplied = args.LastIncludedIndex + // 2. Create new snapshot file if first chunk (offset is 0) + // 3. Write data into snapshot file at given offset + // 4. Reply and wait for more data chunks if done is false + if !args.Done { + return + } + // 5. Save snapshot file, discard any existing or partial snapshot with a + // smaller index + // 6. If existing log entry has same index and term as snapshot’s last + // included entry, retain log entries following it and reply + // 7. Discard the entire log + // 8. Reset state machine using snapshot contents (and load snapshot’s + // cluster configuration) + firstLogIndex := rf.logs[0].Index + if firstLogIndex <= args.LastIncludedIndex { + rf.logs = append([]Entry{}, Entry{ + Index: args.LastIncludedIndex, + Term: args.LastIncludedTerm, + Command: nil, + }) + } else if firstLogIndex < args.LastIncludedIndex { + trimLen := args.LastIncludedIndex - firstLogIndex + rf.logs = append([]Entry{}, rf.logs[trimLen:]...) + rf.logs[0].Command = nil + } + rf.persister.Save(rf.encodeState(), args.Data) + rf.smsg = &ApplyMsg{ + SnapshotValid: true, + Snapshot: args.Data, + SnapshotTerm: args.LastIncludedTerm, + SnapshotIndex: args.LastIncludedIndex, + } +} + +func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { + reply := &InstallSnapshotReply{} + ok := rf.peers[server].Call("Raft.InstallSnapshot", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + + if args.LastIncludedIndex != rf.logs[0].Index { + return + } + + rf.nextIndex[server] = args.LastIncludedIndex + 1 + rf.matchIndex[server] = args.LastIncludedIndex + + rf.persister.Save(rf.encodeState(), args.Data) +} + +RAFT_EOF + +echo ' Copying util.go' +cat > src/raft/util.go << 'RAFT_EOF' +package raft + +import ( + "log" + "os" +) + +// Debugging +var Debug = os.Getenv("DEBUG") == "1" + +func DPrintf(format string, a ...interface{}) { + if !Debug { + return + } + log.Printf(format, a...) 
+}
+
+RAFT_EOF
+
+
+echo "Creating checksums for protected files"
+PROTECTED_FILES=(
+  "src/shardctrler/config.go"
+  "src/shardctrler/test_test.go"
+  "src/shardkv/config.go"
+  "src/shardkv/test_test.go"
+)
+
+mkdir -p /tmp/checksums
+for file in "${PROTECTED_FILES[@]}"; do
+  if [ -f "$file" ]; then
+    sha256sum "$file" > "/tmp/checksums/$(echo "$file" | tr '/' '_').sha256"  # filename must match what evaluate.sh looks up
+    echo "  $file"
+  fi
+done
+
+
+
+echo "Preprocessing complete"
+exit 0
diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/task.md b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/task.md
new file mode 100644
index 0000000..7914f7b
--- /dev/null
+++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/task.md
@@ -0,0 +1,242 @@
+# Sharded Key/Value Service
+
+### Introduction
+
+In this lab you'll build a key/value storage system that "shards," or partitions, the keys over a set of replica groups. A shard is a subset of the key/value pairs; for example, all the keys starting with "a" might be one shard, all the keys starting with "b" another, etc. The reason for sharding is performance. Each replica group handles puts and gets for just a few of the shards, and the groups operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of groups.
+
+Your sharded key/value store will have two main components. First, a set of replica groups. Each replica group is responsible for a subset of the shards, using Raft replication. The second component is the "shard controller". The shard controller decides which replica group should serve each shard; this information is called the configuration. The configuration changes over time. Clients consult the shard controller in order to find the replica group for a key, and replica groups consult the controller in order to find out what shards to serve. There is a single shard controller for the whole system, implemented as a fault-tolerant service using Raft.
+
+A sharded storage system must be able to shift shards among replica groups. One reason is that some groups may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that replica groups may join and leave the system: new replica groups may be added to increase capacity, or existing replica groups may be taken offline for repair or retirement.
+
+The main challenge in this lab will be handling reconfiguration -- changes in the assignment of shards to groups. Within a single replica group, all group members must agree on when a reconfiguration occurs relative to client Put/Append/Get requests. For example, a Put may arrive at about the same time as a reconfiguration that causes the replica group to stop being responsible for the shard holding the Put's key. All replicas in the group must agree on whether the Put occurred before or after the reconfiguration. If before, the Put should take effect and the new owner of the shard will see its effect; if after, the Put won't take effect and the client must re-try at the new owner. The recommended approach is to have each replica group use Raft to log not just the sequence of Puts, Appends, and Gets but also the sequence of reconfigurations. You will need to ensure that at most one replica group is serving requests for each shard at any one time.
+
+Reconfiguration also requires interaction among the replica groups. For example, in configuration 10 group G1 may be responsible for shard S1. 
In configuration 11, group G2 may be responsible for shard S1. During the reconfiguration from 10 to 11, G1 and G2 must use RPC to move the contents of shard S1 (the key/value pairs) from G1 to G2. + +- Note: Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. +- Note: This lab uses "configuration" to refer to the assignment of shards to replica groups. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. + +This lab's general architecture (a configuration service and a set of replica groups) follows the same general pattern as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are very simple; and handoff of shards is slow and doesn't allow concurrent client access. + +- Note: Your Lab 5 sharded server, Lab 5 shard controller, and Lab 4 kvraft must all use the same Raft implementation. + +### Getting Started + +We supply you with skeleton code and tests in `src/shardctrler` and `src/shardkv`. + +To get up and running, execute the following commands: + +```sh +$ cd src/shardctrler +$ go test +--- FAIL: TestBasic (0.00s) +test_test.go:11: wanted 1 groups, got 0 +FAIL +exit status 1 +FAIL shardctrler 0.008s +$ +``` + +When you're done, your implementation should pass all the tests in the `src/shardctrler` directory, and all the ones in `src/shardkv`. + +### Part A: The Controller and Static Sharding ("easy") + +First you'll implement the shard controller, in `shardctrler/server.go` and `client.go`, and a sharded key/value server that can handle an unchanging (static) configuration. When you're done, your code should pass all the tests in the `shardctrler/` directory, and the `5A` tests in `shardkv/`. + +```sh +$ cd src/shardctrler +$ go test +Test: Basic leave/join ... +... Passed +Test: Historical queries ... +... Passed +Test: Move ... +... Passed +Test: Concurrent leave/join ... +... Passed +Test: Minimal transfers after joins ... +... Passed +Test: Minimal transfers after leaves ... +... Passed +Test: Multi-group join/leave ... +... Passed +Test: Concurrent multi leave/join ... +... Passed +Test: Minimal transfers after multijoins ... +... Passed +Test: Minimal transfers after multileaves ... +... Passed +Test: Check Same config on servers ... +... Passed +PASS +ok shardctrler 5.863s +$ +$ cd ../shardkv +$ go test -run 5A +Test (5A): static shards ... +... Passed +Test (5A): rejection ... +... Passed +PASS +ok shardkv 9.262s +$ +``` + +The shardctrler manages a sequence of numbered configurations. Each configuration describes a set of replica groups and an assignment of shards to replica groups. Whenever this assignment needs to change, the shard controller creates a new configuration with the new assignment. Key/value clients and servers contact the shardctrler when they want to know the current (or a past) configuration. + +Your implementation must support the RPC interface described in `shardctrler/common.go`, which consists of `Join`, `Leave`, `Move`, and `Query` RPCs. 
These RPCs are intended to allow an administrator (and the tests) to control the shardctrler: to add new replica groups, to eliminate replica groups, and to move shards between replica groups. + +The `Join` RPC is used by an administrator to add new replica groups. Its argument is a set of mappings from unique, non-zero replica group identifiers (GIDs) to lists of server names. The shardctrler should react by creating a new configuration that includes the new replica groups. The new configuration should divide the shards as evenly as possible among the full set of groups, and should move as few shards as possible to achieve that goal. The shardctrler should allow re-use of a GID if it's not part of the current configuration (i.e. a GID should be allowed to Join, then Leave, then Join again). + +The `Leave` RPC's argument is a list of GIDs of previously joined groups. The shardctrler should create a new configuration that does not include those groups, and that assigns those groups' shards to the remaining groups. The new configuration should divide the shards as evenly as possible among the groups, and should move as few shards as possible to achieve that goal. + +The `Move` RPC's arguments are a shard number and a GID. The shardctrler should create a new configuration in which the shard is assigned to the group. The purpose of `Move` is to allow us to test your software. A `Join` or `Leave` following a `Move` will likely un-do the `Move`, since `Join` and `Leave` re-balance. + +The `Query` RPC's argument is a configuration number. The shardctrler replies with the configuration that has that number. If the number is -1 or bigger than the biggest known configuration number, the shardctrler should reply with the latest configuration. The result of `Query(-1)` should reflect every `Join`, `Leave`, or `Move` RPC that the shardctrler finished handling before it received the `Query(-1)` RPC. + +The very first configuration should be numbered zero. It should contain no groups, and all shards should be assigned to GID zero (an invalid GID). The next configuration (created in response to a `Join` RPC) should be numbered 1, &c. There will usually be significantly more shards than groups (i.e., each group will serve more than one shard), in order that load can be shifted at a fairly fine granularity. + +#### Task + +You must implement the interface specified above in `client.go` and `server.go` in the `shardctrler/` directory. Your shardctrler must be fault-tolerant, using your Raft library from Lab 3/4. You have completed this task when you pass all the tests in `shardctrler/`. + +#### Hints + +- Start with a stripped-down copy of your kvraft server. +- You should implement duplicate client request detection for RPCs to the shard controller. The shardctrler tests don't test this, but the shardkv tests will later use your shardctrler on an unreliable network; you may have trouble passing the shardkv tests if your shardctrler doesn't filter out duplicate RPCs. +- The code in your state machine that performs the shard rebalancing needs to be deterministic. In Go, map iteration order is not deterministic. +- Go maps are references. If you assign one variable of type map to another, both variables refer to the same map. Thus if you want to create a new `Config` based on a previous one, you need to create a new map object (with `make()`) and copy the keys and values individually. +- The Go race detector (go test -race) may help you find bugs. 
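+
+To illustrate the hint above about Go maps being references, the following sketch (not part of the supplied skeleton; it assumes the usual `Num`, `Shards`, and `Groups` fields of the `Config` struct in `shardctrler/common.go`) shows one way to derive a new configuration from the previous one by allocating a fresh `Groups` map and copying its contents:
+
+```go
+// copyConfig is an illustrative helper, not required by the tests: it builds a
+// new Config based on the previous one. Shards is an array, so plain assignment
+// copies it; Groups is a map, so it must be re-allocated with make() and its
+// keys and values copied individually.
+func copyConfig(old Config) Config {
+	next := Config{
+		Num:    old.Num + 1,
+		Shards: old.Shards,
+		Groups: make(map[int][]string),
+	}
+	for gid, servers := range old.Groups {
+		// copy each server list as well, so later edits don't alias old slices
+		next.Groups[gid] = append([]string(nil), servers...)
+	}
+	return next
+}
+```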
+ +Next, in the `shardkv/` directory, implement enough of a sharded key/value server to pass the first two tests in `shardkv/`. Again, start by copying code from your existing `kvraft` server. You should be able to get the first test to pass without doing anything special regarding sharding, since the `shardkv/client.go` we give you takes care of sending RPCs to the group that the controller assigns to the key in question. + +For the second `shardkv` test, each k/v replica group must reject requests for keys for shards for which the group is not the assigned group. At this point, it's enough for the k/v servers to periodically ask the controller for the latest configuration, and to check that configuration each time a client Get/Put/Append RPC arrives. Use `key2shard()` (in `client.go`) to find the shard number for a key. + +Your server should respond with an `ErrWrongGroup` error to a client RPC with a key that the server isn't responsible for (i.e. for a key whose shard is not assigned to the server's group). + +Your server should not call the shard controller's `Join()` handler. The tester will call `Join()` when appropriate. + +### Part B: Shard Movement ("hard") + +The main task in this part of the lab is to move shards among replica groups when the controller changes the sharding, and do it in a way that provides linearizable k/v client operations. + +Each of your shards is only required to make progress when a majority of servers in the shard's Raft replica group is alive and can talk to each other, and can talk to a majority of the `shardctrler` servers. Your implementation must operate (serve requests and be able to re-configure as needed) even if a minority of servers in some replica group(s) are dead, temporarily unavailable, or slow. + +A shardkv server is a member of only a single replica group. The set of servers in a given replica group will never change. + +We supply you with `client.go` code that sends each RPC to the replica group responsible for the RPC's key. It re-tries if the replica group says it is not responsible for the key; in that case, the client code asks the shard controller for the latest configuration and tries again. You'll have to modify client.go as part of your support for dealing with duplicate client RPCs, much as in the kvraft lab. + +When you're done your code should pass all the shardkv tests other than the challenge tests: + +```sh +$ cd src/shardkv +$ go test +Test (5A): static shards ... +... Passed +Test (5A): rejection ... +... Passed +Test (5B): join then leave ... +... Passed +Test (5B): snapshots, join, and leave ... +labgob warning: Decoding into a non-default variable/field Num may not work +... Passed +Test (5B): servers miss configuration changes... +... Passed +Test (5B): concurrent puts and configuration changes... +... Passed +Test (5B): more concurrent puts and configuration changes... +... Passed +Test (5B): concurrent configuration change and restart... +... Passed +Test (5B): unreliable 1... +... Passed +Test (5B): unreliable 2... +... Passed +Test (5B): unreliable 3... +... Passed +Test: shard deletion (challenge 1) ... +... Passed +Test: unaffected shard access (challenge 2) ... +... Passed +Test: partial migration shard access (challenge 2) ... +... Passed +PASS +ok shardkv 173.974s +$ +``` + +You will need to make your servers watch for configuration changes, and when one is detected, to start the shard migration process. 
If a replica group loses a shard, it must stop serving requests to keys in that shard immediately, and start migrating the data for that shard to the replica group that is taking over ownership. If a replica group gains a shard, it needs to wait for the previous owner to send over the old shard data before accepting requests for that shard. + +#### Task + +Implement shard migration during configuration changes. Make sure that all servers in a replica group do the migration at the same point in the sequence of operations they execute, so that they all either accept or reject concurrent client requests. You should focus on passing the second test ("join then leave") before working on the later tests. You are done with this task when you pass all tests up to, but not including, `TestDelete`. + +- Note: Your server will need to periodically poll the shardctrler to learn about new configurations. The tests expect that your code polls roughly every 100 milliseconds; more often is OK, but much less often may cause problems. + +- Note: Servers will need to send RPCs to each other in order to transfer shards during configuration changes. The shardctrler's `Config` struct contains server names, but you need a `labrpc.ClientEnd` in order to send an RPC. You should use the `make_end()` function passed to `StartServer()` to turn a server name into a `ClientEnd`. `shardkv/client.go` contains code that does this. + +#### Hints + +- Process re-configurations one at a time, in order. +- If a test fails, check for gob errors (e.g. "gob: type not registered for interface ..."). Go doesn't consider gob errors to be fatal, although they are fatal for the lab. +- You'll need to provide at-most-once semantics (duplicate detection) for client requests across shard movement. +- Think about how the shardkv client and server should deal with `ErrWrongGroup`. Should the client change the sequence number if it receives `ErrWrongGroup`? Should the server update the client state if it returns `ErrWrongGroup` when executing a `Get`/`Put` request? +- After a server has moved to a new configuration, it is acceptable for it to continue to store shards that it no longer owns (though this would be regrettable in a real system). This may help simplify your server implementation. +- When group G1 needs a shard from G2 during a configuration change, does it matter at what point during its processing of log entries G2 sends the shard to G1? +- You can send an entire map in an RPC request or reply, which may help keep the code for shard transfer simple. +- If one of your RPC handlers includes in its reply a map (e.g. a key/value map) that's part of your server's state, you may get bugs due to races. The RPC system has to read the map in order to send it to the caller, but it isn't holding a lock that covers the map. Your server, however, may proceed to modify the same map while the RPC system is reading it. The solution is for the RPC handler to include a copy of the map in the reply. +- If you put a map or a slice in a Raft log entry, and your key/value server subsequently sees the entry on the `applyCh` and saves a reference to the map/slice in your key/value server's state, you may have a race. Make a copy of the map/slice, and store the copy in your key/value server's state. The race is between your key/value server modifying the map/slice and Raft reading it while persisting its log. +- During a configuration change, a pair of groups may need to move shards in both directions between them. 
If you see deadlock, this is a possible source. + +### No-credit challenge exercises + +These two features would be essential if you were to build a system like this for production use. + +#### Garbage collection of state + +When a replica group loses ownership of a shard, that replica group should eliminate the keys that it lost from its database. It is wasteful for it to keep values that it no longer owns, and no longer serves requests for. However, this poses some issues for migration. Say we have two groups, G1 and G2, and there is a new configuration C that moves shard S from G1 to G2. If G1 erases all keys in S from its database when it transitions to C, how does G2 get the data for S when it tries to move to C? + +##### Challenge + +Cause each replica group to keep old shards no longer than absolutely necessary. Your solution must work even if all the servers in a replica group like G1 above crash and are then brought back up. You have completed this challenge if you pass `TestChallenge1Delete`. + +#### Client requests during configuration changes + +The simplest way to handle configuration changes is to disallow all client operations until the transition has completed. While conceptually simple, this approach is not feasible in production-level systems; it results in long pauses for all clients whenever machines are brought in or taken out. It would be better to continue serving shards that are not affected by the ongoing configuration change. + +##### Challenge + +Modify your solution so that client operations for keys in unaffected shards continue to execute during a configuration change. You have completed this challenge when you pass `TestChallenge2Unaffected`. + +While the optimization above is good, we can still do better. Say that some replica group G3, when transitioning to C, needs shard S1 from G1, and shard S2 from G2. We really want G3 to immediately start serving a shard once it has received the necessary state, even if it is still waiting for some other shards. For example, if G1 is down, G3 should still start serving requests for S2 once it receives the appropriate data from G2, despite the transition to C not yet having completed. + +##### Challenge + +Modify your solution so that replica groups start serving shards the moment they are able to, even if a configuration is still ongoing. You have completed this challenge when you pass `TestChallenge2Partial`. + +### Handin procedure + +Before submitting, please run _all_ the tests one final time. + +Also, note that your Lab 5 sharded server, Lab 5 shard controller, and Lab 4 kvraft must all use the same Raft implementation. + +Before submitting, double check that your solution works with: + +```sh +$ go test ./raft +$ go test ./kvraft +$ go test ./shardctrler +$ go test ./shardkv +``` + +## Files Agent Should Modify + +The agent should implement the solution by modifying the following files: + +- `src/shardctrler/client.go` +- `src/shardctrler/common.go` +- `src/shardctrler/server.go` +- `src/shardkv/client.go` +- `src/shardkv/common.go` +- `src/shardkv/server.go` + +**Important**: Do not modify test files, configuration files, or other infrastructure files. 
diff --git a/benchmarks/courselab_bench/data/test_course/test__simple__echo/config.json b/benchmarks/courselab_bench/data/test_course/test__simple__echo/config.json new file mode 100644 index 0000000..428122b --- /dev/null +++ b/benchmarks/courselab_bench/data/test_course/test__simple__echo/config.json @@ -0,0 +1,7 @@ +{ + "instance_id": "test__simple__echo", + "course_id": "test_course", + "docker_image": "xuafeng/swe-go-python:latest", + "timeout_minutes": 5, + "tags": ["simple", "test"] +} diff --git a/benchmarks/courselab_bench/data/test_course/test__simple__echo/evaluate.sh b/benchmarks/courselab_bench/data/test_course/test__simple__echo/evaluate.sh new file mode 100755 index 0000000..f70ce4f --- /dev/null +++ b/benchmarks/courselab_bench/data/test_course/test__simple__echo/evaluate.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -e + +echo "=== Evaluation ===" + +if [ ! -f result.txt ]; then + echo "FAIL: result.txt does not exist" + exit 1 +fi + +content=$(cat result.txt) +if [ "$content" = "SUCCESS" ]; then + echo "PASS: result.txt contains 'SUCCESS'" + exit 0 +else + echo "FAIL: result.txt does not contain 'SUCCESS' (found: '$content')" + exit 1 +fi diff --git a/benchmarks/courselab_bench/data/test_course/test__simple__echo/preprocess.sh b/benchmarks/courselab_bench/data/test_course/test__simple__echo/preprocess.sh new file mode 100755 index 0000000..9838ecb --- /dev/null +++ b/benchmarks/courselab_bench/data/test_course/test__simple__echo/preprocess.sh @@ -0,0 +1,9 @@ +#!/bin/bash +set -e + +if [ -f result.txt ]; then + rm result.txt +fi + +echo "Preprocessing complete" +exit 0 diff --git a/benchmarks/courselab_bench/data/test_course/test__simple__echo/task.md b/benchmarks/courselab_bench/data/test_course/test__simple__echo/task.md new file mode 100644 index 0000000..40d367f --- /dev/null +++ b/benchmarks/courselab_bench/data/test_course/test__simple__echo/task.md @@ -0,0 +1,8 @@ +# Task: Create a Result File + +Create a file called `result.txt` containing the word SUCCESS. 
+ +You can do this by running: +```bash +echo SUCCESS > result.txt +``` diff --git a/benchmarks/courselab_bench/prepare_dataset.py b/benchmarks/courselab_bench/prepare_dataset.py new file mode 100755 index 0000000..7721b6a --- /dev/null +++ b/benchmarks/courselab_bench/prepare_dataset.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +import json +import sys +from pathlib import Path +from loguru import logger + + +def load_task_from_folder(task_folder: Path) -> dict: + config_path = task_folder / "config.json" + task_md_path = task_folder / "task.md" + preprocess_path = task_folder / "preprocess.sh" + evaluate_path = task_folder / "evaluate.sh" + + required_files = [config_path, task_md_path, preprocess_path, evaluate_path] + for file_path in required_files: + if not file_path.exists(): + raise FileNotFoundError(f"{file_path.name} not found in {task_folder}") + + with config_path.open("r") as f: + config = json.load(f) + + with task_md_path.open("r") as f: + problem_statement = f.read() + + with preprocess_path.open("r") as f: + preprocess_script = f.read() + + with evaluate_path.open("r") as f: + evaluate_script = f.read() + + return { + "instance_id": config["instance_id"], + "course_id": config["course_id"], + "problem_statement": problem_statement, + "docker_image": config["docker_image"], + "timeout_minutes": config.get("timeout_minutes", 30), + "tags": config.get("tags", []), + "preprocess_script": preprocess_script, + "evaluate_script": evaluate_script, + "repo_url": config.get("repo_url"), + "base_commit": config.get("base_commit"), + } + + +def prepare_dataset(data_dir: Path, output_file: Path) -> None: + if not data_dir.exists(): + logger.error(f"Data directory not found: {data_dir}") + sys.exit(1) + + tasks = [] + for item in sorted(data_dir.iterdir()): + if not item.is_dir(): + continue + + if (item / "config.json").exists(): + try: + task = load_task_from_folder(item) + tasks.append(task) + logger.info(f"Loaded: {task['instance_id']}") + except Exception as e: + logger.warning(f"Skipped {item.name}: {e}") + else: + for task_dir in sorted(item.iterdir()): + if not task_dir.is_dir(): + continue + try: + task = load_task_from_folder(task_dir) + tasks.append(task) + logger.info(f"Loaded: {task['instance_id']}") + except Exception as e: + logger.warning(f"Skipped {task_dir.name}: {e}") + + if not tasks: + logger.error("No tasks found") + sys.exit(1) + + output_file.parent.mkdir(parents=True, exist_ok=True) + with output_file.open("w") as f: + for task in tasks: + f.write(json.dumps(task) + "\n") + + logger.info(f"Wrote {len(tasks)} tasks to {output_file}") + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Prepare dataset from task folders") + parser.add_argument( + "--data-dir", type=str, default="data", help="Data directory (default: data)" + ) + parser.add_argument( + "--output", + type=str, + default="data/tasks.jsonl", + help="Output file (default: data/tasks.jsonl)", + ) + + args = parser.parse_args() + logger.remove() + logger.add(sys.stderr, level="INFO", format="{message}", colorize=True) + + prepare_dataset(Path(args.data_dir), Path(args.output)) diff --git a/benchmarks/courselab_bench/pyproject.toml b/benchmarks/courselab_bench/pyproject.toml new file mode 100644 index 0000000..5a94d22 --- /dev/null +++ b/benchmarks/courselab_bench/pyproject.toml @@ -0,0 +1,43 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "courselab-bench" +version = "0.1.0" +description = "A 
benchmark for evaluating AI agents on systems programming labs" +readme = "README.md" +requires-python = ">=3.10" +license = { text = "MIT" } +authors = [{ name = "System Intelligence Benchmark Team" }] + +dependencies = [ + "pydantic>=2.0", + "pyyaml>=6.0", + "click>=8.0", + "loguru>=0.7", + "litellm>=1.0", + "tomli>=2.0; python_version < '3.11'", +] + +[project.optional-dependencies] +dev = ["pytest>=7.0", "pytest-cov>=4.0", "black>=23.0", "ruff>=0.1"] + +[project.scripts] +courselab-bench = "courselab_bench.cli:main" + +[tool.setuptools.packages.find] +where = ["."] +include = ["courselab_bench*"] + +[tool.black] +line-length = 100 +target-version = ["py310"] + +[tool.ruff] +line-length = 100 +target-version = "py310" + +[tool.pytest.ini_options] +testpaths = ["tests"] +markers = ["requires_api: tests that need API keys", "slow: slow tests"] diff --git a/benchmarks/courselab_bench/run_benchmark.py b/benchmarks/courselab_bench/run_benchmark.py new file mode 100644 index 0000000..40c0bdb --- /dev/null +++ b/benchmarks/courselab_bench/run_benchmark.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +import sys +import json +import argparse +from pathlib import Path +from datetime import datetime +from loguru import logger + +sys.path.insert(0, str(Path(__file__).parent)) +from courselab_bench import ( + load_tasks, + DockerEnvironment, + LiteLLMModel, + REACTAgent, + execute_task, + save_trajectory, +) +from courselab_bench.evaluation.evaluator import evaluate_task, compute_summary +from courselab_bench.utils.env_loader import load_env_config + + +def main(): + parser = argparse.ArgumentParser(description="Run benchmark") + parser.add_argument( + "--tasks", + type=str, + default="data/tasks.jsonl", + help="Path to tasks JSONL file (default: data/tasks.jsonl)", + ) + parser.add_argument("--model", type=str, default="anthropic/claude-sonnet-4-5-20250929") + parser.add_argument("--max-steps", type=int, default=50) + parser.add_argument("--max-cost", type=float, default=5.0) + parser.add_argument("--output-dir", type=str, default="outputs") + + args = parser.parse_args() + logger.remove() + logger.add(sys.stderr, level="INFO", format="{message}", colorize=True) + + load_env_config() + tasks_file = Path(args.tasks) + if not tasks_file.exists(): + logger.error(f"Error: Tasks file not found: {tasks_file}") + logger.error("Run 'python prepare_dataset.py' first to generate tasks.jsonl") + sys.exit(1) + + tasks = load_tasks(tasks_file) + if not tasks: + logger.error("No tasks found") + sys.exit(1) + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + output_dir = Path(args.output_dir) / f"run_{timestamp}" + output_dir.mkdir(parents=True, exist_ok=True) + (output_dir / "trajectories").mkdir(exist_ok=True) + + logger.info(f"Loaded {len(tasks)} task(s)") + logger.info(f"Output: {output_dir}") + + results = [] + for idx, task in enumerate(tasks, 1): + logger.info(f"\n[{idx}/{len(tasks)}] {task['instance_id']}") + + env = DockerEnvironment( + image=task["docker_image"], + timeout=task.get("timeout_minutes", 30) * 60, + work_dir="/workspace", + ) + model = LiteLLMModel(model_name=args.model, temperature=0.0, max_tokens=4096) + agent = REACTAgent( + model=model, env=env, config={"max_steps": args.max_steps, "max_cost": args.max_cost} + ) + + try: + result = execute_task(task, agent, env) + result["course_id"] = task["course_id"] + result["passed"] = evaluate_task(result) + + trajectory = result.pop("trajectory", []) + traj_file = output_dir / "trajectories" / f"{task['instance_id']}.jsonl" + 
save_trajectory(trajectory, traj_file) + + results.append(result) + + status = "✓" if result["passed"] else "✗" + logger.info(f"{status} {result['agent_status']} | ${result['model_cost']:.4f}") + + except Exception as e: + logger.error(f"Error: {e}") + results.append( + { + "instance_id": task["instance_id"], + "course_id": task["course_id"], + "passed": False, + "agent_status": "error", + "error": str(e), + } + ) + finally: + env.cleanup() + + summary = compute_summary(results) + output = { + "config": { + "model": args.model, + "max_steps": args.max_steps, + "max_cost": args.max_cost, + "timestamp": datetime.now().isoformat(), + }, + "summary": summary, + "results": results, + } + + with (output_dir / "results.json").open("w") as f: + json.dump(output, f, indent=2) + + logger.info("\n" + "=" * 70) + logger.info( + f"Pass rate: {summary['passed']}/{summary['total']} ({summary['success_rate']:.1%})" + ) + logger.info(f"Total cost: ${summary['total_cost']:.4f}") + logger.info(f"Results: {output_dir}") + logger.info("=" * 70) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/courselab_bench/tests/__init__.py b/benchmarks/courselab_bench/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/benchmarks/courselab_bench/tests/test_data_schema.py b/benchmarks/courselab_bench/tests/test_data_schema.py new file mode 100644 index 0000000..7b4c3cb --- /dev/null +++ b/benchmarks/courselab_bench/tests/test_data_schema.py @@ -0,0 +1,107 @@ +import json +from pathlib import Path +import pytest + +DATA_DIR = Path(__file__).parent.parent / "data" + + +def get_task_folders(data_dir: Path) -> list[Path]: + task_folders = [] + for item in data_dir.iterdir(): + if not item.is_dir(): + continue + if (item / "config.json").exists(): + task_folders.append(item) + else: + for task_dir in item.iterdir(): + if task_dir.is_dir() and (task_dir / "config.json").exists(): + task_folders.append(task_dir) + return task_folders + + +class TestTaskStructure: + def test_data_dir_exists(self): + assert DATA_DIR.exists(), f"Data directory not found: {DATA_DIR}" + + def test_tasks_found(self): + task_folders = get_task_folders(DATA_DIR) + assert len(task_folders) > 0, "No tasks found in data directory" + + def test_required_files_exist(self): + task_folders = get_task_folders(DATA_DIR) + required_files = ["config.json", "task.md", "preprocess.sh", "evaluate.sh"] + + for task_folder in task_folders: + for filename in required_files: + file_path = task_folder / filename + assert file_path.exists(), f"{task_folder.name} missing {filename}" + + def test_config_valid_json(self): + task_folders = get_task_folders(DATA_DIR) + + for task_folder in task_folders: + config_path = task_folder / "config.json" + with config_path.open("r") as f: + config = json.load(f) + assert isinstance(config, dict), f"{task_folder.name}: config.json must be object" + + def test_config_required_fields(self): + task_folders = get_task_folders(DATA_DIR) + required_fields = ["instance_id", "course_id", "docker_image"] + + for task_folder in task_folders: + config_path = task_folder / "config.json" + with config_path.open("r") as f: + config = json.load(f) + + for field in required_fields: + assert field in config, f"{task_folder.name}: missing {field}" + assert isinstance(config[field], str), f"{task_folder.name}: {field} must be string" + + def test_config_optional_fields(self): + task_folders = get_task_folders(DATA_DIR) + + for task_folder in task_folders: + config_path = task_folder / "config.json" + with 
config_path.open("r") as f: + config = json.load(f) + + if "timeout_minutes" in config: + assert isinstance(config["timeout_minutes"], (int, float)) + assert config["timeout_minutes"] > 0 + + if "tags" in config: + assert isinstance(config["tags"], list) + for tag in config["tags"]: + assert isinstance(tag, str) + + if "repo_url" in config: + assert isinstance(config["repo_url"], (str, type(None))) + + if "base_commit" in config: + assert isinstance(config["base_commit"], (str, type(None))) + + def test_scripts_executable(self): + task_folders = get_task_folders(DATA_DIR) + script_files = ["preprocess.sh", "evaluate.sh"] + + for task_folder in task_folders: + for script in script_files: + script_path = task_folder / script + assert script_path.exists() + + def test_instance_ids_unique(self): + task_folders = get_task_folders(DATA_DIR) + instance_ids = [] + + for task_folder in task_folders: + config_path = task_folder / "config.json" + with config_path.open("r") as f: + config = json.load(f) + instance_ids.append(config["instance_id"]) + + assert len(instance_ids) == len(set(instance_ids)), "Duplicate instance_ids found" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) From 309a3ae387037ccf9c6cdb9622f30be3798a85fc Mon Sep 17 00:00:00 2001 From: Tarek Date: Fri, 12 Dec 2025 13:57:40 -0800 Subject: [PATCH 02/10] docs(course_lab_bench): update task instructions to include course metadata Signed-off-by: Tarek --- benchmarks/courselab_bench/README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/benchmarks/courselab_bench/README.md b/benchmarks/courselab_bench/README.md index ec0c305..7c4be8a 100644 --- a/benchmarks/courselab_bench/README.md +++ b/benchmarks/courselab_bench/README.md @@ -112,8 +112,9 @@ See `data/test_course/test__simple__echo/` for a minimal example. ## Adding New Tasks -1. Create a new folder: `data/{course_id}/{task_id}/` -2. Add the 4 required files: `config.json`, `task.md`, `preprocess.sh`, `evaluate.sh` -3. Make scripts executable: `chmod +x data/{course_id}/{task_id}/*.sh` -4. Run `python prepare_dataset.py` to regenerate `tasks.jsonl` -5. Run the benchmark +1. If you are adding tasks for a new course, first add a new entry to [`/data/courses.json`](./data/courses.json) with the course metadata +2. Create a new folder: `data/{course_id}/{task_id}/` (where `{course_id}` matches the entry in `courses.json`) +3. Add the 4 required files: `config.json`, `task.md`, `preprocess.sh`, `evaluate.sh` for each task +4. Make scripts executable +5. Run `python prepare_dataset.py` to regenerate `tasks.jsonl` +6. 
Run the benchmark From f2a19aea0e6cc824c72cec68e26178c1b4aef6cd Mon Sep 17 00:00:00 2001 From: Tarek Date: Fri, 12 Dec 2025 14:00:13 -0800 Subject: [PATCH 03/10] docs(pyproject.toml): update author information Signed-off-by: Tarek --- benchmarks/courselab_bench/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/courselab_bench/pyproject.toml b/benchmarks/courselab_bench/pyproject.toml index 5a94d22..7b3aa9d 100644 --- a/benchmarks/courselab_bench/pyproject.toml +++ b/benchmarks/courselab_bench/pyproject.toml @@ -9,7 +9,7 @@ description = "A benchmark for evaluating AI agents on systems programming labs" readme = "README.md" requires-python = ">=3.10" license = { text = "MIT" } -authors = [{ name = "System Intelligence Benchmark Team" }] +authors = [{ name = "Tarek Elsayed", email = "tareknaser360@gmail.com" }] dependencies = [ "pydantic>=2.0", From 40cfa2ab1fae6ac2efb4150ffab38b0df2ac6ffb Mon Sep 17 00:00:00 2001 From: Tarek Date: Mon, 15 Dec 2025 10:57:44 -0800 Subject: [PATCH 04/10] fix(docker): go PATH for login shells in Docker environment Add PATH exports to profile files so Go is available when using `bash -lc` Signed-off-by: Tarek --- benchmarks/course_lab_bench/go-python.Dockerfile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/benchmarks/course_lab_bench/go-python.Dockerfile b/benchmarks/course_lab_bench/go-python.Dockerfile index af38642..f4211e5 100644 --- a/benchmarks/course_lab_bench/go-python.Dockerfile +++ b/benchmarks/course_lab_bench/go-python.Dockerfile @@ -25,11 +25,13 @@ RUN apt-get update && apt-get install -y wget tar git build-essential \ ENV PATH="/usr/local/go/bin:${PATH}" -RUN python --version && go version - SHELL ["/bin/bash", "-c"] # This is where pipx installs things -ENV PATH="$PATH:/root/.local/bin/" +ENV PATH="$PATH:/root/.local/bin/" + +# Write PATH to profile files so it's available in login shells (bash -lc) +RUN echo 'export PATH="/usr/local/go/bin:/root/.local/bin:$PATH"' >> /etc/profile && \ + echo 'export PATH="/usr/local/go/bin:/root/.local/bin:$PATH"' >> /root/.bashrc RUN python --version && go version From bcace88ca6a15578704af525f655f91436c14507 Mon Sep 17 00:00:00 2001 From: Tarek Date: Mon, 15 Dec 2025 11:04:14 -0800 Subject: [PATCH 05/10] feat(executor): retry mechanism for evaluation script to handle flaky tests Signed-off-by: Tarek --- benchmarks/courselab_bench/README.md | 2 ++ .../courselab_bench/runner/executor.py | 31 +++++++++++++++---- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/benchmarks/courselab_bench/README.md b/benchmarks/courselab_bench/README.md index 7c4be8a..bee49ff 100644 --- a/benchmarks/courselab_bench/README.md +++ b/benchmarks/courselab_bench/README.md @@ -106,6 +106,8 @@ Exit with code 0 on success, non-zero on failure. Runs after the agent completes. Exit 0 for PASS, non-zero for FAIL. Print verbose output for debugging (captured in results). +> The evaluation script is automatically retried up to 3 times or until a successful evaluation. This helps handle flaky tests or non-deterministic timeouts common in some systems programming labs. + ### Example Task See `data/test_course/test__simple__echo/` for a minimal example. 
diff --git a/benchmarks/courselab_bench/courselab_bench/runner/executor.py b/benchmarks/courselab_bench/courselab_bench/runner/executor.py index e7d3524..d6e99fb 100644 --- a/benchmarks/courselab_bench/courselab_bench/runner/executor.py +++ b/benchmarks/courselab_bench/courselab_bench/runner/executor.py @@ -39,12 +39,31 @@ def execute_task(task: dict[str, Any], agent: Any, env: Any) -> dict[str, Any]: agent_result = {"messages": [], "cost": 0.0, "status": "agent_error", "steps": 0} logger.info(f"\nRunning evaluation...") - try: - test_timeout = task.get("timeout_minutes", 30) * 60 - test_result = _run_evaluate_script(env, task["evaluate_script"], test_timeout) - except Exception as e: - logger.error(f"Evaluation error: {e}") - test_result = {"output": f"[ERROR: {e}]", "returncode": -1} + + # Retry evaluation up to 3 times to handle flaky tests + max_retries = 3 + test_result = None + for attempt in range(1, max_retries + 1): + try: + if attempt > 1: + logger.info(f"Retry attempt {attempt}/{max_retries}...") + test_timeout = task.get("timeout_minutes", 30) * 60 + test_result = _run_evaluate_script(env, task["evaluate_script"], test_timeout) + + if test_result.get("returncode") == 0: + if attempt > 1: + logger.info(f"Evaluation passed on attempt {attempt}") + break + + if attempt < max_retries: + logger.warning(f"Evaluation failed on attempt {attempt}, retrying...") + except Exception as e: + logger.error(f"Evaluation error on attempt {attempt}: {e}") + test_result = {"output": f"[ERROR: {e}]", "returncode": -1} + if attempt < max_retries: + logger.warning(f"Retrying after error...") + if test_result is None: + test_result = {"output": "[ERROR: No test result]", "returncode": -1} duration = time.time() - start_time From dccef81e81bcd5c8551303badb529d8ff4b4ccd6 Mon Sep 17 00:00:00 2001 From: Tarek Date: Mon, 15 Dec 2025 12:20:33 -0800 Subject: [PATCH 06/10] docs(courselab_bench): add a note on previous labs reference implementation Signed-off-by: Tarek --- benchmarks/courselab_bench/README.md | 1 + .../mit_6_5840_2024/4a_kvraft/preprocess.sh | 721 +++++++++- .../mit_6_5840_2024/4b_kvraft/preprocess.sh | 721 +++++++++- .../mit_6_5840_2024/5a_shardkv/preprocess.sh | 1169 ++++++++++++++++- .../mit_6_5840_2024/5b_shardkv/preprocess.sh | 1169 ++++++++++++++++- 5 files changed, 3689 insertions(+), 92 deletions(-) diff --git a/benchmarks/courselab_bench/README.md b/benchmarks/courselab_bench/README.md index bee49ff..f2bcf26 100644 --- a/benchmarks/courselab_bench/README.md +++ b/benchmarks/courselab_bench/README.md @@ -96,6 +96,7 @@ Markdown file containing the problem statement given to the agent. Shell script that runs before the agent starts. Use this to: - Set up the environment +- If the lab depends on previous labs, copy reference implementations to prevent distractions - Create checksums of files that shouldn't be modified - Initialize test data diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/preprocess.sh index 424c3c6..3523e6a 100755 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/preprocess.sh +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/preprocess.sh @@ -1,15 +1,16 @@ #!/bin/bash set -e -echo "=== Preprocessing KVRaft Lab 4A ===" +echo '=== Preprocessing 4A Kvraft ===' cd /workspace -echo "KVRaft depends on Raft implementation from Lab 3" -echo "Copying reference Raft implementation..." +echo 'Copying reference implementations from previous labs...' 
-echo ' Copying raft.go' -cat > src/raft/raft.go << 'RAFT_EOF' +echo 'Copying raft implementation...' +mkdir -p src/raft + +cat > src/raft/raft.go << 'FILE_EOF_raft_raft_go' package raft // @@ -427,10 +428,9 @@ func Make(peers []*labrpc.ClientEnd, me int, return rf } -RAFT_EOF +FILE_EOF_raft_raft_go -echo ' Copying election.go' -cat > src/raft/election.go << 'RAFT_EOF' +cat > src/raft/election.go << 'FILE_EOF_raft_election_go' package raft import ( @@ -555,10 +555,9 @@ func (rf *Raft) isElectionTimeout() bool { return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) } -RAFT_EOF +FILE_EOF_raft_election_go -echo ' Copying append_entries.go' -cat > src/raft/append_entries.go << 'RAFT_EOF' +cat > src/raft/append_entries.go << 'FILE_EOF_raft_append_entries_go' package raft // Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 @@ -774,10 +773,9 @@ func (rf *Raft) broadcaster(peer int) { } } -RAFT_EOF +FILE_EOF_raft_append_entries_go -echo ' Copying install_snapshot.go' -cat > src/raft/install_snapshot.go << 'RAFT_EOF' +cat > src/raft/install_snapshot.go << 'FILE_EOF_raft_install_snapshot_go' package raft type InstallSnapshotArgs struct { @@ -871,10 +869,9 @@ func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { rf.persister.Save(rf.encodeState(), args.Data) } -RAFT_EOF +FILE_EOF_raft_install_snapshot_go -echo ' Copying util.go' -cat > src/raft/util.go << 'RAFT_EOF' +cat > src/raft/util.go << 'FILE_EOF_raft_util_go' package raft import ( @@ -892,10 +889,684 @@ func DPrintf(format string, a ...interface{}) { log.Printf(format, a...) } -RAFT_EOF +FILE_EOF_raft_util_go + +echo 'Copying kvsrv implementation...' +mkdir -p src/kvsrv + +cat > src/kvsrv/client.go << 'FILE_EOF_kvsrv_client_go' +package kvsrv + +import ( + "crypto/rand" + "math/big" + + "6.5840/labrpc" +) + +type Clerk struct { + server *labrpc.ClientEnd + clientId int64 + seqNum int +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +func MakeClerk(server *labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.server = server + ck.clientId = nrand() + ck.seqNum = 0 + return ck +} + +// fetch the current value for a key. +// returns "" if the key does not exist. +// keeps trying forever in the face of all other errors. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer.Get", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) Get(key string) string { + ck.seqNum++ + args := GetArgs{ + Key: key, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := GetReply{} + for !ck.server.Call("KVServer.Get", &args, &reply) { + } + return reply.Value +} + +// shared by Put and Append. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer."+op, &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. 
+func (ck *Clerk) PutAppend(key string, value string, op string) string { + ck.seqNum++ + args := PutAppendArgs{ + Key: key, + Value: value, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := PutAppendReply{} + for !ck.server.Call("KVServer."+op, &args, &reply) { + } + return reply.Value +} + +func (ck *Clerk) Put(key string, value string) { + ck.PutAppend(key, value, "Put") +} + +// Append value to key's value and return that value +func (ck *Clerk) Append(key string, value string) string { + return ck.PutAppend(key, value, "Append") +} + +FILE_EOF_kvsrv_client_go + +cat > src/kvsrv/common.go << 'FILE_EOF_kvsrv_common_go' +package kvsrv + +type PutAppendArgs struct { + Key string + Value string + ClientId int64 + SeqNum int +} + +type PutAppendReply struct { + Value string +} + +type GetArgs struct { + Key string + ClientId int64 + SeqNum int +} + +type GetReply struct { + Value string +} + +FILE_EOF_kvsrv_common_go + +cat > src/kvsrv/server.go << 'FILE_EOF_kvsrv_server_go' +package kvsrv + +import ( + "log" + "sync" +) + +const Debug = false + +func DPrintf(format string, a ...interface{}) (n int, err error) { + if Debug { + log.Printf(format, a...) + } + return +} + +type Cache struct { + seq int + value string +} + +type KVServer struct { + mu sync.Mutex + data map[string]string + cache map[int64]*Cache // client id -> seq ->value +} + +func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + key := args.Key + reply.Value = "" + // Either the client is new or the seqNum is greater than the cache seqNum. + // In both cases, we can return the value directly. + if ca, ok := kv.cache[clientId]; !ok || ca.seq <= seqNum { + reply.Value = kv.data[key] + return + } +} + +func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + kv.data[k] = v + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = reply.Value +} + +func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + // For ca.seq == seqNum, it means that the value has been appended. + // However, the response might be lost, so we return the cache value. + // For ca.seq > seqNum, it doesnt matter what the value is, just return. + if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + reply.Value = ca.value + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + reply.Value = kv.data[k] + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = kv.data[k] + kv.data[k] += v +} + +func StartKVServer() *KVServer { + kv := new(KVServer) + kv.data = make(map[string]string) + kv.cache = make(map[int64]*Cache) + return kv +} + +FILE_EOF_kvsrv_server_go + +echo 'Copying mr implementation...' +mkdir -p src/mr +cat > src/mr/coordinator.go << 'FILE_EOF_mr_coordinator_go' +package mr + +import ( + "log" + "math" + "net" + "net/http" + "net/rpc" + "os" + "sync" + "time" +) + +const SUCCESS = math.MaxInt32 + +type Coordinator struct { + // Your definitions here. 
+ tasks chan Work // a taskqueue + mu sync.Mutex + terms []int + wg sync.WaitGroup + nMap int + nReduce int + done bool +} + +func (c *Coordinator) CallGetWork(args *WorkArgs, reply *WorkReply) error { + if len(c.tasks) == 0 { + reply.HasWork = false + return nil + } + reply.Work = <-c.tasks + c.mu.Lock() + reply.Term = c.terms[reply.Work.FileIndex] + c.mu.Unlock() + reply.HasWork = true + + go func() { + time.Sleep(10 * time.Second) + c.mu.Lock() + defer c.mu.Unlock() + if c.terms[reply.Work.FileIndex] == SUCCESS { + return + } + c.terms[reply.Work.FileIndex]++ + c.tasks <- reply.Work + }() + + return nil +} + +func (c *Coordinator) CallReport(args *ReportArgs, reply *ReportReply) error { + c.mu.Lock() + defer c.mu.Unlock() + + if c.terms[args.Work.FileIndex] != args.Term { + reply.Success = false + return nil + } + c.terms[args.Work.FileIndex] = SUCCESS + c.wg.Done() + reply.Success = true + return nil +} + +// start a thread that listens for RPCs from worker.go +func (c *Coordinator) server() { + rpc.Register(c) + rpc.HandleHTTP() + //l, e := net.Listen("tcp", ":1234") + sockname := coordinatorSock() + os.Remove(sockname) + l, e := net.Listen("unix", sockname) + if e != nil { + log.Fatal("listen error:", e) + } + go http.Serve(l, nil) +} + +// main/mrcoordinator.go calls Done() periodically to find out +// if the entire job has finished. +func (c *Coordinator) Done() bool { + return c.done +} + +func StartReduceWork(c *Coordinator) { + c.wg.Wait() + c.terms = make([]int, c.nReduce) + for i := 0; i < c.nReduce; i++ { + c.tasks <- Work{ + WorkType: REDUCE, + FileIndex: i, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go WorkDone(c) +} + +func WorkDone(c *Coordinator) { + c.wg.Wait() + c.done = true +} + +// create a Coordinator. +// main/mrcoordinator.go calls this function. +// nReduce is the number of reduce tasks to use. +func MakeCoordinator(files []string, nReduce int) *Coordinator { + + var buflen int + if len(files) > nReduce { + buflen = len(files) + } else { + buflen = nReduce + } + + c := Coordinator{ + nMap: len(files), + nReduce: nReduce, + wg: sync.WaitGroup{}, + tasks: make(chan Work, buflen), + terms: make([]int, len(files)), + done: false, + } + + for idx, file := range files { + c.tasks <- Work{ + WorkType: MAP, + Filename: file, + FileIndex: idx, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go StartReduceWork(&c) + c.server() + + return &c +} + +FILE_EOF_mr_coordinator_go + +cat > src/mr/rpc.go << 'FILE_EOF_mr_rpc_go' +package mr + +// +// RPC definitions. +// +// remember to capitalize all names. +// + +import ( + "os" + "strconv" +) -echo "Creating checksums for protected files" +// +// example to show how to declare the arguments +// and reply for an RPC. 
+// + +type ExampleArgs struct { + X int +} + +type ExampleReply struct { + Y int +} + +/*-Define Work-*/ + +type WorkStatus int + +const ( + IDLE WorkStatus = iota + START + FINISH +) + +type WorkType int + +const ( + MAP WorkType = iota + REDUCE +) + +type Work struct { + WorkType WorkType // MAP or REDUCE + Filename string + FileIndex int // This is a convention for mr-X index + NMapWork int // how many map files + NReduce int // how many reduce files +} + +type WorkArgs struct { + WorkerID int +} + +type WorkReply struct { + HasWork bool + Work Work + Term int +} + +/*-Define Report-*/ +// Report work finish only if success +type ReportArgs struct { + Work Work + Term int +} + +type ReportReply struct { + Success bool +} + +// Cook up a unique-ish UNIX-domain socket name +// in /var/tmp, for the coordinator. +// Can't use the current directory since +// Athena AFS doesn't support UNIX-domain sockets. +func coordinatorSock() string { + s := "/var/tmp/5840-mr-" + s += strconv.Itoa(os.Getuid()) + return s +} + +FILE_EOF_mr_rpc_go + +cat > src/mr/worker.go << 'FILE_EOF_mr_worker_go' +package mr + +import ( + "encoding/json" + "fmt" + "hash/fnv" + "io/ioutil" + "log" + "net/rpc" + "os" + "sort" + "time" +) + +// for sorting by key. +type ByKey []KeyValue + +// for sorting by key. +func (a ByKey) Len() int { return len(a) } +func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } + +// Map functions return a slice of KeyValue. +type KeyValue struct { + Key string + Value string +} + +// use ihash(key) % NReduce to choose the reduce +// task number for each KeyValue emitted by Map. +func ihash(key string) int { + h := fnv.New32a() + h.Write([]byte(key)) + return int(h.Sum32() & 0x7fffffff) +} + +// main/mrworker.go calls this function. +func Worker(mapf func(string, string) []KeyValue, + reducef func(string, []string) string) { + + // Your worker implementation here. + for { + r := CallGetWok() + if !r.HasWork { + time.Sleep(3 * time.Second) + continue + } + + switch r.Work.WorkType { + case MAP: + DoMapWork(r.Work, mapf, r.Term) + case REDUCE: + DoReduceWork(r.Work, reducef, r.Term) + } + } +} + +func DoReduceWork(work Work, reducef func(string, []string) string, term int) { + fileIndex := work.FileIndex + intermediate := []KeyValue{} + + for i := 0; i < work.NMapWork; i++ { + filename := fmt.Sprintf("mr-%d-%d", i, fileIndex) + file, err := os.Open(filename) + + if err != nil { + log.Fatalf("cannot open %v", filename) + } + + dec := json.NewDecoder(file) + + for { + var kv KeyValue + if err := dec.Decode(&kv); err != nil { + break + } + intermediate = append(intermediate, kv) + } + file.Close() + } + + sort.Sort(ByKey(intermediate)) + + oname := fmt.Sprintf("mr-out-%d", fileIndex) + ofile, _ := ioutil.TempFile(".", oname) + + // + // call Reduce on each distinct key in intermediate[], + // and print the result to mr-out-0. + // + i := 0 + for i < len(intermediate) { + j := i + 1 + for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { + j++ + } + values := []string{} + for k := i; k < j; k++ { + values = append(values, intermediate[k].Value) + } + output := reducef(intermediate[i].Key, values) + + // this is the correct format for each line of Reduce output. 
+ fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) + + i = j + } + + os.Rename(ofile.Name(), oname) + + CallReport(work, term) +} + +func DoMapWork(work Work, mapf func(string, string) []KeyValue, term int) { + filename := work.Filename + + file, err := os.Open(filename) + if err != nil { + log.Fatalf("cannot open %v", filename) + } + + content, err := ioutil.ReadAll(file) + + if err != nil { + log.Fatalf("cannot read %v", filename) + } + + file.Close() + + kva := mapf(work.Filename, string(content)) + + //make a + for i := 0; i < work.NReduce; i++ { + imtFilename := fmt.Sprintf("mr-%d-%d", work.FileIndex, i) + + imtFile, err := ioutil.TempFile(".", imtFilename) + + enc := json.NewEncoder(imtFile) + + if err != nil { + log.Fatalf("cannot create %v", imtFilename) + } + + for _, kv := range kva { + hash := ihash(kv.Key) % work.NReduce + if hash == i { + err := enc.Encode(&kv) + if err != nil { + log.Fatalf("cannot encode %v", kv) + } + } + } + + imtFile.Close() + + os.Rename(imtFile.Name(), imtFilename) + } + + CallReport(work, term) +} + +func CallReport(w Work, term int) { + args := ReportArgs{ + Work: w, + Term: term, + } + reply := ReportReply{} + ok := call("Coordinator.CallReport", &args, &reply) + + if !ok { + fmt.Printf("call failed!\n") + } +} + +func CallGetWok() WorkReply { + args := WorkArgs{} + reply := WorkReply{} + ok := call("Coordinator.CallGetWork", &args, &reply) + + if !ok { + fmt.Printf("call failed!\n") + } + + return reply +} + +// example function to show how to make an RPC call to the coordinator. +// +// the RPC argument and reply types are defined in rpc.go. +func CallExample() { + + // declare an argument structure. + args := ExampleArgs{} + + // fill in the argument(s). + args.X = 99 + + // declare a reply structure. + reply := ExampleReply{} + + // send the RPC request, wait for the reply. + // the "Coordinator.Example" tells the + // receiving server that we'd like to call + // the Example() method of struct Coordinator. + ok := call("Coordinator.Example", &args, &reply) + if ok { + // reply.Y should be 100. + fmt.Printf("reply.Y %v\n", reply.Y) + } else { + fmt.Printf("call failed!\n") + } +} + +// send an RPC request to the coordinator, wait for the response. +// usually returns true. +// returns false if something goes wrong. +func call(rpcname string, args interface{}, reply interface{}) bool { + // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") + sockname := coordinatorSock() + c, err := rpc.DialHTTP("unix", sockname) + if err != nil { + log.Fatal("dialing:", err) + } + defer c.Close() + + err = c.Call(rpcname, args, reply) + if err == nil { + return true + } + + fmt.Println(err) + return false +} + +FILE_EOF_mr_worker_go + + +echo 'Creating checksums for protected files...' 
PROTECTED_FILES=( "src/kvraft/config.go" "src/kvraft/test_test.go" @@ -904,12 +1575,16 @@ PROTECTED_FILES=( mkdir -p /tmp/checksums for file in "${PROTECTED_FILES[@]}"; do if [ -f "$file" ]; then - sha256sum "$file" > "/tmp/checksums/$(basename $file).sha256" + sha256sum "$file" > "/tmp/checksums/$(basename $file).$(dirname $file | tr '/' '_').sha256" echo " $file" fi done +echo '' +echo 'Preprocessing complete' +echo 'Agent should focus on implementing:' +echo ' - src/kvraft/client.go' +echo ' - src/kvraft/common.go' +echo ' - src/kvraft/server.go' - -echo "Preprocessing complete" -exit 0 +exit 0 \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/preprocess.sh index 21aba34..8fd1995 100755 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/preprocess.sh +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/preprocess.sh @@ -1,15 +1,16 @@ #!/bin/bash set -e -echo "=== Preprocessing KVRaft Lab 4B ===" +echo '=== Preprocessing 4B Kvraft ===' cd /workspace -echo "KVRaft depends on Raft implementation from Lab 3" -echo "Copying reference Raft implementation..." +echo 'Copying reference implementations from previous labs...' -echo ' Copying raft.go' -cat > src/raft/raft.go << 'RAFT_EOF' +echo 'Copying raft implementation...' +mkdir -p src/raft + +cat > src/raft/raft.go << 'FILE_EOF_raft_raft_go' package raft // @@ -427,10 +428,9 @@ func Make(peers []*labrpc.ClientEnd, me int, return rf } -RAFT_EOF +FILE_EOF_raft_raft_go -echo ' Copying election.go' -cat > src/raft/election.go << 'RAFT_EOF' +cat > src/raft/election.go << 'FILE_EOF_raft_election_go' package raft import ( @@ -555,10 +555,9 @@ func (rf *Raft) isElectionTimeout() bool { return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) } -RAFT_EOF +FILE_EOF_raft_election_go -echo ' Copying append_entries.go' -cat > src/raft/append_entries.go << 'RAFT_EOF' +cat > src/raft/append_entries.go << 'FILE_EOF_raft_append_entries_go' package raft // Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 @@ -774,10 +773,9 @@ func (rf *Raft) broadcaster(peer int) { } } -RAFT_EOF +FILE_EOF_raft_append_entries_go -echo ' Copying install_snapshot.go' -cat > src/raft/install_snapshot.go << 'RAFT_EOF' +cat > src/raft/install_snapshot.go << 'FILE_EOF_raft_install_snapshot_go' package raft type InstallSnapshotArgs struct { @@ -871,10 +869,9 @@ func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { rf.persister.Save(rf.encodeState(), args.Data) } -RAFT_EOF +FILE_EOF_raft_install_snapshot_go -echo ' Copying util.go' -cat > src/raft/util.go << 'RAFT_EOF' +cat > src/raft/util.go << 'FILE_EOF_raft_util_go' package raft import ( @@ -892,10 +889,684 @@ func DPrintf(format string, a ...interface{}) { log.Printf(format, a...) } -RAFT_EOF +FILE_EOF_raft_util_go + +echo 'Copying kvsrv implementation...' +mkdir -p src/kvsrv + +cat > src/kvsrv/client.go << 'FILE_EOF_kvsrv_client_go' +package kvsrv + +import ( + "crypto/rand" + "math/big" + + "6.5840/labrpc" +) + +type Clerk struct { + server *labrpc.ClientEnd + clientId int64 + seqNum int +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +func MakeClerk(server *labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.server = server + ck.clientId = nrand() + ck.seqNum = 0 + return ck +} + +// fetch the current value for a key. 
+// returns "" if the key does not exist. +// keeps trying forever in the face of all other errors. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer.Get", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) Get(key string) string { + ck.seqNum++ + args := GetArgs{ + Key: key, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := GetReply{} + for !ck.server.Call("KVServer.Get", &args, &reply) { + } + return reply.Value +} + +// shared by Put and Append. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer."+op, &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) PutAppend(key string, value string, op string) string { + ck.seqNum++ + args := PutAppendArgs{ + Key: key, + Value: value, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := PutAppendReply{} + for !ck.server.Call("KVServer."+op, &args, &reply) { + } + return reply.Value +} + +func (ck *Clerk) Put(key string, value string) { + ck.PutAppend(key, value, "Put") +} + +// Append value to key's value and return that value +func (ck *Clerk) Append(key string, value string) string { + return ck.PutAppend(key, value, "Append") +} + +FILE_EOF_kvsrv_client_go + +cat > src/kvsrv/common.go << 'FILE_EOF_kvsrv_common_go' +package kvsrv + +type PutAppendArgs struct { + Key string + Value string + ClientId int64 + SeqNum int +} + +type PutAppendReply struct { + Value string +} + +type GetArgs struct { + Key string + ClientId int64 + SeqNum int +} + +type GetReply struct { + Value string +} + +FILE_EOF_kvsrv_common_go + +cat > src/kvsrv/server.go << 'FILE_EOF_kvsrv_server_go' +package kvsrv + +import ( + "log" + "sync" +) + +const Debug = false + +func DPrintf(format string, a ...interface{}) (n int, err error) { + if Debug { + log.Printf(format, a...) + } + return +} + +type Cache struct { + seq int + value string +} + +type KVServer struct { + mu sync.Mutex + data map[string]string + cache map[int64]*Cache // client id -> seq ->value +} + +func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + key := args.Key + reply.Value = "" + // Either the client is new or the seqNum is greater than the cache seqNum. + // In both cases, we can return the value directly. + if ca, ok := kv.cache[clientId]; !ok || ca.seq <= seqNum { + reply.Value = kv.data[key] + return + } +} + +func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + kv.data[k] = v + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = reply.Value +} + +func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + // For ca.seq == seqNum, it means that the value has been appended. + // However, the response might be lost, so we return the cache value. 
+ // For ca.seq > seqNum, it doesnt matter what the value is, just return. + if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + reply.Value = ca.value + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + reply.Value = kv.data[k] + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = kv.data[k] + kv.data[k] += v +} + +func StartKVServer() *KVServer { + kv := new(KVServer) + kv.data = make(map[string]string) + kv.cache = make(map[int64]*Cache) + return kv +} + +FILE_EOF_kvsrv_server_go + +echo 'Copying mr implementation...' +mkdir -p src/mr +cat > src/mr/coordinator.go << 'FILE_EOF_mr_coordinator_go' +package mr + +import ( + "log" + "math" + "net" + "net/http" + "net/rpc" + "os" + "sync" + "time" +) + +const SUCCESS = math.MaxInt32 + +type Coordinator struct { + // Your definitions here. + tasks chan Work // a taskqueue + mu sync.Mutex + terms []int + wg sync.WaitGroup + nMap int + nReduce int + done bool +} + +func (c *Coordinator) CallGetWork(args *WorkArgs, reply *WorkReply) error { + if len(c.tasks) == 0 { + reply.HasWork = false + return nil + } + reply.Work = <-c.tasks + c.mu.Lock() + reply.Term = c.terms[reply.Work.FileIndex] + c.mu.Unlock() + reply.HasWork = true + + go func() { + time.Sleep(10 * time.Second) + c.mu.Lock() + defer c.mu.Unlock() + if c.terms[reply.Work.FileIndex] == SUCCESS { + return + } + c.terms[reply.Work.FileIndex]++ + c.tasks <- reply.Work + }() + + return nil +} + +func (c *Coordinator) CallReport(args *ReportArgs, reply *ReportReply) error { + c.mu.Lock() + defer c.mu.Unlock() + + if c.terms[args.Work.FileIndex] != args.Term { + reply.Success = false + return nil + } + c.terms[args.Work.FileIndex] = SUCCESS + c.wg.Done() + reply.Success = true + return nil +} + +// start a thread that listens for RPCs from worker.go +func (c *Coordinator) server() { + rpc.Register(c) + rpc.HandleHTTP() + //l, e := net.Listen("tcp", ":1234") + sockname := coordinatorSock() + os.Remove(sockname) + l, e := net.Listen("unix", sockname) + if e != nil { + log.Fatal("listen error:", e) + } + go http.Serve(l, nil) +} + +// main/mrcoordinator.go calls Done() periodically to find out +// if the entire job has finished. +func (c *Coordinator) Done() bool { + return c.done +} + +func StartReduceWork(c *Coordinator) { + c.wg.Wait() + c.terms = make([]int, c.nReduce) + for i := 0; i < c.nReduce; i++ { + c.tasks <- Work{ + WorkType: REDUCE, + FileIndex: i, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go WorkDone(c) +} + +func WorkDone(c *Coordinator) { + c.wg.Wait() + c.done = true +} + +// create a Coordinator. +// main/mrcoordinator.go calls this function. +// nReduce is the number of reduce tasks to use. +func MakeCoordinator(files []string, nReduce int) *Coordinator { + + var buflen int + if len(files) > nReduce { + buflen = len(files) + } else { + buflen = nReduce + } + + c := Coordinator{ + nMap: len(files), + nReduce: nReduce, + wg: sync.WaitGroup{}, + tasks: make(chan Work, buflen), + terms: make([]int, len(files)), + done: false, + } + + for idx, file := range files { + c.tasks <- Work{ + WorkType: MAP, + Filename: file, + FileIndex: idx, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go StartReduceWork(&c) + c.server() + + return &c +} + +FILE_EOF_mr_coordinator_go + +cat > src/mr/rpc.go << 'FILE_EOF_mr_rpc_go' +package mr + +// +// RPC definitions. +// +// remember to capitalize all names. 
+// + +import ( + "os" + "strconv" +) -echo "Creating checksums for protected files" +// +// example to show how to declare the arguments +// and reply for an RPC. +// + +type ExampleArgs struct { + X int +} + +type ExampleReply struct { + Y int +} + +/*-Define Work-*/ + +type WorkStatus int + +const ( + IDLE WorkStatus = iota + START + FINISH +) + +type WorkType int + +const ( + MAP WorkType = iota + REDUCE +) + +type Work struct { + WorkType WorkType // MAP or REDUCE + Filename string + FileIndex int // This is a convention for mr-X index + NMapWork int // how many map files + NReduce int // how many reduce files +} + +type WorkArgs struct { + WorkerID int +} + +type WorkReply struct { + HasWork bool + Work Work + Term int +} + +/*-Define Report-*/ +// Report work finish only if success +type ReportArgs struct { + Work Work + Term int +} + +type ReportReply struct { + Success bool +} + +// Cook up a unique-ish UNIX-domain socket name +// in /var/tmp, for the coordinator. +// Can't use the current directory since +// Athena AFS doesn't support UNIX-domain sockets. +func coordinatorSock() string { + s := "/var/tmp/5840-mr-" + s += strconv.Itoa(os.Getuid()) + return s +} + +FILE_EOF_mr_rpc_go + +cat > src/mr/worker.go << 'FILE_EOF_mr_worker_go' +package mr + +import ( + "encoding/json" + "fmt" + "hash/fnv" + "io/ioutil" + "log" + "net/rpc" + "os" + "sort" + "time" +) + +// for sorting by key. +type ByKey []KeyValue + +// for sorting by key. +func (a ByKey) Len() int { return len(a) } +func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } + +// Map functions return a slice of KeyValue. +type KeyValue struct { + Key string + Value string +} + +// use ihash(key) % NReduce to choose the reduce +// task number for each KeyValue emitted by Map. +func ihash(key string) int { + h := fnv.New32a() + h.Write([]byte(key)) + return int(h.Sum32() & 0x7fffffff) +} + +// main/mrworker.go calls this function. +func Worker(mapf func(string, string) []KeyValue, + reducef func(string, []string) string) { + + // Your worker implementation here. + for { + r := CallGetWok() + if !r.HasWork { + time.Sleep(3 * time.Second) + continue + } + + switch r.Work.WorkType { + case MAP: + DoMapWork(r.Work, mapf, r.Term) + case REDUCE: + DoReduceWork(r.Work, reducef, r.Term) + } + } +} + +func DoReduceWork(work Work, reducef func(string, []string) string, term int) { + fileIndex := work.FileIndex + intermediate := []KeyValue{} + + for i := 0; i < work.NMapWork; i++ { + filename := fmt.Sprintf("mr-%d-%d", i, fileIndex) + file, err := os.Open(filename) + + if err != nil { + log.Fatalf("cannot open %v", filename) + } + + dec := json.NewDecoder(file) + + for { + var kv KeyValue + if err := dec.Decode(&kv); err != nil { + break + } + intermediate = append(intermediate, kv) + } + file.Close() + } + + sort.Sort(ByKey(intermediate)) + + oname := fmt.Sprintf("mr-out-%d", fileIndex) + ofile, _ := ioutil.TempFile(".", oname) + + // + // call Reduce on each distinct key in intermediate[], + // and print the result to mr-out-0. + // + i := 0 + for i < len(intermediate) { + j := i + 1 + for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { + j++ + } + values := []string{} + for k := i; k < j; k++ { + values = append(values, intermediate[k].Value) + } + output := reducef(intermediate[i].Key, values) + + // this is the correct format for each line of Reduce output. 
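+	// The loop below repeatedly asks the coordinator for a task, sleeping a few
+	// seconds when none is available, and dispatches to DoMapWork or DoReduceWork.
+	// The task's term is echoed back in CallReport so the coordinator can discard
+	// reports for tasks it has already timed out and re-queued.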
+ fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) + + i = j + } + + os.Rename(ofile.Name(), oname) + + CallReport(work, term) +} + +func DoMapWork(work Work, mapf func(string, string) []KeyValue, term int) { + filename := work.Filename + + file, err := os.Open(filename) + if err != nil { + log.Fatalf("cannot open %v", filename) + } + + content, err := ioutil.ReadAll(file) + + if err != nil { + log.Fatalf("cannot read %v", filename) + } + + file.Close() + + kva := mapf(work.Filename, string(content)) + + //make a + for i := 0; i < work.NReduce; i++ { + imtFilename := fmt.Sprintf("mr-%d-%d", work.FileIndex, i) + + imtFile, err := ioutil.TempFile(".", imtFilename) + + enc := json.NewEncoder(imtFile) + + if err != nil { + log.Fatalf("cannot create %v", imtFilename) + } + + for _, kv := range kva { + hash := ihash(kv.Key) % work.NReduce + if hash == i { + err := enc.Encode(&kv) + if err != nil { + log.Fatalf("cannot encode %v", kv) + } + } + } + + imtFile.Close() + + os.Rename(imtFile.Name(), imtFilename) + } + + CallReport(work, term) +} + +func CallReport(w Work, term int) { + args := ReportArgs{ + Work: w, + Term: term, + } + reply := ReportReply{} + ok := call("Coordinator.CallReport", &args, &reply) + + if !ok { + fmt.Printf("call failed!\n") + } +} + +func CallGetWok() WorkReply { + args := WorkArgs{} + reply := WorkReply{} + ok := call("Coordinator.CallGetWork", &args, &reply) + + if !ok { + fmt.Printf("call failed!\n") + } + + return reply +} + +// example function to show how to make an RPC call to the coordinator. +// +// the RPC argument and reply types are defined in rpc.go. +func CallExample() { + + // declare an argument structure. + args := ExampleArgs{} + + // fill in the argument(s). + args.X = 99 + + // declare a reply structure. + reply := ExampleReply{} + + // send the RPC request, wait for the reply. + // the "Coordinator.Example" tells the + // receiving server that we'd like to call + // the Example() method of struct Coordinator. + ok := call("Coordinator.Example", &args, &reply) + if ok { + // reply.Y should be 100. + fmt.Printf("reply.Y %v\n", reply.Y) + } else { + fmt.Printf("call failed!\n") + } +} + +// send an RPC request to the coordinator, wait for the response. +// usually returns true. +// returns false if something goes wrong. +func call(rpcname string, args interface{}, reply interface{}) bool { + // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") + sockname := coordinatorSock() + c, err := rpc.DialHTTP("unix", sockname) + if err != nil { + log.Fatal("dialing:", err) + } + defer c.Close() + + err = c.Call(rpcname, args, reply) + if err == nil { + return true + } + + fmt.Println(err) + return false +} + +FILE_EOF_mr_worker_go + + +echo 'Creating checksums for protected files...' 
PROTECTED_FILES=( "src/kvraft/config.go" "src/kvraft/test_test.go" @@ -904,12 +1575,16 @@ PROTECTED_FILES=( mkdir -p /tmp/checksums for file in "${PROTECTED_FILES[@]}"; do if [ -f "$file" ]; then - sha256sum "$file" > "/tmp/checksums/$(basename $file).sha256" + sha256sum "$file" > "/tmp/checksums/$(basename $file).$(dirname $file | tr '/' '_').sha256" echo " $file" fi done +echo '' +echo 'Preprocessing complete' +echo 'Agent should focus on implementing:' +echo ' - src/kvraft/client.go' +echo ' - src/kvraft/common.go' +echo ' - src/kvraft/server.go' - -echo "Preprocessing complete" -exit 0 +exit 0 \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/preprocess.sh index 0ccad9e..648faac 100755 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/preprocess.sh +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/preprocess.sh @@ -1,15 +1,16 @@ #!/bin/bash set -e -echo "=== Preprocessing ShardKV Lab 5A ===" +echo '=== Preprocessing 5A Shardkv ===' cd /workspace -echo "ShardKV depends on Raft implementation from Lab 3" -echo "Copying reference Raft implementation..." +echo 'Copying reference implementations from previous labs...' -echo ' Copying raft.go' -cat > src/raft/raft.go << 'RAFT_EOF' +echo 'Copying raft implementation...' +mkdir -p src/raft + +cat > src/raft/raft.go << 'FILE_EOF_raft_raft_go' package raft // @@ -427,10 +428,9 @@ func Make(peers []*labrpc.ClientEnd, me int, return rf } -RAFT_EOF +FILE_EOF_raft_raft_go -echo ' Copying election.go' -cat > src/raft/election.go << 'RAFT_EOF' +cat > src/raft/election.go << 'FILE_EOF_raft_election_go' package raft import ( @@ -555,10 +555,9 @@ func (rf *Raft) isElectionTimeout() bool { return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) } -RAFT_EOF +FILE_EOF_raft_election_go -echo ' Copying append_entries.go' -cat > src/raft/append_entries.go << 'RAFT_EOF' +cat > src/raft/append_entries.go << 'FILE_EOF_raft_append_entries_go' package raft // Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 @@ -774,10 +773,9 @@ func (rf *Raft) broadcaster(peer int) { } } -RAFT_EOF +FILE_EOF_raft_append_entries_go -echo ' Copying install_snapshot.go' -cat > src/raft/install_snapshot.go << 'RAFT_EOF' +cat > src/raft/install_snapshot.go << 'FILE_EOF_raft_install_snapshot_go' package raft type InstallSnapshotArgs struct { @@ -871,10 +869,9 @@ func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { rf.persister.Save(rf.encodeState(), args.Data) } -RAFT_EOF +FILE_EOF_raft_install_snapshot_go -echo ' Copying util.go' -cat > src/raft/util.go << 'RAFT_EOF' +cat > src/raft/util.go << 'FILE_EOF_raft_util_go' package raft import ( @@ -892,10 +889,1129 @@ func DPrintf(format string, a ...interface{}) { log.Printf(format, a...) } -RAFT_EOF +FILE_EOF_raft_util_go + +echo 'Copying kvraft implementation...' 
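+# Lab 5 is staged on top of reference solutions to the earlier labs: the raft
+# package above, plus the kvraft, kvsrv, and mr packages below.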
+mkdir -p src/kvraft + +cat > src/kvraft/client.go << 'FILE_EOF_kvraft_client_go' +package kvraft + +import ( + "crypto/rand" + "math/big" + "sync/atomic" + "time" + + "6.5840/labrpc" +) + +type Clerk struct { + servers []*labrpc.ClientEnd + cid int64 + seq int + leader int32 // cache the leader +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.servers, ck.cid, ck.seq = servers, nrand(), 0 + return ck +} + +// fetch the current value for a key. +// returns "" if the key does not exist. +// keeps trying forever in the face of all other errors. +// +// you can send an RPC with code like this: +// ok := ck.servers[i].Call("KVServer.Get", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) Get(key string) string { + ck.seq++ + + args := new(GetArgs) + args.Key, args.Cid, args.Seq = key, ck.cid, ck.seq + + leader := int(atomic.LoadInt32(&ck.leader)) + for { + for i := 0; i < len(ck.servers); i++ { + peer := (leader + i) % len(ck.servers) + reply := new(GetReply) + ok := ck.servers[peer].Call("KVServer.Get", args, reply) + if ok && (reply.Err == OK || reply.Err == ErrNoKey) { + atomic.StoreInt32(&ck.leader, int32(peer)) + return reply.Value + } + } + time.Sleep(100 * time.Millisecond) + } +} + +// shared by Put and Append. +// +// you can send an RPC with code like this: +// ok := ck.servers[i].Call("KVServer.PutAppend", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. 
+func (ck *Clerk) PutAppend(key string, value string, op string) { + ck.seq++ + + args := new(PutAppendArgs) + args.OpStr, args.Key, args.Value, args.Cid, args.Seq = op, key, value, ck.cid, ck.seq + + leader := int(atomic.LoadInt32(&ck.leader)) + for { + for i := 0; i < len(ck.servers); i++ { + peer := (leader + i) % len(ck.servers) + reply := new(PutAppendReply) + ok := ck.servers[peer].Call("KVServer.PutAppend", args, reply) + if ok && reply.Err == OK { + atomic.StoreInt32(&ck.leader, int32(peer)) + return + } + } + time.Sleep(100 * time.Millisecond) + } +} + +func (ck *Clerk) Put(key string, value string) { + ck.PutAppend(key, value, "Put") +} +func (ck *Clerk) Append(key string, value string) { + ck.PutAppend(key, value, "Append") +} + +FILE_EOF_kvraft_client_go + +cat > src/kvraft/common.go << 'FILE_EOF_kvraft_common_go' +package kvraft + +const ( + OK = "OK" + ErrNoKey = "ErrNoKey" + ErrWrongLeader = "ErrWrongLeader" +) + +type ClientInfo struct { + Cid int64 + Seq int +} + +type Err string + +type RaftReply struct { + Value string + Err Err +} + +type GetArgs struct { + Key string + ClientInfo +} + +type GetReply = RaftReply + +// Put or Append +type PutAppendArgs struct { + OpStr string // "Put" or "Append" + Key string + Value string + ClientInfo +} + +type PutAppendReply = RaftReply + +type Cache struct { + Seq int + RaftReply +} + +FILE_EOF_kvraft_common_go + +cat > src/kvraft/server.go << 'FILE_EOF_kvraft_server_go' +package kvraft + +import ( + "bytes" + "log" + "os" + "sync" + "sync/atomic" + "time" + + "6.5840/labgob" + "6.5840/labrpc" + "6.5840/raft" +) + +var Debug = os.Getenv("DEBUG") == "1" + +func DPrintf(format string, a ...interface{}) (n int, err error) { + if Debug { + log.Printf(format, a...) + } + return +} + +type Opcode int + +const ( + GET Opcode = iota + PUT + APPEND +) + +type Op struct { + Cmd interface{} + ClientInfo +} + +type Done struct { + index int + term int + value string + err Err +} + +type KVServer struct { + mu sync.Mutex + me int + rf *raft.Raft + ps *raft.Persister + applyCh chan raft.ApplyMsg + dead int32 // set by Kill() + + maxraftstate int // snapshot if log grows this big + + data map[string]string + cache map[int64]*Cache // client id -> seq + chanmap map[int64]chan Done +} + +func getChanId(term, index int) (id int64) { + id = int64(term) << 32 + id += int64(index) + return +} + +func (kv *KVServer) makeChan(term, index int) chan Done { + id := getChanId(term, index) + ch := make(chan Done, 1) + kv.chanmap[id] = ch + return ch +} + +func (kv *KVServer) closeAndDeleteChan(term, index int) { + kv.mu.Lock() + defer kv.mu.Unlock() + id := getChanId(term, index) + close(kv.chanmap[id]) + delete(kv.chanmap, id) +} + +func (kv *KVServer) isCacheHit(Cid int64, Seq int) (bool, *Cache) { + // Why cache.Seq >= Seq works? + // 1. If the seq of cache equals to Seq, it means the operation has been + // executed. Return the value directly. + // 2. If the seq of cache is Greater than Seq, it means some operations + // after this Op have been executed, which implies client has already + // received the result of this Op (the operation must be completed before + // next operation happened). Theorically, return anything is OK. 
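+	// Example: if the cache for this client holds Seq = 7 and a request with
+	// Seq = 5 arrives, that request is a late retransmission of an operation the
+	// client has already moved past, so answering it from the cache is safe.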
+ if cache, ok := kv.cache[Cid]; ok && cache.Seq >= Seq { + return true, cache + } else if ok { + return false, cache + } else { + kv.cache[Cid] = new(Cache) + return false, kv.cache[Cid] + } +} +func (kv *KVServer) encode() []byte { + w := new(bytes.Buffer) + e := labgob.NewEncoder(w) + e.Encode(kv.cache) + e.Encode(kv.data) + return w.Bytes() +} + +func (kv *KVServer) decode(buf []byte) { + if buf == nil || len(buf) < 1 { + return + } + r := bytes.NewBuffer(buf) + d := labgob.NewDecoder(r) + var cache map[int64]*Cache + var data map[string]string + if d.Decode(&cache) != nil || d.Decode(&data) != nil { + log.Fatal("Decode error") + return + } + kv.cache = cache + kv.data = data +} + +func (kv *KVServer) startRaft(cmd interface{}, cid int64, seq int, ch chan *Cache) { + kv.mu.Lock() + defer kv.mu.Unlock() + rr := new(Cache) + if hit, cache := kv.isCacheHit(cid, seq); hit { + rr.Seq, rr.Value, rr.Err = cache.Seq, cache.Value, cache.Err + ch <- rr + } else { + op := new(Op) + op.Cmd, op.Cid, op.Seq = cmd, cid, seq + index, term, isLeader := kv.rf.Start(op) + if !isLeader { + cache.Value, cache.Err = "", ErrWrongLeader + rr.Err = ErrWrongLeader + ch <- rr + return + } + donech := kv.makeChan(term, index) + go kv.waitRaft(term, index, ch, donech) + DPrintf("(startRaft) [%d] start raft with op %+v\n", kv.me, op) + } +} + +func (kv *KVServer) waitRaft(term, index int, ch chan *Cache, donech chan Done) { + timer := time.NewTimer(500 * time.Millisecond) + rr := new(Cache) + DPrintf("(waitRaft) [%d] wait for term: %d, index: %d\n", kv.me, term, index) + select { + case <-timer.C: + DPrintf("(waitRaft) [%d] timeout, term: %d, index: %d\n", kv.me, term, index) + rr.Value = "" + rr.Err = ErrWrongLeader + ch <- rr + case done := <-donech: + rr.Value = done.value + rr.Err = done.err + ch <- rr + } + kv.closeAndDeleteChan(term, index) +} + +func (kv *KVServer) raft(cmd interface{}, cid int64, seq int) *Cache { + ch := make(chan *Cache) + go kv.startRaft(cmd, cid, seq, ch) + r := <-ch + close(ch) + return r +} + +func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { + DPrintf("(Get) [%d] get %s\n", kv.me, args.Key) + r := kv.raft(args, args.Cid, args.Seq) + reply.Value = r.Value + reply.Err = r.Err +} + +func (kv *KVServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { + DPrintf("(PutAppend) [%d] %s %s: %s\n", kv.me, args.OpStr, args.Key, args.Value) + r := kv.raft(args, args.Cid, args.Seq) + reply.Err = r.Err +} + +// Serializes the execution of operations on the key-value store. 
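+// Commands arrive on applyCh in commit order and are applied at most once:
+// duplicates are filtered through the per-client cache, the result is handed to
+// any RPC handler waiting on the matching (term, index) channel, and a snapshot
+// is taken once the Raft state grows past maxraftstate. Snapshot messages
+// restore the store via decode().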
+func (kv *KVServer) executor() { + for !kv.killed() { + msg := <-kv.applyCh + DPrintf("(executor) [%d] receive msg %+v\n", kv.me, msg) + kv.mu.Lock() + if msg.CommandValid { + DPrintf("(executor) [%d] type of command: %T\n", kv.me, msg.Command) + op := msg.Command.(*Op) + index, term, cid, seq := msg.CommandIndex, msg.CommandTerm, op.Cid, op.Seq + hit, cache := kv.isCacheHit(cid, seq) + if !hit { + cache.Seq, cache.Value, cache.Err = seq, "", OK + switch v := op.Cmd.(type) { + case *GetArgs: + key := v.Key + DPrintf("(executor) [%d] get %s: %s\n", kv.me, key, kv.data[key]) + if val, ok := kv.data[key]; ok { + cache.Value = val + } else { + cache.Err = ErrNoKey + } + case *PutAppendArgs: + if v.OpStr == "Put" { + kv.data[v.Key] = v.Value + } else if v.OpStr == "Append" { + kv.data[v.Key] += v.Value + } + DPrintf("(executor) [%d] %s %s: %s\n", kv.me, v.OpStr, v.Key, kv.data[v.Key]) + } + if kv.maxraftstate != -1 && kv.maxraftstate < kv.ps.RaftStateSize() { + kv.rf.Snapshot(index, kv.encode()) + } + } + if ch, ok := kv.chanmap[getChanId(term, index)]; ok { + select { + case ch <- Done{index, term, cache.Value, cache.Err}: + default: + panic("Channel is full or closed") + } + } + } else if msg.SnapshotValid { + kv.decode(msg.Snapshot) + } else { + log.Fatalf("Invalid applyMsg, %+v\n", msg) + } + kv.mu.Unlock() + } +} + +// the tester calls Kill() when a KVServer instance won't +// be needed again. for your convenience, we supply +// code to set rf.dead (without needing a lock), +// and a killed() method to test rf.dead in +// long-running loops. you can also add your own +// code to Kill(). you're not required to do anything +// about this, but it may be convenient (for example) +// to suppress debug output from a Kill()ed instance. +func (kv *KVServer) Kill() { + atomic.StoreInt32(&kv.dead, 1) + kv.rf.Kill() + // Your code here, if desired. +} + +func (kv *KVServer) killed() bool { + z := atomic.LoadInt32(&kv.dead) + return z == 1 +} + +// servers[] contains the ports of the set of +// servers that will cooperate via Raft to +// form the fault-tolerant key/value service. +// me is the index of the current server in servers[]. +// the k/v server should store snapshots through the underlying Raft +// implementation, which should call persister.SaveStateAndSnapshot() to +// atomically save the Raft state along with the snapshot. +// the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, +// in order to allow Raft to garbage-collect its log. if maxraftstate is -1, +// you don't need to snapshot. +// StartKVServer() must return quickly, so it should start goroutines +// for any long-running work. +func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer { + // call labgob.Register on structures you want + // Go's RPC library to marshall/unmarshall. + labgob.Register(&Op{}) + labgob.Register(&GetArgs{}) + labgob.Register(&PutAppendArgs{}) + labgob.Register(&RaftReply{}) + labgob.Register(&Cache{}) + + kv := new(KVServer) + kv.me = me + kv.maxraftstate = maxraftstate + + kv.applyCh = make(chan raft.ApplyMsg) + kv.rf = raft.Make(servers, me, persister, kv.applyCh) + kv.ps = persister + kv.data = make(map[string]string) + kv.cache = make(map[int64]*Cache) + kv.chanmap = make(map[int64]chan Done) + + // Read from persister if any + kv.decode(kv.ps.ReadSnapshot()) + + go kv.executor() + + return kv +} + +FILE_EOF_kvraft_server_go + +echo 'Copying kvsrv implementation...' 
+mkdir -p src/kvsrv + +cat > src/kvsrv/client.go << 'FILE_EOF_kvsrv_client_go' +package kvsrv + +import ( + "crypto/rand" + "math/big" + + "6.5840/labrpc" +) + +type Clerk struct { + server *labrpc.ClientEnd + clientId int64 + seqNum int +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +func MakeClerk(server *labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.server = server + ck.clientId = nrand() + ck.seqNum = 0 + return ck +} + +// fetch the current value for a key. +// returns "" if the key does not exist. +// keeps trying forever in the face of all other errors. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer.Get", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) Get(key string) string { + ck.seqNum++ + args := GetArgs{ + Key: key, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := GetReply{} + for !ck.server.Call("KVServer.Get", &args, &reply) { + } + return reply.Value +} + +// shared by Put and Append. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer."+op, &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) PutAppend(key string, value string, op string) string { + ck.seqNum++ + args := PutAppendArgs{ + Key: key, + Value: value, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := PutAppendReply{} + for !ck.server.Call("KVServer."+op, &args, &reply) { + } + return reply.Value +} + +func (ck *Clerk) Put(key string, value string) { + ck.PutAppend(key, value, "Put") +} + +// Append value to key's value and return that value +func (ck *Clerk) Append(key string, value string) string { + return ck.PutAppend(key, value, "Append") +} + +FILE_EOF_kvsrv_client_go + +cat > src/kvsrv/common.go << 'FILE_EOF_kvsrv_common_go' +package kvsrv + +type PutAppendArgs struct { + Key string + Value string + ClientId int64 + SeqNum int +} + +type PutAppendReply struct { + Value string +} + +type GetArgs struct { + Key string + ClientId int64 + SeqNum int +} + +type GetReply struct { + Value string +} + +FILE_EOF_kvsrv_common_go + +cat > src/kvsrv/server.go << 'FILE_EOF_kvsrv_server_go' +package kvsrv + +import ( + "log" + "sync" +) + +const Debug = false + +func DPrintf(format string, a ...interface{}) (n int, err error) { + if Debug { + log.Printf(format, a...) + } + return +} + +type Cache struct { + seq int + value string +} + +type KVServer struct { + mu sync.Mutex + data map[string]string + cache map[int64]*Cache // client id -> seq ->value +} -echo "Creating checksums for protected files" +func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + key := args.Key + reply.Value = "" + // Either the client is new or the seqNum is greater than the cache seqNum. + // In both cases, we can return the value directly. 
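+	// Otherwise (e.g. cached seq 6, incoming SeqNum 4) the request is a late
+	// retransmission the client has already moved past, and the value returned
+	// does not matter.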
+ if ca, ok := kv.cache[clientId]; !ok || ca.seq <= seqNum { + reply.Value = kv.data[key] + return + } +} + +func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + kv.data[k] = v + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = reply.Value +} + +func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + // For ca.seq == seqNum, it means that the value has been appended. + // However, the response might be lost, so we return the cache value. + // For ca.seq > seqNum, it doesnt matter what the value is, just return. + if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + reply.Value = ca.value + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + reply.Value = kv.data[k] + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = kv.data[k] + kv.data[k] += v +} + +func StartKVServer() *KVServer { + kv := new(KVServer) + kv.data = make(map[string]string) + kv.cache = make(map[int64]*Cache) + return kv +} + +FILE_EOF_kvsrv_server_go + +echo 'Copying mr implementation...' +mkdir -p src/mr + +cat > src/mr/coordinator.go << 'FILE_EOF_mr_coordinator_go' +package mr + +import ( + "log" + "math" + "net" + "net/http" + "net/rpc" + "os" + "sync" + "time" +) + +const SUCCESS = math.MaxInt32 + +type Coordinator struct { + // Your definitions here. + tasks chan Work // a taskqueue + mu sync.Mutex + terms []int + wg sync.WaitGroup + nMap int + nReduce int + done bool +} + +func (c *Coordinator) CallGetWork(args *WorkArgs, reply *WorkReply) error { + if len(c.tasks) == 0 { + reply.HasWork = false + return nil + } + reply.Work = <-c.tasks + c.mu.Lock() + reply.Term = c.terms[reply.Work.FileIndex] + c.mu.Unlock() + reply.HasWork = true + + go func() { + time.Sleep(10 * time.Second) + c.mu.Lock() + defer c.mu.Unlock() + if c.terms[reply.Work.FileIndex] == SUCCESS { + return + } + c.terms[reply.Work.FileIndex]++ + c.tasks <- reply.Work + }() + + return nil +} + +func (c *Coordinator) CallReport(args *ReportArgs, reply *ReportReply) error { + c.mu.Lock() + defer c.mu.Unlock() + + if c.terms[args.Work.FileIndex] != args.Term { + reply.Success = false + return nil + } + c.terms[args.Work.FileIndex] = SUCCESS + c.wg.Done() + reply.Success = true + return nil +} + +// start a thread that listens for RPCs from worker.go +func (c *Coordinator) server() { + rpc.Register(c) + rpc.HandleHTTP() + //l, e := net.Listen("tcp", ":1234") + sockname := coordinatorSock() + os.Remove(sockname) + l, e := net.Listen("unix", sockname) + if e != nil { + log.Fatal("listen error:", e) + } + go http.Serve(l, nil) +} + +// main/mrcoordinator.go calls Done() periodically to find out +// if the entire job has finished. +func (c *Coordinator) Done() bool { + return c.done +} + +func StartReduceWork(c *Coordinator) { + c.wg.Wait() + c.terms = make([]int, c.nReduce) + for i := 0; i < c.nReduce; i++ { + c.tasks <- Work{ + WorkType: REDUCE, + FileIndex: i, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go WorkDone(c) +} + +func WorkDone(c *Coordinator) { + c.wg.Wait() + c.done = true +} + +// create a Coordinator. +// main/mrcoordinator.go calls this function. 
+// nReduce is the number of reduce tasks to use. +func MakeCoordinator(files []string, nReduce int) *Coordinator { + + var buflen int + if len(files) > nReduce { + buflen = len(files) + } else { + buflen = nReduce + } + + c := Coordinator{ + nMap: len(files), + nReduce: nReduce, + wg: sync.WaitGroup{}, + tasks: make(chan Work, buflen), + terms: make([]int, len(files)), + done: false, + } + + for idx, file := range files { + c.tasks <- Work{ + WorkType: MAP, + Filename: file, + FileIndex: idx, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go StartReduceWork(&c) + c.server() + + return &c +} + +FILE_EOF_mr_coordinator_go + +cat > src/mr/rpc.go << 'FILE_EOF_mr_rpc_go' +package mr + +// +// RPC definitions. +// +// remember to capitalize all names. +// + +import ( + "os" + "strconv" +) + +// +// example to show how to declare the arguments +// and reply for an RPC. +// + +type ExampleArgs struct { + X int +} + +type ExampleReply struct { + Y int +} + +/*-Define Work-*/ + +type WorkStatus int + +const ( + IDLE WorkStatus = iota + START + FINISH +) + +type WorkType int + +const ( + MAP WorkType = iota + REDUCE +) + +type Work struct { + WorkType WorkType // MAP or REDUCE + Filename string + FileIndex int // This is a convention for mr-X index + NMapWork int // how many map files + NReduce int // how many reduce files +} + +type WorkArgs struct { + WorkerID int +} + +type WorkReply struct { + HasWork bool + Work Work + Term int +} + +/*-Define Report-*/ +// Report work finish only if success +type ReportArgs struct { + Work Work + Term int +} + +type ReportReply struct { + Success bool +} + +// Cook up a unique-ish UNIX-domain socket name +// in /var/tmp, for the coordinator. +// Can't use the current directory since +// Athena AFS doesn't support UNIX-domain sockets. +func coordinatorSock() string { + s := "/var/tmp/5840-mr-" + s += strconv.Itoa(os.Getuid()) + return s +} + +FILE_EOF_mr_rpc_go + +cat > src/mr/worker.go << 'FILE_EOF_mr_worker_go' +package mr + +import ( + "encoding/json" + "fmt" + "hash/fnv" + "io/ioutil" + "log" + "net/rpc" + "os" + "sort" + "time" +) + +// for sorting by key. +type ByKey []KeyValue + +// for sorting by key. +func (a ByKey) Len() int { return len(a) } +func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } + +// Map functions return a slice of KeyValue. +type KeyValue struct { + Key string + Value string +} + +// use ihash(key) % NReduce to choose the reduce +// task number for each KeyValue emitted by Map. +func ihash(key string) int { + h := fnv.New32a() + h.Write([]byte(key)) + return int(h.Sum32() & 0x7fffffff) +} + +// main/mrworker.go calls this function. +func Worker(mapf func(string, string) []KeyValue, + reducef func(string, []string) string) { + + // Your worker implementation here. 
+ for { + r := CallGetWok() + if !r.HasWork { + time.Sleep(3 * time.Second) + continue + } + + switch r.Work.WorkType { + case MAP: + DoMapWork(r.Work, mapf, r.Term) + case REDUCE: + DoReduceWork(r.Work, reducef, r.Term) + } + } +} + +func DoReduceWork(work Work, reducef func(string, []string) string, term int) { + fileIndex := work.FileIndex + intermediate := []KeyValue{} + + for i := 0; i < work.NMapWork; i++ { + filename := fmt.Sprintf("mr-%d-%d", i, fileIndex) + file, err := os.Open(filename) + + if err != nil { + log.Fatalf("cannot open %v", filename) + } + + dec := json.NewDecoder(file) + + for { + var kv KeyValue + if err := dec.Decode(&kv); err != nil { + break + } + intermediate = append(intermediate, kv) + } + file.Close() + } + + sort.Sort(ByKey(intermediate)) + + oname := fmt.Sprintf("mr-out-%d", fileIndex) + ofile, _ := ioutil.TempFile(".", oname) + + // + // call Reduce on each distinct key in intermediate[], + // and print the result to mr-out-0. + // + i := 0 + for i < len(intermediate) { + j := i + 1 + for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { + j++ + } + values := []string{} + for k := i; k < j; k++ { + values = append(values, intermediate[k].Value) + } + output := reducef(intermediate[i].Key, values) + + // this is the correct format for each line of Reduce output. + fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) + + i = j + } + + os.Rename(ofile.Name(), oname) + + CallReport(work, term) +} + +func DoMapWork(work Work, mapf func(string, string) []KeyValue, term int) { + filename := work.Filename + + file, err := os.Open(filename) + if err != nil { + log.Fatalf("cannot open %v", filename) + } + + content, err := ioutil.ReadAll(file) + + if err != nil { + log.Fatalf("cannot read %v", filename) + } + + file.Close() + + kva := mapf(work.Filename, string(content)) + + //make a + for i := 0; i < work.NReduce; i++ { + imtFilename := fmt.Sprintf("mr-%d-%d", work.FileIndex, i) + + imtFile, err := ioutil.TempFile(".", imtFilename) + + enc := json.NewEncoder(imtFile) + + if err != nil { + log.Fatalf("cannot create %v", imtFilename) + } + + for _, kv := range kva { + hash := ihash(kv.Key) % work.NReduce + if hash == i { + err := enc.Encode(&kv) + if err != nil { + log.Fatalf("cannot encode %v", kv) + } + } + } + + imtFile.Close() + + os.Rename(imtFile.Name(), imtFilename) + } + + CallReport(work, term) +} + +func CallReport(w Work, term int) { + args := ReportArgs{ + Work: w, + Term: term, + } + reply := ReportReply{} + ok := call("Coordinator.CallReport", &args, &reply) + + if !ok { + fmt.Printf("call failed!\n") + } +} + +func CallGetWok() WorkReply { + args := WorkArgs{} + reply := WorkReply{} + ok := call("Coordinator.CallGetWork", &args, &reply) + + if !ok { + fmt.Printf("call failed!\n") + } + + return reply +} + +// example function to show how to make an RPC call to the coordinator. +// +// the RPC argument and reply types are defined in rpc.go. +func CallExample() { + + // declare an argument structure. + args := ExampleArgs{} + + // fill in the argument(s). + args.X = 99 + + // declare a reply structure. + reply := ExampleReply{} + + // send the RPC request, wait for the reply. + // the "Coordinator.Example" tells the + // receiving server that we'd like to call + // the Example() method of struct Coordinator. + ok := call("Coordinator.Example", &args, &reply) + if ok { + // reply.Y should be 100. 
+ fmt.Printf("reply.Y %v\n", reply.Y) + } else { + fmt.Printf("call failed!\n") + } +} + +// send an RPC request to the coordinator, wait for the response. +// usually returns true. +// returns false if something goes wrong. +func call(rpcname string, args interface{}, reply interface{}) bool { + // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") + sockname := coordinatorSock() + c, err := rpc.DialHTTP("unix", sockname) + if err != nil { + log.Fatal("dialing:", err) + } + defer c.Close() + + err = c.Call(rpcname, args, reply) + if err == nil { + return true + } + + fmt.Println(err) + return false +} + +FILE_EOF_mr_worker_go + + +echo 'Creating checksums for protected files...' PROTECTED_FILES=( "src/shardctrler/config.go" "src/shardctrler/test_test.go" @@ -911,7 +2027,14 @@ for file in "${PROTECTED_FILES[@]}"; do fi done - - -echo "Preprocessing complete" -exit 0 +echo '' +echo 'Preprocessing complete' +echo 'Agent should focus on implementing:' +echo ' - src/shardctrler/client.go' +echo ' - src/shardctrler/common.go' +echo ' - src/shardctrler/server.go' +echo ' - src/shardkv/client.go' +echo ' - src/shardkv/common.go' +echo ' - src/shardkv/server.go' + +exit 0 \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/preprocess.sh index e8b7741..a6edaed 100755 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/preprocess.sh +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/preprocess.sh @@ -1,15 +1,16 @@ #!/bin/bash set -e -echo "=== Preprocessing ShardKV Lab 5B ===" +echo '=== Preprocessing 5B Shardkv ===' cd /workspace -echo "ShardKV depends on Raft implementation from Lab 3" -echo "Copying reference Raft implementation..." +echo 'Copying reference implementations from previous labs...' -echo ' Copying raft.go' -cat > src/raft/raft.go << 'RAFT_EOF' +echo 'Copying raft implementation...' +mkdir -p src/raft + +cat > src/raft/raft.go << 'FILE_EOF_raft_raft_go' package raft // @@ -427,10 +428,9 @@ func Make(peers []*labrpc.ClientEnd, me int, return rf } -RAFT_EOF +FILE_EOF_raft_raft_go -echo ' Copying election.go' -cat > src/raft/election.go << 'RAFT_EOF' +cat > src/raft/election.go << 'FILE_EOF_raft_election_go' package raft import ( @@ -555,10 +555,9 @@ func (rf *Raft) isElectionTimeout() bool { return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) } -RAFT_EOF +FILE_EOF_raft_election_go -echo ' Copying append_entries.go' -cat > src/raft/append_entries.go << 'RAFT_EOF' +cat > src/raft/append_entries.go << 'FILE_EOF_raft_append_entries_go' package raft // Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 @@ -774,10 +773,9 @@ func (rf *Raft) broadcaster(peer int) { } } -RAFT_EOF +FILE_EOF_raft_append_entries_go -echo ' Copying install_snapshot.go' -cat > src/raft/install_snapshot.go << 'RAFT_EOF' +cat > src/raft/install_snapshot.go << 'FILE_EOF_raft_install_snapshot_go' package raft type InstallSnapshotArgs struct { @@ -871,10 +869,9 @@ func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { rf.persister.Save(rf.encodeState(), args.Data) } -RAFT_EOF +FILE_EOF_raft_install_snapshot_go -echo ' Copying util.go' -cat > src/raft/util.go << 'RAFT_EOF' +cat > src/raft/util.go << 'FILE_EOF_raft_util_go' package raft import ( @@ -892,10 +889,1129 @@ func DPrintf(format string, a ...interface{}) { log.Printf(format, a...) 
} -RAFT_EOF +FILE_EOF_raft_util_go + +echo 'Copying kvraft implementation...' +mkdir -p src/kvraft + +cat > src/kvraft/client.go << 'FILE_EOF_kvraft_client_go' +package kvraft + +import ( + "crypto/rand" + "math/big" + "sync/atomic" + "time" + + "6.5840/labrpc" +) + +type Clerk struct { + servers []*labrpc.ClientEnd + cid int64 + seq int + leader int32 // cache the leader +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.servers, ck.cid, ck.seq = servers, nrand(), 0 + return ck +} + +// fetch the current value for a key. +// returns "" if the key does not exist. +// keeps trying forever in the face of all other errors. +// +// you can send an RPC with code like this: +// ok := ck.servers[i].Call("KVServer.Get", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) Get(key string) string { + ck.seq++ + + args := new(GetArgs) + args.Key, args.Cid, args.Seq = key, ck.cid, ck.seq + + leader := int(atomic.LoadInt32(&ck.leader)) + for { + for i := 0; i < len(ck.servers); i++ { + peer := (leader + i) % len(ck.servers) + reply := new(GetReply) + ok := ck.servers[peer].Call("KVServer.Get", args, reply) + if ok && (reply.Err == OK || reply.Err == ErrNoKey) { + atomic.StoreInt32(&ck.leader, int32(peer)) + return reply.Value + } + } + time.Sleep(100 * time.Millisecond) + } +} + +// shared by Put and Append. +// +// you can send an RPC with code like this: +// ok := ck.servers[i].Call("KVServer.PutAppend", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. 
+func (ck *Clerk) PutAppend(key string, value string, op string) { + ck.seq++ + + args := new(PutAppendArgs) + args.OpStr, args.Key, args.Value, args.Cid, args.Seq = op, key, value, ck.cid, ck.seq + + leader := int(atomic.LoadInt32(&ck.leader)) + for { + for i := 0; i < len(ck.servers); i++ { + peer := (leader + i) % len(ck.servers) + reply := new(PutAppendReply) + ok := ck.servers[peer].Call("KVServer.PutAppend", args, reply) + if ok && reply.Err == OK { + atomic.StoreInt32(&ck.leader, int32(peer)) + return + } + } + time.Sleep(100 * time.Millisecond) + } +} + +func (ck *Clerk) Put(key string, value string) { + ck.PutAppend(key, value, "Put") +} +func (ck *Clerk) Append(key string, value string) { + ck.PutAppend(key, value, "Append") +} + +FILE_EOF_kvraft_client_go + +cat > src/kvraft/common.go << 'FILE_EOF_kvraft_common_go' +package kvraft + +const ( + OK = "OK" + ErrNoKey = "ErrNoKey" + ErrWrongLeader = "ErrWrongLeader" +) + +type ClientInfo struct { + Cid int64 + Seq int +} + +type Err string + +type RaftReply struct { + Value string + Err Err +} + +type GetArgs struct { + Key string + ClientInfo +} + +type GetReply = RaftReply + +// Put or Append +type PutAppendArgs struct { + OpStr string // "Put" or "Append" + Key string + Value string + ClientInfo +} + +type PutAppendReply = RaftReply + +type Cache struct { + Seq int + RaftReply +} + +FILE_EOF_kvraft_common_go + +cat > src/kvraft/server.go << 'FILE_EOF_kvraft_server_go' +package kvraft + +import ( + "bytes" + "log" + "os" + "sync" + "sync/atomic" + "time" + + "6.5840/labgob" + "6.5840/labrpc" + "6.5840/raft" +) + +var Debug = os.Getenv("DEBUG") == "1" + +func DPrintf(format string, a ...interface{}) (n int, err error) { + if Debug { + log.Printf(format, a...) + } + return +} + +type Opcode int + +const ( + GET Opcode = iota + PUT + APPEND +) + +type Op struct { + Cmd interface{} + ClientInfo +} + +type Done struct { + index int + term int + value string + err Err +} + +type KVServer struct { + mu sync.Mutex + me int + rf *raft.Raft + ps *raft.Persister + applyCh chan raft.ApplyMsg + dead int32 // set by Kill() + + maxraftstate int // snapshot if log grows this big + + data map[string]string + cache map[int64]*Cache // client id -> seq + chanmap map[int64]chan Done +} + +func getChanId(term, index int) (id int64) { + id = int64(term) << 32 + id += int64(index) + return +} + +func (kv *KVServer) makeChan(term, index int) chan Done { + id := getChanId(term, index) + ch := make(chan Done, 1) + kv.chanmap[id] = ch + return ch +} + +func (kv *KVServer) closeAndDeleteChan(term, index int) { + kv.mu.Lock() + defer kv.mu.Unlock() + id := getChanId(term, index) + close(kv.chanmap[id]) + delete(kv.chanmap, id) +} + +func (kv *KVServer) isCacheHit(Cid int64, Seq int) (bool, *Cache) { + // Why cache.Seq >= Seq works? + // 1. If the seq of cache equals to Seq, it means the operation has been + // executed. Return the value directly. + // 2. If the seq of cache is Greater than Seq, it means some operations + // after this Op have been executed, which implies client has already + // received the result of this Op (the operation must be completed before + // next operation happened). Theorically, return anything is OK. 
+ if cache, ok := kv.cache[Cid]; ok && cache.Seq >= Seq { + return true, cache + } else if ok { + return false, cache + } else { + kv.cache[Cid] = new(Cache) + return false, kv.cache[Cid] + } +} +func (kv *KVServer) encode() []byte { + w := new(bytes.Buffer) + e := labgob.NewEncoder(w) + e.Encode(kv.cache) + e.Encode(kv.data) + return w.Bytes() +} + +func (kv *KVServer) decode(buf []byte) { + if buf == nil || len(buf) < 1 { + return + } + r := bytes.NewBuffer(buf) + d := labgob.NewDecoder(r) + var cache map[int64]*Cache + var data map[string]string + if d.Decode(&cache) != nil || d.Decode(&data) != nil { + log.Fatal("Decode error") + return + } + kv.cache = cache + kv.data = data +} + +func (kv *KVServer) startRaft(cmd interface{}, cid int64, seq int, ch chan *Cache) { + kv.mu.Lock() + defer kv.mu.Unlock() + rr := new(Cache) + if hit, cache := kv.isCacheHit(cid, seq); hit { + rr.Seq, rr.Value, rr.Err = cache.Seq, cache.Value, cache.Err + ch <- rr + } else { + op := new(Op) + op.Cmd, op.Cid, op.Seq = cmd, cid, seq + index, term, isLeader := kv.rf.Start(op) + if !isLeader { + cache.Value, cache.Err = "", ErrWrongLeader + rr.Err = ErrWrongLeader + ch <- rr + return + } + donech := kv.makeChan(term, index) + go kv.waitRaft(term, index, ch, donech) + DPrintf("(startRaft) [%d] start raft with op %+v\n", kv.me, op) + } +} + +func (kv *KVServer) waitRaft(term, index int, ch chan *Cache, donech chan Done) { + timer := time.NewTimer(500 * time.Millisecond) + rr := new(Cache) + DPrintf("(waitRaft) [%d] wait for term: %d, index: %d\n", kv.me, term, index) + select { + case <-timer.C: + DPrintf("(waitRaft) [%d] timeout, term: %d, index: %d\n", kv.me, term, index) + rr.Value = "" + rr.Err = ErrWrongLeader + ch <- rr + case done := <-donech: + rr.Value = done.value + rr.Err = done.err + ch <- rr + } + kv.closeAndDeleteChan(term, index) +} + +func (kv *KVServer) raft(cmd interface{}, cid int64, seq int) *Cache { + ch := make(chan *Cache) + go kv.startRaft(cmd, cid, seq, ch) + r := <-ch + close(ch) + return r +} + +func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { + DPrintf("(Get) [%d] get %s\n", kv.me, args.Key) + r := kv.raft(args, args.Cid, args.Seq) + reply.Value = r.Value + reply.Err = r.Err +} + +func (kv *KVServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { + DPrintf("(PutAppend) [%d] %s %s: %s\n", kv.me, args.OpStr, args.Key, args.Value) + r := kv.raft(args, args.Cid, args.Seq) + reply.Err = r.Err +} + +// Serializes the execution of operations on the key-value store. 
+func (kv *KVServer) executor() { + for !kv.killed() { + msg := <-kv.applyCh + DPrintf("(executor) [%d] receive msg %+v\n", kv.me, msg) + kv.mu.Lock() + if msg.CommandValid { + DPrintf("(executor) [%d] type of command: %T\n", kv.me, msg.Command) + op := msg.Command.(*Op) + index, term, cid, seq := msg.CommandIndex, msg.CommandTerm, op.Cid, op.Seq + hit, cache := kv.isCacheHit(cid, seq) + if !hit { + cache.Seq, cache.Value, cache.Err = seq, "", OK + switch v := op.Cmd.(type) { + case *GetArgs: + key := v.Key + DPrintf("(executor) [%d] get %s: %s\n", kv.me, key, kv.data[key]) + if val, ok := kv.data[key]; ok { + cache.Value = val + } else { + cache.Err = ErrNoKey + } + case *PutAppendArgs: + if v.OpStr == "Put" { + kv.data[v.Key] = v.Value + } else if v.OpStr == "Append" { + kv.data[v.Key] += v.Value + } + DPrintf("(executor) [%d] %s %s: %s\n", kv.me, v.OpStr, v.Key, kv.data[v.Key]) + } + if kv.maxraftstate != -1 && kv.maxraftstate < kv.ps.RaftStateSize() { + kv.rf.Snapshot(index, kv.encode()) + } + } + if ch, ok := kv.chanmap[getChanId(term, index)]; ok { + select { + case ch <- Done{index, term, cache.Value, cache.Err}: + default: + panic("Channel is full or closed") + } + } + } else if msg.SnapshotValid { + kv.decode(msg.Snapshot) + } else { + log.Fatalf("Invalid applyMsg, %+v\n", msg) + } + kv.mu.Unlock() + } +} + +// the tester calls Kill() when a KVServer instance won't +// be needed again. for your convenience, we supply +// code to set rf.dead (without needing a lock), +// and a killed() method to test rf.dead in +// long-running loops. you can also add your own +// code to Kill(). you're not required to do anything +// about this, but it may be convenient (for example) +// to suppress debug output from a Kill()ed instance. +func (kv *KVServer) Kill() { + atomic.StoreInt32(&kv.dead, 1) + kv.rf.Kill() + // Your code here, if desired. +} + +func (kv *KVServer) killed() bool { + z := atomic.LoadInt32(&kv.dead) + return z == 1 +} + +// servers[] contains the ports of the set of +// servers that will cooperate via Raft to +// form the fault-tolerant key/value service. +// me is the index of the current server in servers[]. +// the k/v server should store snapshots through the underlying Raft +// implementation, which should call persister.SaveStateAndSnapshot() to +// atomically save the Raft state along with the snapshot. +// the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, +// in order to allow Raft to garbage-collect its log. if maxraftstate is -1, +// you don't need to snapshot. +// StartKVServer() must return quickly, so it should start goroutines +// for any long-running work. +func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer { + // call labgob.Register on structures you want + // Go's RPC library to marshall/unmarshall. + labgob.Register(&Op{}) + labgob.Register(&GetArgs{}) + labgob.Register(&PutAppendArgs{}) + labgob.Register(&RaftReply{}) + labgob.Register(&Cache{}) + + kv := new(KVServer) + kv.me = me + kv.maxraftstate = maxraftstate + + kv.applyCh = make(chan raft.ApplyMsg) + kv.rf = raft.Make(servers, me, persister, kv.applyCh) + kv.ps = persister + kv.data = make(map[string]string) + kv.cache = make(map[int64]*Cache) + kv.chanmap = make(map[int64]chan Done) + + // Read from persister if any + kv.decode(kv.ps.ReadSnapshot()) + + go kv.executor() + + return kv +} + +FILE_EOF_kvraft_server_go + +echo 'Copying kvsrv implementation...' 
+mkdir -p src/kvsrv + +cat > src/kvsrv/client.go << 'FILE_EOF_kvsrv_client_go' +package kvsrv + +import ( + "crypto/rand" + "math/big" + + "6.5840/labrpc" +) + +type Clerk struct { + server *labrpc.ClientEnd + clientId int64 + seqNum int +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +func MakeClerk(server *labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.server = server + ck.clientId = nrand() + ck.seqNum = 0 + return ck +} + +// fetch the current value for a key. +// returns "" if the key does not exist. +// keeps trying forever in the face of all other errors. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer.Get", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) Get(key string) string { + ck.seqNum++ + args := GetArgs{ + Key: key, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := GetReply{} + for !ck.server.Call("KVServer.Get", &args, &reply) { + } + return reply.Value +} + +// shared by Put and Append. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer."+op, &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) PutAppend(key string, value string, op string) string { + ck.seqNum++ + args := PutAppendArgs{ + Key: key, + Value: value, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := PutAppendReply{} + for !ck.server.Call("KVServer."+op, &args, &reply) { + } + return reply.Value +} + +func (ck *Clerk) Put(key string, value string) { + ck.PutAppend(key, value, "Put") +} + +// Append value to key's value and return that value +func (ck *Clerk) Append(key string, value string) string { + return ck.PutAppend(key, value, "Append") +} + +FILE_EOF_kvsrv_client_go + +cat > src/kvsrv/common.go << 'FILE_EOF_kvsrv_common_go' +package kvsrv + +type PutAppendArgs struct { + Key string + Value string + ClientId int64 + SeqNum int +} + +type PutAppendReply struct { + Value string +} + +type GetArgs struct { + Key string + ClientId int64 + SeqNum int +} + +type GetReply struct { + Value string +} + +FILE_EOF_kvsrv_common_go + +cat > src/kvsrv/server.go << 'FILE_EOF_kvsrv_server_go' +package kvsrv + +import ( + "log" + "sync" +) + +const Debug = false + +func DPrintf(format string, a ...interface{}) (n int, err error) { + if Debug { + log.Printf(format, a...) + } + return +} + +type Cache struct { + seq int + value string +} + +type KVServer struct { + mu sync.Mutex + data map[string]string + cache map[int64]*Cache // client id -> seq ->value +} -echo "Creating checksums for protected files" +func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + key := args.Key + reply.Value = "" + // Either the client is new or the seqNum is greater than the cache seqNum. + // In both cases, we can return the value directly. 
+ if ca, ok := kv.cache[clientId]; !ok || ca.seq <= seqNum { + reply.Value = kv.data[key] + return + } +} + +func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + kv.data[k] = v + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = reply.Value +} + +func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + // For ca.seq == seqNum, it means that the value has been appended. + // However, the response might be lost, so we return the cache value. + // For ca.seq > seqNum, it doesnt matter what the value is, just return. + if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + reply.Value = ca.value + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + reply.Value = kv.data[k] + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = kv.data[k] + kv.data[k] += v +} + +func StartKVServer() *KVServer { + kv := new(KVServer) + kv.data = make(map[string]string) + kv.cache = make(map[int64]*Cache) + return kv +} + +FILE_EOF_kvsrv_server_go + +echo 'Copying mr implementation...' +mkdir -p src/mr + +cat > src/mr/coordinator.go << 'FILE_EOF_mr_coordinator_go' +package mr + +import ( + "log" + "math" + "net" + "net/http" + "net/rpc" + "os" + "sync" + "time" +) + +const SUCCESS = math.MaxInt32 + +type Coordinator struct { + // Your definitions here. + tasks chan Work // a taskqueue + mu sync.Mutex + terms []int + wg sync.WaitGroup + nMap int + nReduce int + done bool +} + +func (c *Coordinator) CallGetWork(args *WorkArgs, reply *WorkReply) error { + if len(c.tasks) == 0 { + reply.HasWork = false + return nil + } + reply.Work = <-c.tasks + c.mu.Lock() + reply.Term = c.terms[reply.Work.FileIndex] + c.mu.Unlock() + reply.HasWork = true + + go func() { + time.Sleep(10 * time.Second) + c.mu.Lock() + defer c.mu.Unlock() + if c.terms[reply.Work.FileIndex] == SUCCESS { + return + } + c.terms[reply.Work.FileIndex]++ + c.tasks <- reply.Work + }() + + return nil +} + +func (c *Coordinator) CallReport(args *ReportArgs, reply *ReportReply) error { + c.mu.Lock() + defer c.mu.Unlock() + + if c.terms[args.Work.FileIndex] != args.Term { + reply.Success = false + return nil + } + c.terms[args.Work.FileIndex] = SUCCESS + c.wg.Done() + reply.Success = true + return nil +} + +// start a thread that listens for RPCs from worker.go +func (c *Coordinator) server() { + rpc.Register(c) + rpc.HandleHTTP() + //l, e := net.Listen("tcp", ":1234") + sockname := coordinatorSock() + os.Remove(sockname) + l, e := net.Listen("unix", sockname) + if e != nil { + log.Fatal("listen error:", e) + } + go http.Serve(l, nil) +} + +// main/mrcoordinator.go calls Done() periodically to find out +// if the entire job has finished. +func (c *Coordinator) Done() bool { + return c.done +} + +func StartReduceWork(c *Coordinator) { + c.wg.Wait() + c.terms = make([]int, c.nReduce) + for i := 0; i < c.nReduce; i++ { + c.tasks <- Work{ + WorkType: REDUCE, + FileIndex: i, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go WorkDone(c) +} + +func WorkDone(c *Coordinator) { + c.wg.Wait() + c.done = true +} + +// create a Coordinator. +// main/mrcoordinator.go calls this function. 
+// nReduce is the number of reduce tasks to use. +func MakeCoordinator(files []string, nReduce int) *Coordinator { + + var buflen int + if len(files) > nReduce { + buflen = len(files) + } else { + buflen = nReduce + } + + c := Coordinator{ + nMap: len(files), + nReduce: nReduce, + wg: sync.WaitGroup{}, + tasks: make(chan Work, buflen), + terms: make([]int, len(files)), + done: false, + } + + for idx, file := range files { + c.tasks <- Work{ + WorkType: MAP, + Filename: file, + FileIndex: idx, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go StartReduceWork(&c) + c.server() + + return &c +} + +FILE_EOF_mr_coordinator_go + +cat > src/mr/rpc.go << 'FILE_EOF_mr_rpc_go' +package mr + +// +// RPC definitions. +// +// remember to capitalize all names. +// + +import ( + "os" + "strconv" +) + +// +// example to show how to declare the arguments +// and reply for an RPC. +// + +type ExampleArgs struct { + X int +} + +type ExampleReply struct { + Y int +} + +/*-Define Work-*/ + +type WorkStatus int + +const ( + IDLE WorkStatus = iota + START + FINISH +) + +type WorkType int + +const ( + MAP WorkType = iota + REDUCE +) + +type Work struct { + WorkType WorkType // MAP or REDUCE + Filename string + FileIndex int // This is a convention for mr-X index + NMapWork int // how many map files + NReduce int // how many reduce files +} + +type WorkArgs struct { + WorkerID int +} + +type WorkReply struct { + HasWork bool + Work Work + Term int +} + +/*-Define Report-*/ +// Report work finish only if success +type ReportArgs struct { + Work Work + Term int +} + +type ReportReply struct { + Success bool +} + +// Cook up a unique-ish UNIX-domain socket name +// in /var/tmp, for the coordinator. +// Can't use the current directory since +// Athena AFS doesn't support UNIX-domain sockets. +func coordinatorSock() string { + s := "/var/tmp/5840-mr-" + s += strconv.Itoa(os.Getuid()) + return s +} + +FILE_EOF_mr_rpc_go + +cat > src/mr/worker.go << 'FILE_EOF_mr_worker_go' +package mr + +import ( + "encoding/json" + "fmt" + "hash/fnv" + "io/ioutil" + "log" + "net/rpc" + "os" + "sort" + "time" +) + +// for sorting by key. +type ByKey []KeyValue + +// for sorting by key. +func (a ByKey) Len() int { return len(a) } +func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } + +// Map functions return a slice of KeyValue. +type KeyValue struct { + Key string + Value string +} + +// use ihash(key) % NReduce to choose the reduce +// task number for each KeyValue emitted by Map. +func ihash(key string) int { + h := fnv.New32a() + h.Write([]byte(key)) + return int(h.Sum32() & 0x7fffffff) +} + +// main/mrworker.go calls this function. +func Worker(mapf func(string, string) []KeyValue, + reducef func(string, []string) string) { + + // Your worker implementation here. 
+	for {
+		r := CallGetWork()
+		if !r.HasWork {
+			time.Sleep(3 * time.Second)
+			continue
+		}
+
+		switch r.Work.WorkType {
+		case MAP:
+			DoMapWork(r.Work, mapf, r.Term)
+		case REDUCE:
+			DoReduceWork(r.Work, reducef, r.Term)
+		}
+	}
+}
+
+func DoReduceWork(work Work, reducef func(string, []string) string, term int) {
+	fileIndex := work.FileIndex
+	intermediate := []KeyValue{}
+
+	for i := 0; i < work.NMapWork; i++ {
+		filename := fmt.Sprintf("mr-%d-%d", i, fileIndex)
+		file, err := os.Open(filename)
+
+		if err != nil {
+			log.Fatalf("cannot open %v", filename)
+		}
+
+		dec := json.NewDecoder(file)
+
+		for {
+			var kv KeyValue
+			if err := dec.Decode(&kv); err != nil {
+				break
+			}
+			intermediate = append(intermediate, kv)
+		}
+		file.Close()
+	}
+
+	sort.Sort(ByKey(intermediate))
+
+	oname := fmt.Sprintf("mr-out-%d", fileIndex)
+	ofile, _ := ioutil.TempFile(".", oname)
+
+	//
+	// call Reduce on each distinct key in intermediate[],
+	// and print the result to mr-out-0.
+	//
+	i := 0
+	for i < len(intermediate) {
+		j := i + 1
+		for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key {
+			j++
+		}
+		values := []string{}
+		for k := i; k < j; k++ {
+			values = append(values, intermediate[k].Value)
+		}
+		output := reducef(intermediate[i].Key, values)
+
+		// this is the correct format for each line of Reduce output.
+		fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output)
+
+		i = j
+	}
+
+	os.Rename(ofile.Name(), oname)
+
+	CallReport(work, term)
+}
+
+func DoMapWork(work Work, mapf func(string, string) []KeyValue, term int) {
+	filename := work.Filename
+
+	file, err := os.Open(filename)
+	if err != nil {
+		log.Fatalf("cannot open %v", filename)
+	}
+
+	content, err := ioutil.ReadAll(file)
+
+	if err != nil {
+		log.Fatalf("cannot read %v", filename)
+	}
+
+	file.Close()
+
+	kva := mapf(work.Filename, string(content))
+
+	// partition the map output into one intermediate file per reduce task
+	for i := 0; i < work.NReduce; i++ {
+		imtFilename := fmt.Sprintf("mr-%d-%d", work.FileIndex, i)
+
+		imtFile, err := ioutil.TempFile(".", imtFilename)
+
+		if err != nil {
+			log.Fatalf("cannot create %v", imtFilename)
+		}
+
+		enc := json.NewEncoder(imtFile)
+
+		for _, kv := range kva {
+			hash := ihash(kv.Key) % work.NReduce
+			if hash == i {
+				err := enc.Encode(&kv)
+				if err != nil {
+					log.Fatalf("cannot encode %v", kv)
+				}
+			}
+		}
+
+		imtFile.Close()
+
+		os.Rename(imtFile.Name(), imtFilename)
+	}
+
+	CallReport(work, term)
+}
+
+func CallReport(w Work, term int) {
+	args := ReportArgs{
+		Work: w,
+		Term: term,
+	}
+	reply := ReportReply{}
+	ok := call("Coordinator.CallReport", &args, &reply)
+
+	if !ok {
+		fmt.Printf("call failed!\n")
+	}
+}
+
+func CallGetWork() WorkReply {
+	args := WorkArgs{}
+	reply := WorkReply{}
+	ok := call("Coordinator.CallGetWork", &args, &reply)
+
+	if !ok {
+		fmt.Printf("call failed!\n")
+	}
+
+	return reply
+}
+
+// example function to show how to make an RPC call to the coordinator.
+//
+// the RPC argument and reply types are defined in rpc.go.
+func CallExample() {
+
+	// declare an argument structure.
+	args := ExampleArgs{}
+
+	// fill in the argument(s).
+	args.X = 99
+
+	// declare a reply structure.
+	reply := ExampleReply{}
+
+	// send the RPC request, wait for the reply.
+	// the "Coordinator.Example" tells the
+	// receiving server that we'd like to call
+	// the Example() method of struct Coordinator.
+	ok := call("Coordinator.Example", &args, &reply)
+	if ok {
+		// reply.Y should be 100.
+ fmt.Printf("reply.Y %v\n", reply.Y) + } else { + fmt.Printf("call failed!\n") + } +} + +// send an RPC request to the coordinator, wait for the response. +// usually returns true. +// returns false if something goes wrong. +func call(rpcname string, args interface{}, reply interface{}) bool { + // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") + sockname := coordinatorSock() + c, err := rpc.DialHTTP("unix", sockname) + if err != nil { + log.Fatal("dialing:", err) + } + defer c.Close() + + err = c.Call(rpcname, args, reply) + if err == nil { + return true + } + + fmt.Println(err) + return false +} + +FILE_EOF_mr_worker_go + + +echo 'Creating checksums for protected files...' PROTECTED_FILES=( "src/shardctrler/config.go" "src/shardctrler/test_test.go" @@ -911,7 +2027,14 @@ for file in "${PROTECTED_FILES[@]}"; do fi done - - -echo "Preprocessing complete" -exit 0 +echo '' +echo 'Preprocessing complete' +echo 'Agent should focus on implementing:' +echo ' - src/shardctrler/client.go' +echo ' - src/shardctrler/common.go' +echo ' - src/shardctrler/server.go' +echo ' - src/shardkv/client.go' +echo ' - src/shardkv/common.go' +echo ' - src/shardkv/server.go' + +exit 0 \ No newline at end of file From 3ee1725300348ecde2ebefbadaa3a3c2d97ddb4c Mon Sep 17 00:00:00 2001 From: Tarek Date: Mon, 15 Dec 2025 12:21:43 -0800 Subject: [PATCH 07/10] feat(courselab_bench): modify system prompt to emphasize focus on current task Signed-off-by: Tarek --- benchmarks/courselab_bench/courselab_bench/agent/react.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/benchmarks/courselab_bench/courselab_bench/agent/react.py b/benchmarks/courselab_bench/courselab_bench/agent/react.py index b48cc4d..cfac8d6 100644 --- a/benchmarks/courselab_bench/courselab_bench/agent/react.py +++ b/benchmarks/courselab_bench/courselab_bench/agent/react.py @@ -159,7 +159,12 @@ def _system_prompt(self) -> str: ``` - After each command, wait for the output before proceeding - When you have completed the task, run: echo "TASK_COMPLETE" -- Be concise and focused on solving the task""" +- Be concise and focused on solving the task + +Important: Focus on the Current Lab +- You are working on a specific lab assignment from a course sequence +- Avoid getting distracted by code from earlier labs in the course sequence +- The task description clearly states which files you should modify""" def _task_prompt(self, task: dict[str, Any]) -> str: return f"""# Task: {task['instance_id']} From 9d351e64161f834bed5b25270d3d5bac403ee473 Mon Sep 17 00:00:00 2001 From: Tarek Date: Thu, 18 Dec 2025 15:40:19 -0800 Subject: [PATCH 08/10] feat(courselab_bench): add config option to add starter files Signed-off-by: Tarek --- benchmarks/courselab_bench/README.md | 18 +- .../courselab_bench/environment/docker.py | 76 + .../courselab_bench/runner/executor.py | 14 +- .../mit_6_5840_2024/1_mapreduce/config.json | 16 +- .../data/mit_6_5840_2024/2a_kvsrv/config.json | 16 +- .../data/mit_6_5840_2024/2b_kvsrv/config.json | 16 +- .../data/mit_6_5840_2024/3a_raft/config.json | 24 +- .../data/mit_6_5840_2024/3b_raft/config.json | 24 +- .../data/mit_6_5840_2024/3c_raft/config.json | 26 +- .../data/mit_6_5840_2024/3d_raft/config.json | 24 +- .../mit_6_5840_2024/4a_kvraft/config.json | 66 +- .../mit_6_5840_2024/4a_kvraft/preprocess.sh | 1560 ------------- .../4a_kvraft/starter_files/kvsrv/client.go | 83 + .../4a_kvraft/starter_files/kvsrv/common.go | 22 + .../4a_kvraft/starter_files/kvsrv/server.go | 84 + .../4a_kvraft/starter_files/mr/coordinator.go | 
141 ++ .../4a_kvraft/starter_files/mr/rpc.go | 81 + .../4a_kvraft/starter_files/mr/worker.go | 233 ++ .../starter_files/raft/append_entries.go | 214 ++ .../4a_kvraft/starter_files/raft/election.go | 123 + .../starter_files/raft/install_snapshot.go | 92 + .../4a_kvraft/starter_files/raft/persister.go | 70 + .../4a_kvraft/starter_files/raft/raft.go | 416 ++++ .../4a_kvraft/starter_files/raft/util.go | 16 + .../mit_6_5840_2024/4b_kvraft/config.json | 66 +- .../mit_6_5840_2024/4b_kvraft/preprocess.sh | 1560 ------------- .../4b_kvraft/starter_files/kvsrv/client.go | 83 + .../4b_kvraft/starter_files/kvsrv/common.go | 22 + .../4b_kvraft/starter_files/kvsrv/server.go | 84 + .../4b_kvraft/starter_files/mr/coordinator.go | 141 ++ .../4b_kvraft/starter_files/mr/rpc.go | 81 + .../4b_kvraft/starter_files/mr/worker.go | 233 ++ .../starter_files/raft/append_entries.go | 214 ++ .../4b_kvraft/starter_files/raft/election.go | 123 + .../starter_files/raft/install_snapshot.go | 92 + .../4b_kvraft/starter_files/raft/persister.go | 70 + .../4b_kvraft/starter_files/raft/raft.go | 416 ++++ .../4b_kvraft/starter_files/raft/util.go | 16 + .../mit_6_5840_2024/5a_shardkv/config.json | 90 +- .../mit_6_5840_2024/5a_shardkv/preprocess.sh | 2005 ----------------- .../5a_shardkv/starter_files/kvraft/client.go | 97 + .../5a_shardkv/starter_files/kvraft/common.go | 41 + .../5a_shardkv/starter_files/kvraft/server.go | 292 +++ .../5a_shardkv/starter_files/kvsrv/client.go | 83 + .../5a_shardkv/starter_files/kvsrv/common.go | 22 + .../5a_shardkv/starter_files/kvsrv/server.go | 84 + .../starter_files/mr/coordinator.go | 141 ++ .../5a_shardkv/starter_files/mr/rpc.go | 81 + .../5a_shardkv/starter_files/mr/worker.go | 233 ++ .../starter_files/raft/append_entries.go | 214 ++ .../5a_shardkv/starter_files/raft/election.go | 123 + .../starter_files/raft/install_snapshot.go | 92 + .../starter_files/raft/persister.go | 70 + .../5a_shardkv/starter_files/raft/raft.go | 416 ++++ .../5a_shardkv/starter_files/raft/util.go | 16 + .../mit_6_5840_2024/5b_shardkv/config.json | 92 +- .../mit_6_5840_2024/5b_shardkv/preprocess.sh | 2005 ----------------- .../5b_shardkv/starter_files/kvraft/client.go | 97 + .../5b_shardkv/starter_files/kvraft/common.go | 41 + .../5b_shardkv/starter_files/kvraft/server.go | 292 +++ .../5b_shardkv/starter_files/kvsrv/client.go | 83 + .../5b_shardkv/starter_files/kvsrv/common.go | 22 + .../5b_shardkv/starter_files/kvsrv/server.go | 84 + .../starter_files/mr/coordinator.go | 141 ++ .../5b_shardkv/starter_files/mr/rpc.go | 81 + .../5b_shardkv/starter_files/mr/worker.go | 233 ++ .../starter_files/raft/append_entries.go | 214 ++ .../5b_shardkv/starter_files/raft/election.go | 123 + .../starter_files/raft/install_snapshot.go | 92 + .../starter_files/raft/persister.go | 70 + .../5b_shardkv/starter_files/raft/raft.go | 416 ++++ .../5b_shardkv/starter_files/raft/util.go | 16 + benchmarks/courselab_bench/prepare_dataset.py | 10 +- benchmarks/courselab_bench/run_benchmark.py | 7 +- 74 files changed, 7722 insertions(+), 7153 deletions(-) create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/kvsrv/client.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/kvsrv/common.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/kvsrv/server.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/mr/coordinator.go create mode 100644 
benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/mr/rpc.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/mr/worker.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/append_entries.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/election.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/install_snapshot.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/persister.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/raft.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/util.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/kvsrv/client.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/kvsrv/common.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/kvsrv/server.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/mr/coordinator.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/mr/rpc.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/mr/worker.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/append_entries.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/election.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/install_snapshot.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/persister.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/raft.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/util.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvraft/client.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvraft/common.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvraft/server.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvsrv/client.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvsrv/common.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvsrv/server.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/mr/coordinator.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/mr/rpc.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/mr/worker.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/append_entries.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/election.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/install_snapshot.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/persister.go 
create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/raft.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/util.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvraft/client.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvraft/common.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvraft/server.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvsrv/client.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvsrv/common.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvsrv/server.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/mr/coordinator.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/mr/rpc.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/mr/worker.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/append_entries.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/election.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/install_snapshot.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/persister.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/raft.go create mode 100644 benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/util.go diff --git a/benchmarks/courselab_bench/README.md b/benchmarks/courselab_bench/README.md index f2bcf26..de51c1c 100644 --- a/benchmarks/courselab_bench/README.md +++ b/benchmarks/courselab_bench/README.md @@ -69,7 +69,9 @@ data/ ├── config.json # Task metadata ├── task.md # Problem statement ├── preprocess.sh # Setup script (runs before agent) - └── evaluate.sh # Evaluation script (determines pass/fail) + ├── evaluate.sh # Evaluation script (determines pass/fail) + └── starter_files/ # Optional: files to copy to container + └── ... ``` ### config.json @@ -86,6 +88,8 @@ Optional fields: - `tags`: List of topic tags - `repo_url`: Git repository to clone - `base_commit`: Git commit to checkout +- `starter_files`: List of files to copy from `starter_files/` directory to container (`src` is relative to `starter_files/`, `dest` is absolute path in container) +- `output_files`: List of files to copy from container to output directory after agent completes (`src` is absolute path in container, `dest` is relative to output directory) ### task.md @@ -96,9 +100,7 @@ Markdown file containing the problem statement given to the agent. Shell script that runs before the agent starts. Use this to: - Set up the environment -- If the lab depends on previous labs, copy reference implementations to prevent distractions - Create checksums of files that shouldn't be modified -- Initialize test data Exit with code 0 on success, non-zero on failure. @@ -111,13 +113,15 @@ Print verbose output for debugging (captured in results). ### Example Task -See `data/test_course/test__simple__echo/` for a minimal example. 
+See `data/test_course/test__simple__echo/` for a minimal example, or `data/mit_6_5840_2024/4a_kvraft/` for an example using `starter_files` and `output_files`. ## Adding New Tasks 1. If you are adding tasks for a new course, first add a new entry to [`/data/courses.json`](./data/courses.json) with the course metadata 2. Create a new folder: `data/{course_id}/{task_id}/` (where `{course_id}` matches the entry in `courses.json`) 3. Add the 4 required files: `config.json`, `task.md`, `preprocess.sh`, `evaluate.sh` for each task -4. Make scripts executable -5. Run `python prepare_dataset.py` to regenerate `tasks.jsonl` -6. Run the benchmark +4. (Optional) Create a `starter_files/` directory and add files that should be copied to the container +5. (Optional) Configure `starter_files` and `output_files` in `config.json` +6. Make scripts executable +7. Run `python prepare_dataset.py` to regenerate `tasks.jsonl` +8. Run the benchmark diff --git a/benchmarks/courselab_bench/courselab_bench/environment/docker.py b/benchmarks/courselab_bench/courselab_bench/environment/docker.py index eab7b47..c9ad412 100644 --- a/benchmarks/courselab_bench/courselab_bench/environment/docker.py +++ b/benchmarks/courselab_bench/courselab_bench/environment/docker.py @@ -1,6 +1,7 @@ import subprocess import uuid from typing import Any +from pathlib import Path from loguru import logger @@ -10,11 +11,13 @@ def __init__( image: str, timeout: int = 60, work_dir: str = "/workspace", + task_folder: Path | None = None, ): self.image = image self.timeout = timeout self.work_dir = work_dir self.container_id: str | None = None + self.task_folder = task_folder def setup(self, task: dict[str, Any]) -> None: self.container_id = self._start_container() @@ -23,6 +26,10 @@ def setup(self, task: dict[str, Any]) -> None: base_commit = task.get("base_commit") self._clone_repo(repo_url, base_commit) + starter_files = task.get("starter_files") + if starter_files and self.task_folder: + self._copy_starter_files(starter_files) + preprocess_script = task.get("preprocess_script") if preprocess_script: self._run_preprocess(preprocess_script) @@ -148,3 +155,72 @@ def _run_preprocess(self, preprocess_script: str) -> None: if result["returncode"] != 0: raise RuntimeError(f"Preprocess script failed: {result['output'][:200]}") + + def _copy_starter_files(self, starter_files: list[dict[str, str]]) -> None: + if not self.task_folder: + raise RuntimeError("task_folder not set, cannot copy starter files") + starter_files_dir = self.task_folder / "starter_files" + if not starter_files_dir.exists(): + raise RuntimeError(f"starter_files directory not found: {starter_files_dir}") + + for file_spec in starter_files: + src_rel = file_spec["src"] + dest = file_spec["dest"] + + src_path = starter_files_dir / src_rel + if not src_path.exists(): + raise RuntimeError(f"Starter file not found: {src_path}") + + logger.debug(f"Copying {src_path} to container:{dest}") + + parent_dir = str(Path(dest).parent) + self.execute(f"mkdir -p {parent_dir}") + cmd = [ + "docker", + "cp", + str(src_path), + f"{self.container_id}:{dest}", + ] + + try: + _result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=60, + check=True, + ) + except subprocess.CalledProcessError as e: + raise RuntimeError(f"Failed to copy starter file {src_path}: {e.stderr}") from e + + def copy_output_files(self, output_files: list[dict[str, str]], output_dir: Path) -> None: + if not self.container_id: + raise RuntimeError("Container not started") + + output_dir.mkdir(parents=True, 
exist_ok=True) + for file_spec in output_files: + src = file_spec["src"] + dest_rel = file_spec["dest"] + dest_path = output_dir / dest_rel + + dest_path.parent.mkdir(parents=True, exist_ok=True) + logger.debug(f"Copying container:{src} to {dest_path}") + cmd = [ + "docker", + "cp", + f"{self.container_id}:{src}", + str(dest_path), + ] + + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=60, + check=False, # Don't raise on error, file might not exist + ) + if result.returncode != 0: + logger.warning(f"Failed to copy output file {src}: {result.stderr}") + except Exception as e: + logger.warning(f"Failed to copy output file {src}: {e}") diff --git a/benchmarks/courselab_bench/courselab_bench/runner/executor.py b/benchmarks/courselab_bench/courselab_bench/runner/executor.py index d6e99fb..b7a41ec 100644 --- a/benchmarks/courselab_bench/courselab_bench/runner/executor.py +++ b/benchmarks/courselab_bench/courselab_bench/runner/executor.py @@ -1,6 +1,7 @@ import time from datetime import datetime from typing import Any +from pathlib import Path from loguru import logger @@ -12,7 +13,9 @@ def _run_evaluate_script(env: Any, evaluate_script: str, timeout: int) -> dict[s return result -def execute_task(task: dict[str, Any], agent: Any, env: Any) -> dict[str, Any]: +def execute_task( + task: dict[str, Any], agent: Any, env: Any, output_dir: Path | None = None +) -> dict[str, Any]: instance_id = task["instance_id"] start_time = time.time() @@ -65,6 +68,15 @@ def execute_task(task: dict[str, Any], agent: Any, env: Any) -> dict[str, Any]: if test_result is None: test_result = {"output": "[ERROR: No test result]", "returncode": -1} + output_files = task.get("output_files") + if output_files and output_dir: + try: + files_output_dir = output_dir / "files" / instance_id + logger.info(f"Copying output files to {files_output_dir}") + env.copy_output_files(output_files, files_output_dir) + except Exception as e: + logger.error(f"Failed to copy output files: {e}") + duration = time.time() - start_time result = { diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/config.json index 51d3c57..215d88d 100644 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/config.json +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/1_mapreduce/config.json @@ -9,5 +9,19 @@ "concurrency", "go" ], - "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", + "output_files": [ + { + "src": "/workspace/src/mr/coordinator.go", + "dest": "src/mr/coordinator.go" + }, + { + "src": "/workspace/src/mr/rpc.go", + "dest": "src/mr/rpc.go" + }, + { + "src": "/workspace/src/mr/worker.go", + "dest": "src/mr/worker.go" + } + ] } \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/config.json index 03b2dff..34977ad 100644 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/config.json +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/2a_kvsrv/config.json @@ -9,5 +9,19 @@ "rpc", "go" ], - "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", + "output_files": [ + { + "src": "/workspace/src/kvsrv/client.go", + "dest": "src/kvsrv/client.go" + }, + { + "src": "/workspace/src/kvsrv/common.go", + "dest": "src/kvsrv/common.go" + }, + { + "src": 
"/workspace/src/kvsrv/server.go", + "dest": "src/kvsrv/server.go" + } + ] } \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/config.json index 517fa18..627c849 100644 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/config.json +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/2b_kvsrv/config.json @@ -10,5 +10,19 @@ "fault-tolerance", "go" ], - "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", + "output_files": [ + { + "src": "/workspace/src/kvsrv/client.go", + "dest": "src/kvsrv/client.go" + }, + { + "src": "/workspace/src/kvsrv/common.go", + "dest": "src/kvsrv/common.go" + }, + { + "src": "/workspace/src/kvsrv/server.go", + "dest": "src/kvsrv/server.go" + } + ] } \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/config.json index a7a1da3..6937604 100644 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/config.json +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3a_raft/config.json @@ -9,5 +9,27 @@ "consensus", "go" ], - "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", + "output_files": [ + { + "src": "/workspace/src/raft/append_entries.go", + "dest": "src/raft/append_entries.go" + }, + { + "src": "/workspace/src/raft/election.go", + "dest": "src/raft/election.go" + }, + { + "src": "/workspace/src/raft/install_snapshot.go", + "dest": "src/raft/install_snapshot.go" + }, + { + "src": "/workspace/src/raft/raft.go", + "dest": "src/raft/raft.go" + }, + { + "src": "/workspace/src/raft/util.go", + "dest": "src/raft/util.go" + } + ] } \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/config.json index 39823b9..788d325 100644 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/config.json +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3b_raft/config.json @@ -9,5 +9,27 @@ "consensus", "go" ], - "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", + "output_files": [ + { + "src": "/workspace/src/raft/append_entries.go", + "dest": "src/raft/append_entries.go" + }, + { + "src": "/workspace/src/raft/election.go", + "dest": "src/raft/election.go" + }, + { + "src": "/workspace/src/raft/install_snapshot.go", + "dest": "src/raft/install_snapshot.go" + }, + { + "src": "/workspace/src/raft/raft.go", + "dest": "src/raft/raft.go" + }, + { + "src": "/workspace/src/raft/util.go", + "dest": "src/raft/util.go" + } + ] } \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/config.json index c61e661..aa5d720 100644 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/config.json +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3c_raft/config.json @@ -9,5 +9,27 @@ "consensus", "go" ], - "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" -} \ No newline at end of file + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", + "output_files": [ + { + "src": "/workspace/src/raft/append_entries.go", + "dest": "src/raft/append_entries.go" + }, + { + "src": "/workspace/src/raft/election.go", + "dest": 
"src/raft/election.go" + }, + { + "src": "/workspace/src/raft/install_snapshot.go", + "dest": "src/raft/install_snapshot.go" + }, + { + "src": "/workspace/src/raft/raft.go", + "dest": "src/raft/raft.go" + }, + { + "src": "/workspace/src/raft/util.go", + "dest": "src/raft/util.go" + } + ] +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/config.json index 238f9c7..50ec04b 100644 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/config.json +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/3d_raft/config.json @@ -9,5 +9,27 @@ "consensus", "go" ], - "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", + "output_files": [ + { + "src": "/workspace/src/raft/append_entries.go", + "dest": "src/raft/append_entries.go" + }, + { + "src": "/workspace/src/raft/election.go", + "dest": "src/raft/election.go" + }, + { + "src": "/workspace/src/raft/install_snapshot.go", + "dest": "src/raft/install_snapshot.go" + }, + { + "src": "/workspace/src/raft/raft.go", + "dest": "src/raft/raft.go" + }, + { + "src": "/workspace/src/raft/util.go", + "dest": "src/raft/util.go" + } + ] } \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/config.json index 025056c..c26a348 100644 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/config.json +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/config.json @@ -10,5 +10,69 @@ "fault-tolerance", "go" ], - "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", + "starter_files": [ + { + "src": "raft/persister.go", + "dest": "/workspace/src/raft/persister.go" + }, + { + "src": "raft/election.go", + "dest": "/workspace/src/raft/election.go" + }, + { + "src": "raft/append_entries.go", + "dest": "/workspace/src/raft/append_entries.go" + }, + { + "src": "raft/util.go", + "dest": "/workspace/src/raft/util.go" + }, + { + "src": "raft/install_snapshot.go", + "dest": "/workspace/src/raft/install_snapshot.go" + }, + { + "src": "raft/raft.go", + "dest": "/workspace/src/raft/raft.go" + }, + { + "src": "kvsrv/server.go", + "dest": "/workspace/src/kvsrv/server.go" + }, + { + "src": "kvsrv/client.go", + "dest": "/workspace/src/kvsrv/client.go" + }, + { + "src": "kvsrv/common.go", + "dest": "/workspace/src/kvsrv/common.go" + }, + { + "src": "mr/coordinator.go", + "dest": "/workspace/src/mr/coordinator.go" + }, + { + "src": "mr/worker.go", + "dest": "/workspace/src/mr/worker.go" + }, + { + "src": "mr/rpc.go", + "dest": "/workspace/src/mr/rpc.go" + } + ], + "output_files": [ + { + "src": "/workspace/src/kvraft/client.go", + "dest": "src/kvraft/client.go" + }, + { + "src": "/workspace/src/kvraft/common.go", + "dest": "src/kvraft/common.go" + }, + { + "src": "/workspace/src/kvraft/server.go", + "dest": "src/kvraft/server.go" + } + ] } \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/preprocess.sh index 3523e6a..27f8ae8 100755 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/preprocess.sh +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/preprocess.sh @@ -5,1566 +5,6 @@ echo '=== Preprocessing 4A Kvraft ===' cd /workspace -echo 'Copying reference implementations from previous 
labs...' - -echo 'Copying raft implementation...' -mkdir -p src/raft - -cat > src/raft/raft.go << 'FILE_EOF_raft_raft_go' -package raft - -// -// this is an outline of the API that raft must expose to -// the service (or tester). see comments below for -// each of these functions for more details. -// -// rf = Make(...) -// create a new Raft server. -// rf.Start(command interface{}) (index, term, isleader) -// start agreement on a new log entry -// rf.GetState() (term, isLeader) -// ask a Raft for its current term, and whether it thinks it is leader -// ApplyMsg -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester) -// in the same server. -// - -import ( - // "bytes" - - "bytes" - "log" - "sync" - "sync/atomic" - "time" - - // "6.5840/labgob" - "6.5840/labgob" - "6.5840/labrpc" -) - -// as each Raft peer becomes aware that successive log entries are -// committed, the peer should send an ApplyMsg to the service (or -// tester) on the same server, via the applyCh passed to Make(). set -// CommandValid to true to indicate that the ApplyMsg contains a newly -// committed log entry. -// -// in part 3D you'll want to send other kinds of messages (e.g., -// snapshots) on the applyCh, but set CommandValid to false for these -// other uses. -type ApplyMsg struct { - CommandValid bool - Command interface{} - CommandIndex int - CommandTerm int - - // For 3D: - SnapshotValid bool - Snapshot []byte - SnapshotTerm int - SnapshotIndex int -} - -type Entry struct { - Term int - Index int - Command interface{} -} - -// Base struct for common fields -type BaseRPC struct { - Term int -} - -// Implement RaftRPC interface for BaseRPC -func (b *BaseRPC) GetTerm() int { - return b.Term -} - -func (b *BaseRPC) SetTerm(term int) { - b.Term = term -} - -// RaftRPC interface -type RaftRPC interface { - GetTerm() int - SetTerm(int) -} - -type ServerState int - -const ( - FOLLOWER ServerState = iota - CANDIDATE - LEADER -) - -// A Go object implementing a single Raft peer. -type Raft struct { - mu sync.Mutex // Lock to protect shared access to this peer's state - peers []*labrpc.ClientEnd // RPC end points of all peers - persister *Persister // Object to hold this peer's persisted state - me int // this peer's index into peers[] - dead int32 // set by Kill() - heartbeatTimeout time.Duration - electionTimeout time.Duration - electionTimeStamp time.Time - applyCh chan ApplyMsg - - // state a Raft server must maintain. 
-	broadcasterCond []*sync.Cond
-	applierCond     *sync.Cond
-
-	// server state
-	state ServerState
-
-	// presistent state on all servers
-	currentTerm int     // latest term server has seen (initialized to 0 on first boot, increases monotonically)
-	votedFor    int     // candidateId that received vote in current term (or null if none)
-	logs        []Entry // log entries; each entry contains command for state machine, and term when entry was received by leader (first index is 1)
-
-	// volatile state on all servers
-	commitIndex int // index of highest log entry known to be committed (initialized to 0, increases monotonically)
-	lastApplied int // index of highest log entry applied to state machine (initialized to 0, increases monotonically)
-
-	// volatile state on leaders (reinitialized after election)
-	nextIndex  []int // for each server, index of the next log entry to send to that server (initialized to leader last log index + 1)
-	matchIndex []int // for each server, index of highest log entry known to be replicated on server (initialized to 0, increases monotonically)
-
-	// snapshot msg
-	smsg *ApplyMsg
-}
-
-// return currentTerm and whether this server
-// believes it is the leader.
-func (rf *Raft) GetState() (int, bool) {
-	rf.mu.Lock()
-	defer rf.mu.Unlock()
-	return rf.currentTerm, rf.state == LEADER
-}
-
-func (rf *Raft) encodeState() []byte {
-	w := new(bytes.Buffer)
-	e := labgob.NewEncoder(w)
-	e.Encode(rf.currentTerm)
-	e.Encode(rf.votedFor)
-	e.Encode(rf.logs)
-	return w.Bytes()
-}
-
-// save Raft's persistent state to stable storage,
-// where it can later be retrieved after a crash and restart.
-// see paper's Figure 2 for a description of what should be persistent.
-// before you've implemented snapshots, you should pass nil as the
-// second argument to persister.Save().
-// after you've implemented snapshots, pass the current snapshot
-// (or nil if there's not yet a snapshot).
-func (rf *Raft) persist() {
-	if rf.persister.ReadSnapshot() != nil {
-		rf.persister.Save(rf.encodeState(), rf.persister.ReadSnapshot())
-	} else {
-		rf.persister.Save(rf.encodeState(), nil)
-	}
-}
-
-// restore previously persisted state.
-func (rf *Raft) readPersist(data []byte) {
-	if data == nil || len(data) < 1 { // bootstrap without any state
-		return
-	}
-	r := bytes.NewBuffer(data)
-	d := labgob.NewDecoder(r)
-	var currentTerm int
-	var votedFor int
-	var logs []Entry
-
-	if d.Decode(&currentTerm) != nil || d.Decode(&votedFor) != nil || d.Decode(&logs) != nil {
-		log.Fatal("failed to read persist\n")
-	} else {
-		DPrintf("[%d]: read persist, currentTerm: %d, votedFor: %d, logs: %v\n", rf.me, currentTerm, votedFor, logs)
-		rf.currentTerm = currentTerm
-		rf.votedFor = votedFor
-		rf.logs = logs
-		rf.lastApplied = rf.logs[0].Index
-		rf.commitIndex = rf.logs[0].Index
-	}
-}
-
-// the service says it has created a snapshot that has
-// all info up to and including index. this means the
-// service no longer needs the log through (and including)
-// that index. Raft should now trim its log as much as possible.
-func (rf *Raft) Snapshot(index int, snapshot []byte) {
-	// Your code here (3D).
-	rf.mu.Lock()
-	defer rf.mu.Unlock()
-	// if the snapshot is outdated, just ignore it
-	if rf.logs[0].Index >= index {
-		return
-	}
-	firstLogIndex := rf.logs[0].Index
-	trimLen := index - firstLogIndex
-	// trim the logs
-	rf.logs = append([]Entry{}, rf.logs[trimLen:]...)
-	rf.logs[0].Command = nil
-	rf.persister.Save(rf.encodeState(), snapshot)
-}
-
-// the service using Raft (e.g. 
a k/v server) wants to start -// agreement on the next command to be appended to Raft's log. if this -// server isn't the leader, returns false. otherwise start the -// agreement and return immediately. there is no guarantee that this -// command will ever be committed to the Raft log, since the leader -// may fail or lose an election. even if the Raft instance has been killed, -// this function should return gracefully. -// -// the first return value is the index that the command will appear at -// if it's ever committed. the second return value is the current -// term. the third return value is true if this server believes it is -// the leader. -func (rf *Raft) Start(command interface{}) (int, int, bool) { - rf.mu.Lock() - defer rf.mu.Unlock() - if rf.state != LEADER { - return -1, -1, false - } - defer DPrintf("(Start) [%d]: command %+v, index:%d, term: %d\n", rf.me, command, rf.logs[len(rf.logs)-1].Index, rf.currentTerm) - rf.logs = append(rf.logs, Entry{ - Term: rf.currentTerm, - Index: rf.logs[len(rf.logs)-1].Index + 1, - Command: command, - }) - rf.broadcastAppendEntries(false) - // Your code here (3B). - return rf.logs[len(rf.logs)-1].Index, rf.currentTerm, true -} - -// Warning: this function is not thread-safe -func (rf *Raft) resetNewTermState(targetTerm int) { - DPrintf("(ResetTerm)[%d]: received newer term, set term to %d\n", rf.me, targetTerm) - if rf.currentTerm < targetTerm { - rf.votedFor = -1 - } - rf.currentTerm = targetTerm - rf.state = FOLLOWER // reset to follower -} - -// Reply false if term < currentTerm (§5.1) -// If RPC request contains term T > currentTerm: -// set currentTerm = T, convert to follower (§5.1) -func (rf *Raft) checkRequestTerm(args, reply RaftRPC) bool { - term := args.GetTerm() - defer reply.SetTerm(rf.currentTerm) - if term < rf.currentTerm { - return false - } - if term > rf.currentTerm { - rf.resetNewTermState(term) - } - return true -} - -// If RPC request or response contains term T > currentTerm: -// set currentTerm = T, convert to follower (§5.1) -func (rf *Raft) checkResponseTerm(args, reply RaftRPC, isElection bool) bool { - argsTerm := args.GetTerm() - replyTerm := reply.GetTerm() - if replyTerm > argsTerm { - rf.resetNewTermState(replyTerm) - rf.resetElectionTimer() - return false - } - return isElection || (rf.state == LEADER) -} - -// the tester doesn't halt goroutines created by Raft after each test, -// but it does call the Kill() method. your code can use killed() to -// check whether Kill() has been called. the use of atomic avoids the -// need for a lock. -// -// the issue is that long-running goroutines use memory and may chew -// up CPU time, perhaps causing later tests to fail and generating -// confusing debug output. any goroutine with a long-running loop -// should call killed() to check whether it should stop. -func (rf *Raft) Kill() { - atomic.StoreInt32(&rf.dead, 1) - // Your code here, if desired. 
-} - -func (rf *Raft) killed() bool { - z := atomic.LoadInt32(&rf.dead) - return z == 1 -} - -// a dedicated applier goroutine to guarantee that each log will be push into applyCh exactly once, ensuring that service's applying entries and raft's committing entries can be parallel -func (rf *Raft) applier() { - for !rf.killed() { - rf.mu.Lock() - // if there is no need to apply entries, just release CPU and wait other goroutine's signal if they commit new entries - for rf.lastApplied >= rf.commitIndex { - rf.applierCond.Wait() - } - firstLogIndex := rf.logs[0].Index - commitIndex, lastApplied := rf.commitIndex, rf.lastApplied - DPrintf("(applier) [%d]: commitIndex: %d, lastApplied: %d, logFirstIndex: %d, logLastIndex: %d\n", rf.me, commitIndex, lastApplied, firstLogIndex, rf.logs[len(rf.logs)-1].Index) - entries := make([]Entry, commitIndex-lastApplied) - copy(entries, rf.logs[lastApplied+1-firstLogIndex:commitIndex+1-firstLogIndex]) - if rf.smsg != nil { - msg := rf.smsg - rf.smsg = nil - rf.mu.Unlock() - rf.applyCh <- *msg - } else { - rf.mu.Unlock() - } - for _, entry := range entries { - DPrintf("(applier) [%d]: apply entry %+v\n", rf.me, entry) - rf.applyCh <- ApplyMsg{ - CommandValid: true, - Command: entry.Command, - CommandTerm: entry.Term, - CommandIndex: entry.Index, - } - } - rf.mu.Lock() - // use commitIndex rather than rf.commitIndex because rf.commitIndex may change during the Unlock() and Lock() - // use Max(rf.lastApplied, commitIndex) rather than commitIndex directly to avoid concurrently InstallSnapshot rpc causing lastApplied to rollback - if rf.lastApplied < commitIndex { - rf.lastApplied = commitIndex - } - rf.mu.Unlock() - } -} - -/** - * Lets illustrate the time line of the ticker function - * e: election timeout - * h: heartbeat timeout - * - * ---- h ---- h ---- h ---- h ---- h ---- ... - * - * First, the server will wake up each fixed heartbeat timeout. This timeout is - * relatively shorter than the election timeout. If the server is not a leader, - * it basically do nothing about heartbeat. - * - * However, everytime when server wake up, it will check if the election timeout - * is reached. It might start a new election, if it is not a leader. - * - * v election timeout found! - * ---- h1 ---- h2 ---- h3 ---- h ---- h ---- ... - * --------- e1 ------ e2 ------------ e ---- ... - * - * Reseting a new election timeout when the server receives a heartbeat or a - * vote from another server prevents the election. One shortcomming of the - * current implementation is that the election timeout does not trigger a new - * election immediately. It will wait until the next heartbeat timeout. - */ -func (rf *Raft) ticker() { - for !rf.killed() { - rf.mu.Lock() - if rf.state == LEADER { - rf.broadcastAppendEntries(true) - } else if rf.isElectionTimeout() { - rf.startElection() - } - rf.mu.Unlock() - time.Sleep(rf.heartbeatTimeout) - } -} - -// the service or tester wants to create a Raft server. the ports -// of all the Raft servers (including this one) are in peers[]. this -// server's port is peers[me]. all the servers' peers[] arrays -// have the same order. persister is a place for this server to -// save its persistent state, and also initially holds the most -// recent saved state, if any. applyCh is a channel on which the -// tester or service expects Raft to send ApplyMsg messages. -// Make() must return quickly, so it should start goroutines -// for any long-running work. 
-func Make(peers []*labrpc.ClientEnd, me int, - persister *Persister, applyCh chan ApplyMsg) *Raft { - rf := &Raft{} - rf.peers = peers - rf.persister = persister - rf.me = me - rf.applyCh = applyCh - rf.heartbeatTimeout = 125 * time.Millisecond - rf.resetElectionTimer() - rf.state = FOLLOWER - rf.votedFor = -1 - rf.logs = make([]Entry, 0) - - // dummy entry to make the index start from 1 - rf.logs = append(rf.logs, Entry{0, 0, nil}) - - rf.commitIndex = 0 - rf.lastApplied = 0 - - rf.applierCond = sync.NewCond(&rf.mu) - rf.broadcasterCond = make([]*sync.Cond, len(peers)) - - rf.nextIndex = make([]int, len(peers)) - rf.matchIndex = make([]int, len(peers)) - - for id := range peers { - rf.nextIndex[id] = 1 - if id != rf.me { - rf.broadcasterCond[id] = sync.NewCond(&sync.Mutex{}) - go rf.broadcaster(id) - } - } - - rf.smsg = nil - - // initialize from state persisted before a crash - rf.readPersist(persister.ReadRaftState()) - - // start ticker goroutine to start elections - go rf.ticker() - - go rf.applier() - - return rf -} - -FILE_EOF_raft_raft_go - -cat > src/raft/election.go << 'FILE_EOF_raft_election_go' -package raft - -import ( - "math/rand" - "sync/atomic" - "time" -) - -// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 - -type RequestVoteArgs struct { - BaseRPC // candidate's term - CandidateId int // candidate requesting vote - LastLogIndex int // index of candidate's last log entry - LastLogTerm int // term of candidate's last log entry -} - -type RequestVoteReply struct { - BaseRPC // currentTerm, for candidate to update itself - VoteGranted bool // true means candidate received vote -} - -// RequestVote RPC handler -// Restart your election timer if you grant a vote to another peer. -func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - - reply.VoteGranted = false - - DPrintf("(RequestVote) [%d]: receive vote request from %d, term %d\n", rf.me, args.CandidateId, args.Term) - - if !rf.checkRequestTerm(args, reply) { - return - } - - if (rf.votedFor == -1 || rf.votedFor == args.CandidateId) && rf.isUpToDate(args) { - reply.VoteGranted = true - rf.votedFor = args.CandidateId - rf.resetElectionTimer() - } -} - -func (rf *Raft) isUpToDate(args *RequestVoteArgs) bool { - lastLog := rf.logs[len(rf.logs)-1] - candidateIndex := args.LastLogIndex - candidateTerm := args.LastLogTerm - return candidateTerm > lastLog.Term || (candidateTerm == lastLog.Term && candidateIndex >= lastLog.Index) -} - -func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, voteCount *int32) { - reply := &RequestVoteReply{} - ok := rf.peers[server].Call("Raft.RequestVote", args, reply) - if !ok { - return - } - - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - - if !rf.checkResponseTerm(args, reply, true) { - return - } - - if !reply.VoteGranted { - return - } - - DPrintf("(RequestVote) [%d]: received vote from %d, voteCount: %d\n", rf.me, server, *voteCount) - - // If votes received from majority of servers: become leader - if atomic.AddInt32(voteCount, 1) > int32(len(rf.peers)/2) && - rf.state == CANDIDATE && - rf.currentTerm == args.Term { - rf.state = LEADER - lastLogIndex := rf.logs[len(rf.logs)-1].Index - for i := range rf.peers { - rf.nextIndex[i] = lastLogIndex + 1 - rf.matchIndex[i] = 0 - } - DPrintf("[%d]: become leader to term %d\n", rf.me, rf.currentTerm) - // send initial empty AppendEntries RPCs (heartbeat) to each server immediately - rf.broadcastAppendEntries(true) 
- } - DPrintf("(RequestVote) [%d]: voteCount: %d\n", rf.me, *voteCount) -} - -func (rf *Raft) startElection() { - rf.currentTerm++ - rf.state = CANDIDATE - rf.votedFor = rf.me - rf.resetElectionTimer() - DPrintf("(RequestVote) [%d]: start election, term %d", rf.me, rf.currentTerm) - lastLog := rf.logs[len(rf.logs)-1] - - voteCount := int32(1) - args := RequestVoteArgs{ - BaseRPC: BaseRPC{rf.currentTerm}, - CandidateId: rf.me, - LastLogIndex: lastLog.Index, - LastLogTerm: lastLog.Term, - } - - for id := range rf.peers { - if id == rf.me { - continue - } - go rf.sendRequestVote(id, &args, &voteCount) - } -} - -func (rf *Raft) resetElectionTimer() { - // election timeout range from 350 to 550 - ms := 350 + (rand.Int63() % 200) - rf.electionTimeStamp = time.Now() - rf.electionTimeout = time.Duration(ms) * time.Millisecond -} - -func (rf *Raft) isElectionTimeout() bool { - return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) -} - -FILE_EOF_raft_election_go - -cat > src/raft/append_entries.go << 'FILE_EOF_raft_append_entries_go' -package raft - -// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 - -type AppendEntriesArgs struct { - BaseRPC // leader's term - LeaderId int // so follower can redirect clients - PrevLogIndex int // index of log entry immediately preceding new ones - PrevLogTerm int // term of prevLogIndex entry - Entries []Entry // log entries to store (empty for heartbeat; may send more than one for efficiency) - CommitIndex int // leader's commitIndex -} - -type AppendEntriesReply struct { - BaseRPC // currentTerm, for leader to update itself - Success bool // true if follower contained entry matching prevLogIndex and prevLogTerm - ConflictIndex int // the index of the first conflicting entry -} - -// AppendEntries RPC handler -// Reset the election timer if you get an AppendEntries RPC from the current leader -// (i.e., if the term of the AppendEntries arguments is outdated, you should not reset your timer); -func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - DPrintf("(AppendEntries) [%d] recieve from %d, Term: %d, PrevLogIndex: %d, PrevLogTerm: %d\n", rf.me, args.LeaderId, args.Term, args.PrevLogIndex, args.PrevLogTerm) - - reply.Success = false - reply.ConflictIndex = -1 - - if !rf.checkRequestTerm(args, reply) { - return - } - - if rf.state == CANDIDATE { - rf.state = FOLLOWER - } - - rf.resetElectionTimer() - - prevLogIndex := args.PrevLogIndex - rf.logs[0].Index - - if prevLogIndex < 0 { - // force to send a snapshot - reply.ConflictIndex = 0 - return - } - - // Reply false if log doesn’t contain an entry at prevLogIndex - // whose term matches prevLogTerm (§5.3) - if prevLogIndex >= len(rf.logs) { - reply.ConflictIndex = rf.logs[len(rf.logs)-1].Index - return - } - - // If an existing entry conflicts with a new one (same index - // but different terms), delete the existing entry and all that - // follow it (§5.3) - if rf.logs[prevLogIndex].Term != args.PrevLogTerm { - // optimization - curTerm := rf.logs[prevLogIndex].Term - var conflictIndex int - for i := prevLogIndex; i > 0; i-- { - if rf.logs[i-1].Term != curTerm { - conflictIndex = i - break - } - } - reply.ConflictIndex = conflictIndex + rf.logs[0].Index - return - } - for idx, entry := range args.Entries { - logIndex := entry.Index - rf.logs[0].Index - if logIndex >= len(rf.logs) || rf.logs[logIndex].Term != entry.Term { - DPrintf("(AppendEntries) [%d] append logs: %v\n", rf.me, 
args.Entries) - rf.logs = append([]Entry{}, append(rf.logs[:logIndex], args.Entries[idx:]...)...) - break - } - } - reply.Success = true - if args.CommitIndex > rf.commitIndex { - rf.commitIndex = args.CommitIndex - if args.CommitIndex-rf.logs[0].Index >= len(rf.logs) { - rf.commitIndex = rf.logs[len(rf.logs)-1].Index - } - } - rf.applierCond.Signal() -} - -func (rf *Raft) sendAppendEntries(server int, args *AppendEntriesArgs) { - reply := &AppendEntriesReply{} - ok := rf.peers[server].Call("Raft.AppendEntries", args, reply) - if !ok { - return - } - - DPrintf("(AppendEntries) [%d] recieve reply from %d, Term: %d, Success: %v, ConflictIndex: %d\n", rf.me, server, reply.Term, reply.Success, reply.ConflictIndex) - - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - - if !rf.checkResponseTerm(args, reply, false) { - return - } - // If successful: update nextIndex and matchIndex for - // follower (§5.3) - if reply.Success { - if len(args.Entries) > 0 { - rf.nextIndex[server] = args.Entries[len(args.Entries)-1].Index + 1 - } - rf.matchIndex[server] = rf.nextIndex[server] - 1 - for _, log := range rf.logs { - index := log.Index - count := 1 - for peer := range rf.peers { - if peer != rf.me && rf.matchIndex[peer] >= index { - count++ - } - } - // If there exists an N such that N > commitIndex, a majority - // of matchIndex[i] ≥ N, and log[N].term == currentTerm: - // set commitIndex = N (§5.3, §5.4). - if count > len(rf.peers)/2 && index > rf.commitIndex && log.Term == rf.currentTerm { - rf.commitIndex = index - } - } - } else { - if reply.ConflictIndex != -1 { - rf.nextIndex[server] = reply.ConflictIndex - 1 - } else { - rf.nextIndex[server] = rf.nextIndex[server] - 1 - } - if rf.nextIndex[server] < 1 { - rf.nextIndex[server] = 1 - } - } - DPrintf("(AppendEntries) [%d] nextIndex: %v, matchIndex: %v, commitIndex: %d\n", rf.me, rf.nextIndex, rf.matchIndex, rf.commitIndex) - rf.applierCond.Signal() -} - -func (rf *Raft) broadcastAppendEntries(isHeartBeat bool) { - for peer := range rf.peers { - if peer != rf.me { - // if it is a heartbeat we dont care the linearizability of logs append - if isHeartBeat { - args := rf.prepareReplicationArgs(peer) - go rf.sendReplicationRPC(peer, args) - } else { - rf.broadcasterCond[peer].Signal() - } - } - } -} - -func (rf *Raft) prepareReplicationArgs(peer int) interface{} { - if rf.nextIndex[peer] > rf.logs[0].Index { - firstLog := rf.logs[0] - nextIndex := rf.nextIndex[peer] - firstLog.Index - prevLog := rf.logs[nextIndex-1] - logs := make([]Entry, len(rf.logs[nextIndex:])) - copy(logs, rf.logs[nextIndex:]) - return &AppendEntriesArgs{ - BaseRPC: BaseRPC{rf.currentTerm}, - LeaderId: rf.me, - PrevLogIndex: prevLog.Index, - PrevLogTerm: prevLog.Term, - Entries: logs, - CommitIndex: rf.commitIndex, - } - } else { - return &InstallSnapshotArgs{ - BaseRPC: BaseRPC{rf.currentTerm}, - LeaderId: rf.me, - LastIncludedIndex: rf.logs[0].Index, - LastIncludedTerm: rf.logs[0].Term, - Offset: 0, - Data: rf.persister.ReadSnapshot(), - Done: true, - } - } -} - -func (rf *Raft) sendReplicationRPC(peer int, args interface{}) { - switch v := args.(type) { - case *AppendEntriesArgs: - rf.sendAppendEntries(peer, v) - case *InstallSnapshotArgs: - rf.sendInstallSnapshot(peer, v) - default: - panic("(sendReplicationRPC) SHOULD NOT REACH") - } -} - -func (rf *Raft) isReplicationNeeded(peer int) bool { - return rf.state == LEADER && rf.matchIndex[peer] < rf.logs[len(rf.logs)-1].Index -} - -func (rf *Raft) broadcaster(peer int) { - rf.broadcasterCond[peer].L.Lock() - defer 
rf.broadcasterCond[peer].L.Unlock() - for !rf.killed() { - rf.mu.Lock() - for !rf.isReplicationNeeded(peer) { - rf.mu.Unlock() - rf.broadcasterCond[peer].Wait() - rf.mu.Lock() - } - args := rf.prepareReplicationArgs(peer) - rf.mu.Unlock() - rf.sendReplicationRPC(peer, args) - } -} - -FILE_EOF_raft_append_entries_go - -cat > src/raft/install_snapshot.go << 'FILE_EOF_raft_install_snapshot_go' -package raft - -type InstallSnapshotArgs struct { - BaseRPC - LeaderId int - LastIncludedIndex int - LastIncludedTerm int - Offset int - Data []byte - Done bool -} - -type InstallSnapshotReply struct { - BaseRPC -} - -// InstallSnapshot RPC handler -func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) { - rf.mu.Lock() - defer rf.mu.Unlock() - - if !rf.checkRequestTerm(args, reply) { - return - } - - if args.LastIncludedIndex <= rf.commitIndex { - return - } - prevCommitIndex := rf.commitIndex - prevLastApplied := rf.lastApplied - defer DPrintf("(InstallSnapshot) [%d]: LastIncludedIndex: %d, LastIncludedTerm: %d, prevCommitIndex: %d, prevLastApplied: %d\n", rf.me, args.LastIncludedIndex, args.LastIncludedTerm, prevCommitIndex, prevLastApplied) - rf.resetElectionTimer() - - rf.commitIndex = args.LastIncludedIndex - rf.lastApplied = args.LastIncludedIndex - // 2. Create new snapshot file if first chunk (offset is 0) - // 3. Write data into snapshot file at given offset - // 4. Reply and wait for more data chunks if done is false - if !args.Done { - return - } - // 5. Save snapshot file, discard any existing or partial snapshot with a - // smaller index - // 6. If existing log entry has same index and term as snapshot’s last - // included entry, retain log entries following it and reply - // 7. Discard the entire log - // 8. Reset state machine using snapshot contents (and load snapshot’s - // cluster configuration) - firstLogIndex := rf.logs[0].Index - if firstLogIndex <= args.LastIncludedIndex { - rf.logs = append([]Entry{}, Entry{ - Index: args.LastIncludedIndex, - Term: args.LastIncludedTerm, - Command: nil, - }) - } else if firstLogIndex < args.LastIncludedIndex { - trimLen := args.LastIncludedIndex - firstLogIndex - rf.logs = append([]Entry{}, rf.logs[trimLen:]...) - rf.logs[0].Command = nil - } - rf.persister.Save(rf.encodeState(), args.Data) - rf.smsg = &ApplyMsg{ - SnapshotValid: true, - Snapshot: args.Data, - SnapshotTerm: args.LastIncludedTerm, - SnapshotIndex: args.LastIncludedIndex, - } -} - -func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { - reply := &InstallSnapshotReply{} - ok := rf.peers[server].Call("Raft.InstallSnapshot", args, reply) - if !ok { - return - } - - rf.mu.Lock() - defer rf.mu.Unlock() - - if !rf.checkResponseTerm(args, reply, false) { - return - } - - if args.LastIncludedIndex != rf.logs[0].Index { - return - } - - rf.nextIndex[server] = args.LastIncludedIndex + 1 - rf.matchIndex[server] = args.LastIncludedIndex - - rf.persister.Save(rf.encodeState(), args.Data) -} - -FILE_EOF_raft_install_snapshot_go - -cat > src/raft/util.go << 'FILE_EOF_raft_util_go' -package raft - -import ( - "log" - "os" -) - -// Debugging -var Debug = os.Getenv("DEBUG") == "1" - -func DPrintf(format string, a ...interface{}) { - if !Debug { - return - } - log.Printf(format, a...) -} - -FILE_EOF_raft_util_go - -echo 'Copying kvsrv implementation...' 
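// The leader-side commit rule used in sendAppendEntries above (advance commitIndex to the
// highest index N that is replicated on a majority and whose term equals currentTerm) can be
// exercised in isolation. A minimal, self-contained sketch of that rule, assuming a plain log
// slice and one matchIndex slot per peer; advanceCommitIndex and simpleEntry are illustrative
// names, not part of the lab code.
package main

import "fmt"

type simpleEntry struct {
	Index, Term int
}

// advanceCommitIndex returns the new commit index for a leader with the given log,
// matchIndex (one slot per peer; the leader's own slot is unused), current term and
// current commit index. It mirrors the rule quoted in append_entries.go: pick the
// largest N > commitIndex such that a majority of matchIndex[i] >= N and
// log[N].Term == currentTerm.
func advanceCommitIndex(log []simpleEntry, matchIndex []int, me, currentTerm, commitIndex int) int {
	for _, e := range log {
		if e.Index <= commitIndex || e.Term != currentTerm {
			continue // only entries from the current term are committed directly
		}
		count := 1 // the leader always has its own entry
		for peer, m := range matchIndex {
			if peer != me && m >= e.Index {
				count++
			}
		}
		if count > len(matchIndex)/2 {
			commitIndex = e.Index
		}
	}
	return commitIndex
}

func main() {
	log := []simpleEntry{{0, 0}, {1, 1}, {2, 2}, {3, 2}}
	matchIndex := []int{0, 3, 2, 1, 0} // five peers, peer 0 is the leader
	// Prints 2: index 2 is replicated on a majority (leader, peers 1 and 2) in the current term.
	fmt.Println(advanceCommitIndex(log, matchIndex, 0, 2, 0))
}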
-mkdir -p src/kvsrv - -cat > src/kvsrv/client.go << 'FILE_EOF_kvsrv_client_go' -package kvsrv - -import ( - "crypto/rand" - "math/big" - - "6.5840/labrpc" -) - -type Clerk struct { - server *labrpc.ClientEnd - clientId int64 - seqNum int -} - -func nrand() int64 { - max := big.NewInt(int64(1) << 62) - bigx, _ := rand.Int(rand.Reader, max) - x := bigx.Int64() - return x -} - -func MakeClerk(server *labrpc.ClientEnd) *Clerk { - ck := new(Clerk) - ck.server = server - ck.clientId = nrand() - ck.seqNum = 0 - return ck -} - -// fetch the current value for a key. -// returns "" if the key does not exist. -// keeps trying forever in the face of all other errors. -// -// you can send an RPC with code like this: -// ok := ck.server.Call("KVServer.Get", &args, &reply) -// -// the types of args and reply (including whether they are pointers) -// must match the declared types of the RPC handler function's -// arguments. and reply must be passed as a pointer. -func (ck *Clerk) Get(key string) string { - ck.seqNum++ - args := GetArgs{ - Key: key, - ClientId: ck.clientId, - SeqNum: ck.seqNum, - } - reply := GetReply{} - for !ck.server.Call("KVServer.Get", &args, &reply) { - } - return reply.Value -} - -// shared by Put and Append. -// -// you can send an RPC with code like this: -// ok := ck.server.Call("KVServer."+op, &args, &reply) -// -// the types of args and reply (including whether they are pointers) -// must match the declared types of the RPC handler function's -// arguments. and reply must be passed as a pointer. -func (ck *Clerk) PutAppend(key string, value string, op string) string { - ck.seqNum++ - args := PutAppendArgs{ - Key: key, - Value: value, - ClientId: ck.clientId, - SeqNum: ck.seqNum, - } - reply := PutAppendReply{} - for !ck.server.Call("KVServer."+op, &args, &reply) { - } - return reply.Value -} - -func (ck *Clerk) Put(key string, value string) { - ck.PutAppend(key, value, "Put") -} - -// Append value to key's value and return that value -func (ck *Clerk) Append(key string, value string) string { - return ck.PutAppend(key, value, "Append") -} - -FILE_EOF_kvsrv_client_go - -cat > src/kvsrv/common.go << 'FILE_EOF_kvsrv_common_go' -package kvsrv - -type PutAppendArgs struct { - Key string - Value string - ClientId int64 - SeqNum int -} - -type PutAppendReply struct { - Value string -} - -type GetArgs struct { - Key string - ClientId int64 - SeqNum int -} - -type GetReply struct { - Value string -} - -FILE_EOF_kvsrv_common_go - -cat > src/kvsrv/server.go << 'FILE_EOF_kvsrv_server_go' -package kvsrv - -import ( - "log" - "sync" -) - -const Debug = false - -func DPrintf(format string, a ...interface{}) (n int, err error) { - if Debug { - log.Printf(format, a...) - } - return -} - -type Cache struct { - seq int - value string -} - -type KVServer struct { - mu sync.Mutex - data map[string]string - cache map[int64]*Cache // client id -> seq ->value -} - -func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { - kv.mu.Lock() - defer kv.mu.Unlock() - clientId, seqNum := args.ClientId, args.SeqNum - key := args.Key - reply.Value = "" - // Either the client is new or the seqNum is greater than the cache seqNum. - // In both cases, we can return the value directly. 
- if ca, ok := kv.cache[clientId]; !ok || ca.seq <= seqNum { - reply.Value = kv.data[key] - return - } -} - -func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) { - kv.mu.Lock() - defer kv.mu.Unlock() - clientId, seqNum := args.ClientId, args.SeqNum - k, v := args.Key, args.Value - reply.Value = "" - if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { - return - } else if !ok { - kv.cache[clientId] = new(Cache) - } - kv.data[k] = v - kv.cache[clientId].seq = seqNum - kv.cache[clientId].value = reply.Value -} - -func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) { - kv.mu.Lock() - defer kv.mu.Unlock() - clientId, seqNum := args.ClientId, args.SeqNum - k, v := args.Key, args.Value - reply.Value = "" - // For ca.seq == seqNum, it means that the value has been appended. - // However, the response might be lost, so we return the cache value. - // For ca.seq > seqNum, it doesnt matter what the value is, just return. - if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { - reply.Value = ca.value - return - } else if !ok { - kv.cache[clientId] = new(Cache) - } - reply.Value = kv.data[k] - kv.cache[clientId].seq = seqNum - kv.cache[clientId].value = kv.data[k] - kv.data[k] += v -} - -func StartKVServer() *KVServer { - kv := new(KVServer) - kv.data = make(map[string]string) - kv.cache = make(map[int64]*Cache) - return kv -} - -FILE_EOF_kvsrv_server_go - -echo 'Copying mr implementation...' -mkdir -p src/mr - -cat > src/mr/coordinator.go << 'FILE_EOF_mr_coordinator_go' -package mr - -import ( - "log" - "math" - "net" - "net/http" - "net/rpc" - "os" - "sync" - "time" -) - -const SUCCESS = math.MaxInt32 - -type Coordinator struct { - // Your definitions here. - tasks chan Work // a taskqueue - mu sync.Mutex - terms []int - wg sync.WaitGroup - nMap int - nReduce int - done bool -} - -func (c *Coordinator) CallGetWork(args *WorkArgs, reply *WorkReply) error { - if len(c.tasks) == 0 { - reply.HasWork = false - return nil - } - reply.Work = <-c.tasks - c.mu.Lock() - reply.Term = c.terms[reply.Work.FileIndex] - c.mu.Unlock() - reply.HasWork = true - - go func() { - time.Sleep(10 * time.Second) - c.mu.Lock() - defer c.mu.Unlock() - if c.terms[reply.Work.FileIndex] == SUCCESS { - return - } - c.terms[reply.Work.FileIndex]++ - c.tasks <- reply.Work - }() - - return nil -} - -func (c *Coordinator) CallReport(args *ReportArgs, reply *ReportReply) error { - c.mu.Lock() - defer c.mu.Unlock() - - if c.terms[args.Work.FileIndex] != args.Term { - reply.Success = false - return nil - } - c.terms[args.Work.FileIndex] = SUCCESS - c.wg.Done() - reply.Success = true - return nil -} - -// start a thread that listens for RPCs from worker.go -func (c *Coordinator) server() { - rpc.Register(c) - rpc.HandleHTTP() - //l, e := net.Listen("tcp", ":1234") - sockname := coordinatorSock() - os.Remove(sockname) - l, e := net.Listen("unix", sockname) - if e != nil { - log.Fatal("listen error:", e) - } - go http.Serve(l, nil) -} - -// main/mrcoordinator.go calls Done() periodically to find out -// if the entire job has finished. -func (c *Coordinator) Done() bool { - return c.done -} - -func StartReduceWork(c *Coordinator) { - c.wg.Wait() - c.terms = make([]int, c.nReduce) - for i := 0; i < c.nReduce; i++ { - c.tasks <- Work{ - WorkType: REDUCE, - FileIndex: i, - NReduce: c.nReduce, - NMapWork: c.nMap, - } - c.wg.Add(1) - } - go WorkDone(c) -} - -func WorkDone(c *Coordinator) { - c.wg.Wait() - c.done = true -} - -// create a Coordinator. -// main/mrcoordinator.go calls this function. 
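// The coordinator above pairs every handed-out task with a "term" and re-queues the task
// after a fixed timeout unless it was reported finished under that same term, so a
// straggler's late report is ignored. A minimal, self-contained sketch of that idea as an
// in-process dispatcher; taskDispatcher, taskDone and the lease duration are illustrative
// names and values, not part of the lab code.
package main

import (
	"fmt"
	"sync"
	"time"
)

const taskDone = 1 << 30 // sentinel meaning "completed", like SUCCESS above

type taskDispatcher struct {
	mu    sync.Mutex
	queue chan int // task ids waiting to be handed out
	terms []int    // terms[id] is bumped every time task id is re-issued
	lease time.Duration
}

// get hands out a task id plus the term it was issued under, and schedules a re-queue
// if the task is not reported done within the lease.
func (d *taskDispatcher) get() (id, term int, ok bool) {
	select {
	case id = <-d.queue:
	default:
		return 0, 0, false
	}
	d.mu.Lock()
	term = d.terms[id]
	d.mu.Unlock()

	go func() {
		time.Sleep(d.lease)
		d.mu.Lock()
		defer d.mu.Unlock()
		if d.terms[id] == taskDone {
			return
		}
		d.terms[id]++ // invalidate the old hand-out and re-issue the task
		d.queue <- id
	}()
	return id, term, true
}

// report accepts a completion only if it carries the current term for that task.
func (d *taskDispatcher) report(id, term int) bool {
	d.mu.Lock()
	defer d.mu.Unlock()
	if d.terms[id] != term {
		return false // a newer copy of the task has already been issued
	}
	d.terms[id] = taskDone
	return true
}

func main() {
	d := &taskDispatcher{queue: make(chan int, 4), terms: make([]int, 4), lease: 50 * time.Millisecond}
	for i := 0; i < 4; i++ {
		d.queue <- i
	}
	id, term, _ := d.get()
	time.Sleep(80 * time.Millisecond) // miss the lease: the task is re-issued
	fmt.Println(d.report(id, term))   // false: the report carries a stale term
}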
-// nReduce is the number of reduce tasks to use. -func MakeCoordinator(files []string, nReduce int) *Coordinator { - - var buflen int - if len(files) > nReduce { - buflen = len(files) - } else { - buflen = nReduce - } - - c := Coordinator{ - nMap: len(files), - nReduce: nReduce, - wg: sync.WaitGroup{}, - tasks: make(chan Work, buflen), - terms: make([]int, len(files)), - done: false, - } - - for idx, file := range files { - c.tasks <- Work{ - WorkType: MAP, - Filename: file, - FileIndex: idx, - NReduce: c.nReduce, - NMapWork: c.nMap, - } - c.wg.Add(1) - } - go StartReduceWork(&c) - c.server() - - return &c -} - -FILE_EOF_mr_coordinator_go - -cat > src/mr/rpc.go << 'FILE_EOF_mr_rpc_go' -package mr - -// -// RPC definitions. -// -// remember to capitalize all names. -// - -import ( - "os" - "strconv" -) - -// -// example to show how to declare the arguments -// and reply for an RPC. -// - -type ExampleArgs struct { - X int -} - -type ExampleReply struct { - Y int -} - -/*-Define Work-*/ - -type WorkStatus int - -const ( - IDLE WorkStatus = iota - START - FINISH -) - -type WorkType int - -const ( - MAP WorkType = iota - REDUCE -) - -type Work struct { - WorkType WorkType // MAP or REDUCE - Filename string - FileIndex int // This is a convention for mr-X index - NMapWork int // how many map files - NReduce int // how many reduce files -} - -type WorkArgs struct { - WorkerID int -} - -type WorkReply struct { - HasWork bool - Work Work - Term int -} - -/*-Define Report-*/ -// Report work finish only if success -type ReportArgs struct { - Work Work - Term int -} - -type ReportReply struct { - Success bool -} - -// Cook up a unique-ish UNIX-domain socket name -// in /var/tmp, for the coordinator. -// Can't use the current directory since -// Athena AFS doesn't support UNIX-domain sockets. -func coordinatorSock() string { - s := "/var/tmp/5840-mr-" - s += strconv.Itoa(os.Getuid()) - return s -} - -FILE_EOF_mr_rpc_go - -cat > src/mr/worker.go << 'FILE_EOF_mr_worker_go' -package mr - -import ( - "encoding/json" - "fmt" - "hash/fnv" - "io/ioutil" - "log" - "net/rpc" - "os" - "sort" - "time" -) - -// for sorting by key. -type ByKey []KeyValue - -// for sorting by key. -func (a ByKey) Len() int { return len(a) } -func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } - -// Map functions return a slice of KeyValue. -type KeyValue struct { - Key string - Value string -} - -// use ihash(key) % NReduce to choose the reduce -// task number for each KeyValue emitted by Map. -func ihash(key string) int { - h := fnv.New32a() - h.Write([]byte(key)) - return int(h.Sum32() & 0x7fffffff) -} - -// main/mrworker.go calls this function. -func Worker(mapf func(string, string) []KeyValue, - reducef func(string, []string) string) { - - // Your worker implementation here. 
- for { - r := CallGetWok() - if !r.HasWork { - time.Sleep(3 * time.Second) - continue - } - - switch r.Work.WorkType { - case MAP: - DoMapWork(r.Work, mapf, r.Term) - case REDUCE: - DoReduceWork(r.Work, reducef, r.Term) - } - } -} - -func DoReduceWork(work Work, reducef func(string, []string) string, term int) { - fileIndex := work.FileIndex - intermediate := []KeyValue{} - - for i := 0; i < work.NMapWork; i++ { - filename := fmt.Sprintf("mr-%d-%d", i, fileIndex) - file, err := os.Open(filename) - - if err != nil { - log.Fatalf("cannot open %v", filename) - } - - dec := json.NewDecoder(file) - - for { - var kv KeyValue - if err := dec.Decode(&kv); err != nil { - break - } - intermediate = append(intermediate, kv) - } - file.Close() - } - - sort.Sort(ByKey(intermediate)) - - oname := fmt.Sprintf("mr-out-%d", fileIndex) - ofile, _ := ioutil.TempFile(".", oname) - - // - // call Reduce on each distinct key in intermediate[], - // and print the result to mr-out-0. - // - i := 0 - for i < len(intermediate) { - j := i + 1 - for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { - j++ - } - values := []string{} - for k := i; k < j; k++ { - values = append(values, intermediate[k].Value) - } - output := reducef(intermediate[i].Key, values) - - // this is the correct format for each line of Reduce output. - fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) - - i = j - } - - os.Rename(ofile.Name(), oname) - - CallReport(work, term) -} - -func DoMapWork(work Work, mapf func(string, string) []KeyValue, term int) { - filename := work.Filename - - file, err := os.Open(filename) - if err != nil { - log.Fatalf("cannot open %v", filename) - } - - content, err := ioutil.ReadAll(file) - - if err != nil { - log.Fatalf("cannot read %v", filename) - } - - file.Close() - - kva := mapf(work.Filename, string(content)) - - //make a - for i := 0; i < work.NReduce; i++ { - imtFilename := fmt.Sprintf("mr-%d-%d", work.FileIndex, i) - - imtFile, err := ioutil.TempFile(".", imtFilename) - - enc := json.NewEncoder(imtFile) - - if err != nil { - log.Fatalf("cannot create %v", imtFilename) - } - - for _, kv := range kva { - hash := ihash(kv.Key) % work.NReduce - if hash == i { - err := enc.Encode(&kv) - if err != nil { - log.Fatalf("cannot encode %v", kv) - } - } - } - - imtFile.Close() - - os.Rename(imtFile.Name(), imtFilename) - } - - CallReport(work, term) -} - -func CallReport(w Work, term int) { - args := ReportArgs{ - Work: w, - Term: term, - } - reply := ReportReply{} - ok := call("Coordinator.CallReport", &args, &reply) - - if !ok { - fmt.Printf("call failed!\n") - } -} - -func CallGetWok() WorkReply { - args := WorkArgs{} - reply := WorkReply{} - ok := call("Coordinator.CallGetWork", &args, &reply) - - if !ok { - fmt.Printf("call failed!\n") - } - - return reply -} - -// example function to show how to make an RPC call to the coordinator. -// -// the RPC argument and reply types are defined in rpc.go. -func CallExample() { - - // declare an argument structure. - args := ExampleArgs{} - - // fill in the argument(s). - args.X = 99 - - // declare a reply structure. - reply := ExampleReply{} - - // send the RPC request, wait for the reply. - // the "Coordinator.Example" tells the - // receiving server that we'd like to call - // the Example() method of struct Coordinator. - ok := call("Coordinator.Example", &args, &reply) - if ok { - // reply.Y should be 100. 
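// DoMapWork and DoReduceWork above write their output through a temporary file and only
// os.Rename it to the final name once it is complete, so a worker that crashes (or whose
// task gets re-issued) never leaves a half-written mr-out-* or mr-X-Y file visible. A
// minimal, self-contained sketch of that pattern using os.CreateTemp, the current
// replacement for the ioutil.TempFile call used above; writeAtomically is an illustrative
// helper, not a function from the lab code.
package main

import (
	"fmt"
	"log"
	"os"
)

// writeAtomically writes data to name such that readers either see the old file or the
// complete new contents, never a partial write.
func writeAtomically(name string, data []byte) error {
	tmp, err := os.CreateTemp(".", name+"-*")
	if err != nil {
		return err
	}
	// Make sure the temp file disappears if anything below fails; after a successful
	// rename this Remove is a harmless no-op on a name that no longer exists.
	defer os.Remove(tmp.Name())

	if _, err := tmp.Write(data); err != nil {
		tmp.Close()
		return err
	}
	if err := tmp.Close(); err != nil {
		return err
	}
	// Rename is atomic within the same directory on POSIX filesystems.
	return os.Rename(tmp.Name(), name)
}

func main() {
	if err := writeAtomically("mr-out-demo", []byte("key 42\n")); err != nil {
		log.Fatal(err)
	}
	fmt.Println("wrote mr-out-demo atomically")
}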
- fmt.Printf("reply.Y %v\n", reply.Y) - } else { - fmt.Printf("call failed!\n") - } -} - -// send an RPC request to the coordinator, wait for the response. -// usually returns true. -// returns false if something goes wrong. -func call(rpcname string, args interface{}, reply interface{}) bool { - // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") - sockname := coordinatorSock() - c, err := rpc.DialHTTP("unix", sockname) - if err != nil { - log.Fatal("dialing:", err) - } - defer c.Close() - - err = c.Call(rpcname, args, reply) - if err == nil { - return true - } - - fmt.Println(err) - return false -} - -FILE_EOF_mr_worker_go - echo 'Creating checksums for protected files...' PROTECTED_FILES=( diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/kvsrv/client.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/kvsrv/client.go new file mode 100644 index 0000000..cb36e2b --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/kvsrv/client.go @@ -0,0 +1,83 @@ +package kvsrv + +import ( + "crypto/rand" + "math/big" + + "6.5840/labrpc" +) + +type Clerk struct { + server *labrpc.ClientEnd + clientId int64 + seqNum int +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +func MakeClerk(server *labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.server = server + ck.clientId = nrand() + ck.seqNum = 0 + return ck +} + +// fetch the current value for a key. +// returns "" if the key does not exist. +// keeps trying forever in the face of all other errors. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer.Get", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) Get(key string) string { + ck.seqNum++ + args := GetArgs{ + Key: key, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := GetReply{} + for !ck.server.Call("KVServer.Get", &args, &reply) { + } + return reply.Value +} + +// shared by Put and Append. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer."+op, &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. 
+func (ck *Clerk) PutAppend(key string, value string, op string) string { + ck.seqNum++ + args := PutAppendArgs{ + Key: key, + Value: value, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := PutAppendReply{} + for !ck.server.Call("KVServer."+op, &args, &reply) { + } + return reply.Value +} + +func (ck *Clerk) Put(key string, value string) { + ck.PutAppend(key, value, "Put") +} + +// Append value to key's value and return that value +func (ck *Clerk) Append(key string, value string) string { + return ck.PutAppend(key, value, "Append") +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/kvsrv/common.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/kvsrv/common.go new file mode 100644 index 0000000..610acdb --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/kvsrv/common.go @@ -0,0 +1,22 @@ +package kvsrv + +type PutAppendArgs struct { + Key string + Value string + ClientId int64 + SeqNum int +} + +type PutAppendReply struct { + Value string +} + +type GetArgs struct { + Key string + ClientId int64 + SeqNum int +} + +type GetReply struct { + Value string +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/kvsrv/server.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/kvsrv/server.go new file mode 100644 index 0000000..4e03682 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/kvsrv/server.go @@ -0,0 +1,84 @@ +package kvsrv + +import ( + "log" + "sync" +) + +const Debug = false + +func DPrintf(format string, a ...interface{}) (n int, err error) { + if Debug { + log.Printf(format, a...) + } + return +} + +type Cache struct { + seq int + value string +} + +type KVServer struct { + mu sync.Mutex + data map[string]string + cache map[int64]*Cache // client id -> seq ->value +} + +func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + key := args.Key + reply.Value = "" + // Either the client is new or the seqNum is greater than the cache seqNum. + // In both cases, we can return the value directly. + if ca, ok := kv.cache[clientId]; !ok || ca.seq <= seqNum { + reply.Value = kv.data[key] + return + } +} + +func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + kv.data[k] = v + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = reply.Value +} + +func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + // For ca.seq == seqNum, it means that the value has been appended. + // However, the response might be lost, so we return the cache value. + // For ca.seq > seqNum, it doesnt matter what the value is, just return. 
+ if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + reply.Value = ca.value + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + reply.Value = kv.data[k] + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = kv.data[k] + kv.data[k] += v +} + +func StartKVServer() *KVServer { + kv := new(KVServer) + kv.data = make(map[string]string) + kv.cache = make(map[int64]*Cache) + return kv +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/mr/coordinator.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/mr/coordinator.go new file mode 100644 index 0000000..4fc2518 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/mr/coordinator.go @@ -0,0 +1,141 @@ +package mr + +import ( + "log" + "math" + "net" + "net/http" + "net/rpc" + "os" + "sync" + "time" +) + +const SUCCESS = math.MaxInt32 + +type Coordinator struct { + // Your definitions here. + tasks chan Work // a taskqueue + mu sync.Mutex + terms []int + wg sync.WaitGroup + nMap int + nReduce int + done bool +} + +func (c *Coordinator) CallGetWork(args *WorkArgs, reply *WorkReply) error { + if len(c.tasks) == 0 { + reply.HasWork = false + return nil + } + reply.Work = <-c.tasks + c.mu.Lock() + reply.Term = c.terms[reply.Work.FileIndex] + c.mu.Unlock() + reply.HasWork = true + + go func() { + time.Sleep(10 * time.Second) + c.mu.Lock() + defer c.mu.Unlock() + if c.terms[reply.Work.FileIndex] == SUCCESS { + return + } + c.terms[reply.Work.FileIndex]++ + c.tasks <- reply.Work + }() + + return nil +} + +func (c *Coordinator) CallReport(args *ReportArgs, reply *ReportReply) error { + c.mu.Lock() + defer c.mu.Unlock() + + if c.terms[args.Work.FileIndex] != args.Term { + reply.Success = false + return nil + } + c.terms[args.Work.FileIndex] = SUCCESS + c.wg.Done() + reply.Success = true + return nil +} + +// start a thread that listens for RPCs from worker.go +func (c *Coordinator) server() { + rpc.Register(c) + rpc.HandleHTTP() + //l, e := net.Listen("tcp", ":1234") + sockname := coordinatorSock() + os.Remove(sockname) + l, e := net.Listen("unix", sockname) + if e != nil { + log.Fatal("listen error:", e) + } + go http.Serve(l, nil) +} + +// main/mrcoordinator.go calls Done() periodically to find out +// if the entire job has finished. +func (c *Coordinator) Done() bool { + return c.done +} + +func StartReduceWork(c *Coordinator) { + c.wg.Wait() + c.terms = make([]int, c.nReduce) + for i := 0; i < c.nReduce; i++ { + c.tasks <- Work{ + WorkType: REDUCE, + FileIndex: i, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go WorkDone(c) +} + +func WorkDone(c *Coordinator) { + c.wg.Wait() + c.done = true +} + +// create a Coordinator. +// main/mrcoordinator.go calls this function. +// nReduce is the number of reduce tasks to use. 
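// The KVServer above keeps one {seq, value} cache entry per client so that a retried
// Append (same ClientId and SeqNum) returns the reply recorded the first time instead of
// appending twice. A minimal, self-contained sketch of that at-most-once filter, assuming
// a single-node map; dedupStore and appendOnce are illustrative names, not part of the
// lab code.
package main

import (
	"fmt"
	"sync"
)

type cached struct {
	seq   int
	value string
}

type dedupStore struct {
	mu    sync.Mutex
	data  map[string]string
	cache map[int64]cached // last reply recorded per client
}

// appendOnce appends value to key at most once per (client, seq) pair and returns the
// value the key held before the append, as the lab's Append handler does.
func (s *dedupStore) appendOnce(client int64, seq int, key, value string) string {
	s.mu.Lock()
	defer s.mu.Unlock()
	if c, ok := s.cache[client]; ok && c.seq >= seq {
		return c.value // duplicate request: replay the recorded reply
	}
	old := s.data[key]
	s.data[key] += value
	s.cache[client] = cached{seq: seq, value: old}
	return old
}

func main() {
	s := &dedupStore{data: map[string]string{}, cache: map[int64]cached{}}
	fmt.Println(s.appendOnce(1, 1, "k", "a")) // "" (first append)
	fmt.Println(s.appendOnce(1, 1, "k", "a")) // "" again: the retry is filtered
	fmt.Println(s.data["k"])                  // "a", not "aa"
}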
+func MakeCoordinator(files []string, nReduce int) *Coordinator { + + var buflen int + if len(files) > nReduce { + buflen = len(files) + } else { + buflen = nReduce + } + + c := Coordinator{ + nMap: len(files), + nReduce: nReduce, + wg: sync.WaitGroup{}, + tasks: make(chan Work, buflen), + terms: make([]int, len(files)), + done: false, + } + + for idx, file := range files { + c.tasks <- Work{ + WorkType: MAP, + Filename: file, + FileIndex: idx, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go StartReduceWork(&c) + c.server() + + return &c +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/mr/rpc.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/mr/rpc.go new file mode 100644 index 0000000..0f90524 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/mr/rpc.go @@ -0,0 +1,81 @@ +package mr + +// +// RPC definitions. +// +// remember to capitalize all names. +// + +import ( + "os" + "strconv" +) + +// +// example to show how to declare the arguments +// and reply for an RPC. +// + +type ExampleArgs struct { + X int +} + +type ExampleReply struct { + Y int +} + +/*-Define Work-*/ + +type WorkStatus int + +const ( + IDLE WorkStatus = iota + START + FINISH +) + +type WorkType int + +const ( + MAP WorkType = iota + REDUCE +) + +type Work struct { + WorkType WorkType // MAP or REDUCE + Filename string + FileIndex int // This is a convention for mr-X index + NMapWork int // how many map files + NReduce int // how many reduce files +} + +type WorkArgs struct { + WorkerID int +} + +type WorkReply struct { + HasWork bool + Work Work + Term int +} + +/*-Define Report-*/ +// Report work finish only if success +type ReportArgs struct { + Work Work + Term int +} + +type ReportReply struct { + Success bool +} + +// Cook up a unique-ish UNIX-domain socket name +// in /var/tmp, for the coordinator. +// Can't use the current directory since +// Athena AFS doesn't support UNIX-domain sockets. +func coordinatorSock() string { + s := "/var/tmp/5840-mr-" + s += strconv.Itoa(os.Getuid()) + return s +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/mr/worker.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/mr/worker.go new file mode 100644 index 0000000..95d142e --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/mr/worker.go @@ -0,0 +1,233 @@ +package mr + +import ( + "encoding/json" + "fmt" + "hash/fnv" + "io/ioutil" + "log" + "net/rpc" + "os" + "sort" + "time" +) + +// for sorting by key. +type ByKey []KeyValue + +// for sorting by key. +func (a ByKey) Len() int { return len(a) } +func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } + +// Map functions return a slice of KeyValue. +type KeyValue struct { + Key string + Value string +} + +// use ihash(key) % NReduce to choose the reduce +// task number for each KeyValue emitted by Map. +func ihash(key string) int { + h := fnv.New32a() + h.Write([]byte(key)) + return int(h.Sum32() & 0x7fffffff) +} + +// main/mrworker.go calls this function. +func Worker(mapf func(string, string) []KeyValue, + reducef func(string, []string) string) { + + // Your worker implementation here. 
+ for { + r := CallGetWok() + if !r.HasWork { + time.Sleep(3 * time.Second) + continue + } + + switch r.Work.WorkType { + case MAP: + DoMapWork(r.Work, mapf, r.Term) + case REDUCE: + DoReduceWork(r.Work, reducef, r.Term) + } + } +} + +func DoReduceWork(work Work, reducef func(string, []string) string, term int) { + fileIndex := work.FileIndex + intermediate := []KeyValue{} + + for i := 0; i < work.NMapWork; i++ { + filename := fmt.Sprintf("mr-%d-%d", i, fileIndex) + file, err := os.Open(filename) + + if err != nil { + log.Fatalf("cannot open %v", filename) + } + + dec := json.NewDecoder(file) + + for { + var kv KeyValue + if err := dec.Decode(&kv); err != nil { + break + } + intermediate = append(intermediate, kv) + } + file.Close() + } + + sort.Sort(ByKey(intermediate)) + + oname := fmt.Sprintf("mr-out-%d", fileIndex) + ofile, _ := ioutil.TempFile(".", oname) + + // + // call Reduce on each distinct key in intermediate[], + // and print the result to mr-out-0. + // + i := 0 + for i < len(intermediate) { + j := i + 1 + for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { + j++ + } + values := []string{} + for k := i; k < j; k++ { + values = append(values, intermediate[k].Value) + } + output := reducef(intermediate[i].Key, values) + + // this is the correct format for each line of Reduce output. + fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) + + i = j + } + + os.Rename(ofile.Name(), oname) + + CallReport(work, term) +} + +func DoMapWork(work Work, mapf func(string, string) []KeyValue, term int) { + filename := work.Filename + + file, err := os.Open(filename) + if err != nil { + log.Fatalf("cannot open %v", filename) + } + + content, err := ioutil.ReadAll(file) + + if err != nil { + log.Fatalf("cannot read %v", filename) + } + + file.Close() + + kva := mapf(work.Filename, string(content)) + + //make a + for i := 0; i < work.NReduce; i++ { + imtFilename := fmt.Sprintf("mr-%d-%d", work.FileIndex, i) + + imtFile, err := ioutil.TempFile(".", imtFilename) + + enc := json.NewEncoder(imtFile) + + if err != nil { + log.Fatalf("cannot create %v", imtFilename) + } + + for _, kv := range kva { + hash := ihash(kv.Key) % work.NReduce + if hash == i { + err := enc.Encode(&kv) + if err != nil { + log.Fatalf("cannot encode %v", kv) + } + } + } + + imtFile.Close() + + os.Rename(imtFile.Name(), imtFilename) + } + + CallReport(work, term) +} + +func CallReport(w Work, term int) { + args := ReportArgs{ + Work: w, + Term: term, + } + reply := ReportReply{} + ok := call("Coordinator.CallReport", &args, &reply) + + if !ok { + fmt.Printf("call failed!\n") + } +} + +func CallGetWok() WorkReply { + args := WorkArgs{} + reply := WorkReply{} + ok := call("Coordinator.CallGetWork", &args, &reply) + + if !ok { + fmt.Printf("call failed!\n") + } + + return reply +} + +// example function to show how to make an RPC call to the coordinator. +// +// the RPC argument and reply types are defined in rpc.go. +func CallExample() { + + // declare an argument structure. + args := ExampleArgs{} + + // fill in the argument(s). + args.X = 99 + + // declare a reply structure. + reply := ExampleReply{} + + // send the RPC request, wait for the reply. + // the "Coordinator.Example" tells the + // receiving server that we'd like to call + // the Example() method of struct Coordinator. + ok := call("Coordinator.Example", &args, &reply) + if ok { + // reply.Y should be 100. 
+ fmt.Printf("reply.Y %v\n", reply.Y) + } else { + fmt.Printf("call failed!\n") + } +} + +// send an RPC request to the coordinator, wait for the response. +// usually returns true. +// returns false if something goes wrong. +func call(rpcname string, args interface{}, reply interface{}) bool { + // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") + sockname := coordinatorSock() + c, err := rpc.DialHTTP("unix", sockname) + if err != nil { + log.Fatal("dialing:", err) + } + defer c.Close() + + err = c.Call(rpcname, args, reply) + if err == nil { + return true + } + + fmt.Println(err) + return false +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/append_entries.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/append_entries.go new file mode 100644 index 0000000..9856584 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/append_entries.go @@ -0,0 +1,214 @@ +package raft + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type AppendEntriesArgs struct { + BaseRPC // leader's term + LeaderId int // so follower can redirect clients + PrevLogIndex int // index of log entry immediately preceding new ones + PrevLogTerm int // term of prevLogIndex entry + Entries []Entry // log entries to store (empty for heartbeat; may send more than one for efficiency) + CommitIndex int // leader's commitIndex +} + +type AppendEntriesReply struct { + BaseRPC // currentTerm, for leader to update itself + Success bool // true if follower contained entry matching prevLogIndex and prevLogTerm + ConflictIndex int // the index of the first conflicting entry +} + +// AppendEntries RPC handler +// Reset the election timer if you get an AppendEntries RPC from the current leader +// (i.e., if the term of the AppendEntries arguments is outdated, you should not reset your timer); +func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + DPrintf("(AppendEntries) [%d] recieve from %d, Term: %d, PrevLogIndex: %d, PrevLogTerm: %d\n", rf.me, args.LeaderId, args.Term, args.PrevLogIndex, args.PrevLogTerm) + + reply.Success = false + reply.ConflictIndex = -1 + + if !rf.checkRequestTerm(args, reply) { + return + } + + if rf.state == CANDIDATE { + rf.state = FOLLOWER + } + + rf.resetElectionTimer() + + prevLogIndex := args.PrevLogIndex - rf.logs[0].Index + + if prevLogIndex < 0 { + // force to send a snapshot + reply.ConflictIndex = 0 + return + } + + // Reply false if log doesn’t contain an entry at prevLogIndex + // whose term matches prevLogTerm (§5.3) + if prevLogIndex >= len(rf.logs) { + reply.ConflictIndex = rf.logs[len(rf.logs)-1].Index + return + } + + // If an existing entry conflicts with a new one (same index + // but different terms), delete the existing entry and all that + // follow it (§5.3) + if rf.logs[prevLogIndex].Term != args.PrevLogTerm { + // optimization + curTerm := rf.logs[prevLogIndex].Term + var conflictIndex int + for i := prevLogIndex; i > 0; i-- { + if rf.logs[i-1].Term != curTerm { + conflictIndex = i + break + } + } + reply.ConflictIndex = conflictIndex + rf.logs[0].Index + return + } + for idx, entry := range args.Entries { + logIndex := entry.Index - rf.logs[0].Index + if logIndex >= len(rf.logs) || rf.logs[logIndex].Term != entry.Term { + DPrintf("(AppendEntries) [%d] append logs: %v\n", rf.me, args.Entries) + rf.logs = append([]Entry{}, 
append(rf.logs[:logIndex], args.Entries[idx:]...)...) + break + } + } + reply.Success = true + if args.CommitIndex > rf.commitIndex { + rf.commitIndex = args.CommitIndex + if args.CommitIndex-rf.logs[0].Index >= len(rf.logs) { + rf.commitIndex = rf.logs[len(rf.logs)-1].Index + } + } + rf.applierCond.Signal() +} + +func (rf *Raft) sendAppendEntries(server int, args *AppendEntriesArgs) { + reply := &AppendEntriesReply{} + ok := rf.peers[server].Call("Raft.AppendEntries", args, reply) + if !ok { + return + } + + DPrintf("(AppendEntries) [%d] recieve reply from %d, Term: %d, Success: %v, ConflictIndex: %d\n", rf.me, server, reply.Term, reply.Success, reply.ConflictIndex) + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + // If successful: update nextIndex and matchIndex for + // follower (§5.3) + if reply.Success { + if len(args.Entries) > 0 { + rf.nextIndex[server] = args.Entries[len(args.Entries)-1].Index + 1 + } + rf.matchIndex[server] = rf.nextIndex[server] - 1 + for _, log := range rf.logs { + index := log.Index + count := 1 + for peer := range rf.peers { + if peer != rf.me && rf.matchIndex[peer] >= index { + count++ + } + } + // If there exists an N such that N > commitIndex, a majority + // of matchIndex[i] ≥ N, and log[N].term == currentTerm: + // set commitIndex = N (§5.3, §5.4). + if count > len(rf.peers)/2 && index > rf.commitIndex && log.Term == rf.currentTerm { + rf.commitIndex = index + } + } + } else { + if reply.ConflictIndex != -1 { + rf.nextIndex[server] = reply.ConflictIndex - 1 + } else { + rf.nextIndex[server] = rf.nextIndex[server] - 1 + } + if rf.nextIndex[server] < 1 { + rf.nextIndex[server] = 1 + } + } + DPrintf("(AppendEntries) [%d] nextIndex: %v, matchIndex: %v, commitIndex: %d\n", rf.me, rf.nextIndex, rf.matchIndex, rf.commitIndex) + rf.applierCond.Signal() +} + +func (rf *Raft) broadcastAppendEntries(isHeartBeat bool) { + for peer := range rf.peers { + if peer != rf.me { + // if it is a heartbeat we dont care the linearizability of logs append + if isHeartBeat { + args := rf.prepareReplicationArgs(peer) + go rf.sendReplicationRPC(peer, args) + } else { + rf.broadcasterCond[peer].Signal() + } + } + } +} + +func (rf *Raft) prepareReplicationArgs(peer int) interface{} { + if rf.nextIndex[peer] > rf.logs[0].Index { + firstLog := rf.logs[0] + nextIndex := rf.nextIndex[peer] - firstLog.Index + prevLog := rf.logs[nextIndex-1] + logs := make([]Entry, len(rf.logs[nextIndex:])) + copy(logs, rf.logs[nextIndex:]) + return &AppendEntriesArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + PrevLogIndex: prevLog.Index, + PrevLogTerm: prevLog.Term, + Entries: logs, + CommitIndex: rf.commitIndex, + } + } else { + return &InstallSnapshotArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + LastIncludedIndex: rf.logs[0].Index, + LastIncludedTerm: rf.logs[0].Term, + Offset: 0, + Data: rf.persister.ReadSnapshot(), + Done: true, + } + } +} + +func (rf *Raft) sendReplicationRPC(peer int, args interface{}) { + switch v := args.(type) { + case *AppendEntriesArgs: + rf.sendAppendEntries(peer, v) + case *InstallSnapshotArgs: + rf.sendInstallSnapshot(peer, v) + default: + panic("(sendReplicationRPC) SHOULD NOT REACH") + } +} + +func (rf *Raft) isReplicationNeeded(peer int) bool { + return rf.state == LEADER && rf.matchIndex[peer] < rf.logs[len(rf.logs)-1].Index +} + +func (rf *Raft) broadcaster(peer int) { + rf.broadcasterCond[peer].L.Lock() + defer rf.broadcasterCond[peer].L.Unlock() + for 
!rf.killed() { + rf.mu.Lock() + for !rf.isReplicationNeeded(peer) { + rf.mu.Unlock() + rf.broadcasterCond[peer].Wait() + rf.mu.Lock() + } + args := rf.prepareReplicationArgs(peer) + rf.mu.Unlock() + rf.sendReplicationRPC(peer, args) + } +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/election.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/election.go new file mode 100644 index 0000000..4274b32 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/election.go @@ -0,0 +1,123 @@ +package raft + +import ( + "math/rand" + "sync/atomic" + "time" +) + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type RequestVoteArgs struct { + BaseRPC // candidate's term + CandidateId int // candidate requesting vote + LastLogIndex int // index of candidate's last log entry + LastLogTerm int // term of candidate's last log entry +} + +type RequestVoteReply struct { + BaseRPC // currentTerm, for candidate to update itself + VoteGranted bool // true means candidate received vote +} + +// RequestVote RPC handler +// Restart your election timer if you grant a vote to another peer. +func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + reply.VoteGranted = false + + DPrintf("(RequestVote) [%d]: receive vote request from %d, term %d\n", rf.me, args.CandidateId, args.Term) + + if !rf.checkRequestTerm(args, reply) { + return + } + + if (rf.votedFor == -1 || rf.votedFor == args.CandidateId) && rf.isUpToDate(args) { + reply.VoteGranted = true + rf.votedFor = args.CandidateId + rf.resetElectionTimer() + } +} + +func (rf *Raft) isUpToDate(args *RequestVoteArgs) bool { + lastLog := rf.logs[len(rf.logs)-1] + candidateIndex := args.LastLogIndex + candidateTerm := args.LastLogTerm + return candidateTerm > lastLog.Term || (candidateTerm == lastLog.Term && candidateIndex >= lastLog.Index) +} + +func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, voteCount *int32) { + reply := &RequestVoteReply{} + ok := rf.peers[server].Call("Raft.RequestVote", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, true) { + return + } + + if !reply.VoteGranted { + return + } + + DPrintf("(RequestVote) [%d]: received vote from %d, voteCount: %d\n", rf.me, server, *voteCount) + + // If votes received from majority of servers: become leader + if atomic.AddInt32(voteCount, 1) > int32(len(rf.peers)/2) && + rf.state == CANDIDATE && + rf.currentTerm == args.Term { + rf.state = LEADER + lastLogIndex := rf.logs[len(rf.logs)-1].Index + for i := range rf.peers { + rf.nextIndex[i] = lastLogIndex + 1 + rf.matchIndex[i] = 0 + } + DPrintf("[%d]: become leader to term %d\n", rf.me, rf.currentTerm) + // send initial empty AppendEntries RPCs (heartbeat) to each server immediately + rf.broadcastAppendEntries(true) + } + DPrintf("(RequestVote) [%d]: voteCount: %d\n", rf.me, *voteCount) +} + +func (rf *Raft) startElection() { + rf.currentTerm++ + rf.state = CANDIDATE + rf.votedFor = rf.me + rf.resetElectionTimer() + DPrintf("(RequestVote) [%d]: start election, term %d", rf.me, rf.currentTerm) + lastLog := rf.logs[len(rf.logs)-1] + + voteCount := int32(1) + args := RequestVoteArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + CandidateId: rf.me, + LastLogIndex: lastLog.Index, + LastLogTerm: lastLog.Term, + } + + for id := range 
rf.peers { + if id == rf.me { + continue + } + go rf.sendRequestVote(id, &args, &voteCount) + } +} + +func (rf *Raft) resetElectionTimer() { + // election timeout range from 350 to 550 + ms := 350 + (rand.Int63() % 200) + rf.electionTimeStamp = time.Now() + rf.electionTimeout = time.Duration(ms) * time.Millisecond +} + +func (rf *Raft) isElectionTimeout() bool { + return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/install_snapshot.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/install_snapshot.go new file mode 100644 index 0000000..7ba645e --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/install_snapshot.go @@ -0,0 +1,92 @@ +package raft + +type InstallSnapshotArgs struct { + BaseRPC + LeaderId int + LastIncludedIndex int + LastIncludedTerm int + Offset int + Data []byte + Done bool +} + +type InstallSnapshotReply struct { + BaseRPC +} + +// InstallSnapshot RPC handler +func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkRequestTerm(args, reply) { + return + } + + if args.LastIncludedIndex <= rf.commitIndex { + return + } + prevCommitIndex := rf.commitIndex + prevLastApplied := rf.lastApplied + defer DPrintf("(InstallSnapshot) [%d]: LastIncludedIndex: %d, LastIncludedTerm: %d, prevCommitIndex: %d, prevLastApplied: %d\n", rf.me, args.LastIncludedIndex, args.LastIncludedTerm, prevCommitIndex, prevLastApplied) + rf.resetElectionTimer() + + rf.commitIndex = args.LastIncludedIndex + rf.lastApplied = args.LastIncludedIndex + // 2. Create new snapshot file if first chunk (offset is 0) + // 3. Write data into snapshot file at given offset + // 4. Reply and wait for more data chunks if done is false + if !args.Done { + return + } + // 5. Save snapshot file, discard any existing or partial snapshot with a + // smaller index + // 6. If existing log entry has same index and term as snapshot’s last + // included entry, retain log entries following it and reply + // 7. Discard the entire log + // 8. Reset state machine using snapshot contents (and load snapshot’s + // cluster configuration) + firstLogIndex := rf.logs[0].Index + if firstLogIndex <= args.LastIncludedIndex { + rf.logs = append([]Entry{}, Entry{ + Index: args.LastIncludedIndex, + Term: args.LastIncludedTerm, + Command: nil, + }) + } else if firstLogIndex < args.LastIncludedIndex { + trimLen := args.LastIncludedIndex - firstLogIndex + rf.logs = append([]Entry{}, rf.logs[trimLen:]...) 
+ rf.logs[0].Command = nil + } + rf.persister.Save(rf.encodeState(), args.Data) + rf.smsg = &ApplyMsg{ + SnapshotValid: true, + Snapshot: args.Data, + SnapshotTerm: args.LastIncludedTerm, + SnapshotIndex: args.LastIncludedIndex, + } +} + +func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { + reply := &InstallSnapshotReply{} + ok := rf.peers[server].Call("Raft.InstallSnapshot", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + + if args.LastIncludedIndex != rf.logs[0].Index { + return + } + + rf.nextIndex[server] = args.LastIncludedIndex + 1 + rf.matchIndex[server] = args.LastIncludedIndex + + rf.persister.Save(rf.encodeState(), args.Data) +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/persister.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/persister.go new file mode 100644 index 0000000..c5f816c --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/persister.go @@ -0,0 +1,70 @@ +package raft + +// +// support for Raft and kvraft to save persistent +// Raft state (log &c) and k/v server snapshots. +// +// we will use the original persister.go to test your code for grading. +// so, while you can modify this code to help you debug, please +// test with the original before submitting. +// + +import "sync" + +type Persister struct { + mu sync.Mutex + raftstate []byte + snapshot []byte +} + +func MakePersister() *Persister { + return &Persister{} +} + +func clone(orig []byte) []byte { + x := make([]byte, len(orig)) + copy(x, orig) + return x +} + +func (ps *Persister) Copy() *Persister { + ps.mu.Lock() + defer ps.mu.Unlock() + np := MakePersister() + np.raftstate = ps.raftstate + np.snapshot = ps.snapshot + return np +} + +func (ps *Persister) ReadRaftState() []byte { + ps.mu.Lock() + defer ps.mu.Unlock() + return clone(ps.raftstate) +} + +func (ps *Persister) RaftStateSize() int { + ps.mu.Lock() + defer ps.mu.Unlock() + return len(ps.raftstate) +} + +// Save both Raft state and K/V snapshot as a single atomic action, +// to help avoid them getting out of sync. +func (ps *Persister) Save(raftstate []byte, snapshot []byte) { + ps.mu.Lock() + defer ps.mu.Unlock() + ps.raftstate = clone(raftstate) + ps.snapshot = clone(snapshot) +} + +func (ps *Persister) ReadSnapshot() []byte { + ps.mu.Lock() + defer ps.mu.Unlock() + return clone(ps.snapshot) +} + +func (ps *Persister) SnapshotSize() int { + ps.mu.Lock() + defer ps.mu.Unlock() + return len(ps.snapshot) +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/raft.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/raft.go new file mode 100644 index 0000000..9946898 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/raft.go @@ -0,0 +1,416 @@ +package raft + +// +// this is an outline of the API that raft must expose to +// the service (or tester). see comments below for +// each of these functions for more details. +// +// rf = Make(...) +// create a new Raft server. 
+// rf.Start(command interface{}) (index, term, isleader) +// start agreement on a new log entry +// rf.GetState() (term, isLeader) +// ask a Raft for its current term, and whether it thinks it is leader +// ApplyMsg +// each time a new entry is committed to the log, each Raft peer +// should send an ApplyMsg to the service (or tester) +// in the same server. +// + +import ( + // "bytes" + + "bytes" + "log" + "sync" + "sync/atomic" + "time" + + // "6.5840/labgob" + "6.5840/labgob" + "6.5840/labrpc" +) + +// as each Raft peer becomes aware that successive log entries are +// committed, the peer should send an ApplyMsg to the service (or +// tester) on the same server, via the applyCh passed to Make(). set +// CommandValid to true to indicate that the ApplyMsg contains a newly +// committed log entry. +// +// in part 3D you'll want to send other kinds of messages (e.g., +// snapshots) on the applyCh, but set CommandValid to false for these +// other uses. +type ApplyMsg struct { + CommandValid bool + Command interface{} + CommandIndex int + CommandTerm int + + // For 3D: + SnapshotValid bool + Snapshot []byte + SnapshotTerm int + SnapshotIndex int +} + +type Entry struct { + Term int + Index int + Command interface{} +} + +// Base struct for common fields +type BaseRPC struct { + Term int +} + +// Implement RaftRPC interface for BaseRPC +func (b *BaseRPC) GetTerm() int { + return b.Term +} + +func (b *BaseRPC) SetTerm(term int) { + b.Term = term +} + +// RaftRPC interface +type RaftRPC interface { + GetTerm() int + SetTerm(int) +} + +type ServerState int + +const ( + FOLLOWER ServerState = iota + CANDIDATE + LEADER +) + +// A Go object implementing a single Raft peer. +type Raft struct { + mu sync.Mutex // Lock to protect shared access to this peer's state + peers []*labrpc.ClientEnd // RPC end points of all peers + persister *Persister // Object to hold this peer's persisted state + me int // this peer's index into peers[] + dead int32 // set by Kill() + heartbeatTimeout time.Duration + electionTimeout time.Duration + electionTimeStamp time.Time + applyCh chan ApplyMsg + + // state a Raft server must maintain. + broadcasterCond []*sync.Cond + applierCond *sync.Cond + + // server state + state ServerState + + // presistent state on all servers + currentTerm int // latest term server has seen (initialized to 0 on first boot, increases monotonically) + votedFor int // candidateId that received vote in current term (or null if none) + logs []Entry // log entries; each entry contains command for state machine, and term when entry was received by leader (first index is 1) + + // volatile state on all servers + commitIndex int // index of highest log entry known to be committed (initialized to 0, increases monotonically) + lastApplied int // index of highest log entry applied to state machine (initialized to 0, increases monotonically) + + // volatile state on leaders (reinitialized after election) + nextIndex []int // for each server, index of the next log entry to send to that server (initialized to leader last log index + 1) + matchIndex []int // for each server, index of highest log entry known to be replicated on server (initialized to 0, increases monotonically) + + // snapshot msg + smsg *ApplyMsg +} + +// return currentTerm and whether this server +// believes it is the leader. 
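// persist and readPersist below serialize currentTerm, votedFor and the log with labgob,
// the course's wrapper around the standard encoding/gob package. A minimal, self-contained
// round-trip sketch of the same idea using encoding/gob directly; persistedState and
// logEntry are illustrative types (with Command simplified to a string), not part of the
// lab code.
package main

import (
	"bytes"
	"encoding/gob"
	"fmt"
	"log"
)

type logEntry struct {
	Term, Index int
	Command     string // interface{} in the lab; a string keeps the sketch simple
}

type persistedState struct {
	CurrentTerm int
	VotedFor    int
	Logs        []logEntry
}

// encodeState serializes the persistent fields into a byte slice suitable for stable storage.
func encodeState(st persistedState) []byte {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(st); err != nil {
		log.Fatal(err)
	}
	return buf.Bytes()
}

// decodeState reverses encodeState, as readPersist does after a restart.
func decodeState(data []byte) persistedState {
	var st persistedState
	if err := gob.NewDecoder(bytes.NewBuffer(data)).Decode(&st); err != nil {
		log.Fatal(err)
	}
	return st
}

func main() {
	st := persistedState{CurrentTerm: 3, VotedFor: 1,
		Logs: []logEntry{{Term: 0, Index: 0}, {Term: 3, Index: 1, Command: "put x=1"}}}
	out := decodeState(encodeState(st))
	fmt.Println(out.CurrentTerm, out.VotedFor, len(out.Logs)) // 3 1 2
}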
+func (rf *Raft) GetState() (int, bool) { + rf.mu.Lock() + defer rf.mu.Unlock() + return rf.currentTerm, rf.state == LEADER +} + +func (rf *Raft) encodeState() []byte { + w := new(bytes.Buffer) + e := labgob.NewEncoder(w) + e.Encode(rf.currentTerm) + e.Encode(rf.votedFor) + e.Encode(rf.logs) + return w.Bytes() +} + +// save Raft's persistent state to stable storage, +// where it can later be retrieved after a crash and restart. +// see paper's Figure 2 for a description of what should be persistent. +// before you've implemented snapshots, you should pass nil as the +// second argument to persister.Save(). +// after you've implemented snapshots, pass the current snapshot +// (or nil if there's not yet a snapshot). +func (rf *Raft) persist() { + if rf.persister.ReadSnapshot() != nil { + rf.persister.Save(rf.encodeState(), rf.persister.ReadSnapshot()) + } else { + rf.persister.Save(rf.encodeState(), nil) + } +} + +// restore previously persisted state. +func (rf *Raft) readPersist(data []byte) { + if data == nil || len(data) < 1 { // bootstrap without any state + return + } + r := bytes.NewBuffer(data) + d := labgob.NewDecoder(r) + var currentTerm int + var votedFor int + var logs []Entry + + if d.Decode(¤tTerm) != nil || d.Decode(&votedFor) != nil || d.Decode(&logs) != nil { + log.Fatal("failed to read persist\n") + } else { + DPrintf("[%d]: read persist, currentTerm: %d, votedFor: %d, logs: %v\n", rf.me, currentTerm, votedFor, logs) + rf.currentTerm = currentTerm + rf.votedFor = votedFor + rf.logs = logs + rf.lastApplied = rf.logs[0].Index + rf.commitIndex = rf.logs[0].Index + } +} + +// the service says it has created a snapshot that has +// all info up to and including index. this means the +// service no longer needs the log through (and including) +// that index. Raft should now trim its log as much as possible. +func (rf *Raft) Snapshot(index int, snapshot []byte) { + // Your code here (3D). + rf.mu.Lock() + defer rf.mu.Unlock() + // if the snapshot is outdated, just ignore it + if rf.logs[0].Index >= index { + return + } + firstLogIndex := rf.logs[0].Index + trimLen := index - firstLogIndex + // trim the logs + rf.logs = append([]Entry{}, rf.logs[trimLen:]...) + rf.logs[0].Command = nil + rf.persister.Save(rf.encodeState(), snapshot) +} + +// the service using Raft (e.g. a k/v server) wants to start +// agreement on the next command to be appended to Raft's log. if this +// server isn't the leader, returns false. otherwise start the +// agreement and return immediately. there is no guarantee that this +// command will ever be committed to the Raft log, since the leader +// may fail or lose an election. even if the Raft instance has been killed, +// this function should return gracefully. +// +// the first return value is the index that the command will appear at +// if it's ever committed. the second return value is the current +// term. the third return value is true if this server believes it is +// the leader. +func (rf *Raft) Start(command interface{}) (int, int, bool) { + rf.mu.Lock() + defer rf.mu.Unlock() + if rf.state != LEADER { + return -1, -1, false + } + defer DPrintf("(Start) [%d]: command %+v, index:%d, term: %d\n", rf.me, command, rf.logs[len(rf.logs)-1].Index, rf.currentTerm) + rf.logs = append(rf.logs, Entry{ + Term: rf.currentTerm, + Index: rf.logs[len(rf.logs)-1].Index + 1, + Command: command, + }) + rf.broadcastAppendEntries(false) + // Your code here (3B). 
+ return rf.logs[len(rf.logs)-1].Index, rf.currentTerm, true +} + +// Warning: this function is not thread-safe +func (rf *Raft) resetNewTermState(targetTerm int) { + DPrintf("(ResetTerm)[%d]: received newer term, set term to %d\n", rf.me, targetTerm) + if rf.currentTerm < targetTerm { + rf.votedFor = -1 + } + rf.currentTerm = targetTerm + rf.state = FOLLOWER // reset to follower +} + +// Reply false if term < currentTerm (§5.1) +// If RPC request contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkRequestTerm(args, reply RaftRPC) bool { + term := args.GetTerm() + defer reply.SetTerm(rf.currentTerm) + if term < rf.currentTerm { + return false + } + if term > rf.currentTerm { + rf.resetNewTermState(term) + } + return true +} + +// If RPC request or response contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkResponseTerm(args, reply RaftRPC, isElection bool) bool { + argsTerm := args.GetTerm() + replyTerm := reply.GetTerm() + if replyTerm > argsTerm { + rf.resetNewTermState(replyTerm) + rf.resetElectionTimer() + return false + } + return isElection || (rf.state == LEADER) +} + +// the tester doesn't halt goroutines created by Raft after each test, +// but it does call the Kill() method. your code can use killed() to +// check whether Kill() has been called. the use of atomic avoids the +// need for a lock. +// +// the issue is that long-running goroutines use memory and may chew +// up CPU time, perhaps causing later tests to fail and generating +// confusing debug output. any goroutine with a long-running loop +// should call killed() to check whether it should stop. +func (rf *Raft) Kill() { + atomic.StoreInt32(&rf.dead, 1) + // Your code here, if desired. 
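+	// Sketch (illustrative only): long-running goroutines in this package
+	// follow the pattern below so they exit shortly after Kill() is called:
+	//	for !rf.killed() {
+	//		// periodic work
+	//		time.Sleep(rf.heartbeatTimeout)
+	//	}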
+}
+
+func (rf *Raft) killed() bool {
+	z := atomic.LoadInt32(&rf.dead)
+	return z == 1
+}
+
+// a dedicated applier goroutine guarantees that each log entry will be pushed into applyCh exactly once, ensuring that the service's applying of entries and Raft's committing of entries can proceed in parallel
+func (rf *Raft) applier() {
+	for !rf.killed() {
+		rf.mu.Lock()
+		// if there is no need to apply entries, just release the CPU and wait for another goroutine's signal when new entries are committed
+		for rf.lastApplied >= rf.commitIndex {
+			rf.applierCond.Wait()
+		}
+		firstLogIndex := rf.logs[0].Index
+		commitIndex, lastApplied := rf.commitIndex, rf.lastApplied
+		DPrintf("(applier) [%d]: commitIndex: %d, lastApplied: %d, logFirstIndex: %d, logLastIndex: %d\n", rf.me, commitIndex, lastApplied, firstLogIndex, rf.logs[len(rf.logs)-1].Index)
+		entries := make([]Entry, commitIndex-lastApplied)
+		copy(entries, rf.logs[lastApplied+1-firstLogIndex:commitIndex+1-firstLogIndex])
+		if rf.smsg != nil {
+			msg := rf.smsg
+			rf.smsg = nil
+			rf.mu.Unlock()
+			rf.applyCh <- *msg
+		} else {
+			rf.mu.Unlock()
+		}
+		for _, entry := range entries {
+			DPrintf("(applier) [%d]: apply entry %+v\n", rf.me, entry)
+			rf.applyCh <- ApplyMsg{
+				CommandValid: true,
+				Command:      entry.Command,
+				CommandTerm:  entry.Term,
+				CommandIndex: entry.Index,
+			}
+		}
+		rf.mu.Lock()
+		// use commitIndex rather than rf.commitIndex because rf.commitIndex may change during the Unlock() and Lock()
+		// use Max(rf.lastApplied, commitIndex) rather than commitIndex directly to avoid a concurrent InstallSnapshot RPC causing lastApplied to roll back
+		if rf.lastApplied < commitIndex {
+			rf.lastApplied = commitIndex
+		}
+		rf.mu.Unlock()
+	}
+}
+
+/**
+ * Let's illustrate the timeline of the ticker function
+ * e: election timeout
+ * h: heartbeat timeout
+ *
+ * ---- h ---- h ---- h ---- h ---- h ---- ...
+ *
+ * First, the server wakes up at each fixed heartbeat timeout. This timeout is
+ * relatively shorter than the election timeout. If the server is not a leader,
+ * it basically does nothing about the heartbeat.
+ *
+ * However, every time the server wakes up, it checks whether the election
+ * timeout has been reached. It might start a new election, if it is not a leader.
+ *
+ *                              v election timeout found!
+ * ---- h1 ---- h2 ---- h3 ---- h ---- h ---- ...
+ * --------- e1 ------ e2 ------------ e ---- ...
+ *
+ * Resetting the election timeout when the server receives a heartbeat or a
+ * vote from another server prevents an election. One shortcoming of the
+ * current implementation is that an expired election timeout does not trigger
+ * a new election immediately; it waits until the next heartbeat timeout.
+ */
+func (rf *Raft) ticker() {
+	for !rf.killed() {
+		rf.mu.Lock()
+		if rf.state == LEADER {
+			rf.broadcastAppendEntries(true)
+		} else if rf.isElectionTimeout() {
+			rf.startElection()
+		}
+		rf.mu.Unlock()
+		time.Sleep(rf.heartbeatTimeout)
+	}
+}
+
+// the service or tester wants to create a Raft server. the ports
+// of all the Raft servers (including this one) are in peers[]. this
+// server's port is peers[me]. all the servers' peers[] arrays
+// have the same order. persister is a place for this server to
+// save its persistent state, and also initially holds the most
+// recent saved state, if any. applyCh is a channel on which the
+// tester or service expects Raft to send ApplyMsg messages.
+// Make() must return quickly, so it should start goroutines
+// for any long-running work.
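+//
+// A typical caller (sketch only; the tester's config code is the
+// authoritative version) wires things up roughly like this:
+//
+//	applyCh := make(chan ApplyMsg)
+//	rf := Make(peers, me, persister, applyCh)
+//	go func() {
+//		for msg := range applyCh {
+//			_ = msg // apply committed commands to the service's state machine
+//		}
+//	}()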
+func Make(peers []*labrpc.ClientEnd, me int, + persister *Persister, applyCh chan ApplyMsg) *Raft { + rf := &Raft{} + rf.peers = peers + rf.persister = persister + rf.me = me + rf.applyCh = applyCh + rf.heartbeatTimeout = 125 * time.Millisecond + rf.resetElectionTimer() + rf.state = FOLLOWER + rf.votedFor = -1 + rf.logs = make([]Entry, 0) + + // dummy entry to make the index start from 1 + rf.logs = append(rf.logs, Entry{0, 0, nil}) + + rf.commitIndex = 0 + rf.lastApplied = 0 + + rf.applierCond = sync.NewCond(&rf.mu) + rf.broadcasterCond = make([]*sync.Cond, len(peers)) + + rf.nextIndex = make([]int, len(peers)) + rf.matchIndex = make([]int, len(peers)) + + for id := range peers { + rf.nextIndex[id] = 1 + if id != rf.me { + rf.broadcasterCond[id] = sync.NewCond(&sync.Mutex{}) + go rf.broadcaster(id) + } + } + + rf.smsg = nil + + // initialize from state persisted before a crash + rf.readPersist(persister.ReadRaftState()) + + // start ticker goroutine to start elections + go rf.ticker() + + go rf.applier() + + return rf +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/util.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/util.go new file mode 100644 index 0000000..37c7fe6 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4a_kvraft/starter_files/raft/util.go @@ -0,0 +1,16 @@ +package raft + +import ( + "log" + "os" +) + +// Debugging +var Debug = os.Getenv("DEBUG") == "1" + +func DPrintf(format string, a ...interface{}) { + if !Debug { + return + } + log.Printf(format, a...) +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/config.json index 08bc44d..34a3b53 100644 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/config.json +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/config.json @@ -10,5 +10,69 @@ "fault-tolerance", "go" ], - "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", + "starter_files": [ + { + "src": "raft/persister.go", + "dest": "/workspace/src/raft/persister.go" + }, + { + "src": "raft/election.go", + "dest": "/workspace/src/raft/election.go" + }, + { + "src": "raft/append_entries.go", + "dest": "/workspace/src/raft/append_entries.go" + }, + { + "src": "raft/util.go", + "dest": "/workspace/src/raft/util.go" + }, + { + "src": "raft/install_snapshot.go", + "dest": "/workspace/src/raft/install_snapshot.go" + }, + { + "src": "raft/raft.go", + "dest": "/workspace/src/raft/raft.go" + }, + { + "src": "kvsrv/server.go", + "dest": "/workspace/src/kvsrv/server.go" + }, + { + "src": "kvsrv/client.go", + "dest": "/workspace/src/kvsrv/client.go" + }, + { + "src": "kvsrv/common.go", + "dest": "/workspace/src/kvsrv/common.go" + }, + { + "src": "mr/coordinator.go", + "dest": "/workspace/src/mr/coordinator.go" + }, + { + "src": "mr/worker.go", + "dest": "/workspace/src/mr/worker.go" + }, + { + "src": "mr/rpc.go", + "dest": "/workspace/src/mr/rpc.go" + } + ], + "output_files": [ + { + "src": "/workspace/src/kvraft/client.go", + "dest": "src/kvraft/client.go" + }, + { + "src": "/workspace/src/kvraft/common.go", + "dest": "src/kvraft/common.go" + }, + { + "src": "/workspace/src/kvraft/server.go", + "dest": "src/kvraft/server.go" + } + ] } \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/preprocess.sh 
b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/preprocess.sh index 8fd1995..89cb652 100755 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/preprocess.sh +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/preprocess.sh @@ -5,1566 +5,6 @@ echo '=== Preprocessing 4B Kvraft ===' cd /workspace -echo 'Copying reference implementations from previous labs...' - -echo 'Copying raft implementation...' -mkdir -p src/raft - -cat > src/raft/raft.go << 'FILE_EOF_raft_raft_go' -package raft - -// -// this is an outline of the API that raft must expose to -// the service (or tester). see comments below for -// each of these functions for more details. -// -// rf = Make(...) -// create a new Raft server. -// rf.Start(command interface{}) (index, term, isleader) -// start agreement on a new log entry -// rf.GetState() (term, isLeader) -// ask a Raft for its current term, and whether it thinks it is leader -// ApplyMsg -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester) -// in the same server. -// - -import ( - // "bytes" - - "bytes" - "log" - "sync" - "sync/atomic" - "time" - - // "6.5840/labgob" - "6.5840/labgob" - "6.5840/labrpc" -) - -// as each Raft peer becomes aware that successive log entries are -// committed, the peer should send an ApplyMsg to the service (or -// tester) on the same server, via the applyCh passed to Make(). set -// CommandValid to true to indicate that the ApplyMsg contains a newly -// committed log entry. -// -// in part 3D you'll want to send other kinds of messages (e.g., -// snapshots) on the applyCh, but set CommandValid to false for these -// other uses. -type ApplyMsg struct { - CommandValid bool - Command interface{} - CommandIndex int - CommandTerm int - - // For 3D: - SnapshotValid bool - Snapshot []byte - SnapshotTerm int - SnapshotIndex int -} - -type Entry struct { - Term int - Index int - Command interface{} -} - -// Base struct for common fields -type BaseRPC struct { - Term int -} - -// Implement RaftRPC interface for BaseRPC -func (b *BaseRPC) GetTerm() int { - return b.Term -} - -func (b *BaseRPC) SetTerm(term int) { - b.Term = term -} - -// RaftRPC interface -type RaftRPC interface { - GetTerm() int - SetTerm(int) -} - -type ServerState int - -const ( - FOLLOWER ServerState = iota - CANDIDATE - LEADER -) - -// A Go object implementing a single Raft peer. -type Raft struct { - mu sync.Mutex // Lock to protect shared access to this peer's state - peers []*labrpc.ClientEnd // RPC end points of all peers - persister *Persister // Object to hold this peer's persisted state - me int // this peer's index into peers[] - dead int32 // set by Kill() - heartbeatTimeout time.Duration - electionTimeout time.Duration - electionTimeStamp time.Time - applyCh chan ApplyMsg - - // state a Raft server must maintain. 
-	broadcasterCond []*sync.Cond
-	applierCond     *sync.Cond
-
-	// server state
-	state ServerState
-
-	// presistent state on all servers
-	currentTerm int     // latest term server has seen (initialized to 0 on first boot, increases monotonically)
-	votedFor    int     // candidateId that received vote in current term (or null if none)
-	logs        []Entry // log entries; each entry contains command for state machine, and term when entry was received by leader (first index is 1)
-
-	// volatile state on all servers
-	commitIndex int // index of highest log entry known to be committed (initialized to 0, increases monotonically)
-	lastApplied int // index of highest log entry applied to state machine (initialized to 0, increases monotonically)
-
-	// volatile state on leaders (reinitialized after election)
-	nextIndex  []int // for each server, index of the next log entry to send to that server (initialized to leader last log index + 1)
-	matchIndex []int // for each server, index of highest log entry known to be replicated on server (initialized to 0, increases monotonically)
-
-	// snapshot msg
-	smsg *ApplyMsg
-}
-
-// return currentTerm and whether this server
-// believes it is the leader.
-func (rf *Raft) GetState() (int, bool) {
-	rf.mu.Lock()
-	defer rf.mu.Unlock()
-	return rf.currentTerm, rf.state == LEADER
-}
-
-func (rf *Raft) encodeState() []byte {
-	w := new(bytes.Buffer)
-	e := labgob.NewEncoder(w)
-	e.Encode(rf.currentTerm)
-	e.Encode(rf.votedFor)
-	e.Encode(rf.logs)
-	return w.Bytes()
-}
-
-// save Raft's persistent state to stable storage,
-// where it can later be retrieved after a crash and restart.
-// see paper's Figure 2 for a description of what should be persistent.
-// before you've implemented snapshots, you should pass nil as the
-// second argument to persister.Save().
-// after you've implemented snapshots, pass the current snapshot
-// (or nil if there's not yet a snapshot).
-func (rf *Raft) persist() {
-	if rf.persister.ReadSnapshot() != nil {
-		rf.persister.Save(rf.encodeState(), rf.persister.ReadSnapshot())
-	} else {
-		rf.persister.Save(rf.encodeState(), nil)
-	}
-}
-
-// restore previously persisted state.
-func (rf *Raft) readPersist(data []byte) {
-	if data == nil || len(data) < 1 { // bootstrap without any state
-		return
-	}
-	r := bytes.NewBuffer(data)
-	d := labgob.NewDecoder(r)
-	var currentTerm int
-	var votedFor int
-	var logs []Entry
-
-	if d.Decode(&currentTerm) != nil || d.Decode(&votedFor) != nil || d.Decode(&logs) != nil {
-		log.Fatal("failed to read persist\n")
-	} else {
-		DPrintf("[%d]: read persist, currentTerm: %d, votedFor: %d, logs: %v\n", rf.me, currentTerm, votedFor, logs)
-		rf.currentTerm = currentTerm
-		rf.votedFor = votedFor
-		rf.logs = logs
-		rf.lastApplied = rf.logs[0].Index
-		rf.commitIndex = rf.logs[0].Index
-	}
-}
-
-// the service says it has created a snapshot that has
-// all info up to and including index. this means the
-// service no longer needs the log through (and including)
-// that index. Raft should now trim its log as much as possible.
-func (rf *Raft) Snapshot(index int, snapshot []byte) {
-	// Your code here (3D).
-	rf.mu.Lock()
-	defer rf.mu.Unlock()
-	// if the snapshot is outdated, just ignore it
-	if rf.logs[0].Index >= index {
-		return
-	}
-	firstLogIndex := rf.logs[0].Index
-	trimLen := index - firstLogIndex
-	// trim the logs
-	rf.logs = append([]Entry{}, rf.logs[trimLen:]...)
-	rf.logs[0].Command = nil
-	rf.persister.Save(rf.encodeState(), snapshot)
-}
-
-// the service using Raft (e.g.
a k/v server) wants to start -// agreement on the next command to be appended to Raft's log. if this -// server isn't the leader, returns false. otherwise start the -// agreement and return immediately. there is no guarantee that this -// command will ever be committed to the Raft log, since the leader -// may fail or lose an election. even if the Raft instance has been killed, -// this function should return gracefully. -// -// the first return value is the index that the command will appear at -// if it's ever committed. the second return value is the current -// term. the third return value is true if this server believes it is -// the leader. -func (rf *Raft) Start(command interface{}) (int, int, bool) { - rf.mu.Lock() - defer rf.mu.Unlock() - if rf.state != LEADER { - return -1, -1, false - } - defer DPrintf("(Start) [%d]: command %+v, index:%d, term: %d\n", rf.me, command, rf.logs[len(rf.logs)-1].Index, rf.currentTerm) - rf.logs = append(rf.logs, Entry{ - Term: rf.currentTerm, - Index: rf.logs[len(rf.logs)-1].Index + 1, - Command: command, - }) - rf.broadcastAppendEntries(false) - // Your code here (3B). - return rf.logs[len(rf.logs)-1].Index, rf.currentTerm, true -} - -// Warning: this function is not thread-safe -func (rf *Raft) resetNewTermState(targetTerm int) { - DPrintf("(ResetTerm)[%d]: received newer term, set term to %d\n", rf.me, targetTerm) - if rf.currentTerm < targetTerm { - rf.votedFor = -1 - } - rf.currentTerm = targetTerm - rf.state = FOLLOWER // reset to follower -} - -// Reply false if term < currentTerm (§5.1) -// If RPC request contains term T > currentTerm: -// set currentTerm = T, convert to follower (§5.1) -func (rf *Raft) checkRequestTerm(args, reply RaftRPC) bool { - term := args.GetTerm() - defer reply.SetTerm(rf.currentTerm) - if term < rf.currentTerm { - return false - } - if term > rf.currentTerm { - rf.resetNewTermState(term) - } - return true -} - -// If RPC request or response contains term T > currentTerm: -// set currentTerm = T, convert to follower (§5.1) -func (rf *Raft) checkResponseTerm(args, reply RaftRPC, isElection bool) bool { - argsTerm := args.GetTerm() - replyTerm := reply.GetTerm() - if replyTerm > argsTerm { - rf.resetNewTermState(replyTerm) - rf.resetElectionTimer() - return false - } - return isElection || (rf.state == LEADER) -} - -// the tester doesn't halt goroutines created by Raft after each test, -// but it does call the Kill() method. your code can use killed() to -// check whether Kill() has been called. the use of atomic avoids the -// need for a lock. -// -// the issue is that long-running goroutines use memory and may chew -// up CPU time, perhaps causing later tests to fail and generating -// confusing debug output. any goroutine with a long-running loop -// should call killed() to check whether it should stop. -func (rf *Raft) Kill() { - atomic.StoreInt32(&rf.dead, 1) - // Your code here, if desired. 
-} - -func (rf *Raft) killed() bool { - z := atomic.LoadInt32(&rf.dead) - return z == 1 -} - -// a dedicated applier goroutine to guarantee that each log will be push into applyCh exactly once, ensuring that service's applying entries and raft's committing entries can be parallel -func (rf *Raft) applier() { - for !rf.killed() { - rf.mu.Lock() - // if there is no need to apply entries, just release CPU and wait other goroutine's signal if they commit new entries - for rf.lastApplied >= rf.commitIndex { - rf.applierCond.Wait() - } - firstLogIndex := rf.logs[0].Index - commitIndex, lastApplied := rf.commitIndex, rf.lastApplied - DPrintf("(applier) [%d]: commitIndex: %d, lastApplied: %d, logFirstIndex: %d, logLastIndex: %d\n", rf.me, commitIndex, lastApplied, firstLogIndex, rf.logs[len(rf.logs)-1].Index) - entries := make([]Entry, commitIndex-lastApplied) - copy(entries, rf.logs[lastApplied+1-firstLogIndex:commitIndex+1-firstLogIndex]) - if rf.smsg != nil { - msg := rf.smsg - rf.smsg = nil - rf.mu.Unlock() - rf.applyCh <- *msg - } else { - rf.mu.Unlock() - } - for _, entry := range entries { - DPrintf("(applier) [%d]: apply entry %+v\n", rf.me, entry) - rf.applyCh <- ApplyMsg{ - CommandValid: true, - Command: entry.Command, - CommandTerm: entry.Term, - CommandIndex: entry.Index, - } - } - rf.mu.Lock() - // use commitIndex rather than rf.commitIndex because rf.commitIndex may change during the Unlock() and Lock() - // use Max(rf.lastApplied, commitIndex) rather than commitIndex directly to avoid concurrently InstallSnapshot rpc causing lastApplied to rollback - if rf.lastApplied < commitIndex { - rf.lastApplied = commitIndex - } - rf.mu.Unlock() - } -} - -/** - * Lets illustrate the time line of the ticker function - * e: election timeout - * h: heartbeat timeout - * - * ---- h ---- h ---- h ---- h ---- h ---- ... - * - * First, the server will wake up each fixed heartbeat timeout. This timeout is - * relatively shorter than the election timeout. If the server is not a leader, - * it basically do nothing about heartbeat. - * - * However, everytime when server wake up, it will check if the election timeout - * is reached. It might start a new election, if it is not a leader. - * - * v election timeout found! - * ---- h1 ---- h2 ---- h3 ---- h ---- h ---- ... - * --------- e1 ------ e2 ------------ e ---- ... - * - * Reseting a new election timeout when the server receives a heartbeat or a - * vote from another server prevents the election. One shortcomming of the - * current implementation is that the election timeout does not trigger a new - * election immediately. It will wait until the next heartbeat timeout. - */ -func (rf *Raft) ticker() { - for !rf.killed() { - rf.mu.Lock() - if rf.state == LEADER { - rf.broadcastAppendEntries(true) - } else if rf.isElectionTimeout() { - rf.startElection() - } - rf.mu.Unlock() - time.Sleep(rf.heartbeatTimeout) - } -} - -// the service or tester wants to create a Raft server. the ports -// of all the Raft servers (including this one) are in peers[]. this -// server's port is peers[me]. all the servers' peers[] arrays -// have the same order. persister is a place for this server to -// save its persistent state, and also initially holds the most -// recent saved state, if any. applyCh is a channel on which the -// tester or service expects Raft to send ApplyMsg messages. -// Make() must return quickly, so it should start goroutines -// for any long-running work. 
-func Make(peers []*labrpc.ClientEnd, me int, - persister *Persister, applyCh chan ApplyMsg) *Raft { - rf := &Raft{} - rf.peers = peers - rf.persister = persister - rf.me = me - rf.applyCh = applyCh - rf.heartbeatTimeout = 125 * time.Millisecond - rf.resetElectionTimer() - rf.state = FOLLOWER - rf.votedFor = -1 - rf.logs = make([]Entry, 0) - - // dummy entry to make the index start from 1 - rf.logs = append(rf.logs, Entry{0, 0, nil}) - - rf.commitIndex = 0 - rf.lastApplied = 0 - - rf.applierCond = sync.NewCond(&rf.mu) - rf.broadcasterCond = make([]*sync.Cond, len(peers)) - - rf.nextIndex = make([]int, len(peers)) - rf.matchIndex = make([]int, len(peers)) - - for id := range peers { - rf.nextIndex[id] = 1 - if id != rf.me { - rf.broadcasterCond[id] = sync.NewCond(&sync.Mutex{}) - go rf.broadcaster(id) - } - } - - rf.smsg = nil - - // initialize from state persisted before a crash - rf.readPersist(persister.ReadRaftState()) - - // start ticker goroutine to start elections - go rf.ticker() - - go rf.applier() - - return rf -} - -FILE_EOF_raft_raft_go - -cat > src/raft/election.go << 'FILE_EOF_raft_election_go' -package raft - -import ( - "math/rand" - "sync/atomic" - "time" -) - -// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 - -type RequestVoteArgs struct { - BaseRPC // candidate's term - CandidateId int // candidate requesting vote - LastLogIndex int // index of candidate's last log entry - LastLogTerm int // term of candidate's last log entry -} - -type RequestVoteReply struct { - BaseRPC // currentTerm, for candidate to update itself - VoteGranted bool // true means candidate received vote -} - -// RequestVote RPC handler -// Restart your election timer if you grant a vote to another peer. -func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - - reply.VoteGranted = false - - DPrintf("(RequestVote) [%d]: receive vote request from %d, term %d\n", rf.me, args.CandidateId, args.Term) - - if !rf.checkRequestTerm(args, reply) { - return - } - - if (rf.votedFor == -1 || rf.votedFor == args.CandidateId) && rf.isUpToDate(args) { - reply.VoteGranted = true - rf.votedFor = args.CandidateId - rf.resetElectionTimer() - } -} - -func (rf *Raft) isUpToDate(args *RequestVoteArgs) bool { - lastLog := rf.logs[len(rf.logs)-1] - candidateIndex := args.LastLogIndex - candidateTerm := args.LastLogTerm - return candidateTerm > lastLog.Term || (candidateTerm == lastLog.Term && candidateIndex >= lastLog.Index) -} - -func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, voteCount *int32) { - reply := &RequestVoteReply{} - ok := rf.peers[server].Call("Raft.RequestVote", args, reply) - if !ok { - return - } - - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - - if !rf.checkResponseTerm(args, reply, true) { - return - } - - if !reply.VoteGranted { - return - } - - DPrintf("(RequestVote) [%d]: received vote from %d, voteCount: %d\n", rf.me, server, *voteCount) - - // If votes received from majority of servers: become leader - if atomic.AddInt32(voteCount, 1) > int32(len(rf.peers)/2) && - rf.state == CANDIDATE && - rf.currentTerm == args.Term { - rf.state = LEADER - lastLogIndex := rf.logs[len(rf.logs)-1].Index - for i := range rf.peers { - rf.nextIndex[i] = lastLogIndex + 1 - rf.matchIndex[i] = 0 - } - DPrintf("[%d]: become leader to term %d\n", rf.me, rf.currentTerm) - // send initial empty AppendEntries RPCs (heartbeat) to each server immediately - rf.broadcastAppendEntries(true) 
- } - DPrintf("(RequestVote) [%d]: voteCount: %d\n", rf.me, *voteCount) -} - -func (rf *Raft) startElection() { - rf.currentTerm++ - rf.state = CANDIDATE - rf.votedFor = rf.me - rf.resetElectionTimer() - DPrintf("(RequestVote) [%d]: start election, term %d", rf.me, rf.currentTerm) - lastLog := rf.logs[len(rf.logs)-1] - - voteCount := int32(1) - args := RequestVoteArgs{ - BaseRPC: BaseRPC{rf.currentTerm}, - CandidateId: rf.me, - LastLogIndex: lastLog.Index, - LastLogTerm: lastLog.Term, - } - - for id := range rf.peers { - if id == rf.me { - continue - } - go rf.sendRequestVote(id, &args, &voteCount) - } -} - -func (rf *Raft) resetElectionTimer() { - // election timeout range from 350 to 550 - ms := 350 + (rand.Int63() % 200) - rf.electionTimeStamp = time.Now() - rf.electionTimeout = time.Duration(ms) * time.Millisecond -} - -func (rf *Raft) isElectionTimeout() bool { - return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) -} - -FILE_EOF_raft_election_go - -cat > src/raft/append_entries.go << 'FILE_EOF_raft_append_entries_go' -package raft - -// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 - -type AppendEntriesArgs struct { - BaseRPC // leader's term - LeaderId int // so follower can redirect clients - PrevLogIndex int // index of log entry immediately preceding new ones - PrevLogTerm int // term of prevLogIndex entry - Entries []Entry // log entries to store (empty for heartbeat; may send more than one for efficiency) - CommitIndex int // leader's commitIndex -} - -type AppendEntriesReply struct { - BaseRPC // currentTerm, for leader to update itself - Success bool // true if follower contained entry matching prevLogIndex and prevLogTerm - ConflictIndex int // the index of the first conflicting entry -} - -// AppendEntries RPC handler -// Reset the election timer if you get an AppendEntries RPC from the current leader -// (i.e., if the term of the AppendEntries arguments is outdated, you should not reset your timer); -func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - DPrintf("(AppendEntries) [%d] recieve from %d, Term: %d, PrevLogIndex: %d, PrevLogTerm: %d\n", rf.me, args.LeaderId, args.Term, args.PrevLogIndex, args.PrevLogTerm) - - reply.Success = false - reply.ConflictIndex = -1 - - if !rf.checkRequestTerm(args, reply) { - return - } - - if rf.state == CANDIDATE { - rf.state = FOLLOWER - } - - rf.resetElectionTimer() - - prevLogIndex := args.PrevLogIndex - rf.logs[0].Index - - if prevLogIndex < 0 { - // force to send a snapshot - reply.ConflictIndex = 0 - return - } - - // Reply false if log doesn’t contain an entry at prevLogIndex - // whose term matches prevLogTerm (§5.3) - if prevLogIndex >= len(rf.logs) { - reply.ConflictIndex = rf.logs[len(rf.logs)-1].Index - return - } - - // If an existing entry conflicts with a new one (same index - // but different terms), delete the existing entry and all that - // follow it (§5.3) - if rf.logs[prevLogIndex].Term != args.PrevLogTerm { - // optimization - curTerm := rf.logs[prevLogIndex].Term - var conflictIndex int - for i := prevLogIndex; i > 0; i-- { - if rf.logs[i-1].Term != curTerm { - conflictIndex = i - break - } - } - reply.ConflictIndex = conflictIndex + rf.logs[0].Index - return - } - for idx, entry := range args.Entries { - logIndex := entry.Index - rf.logs[0].Index - if logIndex >= len(rf.logs) || rf.logs[logIndex].Term != entry.Term { - DPrintf("(AppendEntries) [%d] append logs: %v\n", rf.me, 
args.Entries) - rf.logs = append([]Entry{}, append(rf.logs[:logIndex], args.Entries[idx:]...)...) - break - } - } - reply.Success = true - if args.CommitIndex > rf.commitIndex { - rf.commitIndex = args.CommitIndex - if args.CommitIndex-rf.logs[0].Index >= len(rf.logs) { - rf.commitIndex = rf.logs[len(rf.logs)-1].Index - } - } - rf.applierCond.Signal() -} - -func (rf *Raft) sendAppendEntries(server int, args *AppendEntriesArgs) { - reply := &AppendEntriesReply{} - ok := rf.peers[server].Call("Raft.AppendEntries", args, reply) - if !ok { - return - } - - DPrintf("(AppendEntries) [%d] recieve reply from %d, Term: %d, Success: %v, ConflictIndex: %d\n", rf.me, server, reply.Term, reply.Success, reply.ConflictIndex) - - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - - if !rf.checkResponseTerm(args, reply, false) { - return - } - // If successful: update nextIndex and matchIndex for - // follower (§5.3) - if reply.Success { - if len(args.Entries) > 0 { - rf.nextIndex[server] = args.Entries[len(args.Entries)-1].Index + 1 - } - rf.matchIndex[server] = rf.nextIndex[server] - 1 - for _, log := range rf.logs { - index := log.Index - count := 1 - for peer := range rf.peers { - if peer != rf.me && rf.matchIndex[peer] >= index { - count++ - } - } - // If there exists an N such that N > commitIndex, a majority - // of matchIndex[i] ≥ N, and log[N].term == currentTerm: - // set commitIndex = N (§5.3, §5.4). - if count > len(rf.peers)/2 && index > rf.commitIndex && log.Term == rf.currentTerm { - rf.commitIndex = index - } - } - } else { - if reply.ConflictIndex != -1 { - rf.nextIndex[server] = reply.ConflictIndex - 1 - } else { - rf.nextIndex[server] = rf.nextIndex[server] - 1 - } - if rf.nextIndex[server] < 1 { - rf.nextIndex[server] = 1 - } - } - DPrintf("(AppendEntries) [%d] nextIndex: %v, matchIndex: %v, commitIndex: %d\n", rf.me, rf.nextIndex, rf.matchIndex, rf.commitIndex) - rf.applierCond.Signal() -} - -func (rf *Raft) broadcastAppendEntries(isHeartBeat bool) { - for peer := range rf.peers { - if peer != rf.me { - // if it is a heartbeat we dont care the linearizability of logs append - if isHeartBeat { - args := rf.prepareReplicationArgs(peer) - go rf.sendReplicationRPC(peer, args) - } else { - rf.broadcasterCond[peer].Signal() - } - } - } -} - -func (rf *Raft) prepareReplicationArgs(peer int) interface{} { - if rf.nextIndex[peer] > rf.logs[0].Index { - firstLog := rf.logs[0] - nextIndex := rf.nextIndex[peer] - firstLog.Index - prevLog := rf.logs[nextIndex-1] - logs := make([]Entry, len(rf.logs[nextIndex:])) - copy(logs, rf.logs[nextIndex:]) - return &AppendEntriesArgs{ - BaseRPC: BaseRPC{rf.currentTerm}, - LeaderId: rf.me, - PrevLogIndex: prevLog.Index, - PrevLogTerm: prevLog.Term, - Entries: logs, - CommitIndex: rf.commitIndex, - } - } else { - return &InstallSnapshotArgs{ - BaseRPC: BaseRPC{rf.currentTerm}, - LeaderId: rf.me, - LastIncludedIndex: rf.logs[0].Index, - LastIncludedTerm: rf.logs[0].Term, - Offset: 0, - Data: rf.persister.ReadSnapshot(), - Done: true, - } - } -} - -func (rf *Raft) sendReplicationRPC(peer int, args interface{}) { - switch v := args.(type) { - case *AppendEntriesArgs: - rf.sendAppendEntries(peer, v) - case *InstallSnapshotArgs: - rf.sendInstallSnapshot(peer, v) - default: - panic("(sendReplicationRPC) SHOULD NOT REACH") - } -} - -func (rf *Raft) isReplicationNeeded(peer int) bool { - return rf.state == LEADER && rf.matchIndex[peer] < rf.logs[len(rf.logs)-1].Index -} - -func (rf *Raft) broadcaster(peer int) { - rf.broadcasterCond[peer].L.Lock() - defer 
rf.broadcasterCond[peer].L.Unlock() - for !rf.killed() { - rf.mu.Lock() - for !rf.isReplicationNeeded(peer) { - rf.mu.Unlock() - rf.broadcasterCond[peer].Wait() - rf.mu.Lock() - } - args := rf.prepareReplicationArgs(peer) - rf.mu.Unlock() - rf.sendReplicationRPC(peer, args) - } -} - -FILE_EOF_raft_append_entries_go - -cat > src/raft/install_snapshot.go << 'FILE_EOF_raft_install_snapshot_go' -package raft - -type InstallSnapshotArgs struct { - BaseRPC - LeaderId int - LastIncludedIndex int - LastIncludedTerm int - Offset int - Data []byte - Done bool -} - -type InstallSnapshotReply struct { - BaseRPC -} - -// InstallSnapshot RPC handler -func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) { - rf.mu.Lock() - defer rf.mu.Unlock() - - if !rf.checkRequestTerm(args, reply) { - return - } - - if args.LastIncludedIndex <= rf.commitIndex { - return - } - prevCommitIndex := rf.commitIndex - prevLastApplied := rf.lastApplied - defer DPrintf("(InstallSnapshot) [%d]: LastIncludedIndex: %d, LastIncludedTerm: %d, prevCommitIndex: %d, prevLastApplied: %d\n", rf.me, args.LastIncludedIndex, args.LastIncludedTerm, prevCommitIndex, prevLastApplied) - rf.resetElectionTimer() - - rf.commitIndex = args.LastIncludedIndex - rf.lastApplied = args.LastIncludedIndex - // 2. Create new snapshot file if first chunk (offset is 0) - // 3. Write data into snapshot file at given offset - // 4. Reply and wait for more data chunks if done is false - if !args.Done { - return - } - // 5. Save snapshot file, discard any existing or partial snapshot with a - // smaller index - // 6. If existing log entry has same index and term as snapshot’s last - // included entry, retain log entries following it and reply - // 7. Discard the entire log - // 8. Reset state machine using snapshot contents (and load snapshot’s - // cluster configuration) - firstLogIndex := rf.logs[0].Index - if firstLogIndex <= args.LastIncludedIndex { - rf.logs = append([]Entry{}, Entry{ - Index: args.LastIncludedIndex, - Term: args.LastIncludedTerm, - Command: nil, - }) - } else if firstLogIndex < args.LastIncludedIndex { - trimLen := args.LastIncludedIndex - firstLogIndex - rf.logs = append([]Entry{}, rf.logs[trimLen:]...) - rf.logs[0].Command = nil - } - rf.persister.Save(rf.encodeState(), args.Data) - rf.smsg = &ApplyMsg{ - SnapshotValid: true, - Snapshot: args.Data, - SnapshotTerm: args.LastIncludedTerm, - SnapshotIndex: args.LastIncludedIndex, - } -} - -func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { - reply := &InstallSnapshotReply{} - ok := rf.peers[server].Call("Raft.InstallSnapshot", args, reply) - if !ok { - return - } - - rf.mu.Lock() - defer rf.mu.Unlock() - - if !rf.checkResponseTerm(args, reply, false) { - return - } - - if args.LastIncludedIndex != rf.logs[0].Index { - return - } - - rf.nextIndex[server] = args.LastIncludedIndex + 1 - rf.matchIndex[server] = args.LastIncludedIndex - - rf.persister.Save(rf.encodeState(), args.Data) -} - -FILE_EOF_raft_install_snapshot_go - -cat > src/raft/util.go << 'FILE_EOF_raft_util_go' -package raft - -import ( - "log" - "os" -) - -// Debugging -var Debug = os.Getenv("DEBUG") == "1" - -func DPrintf(format string, a ...interface{}) { - if !Debug { - return - } - log.Printf(format, a...) -} - -FILE_EOF_raft_util_go - -echo 'Copying kvsrv implementation...' 
-mkdir -p src/kvsrv - -cat > src/kvsrv/client.go << 'FILE_EOF_kvsrv_client_go' -package kvsrv - -import ( - "crypto/rand" - "math/big" - - "6.5840/labrpc" -) - -type Clerk struct { - server *labrpc.ClientEnd - clientId int64 - seqNum int -} - -func nrand() int64 { - max := big.NewInt(int64(1) << 62) - bigx, _ := rand.Int(rand.Reader, max) - x := bigx.Int64() - return x -} - -func MakeClerk(server *labrpc.ClientEnd) *Clerk { - ck := new(Clerk) - ck.server = server - ck.clientId = nrand() - ck.seqNum = 0 - return ck -} - -// fetch the current value for a key. -// returns "" if the key does not exist. -// keeps trying forever in the face of all other errors. -// -// you can send an RPC with code like this: -// ok := ck.server.Call("KVServer.Get", &args, &reply) -// -// the types of args and reply (including whether they are pointers) -// must match the declared types of the RPC handler function's -// arguments. and reply must be passed as a pointer. -func (ck *Clerk) Get(key string) string { - ck.seqNum++ - args := GetArgs{ - Key: key, - ClientId: ck.clientId, - SeqNum: ck.seqNum, - } - reply := GetReply{} - for !ck.server.Call("KVServer.Get", &args, &reply) { - } - return reply.Value -} - -// shared by Put and Append. -// -// you can send an RPC with code like this: -// ok := ck.server.Call("KVServer."+op, &args, &reply) -// -// the types of args and reply (including whether they are pointers) -// must match the declared types of the RPC handler function's -// arguments. and reply must be passed as a pointer. -func (ck *Clerk) PutAppend(key string, value string, op string) string { - ck.seqNum++ - args := PutAppendArgs{ - Key: key, - Value: value, - ClientId: ck.clientId, - SeqNum: ck.seqNum, - } - reply := PutAppendReply{} - for !ck.server.Call("KVServer."+op, &args, &reply) { - } - return reply.Value -} - -func (ck *Clerk) Put(key string, value string) { - ck.PutAppend(key, value, "Put") -} - -// Append value to key's value and return that value -func (ck *Clerk) Append(key string, value string) string { - return ck.PutAppend(key, value, "Append") -} - -FILE_EOF_kvsrv_client_go - -cat > src/kvsrv/common.go << 'FILE_EOF_kvsrv_common_go' -package kvsrv - -type PutAppendArgs struct { - Key string - Value string - ClientId int64 - SeqNum int -} - -type PutAppendReply struct { - Value string -} - -type GetArgs struct { - Key string - ClientId int64 - SeqNum int -} - -type GetReply struct { - Value string -} - -FILE_EOF_kvsrv_common_go - -cat > src/kvsrv/server.go << 'FILE_EOF_kvsrv_server_go' -package kvsrv - -import ( - "log" - "sync" -) - -const Debug = false - -func DPrintf(format string, a ...interface{}) (n int, err error) { - if Debug { - log.Printf(format, a...) - } - return -} - -type Cache struct { - seq int - value string -} - -type KVServer struct { - mu sync.Mutex - data map[string]string - cache map[int64]*Cache // client id -> seq ->value -} - -func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { - kv.mu.Lock() - defer kv.mu.Unlock() - clientId, seqNum := args.ClientId, args.SeqNum - key := args.Key - reply.Value = "" - // Either the client is new or the seqNum is greater than the cache seqNum. - // In both cases, we can return the value directly. 
- if ca, ok := kv.cache[clientId]; !ok || ca.seq <= seqNum { - reply.Value = kv.data[key] - return - } -} - -func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) { - kv.mu.Lock() - defer kv.mu.Unlock() - clientId, seqNum := args.ClientId, args.SeqNum - k, v := args.Key, args.Value - reply.Value = "" - if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { - return - } else if !ok { - kv.cache[clientId] = new(Cache) - } - kv.data[k] = v - kv.cache[clientId].seq = seqNum - kv.cache[clientId].value = reply.Value -} - -func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) { - kv.mu.Lock() - defer kv.mu.Unlock() - clientId, seqNum := args.ClientId, args.SeqNum - k, v := args.Key, args.Value - reply.Value = "" - // For ca.seq == seqNum, it means that the value has been appended. - // However, the response might be lost, so we return the cache value. - // For ca.seq > seqNum, it doesnt matter what the value is, just return. - if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { - reply.Value = ca.value - return - } else if !ok { - kv.cache[clientId] = new(Cache) - } - reply.Value = kv.data[k] - kv.cache[clientId].seq = seqNum - kv.cache[clientId].value = kv.data[k] - kv.data[k] += v -} - -func StartKVServer() *KVServer { - kv := new(KVServer) - kv.data = make(map[string]string) - kv.cache = make(map[int64]*Cache) - return kv -} - -FILE_EOF_kvsrv_server_go - -echo 'Copying mr implementation...' -mkdir -p src/mr - -cat > src/mr/coordinator.go << 'FILE_EOF_mr_coordinator_go' -package mr - -import ( - "log" - "math" - "net" - "net/http" - "net/rpc" - "os" - "sync" - "time" -) - -const SUCCESS = math.MaxInt32 - -type Coordinator struct { - // Your definitions here. - tasks chan Work // a taskqueue - mu sync.Mutex - terms []int - wg sync.WaitGroup - nMap int - nReduce int - done bool -} - -func (c *Coordinator) CallGetWork(args *WorkArgs, reply *WorkReply) error { - if len(c.tasks) == 0 { - reply.HasWork = false - return nil - } - reply.Work = <-c.tasks - c.mu.Lock() - reply.Term = c.terms[reply.Work.FileIndex] - c.mu.Unlock() - reply.HasWork = true - - go func() { - time.Sleep(10 * time.Second) - c.mu.Lock() - defer c.mu.Unlock() - if c.terms[reply.Work.FileIndex] == SUCCESS { - return - } - c.terms[reply.Work.FileIndex]++ - c.tasks <- reply.Work - }() - - return nil -} - -func (c *Coordinator) CallReport(args *ReportArgs, reply *ReportReply) error { - c.mu.Lock() - defer c.mu.Unlock() - - if c.terms[args.Work.FileIndex] != args.Term { - reply.Success = false - return nil - } - c.terms[args.Work.FileIndex] = SUCCESS - c.wg.Done() - reply.Success = true - return nil -} - -// start a thread that listens for RPCs from worker.go -func (c *Coordinator) server() { - rpc.Register(c) - rpc.HandleHTTP() - //l, e := net.Listen("tcp", ":1234") - sockname := coordinatorSock() - os.Remove(sockname) - l, e := net.Listen("unix", sockname) - if e != nil { - log.Fatal("listen error:", e) - } - go http.Serve(l, nil) -} - -// main/mrcoordinator.go calls Done() periodically to find out -// if the entire job has finished. -func (c *Coordinator) Done() bool { - return c.done -} - -func StartReduceWork(c *Coordinator) { - c.wg.Wait() - c.terms = make([]int, c.nReduce) - for i := 0; i < c.nReduce; i++ { - c.tasks <- Work{ - WorkType: REDUCE, - FileIndex: i, - NReduce: c.nReduce, - NMapWork: c.nMap, - } - c.wg.Add(1) - } - go WorkDone(c) -} - -func WorkDone(c *Coordinator) { - c.wg.Wait() - c.done = true -} - -// create a Coordinator. -// main/mrcoordinator.go calls this function. 
-// nReduce is the number of reduce tasks to use. -func MakeCoordinator(files []string, nReduce int) *Coordinator { - - var buflen int - if len(files) > nReduce { - buflen = len(files) - } else { - buflen = nReduce - } - - c := Coordinator{ - nMap: len(files), - nReduce: nReduce, - wg: sync.WaitGroup{}, - tasks: make(chan Work, buflen), - terms: make([]int, len(files)), - done: false, - } - - for idx, file := range files { - c.tasks <- Work{ - WorkType: MAP, - Filename: file, - FileIndex: idx, - NReduce: c.nReduce, - NMapWork: c.nMap, - } - c.wg.Add(1) - } - go StartReduceWork(&c) - c.server() - - return &c -} - -FILE_EOF_mr_coordinator_go - -cat > src/mr/rpc.go << 'FILE_EOF_mr_rpc_go' -package mr - -// -// RPC definitions. -// -// remember to capitalize all names. -// - -import ( - "os" - "strconv" -) - -// -// example to show how to declare the arguments -// and reply for an RPC. -// - -type ExampleArgs struct { - X int -} - -type ExampleReply struct { - Y int -} - -/*-Define Work-*/ - -type WorkStatus int - -const ( - IDLE WorkStatus = iota - START - FINISH -) - -type WorkType int - -const ( - MAP WorkType = iota - REDUCE -) - -type Work struct { - WorkType WorkType // MAP or REDUCE - Filename string - FileIndex int // This is a convention for mr-X index - NMapWork int // how many map files - NReduce int // how many reduce files -} - -type WorkArgs struct { - WorkerID int -} - -type WorkReply struct { - HasWork bool - Work Work - Term int -} - -/*-Define Report-*/ -// Report work finish only if success -type ReportArgs struct { - Work Work - Term int -} - -type ReportReply struct { - Success bool -} - -// Cook up a unique-ish UNIX-domain socket name -// in /var/tmp, for the coordinator. -// Can't use the current directory since -// Athena AFS doesn't support UNIX-domain sockets. -func coordinatorSock() string { - s := "/var/tmp/5840-mr-" - s += strconv.Itoa(os.Getuid()) - return s -} - -FILE_EOF_mr_rpc_go - -cat > src/mr/worker.go << 'FILE_EOF_mr_worker_go' -package mr - -import ( - "encoding/json" - "fmt" - "hash/fnv" - "io/ioutil" - "log" - "net/rpc" - "os" - "sort" - "time" -) - -// for sorting by key. -type ByKey []KeyValue - -// for sorting by key. -func (a ByKey) Len() int { return len(a) } -func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } - -// Map functions return a slice of KeyValue. -type KeyValue struct { - Key string - Value string -} - -// use ihash(key) % NReduce to choose the reduce -// task number for each KeyValue emitted by Map. -func ihash(key string) int { - h := fnv.New32a() - h.Write([]byte(key)) - return int(h.Sum32() & 0x7fffffff) -} - -// main/mrworker.go calls this function. -func Worker(mapf func(string, string) []KeyValue, - reducef func(string, []string) string) { - - // Your worker implementation here. 
- for { - r := CallGetWok() - if !r.HasWork { - time.Sleep(3 * time.Second) - continue - } - - switch r.Work.WorkType { - case MAP: - DoMapWork(r.Work, mapf, r.Term) - case REDUCE: - DoReduceWork(r.Work, reducef, r.Term) - } - } -} - -func DoReduceWork(work Work, reducef func(string, []string) string, term int) { - fileIndex := work.FileIndex - intermediate := []KeyValue{} - - for i := 0; i < work.NMapWork; i++ { - filename := fmt.Sprintf("mr-%d-%d", i, fileIndex) - file, err := os.Open(filename) - - if err != nil { - log.Fatalf("cannot open %v", filename) - } - - dec := json.NewDecoder(file) - - for { - var kv KeyValue - if err := dec.Decode(&kv); err != nil { - break - } - intermediate = append(intermediate, kv) - } - file.Close() - } - - sort.Sort(ByKey(intermediate)) - - oname := fmt.Sprintf("mr-out-%d", fileIndex) - ofile, _ := ioutil.TempFile(".", oname) - - // - // call Reduce on each distinct key in intermediate[], - // and print the result to mr-out-0. - // - i := 0 - for i < len(intermediate) { - j := i + 1 - for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { - j++ - } - values := []string{} - for k := i; k < j; k++ { - values = append(values, intermediate[k].Value) - } - output := reducef(intermediate[i].Key, values) - - // this is the correct format for each line of Reduce output. - fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) - - i = j - } - - os.Rename(ofile.Name(), oname) - - CallReport(work, term) -} - -func DoMapWork(work Work, mapf func(string, string) []KeyValue, term int) { - filename := work.Filename - - file, err := os.Open(filename) - if err != nil { - log.Fatalf("cannot open %v", filename) - } - - content, err := ioutil.ReadAll(file) - - if err != nil { - log.Fatalf("cannot read %v", filename) - } - - file.Close() - - kva := mapf(work.Filename, string(content)) - - //make a - for i := 0; i < work.NReduce; i++ { - imtFilename := fmt.Sprintf("mr-%d-%d", work.FileIndex, i) - - imtFile, err := ioutil.TempFile(".", imtFilename) - - enc := json.NewEncoder(imtFile) - - if err != nil { - log.Fatalf("cannot create %v", imtFilename) - } - - for _, kv := range kva { - hash := ihash(kv.Key) % work.NReduce - if hash == i { - err := enc.Encode(&kv) - if err != nil { - log.Fatalf("cannot encode %v", kv) - } - } - } - - imtFile.Close() - - os.Rename(imtFile.Name(), imtFilename) - } - - CallReport(work, term) -} - -func CallReport(w Work, term int) { - args := ReportArgs{ - Work: w, - Term: term, - } - reply := ReportReply{} - ok := call("Coordinator.CallReport", &args, &reply) - - if !ok { - fmt.Printf("call failed!\n") - } -} - -func CallGetWok() WorkReply { - args := WorkArgs{} - reply := WorkReply{} - ok := call("Coordinator.CallGetWork", &args, &reply) - - if !ok { - fmt.Printf("call failed!\n") - } - - return reply -} - -// example function to show how to make an RPC call to the coordinator. -// -// the RPC argument and reply types are defined in rpc.go. -func CallExample() { - - // declare an argument structure. - args := ExampleArgs{} - - // fill in the argument(s). - args.X = 99 - - // declare a reply structure. - reply := ExampleReply{} - - // send the RPC request, wait for the reply. - // the "Coordinator.Example" tells the - // receiving server that we'd like to call - // the Example() method of struct Coordinator. - ok := call("Coordinator.Example", &args, &reply) - if ok { - // reply.Y should be 100. 
- fmt.Printf("reply.Y %v\n", reply.Y) - } else { - fmt.Printf("call failed!\n") - } -} - -// send an RPC request to the coordinator, wait for the response. -// usually returns true. -// returns false if something goes wrong. -func call(rpcname string, args interface{}, reply interface{}) bool { - // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") - sockname := coordinatorSock() - c, err := rpc.DialHTTP("unix", sockname) - if err != nil { - log.Fatal("dialing:", err) - } - defer c.Close() - - err = c.Call(rpcname, args, reply) - if err == nil { - return true - } - - fmt.Println(err) - return false -} - -FILE_EOF_mr_worker_go - echo 'Creating checksums for protected files...' PROTECTED_FILES=( diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/kvsrv/client.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/kvsrv/client.go new file mode 100644 index 0000000..cb36e2b --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/kvsrv/client.go @@ -0,0 +1,83 @@ +package kvsrv + +import ( + "crypto/rand" + "math/big" + + "6.5840/labrpc" +) + +type Clerk struct { + server *labrpc.ClientEnd + clientId int64 + seqNum int +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +func MakeClerk(server *labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.server = server + ck.clientId = nrand() + ck.seqNum = 0 + return ck +} + +// fetch the current value for a key. +// returns "" if the key does not exist. +// keeps trying forever in the face of all other errors. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer.Get", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) Get(key string) string { + ck.seqNum++ + args := GetArgs{ + Key: key, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := GetReply{} + for !ck.server.Call("KVServer.Get", &args, &reply) { + } + return reply.Value +} + +// shared by Put and Append. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer."+op, &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. 
+func (ck *Clerk) PutAppend(key string, value string, op string) string { + ck.seqNum++ + args := PutAppendArgs{ + Key: key, + Value: value, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := PutAppendReply{} + for !ck.server.Call("KVServer."+op, &args, &reply) { + } + return reply.Value +} + +func (ck *Clerk) Put(key string, value string) { + ck.PutAppend(key, value, "Put") +} + +// Append value to key's value and return that value +func (ck *Clerk) Append(key string, value string) string { + return ck.PutAppend(key, value, "Append") +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/kvsrv/common.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/kvsrv/common.go new file mode 100644 index 0000000..610acdb --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/kvsrv/common.go @@ -0,0 +1,22 @@ +package kvsrv + +type PutAppendArgs struct { + Key string + Value string + ClientId int64 + SeqNum int +} + +type PutAppendReply struct { + Value string +} + +type GetArgs struct { + Key string + ClientId int64 + SeqNum int +} + +type GetReply struct { + Value string +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/kvsrv/server.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/kvsrv/server.go new file mode 100644 index 0000000..4e03682 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/kvsrv/server.go @@ -0,0 +1,84 @@ +package kvsrv + +import ( + "log" + "sync" +) + +const Debug = false + +func DPrintf(format string, a ...interface{}) (n int, err error) { + if Debug { + log.Printf(format, a...) + } + return +} + +type Cache struct { + seq int + value string +} + +type KVServer struct { + mu sync.Mutex + data map[string]string + cache map[int64]*Cache // client id -> seq ->value +} + +func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + key := args.Key + reply.Value = "" + // Either the client is new or the seqNum is greater than the cache seqNum. + // In both cases, we can return the value directly. + if ca, ok := kv.cache[clientId]; !ok || ca.seq <= seqNum { + reply.Value = kv.data[key] + return + } +} + +func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + kv.data[k] = v + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = reply.Value +} + +func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + // For ca.seq == seqNum, it means that the value has been appended. + // However, the response might be lost, so we return the cache value. + // For ca.seq > seqNum, it doesnt matter what the value is, just return. 
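+	// Worked example (illustrative): client 7 sends Append(seq=4); it is
+	// applied and {seq: 4, value: <old value>} is cached. If the same request
+	// is retransmitted, the branch below replies from the cache instead of
+	// appending a second time.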
+ if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + reply.Value = ca.value + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + reply.Value = kv.data[k] + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = kv.data[k] + kv.data[k] += v +} + +func StartKVServer() *KVServer { + kv := new(KVServer) + kv.data = make(map[string]string) + kv.cache = make(map[int64]*Cache) + return kv +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/mr/coordinator.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/mr/coordinator.go new file mode 100644 index 0000000..4fc2518 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/mr/coordinator.go @@ -0,0 +1,141 @@ +package mr + +import ( + "log" + "math" + "net" + "net/http" + "net/rpc" + "os" + "sync" + "time" +) + +const SUCCESS = math.MaxInt32 + +type Coordinator struct { + // Your definitions here. + tasks chan Work // a taskqueue + mu sync.Mutex + terms []int + wg sync.WaitGroup + nMap int + nReduce int + done bool +} + +func (c *Coordinator) CallGetWork(args *WorkArgs, reply *WorkReply) error { + if len(c.tasks) == 0 { + reply.HasWork = false + return nil + } + reply.Work = <-c.tasks + c.mu.Lock() + reply.Term = c.terms[reply.Work.FileIndex] + c.mu.Unlock() + reply.HasWork = true + + go func() { + time.Sleep(10 * time.Second) + c.mu.Lock() + defer c.mu.Unlock() + if c.terms[reply.Work.FileIndex] == SUCCESS { + return + } + c.terms[reply.Work.FileIndex]++ + c.tasks <- reply.Work + }() + + return nil +} + +func (c *Coordinator) CallReport(args *ReportArgs, reply *ReportReply) error { + c.mu.Lock() + defer c.mu.Unlock() + + if c.terms[args.Work.FileIndex] != args.Term { + reply.Success = false + return nil + } + c.terms[args.Work.FileIndex] = SUCCESS + c.wg.Done() + reply.Success = true + return nil +} + +// start a thread that listens for RPCs from worker.go +func (c *Coordinator) server() { + rpc.Register(c) + rpc.HandleHTTP() + //l, e := net.Listen("tcp", ":1234") + sockname := coordinatorSock() + os.Remove(sockname) + l, e := net.Listen("unix", sockname) + if e != nil { + log.Fatal("listen error:", e) + } + go http.Serve(l, nil) +} + +// main/mrcoordinator.go calls Done() periodically to find out +// if the entire job has finished. +func (c *Coordinator) Done() bool { + return c.done +} + +func StartReduceWork(c *Coordinator) { + c.wg.Wait() + c.terms = make([]int, c.nReduce) + for i := 0; i < c.nReduce; i++ { + c.tasks <- Work{ + WorkType: REDUCE, + FileIndex: i, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go WorkDone(c) +} + +func WorkDone(c *Coordinator) { + c.wg.Wait() + c.done = true +} + +// create a Coordinator. +// main/mrcoordinator.go calls this function. +// nReduce is the number of reduce tasks to use. 
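+//
+// Typical driver loop (sketch; main/mrcoordinator.go is the authoritative caller):
+//
+//	c := MakeCoordinator(os.Args[1:], 10)
+//	for !c.Done() {
+//		time.Sleep(time.Second)
+//	}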
+func MakeCoordinator(files []string, nReduce int) *Coordinator { + + var buflen int + if len(files) > nReduce { + buflen = len(files) + } else { + buflen = nReduce + } + + c := Coordinator{ + nMap: len(files), + nReduce: nReduce, + wg: sync.WaitGroup{}, + tasks: make(chan Work, buflen), + terms: make([]int, len(files)), + done: false, + } + + for idx, file := range files { + c.tasks <- Work{ + WorkType: MAP, + Filename: file, + FileIndex: idx, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go StartReduceWork(&c) + c.server() + + return &c +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/mr/rpc.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/mr/rpc.go new file mode 100644 index 0000000..0f90524 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/mr/rpc.go @@ -0,0 +1,81 @@ +package mr + +// +// RPC definitions. +// +// remember to capitalize all names. +// + +import ( + "os" + "strconv" +) + +// +// example to show how to declare the arguments +// and reply for an RPC. +// + +type ExampleArgs struct { + X int +} + +type ExampleReply struct { + Y int +} + +/*-Define Work-*/ + +type WorkStatus int + +const ( + IDLE WorkStatus = iota + START + FINISH +) + +type WorkType int + +const ( + MAP WorkType = iota + REDUCE +) + +type Work struct { + WorkType WorkType // MAP or REDUCE + Filename string + FileIndex int // This is a convention for mr-X index + NMapWork int // how many map files + NReduce int // how many reduce files +} + +type WorkArgs struct { + WorkerID int +} + +type WorkReply struct { + HasWork bool + Work Work + Term int +} + +/*-Define Report-*/ +// Report work finish only if success +type ReportArgs struct { + Work Work + Term int +} + +type ReportReply struct { + Success bool +} + +// Cook up a unique-ish UNIX-domain socket name +// in /var/tmp, for the coordinator. +// Can't use the current directory since +// Athena AFS doesn't support UNIX-domain sockets. +func coordinatorSock() string { + s := "/var/tmp/5840-mr-" + s += strconv.Itoa(os.Getuid()) + return s +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/mr/worker.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/mr/worker.go new file mode 100644 index 0000000..95d142e --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/mr/worker.go @@ -0,0 +1,233 @@ +package mr + +import ( + "encoding/json" + "fmt" + "hash/fnv" + "io/ioutil" + "log" + "net/rpc" + "os" + "sort" + "time" +) + +// for sorting by key. +type ByKey []KeyValue + +// for sorting by key. +func (a ByKey) Len() int { return len(a) } +func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } + +// Map functions return a slice of KeyValue. +type KeyValue struct { + Key string + Value string +} + +// use ihash(key) % NReduce to choose the reduce +// task number for each KeyValue emitted by Map. +func ihash(key string) int { + h := fnv.New32a() + h.Write([]byte(key)) + return int(h.Sum32() & 0x7fffffff) +} + +// main/mrworker.go calls this function. +func Worker(mapf func(string, string) []KeyValue, + reducef func(string, []string) string) { + + // Your worker implementation here. 
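+	// Poll the coordinator in a loop: if no work is available, sleep briefly and
+	// retry; otherwise dispatch to DoMapWork or DoReduceWork based on the task
+	// type. Each handler reports completion (tagged with the task's term) back to
+	// the coordinator via CallReport.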
+ for { + r := CallGetWok() + if !r.HasWork { + time.Sleep(3 * time.Second) + continue + } + + switch r.Work.WorkType { + case MAP: + DoMapWork(r.Work, mapf, r.Term) + case REDUCE: + DoReduceWork(r.Work, reducef, r.Term) + } + } +} + +func DoReduceWork(work Work, reducef func(string, []string) string, term int) { + fileIndex := work.FileIndex + intermediate := []KeyValue{} + + for i := 0; i < work.NMapWork; i++ { + filename := fmt.Sprintf("mr-%d-%d", i, fileIndex) + file, err := os.Open(filename) + + if err != nil { + log.Fatalf("cannot open %v", filename) + } + + dec := json.NewDecoder(file) + + for { + var kv KeyValue + if err := dec.Decode(&kv); err != nil { + break + } + intermediate = append(intermediate, kv) + } + file.Close() + } + + sort.Sort(ByKey(intermediate)) + + oname := fmt.Sprintf("mr-out-%d", fileIndex) + ofile, _ := ioutil.TempFile(".", oname) + + // + // call Reduce on each distinct key in intermediate[], + // and print the result to mr-out-0. + // + i := 0 + for i < len(intermediate) { + j := i + 1 + for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { + j++ + } + values := []string{} + for k := i; k < j; k++ { + values = append(values, intermediate[k].Value) + } + output := reducef(intermediate[i].Key, values) + + // this is the correct format for each line of Reduce output. + fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) + + i = j + } + + os.Rename(ofile.Name(), oname) + + CallReport(work, term) +} + +func DoMapWork(work Work, mapf func(string, string) []KeyValue, term int) { + filename := work.Filename + + file, err := os.Open(filename) + if err != nil { + log.Fatalf("cannot open %v", filename) + } + + content, err := ioutil.ReadAll(file) + + if err != nil { + log.Fatalf("cannot read %v", filename) + } + + file.Close() + + kva := mapf(work.Filename, string(content)) + + //make a + for i := 0; i < work.NReduce; i++ { + imtFilename := fmt.Sprintf("mr-%d-%d", work.FileIndex, i) + + imtFile, err := ioutil.TempFile(".", imtFilename) + + enc := json.NewEncoder(imtFile) + + if err != nil { + log.Fatalf("cannot create %v", imtFilename) + } + + for _, kv := range kva { + hash := ihash(kv.Key) % work.NReduce + if hash == i { + err := enc.Encode(&kv) + if err != nil { + log.Fatalf("cannot encode %v", kv) + } + } + } + + imtFile.Close() + + os.Rename(imtFile.Name(), imtFilename) + } + + CallReport(work, term) +} + +func CallReport(w Work, term int) { + args := ReportArgs{ + Work: w, + Term: term, + } + reply := ReportReply{} + ok := call("Coordinator.CallReport", &args, &reply) + + if !ok { + fmt.Printf("call failed!\n") + } +} + +func CallGetWok() WorkReply { + args := WorkArgs{} + reply := WorkReply{} + ok := call("Coordinator.CallGetWork", &args, &reply) + + if !ok { + fmt.Printf("call failed!\n") + } + + return reply +} + +// example function to show how to make an RPC call to the coordinator. +// +// the RPC argument and reply types are defined in rpc.go. +func CallExample() { + + // declare an argument structure. + args := ExampleArgs{} + + // fill in the argument(s). + args.X = 99 + + // declare a reply structure. + reply := ExampleReply{} + + // send the RPC request, wait for the reply. + // the "Coordinator.Example" tells the + // receiving server that we'd like to call + // the Example() method of struct Coordinator. + ok := call("Coordinator.Example", &args, &reply) + if ok { + // reply.Y should be 100. 
+ fmt.Printf("reply.Y %v\n", reply.Y) + } else { + fmt.Printf("call failed!\n") + } +} + +// send an RPC request to the coordinator, wait for the response. +// usually returns true. +// returns false if something goes wrong. +func call(rpcname string, args interface{}, reply interface{}) bool { + // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") + sockname := coordinatorSock() + c, err := rpc.DialHTTP("unix", sockname) + if err != nil { + log.Fatal("dialing:", err) + } + defer c.Close() + + err = c.Call(rpcname, args, reply) + if err == nil { + return true + } + + fmt.Println(err) + return false +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/append_entries.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/append_entries.go new file mode 100644 index 0000000..9856584 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/append_entries.go @@ -0,0 +1,214 @@ +package raft + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type AppendEntriesArgs struct { + BaseRPC // leader's term + LeaderId int // so follower can redirect clients + PrevLogIndex int // index of log entry immediately preceding new ones + PrevLogTerm int // term of prevLogIndex entry + Entries []Entry // log entries to store (empty for heartbeat; may send more than one for efficiency) + CommitIndex int // leader's commitIndex +} + +type AppendEntriesReply struct { + BaseRPC // currentTerm, for leader to update itself + Success bool // true if follower contained entry matching prevLogIndex and prevLogTerm + ConflictIndex int // the index of the first conflicting entry +} + +// AppendEntries RPC handler +// Reset the election timer if you get an AppendEntries RPC from the current leader +// (i.e., if the term of the AppendEntries arguments is outdated, you should not reset your timer); +func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + DPrintf("(AppendEntries) [%d] recieve from %d, Term: %d, PrevLogIndex: %d, PrevLogTerm: %d\n", rf.me, args.LeaderId, args.Term, args.PrevLogIndex, args.PrevLogTerm) + + reply.Success = false + reply.ConflictIndex = -1 + + if !rf.checkRequestTerm(args, reply) { + return + } + + if rf.state == CANDIDATE { + rf.state = FOLLOWER + } + + rf.resetElectionTimer() + + prevLogIndex := args.PrevLogIndex - rf.logs[0].Index + + if prevLogIndex < 0 { + // force to send a snapshot + reply.ConflictIndex = 0 + return + } + + // Reply false if log doesn’t contain an entry at prevLogIndex + // whose term matches prevLogTerm (§5.3) + if prevLogIndex >= len(rf.logs) { + reply.ConflictIndex = rf.logs[len(rf.logs)-1].Index + return + } + + // If an existing entry conflicts with a new one (same index + // but different terms), delete the existing entry and all that + // follow it (§5.3) + if rf.logs[prevLogIndex].Term != args.PrevLogTerm { + // optimization + curTerm := rf.logs[prevLogIndex].Term + var conflictIndex int + for i := prevLogIndex; i > 0; i-- { + if rf.logs[i-1].Term != curTerm { + conflictIndex = i + break + } + } + reply.ConflictIndex = conflictIndex + rf.logs[0].Index + return + } + for idx, entry := range args.Entries { + logIndex := entry.Index - rf.logs[0].Index + if logIndex >= len(rf.logs) || rf.logs[logIndex].Term != entry.Term { + DPrintf("(AppendEntries) [%d] append logs: %v\n", rf.me, args.Entries) + rf.logs = append([]Entry{}, 
append(rf.logs[:logIndex], args.Entries[idx:]...)...) + break + } + } + reply.Success = true + if args.CommitIndex > rf.commitIndex { + rf.commitIndex = args.CommitIndex + if args.CommitIndex-rf.logs[0].Index >= len(rf.logs) { + rf.commitIndex = rf.logs[len(rf.logs)-1].Index + } + } + rf.applierCond.Signal() +} + +func (rf *Raft) sendAppendEntries(server int, args *AppendEntriesArgs) { + reply := &AppendEntriesReply{} + ok := rf.peers[server].Call("Raft.AppendEntries", args, reply) + if !ok { + return + } + + DPrintf("(AppendEntries) [%d] recieve reply from %d, Term: %d, Success: %v, ConflictIndex: %d\n", rf.me, server, reply.Term, reply.Success, reply.ConflictIndex) + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + // If successful: update nextIndex and matchIndex for + // follower (§5.3) + if reply.Success { + if len(args.Entries) > 0 { + rf.nextIndex[server] = args.Entries[len(args.Entries)-1].Index + 1 + } + rf.matchIndex[server] = rf.nextIndex[server] - 1 + for _, log := range rf.logs { + index := log.Index + count := 1 + for peer := range rf.peers { + if peer != rf.me && rf.matchIndex[peer] >= index { + count++ + } + } + // If there exists an N such that N > commitIndex, a majority + // of matchIndex[i] ≥ N, and log[N].term == currentTerm: + // set commitIndex = N (§5.3, §5.4). + if count > len(rf.peers)/2 && index > rf.commitIndex && log.Term == rf.currentTerm { + rf.commitIndex = index + } + } + } else { + if reply.ConflictIndex != -1 { + rf.nextIndex[server] = reply.ConflictIndex - 1 + } else { + rf.nextIndex[server] = rf.nextIndex[server] - 1 + } + if rf.nextIndex[server] < 1 { + rf.nextIndex[server] = 1 + } + } + DPrintf("(AppendEntries) [%d] nextIndex: %v, matchIndex: %v, commitIndex: %d\n", rf.me, rf.nextIndex, rf.matchIndex, rf.commitIndex) + rf.applierCond.Signal() +} + +func (rf *Raft) broadcastAppendEntries(isHeartBeat bool) { + for peer := range rf.peers { + if peer != rf.me { + // if it is a heartbeat we dont care the linearizability of logs append + if isHeartBeat { + args := rf.prepareReplicationArgs(peer) + go rf.sendReplicationRPC(peer, args) + } else { + rf.broadcasterCond[peer].Signal() + } + } + } +} + +func (rf *Raft) prepareReplicationArgs(peer int) interface{} { + if rf.nextIndex[peer] > rf.logs[0].Index { + firstLog := rf.logs[0] + nextIndex := rf.nextIndex[peer] - firstLog.Index + prevLog := rf.logs[nextIndex-1] + logs := make([]Entry, len(rf.logs[nextIndex:])) + copy(logs, rf.logs[nextIndex:]) + return &AppendEntriesArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + PrevLogIndex: prevLog.Index, + PrevLogTerm: prevLog.Term, + Entries: logs, + CommitIndex: rf.commitIndex, + } + } else { + return &InstallSnapshotArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + LastIncludedIndex: rf.logs[0].Index, + LastIncludedTerm: rf.logs[0].Term, + Offset: 0, + Data: rf.persister.ReadSnapshot(), + Done: true, + } + } +} + +func (rf *Raft) sendReplicationRPC(peer int, args interface{}) { + switch v := args.(type) { + case *AppendEntriesArgs: + rf.sendAppendEntries(peer, v) + case *InstallSnapshotArgs: + rf.sendInstallSnapshot(peer, v) + default: + panic("(sendReplicationRPC) SHOULD NOT REACH") + } +} + +func (rf *Raft) isReplicationNeeded(peer int) bool { + return rf.state == LEADER && rf.matchIndex[peer] < rf.logs[len(rf.logs)-1].Index +} + +func (rf *Raft) broadcaster(peer int) { + rf.broadcasterCond[peer].L.Lock() + defer rf.broadcasterCond[peer].L.Unlock() + for 
!rf.killed() { + rf.mu.Lock() + for !rf.isReplicationNeeded(peer) { + rf.mu.Unlock() + rf.broadcasterCond[peer].Wait() + rf.mu.Lock() + } + args := rf.prepareReplicationArgs(peer) + rf.mu.Unlock() + rf.sendReplicationRPC(peer, args) + } +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/election.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/election.go new file mode 100644 index 0000000..4274b32 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/election.go @@ -0,0 +1,123 @@ +package raft + +import ( + "math/rand" + "sync/atomic" + "time" +) + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type RequestVoteArgs struct { + BaseRPC // candidate's term + CandidateId int // candidate requesting vote + LastLogIndex int // index of candidate's last log entry + LastLogTerm int // term of candidate's last log entry +} + +type RequestVoteReply struct { + BaseRPC // currentTerm, for candidate to update itself + VoteGranted bool // true means candidate received vote +} + +// RequestVote RPC handler +// Restart your election timer if you grant a vote to another peer. +func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + reply.VoteGranted = false + + DPrintf("(RequestVote) [%d]: receive vote request from %d, term %d\n", rf.me, args.CandidateId, args.Term) + + if !rf.checkRequestTerm(args, reply) { + return + } + + if (rf.votedFor == -1 || rf.votedFor == args.CandidateId) && rf.isUpToDate(args) { + reply.VoteGranted = true + rf.votedFor = args.CandidateId + rf.resetElectionTimer() + } +} + +func (rf *Raft) isUpToDate(args *RequestVoteArgs) bool { + lastLog := rf.logs[len(rf.logs)-1] + candidateIndex := args.LastLogIndex + candidateTerm := args.LastLogTerm + return candidateTerm > lastLog.Term || (candidateTerm == lastLog.Term && candidateIndex >= lastLog.Index) +} + +func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, voteCount *int32) { + reply := &RequestVoteReply{} + ok := rf.peers[server].Call("Raft.RequestVote", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, true) { + return + } + + if !reply.VoteGranted { + return + } + + DPrintf("(RequestVote) [%d]: received vote from %d, voteCount: %d\n", rf.me, server, *voteCount) + + // If votes received from majority of servers: become leader + if atomic.AddInt32(voteCount, 1) > int32(len(rf.peers)/2) && + rf.state == CANDIDATE && + rf.currentTerm == args.Term { + rf.state = LEADER + lastLogIndex := rf.logs[len(rf.logs)-1].Index + for i := range rf.peers { + rf.nextIndex[i] = lastLogIndex + 1 + rf.matchIndex[i] = 0 + } + DPrintf("[%d]: become leader to term %d\n", rf.me, rf.currentTerm) + // send initial empty AppendEntries RPCs (heartbeat) to each server immediately + rf.broadcastAppendEntries(true) + } + DPrintf("(RequestVote) [%d]: voteCount: %d\n", rf.me, *voteCount) +} + +func (rf *Raft) startElection() { + rf.currentTerm++ + rf.state = CANDIDATE + rf.votedFor = rf.me + rf.resetElectionTimer() + DPrintf("(RequestVote) [%d]: start election, term %d", rf.me, rf.currentTerm) + lastLog := rf.logs[len(rf.logs)-1] + + voteCount := int32(1) + args := RequestVoteArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + CandidateId: rf.me, + LastLogIndex: lastLog.Index, + LastLogTerm: lastLog.Term, + } + + for id := range 
rf.peers { + if id == rf.me { + continue + } + go rf.sendRequestVote(id, &args, &voteCount) + } +} + +func (rf *Raft) resetElectionTimer() { + // election timeout range from 350 to 550 + ms := 350 + (rand.Int63() % 200) + rf.electionTimeStamp = time.Now() + rf.electionTimeout = time.Duration(ms) * time.Millisecond +} + +func (rf *Raft) isElectionTimeout() bool { + return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/install_snapshot.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/install_snapshot.go new file mode 100644 index 0000000..7ba645e --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/install_snapshot.go @@ -0,0 +1,92 @@ +package raft + +type InstallSnapshotArgs struct { + BaseRPC + LeaderId int + LastIncludedIndex int + LastIncludedTerm int + Offset int + Data []byte + Done bool +} + +type InstallSnapshotReply struct { + BaseRPC +} + +// InstallSnapshot RPC handler +func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkRequestTerm(args, reply) { + return + } + + if args.LastIncludedIndex <= rf.commitIndex { + return + } + prevCommitIndex := rf.commitIndex + prevLastApplied := rf.lastApplied + defer DPrintf("(InstallSnapshot) [%d]: LastIncludedIndex: %d, LastIncludedTerm: %d, prevCommitIndex: %d, prevLastApplied: %d\n", rf.me, args.LastIncludedIndex, args.LastIncludedTerm, prevCommitIndex, prevLastApplied) + rf.resetElectionTimer() + + rf.commitIndex = args.LastIncludedIndex + rf.lastApplied = args.LastIncludedIndex + // 2. Create new snapshot file if first chunk (offset is 0) + // 3. Write data into snapshot file at given offset + // 4. Reply and wait for more data chunks if done is false + if !args.Done { + return + } + // 5. Save snapshot file, discard any existing or partial snapshot with a + // smaller index + // 6. If existing log entry has same index and term as snapshot’s last + // included entry, retain log entries following it and reply + // 7. Discard the entire log + // 8. Reset state machine using snapshot contents (and load snapshot’s + // cluster configuration) + firstLogIndex := rf.logs[0].Index + if firstLogIndex <= args.LastIncludedIndex { + rf.logs = append([]Entry{}, Entry{ + Index: args.LastIncludedIndex, + Term: args.LastIncludedTerm, + Command: nil, + }) + } else if firstLogIndex < args.LastIncludedIndex { + trimLen := args.LastIncludedIndex - firstLogIndex + rf.logs = append([]Entry{}, rf.logs[trimLen:]...) 
+ rf.logs[0].Command = nil + } + rf.persister.Save(rf.encodeState(), args.Data) + rf.smsg = &ApplyMsg{ + SnapshotValid: true, + Snapshot: args.Data, + SnapshotTerm: args.LastIncludedTerm, + SnapshotIndex: args.LastIncludedIndex, + } +} + +func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { + reply := &InstallSnapshotReply{} + ok := rf.peers[server].Call("Raft.InstallSnapshot", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + + if args.LastIncludedIndex != rf.logs[0].Index { + return + } + + rf.nextIndex[server] = args.LastIncludedIndex + 1 + rf.matchIndex[server] = args.LastIncludedIndex + + rf.persister.Save(rf.encodeState(), args.Data) +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/persister.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/persister.go new file mode 100644 index 0000000..c5f816c --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/persister.go @@ -0,0 +1,70 @@ +package raft + +// +// support for Raft and kvraft to save persistent +// Raft state (log &c) and k/v server snapshots. +// +// we will use the original persister.go to test your code for grading. +// so, while you can modify this code to help you debug, please +// test with the original before submitting. +// + +import "sync" + +type Persister struct { + mu sync.Mutex + raftstate []byte + snapshot []byte +} + +func MakePersister() *Persister { + return &Persister{} +} + +func clone(orig []byte) []byte { + x := make([]byte, len(orig)) + copy(x, orig) + return x +} + +func (ps *Persister) Copy() *Persister { + ps.mu.Lock() + defer ps.mu.Unlock() + np := MakePersister() + np.raftstate = ps.raftstate + np.snapshot = ps.snapshot + return np +} + +func (ps *Persister) ReadRaftState() []byte { + ps.mu.Lock() + defer ps.mu.Unlock() + return clone(ps.raftstate) +} + +func (ps *Persister) RaftStateSize() int { + ps.mu.Lock() + defer ps.mu.Unlock() + return len(ps.raftstate) +} + +// Save both Raft state and K/V snapshot as a single atomic action, +// to help avoid them getting out of sync. +func (ps *Persister) Save(raftstate []byte, snapshot []byte) { + ps.mu.Lock() + defer ps.mu.Unlock() + ps.raftstate = clone(raftstate) + ps.snapshot = clone(snapshot) +} + +func (ps *Persister) ReadSnapshot() []byte { + ps.mu.Lock() + defer ps.mu.Unlock() + return clone(ps.snapshot) +} + +func (ps *Persister) SnapshotSize() int { + ps.mu.Lock() + defer ps.mu.Unlock() + return len(ps.snapshot) +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/raft.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/raft.go new file mode 100644 index 0000000..9946898 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/raft.go @@ -0,0 +1,416 @@ +package raft + +// +// this is an outline of the API that raft must expose to +// the service (or tester). see comments below for +// each of these functions for more details. +// +// rf = Make(...) +// create a new Raft server. 
+// rf.Start(command interface{}) (index, term, isleader) +// start agreement on a new log entry +// rf.GetState() (term, isLeader) +// ask a Raft for its current term, and whether it thinks it is leader +// ApplyMsg +// each time a new entry is committed to the log, each Raft peer +// should send an ApplyMsg to the service (or tester) +// in the same server. +// + +import ( + // "bytes" + + "bytes" + "log" + "sync" + "sync/atomic" + "time" + + // "6.5840/labgob" + "6.5840/labgob" + "6.5840/labrpc" +) + +// as each Raft peer becomes aware that successive log entries are +// committed, the peer should send an ApplyMsg to the service (or +// tester) on the same server, via the applyCh passed to Make(). set +// CommandValid to true to indicate that the ApplyMsg contains a newly +// committed log entry. +// +// in part 3D you'll want to send other kinds of messages (e.g., +// snapshots) on the applyCh, but set CommandValid to false for these +// other uses. +type ApplyMsg struct { + CommandValid bool + Command interface{} + CommandIndex int + CommandTerm int + + // For 3D: + SnapshotValid bool + Snapshot []byte + SnapshotTerm int + SnapshotIndex int +} + +type Entry struct { + Term int + Index int + Command interface{} +} + +// Base struct for common fields +type BaseRPC struct { + Term int +} + +// Implement RaftRPC interface for BaseRPC +func (b *BaseRPC) GetTerm() int { + return b.Term +} + +func (b *BaseRPC) SetTerm(term int) { + b.Term = term +} + +// RaftRPC interface +type RaftRPC interface { + GetTerm() int + SetTerm(int) +} + +type ServerState int + +const ( + FOLLOWER ServerState = iota + CANDIDATE + LEADER +) + +// A Go object implementing a single Raft peer. +type Raft struct { + mu sync.Mutex // Lock to protect shared access to this peer's state + peers []*labrpc.ClientEnd // RPC end points of all peers + persister *Persister // Object to hold this peer's persisted state + me int // this peer's index into peers[] + dead int32 // set by Kill() + heartbeatTimeout time.Duration + electionTimeout time.Duration + electionTimeStamp time.Time + applyCh chan ApplyMsg + + // state a Raft server must maintain. + broadcasterCond []*sync.Cond + applierCond *sync.Cond + + // server state + state ServerState + + // presistent state on all servers + currentTerm int // latest term server has seen (initialized to 0 on first boot, increases monotonically) + votedFor int // candidateId that received vote in current term (or null if none) + logs []Entry // log entries; each entry contains command for state machine, and term when entry was received by leader (first index is 1) + + // volatile state on all servers + commitIndex int // index of highest log entry known to be committed (initialized to 0, increases monotonically) + lastApplied int // index of highest log entry applied to state machine (initialized to 0, increases monotonically) + + // volatile state on leaders (reinitialized after election) + nextIndex []int // for each server, index of the next log entry to send to that server (initialized to leader last log index + 1) + matchIndex []int // for each server, index of highest log entry known to be replicated on server (initialized to 0, increases monotonically) + + // snapshot msg + smsg *ApplyMsg +} + +// return currentTerm and whether this server +// believes it is the leader. 
+func (rf *Raft) GetState() (int, bool) { + rf.mu.Lock() + defer rf.mu.Unlock() + return rf.currentTerm, rf.state == LEADER +} + +func (rf *Raft) encodeState() []byte { + w := new(bytes.Buffer) + e := labgob.NewEncoder(w) + e.Encode(rf.currentTerm) + e.Encode(rf.votedFor) + e.Encode(rf.logs) + return w.Bytes() +} + +// save Raft's persistent state to stable storage, +// where it can later be retrieved after a crash and restart. +// see paper's Figure 2 for a description of what should be persistent. +// before you've implemented snapshots, you should pass nil as the +// second argument to persister.Save(). +// after you've implemented snapshots, pass the current snapshot +// (or nil if there's not yet a snapshot). +func (rf *Raft) persist() { + if rf.persister.ReadSnapshot() != nil { + rf.persister.Save(rf.encodeState(), rf.persister.ReadSnapshot()) + } else { + rf.persister.Save(rf.encodeState(), nil) + } +} + +// restore previously persisted state. +func (rf *Raft) readPersist(data []byte) { + if data == nil || len(data) < 1 { // bootstrap without any state + return + } + r := bytes.NewBuffer(data) + d := labgob.NewDecoder(r) + var currentTerm int + var votedFor int + var logs []Entry + + if d.Decode(¤tTerm) != nil || d.Decode(&votedFor) != nil || d.Decode(&logs) != nil { + log.Fatal("failed to read persist\n") + } else { + DPrintf("[%d]: read persist, currentTerm: %d, votedFor: %d, logs: %v\n", rf.me, currentTerm, votedFor, logs) + rf.currentTerm = currentTerm + rf.votedFor = votedFor + rf.logs = logs + rf.lastApplied = rf.logs[0].Index + rf.commitIndex = rf.logs[0].Index + } +} + +// the service says it has created a snapshot that has +// all info up to and including index. this means the +// service no longer needs the log through (and including) +// that index. Raft should now trim its log as much as possible. +func (rf *Raft) Snapshot(index int, snapshot []byte) { + // Your code here (3D). + rf.mu.Lock() + defer rf.mu.Unlock() + // if the snapshot is outdated, just ignore it + if rf.logs[0].Index >= index { + return + } + firstLogIndex := rf.logs[0].Index + trimLen := index - firstLogIndex + // trim the logs + rf.logs = append([]Entry{}, rf.logs[trimLen:]...) + rf.logs[0].Command = nil + rf.persister.Save(rf.encodeState(), snapshot) +} + +// the service using Raft (e.g. a k/v server) wants to start +// agreement on the next command to be appended to Raft's log. if this +// server isn't the leader, returns false. otherwise start the +// agreement and return immediately. there is no guarantee that this +// command will ever be committed to the Raft log, since the leader +// may fail or lose an election. even if the Raft instance has been killed, +// this function should return gracefully. +// +// the first return value is the index that the command will appear at +// if it's ever committed. the second return value is the current +// term. the third return value is true if this server believes it is +// the leader. +func (rf *Raft) Start(command interface{}) (int, int, bool) { + rf.mu.Lock() + defer rf.mu.Unlock() + if rf.state != LEADER { + return -1, -1, false + } + defer DPrintf("(Start) [%d]: command %+v, index:%d, term: %d\n", rf.me, command, rf.logs[len(rf.logs)-1].Index, rf.currentTerm) + rf.logs = append(rf.logs, Entry{ + Term: rf.currentTerm, + Index: rf.logs[len(rf.logs)-1].Index + 1, + Command: command, + }) + rf.broadcastAppendEntries(false) + // Your code here (3B). 
+ return rf.logs[len(rf.logs)-1].Index, rf.currentTerm, true +} + +// Warning: this function is not thread-safe +func (rf *Raft) resetNewTermState(targetTerm int) { + DPrintf("(ResetTerm)[%d]: received newer term, set term to %d\n", rf.me, targetTerm) + if rf.currentTerm < targetTerm { + rf.votedFor = -1 + } + rf.currentTerm = targetTerm + rf.state = FOLLOWER // reset to follower +} + +// Reply false if term < currentTerm (§5.1) +// If RPC request contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkRequestTerm(args, reply RaftRPC) bool { + term := args.GetTerm() + defer reply.SetTerm(rf.currentTerm) + if term < rf.currentTerm { + return false + } + if term > rf.currentTerm { + rf.resetNewTermState(term) + } + return true +} + +// If RPC request or response contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkResponseTerm(args, reply RaftRPC, isElection bool) bool { + argsTerm := args.GetTerm() + replyTerm := reply.GetTerm() + if replyTerm > argsTerm { + rf.resetNewTermState(replyTerm) + rf.resetElectionTimer() + return false + } + return isElection || (rf.state == LEADER) +} + +// the tester doesn't halt goroutines created by Raft after each test, +// but it does call the Kill() method. your code can use killed() to +// check whether Kill() has been called. the use of atomic avoids the +// need for a lock. +// +// the issue is that long-running goroutines use memory and may chew +// up CPU time, perhaps causing later tests to fail and generating +// confusing debug output. any goroutine with a long-running loop +// should call killed() to check whether it should stop. +func (rf *Raft) Kill() { + atomic.StoreInt32(&rf.dead, 1) + // Your code here, if desired. 
+} + +func (rf *Raft) killed() bool { + z := atomic.LoadInt32(&rf.dead) + return z == 1 +} + +// a dedicated applier goroutine to guarantee that each log will be push into applyCh exactly once, ensuring that service's applying entries and raft's committing entries can be parallel +func (rf *Raft) applier() { + for !rf.killed() { + rf.mu.Lock() + // if there is no need to apply entries, just release CPU and wait other goroutine's signal if they commit new entries + for rf.lastApplied >= rf.commitIndex { + rf.applierCond.Wait() + } + firstLogIndex := rf.logs[0].Index + commitIndex, lastApplied := rf.commitIndex, rf.lastApplied + DPrintf("(applier) [%d]: commitIndex: %d, lastApplied: %d, logFirstIndex: %d, logLastIndex: %d\n", rf.me, commitIndex, lastApplied, firstLogIndex, rf.logs[len(rf.logs)-1].Index) + entries := make([]Entry, commitIndex-lastApplied) + copy(entries, rf.logs[lastApplied+1-firstLogIndex:commitIndex+1-firstLogIndex]) + if rf.smsg != nil { + msg := rf.smsg + rf.smsg = nil + rf.mu.Unlock() + rf.applyCh <- *msg + } else { + rf.mu.Unlock() + } + for _, entry := range entries { + DPrintf("(applier) [%d]: apply entry %+v\n", rf.me, entry) + rf.applyCh <- ApplyMsg{ + CommandValid: true, + Command: entry.Command, + CommandTerm: entry.Term, + CommandIndex: entry.Index, + } + } + rf.mu.Lock() + // use commitIndex rather than rf.commitIndex because rf.commitIndex may change during the Unlock() and Lock() + // use Max(rf.lastApplied, commitIndex) rather than commitIndex directly to avoid concurrently InstallSnapshot rpc causing lastApplied to rollback + if rf.lastApplied < commitIndex { + rf.lastApplied = commitIndex + } + rf.mu.Unlock() + } +} + +/** + * Lets illustrate the time line of the ticker function + * e: election timeout + * h: heartbeat timeout + * + * ---- h ---- h ---- h ---- h ---- h ---- ... + * + * First, the server will wake up each fixed heartbeat timeout. This timeout is + * relatively shorter than the election timeout. If the server is not a leader, + * it basically do nothing about heartbeat. + * + * However, everytime when server wake up, it will check if the election timeout + * is reached. It might start a new election, if it is not a leader. + * + * v election timeout found! + * ---- h1 ---- h2 ---- h3 ---- h ---- h ---- ... + * --------- e1 ------ e2 ------------ e ---- ... + * + * Reseting a new election timeout when the server receives a heartbeat or a + * vote from another server prevents the election. One shortcomming of the + * current implementation is that the election timeout does not trigger a new + * election immediately. It will wait until the next heartbeat timeout. + */ +func (rf *Raft) ticker() { + for !rf.killed() { + rf.mu.Lock() + if rf.state == LEADER { + rf.broadcastAppendEntries(true) + } else if rf.isElectionTimeout() { + rf.startElection() + } + rf.mu.Unlock() + time.Sleep(rf.heartbeatTimeout) + } +} + +// the service or tester wants to create a Raft server. the ports +// of all the Raft servers (including this one) are in peers[]. this +// server's port is peers[me]. all the servers' peers[] arrays +// have the same order. persister is a place for this server to +// save its persistent state, and also initially holds the most +// recent saved state, if any. applyCh is a channel on which the +// tester or service expects Raft to send ApplyMsg messages. +// Make() must return quickly, so it should start goroutines +// for any long-running work. 
+func Make(peers []*labrpc.ClientEnd, me int, + persister *Persister, applyCh chan ApplyMsg) *Raft { + rf := &Raft{} + rf.peers = peers + rf.persister = persister + rf.me = me + rf.applyCh = applyCh + rf.heartbeatTimeout = 125 * time.Millisecond + rf.resetElectionTimer() + rf.state = FOLLOWER + rf.votedFor = -1 + rf.logs = make([]Entry, 0) + + // dummy entry to make the index start from 1 + rf.logs = append(rf.logs, Entry{0, 0, nil}) + + rf.commitIndex = 0 + rf.lastApplied = 0 + + rf.applierCond = sync.NewCond(&rf.mu) + rf.broadcasterCond = make([]*sync.Cond, len(peers)) + + rf.nextIndex = make([]int, len(peers)) + rf.matchIndex = make([]int, len(peers)) + + for id := range peers { + rf.nextIndex[id] = 1 + if id != rf.me { + rf.broadcasterCond[id] = sync.NewCond(&sync.Mutex{}) + go rf.broadcaster(id) + } + } + + rf.smsg = nil + + // initialize from state persisted before a crash + rf.readPersist(persister.ReadRaftState()) + + // start ticker goroutine to start elections + go rf.ticker() + + go rf.applier() + + return rf +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/util.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/util.go new file mode 100644 index 0000000..37c7fe6 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/4b_kvraft/starter_files/raft/util.go @@ -0,0 +1,16 @@ +package raft + +import ( + "log" + "os" +) + +// Debugging +var Debug = os.Getenv("DEBUG") == "1" + +func DPrintf(format string, a ...interface{}) { + if !Debug { + return + } + log.Printf(format, a...) +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/config.json index 2ec4ff3..5672c91 100644 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/config.json +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/config.json @@ -10,5 +10,93 @@ "fault-tolerance", "go" ], - "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", + "starter_files": [ + { + "src": "raft/persister.go", + "dest": "/workspace/src/raft/persister.go" + }, + { + "src": "raft/election.go", + "dest": "/workspace/src/raft/election.go" + }, + { + "src": "raft/append_entries.go", + "dest": "/workspace/src/raft/append_entries.go" + }, + { + "src": "raft/util.go", + "dest": "/workspace/src/raft/util.go" + }, + { + "src": "raft/install_snapshot.go", + "dest": "/workspace/src/raft/install_snapshot.go" + }, + { + "src": "raft/raft.go", + "dest": "/workspace/src/raft/raft.go" + }, + { + "src": "kvraft/server.go", + "dest": "/workspace/src/kvraft/server.go" + }, + { + "src": "kvraft/client.go", + "dest": "/workspace/src/kvraft/client.go" + }, + { + "src": "kvraft/common.go", + "dest": "/workspace/src/kvraft/common.go" + }, + { + "src": "kvsrv/server.go", + "dest": "/workspace/src/kvsrv/server.go" + }, + { + "src": "kvsrv/client.go", + "dest": "/workspace/src/kvsrv/client.go" + }, + { + "src": "kvsrv/common.go", + "dest": "/workspace/src/kvsrv/common.go" + }, + { + "src": "mr/coordinator.go", + "dest": "/workspace/src/mr/coordinator.go" + }, + { + "src": "mr/worker.go", + "dest": "/workspace/src/mr/worker.go" + }, + { + "src": "mr/rpc.go", + "dest": "/workspace/src/mr/rpc.go" + } + ], + "output_files": [ + { + "src": "/workspace/src/shardctrler/client.go", + "dest": "src/shardctrler/client.go" + }, + { + "src": "/workspace/src/shardctrler/common.go", + "dest": "src/shardctrler/common.go" + }, 
+ { + "src": "/workspace/src/shardctrler/server.go", + "dest": "src/shardctrler/server.go" + }, + { + "src": "/workspace/src/shardkv/client.go", + "dest": "src/shardkv/client.go" + }, + { + "src": "/workspace/src/shardkv/common.go", + "dest": "src/shardkv/common.go" + }, + { + "src": "/workspace/src/shardkv/server.go", + "dest": "src/shardkv/server.go" + } + ] } \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/preprocess.sh index 648faac..d02e141 100755 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/preprocess.sh +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/preprocess.sh @@ -5,2011 +5,6 @@ echo '=== Preprocessing 5A Shardkv ===' cd /workspace -echo 'Copying reference implementations from previous labs...' - -echo 'Copying raft implementation...' -mkdir -p src/raft - -cat > src/raft/raft.go << 'FILE_EOF_raft_raft_go' -package raft - -// -// this is an outline of the API that raft must expose to -// the service (or tester). see comments below for -// each of these functions for more details. -// -// rf = Make(...) -// create a new Raft server. -// rf.Start(command interface{}) (index, term, isleader) -// start agreement on a new log entry -// rf.GetState() (term, isLeader) -// ask a Raft for its current term, and whether it thinks it is leader -// ApplyMsg -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester) -// in the same server. -// - -import ( - // "bytes" - - "bytes" - "log" - "sync" - "sync/atomic" - "time" - - // "6.5840/labgob" - "6.5840/labgob" - "6.5840/labrpc" -) - -// as each Raft peer becomes aware that successive log entries are -// committed, the peer should send an ApplyMsg to the service (or -// tester) on the same server, via the applyCh passed to Make(). set -// CommandValid to true to indicate that the ApplyMsg contains a newly -// committed log entry. -// -// in part 3D you'll want to send other kinds of messages (e.g., -// snapshots) on the applyCh, but set CommandValid to false for these -// other uses. -type ApplyMsg struct { - CommandValid bool - Command interface{} - CommandIndex int - CommandTerm int - - // For 3D: - SnapshotValid bool - Snapshot []byte - SnapshotTerm int - SnapshotIndex int -} - -type Entry struct { - Term int - Index int - Command interface{} -} - -// Base struct for common fields -type BaseRPC struct { - Term int -} - -// Implement RaftRPC interface for BaseRPC -func (b *BaseRPC) GetTerm() int { - return b.Term -} - -func (b *BaseRPC) SetTerm(term int) { - b.Term = term -} - -// RaftRPC interface -type RaftRPC interface { - GetTerm() int - SetTerm(int) -} - -type ServerState int - -const ( - FOLLOWER ServerState = iota - CANDIDATE - LEADER -) - -// A Go object implementing a single Raft peer. -type Raft struct { - mu sync.Mutex // Lock to protect shared access to this peer's state - peers []*labrpc.ClientEnd // RPC end points of all peers - persister *Persister // Object to hold this peer's persisted state - me int // this peer's index into peers[] - dead int32 // set by Kill() - heartbeatTimeout time.Duration - electionTimeout time.Duration - electionTimeStamp time.Time - applyCh chan ApplyMsg - - // state a Raft server must maintain. 
- broadcasterCond []*sync.Cond - applierCond *sync.Cond - - // server state - state ServerState - - // presistent state on all servers - currentTerm int // latest term server has seen (initialized to 0 on first boot, increases monotonically) - votedFor int // candidateId that received vote in current term (or null if none) - logs []Entry // log entries; each entry contains command for state machine, and term when entry was received by leader (first index is 1) - - // volatile state on all servers - commitIndex int // index of highest log entry known to be committed (initialized to 0, increases monotonically) - lastApplied int // index of highest log entry applied to state machine (initialized to 0, increases monotonically) - - // volatile state on leaders (reinitialized after election) - nextIndex []int // for each server, index of the next log entry to send to that server (initialized to leader last log index + 1) - matchIndex []int // for each server, index of highest log entry known to be replicated on server (initialized to 0, increases monotonically) - - // snapshot msg - smsg *ApplyMsg -} - -// return currentTerm and whether this server -// believes it is the leader. -func (rf *Raft) GetState() (int, bool) { - rf.mu.Lock() - defer rf.mu.Unlock() - return rf.currentTerm, rf.state == LEADER -} - -func (rf *Raft) encodeState() []byte { - w := new(bytes.Buffer) - e := labgob.NewEncoder(w) - e.Encode(rf.currentTerm) - e.Encode(rf.votedFor) - e.Encode(rf.logs) - return w.Bytes() -} - -// save Raft's persistent state to stable storage, -// where it can later be retrieved after a crash and restart. -// see paper's Figure 2 for a description of what should be persistent. -// before you've implemented snapshots, you should pass nil as the -// second argument to persister.Save(). -// after you've implemented snapshots, pass the current snapshot -// (or nil if there's not yet a snapshot). -func (rf *Raft) persist() { - if rf.persister.ReadSnapshot() != nil { - rf.persister.Save(rf.encodeState(), rf.persister.ReadSnapshot()) - } else { - rf.persister.Save(rf.encodeState(), nil) - } -} - -// restore previously persisted state. -func (rf *Raft) readPersist(data []byte) { - if data == nil || len(data) < 1 { // bootstrap without any state - return - } - r := bytes.NewBuffer(data) - d := labgob.NewDecoder(r) - var currentTerm int - var votedFor int - var logs []Entry - - if d.Decode(¤tTerm) != nil || d.Decode(&votedFor) != nil || d.Decode(&logs) != nil { - log.Fatal("failed to read persist\n") - } else { - DPrintf("[%d]: read persist, currentTerm: %d, votedFor: %d, logs: %v\n", rf.me, currentTerm, votedFor, logs) - rf.currentTerm = currentTerm - rf.votedFor = votedFor - rf.logs = logs - rf.lastApplied = rf.logs[0].Index - rf.commitIndex = rf.logs[0].Index - } -} - -// the service says it has created a snapshot that has -// all info up to and including index. this means the -// service no longer needs the log through (and including) -// that index. Raft should now trim its log as much as possible. -func (rf *Raft) Snapshot(index int, snapshot []byte) { - // Your code here (3D). - rf.mu.Lock() - defer rf.mu.Unlock() - // if the snapshot is outdated, just ignore it - if rf.logs[0].Index >= index { - return - } - firstLogIndex := rf.logs[0].Index - trimLen := index - firstLogIndex - // trim the logs - rf.logs = append([]Entry{}, rf.logs[trimLen:]...) - rf.logs[0].Command = nil - rf.persister.Save(rf.encodeState(), snapshot) -} - -// the service using Raft (e.g. 
a k/v server) wants to start -// agreement on the next command to be appended to Raft's log. if this -// server isn't the leader, returns false. otherwise start the -// agreement and return immediately. there is no guarantee that this -// command will ever be committed to the Raft log, since the leader -// may fail or lose an election. even if the Raft instance has been killed, -// this function should return gracefully. -// -// the first return value is the index that the command will appear at -// if it's ever committed. the second return value is the current -// term. the third return value is true if this server believes it is -// the leader. -func (rf *Raft) Start(command interface{}) (int, int, bool) { - rf.mu.Lock() - defer rf.mu.Unlock() - if rf.state != LEADER { - return -1, -1, false - } - defer DPrintf("(Start) [%d]: command %+v, index:%d, term: %d\n", rf.me, command, rf.logs[len(rf.logs)-1].Index, rf.currentTerm) - rf.logs = append(rf.logs, Entry{ - Term: rf.currentTerm, - Index: rf.logs[len(rf.logs)-1].Index + 1, - Command: command, - }) - rf.broadcastAppendEntries(false) - // Your code here (3B). - return rf.logs[len(rf.logs)-1].Index, rf.currentTerm, true -} - -// Warning: this function is not thread-safe -func (rf *Raft) resetNewTermState(targetTerm int) { - DPrintf("(ResetTerm)[%d]: received newer term, set term to %d\n", rf.me, targetTerm) - if rf.currentTerm < targetTerm { - rf.votedFor = -1 - } - rf.currentTerm = targetTerm - rf.state = FOLLOWER // reset to follower -} - -// Reply false if term < currentTerm (§5.1) -// If RPC request contains term T > currentTerm: -// set currentTerm = T, convert to follower (§5.1) -func (rf *Raft) checkRequestTerm(args, reply RaftRPC) bool { - term := args.GetTerm() - defer reply.SetTerm(rf.currentTerm) - if term < rf.currentTerm { - return false - } - if term > rf.currentTerm { - rf.resetNewTermState(term) - } - return true -} - -// If RPC request or response contains term T > currentTerm: -// set currentTerm = T, convert to follower (§5.1) -func (rf *Raft) checkResponseTerm(args, reply RaftRPC, isElection bool) bool { - argsTerm := args.GetTerm() - replyTerm := reply.GetTerm() - if replyTerm > argsTerm { - rf.resetNewTermState(replyTerm) - rf.resetElectionTimer() - return false - } - return isElection || (rf.state == LEADER) -} - -// the tester doesn't halt goroutines created by Raft after each test, -// but it does call the Kill() method. your code can use killed() to -// check whether Kill() has been called. the use of atomic avoids the -// need for a lock. -// -// the issue is that long-running goroutines use memory and may chew -// up CPU time, perhaps causing later tests to fail and generating -// confusing debug output. any goroutine with a long-running loop -// should call killed() to check whether it should stop. -func (rf *Raft) Kill() { - atomic.StoreInt32(&rf.dead, 1) - // Your code here, if desired. 
-} - -func (rf *Raft) killed() bool { - z := atomic.LoadInt32(&rf.dead) - return z == 1 -} - -// a dedicated applier goroutine to guarantee that each log will be push into applyCh exactly once, ensuring that service's applying entries and raft's committing entries can be parallel -func (rf *Raft) applier() { - for !rf.killed() { - rf.mu.Lock() - // if there is no need to apply entries, just release CPU and wait other goroutine's signal if they commit new entries - for rf.lastApplied >= rf.commitIndex { - rf.applierCond.Wait() - } - firstLogIndex := rf.logs[0].Index - commitIndex, lastApplied := rf.commitIndex, rf.lastApplied - DPrintf("(applier) [%d]: commitIndex: %d, lastApplied: %d, logFirstIndex: %d, logLastIndex: %d\n", rf.me, commitIndex, lastApplied, firstLogIndex, rf.logs[len(rf.logs)-1].Index) - entries := make([]Entry, commitIndex-lastApplied) - copy(entries, rf.logs[lastApplied+1-firstLogIndex:commitIndex+1-firstLogIndex]) - if rf.smsg != nil { - msg := rf.smsg - rf.smsg = nil - rf.mu.Unlock() - rf.applyCh <- *msg - } else { - rf.mu.Unlock() - } - for _, entry := range entries { - DPrintf("(applier) [%d]: apply entry %+v\n", rf.me, entry) - rf.applyCh <- ApplyMsg{ - CommandValid: true, - Command: entry.Command, - CommandTerm: entry.Term, - CommandIndex: entry.Index, - } - } - rf.mu.Lock() - // use commitIndex rather than rf.commitIndex because rf.commitIndex may change during the Unlock() and Lock() - // use Max(rf.lastApplied, commitIndex) rather than commitIndex directly to avoid concurrently InstallSnapshot rpc causing lastApplied to rollback - if rf.lastApplied < commitIndex { - rf.lastApplied = commitIndex - } - rf.mu.Unlock() - } -} - -/** - * Lets illustrate the time line of the ticker function - * e: election timeout - * h: heartbeat timeout - * - * ---- h ---- h ---- h ---- h ---- h ---- ... - * - * First, the server will wake up each fixed heartbeat timeout. This timeout is - * relatively shorter than the election timeout. If the server is not a leader, - * it basically do nothing about heartbeat. - * - * However, everytime when server wake up, it will check if the election timeout - * is reached. It might start a new election, if it is not a leader. - * - * v election timeout found! - * ---- h1 ---- h2 ---- h3 ---- h ---- h ---- ... - * --------- e1 ------ e2 ------------ e ---- ... - * - * Reseting a new election timeout when the server receives a heartbeat or a - * vote from another server prevents the election. One shortcomming of the - * current implementation is that the election timeout does not trigger a new - * election immediately. It will wait until the next heartbeat timeout. - */ -func (rf *Raft) ticker() { - for !rf.killed() { - rf.mu.Lock() - if rf.state == LEADER { - rf.broadcastAppendEntries(true) - } else if rf.isElectionTimeout() { - rf.startElection() - } - rf.mu.Unlock() - time.Sleep(rf.heartbeatTimeout) - } -} - -// the service or tester wants to create a Raft server. the ports -// of all the Raft servers (including this one) are in peers[]. this -// server's port is peers[me]. all the servers' peers[] arrays -// have the same order. persister is a place for this server to -// save its persistent state, and also initially holds the most -// recent saved state, if any. applyCh is a channel on which the -// tester or service expects Raft to send ApplyMsg messages. -// Make() must return quickly, so it should start goroutines -// for any long-running work. 
-func Make(peers []*labrpc.ClientEnd, me int, - persister *Persister, applyCh chan ApplyMsg) *Raft { - rf := &Raft{} - rf.peers = peers - rf.persister = persister - rf.me = me - rf.applyCh = applyCh - rf.heartbeatTimeout = 125 * time.Millisecond - rf.resetElectionTimer() - rf.state = FOLLOWER - rf.votedFor = -1 - rf.logs = make([]Entry, 0) - - // dummy entry to make the index start from 1 - rf.logs = append(rf.logs, Entry{0, 0, nil}) - - rf.commitIndex = 0 - rf.lastApplied = 0 - - rf.applierCond = sync.NewCond(&rf.mu) - rf.broadcasterCond = make([]*sync.Cond, len(peers)) - - rf.nextIndex = make([]int, len(peers)) - rf.matchIndex = make([]int, len(peers)) - - for id := range peers { - rf.nextIndex[id] = 1 - if id != rf.me { - rf.broadcasterCond[id] = sync.NewCond(&sync.Mutex{}) - go rf.broadcaster(id) - } - } - - rf.smsg = nil - - // initialize from state persisted before a crash - rf.readPersist(persister.ReadRaftState()) - - // start ticker goroutine to start elections - go rf.ticker() - - go rf.applier() - - return rf -} - -FILE_EOF_raft_raft_go - -cat > src/raft/election.go << 'FILE_EOF_raft_election_go' -package raft - -import ( - "math/rand" - "sync/atomic" - "time" -) - -// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 - -type RequestVoteArgs struct { - BaseRPC // candidate's term - CandidateId int // candidate requesting vote - LastLogIndex int // index of candidate's last log entry - LastLogTerm int // term of candidate's last log entry -} - -type RequestVoteReply struct { - BaseRPC // currentTerm, for candidate to update itself - VoteGranted bool // true means candidate received vote -} - -// RequestVote RPC handler -// Restart your election timer if you grant a vote to another peer. -func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - - reply.VoteGranted = false - - DPrintf("(RequestVote) [%d]: receive vote request from %d, term %d\n", rf.me, args.CandidateId, args.Term) - - if !rf.checkRequestTerm(args, reply) { - return - } - - if (rf.votedFor == -1 || rf.votedFor == args.CandidateId) && rf.isUpToDate(args) { - reply.VoteGranted = true - rf.votedFor = args.CandidateId - rf.resetElectionTimer() - } -} - -func (rf *Raft) isUpToDate(args *RequestVoteArgs) bool { - lastLog := rf.logs[len(rf.logs)-1] - candidateIndex := args.LastLogIndex - candidateTerm := args.LastLogTerm - return candidateTerm > lastLog.Term || (candidateTerm == lastLog.Term && candidateIndex >= lastLog.Index) -} - -func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, voteCount *int32) { - reply := &RequestVoteReply{} - ok := rf.peers[server].Call("Raft.RequestVote", args, reply) - if !ok { - return - } - - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - - if !rf.checkResponseTerm(args, reply, true) { - return - } - - if !reply.VoteGranted { - return - } - - DPrintf("(RequestVote) [%d]: received vote from %d, voteCount: %d\n", rf.me, server, *voteCount) - - // If votes received from majority of servers: become leader - if atomic.AddInt32(voteCount, 1) > int32(len(rf.peers)/2) && - rf.state == CANDIDATE && - rf.currentTerm == args.Term { - rf.state = LEADER - lastLogIndex := rf.logs[len(rf.logs)-1].Index - for i := range rf.peers { - rf.nextIndex[i] = lastLogIndex + 1 - rf.matchIndex[i] = 0 - } - DPrintf("[%d]: become leader to term %d\n", rf.me, rf.currentTerm) - // send initial empty AppendEntries RPCs (heartbeat) to each server immediately - rf.broadcastAppendEntries(true) 
- } - DPrintf("(RequestVote) [%d]: voteCount: %d\n", rf.me, *voteCount) -} - -func (rf *Raft) startElection() { - rf.currentTerm++ - rf.state = CANDIDATE - rf.votedFor = rf.me - rf.resetElectionTimer() - DPrintf("(RequestVote) [%d]: start election, term %d", rf.me, rf.currentTerm) - lastLog := rf.logs[len(rf.logs)-1] - - voteCount := int32(1) - args := RequestVoteArgs{ - BaseRPC: BaseRPC{rf.currentTerm}, - CandidateId: rf.me, - LastLogIndex: lastLog.Index, - LastLogTerm: lastLog.Term, - } - - for id := range rf.peers { - if id == rf.me { - continue - } - go rf.sendRequestVote(id, &args, &voteCount) - } -} - -func (rf *Raft) resetElectionTimer() { - // election timeout range from 350 to 550 - ms := 350 + (rand.Int63() % 200) - rf.electionTimeStamp = time.Now() - rf.electionTimeout = time.Duration(ms) * time.Millisecond -} - -func (rf *Raft) isElectionTimeout() bool { - return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) -} - -FILE_EOF_raft_election_go - -cat > src/raft/append_entries.go << 'FILE_EOF_raft_append_entries_go' -package raft - -// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 - -type AppendEntriesArgs struct { - BaseRPC // leader's term - LeaderId int // so follower can redirect clients - PrevLogIndex int // index of log entry immediately preceding new ones - PrevLogTerm int // term of prevLogIndex entry - Entries []Entry // log entries to store (empty for heartbeat; may send more than one for efficiency) - CommitIndex int // leader's commitIndex -} - -type AppendEntriesReply struct { - BaseRPC // currentTerm, for leader to update itself - Success bool // true if follower contained entry matching prevLogIndex and prevLogTerm - ConflictIndex int // the index of the first conflicting entry -} - -// AppendEntries RPC handler -// Reset the election timer if you get an AppendEntries RPC from the current leader -// (i.e., if the term of the AppendEntries arguments is outdated, you should not reset your timer); -func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - DPrintf("(AppendEntries) [%d] recieve from %d, Term: %d, PrevLogIndex: %d, PrevLogTerm: %d\n", rf.me, args.LeaderId, args.Term, args.PrevLogIndex, args.PrevLogTerm) - - reply.Success = false - reply.ConflictIndex = -1 - - if !rf.checkRequestTerm(args, reply) { - return - } - - if rf.state == CANDIDATE { - rf.state = FOLLOWER - } - - rf.resetElectionTimer() - - prevLogIndex := args.PrevLogIndex - rf.logs[0].Index - - if prevLogIndex < 0 { - // force to send a snapshot - reply.ConflictIndex = 0 - return - } - - // Reply false if log doesn’t contain an entry at prevLogIndex - // whose term matches prevLogTerm (§5.3) - if prevLogIndex >= len(rf.logs) { - reply.ConflictIndex = rf.logs[len(rf.logs)-1].Index - return - } - - // If an existing entry conflicts with a new one (same index - // but different terms), delete the existing entry and all that - // follow it (§5.3) - if rf.logs[prevLogIndex].Term != args.PrevLogTerm { - // optimization - curTerm := rf.logs[prevLogIndex].Term - var conflictIndex int - for i := prevLogIndex; i > 0; i-- { - if rf.logs[i-1].Term != curTerm { - conflictIndex = i - break - } - } - reply.ConflictIndex = conflictIndex + rf.logs[0].Index - return - } - for idx, entry := range args.Entries { - logIndex := entry.Index - rf.logs[0].Index - if logIndex >= len(rf.logs) || rf.logs[logIndex].Term != entry.Term { - DPrintf("(AppendEntries) [%d] append logs: %v\n", rf.me, 
args.Entries) - rf.logs = append([]Entry{}, append(rf.logs[:logIndex], args.Entries[idx:]...)...) - break - } - } - reply.Success = true - if args.CommitIndex > rf.commitIndex { - rf.commitIndex = args.CommitIndex - if args.CommitIndex-rf.logs[0].Index >= len(rf.logs) { - rf.commitIndex = rf.logs[len(rf.logs)-1].Index - } - } - rf.applierCond.Signal() -} - -func (rf *Raft) sendAppendEntries(server int, args *AppendEntriesArgs) { - reply := &AppendEntriesReply{} - ok := rf.peers[server].Call("Raft.AppendEntries", args, reply) - if !ok { - return - } - - DPrintf("(AppendEntries) [%d] recieve reply from %d, Term: %d, Success: %v, ConflictIndex: %d\n", rf.me, server, reply.Term, reply.Success, reply.ConflictIndex) - - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - - if !rf.checkResponseTerm(args, reply, false) { - return - } - // If successful: update nextIndex and matchIndex for - // follower (§5.3) - if reply.Success { - if len(args.Entries) > 0 { - rf.nextIndex[server] = args.Entries[len(args.Entries)-1].Index + 1 - } - rf.matchIndex[server] = rf.nextIndex[server] - 1 - for _, log := range rf.logs { - index := log.Index - count := 1 - for peer := range rf.peers { - if peer != rf.me && rf.matchIndex[peer] >= index { - count++ - } - } - // If there exists an N such that N > commitIndex, a majority - // of matchIndex[i] ≥ N, and log[N].term == currentTerm: - // set commitIndex = N (§5.3, §5.4). - if count > len(rf.peers)/2 && index > rf.commitIndex && log.Term == rf.currentTerm { - rf.commitIndex = index - } - } - } else { - if reply.ConflictIndex != -1 { - rf.nextIndex[server] = reply.ConflictIndex - 1 - } else { - rf.nextIndex[server] = rf.nextIndex[server] - 1 - } - if rf.nextIndex[server] < 1 { - rf.nextIndex[server] = 1 - } - } - DPrintf("(AppendEntries) [%d] nextIndex: %v, matchIndex: %v, commitIndex: %d\n", rf.me, rf.nextIndex, rf.matchIndex, rf.commitIndex) - rf.applierCond.Signal() -} - -func (rf *Raft) broadcastAppendEntries(isHeartBeat bool) { - for peer := range rf.peers { - if peer != rf.me { - // if it is a heartbeat we dont care the linearizability of logs append - if isHeartBeat { - args := rf.prepareReplicationArgs(peer) - go rf.sendReplicationRPC(peer, args) - } else { - rf.broadcasterCond[peer].Signal() - } - } - } -} - -func (rf *Raft) prepareReplicationArgs(peer int) interface{} { - if rf.nextIndex[peer] > rf.logs[0].Index { - firstLog := rf.logs[0] - nextIndex := rf.nextIndex[peer] - firstLog.Index - prevLog := rf.logs[nextIndex-1] - logs := make([]Entry, len(rf.logs[nextIndex:])) - copy(logs, rf.logs[nextIndex:]) - return &AppendEntriesArgs{ - BaseRPC: BaseRPC{rf.currentTerm}, - LeaderId: rf.me, - PrevLogIndex: prevLog.Index, - PrevLogTerm: prevLog.Term, - Entries: logs, - CommitIndex: rf.commitIndex, - } - } else { - return &InstallSnapshotArgs{ - BaseRPC: BaseRPC{rf.currentTerm}, - LeaderId: rf.me, - LastIncludedIndex: rf.logs[0].Index, - LastIncludedTerm: rf.logs[0].Term, - Offset: 0, - Data: rf.persister.ReadSnapshot(), - Done: true, - } - } -} - -func (rf *Raft) sendReplicationRPC(peer int, args interface{}) { - switch v := args.(type) { - case *AppendEntriesArgs: - rf.sendAppendEntries(peer, v) - case *InstallSnapshotArgs: - rf.sendInstallSnapshot(peer, v) - default: - panic("(sendReplicationRPC) SHOULD NOT REACH") - } -} - -func (rf *Raft) isReplicationNeeded(peer int) bool { - return rf.state == LEADER && rf.matchIndex[peer] < rf.logs[len(rf.logs)-1].Index -} - -func (rf *Raft) broadcaster(peer int) { - rf.broadcasterCond[peer].L.Lock() - defer 
rf.broadcasterCond[peer].L.Unlock() - for !rf.killed() { - rf.mu.Lock() - for !rf.isReplicationNeeded(peer) { - rf.mu.Unlock() - rf.broadcasterCond[peer].Wait() - rf.mu.Lock() - } - args := rf.prepareReplicationArgs(peer) - rf.mu.Unlock() - rf.sendReplicationRPC(peer, args) - } -} - -FILE_EOF_raft_append_entries_go - -cat > src/raft/install_snapshot.go << 'FILE_EOF_raft_install_snapshot_go' -package raft - -type InstallSnapshotArgs struct { - BaseRPC - LeaderId int - LastIncludedIndex int - LastIncludedTerm int - Offset int - Data []byte - Done bool -} - -type InstallSnapshotReply struct { - BaseRPC -} - -// InstallSnapshot RPC handler -func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) { - rf.mu.Lock() - defer rf.mu.Unlock() - - if !rf.checkRequestTerm(args, reply) { - return - } - - if args.LastIncludedIndex <= rf.commitIndex { - return - } - prevCommitIndex := rf.commitIndex - prevLastApplied := rf.lastApplied - defer DPrintf("(InstallSnapshot) [%d]: LastIncludedIndex: %d, LastIncludedTerm: %d, prevCommitIndex: %d, prevLastApplied: %d\n", rf.me, args.LastIncludedIndex, args.LastIncludedTerm, prevCommitIndex, prevLastApplied) - rf.resetElectionTimer() - - rf.commitIndex = args.LastIncludedIndex - rf.lastApplied = args.LastIncludedIndex - // 2. Create new snapshot file if first chunk (offset is 0) - // 3. Write data into snapshot file at given offset - // 4. Reply and wait for more data chunks if done is false - if !args.Done { - return - } - // 5. Save snapshot file, discard any existing or partial snapshot with a - // smaller index - // 6. If existing log entry has same index and term as snapshot’s last - // included entry, retain log entries following it and reply - // 7. Discard the entire log - // 8. Reset state machine using snapshot contents (and load snapshot’s - // cluster configuration) - firstLogIndex := rf.logs[0].Index - if firstLogIndex <= args.LastIncludedIndex { - rf.logs = append([]Entry{}, Entry{ - Index: args.LastIncludedIndex, - Term: args.LastIncludedTerm, - Command: nil, - }) - } else if firstLogIndex < args.LastIncludedIndex { - trimLen := args.LastIncludedIndex - firstLogIndex - rf.logs = append([]Entry{}, rf.logs[trimLen:]...) - rf.logs[0].Command = nil - } - rf.persister.Save(rf.encodeState(), args.Data) - rf.smsg = &ApplyMsg{ - SnapshotValid: true, - Snapshot: args.Data, - SnapshotTerm: args.LastIncludedTerm, - SnapshotIndex: args.LastIncludedIndex, - } -} - -func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { - reply := &InstallSnapshotReply{} - ok := rf.peers[server].Call("Raft.InstallSnapshot", args, reply) - if !ok { - return - } - - rf.mu.Lock() - defer rf.mu.Unlock() - - if !rf.checkResponseTerm(args, reply, false) { - return - } - - if args.LastIncludedIndex != rf.logs[0].Index { - return - } - - rf.nextIndex[server] = args.LastIncludedIndex + 1 - rf.matchIndex[server] = args.LastIncludedIndex - - rf.persister.Save(rf.encodeState(), args.Data) -} - -FILE_EOF_raft_install_snapshot_go - -cat > src/raft/util.go << 'FILE_EOF_raft_util_go' -package raft - -import ( - "log" - "os" -) - -// Debugging -var Debug = os.Getenv("DEBUG") == "1" - -func DPrintf(format string, a ...interface{}) { - if !Debug { - return - } - log.Printf(format, a...) -} - -FILE_EOF_raft_util_go - -echo 'Copying kvraft implementation...' 
-mkdir -p src/kvraft - -cat > src/kvraft/client.go << 'FILE_EOF_kvraft_client_go' -package kvraft - -import ( - "crypto/rand" - "math/big" - "sync/atomic" - "time" - - "6.5840/labrpc" -) - -type Clerk struct { - servers []*labrpc.ClientEnd - cid int64 - seq int - leader int32 // cache the leader -} - -func nrand() int64 { - max := big.NewInt(int64(1) << 62) - bigx, _ := rand.Int(rand.Reader, max) - x := bigx.Int64() - return x -} - -func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { - ck := new(Clerk) - ck.servers, ck.cid, ck.seq = servers, nrand(), 0 - return ck -} - -// fetch the current value for a key. -// returns "" if the key does not exist. -// keeps trying forever in the face of all other errors. -// -// you can send an RPC with code like this: -// ok := ck.servers[i].Call("KVServer.Get", &args, &reply) -// -// the types of args and reply (including whether they are pointers) -// must match the declared types of the RPC handler function's -// arguments. and reply must be passed as a pointer. -func (ck *Clerk) Get(key string) string { - ck.seq++ - - args := new(GetArgs) - args.Key, args.Cid, args.Seq = key, ck.cid, ck.seq - - leader := int(atomic.LoadInt32(&ck.leader)) - for { - for i := 0; i < len(ck.servers); i++ { - peer := (leader + i) % len(ck.servers) - reply := new(GetReply) - ok := ck.servers[peer].Call("KVServer.Get", args, reply) - if ok && (reply.Err == OK || reply.Err == ErrNoKey) { - atomic.StoreInt32(&ck.leader, int32(peer)) - return reply.Value - } - } - time.Sleep(100 * time.Millisecond) - } -} - -// shared by Put and Append. -// -// you can send an RPC with code like this: -// ok := ck.servers[i].Call("KVServer.PutAppend", &args, &reply) -// -// the types of args and reply (including whether they are pointers) -// must match the declared types of the RPC handler function's -// arguments. and reply must be passed as a pointer. 
-func (ck *Clerk) PutAppend(key string, value string, op string) { - ck.seq++ - - args := new(PutAppendArgs) - args.OpStr, args.Key, args.Value, args.Cid, args.Seq = op, key, value, ck.cid, ck.seq - - leader := int(atomic.LoadInt32(&ck.leader)) - for { - for i := 0; i < len(ck.servers); i++ { - peer := (leader + i) % len(ck.servers) - reply := new(PutAppendReply) - ok := ck.servers[peer].Call("KVServer.PutAppend", args, reply) - if ok && reply.Err == OK { - atomic.StoreInt32(&ck.leader, int32(peer)) - return - } - } - time.Sleep(100 * time.Millisecond) - } -} - -func (ck *Clerk) Put(key string, value string) { - ck.PutAppend(key, value, "Put") -} -func (ck *Clerk) Append(key string, value string) { - ck.PutAppend(key, value, "Append") -} - -FILE_EOF_kvraft_client_go - -cat > src/kvraft/common.go << 'FILE_EOF_kvraft_common_go' -package kvraft - -const ( - OK = "OK" - ErrNoKey = "ErrNoKey" - ErrWrongLeader = "ErrWrongLeader" -) - -type ClientInfo struct { - Cid int64 - Seq int -} - -type Err string - -type RaftReply struct { - Value string - Err Err -} - -type GetArgs struct { - Key string - ClientInfo -} - -type GetReply = RaftReply - -// Put or Append -type PutAppendArgs struct { - OpStr string // "Put" or "Append" - Key string - Value string - ClientInfo -} - -type PutAppendReply = RaftReply - -type Cache struct { - Seq int - RaftReply -} - -FILE_EOF_kvraft_common_go - -cat > src/kvraft/server.go << 'FILE_EOF_kvraft_server_go' -package kvraft - -import ( - "bytes" - "log" - "os" - "sync" - "sync/atomic" - "time" - - "6.5840/labgob" - "6.5840/labrpc" - "6.5840/raft" -) - -var Debug = os.Getenv("DEBUG") == "1" - -func DPrintf(format string, a ...interface{}) (n int, err error) { - if Debug { - log.Printf(format, a...) - } - return -} - -type Opcode int - -const ( - GET Opcode = iota - PUT - APPEND -) - -type Op struct { - Cmd interface{} - ClientInfo -} - -type Done struct { - index int - term int - value string - err Err -} - -type KVServer struct { - mu sync.Mutex - me int - rf *raft.Raft - ps *raft.Persister - applyCh chan raft.ApplyMsg - dead int32 // set by Kill() - - maxraftstate int // snapshot if log grows this big - - data map[string]string - cache map[int64]*Cache // client id -> seq - chanmap map[int64]chan Done -} - -func getChanId(term, index int) (id int64) { - id = int64(term) << 32 - id += int64(index) - return -} - -func (kv *KVServer) makeChan(term, index int) chan Done { - id := getChanId(term, index) - ch := make(chan Done, 1) - kv.chanmap[id] = ch - return ch -} - -func (kv *KVServer) closeAndDeleteChan(term, index int) { - kv.mu.Lock() - defer kv.mu.Unlock() - id := getChanId(term, index) - close(kv.chanmap[id]) - delete(kv.chanmap, id) -} - -func (kv *KVServer) isCacheHit(Cid int64, Seq int) (bool, *Cache) { - // Why cache.Seq >= Seq works? - // 1. If the seq of cache equals to Seq, it means the operation has been - // executed. Return the value directly. - // 2. If the seq of cache is Greater than Seq, it means some operations - // after this Op have been executed, which implies client has already - // received the result of this Op (the operation must be completed before - // next operation happened). Theorically, return anything is OK. 
- if cache, ok := kv.cache[Cid]; ok && cache.Seq >= Seq { - return true, cache - } else if ok { - return false, cache - } else { - kv.cache[Cid] = new(Cache) - return false, kv.cache[Cid] - } -} - -func (kv *KVServer) encode() []byte { - w := new(bytes.Buffer) - e := labgob.NewEncoder(w) - e.Encode(kv.cache) - e.Encode(kv.data) - return w.Bytes() -} - -func (kv *KVServer) decode(buf []byte) { - if buf == nil || len(buf) < 1 { - return - } - r := bytes.NewBuffer(buf) - d := labgob.NewDecoder(r) - var cache map[int64]*Cache - var data map[string]string - if d.Decode(&cache) != nil || d.Decode(&data) != nil { - log.Fatal("Decode error") - return - } - kv.cache = cache - kv.data = data -} - -func (kv *KVServer) startRaft(cmd interface{}, cid int64, seq int, ch chan *Cache) { - kv.mu.Lock() - defer kv.mu.Unlock() - rr := new(Cache) - if hit, cache := kv.isCacheHit(cid, seq); hit { - rr.Seq, rr.Value, rr.Err = cache.Seq, cache.Value, cache.Err - ch <- rr - } else { - op := new(Op) - op.Cmd, op.Cid, op.Seq = cmd, cid, seq - index, term, isLeader := kv.rf.Start(op) - if !isLeader { - cache.Value, cache.Err = "", ErrWrongLeader - rr.Err = ErrWrongLeader - ch <- rr - return - } - donech := kv.makeChan(term, index) - go kv.waitRaft(term, index, ch, donech) - DPrintf("(startRaft) [%d] start raft with op %+v\n", kv.me, op) - } -} - -func (kv *KVServer) waitRaft(term, index int, ch chan *Cache, donech chan Done) { - timer := time.NewTimer(500 * time.Millisecond) - rr := new(Cache) - DPrintf("(waitRaft) [%d] wait for term: %d, index: %d\n", kv.me, term, index) - select { - case <-timer.C: - DPrintf("(waitRaft) [%d] timeout, term: %d, index: %d\n", kv.me, term, index) - rr.Value = "" - rr.Err = ErrWrongLeader - ch <- rr - case done := <-donech: - rr.Value = done.value - rr.Err = done.err - ch <- rr - } - kv.closeAndDeleteChan(term, index) -} - -func (kv *KVServer) raft(cmd interface{}, cid int64, seq int) *Cache { - ch := make(chan *Cache) - go kv.startRaft(cmd, cid, seq, ch) - r := <-ch - close(ch) - return r -} - -func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { - DPrintf("(Get) [%d] get %s\n", kv.me, args.Key) - r := kv.raft(args, args.Cid, args.Seq) - reply.Value = r.Value - reply.Err = r.Err -} - -func (kv *KVServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { - DPrintf("(PutAppend) [%d] %s %s: %s\n", kv.me, args.OpStr, args.Key, args.Value) - r := kv.raft(args, args.Cid, args.Seq) - reply.Err = r.Err -} - -// Serializes the execution of operations on the key-value store. 
-func (kv *KVServer) executor() { - for !kv.killed() { - msg := <-kv.applyCh - DPrintf("(executor) [%d] receive msg %+v\n", kv.me, msg) - kv.mu.Lock() - if msg.CommandValid { - DPrintf("(executor) [%d] type of command: %T\n", kv.me, msg.Command) - op := msg.Command.(*Op) - index, term, cid, seq := msg.CommandIndex, msg.CommandTerm, op.Cid, op.Seq - hit, cache := kv.isCacheHit(cid, seq) - if !hit { - cache.Seq, cache.Value, cache.Err = seq, "", OK - switch v := op.Cmd.(type) { - case *GetArgs: - key := v.Key - DPrintf("(executor) [%d] get %s: %s\n", kv.me, key, kv.data[key]) - if val, ok := kv.data[key]; ok { - cache.Value = val - } else { - cache.Err = ErrNoKey - } - case *PutAppendArgs: - if v.OpStr == "Put" { - kv.data[v.Key] = v.Value - } else if v.OpStr == "Append" { - kv.data[v.Key] += v.Value - } - DPrintf("(executor) [%d] %s %s: %s\n", kv.me, v.OpStr, v.Key, kv.data[v.Key]) - } - if kv.maxraftstate != -1 && kv.maxraftstate < kv.ps.RaftStateSize() { - kv.rf.Snapshot(index, kv.encode()) - } - } - if ch, ok := kv.chanmap[getChanId(term, index)]; ok { - select { - case ch <- Done{index, term, cache.Value, cache.Err}: - default: - panic("Channel is full or closed") - } - } - } else if msg.SnapshotValid { - kv.decode(msg.Snapshot) - } else { - log.Fatalf("Invalid applyMsg, %+v\n", msg) - } - kv.mu.Unlock() - } -} - -// the tester calls Kill() when a KVServer instance won't -// be needed again. for your convenience, we supply -// code to set rf.dead (without needing a lock), -// and a killed() method to test rf.dead in -// long-running loops. you can also add your own -// code to Kill(). you're not required to do anything -// about this, but it may be convenient (for example) -// to suppress debug output from a Kill()ed instance. -func (kv *KVServer) Kill() { - atomic.StoreInt32(&kv.dead, 1) - kv.rf.Kill() - // Your code here, if desired. -} - -func (kv *KVServer) killed() bool { - z := atomic.LoadInt32(&kv.dead) - return z == 1 -} - -// servers[] contains the ports of the set of -// servers that will cooperate via Raft to -// form the fault-tolerant key/value service. -// me is the index of the current server in servers[]. -// the k/v server should store snapshots through the underlying Raft -// implementation, which should call persister.SaveStateAndSnapshot() to -// atomically save the Raft state along with the snapshot. -// the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, -// in order to allow Raft to garbage-collect its log. if maxraftstate is -1, -// you don't need to snapshot. -// StartKVServer() must return quickly, so it should start goroutines -// for any long-running work. -func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer { - // call labgob.Register on structures you want - // Go's RPC library to marshall/unmarshall. - labgob.Register(&Op{}) - labgob.Register(&GetArgs{}) - labgob.Register(&PutAppendArgs{}) - labgob.Register(&RaftReply{}) - labgob.Register(&Cache{}) - - kv := new(KVServer) - kv.me = me - kv.maxraftstate = maxraftstate - - kv.applyCh = make(chan raft.ApplyMsg) - kv.rf = raft.Make(servers, me, persister, kv.applyCh) - kv.ps = persister - kv.data = make(map[string]string) - kv.cache = make(map[int64]*Cache) - kv.chanmap = make(map[int64]chan Done) - - // Read from persister if any - kv.decode(kv.ps.ReadSnapshot()) - - go kv.executor() - - return kv -} - -FILE_EOF_kvraft_server_go - -echo 'Copying kvsrv implementation...' 
-mkdir -p src/kvsrv - -cat > src/kvsrv/client.go << 'FILE_EOF_kvsrv_client_go' -package kvsrv - -import ( - "crypto/rand" - "math/big" - - "6.5840/labrpc" -) - -type Clerk struct { - server *labrpc.ClientEnd - clientId int64 - seqNum int -} - -func nrand() int64 { - max := big.NewInt(int64(1) << 62) - bigx, _ := rand.Int(rand.Reader, max) - x := bigx.Int64() - return x -} - -func MakeClerk(server *labrpc.ClientEnd) *Clerk { - ck := new(Clerk) - ck.server = server - ck.clientId = nrand() - ck.seqNum = 0 - return ck -} - -// fetch the current value for a key. -// returns "" if the key does not exist. -// keeps trying forever in the face of all other errors. -// -// you can send an RPC with code like this: -// ok := ck.server.Call("KVServer.Get", &args, &reply) -// -// the types of args and reply (including whether they are pointers) -// must match the declared types of the RPC handler function's -// arguments. and reply must be passed as a pointer. -func (ck *Clerk) Get(key string) string { - ck.seqNum++ - args := GetArgs{ - Key: key, - ClientId: ck.clientId, - SeqNum: ck.seqNum, - } - reply := GetReply{} - for !ck.server.Call("KVServer.Get", &args, &reply) { - } - return reply.Value -} - -// shared by Put and Append. -// -// you can send an RPC with code like this: -// ok := ck.server.Call("KVServer."+op, &args, &reply) -// -// the types of args and reply (including whether they are pointers) -// must match the declared types of the RPC handler function's -// arguments. and reply must be passed as a pointer. -func (ck *Clerk) PutAppend(key string, value string, op string) string { - ck.seqNum++ - args := PutAppendArgs{ - Key: key, - Value: value, - ClientId: ck.clientId, - SeqNum: ck.seqNum, - } - reply := PutAppendReply{} - for !ck.server.Call("KVServer."+op, &args, &reply) { - } - return reply.Value -} - -func (ck *Clerk) Put(key string, value string) { - ck.PutAppend(key, value, "Put") -} - -// Append value to key's value and return that value -func (ck *Clerk) Append(key string, value string) string { - return ck.PutAppend(key, value, "Append") -} - -FILE_EOF_kvsrv_client_go - -cat > src/kvsrv/common.go << 'FILE_EOF_kvsrv_common_go' -package kvsrv - -type PutAppendArgs struct { - Key string - Value string - ClientId int64 - SeqNum int -} - -type PutAppendReply struct { - Value string -} - -type GetArgs struct { - Key string - ClientId int64 - SeqNum int -} - -type GetReply struct { - Value string -} - -FILE_EOF_kvsrv_common_go - -cat > src/kvsrv/server.go << 'FILE_EOF_kvsrv_server_go' -package kvsrv - -import ( - "log" - "sync" -) - -const Debug = false - -func DPrintf(format string, a ...interface{}) (n int, err error) { - if Debug { - log.Printf(format, a...) - } - return -} - -type Cache struct { - seq int - value string -} - -type KVServer struct { - mu sync.Mutex - data map[string]string - cache map[int64]*Cache // client id -> seq ->value -} - -func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { - kv.mu.Lock() - defer kv.mu.Unlock() - clientId, seqNum := args.ClientId, args.SeqNum - key := args.Key - reply.Value = "" - // Either the client is new or the seqNum is greater than the cache seqNum. - // In both cases, we can return the value directly. 
- if ca, ok := kv.cache[clientId]; !ok || ca.seq <= seqNum { - reply.Value = kv.data[key] - return - } -} - -func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) { - kv.mu.Lock() - defer kv.mu.Unlock() - clientId, seqNum := args.ClientId, args.SeqNum - k, v := args.Key, args.Value - reply.Value = "" - if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { - return - } else if !ok { - kv.cache[clientId] = new(Cache) - } - kv.data[k] = v - kv.cache[clientId].seq = seqNum - kv.cache[clientId].value = reply.Value -} - -func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) { - kv.mu.Lock() - defer kv.mu.Unlock() - clientId, seqNum := args.ClientId, args.SeqNum - k, v := args.Key, args.Value - reply.Value = "" - // For ca.seq == seqNum, it means that the value has been appended. - // However, the response might be lost, so we return the cache value. - // For ca.seq > seqNum, it doesnt matter what the value is, just return. - if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { - reply.Value = ca.value - return - } else if !ok { - kv.cache[clientId] = new(Cache) - } - reply.Value = kv.data[k] - kv.cache[clientId].seq = seqNum - kv.cache[clientId].value = kv.data[k] - kv.data[k] += v -} - -func StartKVServer() *KVServer { - kv := new(KVServer) - kv.data = make(map[string]string) - kv.cache = make(map[int64]*Cache) - return kv -} - -FILE_EOF_kvsrv_server_go - -echo 'Copying mr implementation...' -mkdir -p src/mr - -cat > src/mr/coordinator.go << 'FILE_EOF_mr_coordinator_go' -package mr - -import ( - "log" - "math" - "net" - "net/http" - "net/rpc" - "os" - "sync" - "time" -) - -const SUCCESS = math.MaxInt32 - -type Coordinator struct { - // Your definitions here. - tasks chan Work // a taskqueue - mu sync.Mutex - terms []int - wg sync.WaitGroup - nMap int - nReduce int - done bool -} - -func (c *Coordinator) CallGetWork(args *WorkArgs, reply *WorkReply) error { - if len(c.tasks) == 0 { - reply.HasWork = false - return nil - } - reply.Work = <-c.tasks - c.mu.Lock() - reply.Term = c.terms[reply.Work.FileIndex] - c.mu.Unlock() - reply.HasWork = true - - go func() { - time.Sleep(10 * time.Second) - c.mu.Lock() - defer c.mu.Unlock() - if c.terms[reply.Work.FileIndex] == SUCCESS { - return - } - c.terms[reply.Work.FileIndex]++ - c.tasks <- reply.Work - }() - - return nil -} - -func (c *Coordinator) CallReport(args *ReportArgs, reply *ReportReply) error { - c.mu.Lock() - defer c.mu.Unlock() - - if c.terms[args.Work.FileIndex] != args.Term { - reply.Success = false - return nil - } - c.terms[args.Work.FileIndex] = SUCCESS - c.wg.Done() - reply.Success = true - return nil -} - -// start a thread that listens for RPCs from worker.go -func (c *Coordinator) server() { - rpc.Register(c) - rpc.HandleHTTP() - //l, e := net.Listen("tcp", ":1234") - sockname := coordinatorSock() - os.Remove(sockname) - l, e := net.Listen("unix", sockname) - if e != nil { - log.Fatal("listen error:", e) - } - go http.Serve(l, nil) -} - -// main/mrcoordinator.go calls Done() periodically to find out -// if the entire job has finished. -func (c *Coordinator) Done() bool { - return c.done -} - -func StartReduceWork(c *Coordinator) { - c.wg.Wait() - c.terms = make([]int, c.nReduce) - for i := 0; i < c.nReduce; i++ { - c.tasks <- Work{ - WorkType: REDUCE, - FileIndex: i, - NReduce: c.nReduce, - NMapWork: c.nMap, - } - c.wg.Add(1) - } - go WorkDone(c) -} - -func WorkDone(c *Coordinator) { - c.wg.Wait() - c.done = true -} - -// create a Coordinator. -// main/mrcoordinator.go calls this function. 
-// nReduce is the number of reduce tasks to use. -func MakeCoordinator(files []string, nReduce int) *Coordinator { - - var buflen int - if len(files) > nReduce { - buflen = len(files) - } else { - buflen = nReduce - } - - c := Coordinator{ - nMap: len(files), - nReduce: nReduce, - wg: sync.WaitGroup{}, - tasks: make(chan Work, buflen), - terms: make([]int, len(files)), - done: false, - } - - for idx, file := range files { - c.tasks <- Work{ - WorkType: MAP, - Filename: file, - FileIndex: idx, - NReduce: c.nReduce, - NMapWork: c.nMap, - } - c.wg.Add(1) - } - go StartReduceWork(&c) - c.server() - - return &c -} - -FILE_EOF_mr_coordinator_go - -cat > src/mr/rpc.go << 'FILE_EOF_mr_rpc_go' -package mr - -// -// RPC definitions. -// -// remember to capitalize all names. -// - -import ( - "os" - "strconv" -) - -// -// example to show how to declare the arguments -// and reply for an RPC. -// - -type ExampleArgs struct { - X int -} - -type ExampleReply struct { - Y int -} - -/*-Define Work-*/ - -type WorkStatus int - -const ( - IDLE WorkStatus = iota - START - FINISH -) - -type WorkType int - -const ( - MAP WorkType = iota - REDUCE -) - -type Work struct { - WorkType WorkType // MAP or REDUCE - Filename string - FileIndex int // This is a convention for mr-X index - NMapWork int // how many map files - NReduce int // how many reduce files -} - -type WorkArgs struct { - WorkerID int -} - -type WorkReply struct { - HasWork bool - Work Work - Term int -} - -/*-Define Report-*/ -// Report work finish only if success -type ReportArgs struct { - Work Work - Term int -} - -type ReportReply struct { - Success bool -} - -// Cook up a unique-ish UNIX-domain socket name -// in /var/tmp, for the coordinator. -// Can't use the current directory since -// Athena AFS doesn't support UNIX-domain sockets. -func coordinatorSock() string { - s := "/var/tmp/5840-mr-" - s += strconv.Itoa(os.Getuid()) - return s -} - -FILE_EOF_mr_rpc_go - -cat > src/mr/worker.go << 'FILE_EOF_mr_worker_go' -package mr - -import ( - "encoding/json" - "fmt" - "hash/fnv" - "io/ioutil" - "log" - "net/rpc" - "os" - "sort" - "time" -) - -// for sorting by key. -type ByKey []KeyValue - -// for sorting by key. -func (a ByKey) Len() int { return len(a) } -func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } - -// Map functions return a slice of KeyValue. -type KeyValue struct { - Key string - Value string -} - -// use ihash(key) % NReduce to choose the reduce -// task number for each KeyValue emitted by Map. -func ihash(key string) int { - h := fnv.New32a() - h.Write([]byte(key)) - return int(h.Sum32() & 0x7fffffff) -} - -// main/mrworker.go calls this function. -func Worker(mapf func(string, string) []KeyValue, - reducef func(string, []string) string) { - - // Your worker implementation here. 
- for { - r := CallGetWok() - if !r.HasWork { - time.Sleep(3 * time.Second) - continue - } - - switch r.Work.WorkType { - case MAP: - DoMapWork(r.Work, mapf, r.Term) - case REDUCE: - DoReduceWork(r.Work, reducef, r.Term) - } - } -} - -func DoReduceWork(work Work, reducef func(string, []string) string, term int) { - fileIndex := work.FileIndex - intermediate := []KeyValue{} - - for i := 0; i < work.NMapWork; i++ { - filename := fmt.Sprintf("mr-%d-%d", i, fileIndex) - file, err := os.Open(filename) - - if err != nil { - log.Fatalf("cannot open %v", filename) - } - - dec := json.NewDecoder(file) - - for { - var kv KeyValue - if err := dec.Decode(&kv); err != nil { - break - } - intermediate = append(intermediate, kv) - } - file.Close() - } - - sort.Sort(ByKey(intermediate)) - - oname := fmt.Sprintf("mr-out-%d", fileIndex) - ofile, _ := ioutil.TempFile(".", oname) - - // - // call Reduce on each distinct key in intermediate[], - // and print the result to mr-out-0. - // - i := 0 - for i < len(intermediate) { - j := i + 1 - for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { - j++ - } - values := []string{} - for k := i; k < j; k++ { - values = append(values, intermediate[k].Value) - } - output := reducef(intermediate[i].Key, values) - - // this is the correct format for each line of Reduce output. - fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) - - i = j - } - - os.Rename(ofile.Name(), oname) - - CallReport(work, term) -} - -func DoMapWork(work Work, mapf func(string, string) []KeyValue, term int) { - filename := work.Filename - - file, err := os.Open(filename) - if err != nil { - log.Fatalf("cannot open %v", filename) - } - - content, err := ioutil.ReadAll(file) - - if err != nil { - log.Fatalf("cannot read %v", filename) - } - - file.Close() - - kva := mapf(work.Filename, string(content)) - - //make a - for i := 0; i < work.NReduce; i++ { - imtFilename := fmt.Sprintf("mr-%d-%d", work.FileIndex, i) - - imtFile, err := ioutil.TempFile(".", imtFilename) - - enc := json.NewEncoder(imtFile) - - if err != nil { - log.Fatalf("cannot create %v", imtFilename) - } - - for _, kv := range kva { - hash := ihash(kv.Key) % work.NReduce - if hash == i { - err := enc.Encode(&kv) - if err != nil { - log.Fatalf("cannot encode %v", kv) - } - } - } - - imtFile.Close() - - os.Rename(imtFile.Name(), imtFilename) - } - - CallReport(work, term) -} - -func CallReport(w Work, term int) { - args := ReportArgs{ - Work: w, - Term: term, - } - reply := ReportReply{} - ok := call("Coordinator.CallReport", &args, &reply) - - if !ok { - fmt.Printf("call failed!\n") - } -} - -func CallGetWok() WorkReply { - args := WorkArgs{} - reply := WorkReply{} - ok := call("Coordinator.CallGetWork", &args, &reply) - - if !ok { - fmt.Printf("call failed!\n") - } - - return reply -} - -// example function to show how to make an RPC call to the coordinator. -// -// the RPC argument and reply types are defined in rpc.go. -func CallExample() { - - // declare an argument structure. - args := ExampleArgs{} - - // fill in the argument(s). - args.X = 99 - - // declare a reply structure. - reply := ExampleReply{} - - // send the RPC request, wait for the reply. - // the "Coordinator.Example" tells the - // receiving server that we'd like to call - // the Example() method of struct Coordinator. - ok := call("Coordinator.Example", &args, &reply) - if ok { - // reply.Y should be 100. 
- fmt.Printf("reply.Y %v\n", reply.Y) - } else { - fmt.Printf("call failed!\n") - } -} - -// send an RPC request to the coordinator, wait for the response. -// usually returns true. -// returns false if something goes wrong. -func call(rpcname string, args interface{}, reply interface{}) bool { - // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") - sockname := coordinatorSock() - c, err := rpc.DialHTTP("unix", sockname) - if err != nil { - log.Fatal("dialing:", err) - } - defer c.Close() - - err = c.Call(rpcname, args, reply) - if err == nil { - return true - } - - fmt.Println(err) - return false -} - -FILE_EOF_mr_worker_go - echo 'Creating checksums for protected files...' PROTECTED_FILES=( diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvraft/client.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvraft/client.go new file mode 100644 index 0000000..f0d52d1 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvraft/client.go @@ -0,0 +1,97 @@ +package kvraft + +import ( + "crypto/rand" + "math/big" + "sync/atomic" + "time" + + "6.5840/labrpc" +) + +type Clerk struct { + servers []*labrpc.ClientEnd + cid int64 + seq int + leader int32 // cache the leader +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.servers, ck.cid, ck.seq = servers, nrand(), 0 + return ck +} + +// fetch the current value for a key. +// returns "" if the key does not exist. +// keeps trying forever in the face of all other errors. +// +// you can send an RPC with code like this: +// ok := ck.servers[i].Call("KVServer.Get", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) Get(key string) string { + ck.seq++ + + args := new(GetArgs) + args.Key, args.Cid, args.Seq = key, ck.cid, ck.seq + + leader := int(atomic.LoadInt32(&ck.leader)) + for { + for i := 0; i < len(ck.servers); i++ { + peer := (leader + i) % len(ck.servers) + reply := new(GetReply) + ok := ck.servers[peer].Call("KVServer.Get", args, reply) + if ok && (reply.Err == OK || reply.Err == ErrNoKey) { + atomic.StoreInt32(&ck.leader, int32(peer)) + return reply.Value + } + } + time.Sleep(100 * time.Millisecond) + } +} + +// shared by Put and Append. +// +// you can send an RPC with code like this: +// ok := ck.servers[i].Call("KVServer.PutAppend", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. 
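+//
+// every request is tagged with a (cid, seq) pair so the server can detect
+// duplicate retries; the clerk caches the last known leader and round-robins
+// from it, sleeping 100ms between full passes over the servers.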
+func (ck *Clerk) PutAppend(key string, value string, op string) { + ck.seq++ + + args := new(PutAppendArgs) + args.OpStr, args.Key, args.Value, args.Cid, args.Seq = op, key, value, ck.cid, ck.seq + + leader := int(atomic.LoadInt32(&ck.leader)) + for { + for i := 0; i < len(ck.servers); i++ { + peer := (leader + i) % len(ck.servers) + reply := new(PutAppendReply) + ok := ck.servers[peer].Call("KVServer.PutAppend", args, reply) + if ok && reply.Err == OK { + atomic.StoreInt32(&ck.leader, int32(peer)) + return + } + } + time.Sleep(100 * time.Millisecond) + } +} + +func (ck *Clerk) Put(key string, value string) { + ck.PutAppend(key, value, "Put") +} +func (ck *Clerk) Append(key string, value string) { + ck.PutAppend(key, value, "Append") +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvraft/common.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvraft/common.go new file mode 100644 index 0000000..8a67661 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvraft/common.go @@ -0,0 +1,41 @@ +package kvraft + +const ( + OK = "OK" + ErrNoKey = "ErrNoKey" + ErrWrongLeader = "ErrWrongLeader" +) + +type ClientInfo struct { + Cid int64 + Seq int +} + +type Err string + +type RaftReply struct { + Value string + Err Err +} + +type GetArgs struct { + Key string + ClientInfo +} + +type GetReply = RaftReply + +// Put or Append +type PutAppendArgs struct { + OpStr string // "Put" or "Append" + Key string + Value string + ClientInfo +} + +type PutAppendReply = RaftReply + +type Cache struct { + Seq int + RaftReply +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvraft/server.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvraft/server.go new file mode 100644 index 0000000..0f62b2e --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvraft/server.go @@ -0,0 +1,292 @@ +package kvraft + +import ( + "bytes" + "log" + "os" + "sync" + "sync/atomic" + "time" + + "6.5840/labgob" + "6.5840/labrpc" + "6.5840/raft" +) + +var Debug = os.Getenv("DEBUG") == "1" + +func DPrintf(format string, a ...interface{}) (n int, err error) { + if Debug { + log.Printf(format, a...) + } + return +} + +type Opcode int + +const ( + GET Opcode = iota + PUT + APPEND +) + +type Op struct { + Cmd interface{} + ClientInfo +} + +type Done struct { + index int + term int + value string + err Err +} + +type KVServer struct { + mu sync.Mutex + me int + rf *raft.Raft + ps *raft.Persister + applyCh chan raft.ApplyMsg + dead int32 // set by Kill() + + maxraftstate int // snapshot if log grows this big + + data map[string]string + cache map[int64]*Cache // client id -> seq + chanmap map[int64]chan Done +} + +func getChanId(term, index int) (id int64) { + id = int64(term) << 32 + id += int64(index) + return +} + +func (kv *KVServer) makeChan(term, index int) chan Done { + id := getChanId(term, index) + ch := make(chan Done, 1) + kv.chanmap[id] = ch + return ch +} + +func (kv *KVServer) closeAndDeleteChan(term, index int) { + kv.mu.Lock() + defer kv.mu.Unlock() + id := getChanId(term, index) + close(kv.chanmap[id]) + delete(kv.chanmap, id) +} + +func (kv *KVServer) isCacheHit(Cid int64, Seq int) (bool, *Cache) { + // Why cache.Seq >= Seq works? + // 1. If the seq of cache equals to Seq, it means the operation has been + // executed. Return the value directly. + // 2. 
If the seq of cache is Greater than Seq, it means some operations + // after this Op have been executed, which implies client has already + // received the result of this Op (the operation must be completed before + // next operation happened). Theorically, return anything is OK. + if cache, ok := kv.cache[Cid]; ok && cache.Seq >= Seq { + return true, cache + } else if ok { + return false, cache + } else { + kv.cache[Cid] = new(Cache) + return false, kv.cache[Cid] + } +} + +func (kv *KVServer) encode() []byte { + w := new(bytes.Buffer) + e := labgob.NewEncoder(w) + e.Encode(kv.cache) + e.Encode(kv.data) + return w.Bytes() +} + +func (kv *KVServer) decode(buf []byte) { + if buf == nil || len(buf) < 1 { + return + } + r := bytes.NewBuffer(buf) + d := labgob.NewDecoder(r) + var cache map[int64]*Cache + var data map[string]string + if d.Decode(&cache) != nil || d.Decode(&data) != nil { + log.Fatal("Decode error") + return + } + kv.cache = cache + kv.data = data +} + +func (kv *KVServer) startRaft(cmd interface{}, cid int64, seq int, ch chan *Cache) { + kv.mu.Lock() + defer kv.mu.Unlock() + rr := new(Cache) + if hit, cache := kv.isCacheHit(cid, seq); hit { + rr.Seq, rr.Value, rr.Err = cache.Seq, cache.Value, cache.Err + ch <- rr + } else { + op := new(Op) + op.Cmd, op.Cid, op.Seq = cmd, cid, seq + index, term, isLeader := kv.rf.Start(op) + if !isLeader { + cache.Value, cache.Err = "", ErrWrongLeader + rr.Err = ErrWrongLeader + ch <- rr + return + } + donech := kv.makeChan(term, index) + go kv.waitRaft(term, index, ch, donech) + DPrintf("(startRaft) [%d] start raft with op %+v\n", kv.me, op) + } +} + +func (kv *KVServer) waitRaft(term, index int, ch chan *Cache, donech chan Done) { + timer := time.NewTimer(500 * time.Millisecond) + rr := new(Cache) + DPrintf("(waitRaft) [%d] wait for term: %d, index: %d\n", kv.me, term, index) + select { + case <-timer.C: + DPrintf("(waitRaft) [%d] timeout, term: %d, index: %d\n", kv.me, term, index) + rr.Value = "" + rr.Err = ErrWrongLeader + ch <- rr + case done := <-donech: + rr.Value = done.value + rr.Err = done.err + ch <- rr + } + kv.closeAndDeleteChan(term, index) +} + +func (kv *KVServer) raft(cmd interface{}, cid int64, seq int) *Cache { + ch := make(chan *Cache) + go kv.startRaft(cmd, cid, seq, ch) + r := <-ch + close(ch) + return r +} + +func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { + DPrintf("(Get) [%d] get %s\n", kv.me, args.Key) + r := kv.raft(args, args.Cid, args.Seq) + reply.Value = r.Value + reply.Err = r.Err +} + +func (kv *KVServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { + DPrintf("(PutAppend) [%d] %s %s: %s\n", kv.me, args.OpStr, args.Key, args.Value) + r := kv.raft(args, args.Cid, args.Seq) + reply.Err = r.Err +} + +// Serializes the execution of operations on the key-value store. 
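+// It drains committed entries from applyCh one at a time: operations whose
+// (cid, seq) already appears in the client cache are skipped, Get/Put/Append
+// are applied to the in-memory map, a Raft snapshot is taken once the
+// persisted state grows past maxraftstate, and the result is handed to any
+// RPC handler waiting on the (term, index) channel in chanmap. Snapshot
+// messages instead restore the data and cache maps via decode().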
+func (kv *KVServer) executor() { + for !kv.killed() { + msg := <-kv.applyCh + DPrintf("(executor) [%d] receive msg %+v\n", kv.me, msg) + kv.mu.Lock() + if msg.CommandValid { + DPrintf("(executor) [%d] type of command: %T\n", kv.me, msg.Command) + op := msg.Command.(*Op) + index, term, cid, seq := msg.CommandIndex, msg.CommandTerm, op.Cid, op.Seq + hit, cache := kv.isCacheHit(cid, seq) + if !hit { + cache.Seq, cache.Value, cache.Err = seq, "", OK + switch v := op.Cmd.(type) { + case *GetArgs: + key := v.Key + DPrintf("(executor) [%d] get %s: %s\n", kv.me, key, kv.data[key]) + if val, ok := kv.data[key]; ok { + cache.Value = val + } else { + cache.Err = ErrNoKey + } + case *PutAppendArgs: + if v.OpStr == "Put" { + kv.data[v.Key] = v.Value + } else if v.OpStr == "Append" { + kv.data[v.Key] += v.Value + } + DPrintf("(executor) [%d] %s %s: %s\n", kv.me, v.OpStr, v.Key, kv.data[v.Key]) + } + if kv.maxraftstate != -1 && kv.maxraftstate < kv.ps.RaftStateSize() { + kv.rf.Snapshot(index, kv.encode()) + } + } + if ch, ok := kv.chanmap[getChanId(term, index)]; ok { + select { + case ch <- Done{index, term, cache.Value, cache.Err}: + default: + panic("Channel is full or closed") + } + } + } else if msg.SnapshotValid { + kv.decode(msg.Snapshot) + } else { + log.Fatalf("Invalid applyMsg, %+v\n", msg) + } + kv.mu.Unlock() + } +} + +// the tester calls Kill() when a KVServer instance won't +// be needed again. for your convenience, we supply +// code to set rf.dead (without needing a lock), +// and a killed() method to test rf.dead in +// long-running loops. you can also add your own +// code to Kill(). you're not required to do anything +// about this, but it may be convenient (for example) +// to suppress debug output from a Kill()ed instance. +func (kv *KVServer) Kill() { + atomic.StoreInt32(&kv.dead, 1) + kv.rf.Kill() + // Your code here, if desired. +} + +func (kv *KVServer) killed() bool { + z := atomic.LoadInt32(&kv.dead) + return z == 1 +} + +// servers[] contains the ports of the set of +// servers that will cooperate via Raft to +// form the fault-tolerant key/value service. +// me is the index of the current server in servers[]. +// the k/v server should store snapshots through the underlying Raft +// implementation, which should call persister.SaveStateAndSnapshot() to +// atomically save the Raft state along with the snapshot. +// the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, +// in order to allow Raft to garbage-collect its log. if maxraftstate is -1, +// you don't need to snapshot. +// StartKVServer() must return quickly, so it should start goroutines +// for any long-running work. +func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer { + // call labgob.Register on structures you want + // Go's RPC library to marshall/unmarshall. 
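+	// Op.Cmd holds the original *GetArgs / *PutAppendArgs pointer, so the
+	// concrete argument types are registered as pointers here.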
+ labgob.Register(&Op{}) + labgob.Register(&GetArgs{}) + labgob.Register(&PutAppendArgs{}) + labgob.Register(&RaftReply{}) + labgob.Register(&Cache{}) + + kv := new(KVServer) + kv.me = me + kv.maxraftstate = maxraftstate + + kv.applyCh = make(chan raft.ApplyMsg) + kv.rf = raft.Make(servers, me, persister, kv.applyCh) + kv.ps = persister + kv.data = make(map[string]string) + kv.cache = make(map[int64]*Cache) + kv.chanmap = make(map[int64]chan Done) + + // Read from persister if any + kv.decode(kv.ps.ReadSnapshot()) + + go kv.executor() + + return kv +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvsrv/client.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvsrv/client.go new file mode 100644 index 0000000..cb36e2b --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvsrv/client.go @@ -0,0 +1,83 @@ +package kvsrv + +import ( + "crypto/rand" + "math/big" + + "6.5840/labrpc" +) + +type Clerk struct { + server *labrpc.ClientEnd + clientId int64 + seqNum int +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +func MakeClerk(server *labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.server = server + ck.clientId = nrand() + ck.seqNum = 0 + return ck +} + +// fetch the current value for a key. +// returns "" if the key does not exist. +// keeps trying forever in the face of all other errors. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer.Get", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) Get(key string) string { + ck.seqNum++ + args := GetArgs{ + Key: key, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := GetReply{} + for !ck.server.Call("KVServer.Get", &args, &reply) { + } + return reply.Value +} + +// shared by Put and Append. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer."+op, &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. 
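+//
+// each request carries (clientId, seqNum) so the server can recognize a
+// duplicate delivery of the same operation; the call is simply retried until
+// the single server answers.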
+func (ck *Clerk) PutAppend(key string, value string, op string) string { + ck.seqNum++ + args := PutAppendArgs{ + Key: key, + Value: value, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := PutAppendReply{} + for !ck.server.Call("KVServer."+op, &args, &reply) { + } + return reply.Value +} + +func (ck *Clerk) Put(key string, value string) { + ck.PutAppend(key, value, "Put") +} + +// Append value to key's value and return that value +func (ck *Clerk) Append(key string, value string) string { + return ck.PutAppend(key, value, "Append") +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvsrv/common.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvsrv/common.go new file mode 100644 index 0000000..610acdb --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvsrv/common.go @@ -0,0 +1,22 @@ +package kvsrv + +type PutAppendArgs struct { + Key string + Value string + ClientId int64 + SeqNum int +} + +type PutAppendReply struct { + Value string +} + +type GetArgs struct { + Key string + ClientId int64 + SeqNum int +} + +type GetReply struct { + Value string +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvsrv/server.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvsrv/server.go new file mode 100644 index 0000000..4e03682 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/kvsrv/server.go @@ -0,0 +1,84 @@ +package kvsrv + +import ( + "log" + "sync" +) + +const Debug = false + +func DPrintf(format string, a ...interface{}) (n int, err error) { + if Debug { + log.Printf(format, a...) + } + return +} + +type Cache struct { + seq int + value string +} + +type KVServer struct { + mu sync.Mutex + data map[string]string + cache map[int64]*Cache // client id -> seq ->value +} + +func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + key := args.Key + reply.Value = "" + // Either the client is new or the seqNum is greater than the cache seqNum. + // In both cases, we can return the value directly. + if ca, ok := kv.cache[clientId]; !ok || ca.seq <= seqNum { + reply.Value = kv.data[key] + return + } +} + +func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + kv.data[k] = v + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = reply.Value +} + +func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + // For ca.seq == seqNum, it means that the value has been appended. + // However, the response might be lost, so we return the cache value. + // For ca.seq > seqNum, it doesnt matter what the value is, just return. 
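+	// On the non-duplicate path below, the value as it was before this
+	// append is returned and cached, so a retried Append observes the same
+	// result.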
+ if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + reply.Value = ca.value + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + reply.Value = kv.data[k] + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = kv.data[k] + kv.data[k] += v +} + +func StartKVServer() *KVServer { + kv := new(KVServer) + kv.data = make(map[string]string) + kv.cache = make(map[int64]*Cache) + return kv +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/mr/coordinator.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/mr/coordinator.go new file mode 100644 index 0000000..4fc2518 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/mr/coordinator.go @@ -0,0 +1,141 @@ +package mr + +import ( + "log" + "math" + "net" + "net/http" + "net/rpc" + "os" + "sync" + "time" +) + +const SUCCESS = math.MaxInt32 + +type Coordinator struct { + // Your definitions here. + tasks chan Work // a taskqueue + mu sync.Mutex + terms []int + wg sync.WaitGroup + nMap int + nReduce int + done bool +} + +func (c *Coordinator) CallGetWork(args *WorkArgs, reply *WorkReply) error { + if len(c.tasks) == 0 { + reply.HasWork = false + return nil + } + reply.Work = <-c.tasks + c.mu.Lock() + reply.Term = c.terms[reply.Work.FileIndex] + c.mu.Unlock() + reply.HasWork = true + + go func() { + time.Sleep(10 * time.Second) + c.mu.Lock() + defer c.mu.Unlock() + if c.terms[reply.Work.FileIndex] == SUCCESS { + return + } + c.terms[reply.Work.FileIndex]++ + c.tasks <- reply.Work + }() + + return nil +} + +func (c *Coordinator) CallReport(args *ReportArgs, reply *ReportReply) error { + c.mu.Lock() + defer c.mu.Unlock() + + if c.terms[args.Work.FileIndex] != args.Term { + reply.Success = false + return nil + } + c.terms[args.Work.FileIndex] = SUCCESS + c.wg.Done() + reply.Success = true + return nil +} + +// start a thread that listens for RPCs from worker.go +func (c *Coordinator) server() { + rpc.Register(c) + rpc.HandleHTTP() + //l, e := net.Listen("tcp", ":1234") + sockname := coordinatorSock() + os.Remove(sockname) + l, e := net.Listen("unix", sockname) + if e != nil { + log.Fatal("listen error:", e) + } + go http.Serve(l, nil) +} + +// main/mrcoordinator.go calls Done() periodically to find out +// if the entire job has finished. +func (c *Coordinator) Done() bool { + return c.done +} + +func StartReduceWork(c *Coordinator) { + c.wg.Wait() + c.terms = make([]int, c.nReduce) + for i := 0; i < c.nReduce; i++ { + c.tasks <- Work{ + WorkType: REDUCE, + FileIndex: i, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go WorkDone(c) +} + +func WorkDone(c *Coordinator) { + c.wg.Wait() + c.done = true +} + +// create a Coordinator. +// main/mrcoordinator.go calls this function. +// nReduce is the number of reduce tasks to use. 
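+//
+// the task queue is a buffered channel sized to max(nMap, nReduce). Each task
+// carries a "term": a goroutine re-queues the task with a bumped term if no
+// worker reports success within 10 seconds, and CallReport rejects reports
+// whose term is stale. A WaitGroup holds back the reduce phase until every
+// map task has been reported finished.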
+func MakeCoordinator(files []string, nReduce int) *Coordinator { + + var buflen int + if len(files) > nReduce { + buflen = len(files) + } else { + buflen = nReduce + } + + c := Coordinator{ + nMap: len(files), + nReduce: nReduce, + wg: sync.WaitGroup{}, + tasks: make(chan Work, buflen), + terms: make([]int, len(files)), + done: false, + } + + for idx, file := range files { + c.tasks <- Work{ + WorkType: MAP, + Filename: file, + FileIndex: idx, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go StartReduceWork(&c) + c.server() + + return &c +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/mr/rpc.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/mr/rpc.go new file mode 100644 index 0000000..0f90524 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/mr/rpc.go @@ -0,0 +1,81 @@ +package mr + +// +// RPC definitions. +// +// remember to capitalize all names. +// + +import ( + "os" + "strconv" +) + +// +// example to show how to declare the arguments +// and reply for an RPC. +// + +type ExampleArgs struct { + X int +} + +type ExampleReply struct { + Y int +} + +/*-Define Work-*/ + +type WorkStatus int + +const ( + IDLE WorkStatus = iota + START + FINISH +) + +type WorkType int + +const ( + MAP WorkType = iota + REDUCE +) + +type Work struct { + WorkType WorkType // MAP or REDUCE + Filename string + FileIndex int // This is a convention for mr-X index + NMapWork int // how many map files + NReduce int // how many reduce files +} + +type WorkArgs struct { + WorkerID int +} + +type WorkReply struct { + HasWork bool + Work Work + Term int +} + +/*-Define Report-*/ +// Report work finish only if success +type ReportArgs struct { + Work Work + Term int +} + +type ReportReply struct { + Success bool +} + +// Cook up a unique-ish UNIX-domain socket name +// in /var/tmp, for the coordinator. +// Can't use the current directory since +// Athena AFS doesn't support UNIX-domain sockets. +func coordinatorSock() string { + s := "/var/tmp/5840-mr-" + s += strconv.Itoa(os.Getuid()) + return s +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/mr/worker.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/mr/worker.go new file mode 100644 index 0000000..95d142e --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/mr/worker.go @@ -0,0 +1,233 @@ +package mr + +import ( + "encoding/json" + "fmt" + "hash/fnv" + "io/ioutil" + "log" + "net/rpc" + "os" + "sort" + "time" +) + +// for sorting by key. +type ByKey []KeyValue + +// for sorting by key. +func (a ByKey) Len() int { return len(a) } +func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } + +// Map functions return a slice of KeyValue. +type KeyValue struct { + Key string + Value string +} + +// use ihash(key) % NReduce to choose the reduce +// task number for each KeyValue emitted by Map. +func ihash(key string) int { + h := fnv.New32a() + h.Write([]byte(key)) + return int(h.Sum32() & 0x7fffffff) +} + +// main/mrworker.go calls this function. +func Worker(mapf func(string, string) []KeyValue, + reducef func(string, []string) string) { + + // Your worker implementation here. 
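+	// Poll the coordinator for work, sleeping between attempts when none is
+	// available. Map and reduce output is written to a temp file and renamed
+	// into place so partial files are never visible, and each completion
+	// report carries the task's term so stale workers are ignored.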
+ for { + r := CallGetWok() + if !r.HasWork { + time.Sleep(3 * time.Second) + continue + } + + switch r.Work.WorkType { + case MAP: + DoMapWork(r.Work, mapf, r.Term) + case REDUCE: + DoReduceWork(r.Work, reducef, r.Term) + } + } +} + +func DoReduceWork(work Work, reducef func(string, []string) string, term int) { + fileIndex := work.FileIndex + intermediate := []KeyValue{} + + for i := 0; i < work.NMapWork; i++ { + filename := fmt.Sprintf("mr-%d-%d", i, fileIndex) + file, err := os.Open(filename) + + if err != nil { + log.Fatalf("cannot open %v", filename) + } + + dec := json.NewDecoder(file) + + for { + var kv KeyValue + if err := dec.Decode(&kv); err != nil { + break + } + intermediate = append(intermediate, kv) + } + file.Close() + } + + sort.Sort(ByKey(intermediate)) + + oname := fmt.Sprintf("mr-out-%d", fileIndex) + ofile, _ := ioutil.TempFile(".", oname) + + // + // call Reduce on each distinct key in intermediate[], + // and print the result to mr-out-0. + // + i := 0 + for i < len(intermediate) { + j := i + 1 + for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { + j++ + } + values := []string{} + for k := i; k < j; k++ { + values = append(values, intermediate[k].Value) + } + output := reducef(intermediate[i].Key, values) + + // this is the correct format for each line of Reduce output. + fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) + + i = j + } + + os.Rename(ofile.Name(), oname) + + CallReport(work, term) +} + +func DoMapWork(work Work, mapf func(string, string) []KeyValue, term int) { + filename := work.Filename + + file, err := os.Open(filename) + if err != nil { + log.Fatalf("cannot open %v", filename) + } + + content, err := ioutil.ReadAll(file) + + if err != nil { + log.Fatalf("cannot read %v", filename) + } + + file.Close() + + kva := mapf(work.Filename, string(content)) + + //make a + for i := 0; i < work.NReduce; i++ { + imtFilename := fmt.Sprintf("mr-%d-%d", work.FileIndex, i) + + imtFile, err := ioutil.TempFile(".", imtFilename) + + enc := json.NewEncoder(imtFile) + + if err != nil { + log.Fatalf("cannot create %v", imtFilename) + } + + for _, kv := range kva { + hash := ihash(kv.Key) % work.NReduce + if hash == i { + err := enc.Encode(&kv) + if err != nil { + log.Fatalf("cannot encode %v", kv) + } + } + } + + imtFile.Close() + + os.Rename(imtFile.Name(), imtFilename) + } + + CallReport(work, term) +} + +func CallReport(w Work, term int) { + args := ReportArgs{ + Work: w, + Term: term, + } + reply := ReportReply{} + ok := call("Coordinator.CallReport", &args, &reply) + + if !ok { + fmt.Printf("call failed!\n") + } +} + +func CallGetWok() WorkReply { + args := WorkArgs{} + reply := WorkReply{} + ok := call("Coordinator.CallGetWork", &args, &reply) + + if !ok { + fmt.Printf("call failed!\n") + } + + return reply +} + +// example function to show how to make an RPC call to the coordinator. +// +// the RPC argument and reply types are defined in rpc.go. +func CallExample() { + + // declare an argument structure. + args := ExampleArgs{} + + // fill in the argument(s). + args.X = 99 + + // declare a reply structure. + reply := ExampleReply{} + + // send the RPC request, wait for the reply. + // the "Coordinator.Example" tells the + // receiving server that we'd like to call + // the Example() method of struct Coordinator. + ok := call("Coordinator.Example", &args, &reply) + if ok { + // reply.Y should be 100. 
+ fmt.Printf("reply.Y %v\n", reply.Y) + } else { + fmt.Printf("call failed!\n") + } +} + +// send an RPC request to the coordinator, wait for the response. +// usually returns true. +// returns false if something goes wrong. +func call(rpcname string, args interface{}, reply interface{}) bool { + // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") + sockname := coordinatorSock() + c, err := rpc.DialHTTP("unix", sockname) + if err != nil { + log.Fatal("dialing:", err) + } + defer c.Close() + + err = c.Call(rpcname, args, reply) + if err == nil { + return true + } + + fmt.Println(err) + return false +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/append_entries.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/append_entries.go new file mode 100644 index 0000000..9856584 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/append_entries.go @@ -0,0 +1,214 @@ +package raft + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type AppendEntriesArgs struct { + BaseRPC // leader's term + LeaderId int // so follower can redirect clients + PrevLogIndex int // index of log entry immediately preceding new ones + PrevLogTerm int // term of prevLogIndex entry + Entries []Entry // log entries to store (empty for heartbeat; may send more than one for efficiency) + CommitIndex int // leader's commitIndex +} + +type AppendEntriesReply struct { + BaseRPC // currentTerm, for leader to update itself + Success bool // true if follower contained entry matching prevLogIndex and prevLogTerm + ConflictIndex int // the index of the first conflicting entry +} + +// AppendEntries RPC handler +// Reset the election timer if you get an AppendEntries RPC from the current leader +// (i.e., if the term of the AppendEntries arguments is outdated, you should not reset your timer); +func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + DPrintf("(AppendEntries) [%d] recieve from %d, Term: %d, PrevLogIndex: %d, PrevLogTerm: %d\n", rf.me, args.LeaderId, args.Term, args.PrevLogIndex, args.PrevLogTerm) + + reply.Success = false + reply.ConflictIndex = -1 + + if !rf.checkRequestTerm(args, reply) { + return + } + + if rf.state == CANDIDATE { + rf.state = FOLLOWER + } + + rf.resetElectionTimer() + + prevLogIndex := args.PrevLogIndex - rf.logs[0].Index + + if prevLogIndex < 0 { + // force to send a snapshot + reply.ConflictIndex = 0 + return + } + + // Reply false if log doesn’t contain an entry at prevLogIndex + // whose term matches prevLogTerm (§5.3) + if prevLogIndex >= len(rf.logs) { + reply.ConflictIndex = rf.logs[len(rf.logs)-1].Index + return + } + + // If an existing entry conflicts with a new one (same index + // but different terms), delete the existing entry and all that + // follow it (§5.3) + if rf.logs[prevLogIndex].Term != args.PrevLogTerm { + // optimization + curTerm := rf.logs[prevLogIndex].Term + var conflictIndex int + for i := prevLogIndex; i > 0; i-- { + if rf.logs[i-1].Term != curTerm { + conflictIndex = i + break + } + } + reply.ConflictIndex = conflictIndex + rf.logs[0].Index + return + } + for idx, entry := range args.Entries { + logIndex := entry.Index - rf.logs[0].Index + if logIndex >= len(rf.logs) || rf.logs[logIndex].Term != entry.Term { + DPrintf("(AppendEntries) [%d] append logs: %v\n", rf.me, args.Entries) + rf.logs = append([]Entry{}, 
append(rf.logs[:logIndex], args.Entries[idx:]...)...) + break + } + } + reply.Success = true + if args.CommitIndex > rf.commitIndex { + rf.commitIndex = args.CommitIndex + if args.CommitIndex-rf.logs[0].Index >= len(rf.logs) { + rf.commitIndex = rf.logs[len(rf.logs)-1].Index + } + } + rf.applierCond.Signal() +} + +func (rf *Raft) sendAppendEntries(server int, args *AppendEntriesArgs) { + reply := &AppendEntriesReply{} + ok := rf.peers[server].Call("Raft.AppendEntries", args, reply) + if !ok { + return + } + + DPrintf("(AppendEntries) [%d] recieve reply from %d, Term: %d, Success: %v, ConflictIndex: %d\n", rf.me, server, reply.Term, reply.Success, reply.ConflictIndex) + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + // If successful: update nextIndex and matchIndex for + // follower (§5.3) + if reply.Success { + if len(args.Entries) > 0 { + rf.nextIndex[server] = args.Entries[len(args.Entries)-1].Index + 1 + } + rf.matchIndex[server] = rf.nextIndex[server] - 1 + for _, log := range rf.logs { + index := log.Index + count := 1 + for peer := range rf.peers { + if peer != rf.me && rf.matchIndex[peer] >= index { + count++ + } + } + // If there exists an N such that N > commitIndex, a majority + // of matchIndex[i] ≥ N, and log[N].term == currentTerm: + // set commitIndex = N (§5.3, §5.4). + if count > len(rf.peers)/2 && index > rf.commitIndex && log.Term == rf.currentTerm { + rf.commitIndex = index + } + } + } else { + if reply.ConflictIndex != -1 { + rf.nextIndex[server] = reply.ConflictIndex - 1 + } else { + rf.nextIndex[server] = rf.nextIndex[server] - 1 + } + if rf.nextIndex[server] < 1 { + rf.nextIndex[server] = 1 + } + } + DPrintf("(AppendEntries) [%d] nextIndex: %v, matchIndex: %v, commitIndex: %d\n", rf.me, rf.nextIndex, rf.matchIndex, rf.commitIndex) + rf.applierCond.Signal() +} + +func (rf *Raft) broadcastAppendEntries(isHeartBeat bool) { + for peer := range rf.peers { + if peer != rf.me { + // if it is a heartbeat we dont care the linearizability of logs append + if isHeartBeat { + args := rf.prepareReplicationArgs(peer) + go rf.sendReplicationRPC(peer, args) + } else { + rf.broadcasterCond[peer].Signal() + } + } + } +} + +func (rf *Raft) prepareReplicationArgs(peer int) interface{} { + if rf.nextIndex[peer] > rf.logs[0].Index { + firstLog := rf.logs[0] + nextIndex := rf.nextIndex[peer] - firstLog.Index + prevLog := rf.logs[nextIndex-1] + logs := make([]Entry, len(rf.logs[nextIndex:])) + copy(logs, rf.logs[nextIndex:]) + return &AppendEntriesArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + PrevLogIndex: prevLog.Index, + PrevLogTerm: prevLog.Term, + Entries: logs, + CommitIndex: rf.commitIndex, + } + } else { + return &InstallSnapshotArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + LastIncludedIndex: rf.logs[0].Index, + LastIncludedTerm: rf.logs[0].Term, + Offset: 0, + Data: rf.persister.ReadSnapshot(), + Done: true, + } + } +} + +func (rf *Raft) sendReplicationRPC(peer int, args interface{}) { + switch v := args.(type) { + case *AppendEntriesArgs: + rf.sendAppendEntries(peer, v) + case *InstallSnapshotArgs: + rf.sendInstallSnapshot(peer, v) + default: + panic("(sendReplicationRPC) SHOULD NOT REACH") + } +} + +func (rf *Raft) isReplicationNeeded(peer int) bool { + return rf.state == LEADER && rf.matchIndex[peer] < rf.logs[len(rf.logs)-1].Index +} + +func (rf *Raft) broadcaster(peer int) { + rf.broadcasterCond[peer].L.Lock() + defer rf.broadcasterCond[peer].L.Unlock() + for 
!rf.killed() { + rf.mu.Lock() + for !rf.isReplicationNeeded(peer) { + rf.mu.Unlock() + rf.broadcasterCond[peer].Wait() + rf.mu.Lock() + } + args := rf.prepareReplicationArgs(peer) + rf.mu.Unlock() + rf.sendReplicationRPC(peer, args) + } +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/election.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/election.go new file mode 100644 index 0000000..4274b32 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/election.go @@ -0,0 +1,123 @@ +package raft + +import ( + "math/rand" + "sync/atomic" + "time" +) + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type RequestVoteArgs struct { + BaseRPC // candidate's term + CandidateId int // candidate requesting vote + LastLogIndex int // index of candidate's last log entry + LastLogTerm int // term of candidate's last log entry +} + +type RequestVoteReply struct { + BaseRPC // currentTerm, for candidate to update itself + VoteGranted bool // true means candidate received vote +} + +// RequestVote RPC handler +// Restart your election timer if you grant a vote to another peer. +func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + reply.VoteGranted = false + + DPrintf("(RequestVote) [%d]: receive vote request from %d, term %d\n", rf.me, args.CandidateId, args.Term) + + if !rf.checkRequestTerm(args, reply) { + return + } + + if (rf.votedFor == -1 || rf.votedFor == args.CandidateId) && rf.isUpToDate(args) { + reply.VoteGranted = true + rf.votedFor = args.CandidateId + rf.resetElectionTimer() + } +} + +func (rf *Raft) isUpToDate(args *RequestVoteArgs) bool { + lastLog := rf.logs[len(rf.logs)-1] + candidateIndex := args.LastLogIndex + candidateTerm := args.LastLogTerm + return candidateTerm > lastLog.Term || (candidateTerm == lastLog.Term && candidateIndex >= lastLog.Index) +} + +func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, voteCount *int32) { + reply := &RequestVoteReply{} + ok := rf.peers[server].Call("Raft.RequestVote", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, true) { + return + } + + if !reply.VoteGranted { + return + } + + DPrintf("(RequestVote) [%d]: received vote from %d, voteCount: %d\n", rf.me, server, *voteCount) + + // If votes received from majority of servers: become leader + if atomic.AddInt32(voteCount, 1) > int32(len(rf.peers)/2) && + rf.state == CANDIDATE && + rf.currentTerm == args.Term { + rf.state = LEADER + lastLogIndex := rf.logs[len(rf.logs)-1].Index + for i := range rf.peers { + rf.nextIndex[i] = lastLogIndex + 1 + rf.matchIndex[i] = 0 + } + DPrintf("[%d]: become leader to term %d\n", rf.me, rf.currentTerm) + // send initial empty AppendEntries RPCs (heartbeat) to each server immediately + rf.broadcastAppendEntries(true) + } + DPrintf("(RequestVote) [%d]: voteCount: %d\n", rf.me, *voteCount) +} + +func (rf *Raft) startElection() { + rf.currentTerm++ + rf.state = CANDIDATE + rf.votedFor = rf.me + rf.resetElectionTimer() + DPrintf("(RequestVote) [%d]: start election, term %d", rf.me, rf.currentTerm) + lastLog := rf.logs[len(rf.logs)-1] + + voteCount := int32(1) + args := RequestVoteArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + CandidateId: rf.me, + LastLogIndex: lastLog.Index, + LastLogTerm: lastLog.Term, + } + + for id := range 
rf.peers { + if id == rf.me { + continue + } + go rf.sendRequestVote(id, &args, &voteCount) + } +} + +func (rf *Raft) resetElectionTimer() { + // election timeout range from 350 to 550 + ms := 350 + (rand.Int63() % 200) + rf.electionTimeStamp = time.Now() + rf.electionTimeout = time.Duration(ms) * time.Millisecond +} + +func (rf *Raft) isElectionTimeout() bool { + return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/install_snapshot.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/install_snapshot.go new file mode 100644 index 0000000..7ba645e --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/install_snapshot.go @@ -0,0 +1,92 @@ +package raft + +type InstallSnapshotArgs struct { + BaseRPC + LeaderId int + LastIncludedIndex int + LastIncludedTerm int + Offset int + Data []byte + Done bool +} + +type InstallSnapshotReply struct { + BaseRPC +} + +// InstallSnapshot RPC handler +func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkRequestTerm(args, reply) { + return + } + + if args.LastIncludedIndex <= rf.commitIndex { + return + } + prevCommitIndex := rf.commitIndex + prevLastApplied := rf.lastApplied + defer DPrintf("(InstallSnapshot) [%d]: LastIncludedIndex: %d, LastIncludedTerm: %d, prevCommitIndex: %d, prevLastApplied: %d\n", rf.me, args.LastIncludedIndex, args.LastIncludedTerm, prevCommitIndex, prevLastApplied) + rf.resetElectionTimer() + + rf.commitIndex = args.LastIncludedIndex + rf.lastApplied = args.LastIncludedIndex + // 2. Create new snapshot file if first chunk (offset is 0) + // 3. Write data into snapshot file at given offset + // 4. Reply and wait for more data chunks if done is false + if !args.Done { + return + } + // 5. Save snapshot file, discard any existing or partial snapshot with a + // smaller index + // 6. If existing log entry has same index and term as snapshot’s last + // included entry, retain log entries following it and reply + // 7. Discard the entire log + // 8. Reset state machine using snapshot contents (and load snapshot’s + // cluster configuration) + firstLogIndex := rf.logs[0].Index + if firstLogIndex <= args.LastIncludedIndex { + rf.logs = append([]Entry{}, Entry{ + Index: args.LastIncludedIndex, + Term: args.LastIncludedTerm, + Command: nil, + }) + } else if firstLogIndex < args.LastIncludedIndex { + trimLen := args.LastIncludedIndex - firstLogIndex + rf.logs = append([]Entry{}, rf.logs[trimLen:]...) 
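+		// the new head entry corresponds to the snapshot's last included
+		// index, so its command is covered by the snapshot and can be dropped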
+ rf.logs[0].Command = nil + } + rf.persister.Save(rf.encodeState(), args.Data) + rf.smsg = &ApplyMsg{ + SnapshotValid: true, + Snapshot: args.Data, + SnapshotTerm: args.LastIncludedTerm, + SnapshotIndex: args.LastIncludedIndex, + } +} + +func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { + reply := &InstallSnapshotReply{} + ok := rf.peers[server].Call("Raft.InstallSnapshot", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + + if args.LastIncludedIndex != rf.logs[0].Index { + return + } + + rf.nextIndex[server] = args.LastIncludedIndex + 1 + rf.matchIndex[server] = args.LastIncludedIndex + + rf.persister.Save(rf.encodeState(), args.Data) +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/persister.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/persister.go new file mode 100644 index 0000000..c5f816c --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/persister.go @@ -0,0 +1,70 @@ +package raft + +// +// support for Raft and kvraft to save persistent +// Raft state (log &c) and k/v server snapshots. +// +// we will use the original persister.go to test your code for grading. +// so, while you can modify this code to help you debug, please +// test with the original before submitting. +// + +import "sync" + +type Persister struct { + mu sync.Mutex + raftstate []byte + snapshot []byte +} + +func MakePersister() *Persister { + return &Persister{} +} + +func clone(orig []byte) []byte { + x := make([]byte, len(orig)) + copy(x, orig) + return x +} + +func (ps *Persister) Copy() *Persister { + ps.mu.Lock() + defer ps.mu.Unlock() + np := MakePersister() + np.raftstate = ps.raftstate + np.snapshot = ps.snapshot + return np +} + +func (ps *Persister) ReadRaftState() []byte { + ps.mu.Lock() + defer ps.mu.Unlock() + return clone(ps.raftstate) +} + +func (ps *Persister) RaftStateSize() int { + ps.mu.Lock() + defer ps.mu.Unlock() + return len(ps.raftstate) +} + +// Save both Raft state and K/V snapshot as a single atomic action, +// to help avoid them getting out of sync. +func (ps *Persister) Save(raftstate []byte, snapshot []byte) { + ps.mu.Lock() + defer ps.mu.Unlock() + ps.raftstate = clone(raftstate) + ps.snapshot = clone(snapshot) +} + +func (ps *Persister) ReadSnapshot() []byte { + ps.mu.Lock() + defer ps.mu.Unlock() + return clone(ps.snapshot) +} + +func (ps *Persister) SnapshotSize() int { + ps.mu.Lock() + defer ps.mu.Unlock() + return len(ps.snapshot) +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/raft.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/raft.go new file mode 100644 index 0000000..9946898 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/raft.go @@ -0,0 +1,416 @@ +package raft + +// +// this is an outline of the API that raft must expose to +// the service (or tester). see comments below for +// each of these functions for more details. +// +// rf = Make(...) +// create a new Raft server. 
+// rf.Start(command interface{}) (index, term, isleader) +// start agreement on a new log entry +// rf.GetState() (term, isLeader) +// ask a Raft for its current term, and whether it thinks it is leader +// ApplyMsg +// each time a new entry is committed to the log, each Raft peer +// should send an ApplyMsg to the service (or tester) +// in the same server. +// + +import ( + // "bytes" + + "bytes" + "log" + "sync" + "sync/atomic" + "time" + + // "6.5840/labgob" + "6.5840/labgob" + "6.5840/labrpc" +) + +// as each Raft peer becomes aware that successive log entries are +// committed, the peer should send an ApplyMsg to the service (or +// tester) on the same server, via the applyCh passed to Make(). set +// CommandValid to true to indicate that the ApplyMsg contains a newly +// committed log entry. +// +// in part 3D you'll want to send other kinds of messages (e.g., +// snapshots) on the applyCh, but set CommandValid to false for these +// other uses. +type ApplyMsg struct { + CommandValid bool + Command interface{} + CommandIndex int + CommandTerm int + + // For 3D: + SnapshotValid bool + Snapshot []byte + SnapshotTerm int + SnapshotIndex int +} + +type Entry struct { + Term int + Index int + Command interface{} +} + +// Base struct for common fields +type BaseRPC struct { + Term int +} + +// Implement RaftRPC interface for BaseRPC +func (b *BaseRPC) GetTerm() int { + return b.Term +} + +func (b *BaseRPC) SetTerm(term int) { + b.Term = term +} + +// RaftRPC interface +type RaftRPC interface { + GetTerm() int + SetTerm(int) +} + +type ServerState int + +const ( + FOLLOWER ServerState = iota + CANDIDATE + LEADER +) + +// A Go object implementing a single Raft peer. +type Raft struct { + mu sync.Mutex // Lock to protect shared access to this peer's state + peers []*labrpc.ClientEnd // RPC end points of all peers + persister *Persister // Object to hold this peer's persisted state + me int // this peer's index into peers[] + dead int32 // set by Kill() + heartbeatTimeout time.Duration + electionTimeout time.Duration + electionTimeStamp time.Time + applyCh chan ApplyMsg + + // state a Raft server must maintain. + broadcasterCond []*sync.Cond + applierCond *sync.Cond + + // server state + state ServerState + + // presistent state on all servers + currentTerm int // latest term server has seen (initialized to 0 on first boot, increases monotonically) + votedFor int // candidateId that received vote in current term (or null if none) + logs []Entry // log entries; each entry contains command for state machine, and term when entry was received by leader (first index is 1) + + // volatile state on all servers + commitIndex int // index of highest log entry known to be committed (initialized to 0, increases monotonically) + lastApplied int // index of highest log entry applied to state machine (initialized to 0, increases monotonically) + + // volatile state on leaders (reinitialized after election) + nextIndex []int // for each server, index of the next log entry to send to that server (initialized to leader last log index + 1) + matchIndex []int // for each server, index of highest log entry known to be replicated on server (initialized to 0, increases monotonically) + + // snapshot msg + smsg *ApplyMsg +} + +// return currentTerm and whether this server +// believes it is the leader. 
+func (rf *Raft) GetState() (int, bool) { + rf.mu.Lock() + defer rf.mu.Unlock() + return rf.currentTerm, rf.state == LEADER +} + +func (rf *Raft) encodeState() []byte { + w := new(bytes.Buffer) + e := labgob.NewEncoder(w) + e.Encode(rf.currentTerm) + e.Encode(rf.votedFor) + e.Encode(rf.logs) + return w.Bytes() +} + +// save Raft's persistent state to stable storage, +// where it can later be retrieved after a crash and restart. +// see paper's Figure 2 for a description of what should be persistent. +// before you've implemented snapshots, you should pass nil as the +// second argument to persister.Save(). +// after you've implemented snapshots, pass the current snapshot +// (or nil if there's not yet a snapshot). +func (rf *Raft) persist() { + if rf.persister.ReadSnapshot() != nil { + rf.persister.Save(rf.encodeState(), rf.persister.ReadSnapshot()) + } else { + rf.persister.Save(rf.encodeState(), nil) + } +} + +// restore previously persisted state. +func (rf *Raft) readPersist(data []byte) { + if data == nil || len(data) < 1 { // bootstrap without any state + return + } + r := bytes.NewBuffer(data) + d := labgob.NewDecoder(r) + var currentTerm int + var votedFor int + var logs []Entry + + if d.Decode(¤tTerm) != nil || d.Decode(&votedFor) != nil || d.Decode(&logs) != nil { + log.Fatal("failed to read persist\n") + } else { + DPrintf("[%d]: read persist, currentTerm: %d, votedFor: %d, logs: %v\n", rf.me, currentTerm, votedFor, logs) + rf.currentTerm = currentTerm + rf.votedFor = votedFor + rf.logs = logs + rf.lastApplied = rf.logs[0].Index + rf.commitIndex = rf.logs[0].Index + } +} + +// the service says it has created a snapshot that has +// all info up to and including index. this means the +// service no longer needs the log through (and including) +// that index. Raft should now trim its log as much as possible. +func (rf *Raft) Snapshot(index int, snapshot []byte) { + // Your code here (3D). + rf.mu.Lock() + defer rf.mu.Unlock() + // if the snapshot is outdated, just ignore it + if rf.logs[0].Index >= index { + return + } + firstLogIndex := rf.logs[0].Index + trimLen := index - firstLogIndex + // trim the logs + rf.logs = append([]Entry{}, rf.logs[trimLen:]...) + rf.logs[0].Command = nil + rf.persister.Save(rf.encodeState(), snapshot) +} + +// the service using Raft (e.g. a k/v server) wants to start +// agreement on the next command to be appended to Raft's log. if this +// server isn't the leader, returns false. otherwise start the +// agreement and return immediately. there is no guarantee that this +// command will ever be committed to the Raft log, since the leader +// may fail or lose an election. even if the Raft instance has been killed, +// this function should return gracefully. +// +// the first return value is the index that the command will appear at +// if it's ever committed. the second return value is the current +// term. the third return value is true if this server believes it is +// the leader. +func (rf *Raft) Start(command interface{}) (int, int, bool) { + rf.mu.Lock() + defer rf.mu.Unlock() + if rf.state != LEADER { + return -1, -1, false + } + defer DPrintf("(Start) [%d]: command %+v, index:%d, term: %d\n", rf.me, command, rf.logs[len(rf.logs)-1].Index, rf.currentTerm) + rf.logs = append(rf.logs, Entry{ + Term: rf.currentTerm, + Index: rf.logs[len(rf.logs)-1].Index + 1, + Command: command, + }) + rf.broadcastAppendEntries(false) + // Your code here (3B). 
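+	// the freshly appended entry sits at the end of the log, so report its
+	// index together with the current term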
+ return rf.logs[len(rf.logs)-1].Index, rf.currentTerm, true +} + +// Warning: this function is not thread-safe +func (rf *Raft) resetNewTermState(targetTerm int) { + DPrintf("(ResetTerm)[%d]: received newer term, set term to %d\n", rf.me, targetTerm) + if rf.currentTerm < targetTerm { + rf.votedFor = -1 + } + rf.currentTerm = targetTerm + rf.state = FOLLOWER // reset to follower +} + +// Reply false if term < currentTerm (§5.1) +// If RPC request contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkRequestTerm(args, reply RaftRPC) bool { + term := args.GetTerm() + defer reply.SetTerm(rf.currentTerm) + if term < rf.currentTerm { + return false + } + if term > rf.currentTerm { + rf.resetNewTermState(term) + } + return true +} + +// If RPC request or response contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkResponseTerm(args, reply RaftRPC, isElection bool) bool { + argsTerm := args.GetTerm() + replyTerm := reply.GetTerm() + if replyTerm > argsTerm { + rf.resetNewTermState(replyTerm) + rf.resetElectionTimer() + return false + } + return isElection || (rf.state == LEADER) +} + +// the tester doesn't halt goroutines created by Raft after each test, +// but it does call the Kill() method. your code can use killed() to +// check whether Kill() has been called. the use of atomic avoids the +// need for a lock. +// +// the issue is that long-running goroutines use memory and may chew +// up CPU time, perhaps causing later tests to fail and generating +// confusing debug output. any goroutine with a long-running loop +// should call killed() to check whether it should stop. +func (rf *Raft) Kill() { + atomic.StoreInt32(&rf.dead, 1) + // Your code here, if desired. 
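+	// the long-running goroutines (ticker, applier, broadcaster) check this
+	// flag via killed() in their loop conditions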
+} + +func (rf *Raft) killed() bool { + z := atomic.LoadInt32(&rf.dead) + return z == 1 +} + +// a dedicated applier goroutine to guarantee that each log will be push into applyCh exactly once, ensuring that service's applying entries and raft's committing entries can be parallel +func (rf *Raft) applier() { + for !rf.killed() { + rf.mu.Lock() + // if there is no need to apply entries, just release CPU and wait other goroutine's signal if they commit new entries + for rf.lastApplied >= rf.commitIndex { + rf.applierCond.Wait() + } + firstLogIndex := rf.logs[0].Index + commitIndex, lastApplied := rf.commitIndex, rf.lastApplied + DPrintf("(applier) [%d]: commitIndex: %d, lastApplied: %d, logFirstIndex: %d, logLastIndex: %d\n", rf.me, commitIndex, lastApplied, firstLogIndex, rf.logs[len(rf.logs)-1].Index) + entries := make([]Entry, commitIndex-lastApplied) + copy(entries, rf.logs[lastApplied+1-firstLogIndex:commitIndex+1-firstLogIndex]) + if rf.smsg != nil { + msg := rf.smsg + rf.smsg = nil + rf.mu.Unlock() + rf.applyCh <- *msg + } else { + rf.mu.Unlock() + } + for _, entry := range entries { + DPrintf("(applier) [%d]: apply entry %+v\n", rf.me, entry) + rf.applyCh <- ApplyMsg{ + CommandValid: true, + Command: entry.Command, + CommandTerm: entry.Term, + CommandIndex: entry.Index, + } + } + rf.mu.Lock() + // use commitIndex rather than rf.commitIndex because rf.commitIndex may change during the Unlock() and Lock() + // use Max(rf.lastApplied, commitIndex) rather than commitIndex directly to avoid concurrently InstallSnapshot rpc causing lastApplied to rollback + if rf.lastApplied < commitIndex { + rf.lastApplied = commitIndex + } + rf.mu.Unlock() + } +} + +/** + * Lets illustrate the time line of the ticker function + * e: election timeout + * h: heartbeat timeout + * + * ---- h ---- h ---- h ---- h ---- h ---- ... + * + * First, the server will wake up each fixed heartbeat timeout. This timeout is + * relatively shorter than the election timeout. If the server is not a leader, + * it basically do nothing about heartbeat. + * + * However, everytime when server wake up, it will check if the election timeout + * is reached. It might start a new election, if it is not a leader. + * + * v election timeout found! + * ---- h1 ---- h2 ---- h3 ---- h ---- h ---- ... + * --------- e1 ------ e2 ------------ e ---- ... + * + * Reseting a new election timeout when the server receives a heartbeat or a + * vote from another server prevents the election. One shortcomming of the + * current implementation is that the election timeout does not trigger a new + * election immediately. It will wait until the next heartbeat timeout. + */ +func (rf *Raft) ticker() { + for !rf.killed() { + rf.mu.Lock() + if rf.state == LEADER { + rf.broadcastAppendEntries(true) + } else if rf.isElectionTimeout() { + rf.startElection() + } + rf.mu.Unlock() + time.Sleep(rf.heartbeatTimeout) + } +} + +// the service or tester wants to create a Raft server. the ports +// of all the Raft servers (including this one) are in peers[]. this +// server's port is peers[me]. all the servers' peers[] arrays +// have the same order. persister is a place for this server to +// save its persistent state, and also initially holds the most +// recent saved state, if any. applyCh is a channel on which the +// tester or service expects Raft to send ApplyMsg messages. +// Make() must return quickly, so it should start goroutines +// for any long-running work. 
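+//
+// This implementation starts one broadcaster goroutine per peer, a ticker
+// goroutine that drives heartbeats and election timeouts, and an applier
+// goroutine that delivers committed entries on applyCh.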
+func Make(peers []*labrpc.ClientEnd, me int, + persister *Persister, applyCh chan ApplyMsg) *Raft { + rf := &Raft{} + rf.peers = peers + rf.persister = persister + rf.me = me + rf.applyCh = applyCh + rf.heartbeatTimeout = 125 * time.Millisecond + rf.resetElectionTimer() + rf.state = FOLLOWER + rf.votedFor = -1 + rf.logs = make([]Entry, 0) + + // dummy entry to make the index start from 1 + rf.logs = append(rf.logs, Entry{0, 0, nil}) + + rf.commitIndex = 0 + rf.lastApplied = 0 + + rf.applierCond = sync.NewCond(&rf.mu) + rf.broadcasterCond = make([]*sync.Cond, len(peers)) + + rf.nextIndex = make([]int, len(peers)) + rf.matchIndex = make([]int, len(peers)) + + for id := range peers { + rf.nextIndex[id] = 1 + if id != rf.me { + rf.broadcasterCond[id] = sync.NewCond(&sync.Mutex{}) + go rf.broadcaster(id) + } + } + + rf.smsg = nil + + // initialize from state persisted before a crash + rf.readPersist(persister.ReadRaftState()) + + // start ticker goroutine to start elections + go rf.ticker() + + go rf.applier() + + return rf +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/util.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/util.go new file mode 100644 index 0000000..37c7fe6 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5a_shardkv/starter_files/raft/util.go @@ -0,0 +1,16 @@ +package raft + +import ( + "log" + "os" +) + +// Debugging +var Debug = os.Getenv("DEBUG") == "1" + +func DPrintf(format string, a ...interface{}) { + if !Debug { + return + } + log.Printf(format, a...) +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/config.json b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/config.json index d290052..3d359ed 100644 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/config.json +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/config.json @@ -10,5 +10,93 @@ "fault-tolerance", "go" ], - "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024" -} \ No newline at end of file + "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", + "starter_files": [ + { + "src": "raft/persister.go", + "dest": "/workspace/src/raft/persister.go" + }, + { + "src": "raft/election.go", + "dest": "/workspace/src/raft/election.go" + }, + { + "src": "raft/append_entries.go", + "dest": "/workspace/src/raft/append_entries.go" + }, + { + "src": "raft/util.go", + "dest": "/workspace/src/raft/util.go" + }, + { + "src": "raft/install_snapshot.go", + "dest": "/workspace/src/raft/install_snapshot.go" + }, + { + "src": "raft/raft.go", + "dest": "/workspace/src/raft/raft.go" + }, + { + "src": "kvraft/server.go", + "dest": "/workspace/src/kvraft/server.go" + }, + { + "src": "kvraft/client.go", + "dest": "/workspace/src/kvraft/client.go" + }, + { + "src": "kvraft/common.go", + "dest": "/workspace/src/kvraft/common.go" + }, + { + "src": "kvsrv/server.go", + "dest": "/workspace/src/kvsrv/server.go" + }, + { + "src": "kvsrv/client.go", + "dest": "/workspace/src/kvsrv/client.go" + }, + { + "src": "kvsrv/common.go", + "dest": "/workspace/src/kvsrv/common.go" + }, + { + "src": "mr/coordinator.go", + "dest": "/workspace/src/mr/coordinator.go" + }, + { + "src": "mr/worker.go", + "dest": "/workspace/src/mr/worker.go" + }, + { + "src": "mr/rpc.go", + "dest": "/workspace/src/mr/rpc.go" + } + ], + "output_files": [ + { + "src": "/workspace/src/shardctrler/client.go", + "dest": "src/shardctrler/client.go" + }, + { + "src": "/workspace/src/shardctrler/common.go", + "dest": 
"src/shardctrler/common.go" + }, + { + "src": "/workspace/src/shardctrler/server.go", + "dest": "src/shardctrler/server.go" + }, + { + "src": "/workspace/src/shardkv/client.go", + "dest": "src/shardkv/client.go" + }, + { + "src": "/workspace/src/shardkv/common.go", + "dest": "src/shardkv/common.go" + }, + { + "src": "/workspace/src/shardkv/server.go", + "dest": "src/shardkv/server.go" + } + ] +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/preprocess.sh b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/preprocess.sh index a6edaed..1c9ab00 100755 --- a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/preprocess.sh +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/preprocess.sh @@ -5,2011 +5,6 @@ echo '=== Preprocessing 5B Shardkv ===' cd /workspace -echo 'Copying reference implementations from previous labs...' - -echo 'Copying raft implementation...' -mkdir -p src/raft - -cat > src/raft/raft.go << 'FILE_EOF_raft_raft_go' -package raft - -// -// this is an outline of the API that raft must expose to -// the service (or tester). see comments below for -// each of these functions for more details. -// -// rf = Make(...) -// create a new Raft server. -// rf.Start(command interface{}) (index, term, isleader) -// start agreement on a new log entry -// rf.GetState() (term, isLeader) -// ask a Raft for its current term, and whether it thinks it is leader -// ApplyMsg -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester) -// in the same server. -// - -import ( - // "bytes" - - "bytes" - "log" - "sync" - "sync/atomic" - "time" - - // "6.5840/labgob" - "6.5840/labgob" - "6.5840/labrpc" -) - -// as each Raft peer becomes aware that successive log entries are -// committed, the peer should send an ApplyMsg to the service (or -// tester) on the same server, via the applyCh passed to Make(). set -// CommandValid to true to indicate that the ApplyMsg contains a newly -// committed log entry. -// -// in part 3D you'll want to send other kinds of messages (e.g., -// snapshots) on the applyCh, but set CommandValid to false for these -// other uses. -type ApplyMsg struct { - CommandValid bool - Command interface{} - CommandIndex int - CommandTerm int - - // For 3D: - SnapshotValid bool - Snapshot []byte - SnapshotTerm int - SnapshotIndex int -} - -type Entry struct { - Term int - Index int - Command interface{} -} - -// Base struct for common fields -type BaseRPC struct { - Term int -} - -// Implement RaftRPC interface for BaseRPC -func (b *BaseRPC) GetTerm() int { - return b.Term -} - -func (b *BaseRPC) SetTerm(term int) { - b.Term = term -} - -// RaftRPC interface -type RaftRPC interface { - GetTerm() int - SetTerm(int) -} - -type ServerState int - -const ( - FOLLOWER ServerState = iota - CANDIDATE - LEADER -) - -// A Go object implementing a single Raft peer. -type Raft struct { - mu sync.Mutex // Lock to protect shared access to this peer's state - peers []*labrpc.ClientEnd // RPC end points of all peers - persister *Persister // Object to hold this peer's persisted state - me int // this peer's index into peers[] - dead int32 // set by Kill() - heartbeatTimeout time.Duration - electionTimeout time.Duration - electionTimeStamp time.Time - applyCh chan ApplyMsg - - // state a Raft server must maintain. 
- broadcasterCond []*sync.Cond - applierCond *sync.Cond - - // server state - state ServerState - - // presistent state on all servers - currentTerm int // latest term server has seen (initialized to 0 on first boot, increases monotonically) - votedFor int // candidateId that received vote in current term (or null if none) - logs []Entry // log entries; each entry contains command for state machine, and term when entry was received by leader (first index is 1) - - // volatile state on all servers - commitIndex int // index of highest log entry known to be committed (initialized to 0, increases monotonically) - lastApplied int // index of highest log entry applied to state machine (initialized to 0, increases monotonically) - - // volatile state on leaders (reinitialized after election) - nextIndex []int // for each server, index of the next log entry to send to that server (initialized to leader last log index + 1) - matchIndex []int // for each server, index of highest log entry known to be replicated on server (initialized to 0, increases monotonically) - - // snapshot msg - smsg *ApplyMsg -} - -// return currentTerm and whether this server -// believes it is the leader. -func (rf *Raft) GetState() (int, bool) { - rf.mu.Lock() - defer rf.mu.Unlock() - return rf.currentTerm, rf.state == LEADER -} - -func (rf *Raft) encodeState() []byte { - w := new(bytes.Buffer) - e := labgob.NewEncoder(w) - e.Encode(rf.currentTerm) - e.Encode(rf.votedFor) - e.Encode(rf.logs) - return w.Bytes() -} - -// save Raft's persistent state to stable storage, -// where it can later be retrieved after a crash and restart. -// see paper's Figure 2 for a description of what should be persistent. -// before you've implemented snapshots, you should pass nil as the -// second argument to persister.Save(). -// after you've implemented snapshots, pass the current snapshot -// (or nil if there's not yet a snapshot). -func (rf *Raft) persist() { - if rf.persister.ReadSnapshot() != nil { - rf.persister.Save(rf.encodeState(), rf.persister.ReadSnapshot()) - } else { - rf.persister.Save(rf.encodeState(), nil) - } -} - -// restore previously persisted state. -func (rf *Raft) readPersist(data []byte) { - if data == nil || len(data) < 1 { // bootstrap without any state - return - } - r := bytes.NewBuffer(data) - d := labgob.NewDecoder(r) - var currentTerm int - var votedFor int - var logs []Entry - - if d.Decode(¤tTerm) != nil || d.Decode(&votedFor) != nil || d.Decode(&logs) != nil { - log.Fatal("failed to read persist\n") - } else { - DPrintf("[%d]: read persist, currentTerm: %d, votedFor: %d, logs: %v\n", rf.me, currentTerm, votedFor, logs) - rf.currentTerm = currentTerm - rf.votedFor = votedFor - rf.logs = logs - rf.lastApplied = rf.logs[0].Index - rf.commitIndex = rf.logs[0].Index - } -} - -// the service says it has created a snapshot that has -// all info up to and including index. this means the -// service no longer needs the log through (and including) -// that index. Raft should now trim its log as much as possible. -func (rf *Raft) Snapshot(index int, snapshot []byte) { - // Your code here (3D). - rf.mu.Lock() - defer rf.mu.Unlock() - // if the snapshot is outdated, just ignore it - if rf.logs[0].Index >= index { - return - } - firstLogIndex := rf.logs[0].Index - trimLen := index - firstLogIndex - // trim the logs - rf.logs = append([]Entry{}, rf.logs[trimLen:]...) - rf.logs[0].Command = nil - rf.persister.Save(rf.encodeState(), snapshot) -} - -// the service using Raft (e.g. 
a k/v server) wants to start -// agreement on the next command to be appended to Raft's log. if this -// server isn't the leader, returns false. otherwise start the -// agreement and return immediately. there is no guarantee that this -// command will ever be committed to the Raft log, since the leader -// may fail or lose an election. even if the Raft instance has been killed, -// this function should return gracefully. -// -// the first return value is the index that the command will appear at -// if it's ever committed. the second return value is the current -// term. the third return value is true if this server believes it is -// the leader. -func (rf *Raft) Start(command interface{}) (int, int, bool) { - rf.mu.Lock() - defer rf.mu.Unlock() - if rf.state != LEADER { - return -1, -1, false - } - defer DPrintf("(Start) [%d]: command %+v, index:%d, term: %d\n", rf.me, command, rf.logs[len(rf.logs)-1].Index, rf.currentTerm) - rf.logs = append(rf.logs, Entry{ - Term: rf.currentTerm, - Index: rf.logs[len(rf.logs)-1].Index + 1, - Command: command, - }) - rf.broadcastAppendEntries(false) - // Your code here (3B). - return rf.logs[len(rf.logs)-1].Index, rf.currentTerm, true -} - -// Warning: this function is not thread-safe -func (rf *Raft) resetNewTermState(targetTerm int) { - DPrintf("(ResetTerm)[%d]: received newer term, set term to %d\n", rf.me, targetTerm) - if rf.currentTerm < targetTerm { - rf.votedFor = -1 - } - rf.currentTerm = targetTerm - rf.state = FOLLOWER // reset to follower -} - -// Reply false if term < currentTerm (§5.1) -// If RPC request contains term T > currentTerm: -// set currentTerm = T, convert to follower (§5.1) -func (rf *Raft) checkRequestTerm(args, reply RaftRPC) bool { - term := args.GetTerm() - defer reply.SetTerm(rf.currentTerm) - if term < rf.currentTerm { - return false - } - if term > rf.currentTerm { - rf.resetNewTermState(term) - } - return true -} - -// If RPC request or response contains term T > currentTerm: -// set currentTerm = T, convert to follower (§5.1) -func (rf *Raft) checkResponseTerm(args, reply RaftRPC, isElection bool) bool { - argsTerm := args.GetTerm() - replyTerm := reply.GetTerm() - if replyTerm > argsTerm { - rf.resetNewTermState(replyTerm) - rf.resetElectionTimer() - return false - } - return isElection || (rf.state == LEADER) -} - -// the tester doesn't halt goroutines created by Raft after each test, -// but it does call the Kill() method. your code can use killed() to -// check whether Kill() has been called. the use of atomic avoids the -// need for a lock. -// -// the issue is that long-running goroutines use memory and may chew -// up CPU time, perhaps causing later tests to fail and generating -// confusing debug output. any goroutine with a long-running loop -// should call killed() to check whether it should stop. -func (rf *Raft) Kill() { - atomic.StoreInt32(&rf.dead, 1) - // Your code here, if desired. 
-} - -func (rf *Raft) killed() bool { - z := atomic.LoadInt32(&rf.dead) - return z == 1 -} - -// a dedicated applier goroutine to guarantee that each log will be push into applyCh exactly once, ensuring that service's applying entries and raft's committing entries can be parallel -func (rf *Raft) applier() { - for !rf.killed() { - rf.mu.Lock() - // if there is no need to apply entries, just release CPU and wait other goroutine's signal if they commit new entries - for rf.lastApplied >= rf.commitIndex { - rf.applierCond.Wait() - } - firstLogIndex := rf.logs[0].Index - commitIndex, lastApplied := rf.commitIndex, rf.lastApplied - DPrintf("(applier) [%d]: commitIndex: %d, lastApplied: %d, logFirstIndex: %d, logLastIndex: %d\n", rf.me, commitIndex, lastApplied, firstLogIndex, rf.logs[len(rf.logs)-1].Index) - entries := make([]Entry, commitIndex-lastApplied) - copy(entries, rf.logs[lastApplied+1-firstLogIndex:commitIndex+1-firstLogIndex]) - if rf.smsg != nil { - msg := rf.smsg - rf.smsg = nil - rf.mu.Unlock() - rf.applyCh <- *msg - } else { - rf.mu.Unlock() - } - for _, entry := range entries { - DPrintf("(applier) [%d]: apply entry %+v\n", rf.me, entry) - rf.applyCh <- ApplyMsg{ - CommandValid: true, - Command: entry.Command, - CommandTerm: entry.Term, - CommandIndex: entry.Index, - } - } - rf.mu.Lock() - // use commitIndex rather than rf.commitIndex because rf.commitIndex may change during the Unlock() and Lock() - // use Max(rf.lastApplied, commitIndex) rather than commitIndex directly to avoid concurrently InstallSnapshot rpc causing lastApplied to rollback - if rf.lastApplied < commitIndex { - rf.lastApplied = commitIndex - } - rf.mu.Unlock() - } -} - -/** - * Lets illustrate the time line of the ticker function - * e: election timeout - * h: heartbeat timeout - * - * ---- h ---- h ---- h ---- h ---- h ---- ... - * - * First, the server will wake up each fixed heartbeat timeout. This timeout is - * relatively shorter than the election timeout. If the server is not a leader, - * it basically do nothing about heartbeat. - * - * However, everytime when server wake up, it will check if the election timeout - * is reached. It might start a new election, if it is not a leader. - * - * v election timeout found! - * ---- h1 ---- h2 ---- h3 ---- h ---- h ---- ... - * --------- e1 ------ e2 ------------ e ---- ... - * - * Reseting a new election timeout when the server receives a heartbeat or a - * vote from another server prevents the election. One shortcomming of the - * current implementation is that the election timeout does not trigger a new - * election immediately. It will wait until the next heartbeat timeout. - */ -func (rf *Raft) ticker() { - for !rf.killed() { - rf.mu.Lock() - if rf.state == LEADER { - rf.broadcastAppendEntries(true) - } else if rf.isElectionTimeout() { - rf.startElection() - } - rf.mu.Unlock() - time.Sleep(rf.heartbeatTimeout) - } -} - -// the service or tester wants to create a Raft server. the ports -// of all the Raft servers (including this one) are in peers[]. this -// server's port is peers[me]. all the servers' peers[] arrays -// have the same order. persister is a place for this server to -// save its persistent state, and also initially holds the most -// recent saved state, if any. applyCh is a channel on which the -// tester or service expects Raft to send ApplyMsg messages. -// Make() must return quickly, so it should start goroutines -// for any long-running work. 
-func Make(peers []*labrpc.ClientEnd, me int, - persister *Persister, applyCh chan ApplyMsg) *Raft { - rf := &Raft{} - rf.peers = peers - rf.persister = persister - rf.me = me - rf.applyCh = applyCh - rf.heartbeatTimeout = 125 * time.Millisecond - rf.resetElectionTimer() - rf.state = FOLLOWER - rf.votedFor = -1 - rf.logs = make([]Entry, 0) - - // dummy entry to make the index start from 1 - rf.logs = append(rf.logs, Entry{0, 0, nil}) - - rf.commitIndex = 0 - rf.lastApplied = 0 - - rf.applierCond = sync.NewCond(&rf.mu) - rf.broadcasterCond = make([]*sync.Cond, len(peers)) - - rf.nextIndex = make([]int, len(peers)) - rf.matchIndex = make([]int, len(peers)) - - for id := range peers { - rf.nextIndex[id] = 1 - if id != rf.me { - rf.broadcasterCond[id] = sync.NewCond(&sync.Mutex{}) - go rf.broadcaster(id) - } - } - - rf.smsg = nil - - // initialize from state persisted before a crash - rf.readPersist(persister.ReadRaftState()) - - // start ticker goroutine to start elections - go rf.ticker() - - go rf.applier() - - return rf -} - -FILE_EOF_raft_raft_go - -cat > src/raft/election.go << 'FILE_EOF_raft_election_go' -package raft - -import ( - "math/rand" - "sync/atomic" - "time" -) - -// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 - -type RequestVoteArgs struct { - BaseRPC // candidate's term - CandidateId int // candidate requesting vote - LastLogIndex int // index of candidate's last log entry - LastLogTerm int // term of candidate's last log entry -} - -type RequestVoteReply struct { - BaseRPC // currentTerm, for candidate to update itself - VoteGranted bool // true means candidate received vote -} - -// RequestVote RPC handler -// Restart your election timer if you grant a vote to another peer. -func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - - reply.VoteGranted = false - - DPrintf("(RequestVote) [%d]: receive vote request from %d, term %d\n", rf.me, args.CandidateId, args.Term) - - if !rf.checkRequestTerm(args, reply) { - return - } - - if (rf.votedFor == -1 || rf.votedFor == args.CandidateId) && rf.isUpToDate(args) { - reply.VoteGranted = true - rf.votedFor = args.CandidateId - rf.resetElectionTimer() - } -} - -func (rf *Raft) isUpToDate(args *RequestVoteArgs) bool { - lastLog := rf.logs[len(rf.logs)-1] - candidateIndex := args.LastLogIndex - candidateTerm := args.LastLogTerm - return candidateTerm > lastLog.Term || (candidateTerm == lastLog.Term && candidateIndex >= lastLog.Index) -} - -func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, voteCount *int32) { - reply := &RequestVoteReply{} - ok := rf.peers[server].Call("Raft.RequestVote", args, reply) - if !ok { - return - } - - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - - if !rf.checkResponseTerm(args, reply, true) { - return - } - - if !reply.VoteGranted { - return - } - - DPrintf("(RequestVote) [%d]: received vote from %d, voteCount: %d\n", rf.me, server, *voteCount) - - // If votes received from majority of servers: become leader - if atomic.AddInt32(voteCount, 1) > int32(len(rf.peers)/2) && - rf.state == CANDIDATE && - rf.currentTerm == args.Term { - rf.state = LEADER - lastLogIndex := rf.logs[len(rf.logs)-1].Index - for i := range rf.peers { - rf.nextIndex[i] = lastLogIndex + 1 - rf.matchIndex[i] = 0 - } - DPrintf("[%d]: become leader to term %d\n", rf.me, rf.currentTerm) - // send initial empty AppendEntries RPCs (heartbeat) to each server immediately - rf.broadcastAppendEntries(true) 
- } - DPrintf("(RequestVote) [%d]: voteCount: %d\n", rf.me, *voteCount) -} - -func (rf *Raft) startElection() { - rf.currentTerm++ - rf.state = CANDIDATE - rf.votedFor = rf.me - rf.resetElectionTimer() - DPrintf("(RequestVote) [%d]: start election, term %d", rf.me, rf.currentTerm) - lastLog := rf.logs[len(rf.logs)-1] - - voteCount := int32(1) - args := RequestVoteArgs{ - BaseRPC: BaseRPC{rf.currentTerm}, - CandidateId: rf.me, - LastLogIndex: lastLog.Index, - LastLogTerm: lastLog.Term, - } - - for id := range rf.peers { - if id == rf.me { - continue - } - go rf.sendRequestVote(id, &args, &voteCount) - } -} - -func (rf *Raft) resetElectionTimer() { - // election timeout range from 350 to 550 - ms := 350 + (rand.Int63() % 200) - rf.electionTimeStamp = time.Now() - rf.electionTimeout = time.Duration(ms) * time.Millisecond -} - -func (rf *Raft) isElectionTimeout() bool { - return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) -} - -FILE_EOF_raft_election_go - -cat > src/raft/append_entries.go << 'FILE_EOF_raft_append_entries_go' -package raft - -// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 - -type AppendEntriesArgs struct { - BaseRPC // leader's term - LeaderId int // so follower can redirect clients - PrevLogIndex int // index of log entry immediately preceding new ones - PrevLogTerm int // term of prevLogIndex entry - Entries []Entry // log entries to store (empty for heartbeat; may send more than one for efficiency) - CommitIndex int // leader's commitIndex -} - -type AppendEntriesReply struct { - BaseRPC // currentTerm, for leader to update itself - Success bool // true if follower contained entry matching prevLogIndex and prevLogTerm - ConflictIndex int // the index of the first conflicting entry -} - -// AppendEntries RPC handler -// Reset the election timer if you get an AppendEntries RPC from the current leader -// (i.e., if the term of the AppendEntries arguments is outdated, you should not reset your timer); -func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - DPrintf("(AppendEntries) [%d] recieve from %d, Term: %d, PrevLogIndex: %d, PrevLogTerm: %d\n", rf.me, args.LeaderId, args.Term, args.PrevLogIndex, args.PrevLogTerm) - - reply.Success = false - reply.ConflictIndex = -1 - - if !rf.checkRequestTerm(args, reply) { - return - } - - if rf.state == CANDIDATE { - rf.state = FOLLOWER - } - - rf.resetElectionTimer() - - prevLogIndex := args.PrevLogIndex - rf.logs[0].Index - - if prevLogIndex < 0 { - // force to send a snapshot - reply.ConflictIndex = 0 - return - } - - // Reply false if log doesn’t contain an entry at prevLogIndex - // whose term matches prevLogTerm (§5.3) - if prevLogIndex >= len(rf.logs) { - reply.ConflictIndex = rf.logs[len(rf.logs)-1].Index - return - } - - // If an existing entry conflicts with a new one (same index - // but different terms), delete the existing entry and all that - // follow it (§5.3) - if rf.logs[prevLogIndex].Term != args.PrevLogTerm { - // optimization - curTerm := rf.logs[prevLogIndex].Term - var conflictIndex int - for i := prevLogIndex; i > 0; i-- { - if rf.logs[i-1].Term != curTerm { - conflictIndex = i - break - } - } - reply.ConflictIndex = conflictIndex + rf.logs[0].Index - return - } - for idx, entry := range args.Entries { - logIndex := entry.Index - rf.logs[0].Index - if logIndex >= len(rf.logs) || rf.logs[logIndex].Term != entry.Term { - DPrintf("(AppendEntries) [%d] append logs: %v\n", rf.me, 
args.Entries) - rf.logs = append([]Entry{}, append(rf.logs[:logIndex], args.Entries[idx:]...)...) - break - } - } - reply.Success = true - if args.CommitIndex > rf.commitIndex { - rf.commitIndex = args.CommitIndex - if args.CommitIndex-rf.logs[0].Index >= len(rf.logs) { - rf.commitIndex = rf.logs[len(rf.logs)-1].Index - } - } - rf.applierCond.Signal() -} - -func (rf *Raft) sendAppendEntries(server int, args *AppendEntriesArgs) { - reply := &AppendEntriesReply{} - ok := rf.peers[server].Call("Raft.AppendEntries", args, reply) - if !ok { - return - } - - DPrintf("(AppendEntries) [%d] recieve reply from %d, Term: %d, Success: %v, ConflictIndex: %d\n", rf.me, server, reply.Term, reply.Success, reply.ConflictIndex) - - rf.mu.Lock() - defer rf.mu.Unlock() - defer rf.persist() - - if !rf.checkResponseTerm(args, reply, false) { - return - } - // If successful: update nextIndex and matchIndex for - // follower (§5.3) - if reply.Success { - if len(args.Entries) > 0 { - rf.nextIndex[server] = args.Entries[len(args.Entries)-1].Index + 1 - } - rf.matchIndex[server] = rf.nextIndex[server] - 1 - for _, log := range rf.logs { - index := log.Index - count := 1 - for peer := range rf.peers { - if peer != rf.me && rf.matchIndex[peer] >= index { - count++ - } - } - // If there exists an N such that N > commitIndex, a majority - // of matchIndex[i] ≥ N, and log[N].term == currentTerm: - // set commitIndex = N (§5.3, §5.4). - if count > len(rf.peers)/2 && index > rf.commitIndex && log.Term == rf.currentTerm { - rf.commitIndex = index - } - } - } else { - if reply.ConflictIndex != -1 { - rf.nextIndex[server] = reply.ConflictIndex - 1 - } else { - rf.nextIndex[server] = rf.nextIndex[server] - 1 - } - if rf.nextIndex[server] < 1 { - rf.nextIndex[server] = 1 - } - } - DPrintf("(AppendEntries) [%d] nextIndex: %v, matchIndex: %v, commitIndex: %d\n", rf.me, rf.nextIndex, rf.matchIndex, rf.commitIndex) - rf.applierCond.Signal() -} - -func (rf *Raft) broadcastAppendEntries(isHeartBeat bool) { - for peer := range rf.peers { - if peer != rf.me { - // if it is a heartbeat we dont care the linearizability of logs append - if isHeartBeat { - args := rf.prepareReplicationArgs(peer) - go rf.sendReplicationRPC(peer, args) - } else { - rf.broadcasterCond[peer].Signal() - } - } - } -} - -func (rf *Raft) prepareReplicationArgs(peer int) interface{} { - if rf.nextIndex[peer] > rf.logs[0].Index { - firstLog := rf.logs[0] - nextIndex := rf.nextIndex[peer] - firstLog.Index - prevLog := rf.logs[nextIndex-1] - logs := make([]Entry, len(rf.logs[nextIndex:])) - copy(logs, rf.logs[nextIndex:]) - return &AppendEntriesArgs{ - BaseRPC: BaseRPC{rf.currentTerm}, - LeaderId: rf.me, - PrevLogIndex: prevLog.Index, - PrevLogTerm: prevLog.Term, - Entries: logs, - CommitIndex: rf.commitIndex, - } - } else { - return &InstallSnapshotArgs{ - BaseRPC: BaseRPC{rf.currentTerm}, - LeaderId: rf.me, - LastIncludedIndex: rf.logs[0].Index, - LastIncludedTerm: rf.logs[0].Term, - Offset: 0, - Data: rf.persister.ReadSnapshot(), - Done: true, - } - } -} - -func (rf *Raft) sendReplicationRPC(peer int, args interface{}) { - switch v := args.(type) { - case *AppendEntriesArgs: - rf.sendAppendEntries(peer, v) - case *InstallSnapshotArgs: - rf.sendInstallSnapshot(peer, v) - default: - panic("(sendReplicationRPC) SHOULD NOT REACH") - } -} - -func (rf *Raft) isReplicationNeeded(peer int) bool { - return rf.state == LEADER && rf.matchIndex[peer] < rf.logs[len(rf.logs)-1].Index -} - -func (rf *Raft) broadcaster(peer int) { - rf.broadcasterCond[peer].L.Lock() - defer 
rf.broadcasterCond[peer].L.Unlock() - for !rf.killed() { - rf.mu.Lock() - for !rf.isReplicationNeeded(peer) { - rf.mu.Unlock() - rf.broadcasterCond[peer].Wait() - rf.mu.Lock() - } - args := rf.prepareReplicationArgs(peer) - rf.mu.Unlock() - rf.sendReplicationRPC(peer, args) - } -} - -FILE_EOF_raft_append_entries_go - -cat > src/raft/install_snapshot.go << 'FILE_EOF_raft_install_snapshot_go' -package raft - -type InstallSnapshotArgs struct { - BaseRPC - LeaderId int - LastIncludedIndex int - LastIncludedTerm int - Offset int - Data []byte - Done bool -} - -type InstallSnapshotReply struct { - BaseRPC -} - -// InstallSnapshot RPC handler -func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) { - rf.mu.Lock() - defer rf.mu.Unlock() - - if !rf.checkRequestTerm(args, reply) { - return - } - - if args.LastIncludedIndex <= rf.commitIndex { - return - } - prevCommitIndex := rf.commitIndex - prevLastApplied := rf.lastApplied - defer DPrintf("(InstallSnapshot) [%d]: LastIncludedIndex: %d, LastIncludedTerm: %d, prevCommitIndex: %d, prevLastApplied: %d\n", rf.me, args.LastIncludedIndex, args.LastIncludedTerm, prevCommitIndex, prevLastApplied) - rf.resetElectionTimer() - - rf.commitIndex = args.LastIncludedIndex - rf.lastApplied = args.LastIncludedIndex - // 2. Create new snapshot file if first chunk (offset is 0) - // 3. Write data into snapshot file at given offset - // 4. Reply and wait for more data chunks if done is false - if !args.Done { - return - } - // 5. Save snapshot file, discard any existing or partial snapshot with a - // smaller index - // 6. If existing log entry has same index and term as snapshot’s last - // included entry, retain log entries following it and reply - // 7. Discard the entire log - // 8. Reset state machine using snapshot contents (and load snapshot’s - // cluster configuration) - firstLogIndex := rf.logs[0].Index - if firstLogIndex <= args.LastIncludedIndex { - rf.logs = append([]Entry{}, Entry{ - Index: args.LastIncludedIndex, - Term: args.LastIncludedTerm, - Command: nil, - }) - } else if firstLogIndex < args.LastIncludedIndex { - trimLen := args.LastIncludedIndex - firstLogIndex - rf.logs = append([]Entry{}, rf.logs[trimLen:]...) - rf.logs[0].Command = nil - } - rf.persister.Save(rf.encodeState(), args.Data) - rf.smsg = &ApplyMsg{ - SnapshotValid: true, - Snapshot: args.Data, - SnapshotTerm: args.LastIncludedTerm, - SnapshotIndex: args.LastIncludedIndex, - } -} - -func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { - reply := &InstallSnapshotReply{} - ok := rf.peers[server].Call("Raft.InstallSnapshot", args, reply) - if !ok { - return - } - - rf.mu.Lock() - defer rf.mu.Unlock() - - if !rf.checkResponseTerm(args, reply, false) { - return - } - - if args.LastIncludedIndex != rf.logs[0].Index { - return - } - - rf.nextIndex[server] = args.LastIncludedIndex + 1 - rf.matchIndex[server] = args.LastIncludedIndex - - rf.persister.Save(rf.encodeState(), args.Data) -} - -FILE_EOF_raft_install_snapshot_go - -cat > src/raft/util.go << 'FILE_EOF_raft_util_go' -package raft - -import ( - "log" - "os" -) - -// Debugging -var Debug = os.Getenv("DEBUG") == "1" - -func DPrintf(format string, a ...interface{}) { - if !Debug { - return - } - log.Printf(format, a...) -} - -FILE_EOF_raft_util_go - -echo 'Copying kvraft implementation...' 
-mkdir -p src/kvraft - -cat > src/kvraft/client.go << 'FILE_EOF_kvraft_client_go' -package kvraft - -import ( - "crypto/rand" - "math/big" - "sync/atomic" - "time" - - "6.5840/labrpc" -) - -type Clerk struct { - servers []*labrpc.ClientEnd - cid int64 - seq int - leader int32 // cache the leader -} - -func nrand() int64 { - max := big.NewInt(int64(1) << 62) - bigx, _ := rand.Int(rand.Reader, max) - x := bigx.Int64() - return x -} - -func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { - ck := new(Clerk) - ck.servers, ck.cid, ck.seq = servers, nrand(), 0 - return ck -} - -// fetch the current value for a key. -// returns "" if the key does not exist. -// keeps trying forever in the face of all other errors. -// -// you can send an RPC with code like this: -// ok := ck.servers[i].Call("KVServer.Get", &args, &reply) -// -// the types of args and reply (including whether they are pointers) -// must match the declared types of the RPC handler function's -// arguments. and reply must be passed as a pointer. -func (ck *Clerk) Get(key string) string { - ck.seq++ - - args := new(GetArgs) - args.Key, args.Cid, args.Seq = key, ck.cid, ck.seq - - leader := int(atomic.LoadInt32(&ck.leader)) - for { - for i := 0; i < len(ck.servers); i++ { - peer := (leader + i) % len(ck.servers) - reply := new(GetReply) - ok := ck.servers[peer].Call("KVServer.Get", args, reply) - if ok && (reply.Err == OK || reply.Err == ErrNoKey) { - atomic.StoreInt32(&ck.leader, int32(peer)) - return reply.Value - } - } - time.Sleep(100 * time.Millisecond) - } -} - -// shared by Put and Append. -// -// you can send an RPC with code like this: -// ok := ck.servers[i].Call("KVServer.PutAppend", &args, &reply) -// -// the types of args and reply (including whether they are pointers) -// must match the declared types of the RPC handler function's -// arguments. and reply must be passed as a pointer. 
-func (ck *Clerk) PutAppend(key string, value string, op string) { - ck.seq++ - - args := new(PutAppendArgs) - args.OpStr, args.Key, args.Value, args.Cid, args.Seq = op, key, value, ck.cid, ck.seq - - leader := int(atomic.LoadInt32(&ck.leader)) - for { - for i := 0; i < len(ck.servers); i++ { - peer := (leader + i) % len(ck.servers) - reply := new(PutAppendReply) - ok := ck.servers[peer].Call("KVServer.PutAppend", args, reply) - if ok && reply.Err == OK { - atomic.StoreInt32(&ck.leader, int32(peer)) - return - } - } - time.Sleep(100 * time.Millisecond) - } -} - -func (ck *Clerk) Put(key string, value string) { - ck.PutAppend(key, value, "Put") -} -func (ck *Clerk) Append(key string, value string) { - ck.PutAppend(key, value, "Append") -} - -FILE_EOF_kvraft_client_go - -cat > src/kvraft/common.go << 'FILE_EOF_kvraft_common_go' -package kvraft - -const ( - OK = "OK" - ErrNoKey = "ErrNoKey" - ErrWrongLeader = "ErrWrongLeader" -) - -type ClientInfo struct { - Cid int64 - Seq int -} - -type Err string - -type RaftReply struct { - Value string - Err Err -} - -type GetArgs struct { - Key string - ClientInfo -} - -type GetReply = RaftReply - -// Put or Append -type PutAppendArgs struct { - OpStr string // "Put" or "Append" - Key string - Value string - ClientInfo -} - -type PutAppendReply = RaftReply - -type Cache struct { - Seq int - RaftReply -} - -FILE_EOF_kvraft_common_go - -cat > src/kvraft/server.go << 'FILE_EOF_kvraft_server_go' -package kvraft - -import ( - "bytes" - "log" - "os" - "sync" - "sync/atomic" - "time" - - "6.5840/labgob" - "6.5840/labrpc" - "6.5840/raft" -) - -var Debug = os.Getenv("DEBUG") == "1" - -func DPrintf(format string, a ...interface{}) (n int, err error) { - if Debug { - log.Printf(format, a...) - } - return -} - -type Opcode int - -const ( - GET Opcode = iota - PUT - APPEND -) - -type Op struct { - Cmd interface{} - ClientInfo -} - -type Done struct { - index int - term int - value string - err Err -} - -type KVServer struct { - mu sync.Mutex - me int - rf *raft.Raft - ps *raft.Persister - applyCh chan raft.ApplyMsg - dead int32 // set by Kill() - - maxraftstate int // snapshot if log grows this big - - data map[string]string - cache map[int64]*Cache // client id -> seq - chanmap map[int64]chan Done -} - -func getChanId(term, index int) (id int64) { - id = int64(term) << 32 - id += int64(index) - return -} - -func (kv *KVServer) makeChan(term, index int) chan Done { - id := getChanId(term, index) - ch := make(chan Done, 1) - kv.chanmap[id] = ch - return ch -} - -func (kv *KVServer) closeAndDeleteChan(term, index int) { - kv.mu.Lock() - defer kv.mu.Unlock() - id := getChanId(term, index) - close(kv.chanmap[id]) - delete(kv.chanmap, id) -} - -func (kv *KVServer) isCacheHit(Cid int64, Seq int) (bool, *Cache) { - // Why cache.Seq >= Seq works? - // 1. If the seq of cache equals to Seq, it means the operation has been - // executed. Return the value directly. - // 2. If the seq of cache is Greater than Seq, it means some operations - // after this Op have been executed, which implies client has already - // received the result of this Op (the operation must be completed before - // next operation happened). Theorically, return anything is OK. 
- if cache, ok := kv.cache[Cid]; ok && cache.Seq >= Seq { - return true, cache - } else if ok { - return false, cache - } else { - kv.cache[Cid] = new(Cache) - return false, kv.cache[Cid] - } -} - -func (kv *KVServer) encode() []byte { - w := new(bytes.Buffer) - e := labgob.NewEncoder(w) - e.Encode(kv.cache) - e.Encode(kv.data) - return w.Bytes() -} - -func (kv *KVServer) decode(buf []byte) { - if buf == nil || len(buf) < 1 { - return - } - r := bytes.NewBuffer(buf) - d := labgob.NewDecoder(r) - var cache map[int64]*Cache - var data map[string]string - if d.Decode(&cache) != nil || d.Decode(&data) != nil { - log.Fatal("Decode error") - return - } - kv.cache = cache - kv.data = data -} - -func (kv *KVServer) startRaft(cmd interface{}, cid int64, seq int, ch chan *Cache) { - kv.mu.Lock() - defer kv.mu.Unlock() - rr := new(Cache) - if hit, cache := kv.isCacheHit(cid, seq); hit { - rr.Seq, rr.Value, rr.Err = cache.Seq, cache.Value, cache.Err - ch <- rr - } else { - op := new(Op) - op.Cmd, op.Cid, op.Seq = cmd, cid, seq - index, term, isLeader := kv.rf.Start(op) - if !isLeader { - cache.Value, cache.Err = "", ErrWrongLeader - rr.Err = ErrWrongLeader - ch <- rr - return - } - donech := kv.makeChan(term, index) - go kv.waitRaft(term, index, ch, donech) - DPrintf("(startRaft) [%d] start raft with op %+v\n", kv.me, op) - } -} - -func (kv *KVServer) waitRaft(term, index int, ch chan *Cache, donech chan Done) { - timer := time.NewTimer(500 * time.Millisecond) - rr := new(Cache) - DPrintf("(waitRaft) [%d] wait for term: %d, index: %d\n", kv.me, term, index) - select { - case <-timer.C: - DPrintf("(waitRaft) [%d] timeout, term: %d, index: %d\n", kv.me, term, index) - rr.Value = "" - rr.Err = ErrWrongLeader - ch <- rr - case done := <-donech: - rr.Value = done.value - rr.Err = done.err - ch <- rr - } - kv.closeAndDeleteChan(term, index) -} - -func (kv *KVServer) raft(cmd interface{}, cid int64, seq int) *Cache { - ch := make(chan *Cache) - go kv.startRaft(cmd, cid, seq, ch) - r := <-ch - close(ch) - return r -} - -func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { - DPrintf("(Get) [%d] get %s\n", kv.me, args.Key) - r := kv.raft(args, args.Cid, args.Seq) - reply.Value = r.Value - reply.Err = r.Err -} - -func (kv *KVServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { - DPrintf("(PutAppend) [%d] %s %s: %s\n", kv.me, args.OpStr, args.Key, args.Value) - r := kv.raft(args, args.Cid, args.Seq) - reply.Err = r.Err -} - -// Serializes the execution of operations on the key-value store. 
-func (kv *KVServer) executor() { - for !kv.killed() { - msg := <-kv.applyCh - DPrintf("(executor) [%d] receive msg %+v\n", kv.me, msg) - kv.mu.Lock() - if msg.CommandValid { - DPrintf("(executor) [%d] type of command: %T\n", kv.me, msg.Command) - op := msg.Command.(*Op) - index, term, cid, seq := msg.CommandIndex, msg.CommandTerm, op.Cid, op.Seq - hit, cache := kv.isCacheHit(cid, seq) - if !hit { - cache.Seq, cache.Value, cache.Err = seq, "", OK - switch v := op.Cmd.(type) { - case *GetArgs: - key := v.Key - DPrintf("(executor) [%d] get %s: %s\n", kv.me, key, kv.data[key]) - if val, ok := kv.data[key]; ok { - cache.Value = val - } else { - cache.Err = ErrNoKey - } - case *PutAppendArgs: - if v.OpStr == "Put" { - kv.data[v.Key] = v.Value - } else if v.OpStr == "Append" { - kv.data[v.Key] += v.Value - } - DPrintf("(executor) [%d] %s %s: %s\n", kv.me, v.OpStr, v.Key, kv.data[v.Key]) - } - if kv.maxraftstate != -1 && kv.maxraftstate < kv.ps.RaftStateSize() { - kv.rf.Snapshot(index, kv.encode()) - } - } - if ch, ok := kv.chanmap[getChanId(term, index)]; ok { - select { - case ch <- Done{index, term, cache.Value, cache.Err}: - default: - panic("Channel is full or closed") - } - } - } else if msg.SnapshotValid { - kv.decode(msg.Snapshot) - } else { - log.Fatalf("Invalid applyMsg, %+v\n", msg) - } - kv.mu.Unlock() - } -} - -// the tester calls Kill() when a KVServer instance won't -// be needed again. for your convenience, we supply -// code to set rf.dead (without needing a lock), -// and a killed() method to test rf.dead in -// long-running loops. you can also add your own -// code to Kill(). you're not required to do anything -// about this, but it may be convenient (for example) -// to suppress debug output from a Kill()ed instance. -func (kv *KVServer) Kill() { - atomic.StoreInt32(&kv.dead, 1) - kv.rf.Kill() - // Your code here, if desired. -} - -func (kv *KVServer) killed() bool { - z := atomic.LoadInt32(&kv.dead) - return z == 1 -} - -// servers[] contains the ports of the set of -// servers that will cooperate via Raft to -// form the fault-tolerant key/value service. -// me is the index of the current server in servers[]. -// the k/v server should store snapshots through the underlying Raft -// implementation, which should call persister.SaveStateAndSnapshot() to -// atomically save the Raft state along with the snapshot. -// the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, -// in order to allow Raft to garbage-collect its log. if maxraftstate is -1, -// you don't need to snapshot. -// StartKVServer() must return quickly, so it should start goroutines -// for any long-running work. -func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer { - // call labgob.Register on structures you want - // Go's RPC library to marshall/unmarshall. - labgob.Register(&Op{}) - labgob.Register(&GetArgs{}) - labgob.Register(&PutAppendArgs{}) - labgob.Register(&RaftReply{}) - labgob.Register(&Cache{}) - - kv := new(KVServer) - kv.me = me - kv.maxraftstate = maxraftstate - - kv.applyCh = make(chan raft.ApplyMsg) - kv.rf = raft.Make(servers, me, persister, kv.applyCh) - kv.ps = persister - kv.data = make(map[string]string) - kv.cache = make(map[int64]*Cache) - kv.chanmap = make(map[int64]chan Done) - - // Read from persister if any - kv.decode(kv.ps.ReadSnapshot()) - - go kv.executor() - - return kv -} - -FILE_EOF_kvraft_server_go - -echo 'Copying kvsrv implementation...' 
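// Editor's note: a minimal, self-contained sketch (not part of the lab starter
// code or this patch's file set) of the duplicate-suppression idea behind the
// kvraft cache above: the server remembers the highest sequence number applied
// per client, and any request whose Seq is not larger than that is treated as
// a retransmission and answered from the cached result instead of being
// re-applied. The names dedupStore, applyOnce, and lastSeq are illustrative
// assumptions, not the lab's API.

package main

import "fmt"

type dedupStore struct {
	data    map[string]string
	lastSeq map[int64]int    // client id -> highest Seq applied
	result  map[int64]string // client id -> cached reply for that Seq
}

// applyOnce applies an Append exactly once per (cid, seq); a retransmission
// with an old seq returns the cached reply without mutating data again.
func (s *dedupStore) applyOnce(cid int64, seq int, key, val string) string {
	if seq <= s.lastSeq[cid] {
		return s.result[cid]
	}
	old := s.data[key]
	s.data[key] += val
	s.lastSeq[cid], s.result[cid] = seq, old
	return old
}

func main() {
	s := &dedupStore{map[string]string{}, map[int64]int{}, map[int64]string{}}
	fmt.Println(s.applyOnce(1, 1, "k", "x")) // ""
	fmt.Println(s.applyOnce(1, 1, "k", "x")) // "" again: duplicate, not re-applied
	fmt.Println(s.data["k"])                 // "x", not "xx"
}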
-mkdir -p src/kvsrv - -cat > src/kvsrv/client.go << 'FILE_EOF_kvsrv_client_go' -package kvsrv - -import ( - "crypto/rand" - "math/big" - - "6.5840/labrpc" -) - -type Clerk struct { - server *labrpc.ClientEnd - clientId int64 - seqNum int -} - -func nrand() int64 { - max := big.NewInt(int64(1) << 62) - bigx, _ := rand.Int(rand.Reader, max) - x := bigx.Int64() - return x -} - -func MakeClerk(server *labrpc.ClientEnd) *Clerk { - ck := new(Clerk) - ck.server = server - ck.clientId = nrand() - ck.seqNum = 0 - return ck -} - -// fetch the current value for a key. -// returns "" if the key does not exist. -// keeps trying forever in the face of all other errors. -// -// you can send an RPC with code like this: -// ok := ck.server.Call("KVServer.Get", &args, &reply) -// -// the types of args and reply (including whether they are pointers) -// must match the declared types of the RPC handler function's -// arguments. and reply must be passed as a pointer. -func (ck *Clerk) Get(key string) string { - ck.seqNum++ - args := GetArgs{ - Key: key, - ClientId: ck.clientId, - SeqNum: ck.seqNum, - } - reply := GetReply{} - for !ck.server.Call("KVServer.Get", &args, &reply) { - } - return reply.Value -} - -// shared by Put and Append. -// -// you can send an RPC with code like this: -// ok := ck.server.Call("KVServer."+op, &args, &reply) -// -// the types of args and reply (including whether they are pointers) -// must match the declared types of the RPC handler function's -// arguments. and reply must be passed as a pointer. -func (ck *Clerk) PutAppend(key string, value string, op string) string { - ck.seqNum++ - args := PutAppendArgs{ - Key: key, - Value: value, - ClientId: ck.clientId, - SeqNum: ck.seqNum, - } - reply := PutAppendReply{} - for !ck.server.Call("KVServer."+op, &args, &reply) { - } - return reply.Value -} - -func (ck *Clerk) Put(key string, value string) { - ck.PutAppend(key, value, "Put") -} - -// Append value to key's value and return that value -func (ck *Clerk) Append(key string, value string) string { - return ck.PutAppend(key, value, "Append") -} - -FILE_EOF_kvsrv_client_go - -cat > src/kvsrv/common.go << 'FILE_EOF_kvsrv_common_go' -package kvsrv - -type PutAppendArgs struct { - Key string - Value string - ClientId int64 - SeqNum int -} - -type PutAppendReply struct { - Value string -} - -type GetArgs struct { - Key string - ClientId int64 - SeqNum int -} - -type GetReply struct { - Value string -} - -FILE_EOF_kvsrv_common_go - -cat > src/kvsrv/server.go << 'FILE_EOF_kvsrv_server_go' -package kvsrv - -import ( - "log" - "sync" -) - -const Debug = false - -func DPrintf(format string, a ...interface{}) (n int, err error) { - if Debug { - log.Printf(format, a...) - } - return -} - -type Cache struct { - seq int - value string -} - -type KVServer struct { - mu sync.Mutex - data map[string]string - cache map[int64]*Cache // client id -> seq ->value -} - -func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { - kv.mu.Lock() - defer kv.mu.Unlock() - clientId, seqNum := args.ClientId, args.SeqNum - key := args.Key - reply.Value = "" - // Either the client is new or the seqNum is greater than the cache seqNum. - // In both cases, we can return the value directly. 
- if ca, ok := kv.cache[clientId]; !ok || ca.seq <= seqNum { - reply.Value = kv.data[key] - return - } -} - -func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) { - kv.mu.Lock() - defer kv.mu.Unlock() - clientId, seqNum := args.ClientId, args.SeqNum - k, v := args.Key, args.Value - reply.Value = "" - if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { - return - } else if !ok { - kv.cache[clientId] = new(Cache) - } - kv.data[k] = v - kv.cache[clientId].seq = seqNum - kv.cache[clientId].value = reply.Value -} - -func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) { - kv.mu.Lock() - defer kv.mu.Unlock() - clientId, seqNum := args.ClientId, args.SeqNum - k, v := args.Key, args.Value - reply.Value = "" - // For ca.seq == seqNum, it means that the value has been appended. - // However, the response might be lost, so we return the cache value. - // For ca.seq > seqNum, it doesnt matter what the value is, just return. - if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { - reply.Value = ca.value - return - } else if !ok { - kv.cache[clientId] = new(Cache) - } - reply.Value = kv.data[k] - kv.cache[clientId].seq = seqNum - kv.cache[clientId].value = kv.data[k] - kv.data[k] += v -} - -func StartKVServer() *KVServer { - kv := new(KVServer) - kv.data = make(map[string]string) - kv.cache = make(map[int64]*Cache) - return kv -} - -FILE_EOF_kvsrv_server_go - -echo 'Copying mr implementation...' -mkdir -p src/mr - -cat > src/mr/coordinator.go << 'FILE_EOF_mr_coordinator_go' -package mr - -import ( - "log" - "math" - "net" - "net/http" - "net/rpc" - "os" - "sync" - "time" -) - -const SUCCESS = math.MaxInt32 - -type Coordinator struct { - // Your definitions here. - tasks chan Work // a taskqueue - mu sync.Mutex - terms []int - wg sync.WaitGroup - nMap int - nReduce int - done bool -} - -func (c *Coordinator) CallGetWork(args *WorkArgs, reply *WorkReply) error { - if len(c.tasks) == 0 { - reply.HasWork = false - return nil - } - reply.Work = <-c.tasks - c.mu.Lock() - reply.Term = c.terms[reply.Work.FileIndex] - c.mu.Unlock() - reply.HasWork = true - - go func() { - time.Sleep(10 * time.Second) - c.mu.Lock() - defer c.mu.Unlock() - if c.terms[reply.Work.FileIndex] == SUCCESS { - return - } - c.terms[reply.Work.FileIndex]++ - c.tasks <- reply.Work - }() - - return nil -} - -func (c *Coordinator) CallReport(args *ReportArgs, reply *ReportReply) error { - c.mu.Lock() - defer c.mu.Unlock() - - if c.terms[args.Work.FileIndex] != args.Term { - reply.Success = false - return nil - } - c.terms[args.Work.FileIndex] = SUCCESS - c.wg.Done() - reply.Success = true - return nil -} - -// start a thread that listens for RPCs from worker.go -func (c *Coordinator) server() { - rpc.Register(c) - rpc.HandleHTTP() - //l, e := net.Listen("tcp", ":1234") - sockname := coordinatorSock() - os.Remove(sockname) - l, e := net.Listen("unix", sockname) - if e != nil { - log.Fatal("listen error:", e) - } - go http.Serve(l, nil) -} - -// main/mrcoordinator.go calls Done() periodically to find out -// if the entire job has finished. -func (c *Coordinator) Done() bool { - return c.done -} - -func StartReduceWork(c *Coordinator) { - c.wg.Wait() - c.terms = make([]int, c.nReduce) - for i := 0; i < c.nReduce; i++ { - c.tasks <- Work{ - WorkType: REDUCE, - FileIndex: i, - NReduce: c.nReduce, - NMapWork: c.nMap, - } - c.wg.Add(1) - } - go WorkDone(c) -} - -func WorkDone(c *Coordinator) { - c.wg.Wait() - c.done = true -} - -// create a Coordinator. -// main/mrcoordinator.go calls this function. 
-// nReduce is the number of reduce tasks to use. -func MakeCoordinator(files []string, nReduce int) *Coordinator { - - var buflen int - if len(files) > nReduce { - buflen = len(files) - } else { - buflen = nReduce - } - - c := Coordinator{ - nMap: len(files), - nReduce: nReduce, - wg: sync.WaitGroup{}, - tasks: make(chan Work, buflen), - terms: make([]int, len(files)), - done: false, - } - - for idx, file := range files { - c.tasks <- Work{ - WorkType: MAP, - Filename: file, - FileIndex: idx, - NReduce: c.nReduce, - NMapWork: c.nMap, - } - c.wg.Add(1) - } - go StartReduceWork(&c) - c.server() - - return &c -} - -FILE_EOF_mr_coordinator_go - -cat > src/mr/rpc.go << 'FILE_EOF_mr_rpc_go' -package mr - -// -// RPC definitions. -// -// remember to capitalize all names. -// - -import ( - "os" - "strconv" -) - -// -// example to show how to declare the arguments -// and reply for an RPC. -// - -type ExampleArgs struct { - X int -} - -type ExampleReply struct { - Y int -} - -/*-Define Work-*/ - -type WorkStatus int - -const ( - IDLE WorkStatus = iota - START - FINISH -) - -type WorkType int - -const ( - MAP WorkType = iota - REDUCE -) - -type Work struct { - WorkType WorkType // MAP or REDUCE - Filename string - FileIndex int // This is a convention for mr-X index - NMapWork int // how many map files - NReduce int // how many reduce files -} - -type WorkArgs struct { - WorkerID int -} - -type WorkReply struct { - HasWork bool - Work Work - Term int -} - -/*-Define Report-*/ -// Report work finish only if success -type ReportArgs struct { - Work Work - Term int -} - -type ReportReply struct { - Success bool -} - -// Cook up a unique-ish UNIX-domain socket name -// in /var/tmp, for the coordinator. -// Can't use the current directory since -// Athena AFS doesn't support UNIX-domain sockets. -func coordinatorSock() string { - s := "/var/tmp/5840-mr-" - s += strconv.Itoa(os.Getuid()) - return s -} - -FILE_EOF_mr_rpc_go - -cat > src/mr/worker.go << 'FILE_EOF_mr_worker_go' -package mr - -import ( - "encoding/json" - "fmt" - "hash/fnv" - "io/ioutil" - "log" - "net/rpc" - "os" - "sort" - "time" -) - -// for sorting by key. -type ByKey []KeyValue - -// for sorting by key. -func (a ByKey) Len() int { return len(a) } -func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } - -// Map functions return a slice of KeyValue. -type KeyValue struct { - Key string - Value string -} - -// use ihash(key) % NReduce to choose the reduce -// task number for each KeyValue emitted by Map. -func ihash(key string) int { - h := fnv.New32a() - h.Write([]byte(key)) - return int(h.Sum32() & 0x7fffffff) -} - -// main/mrworker.go calls this function. -func Worker(mapf func(string, string) []KeyValue, - reducef func(string, []string) string) { - - // Your worker implementation here. 
- for { - r := CallGetWok() - if !r.HasWork { - time.Sleep(3 * time.Second) - continue - } - - switch r.Work.WorkType { - case MAP: - DoMapWork(r.Work, mapf, r.Term) - case REDUCE: - DoReduceWork(r.Work, reducef, r.Term) - } - } -} - -func DoReduceWork(work Work, reducef func(string, []string) string, term int) { - fileIndex := work.FileIndex - intermediate := []KeyValue{} - - for i := 0; i < work.NMapWork; i++ { - filename := fmt.Sprintf("mr-%d-%d", i, fileIndex) - file, err := os.Open(filename) - - if err != nil { - log.Fatalf("cannot open %v", filename) - } - - dec := json.NewDecoder(file) - - for { - var kv KeyValue - if err := dec.Decode(&kv); err != nil { - break - } - intermediate = append(intermediate, kv) - } - file.Close() - } - - sort.Sort(ByKey(intermediate)) - - oname := fmt.Sprintf("mr-out-%d", fileIndex) - ofile, _ := ioutil.TempFile(".", oname) - - // - // call Reduce on each distinct key in intermediate[], - // and print the result to mr-out-0. - // - i := 0 - for i < len(intermediate) { - j := i + 1 - for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { - j++ - } - values := []string{} - for k := i; k < j; k++ { - values = append(values, intermediate[k].Value) - } - output := reducef(intermediate[i].Key, values) - - // this is the correct format for each line of Reduce output. - fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) - - i = j - } - - os.Rename(ofile.Name(), oname) - - CallReport(work, term) -} - -func DoMapWork(work Work, mapf func(string, string) []KeyValue, term int) { - filename := work.Filename - - file, err := os.Open(filename) - if err != nil { - log.Fatalf("cannot open %v", filename) - } - - content, err := ioutil.ReadAll(file) - - if err != nil { - log.Fatalf("cannot read %v", filename) - } - - file.Close() - - kva := mapf(work.Filename, string(content)) - - //make a - for i := 0; i < work.NReduce; i++ { - imtFilename := fmt.Sprintf("mr-%d-%d", work.FileIndex, i) - - imtFile, err := ioutil.TempFile(".", imtFilename) - - enc := json.NewEncoder(imtFile) - - if err != nil { - log.Fatalf("cannot create %v", imtFilename) - } - - for _, kv := range kva { - hash := ihash(kv.Key) % work.NReduce - if hash == i { - err := enc.Encode(&kv) - if err != nil { - log.Fatalf("cannot encode %v", kv) - } - } - } - - imtFile.Close() - - os.Rename(imtFile.Name(), imtFilename) - } - - CallReport(work, term) -} - -func CallReport(w Work, term int) { - args := ReportArgs{ - Work: w, - Term: term, - } - reply := ReportReply{} - ok := call("Coordinator.CallReport", &args, &reply) - - if !ok { - fmt.Printf("call failed!\n") - } -} - -func CallGetWok() WorkReply { - args := WorkArgs{} - reply := WorkReply{} - ok := call("Coordinator.CallGetWork", &args, &reply) - - if !ok { - fmt.Printf("call failed!\n") - } - - return reply -} - -// example function to show how to make an RPC call to the coordinator. -// -// the RPC argument and reply types are defined in rpc.go. -func CallExample() { - - // declare an argument structure. - args := ExampleArgs{} - - // fill in the argument(s). - args.X = 99 - - // declare a reply structure. - reply := ExampleReply{} - - // send the RPC request, wait for the reply. - // the "Coordinator.Example" tells the - // receiving server that we'd like to call - // the Example() method of struct Coordinator. - ok := call("Coordinator.Example", &args, &reply) - if ok { - // reply.Y should be 100. 
- fmt.Printf("reply.Y %v\n", reply.Y) - } else { - fmt.Printf("call failed!\n") - } -} - -// send an RPC request to the coordinator, wait for the response. -// usually returns true. -// returns false if something goes wrong. -func call(rpcname string, args interface{}, reply interface{}) bool { - // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") - sockname := coordinatorSock() - c, err := rpc.DialHTTP("unix", sockname) - if err != nil { - log.Fatal("dialing:", err) - } - defer c.Close() - - err = c.Call(rpcname, args, reply) - if err == nil { - return true - } - - fmt.Println(err) - return false -} - -FILE_EOF_mr_worker_go - echo 'Creating checksums for protected files...' PROTECTED_FILES=( diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvraft/client.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvraft/client.go new file mode 100644 index 0000000..f0d52d1 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvraft/client.go @@ -0,0 +1,97 @@ +package kvraft + +import ( + "crypto/rand" + "math/big" + "sync/atomic" + "time" + + "6.5840/labrpc" +) + +type Clerk struct { + servers []*labrpc.ClientEnd + cid int64 + seq int + leader int32 // cache the leader +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.servers, ck.cid, ck.seq = servers, nrand(), 0 + return ck +} + +// fetch the current value for a key. +// returns "" if the key does not exist. +// keeps trying forever in the face of all other errors. +// +// you can send an RPC with code like this: +// ok := ck.servers[i].Call("KVServer.Get", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) Get(key string) string { + ck.seq++ + + args := new(GetArgs) + args.Key, args.Cid, args.Seq = key, ck.cid, ck.seq + + leader := int(atomic.LoadInt32(&ck.leader)) + for { + for i := 0; i < len(ck.servers); i++ { + peer := (leader + i) % len(ck.servers) + reply := new(GetReply) + ok := ck.servers[peer].Call("KVServer.Get", args, reply) + if ok && (reply.Err == OK || reply.Err == ErrNoKey) { + atomic.StoreInt32(&ck.leader, int32(peer)) + return reply.Value + } + } + time.Sleep(100 * time.Millisecond) + } +} + +// shared by Put and Append. +// +// you can send an RPC with code like this: +// ok := ck.servers[i].Call("KVServer.PutAppend", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. 
+func (ck *Clerk) PutAppend(key string, value string, op string) { + ck.seq++ + + args := new(PutAppendArgs) + args.OpStr, args.Key, args.Value, args.Cid, args.Seq = op, key, value, ck.cid, ck.seq + + leader := int(atomic.LoadInt32(&ck.leader)) + for { + for i := 0; i < len(ck.servers); i++ { + peer := (leader + i) % len(ck.servers) + reply := new(PutAppendReply) + ok := ck.servers[peer].Call("KVServer.PutAppend", args, reply) + if ok && reply.Err == OK { + atomic.StoreInt32(&ck.leader, int32(peer)) + return + } + } + time.Sleep(100 * time.Millisecond) + } +} + +func (ck *Clerk) Put(key string, value string) { + ck.PutAppend(key, value, "Put") +} +func (ck *Clerk) Append(key string, value string) { + ck.PutAppend(key, value, "Append") +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvraft/common.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvraft/common.go new file mode 100644 index 0000000..8a67661 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvraft/common.go @@ -0,0 +1,41 @@ +package kvraft + +const ( + OK = "OK" + ErrNoKey = "ErrNoKey" + ErrWrongLeader = "ErrWrongLeader" +) + +type ClientInfo struct { + Cid int64 + Seq int +} + +type Err string + +type RaftReply struct { + Value string + Err Err +} + +type GetArgs struct { + Key string + ClientInfo +} + +type GetReply = RaftReply + +// Put or Append +type PutAppendArgs struct { + OpStr string // "Put" or "Append" + Key string + Value string + ClientInfo +} + +type PutAppendReply = RaftReply + +type Cache struct { + Seq int + RaftReply +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvraft/server.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvraft/server.go new file mode 100644 index 0000000..0f62b2e --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvraft/server.go @@ -0,0 +1,292 @@ +package kvraft + +import ( + "bytes" + "log" + "os" + "sync" + "sync/atomic" + "time" + + "6.5840/labgob" + "6.5840/labrpc" + "6.5840/raft" +) + +var Debug = os.Getenv("DEBUG") == "1" + +func DPrintf(format string, a ...interface{}) (n int, err error) { + if Debug { + log.Printf(format, a...) + } + return +} + +type Opcode int + +const ( + GET Opcode = iota + PUT + APPEND +) + +type Op struct { + Cmd interface{} + ClientInfo +} + +type Done struct { + index int + term int + value string + err Err +} + +type KVServer struct { + mu sync.Mutex + me int + rf *raft.Raft + ps *raft.Persister + applyCh chan raft.ApplyMsg + dead int32 // set by Kill() + + maxraftstate int // snapshot if log grows this big + + data map[string]string + cache map[int64]*Cache // client id -> seq + chanmap map[int64]chan Done +} + +func getChanId(term, index int) (id int64) { + id = int64(term) << 32 + id += int64(index) + return +} + +func (kv *KVServer) makeChan(term, index int) chan Done { + id := getChanId(term, index) + ch := make(chan Done, 1) + kv.chanmap[id] = ch + return ch +} + +func (kv *KVServer) closeAndDeleteChan(term, index int) { + kv.mu.Lock() + defer kv.mu.Unlock() + id := getChanId(term, index) + close(kv.chanmap[id]) + delete(kv.chanmap, id) +} + +func (kv *KVServer) isCacheHit(Cid int64, Seq int) (bool, *Cache) { + // Why cache.Seq >= Seq works? + // 1. If the seq of cache equals to Seq, it means the operation has been + // executed. Return the value directly. + // 2. 
If the seq of cache is Greater than Seq, it means some operations + // after this Op have been executed, which implies client has already + // received the result of this Op (the operation must be completed before + // next operation happened). Theorically, return anything is OK. + if cache, ok := kv.cache[Cid]; ok && cache.Seq >= Seq { + return true, cache + } else if ok { + return false, cache + } else { + kv.cache[Cid] = new(Cache) + return false, kv.cache[Cid] + } +} + +func (kv *KVServer) encode() []byte { + w := new(bytes.Buffer) + e := labgob.NewEncoder(w) + e.Encode(kv.cache) + e.Encode(kv.data) + return w.Bytes() +} + +func (kv *KVServer) decode(buf []byte) { + if buf == nil || len(buf) < 1 { + return + } + r := bytes.NewBuffer(buf) + d := labgob.NewDecoder(r) + var cache map[int64]*Cache + var data map[string]string + if d.Decode(&cache) != nil || d.Decode(&data) != nil { + log.Fatal("Decode error") + return + } + kv.cache = cache + kv.data = data +} + +func (kv *KVServer) startRaft(cmd interface{}, cid int64, seq int, ch chan *Cache) { + kv.mu.Lock() + defer kv.mu.Unlock() + rr := new(Cache) + if hit, cache := kv.isCacheHit(cid, seq); hit { + rr.Seq, rr.Value, rr.Err = cache.Seq, cache.Value, cache.Err + ch <- rr + } else { + op := new(Op) + op.Cmd, op.Cid, op.Seq = cmd, cid, seq + index, term, isLeader := kv.rf.Start(op) + if !isLeader { + cache.Value, cache.Err = "", ErrWrongLeader + rr.Err = ErrWrongLeader + ch <- rr + return + } + donech := kv.makeChan(term, index) + go kv.waitRaft(term, index, ch, donech) + DPrintf("(startRaft) [%d] start raft with op %+v\n", kv.me, op) + } +} + +func (kv *KVServer) waitRaft(term, index int, ch chan *Cache, donech chan Done) { + timer := time.NewTimer(500 * time.Millisecond) + rr := new(Cache) + DPrintf("(waitRaft) [%d] wait for term: %d, index: %d\n", kv.me, term, index) + select { + case <-timer.C: + DPrintf("(waitRaft) [%d] timeout, term: %d, index: %d\n", kv.me, term, index) + rr.Value = "" + rr.Err = ErrWrongLeader + ch <- rr + case done := <-donech: + rr.Value = done.value + rr.Err = done.err + ch <- rr + } + kv.closeAndDeleteChan(term, index) +} + +func (kv *KVServer) raft(cmd interface{}, cid int64, seq int) *Cache { + ch := make(chan *Cache) + go kv.startRaft(cmd, cid, seq, ch) + r := <-ch + close(ch) + return r +} + +func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { + DPrintf("(Get) [%d] get %s\n", kv.me, args.Key) + r := kv.raft(args, args.Cid, args.Seq) + reply.Value = r.Value + reply.Err = r.Err +} + +func (kv *KVServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { + DPrintf("(PutAppend) [%d] %s %s: %s\n", kv.me, args.OpStr, args.Key, args.Value) + r := kv.raft(args, args.Cid, args.Seq) + reply.Err = r.Err +} + +// Serializes the execution of operations on the key-value store. 
+func (kv *KVServer) executor() { + for !kv.killed() { + msg := <-kv.applyCh + DPrintf("(executor) [%d] receive msg %+v\n", kv.me, msg) + kv.mu.Lock() + if msg.CommandValid { + DPrintf("(executor) [%d] type of command: %T\n", kv.me, msg.Command) + op := msg.Command.(*Op) + index, term, cid, seq := msg.CommandIndex, msg.CommandTerm, op.Cid, op.Seq + hit, cache := kv.isCacheHit(cid, seq) + if !hit { + cache.Seq, cache.Value, cache.Err = seq, "", OK + switch v := op.Cmd.(type) { + case *GetArgs: + key := v.Key + DPrintf("(executor) [%d] get %s: %s\n", kv.me, key, kv.data[key]) + if val, ok := kv.data[key]; ok { + cache.Value = val + } else { + cache.Err = ErrNoKey + } + case *PutAppendArgs: + if v.OpStr == "Put" { + kv.data[v.Key] = v.Value + } else if v.OpStr == "Append" { + kv.data[v.Key] += v.Value + } + DPrintf("(executor) [%d] %s %s: %s\n", kv.me, v.OpStr, v.Key, kv.data[v.Key]) + } + if kv.maxraftstate != -1 && kv.maxraftstate < kv.ps.RaftStateSize() { + kv.rf.Snapshot(index, kv.encode()) + } + } + if ch, ok := kv.chanmap[getChanId(term, index)]; ok { + select { + case ch <- Done{index, term, cache.Value, cache.Err}: + default: + panic("Channel is full or closed") + } + } + } else if msg.SnapshotValid { + kv.decode(msg.Snapshot) + } else { + log.Fatalf("Invalid applyMsg, %+v\n", msg) + } + kv.mu.Unlock() + } +} + +// the tester calls Kill() when a KVServer instance won't +// be needed again. for your convenience, we supply +// code to set rf.dead (without needing a lock), +// and a killed() method to test rf.dead in +// long-running loops. you can also add your own +// code to Kill(). you're not required to do anything +// about this, but it may be convenient (for example) +// to suppress debug output from a Kill()ed instance. +func (kv *KVServer) Kill() { + atomic.StoreInt32(&kv.dead, 1) + kv.rf.Kill() + // Your code here, if desired. +} + +func (kv *KVServer) killed() bool { + z := atomic.LoadInt32(&kv.dead) + return z == 1 +} + +// servers[] contains the ports of the set of +// servers that will cooperate via Raft to +// form the fault-tolerant key/value service. +// me is the index of the current server in servers[]. +// the k/v server should store snapshots through the underlying Raft +// implementation, which should call persister.SaveStateAndSnapshot() to +// atomically save the Raft state along with the snapshot. +// the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, +// in order to allow Raft to garbage-collect its log. if maxraftstate is -1, +// you don't need to snapshot. +// StartKVServer() must return quickly, so it should start goroutines +// for any long-running work. +func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer { + // call labgob.Register on structures you want + // Go's RPC library to marshall/unmarshall. 
+ labgob.Register(&Op{}) + labgob.Register(&GetArgs{}) + labgob.Register(&PutAppendArgs{}) + labgob.Register(&RaftReply{}) + labgob.Register(&Cache{}) + + kv := new(KVServer) + kv.me = me + kv.maxraftstate = maxraftstate + + kv.applyCh = make(chan raft.ApplyMsg) + kv.rf = raft.Make(servers, me, persister, kv.applyCh) + kv.ps = persister + kv.data = make(map[string]string) + kv.cache = make(map[int64]*Cache) + kv.chanmap = make(map[int64]chan Done) + + // Read from persister if any + kv.decode(kv.ps.ReadSnapshot()) + + go kv.executor() + + return kv +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvsrv/client.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvsrv/client.go new file mode 100644 index 0000000..cb36e2b --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvsrv/client.go @@ -0,0 +1,83 @@ +package kvsrv + +import ( + "crypto/rand" + "math/big" + + "6.5840/labrpc" +) + +type Clerk struct { + server *labrpc.ClientEnd + clientId int64 + seqNum int +} + +func nrand() int64 { + max := big.NewInt(int64(1) << 62) + bigx, _ := rand.Int(rand.Reader, max) + x := bigx.Int64() + return x +} + +func MakeClerk(server *labrpc.ClientEnd) *Clerk { + ck := new(Clerk) + ck.server = server + ck.clientId = nrand() + ck.seqNum = 0 + return ck +} + +// fetch the current value for a key. +// returns "" if the key does not exist. +// keeps trying forever in the face of all other errors. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer.Get", &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. +func (ck *Clerk) Get(key string) string { + ck.seqNum++ + args := GetArgs{ + Key: key, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := GetReply{} + for !ck.server.Call("KVServer.Get", &args, &reply) { + } + return reply.Value +} + +// shared by Put and Append. +// +// you can send an RPC with code like this: +// ok := ck.server.Call("KVServer."+op, &args, &reply) +// +// the types of args and reply (including whether they are pointers) +// must match the declared types of the RPC handler function's +// arguments. and reply must be passed as a pointer. 
+func (ck *Clerk) PutAppend(key string, value string, op string) string { + ck.seqNum++ + args := PutAppendArgs{ + Key: key, + Value: value, + ClientId: ck.clientId, + SeqNum: ck.seqNum, + } + reply := PutAppendReply{} + for !ck.server.Call("KVServer."+op, &args, &reply) { + } + return reply.Value +} + +func (ck *Clerk) Put(key string, value string) { + ck.PutAppend(key, value, "Put") +} + +// Append value to key's value and return that value +func (ck *Clerk) Append(key string, value string) string { + return ck.PutAppend(key, value, "Append") +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvsrv/common.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvsrv/common.go new file mode 100644 index 0000000..610acdb --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvsrv/common.go @@ -0,0 +1,22 @@ +package kvsrv + +type PutAppendArgs struct { + Key string + Value string + ClientId int64 + SeqNum int +} + +type PutAppendReply struct { + Value string +} + +type GetArgs struct { + Key string + ClientId int64 + SeqNum int +} + +type GetReply struct { + Value string +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvsrv/server.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvsrv/server.go new file mode 100644 index 0000000..4e03682 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/kvsrv/server.go @@ -0,0 +1,84 @@ +package kvsrv + +import ( + "log" + "sync" +) + +const Debug = false + +func DPrintf(format string, a ...interface{}) (n int, err error) { + if Debug { + log.Printf(format, a...) + } + return +} + +type Cache struct { + seq int + value string +} + +type KVServer struct { + mu sync.Mutex + data map[string]string + cache map[int64]*Cache // client id -> seq ->value +} + +func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + key := args.Key + reply.Value = "" + // Either the client is new or the seqNum is greater than the cache seqNum. + // In both cases, we can return the value directly. + if ca, ok := kv.cache[clientId]; !ok || ca.seq <= seqNum { + reply.Value = kv.data[key] + return + } +} + +func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + kv.data[k] = v + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = reply.Value +} + +func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) { + kv.mu.Lock() + defer kv.mu.Unlock() + clientId, seqNum := args.ClientId, args.SeqNum + k, v := args.Key, args.Value + reply.Value = "" + // For ca.seq == seqNum, it means that the value has been appended. + // However, the response might be lost, so we return the cache value. + // For ca.seq > seqNum, it doesnt matter what the value is, just return. 
+ if ca, ok := kv.cache[clientId]; ok && ca.seq >= seqNum { + reply.Value = ca.value + return + } else if !ok { + kv.cache[clientId] = new(Cache) + } + reply.Value = kv.data[k] + kv.cache[clientId].seq = seqNum + kv.cache[clientId].value = kv.data[k] + kv.data[k] += v +} + +func StartKVServer() *KVServer { + kv := new(KVServer) + kv.data = make(map[string]string) + kv.cache = make(map[int64]*Cache) + return kv +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/mr/coordinator.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/mr/coordinator.go new file mode 100644 index 0000000..4fc2518 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/mr/coordinator.go @@ -0,0 +1,141 @@ +package mr + +import ( + "log" + "math" + "net" + "net/http" + "net/rpc" + "os" + "sync" + "time" +) + +const SUCCESS = math.MaxInt32 + +type Coordinator struct { + // Your definitions here. + tasks chan Work // a taskqueue + mu sync.Mutex + terms []int + wg sync.WaitGroup + nMap int + nReduce int + done bool +} + +func (c *Coordinator) CallGetWork(args *WorkArgs, reply *WorkReply) error { + if len(c.tasks) == 0 { + reply.HasWork = false + return nil + } + reply.Work = <-c.tasks + c.mu.Lock() + reply.Term = c.terms[reply.Work.FileIndex] + c.mu.Unlock() + reply.HasWork = true + + go func() { + time.Sleep(10 * time.Second) + c.mu.Lock() + defer c.mu.Unlock() + if c.terms[reply.Work.FileIndex] == SUCCESS { + return + } + c.terms[reply.Work.FileIndex]++ + c.tasks <- reply.Work + }() + + return nil +} + +func (c *Coordinator) CallReport(args *ReportArgs, reply *ReportReply) error { + c.mu.Lock() + defer c.mu.Unlock() + + if c.terms[args.Work.FileIndex] != args.Term { + reply.Success = false + return nil + } + c.terms[args.Work.FileIndex] = SUCCESS + c.wg.Done() + reply.Success = true + return nil +} + +// start a thread that listens for RPCs from worker.go +func (c *Coordinator) server() { + rpc.Register(c) + rpc.HandleHTTP() + //l, e := net.Listen("tcp", ":1234") + sockname := coordinatorSock() + os.Remove(sockname) + l, e := net.Listen("unix", sockname) + if e != nil { + log.Fatal("listen error:", e) + } + go http.Serve(l, nil) +} + +// main/mrcoordinator.go calls Done() periodically to find out +// if the entire job has finished. +func (c *Coordinator) Done() bool { + return c.done +} + +func StartReduceWork(c *Coordinator) { + c.wg.Wait() + c.terms = make([]int, c.nReduce) + for i := 0; i < c.nReduce; i++ { + c.tasks <- Work{ + WorkType: REDUCE, + FileIndex: i, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go WorkDone(c) +} + +func WorkDone(c *Coordinator) { + c.wg.Wait() + c.done = true +} + +// create a Coordinator. +// main/mrcoordinator.go calls this function. +// nReduce is the number of reduce tasks to use. 
+func MakeCoordinator(files []string, nReduce int) *Coordinator { + + var buflen int + if len(files) > nReduce { + buflen = len(files) + } else { + buflen = nReduce + } + + c := Coordinator{ + nMap: len(files), + nReduce: nReduce, + wg: sync.WaitGroup{}, + tasks: make(chan Work, buflen), + terms: make([]int, len(files)), + done: false, + } + + for idx, file := range files { + c.tasks <- Work{ + WorkType: MAP, + Filename: file, + FileIndex: idx, + NReduce: c.nReduce, + NMapWork: c.nMap, + } + c.wg.Add(1) + } + go StartReduceWork(&c) + c.server() + + return &c +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/mr/rpc.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/mr/rpc.go new file mode 100644 index 0000000..0f90524 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/mr/rpc.go @@ -0,0 +1,81 @@ +package mr + +// +// RPC definitions. +// +// remember to capitalize all names. +// + +import ( + "os" + "strconv" +) + +// +// example to show how to declare the arguments +// and reply for an RPC. +// + +type ExampleArgs struct { + X int +} + +type ExampleReply struct { + Y int +} + +/*-Define Work-*/ + +type WorkStatus int + +const ( + IDLE WorkStatus = iota + START + FINISH +) + +type WorkType int + +const ( + MAP WorkType = iota + REDUCE +) + +type Work struct { + WorkType WorkType // MAP or REDUCE + Filename string + FileIndex int // This is a convention for mr-X index + NMapWork int // how many map files + NReduce int // how many reduce files +} + +type WorkArgs struct { + WorkerID int +} + +type WorkReply struct { + HasWork bool + Work Work + Term int +} + +/*-Define Report-*/ +// Report work finish only if success +type ReportArgs struct { + Work Work + Term int +} + +type ReportReply struct { + Success bool +} + +// Cook up a unique-ish UNIX-domain socket name +// in /var/tmp, for the coordinator. +// Can't use the current directory since +// Athena AFS doesn't support UNIX-domain sockets. +func coordinatorSock() string { + s := "/var/tmp/5840-mr-" + s += strconv.Itoa(os.Getuid()) + return s +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/mr/worker.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/mr/worker.go new file mode 100644 index 0000000..95d142e --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/mr/worker.go @@ -0,0 +1,233 @@ +package mr + +import ( + "encoding/json" + "fmt" + "hash/fnv" + "io/ioutil" + "log" + "net/rpc" + "os" + "sort" + "time" +) + +// for sorting by key. +type ByKey []KeyValue + +// for sorting by key. +func (a ByKey) Len() int { return len(a) } +func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } + +// Map functions return a slice of KeyValue. +type KeyValue struct { + Key string + Value string +} + +// use ihash(key) % NReduce to choose the reduce +// task number for each KeyValue emitted by Map. +func ihash(key string) int { + h := fnv.New32a() + h.Write([]byte(key)) + return int(h.Sum32() & 0x7fffffff) +} + +// main/mrworker.go calls this function. +func Worker(mapf func(string, string) []KeyValue, + reducef func(string, []string) string) { + + // Your worker implementation here. 
+ for { + r := CallGetWok() + if !r.HasWork { + time.Sleep(3 * time.Second) + continue + } + + switch r.Work.WorkType { + case MAP: + DoMapWork(r.Work, mapf, r.Term) + case REDUCE: + DoReduceWork(r.Work, reducef, r.Term) + } + } +} + +func DoReduceWork(work Work, reducef func(string, []string) string, term int) { + fileIndex := work.FileIndex + intermediate := []KeyValue{} + + for i := 0; i < work.NMapWork; i++ { + filename := fmt.Sprintf("mr-%d-%d", i, fileIndex) + file, err := os.Open(filename) + + if err != nil { + log.Fatalf("cannot open %v", filename) + } + + dec := json.NewDecoder(file) + + for { + var kv KeyValue + if err := dec.Decode(&kv); err != nil { + break + } + intermediate = append(intermediate, kv) + } + file.Close() + } + + sort.Sort(ByKey(intermediate)) + + oname := fmt.Sprintf("mr-out-%d", fileIndex) + ofile, _ := ioutil.TempFile(".", oname) + + // + // call Reduce on each distinct key in intermediate[], + // and print the result to mr-out-0. + // + i := 0 + for i < len(intermediate) { + j := i + 1 + for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { + j++ + } + values := []string{} + for k := i; k < j; k++ { + values = append(values, intermediate[k].Value) + } + output := reducef(intermediate[i].Key, values) + + // this is the correct format for each line of Reduce output. + fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) + + i = j + } + + os.Rename(ofile.Name(), oname) + + CallReport(work, term) +} + +func DoMapWork(work Work, mapf func(string, string) []KeyValue, term int) { + filename := work.Filename + + file, err := os.Open(filename) + if err != nil { + log.Fatalf("cannot open %v", filename) + } + + content, err := ioutil.ReadAll(file) + + if err != nil { + log.Fatalf("cannot read %v", filename) + } + + file.Close() + + kva := mapf(work.Filename, string(content)) + + //make a + for i := 0; i < work.NReduce; i++ { + imtFilename := fmt.Sprintf("mr-%d-%d", work.FileIndex, i) + + imtFile, err := ioutil.TempFile(".", imtFilename) + + enc := json.NewEncoder(imtFile) + + if err != nil { + log.Fatalf("cannot create %v", imtFilename) + } + + for _, kv := range kva { + hash := ihash(kv.Key) % work.NReduce + if hash == i { + err := enc.Encode(&kv) + if err != nil { + log.Fatalf("cannot encode %v", kv) + } + } + } + + imtFile.Close() + + os.Rename(imtFile.Name(), imtFilename) + } + + CallReport(work, term) +} + +func CallReport(w Work, term int) { + args := ReportArgs{ + Work: w, + Term: term, + } + reply := ReportReply{} + ok := call("Coordinator.CallReport", &args, &reply) + + if !ok { + fmt.Printf("call failed!\n") + } +} + +func CallGetWok() WorkReply { + args := WorkArgs{} + reply := WorkReply{} + ok := call("Coordinator.CallGetWork", &args, &reply) + + if !ok { + fmt.Printf("call failed!\n") + } + + return reply +} + +// example function to show how to make an RPC call to the coordinator. +// +// the RPC argument and reply types are defined in rpc.go. +func CallExample() { + + // declare an argument structure. + args := ExampleArgs{} + + // fill in the argument(s). + args.X = 99 + + // declare a reply structure. + reply := ExampleReply{} + + // send the RPC request, wait for the reply. + // the "Coordinator.Example" tells the + // receiving server that we'd like to call + // the Example() method of struct Coordinator. + ok := call("Coordinator.Example", &args, &reply) + if ok { + // reply.Y should be 100. 
+ fmt.Printf("reply.Y %v\n", reply.Y) + } else { + fmt.Printf("call failed!\n") + } +} + +// send an RPC request to the coordinator, wait for the response. +// usually returns true. +// returns false if something goes wrong. +func call(rpcname string, args interface{}, reply interface{}) bool { + // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") + sockname := coordinatorSock() + c, err := rpc.DialHTTP("unix", sockname) + if err != nil { + log.Fatal("dialing:", err) + } + defer c.Close() + + err = c.Call(rpcname, args, reply) + if err == nil { + return true + } + + fmt.Println(err) + return false +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/append_entries.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/append_entries.go new file mode 100644 index 0000000..9856584 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/append_entries.go @@ -0,0 +1,214 @@ +package raft + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type AppendEntriesArgs struct { + BaseRPC // leader's term + LeaderId int // so follower can redirect clients + PrevLogIndex int // index of log entry immediately preceding new ones + PrevLogTerm int // term of prevLogIndex entry + Entries []Entry // log entries to store (empty for heartbeat; may send more than one for efficiency) + CommitIndex int // leader's commitIndex +} + +type AppendEntriesReply struct { + BaseRPC // currentTerm, for leader to update itself + Success bool // true if follower contained entry matching prevLogIndex and prevLogTerm + ConflictIndex int // the index of the first conflicting entry +} + +// AppendEntries RPC handler +// Reset the election timer if you get an AppendEntries RPC from the current leader +// (i.e., if the term of the AppendEntries arguments is outdated, you should not reset your timer); +func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + DPrintf("(AppendEntries) [%d] recieve from %d, Term: %d, PrevLogIndex: %d, PrevLogTerm: %d\n", rf.me, args.LeaderId, args.Term, args.PrevLogIndex, args.PrevLogTerm) + + reply.Success = false + reply.ConflictIndex = -1 + + if !rf.checkRequestTerm(args, reply) { + return + } + + if rf.state == CANDIDATE { + rf.state = FOLLOWER + } + + rf.resetElectionTimer() + + prevLogIndex := args.PrevLogIndex - rf.logs[0].Index + + if prevLogIndex < 0 { + // force to send a snapshot + reply.ConflictIndex = 0 + return + } + + // Reply false if log doesn’t contain an entry at prevLogIndex + // whose term matches prevLogTerm (§5.3) + if prevLogIndex >= len(rf.logs) { + reply.ConflictIndex = rf.logs[len(rf.logs)-1].Index + return + } + + // If an existing entry conflicts with a new one (same index + // but different terms), delete the existing entry and all that + // follow it (§5.3) + if rf.logs[prevLogIndex].Term != args.PrevLogTerm { + // optimization + curTerm := rf.logs[prevLogIndex].Term + var conflictIndex int + for i := prevLogIndex; i > 0; i-- { + if rf.logs[i-1].Term != curTerm { + conflictIndex = i + break + } + } + reply.ConflictIndex = conflictIndex + rf.logs[0].Index + return + } + for idx, entry := range args.Entries { + logIndex := entry.Index - rf.logs[0].Index + if logIndex >= len(rf.logs) || rf.logs[logIndex].Term != entry.Term { + DPrintf("(AppendEntries) [%d] append logs: %v\n", rf.me, args.Entries) + rf.logs = append([]Entry{}, 
append(rf.logs[:logIndex], args.Entries[idx:]...)...) + break + } + } + reply.Success = true + if args.CommitIndex > rf.commitIndex { + rf.commitIndex = args.CommitIndex + if args.CommitIndex-rf.logs[0].Index >= len(rf.logs) { + rf.commitIndex = rf.logs[len(rf.logs)-1].Index + } + } + rf.applierCond.Signal() +} + +func (rf *Raft) sendAppendEntries(server int, args *AppendEntriesArgs) { + reply := &AppendEntriesReply{} + ok := rf.peers[server].Call("Raft.AppendEntries", args, reply) + if !ok { + return + } + + DPrintf("(AppendEntries) [%d] recieve reply from %d, Term: %d, Success: %v, ConflictIndex: %d\n", rf.me, server, reply.Term, reply.Success, reply.ConflictIndex) + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + // If successful: update nextIndex and matchIndex for + // follower (§5.3) + if reply.Success { + if len(args.Entries) > 0 { + rf.nextIndex[server] = args.Entries[len(args.Entries)-1].Index + 1 + } + rf.matchIndex[server] = rf.nextIndex[server] - 1 + for _, log := range rf.logs { + index := log.Index + count := 1 + for peer := range rf.peers { + if peer != rf.me && rf.matchIndex[peer] >= index { + count++ + } + } + // If there exists an N such that N > commitIndex, a majority + // of matchIndex[i] ≥ N, and log[N].term == currentTerm: + // set commitIndex = N (§5.3, §5.4). + if count > len(rf.peers)/2 && index > rf.commitIndex && log.Term == rf.currentTerm { + rf.commitIndex = index + } + } + } else { + if reply.ConflictIndex != -1 { + rf.nextIndex[server] = reply.ConflictIndex - 1 + } else { + rf.nextIndex[server] = rf.nextIndex[server] - 1 + } + if rf.nextIndex[server] < 1 { + rf.nextIndex[server] = 1 + } + } + DPrintf("(AppendEntries) [%d] nextIndex: %v, matchIndex: %v, commitIndex: %d\n", rf.me, rf.nextIndex, rf.matchIndex, rf.commitIndex) + rf.applierCond.Signal() +} + +func (rf *Raft) broadcastAppendEntries(isHeartBeat bool) { + for peer := range rf.peers { + if peer != rf.me { + // if it is a heartbeat we dont care the linearizability of logs append + if isHeartBeat { + args := rf.prepareReplicationArgs(peer) + go rf.sendReplicationRPC(peer, args) + } else { + rf.broadcasterCond[peer].Signal() + } + } + } +} + +func (rf *Raft) prepareReplicationArgs(peer int) interface{} { + if rf.nextIndex[peer] > rf.logs[0].Index { + firstLog := rf.logs[0] + nextIndex := rf.nextIndex[peer] - firstLog.Index + prevLog := rf.logs[nextIndex-1] + logs := make([]Entry, len(rf.logs[nextIndex:])) + copy(logs, rf.logs[nextIndex:]) + return &AppendEntriesArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + PrevLogIndex: prevLog.Index, + PrevLogTerm: prevLog.Term, + Entries: logs, + CommitIndex: rf.commitIndex, + } + } else { + return &InstallSnapshotArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + LeaderId: rf.me, + LastIncludedIndex: rf.logs[0].Index, + LastIncludedTerm: rf.logs[0].Term, + Offset: 0, + Data: rf.persister.ReadSnapshot(), + Done: true, + } + } +} + +func (rf *Raft) sendReplicationRPC(peer int, args interface{}) { + switch v := args.(type) { + case *AppendEntriesArgs: + rf.sendAppendEntries(peer, v) + case *InstallSnapshotArgs: + rf.sendInstallSnapshot(peer, v) + default: + panic("(sendReplicationRPC) SHOULD NOT REACH") + } +} + +func (rf *Raft) isReplicationNeeded(peer int) bool { + return rf.state == LEADER && rf.matchIndex[peer] < rf.logs[len(rf.logs)-1].Index +} + +func (rf *Raft) broadcaster(peer int) { + rf.broadcasterCond[peer].L.Lock() + defer rf.broadcasterCond[peer].L.Unlock() + for 
!rf.killed() { + rf.mu.Lock() + for !rf.isReplicationNeeded(peer) { + rf.mu.Unlock() + rf.broadcasterCond[peer].Wait() + rf.mu.Lock() + } + args := rf.prepareReplicationArgs(peer) + rf.mu.Unlock() + rf.sendReplicationRPC(peer, args) + } +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/election.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/election.go new file mode 100644 index 0000000..4274b32 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/election.go @@ -0,0 +1,123 @@ +package raft + +import ( + "math/rand" + "sync/atomic" + "time" +) + +// Source: https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf, Figure 2 + +type RequestVoteArgs struct { + BaseRPC // candidate's term + CandidateId int // candidate requesting vote + LastLogIndex int // index of candidate's last log entry + LastLogTerm int // term of candidate's last log entry +} + +type RequestVoteReply struct { + BaseRPC // currentTerm, for candidate to update itself + VoteGranted bool // true means candidate received vote +} + +// RequestVote RPC handler +// Restart your election timer if you grant a vote to another peer. +func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + reply.VoteGranted = false + + DPrintf("(RequestVote) [%d]: receive vote request from %d, term %d\n", rf.me, args.CandidateId, args.Term) + + if !rf.checkRequestTerm(args, reply) { + return + } + + if (rf.votedFor == -1 || rf.votedFor == args.CandidateId) && rf.isUpToDate(args) { + reply.VoteGranted = true + rf.votedFor = args.CandidateId + rf.resetElectionTimer() + } +} + +func (rf *Raft) isUpToDate(args *RequestVoteArgs) bool { + lastLog := rf.logs[len(rf.logs)-1] + candidateIndex := args.LastLogIndex + candidateTerm := args.LastLogTerm + return candidateTerm > lastLog.Term || (candidateTerm == lastLog.Term && candidateIndex >= lastLog.Index) +} + +func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, voteCount *int32) { + reply := &RequestVoteReply{} + ok := rf.peers[server].Call("Raft.RequestVote", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + defer rf.persist() + + if !rf.checkResponseTerm(args, reply, true) { + return + } + + if !reply.VoteGranted { + return + } + + DPrintf("(RequestVote) [%d]: received vote from %d, voteCount: %d\n", rf.me, server, *voteCount) + + // If votes received from majority of servers: become leader + if atomic.AddInt32(voteCount, 1) > int32(len(rf.peers)/2) && + rf.state == CANDIDATE && + rf.currentTerm == args.Term { + rf.state = LEADER + lastLogIndex := rf.logs[len(rf.logs)-1].Index + for i := range rf.peers { + rf.nextIndex[i] = lastLogIndex + 1 + rf.matchIndex[i] = 0 + } + DPrintf("[%d]: become leader to term %d\n", rf.me, rf.currentTerm) + // send initial empty AppendEntries RPCs (heartbeat) to each server immediately + rf.broadcastAppendEntries(true) + } + DPrintf("(RequestVote) [%d]: voteCount: %d\n", rf.me, *voteCount) +} + +func (rf *Raft) startElection() { + rf.currentTerm++ + rf.state = CANDIDATE + rf.votedFor = rf.me + rf.resetElectionTimer() + DPrintf("(RequestVote) [%d]: start election, term %d", rf.me, rf.currentTerm) + lastLog := rf.logs[len(rf.logs)-1] + + voteCount := int32(1) + args := RequestVoteArgs{ + BaseRPC: BaseRPC{rf.currentTerm}, + CandidateId: rf.me, + LastLogIndex: lastLog.Index, + LastLogTerm: lastLog.Term, + } + + for id := range 
rf.peers { + if id == rf.me { + continue + } + go rf.sendRequestVote(id, &args, &voteCount) + } +} + +func (rf *Raft) resetElectionTimer() { + // election timeout range from 350 to 550 + ms := 350 + (rand.Int63() % 200) + rf.electionTimeStamp = time.Now() + rf.electionTimeout = time.Duration(ms) * time.Millisecond +} + +func (rf *Raft) isElectionTimeout() bool { + return time.Now().After(rf.electionTimeStamp.Add(rf.electionTimeout)) +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/install_snapshot.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/install_snapshot.go new file mode 100644 index 0000000..7ba645e --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/install_snapshot.go @@ -0,0 +1,92 @@ +package raft + +type InstallSnapshotArgs struct { + BaseRPC + LeaderId int + LastIncludedIndex int + LastIncludedTerm int + Offset int + Data []byte + Done bool +} + +type InstallSnapshotReply struct { + BaseRPC +} + +// InstallSnapshot RPC handler +func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) { + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkRequestTerm(args, reply) { + return + } + + if args.LastIncludedIndex <= rf.commitIndex { + return + } + prevCommitIndex := rf.commitIndex + prevLastApplied := rf.lastApplied + defer DPrintf("(InstallSnapshot) [%d]: LastIncludedIndex: %d, LastIncludedTerm: %d, prevCommitIndex: %d, prevLastApplied: %d\n", rf.me, args.LastIncludedIndex, args.LastIncludedTerm, prevCommitIndex, prevLastApplied) + rf.resetElectionTimer() + + rf.commitIndex = args.LastIncludedIndex + rf.lastApplied = args.LastIncludedIndex + // 2. Create new snapshot file if first chunk (offset is 0) + // 3. Write data into snapshot file at given offset + // 4. Reply and wait for more data chunks if done is false + if !args.Done { + return + } + // 5. Save snapshot file, discard any existing or partial snapshot with a + // smaller index + // 6. If existing log entry has same index and term as snapshot’s last + // included entry, retain log entries following it and reply + // 7. Discard the entire log + // 8. Reset state machine using snapshot contents (and load snapshot’s + // cluster configuration) + firstLogIndex := rf.logs[0].Index + if firstLogIndex <= args.LastIncludedIndex { + rf.logs = append([]Entry{}, Entry{ + Index: args.LastIncludedIndex, + Term: args.LastIncludedTerm, + Command: nil, + }) + } else if firstLogIndex < args.LastIncludedIndex { + trimLen := args.LastIncludedIndex - firstLogIndex + rf.logs = append([]Entry{}, rf.logs[trimLen:]...) 
+ rf.logs[0].Command = nil + } + rf.persister.Save(rf.encodeState(), args.Data) + rf.smsg = &ApplyMsg{ + SnapshotValid: true, + Snapshot: args.Data, + SnapshotTerm: args.LastIncludedTerm, + SnapshotIndex: args.LastIncludedIndex, + } +} + +func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs) { + reply := &InstallSnapshotReply{} + ok := rf.peers[server].Call("Raft.InstallSnapshot", args, reply) + if !ok { + return + } + + rf.mu.Lock() + defer rf.mu.Unlock() + + if !rf.checkResponseTerm(args, reply, false) { + return + } + + if args.LastIncludedIndex != rf.logs[0].Index { + return + } + + rf.nextIndex[server] = args.LastIncludedIndex + 1 + rf.matchIndex[server] = args.LastIncludedIndex + + rf.persister.Save(rf.encodeState(), args.Data) +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/persister.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/persister.go new file mode 100644 index 0000000..c5f816c --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/persister.go @@ -0,0 +1,70 @@ +package raft + +// +// support for Raft and kvraft to save persistent +// Raft state (log &c) and k/v server snapshots. +// +// we will use the original persister.go to test your code for grading. +// so, while you can modify this code to help you debug, please +// test with the original before submitting. +// + +import "sync" + +type Persister struct { + mu sync.Mutex + raftstate []byte + snapshot []byte +} + +func MakePersister() *Persister { + return &Persister{} +} + +func clone(orig []byte) []byte { + x := make([]byte, len(orig)) + copy(x, orig) + return x +} + +func (ps *Persister) Copy() *Persister { + ps.mu.Lock() + defer ps.mu.Unlock() + np := MakePersister() + np.raftstate = ps.raftstate + np.snapshot = ps.snapshot + return np +} + +func (ps *Persister) ReadRaftState() []byte { + ps.mu.Lock() + defer ps.mu.Unlock() + return clone(ps.raftstate) +} + +func (ps *Persister) RaftStateSize() int { + ps.mu.Lock() + defer ps.mu.Unlock() + return len(ps.raftstate) +} + +// Save both Raft state and K/V snapshot as a single atomic action, +// to help avoid them getting out of sync. +func (ps *Persister) Save(raftstate []byte, snapshot []byte) { + ps.mu.Lock() + defer ps.mu.Unlock() + ps.raftstate = clone(raftstate) + ps.snapshot = clone(snapshot) +} + +func (ps *Persister) ReadSnapshot() []byte { + ps.mu.Lock() + defer ps.mu.Unlock() + return clone(ps.snapshot) +} + +func (ps *Persister) SnapshotSize() int { + ps.mu.Lock() + defer ps.mu.Unlock() + return len(ps.snapshot) +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/raft.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/raft.go new file mode 100644 index 0000000..9946898 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/raft.go @@ -0,0 +1,416 @@ +package raft + +// +// this is an outline of the API that raft must expose to +// the service (or tester). see comments below for +// each of these functions for more details. +// +// rf = Make(...) +// create a new Raft server. 
+// rf.Start(command interface{}) (index, term, isleader) +// start agreement on a new log entry +// rf.GetState() (term, isLeader) +// ask a Raft for its current term, and whether it thinks it is leader +// ApplyMsg +// each time a new entry is committed to the log, each Raft peer +// should send an ApplyMsg to the service (or tester) +// in the same server. +// + +import ( + // "bytes" + + "bytes" + "log" + "sync" + "sync/atomic" + "time" + + // "6.5840/labgob" + "6.5840/labgob" + "6.5840/labrpc" +) + +// as each Raft peer becomes aware that successive log entries are +// committed, the peer should send an ApplyMsg to the service (or +// tester) on the same server, via the applyCh passed to Make(). set +// CommandValid to true to indicate that the ApplyMsg contains a newly +// committed log entry. +// +// in part 3D you'll want to send other kinds of messages (e.g., +// snapshots) on the applyCh, but set CommandValid to false for these +// other uses. +type ApplyMsg struct { + CommandValid bool + Command interface{} + CommandIndex int + CommandTerm int + + // For 3D: + SnapshotValid bool + Snapshot []byte + SnapshotTerm int + SnapshotIndex int +} + +type Entry struct { + Term int + Index int + Command interface{} +} + +// Base struct for common fields +type BaseRPC struct { + Term int +} + +// Implement RaftRPC interface for BaseRPC +func (b *BaseRPC) GetTerm() int { + return b.Term +} + +func (b *BaseRPC) SetTerm(term int) { + b.Term = term +} + +// RaftRPC interface +type RaftRPC interface { + GetTerm() int + SetTerm(int) +} + +type ServerState int + +const ( + FOLLOWER ServerState = iota + CANDIDATE + LEADER +) + +// A Go object implementing a single Raft peer. +type Raft struct { + mu sync.Mutex // Lock to protect shared access to this peer's state + peers []*labrpc.ClientEnd // RPC end points of all peers + persister *Persister // Object to hold this peer's persisted state + me int // this peer's index into peers[] + dead int32 // set by Kill() + heartbeatTimeout time.Duration + electionTimeout time.Duration + electionTimeStamp time.Time + applyCh chan ApplyMsg + + // state a Raft server must maintain. + broadcasterCond []*sync.Cond + applierCond *sync.Cond + + // server state + state ServerState + + // presistent state on all servers + currentTerm int // latest term server has seen (initialized to 0 on first boot, increases monotonically) + votedFor int // candidateId that received vote in current term (or null if none) + logs []Entry // log entries; each entry contains command for state machine, and term when entry was received by leader (first index is 1) + + // volatile state on all servers + commitIndex int // index of highest log entry known to be committed (initialized to 0, increases monotonically) + lastApplied int // index of highest log entry applied to state machine (initialized to 0, increases monotonically) + + // volatile state on leaders (reinitialized after election) + nextIndex []int // for each server, index of the next log entry to send to that server (initialized to leader last log index + 1) + matchIndex []int // for each server, index of highest log entry known to be replicated on server (initialized to 0, increases monotonically) + + // snapshot msg + smsg *ApplyMsg +} + +// return currentTerm and whether this server +// believes it is the leader. 
+func (rf *Raft) GetState() (int, bool) {
+	rf.mu.Lock()
+	defer rf.mu.Unlock()
+	return rf.currentTerm, rf.state == LEADER
+}
+
+func (rf *Raft) encodeState() []byte {
+	w := new(bytes.Buffer)
+	e := labgob.NewEncoder(w)
+	e.Encode(rf.currentTerm)
+	e.Encode(rf.votedFor)
+	e.Encode(rf.logs)
+	return w.Bytes()
+}
+
+// save Raft's persistent state to stable storage,
+// where it can later be retrieved after a crash and restart.
+// see paper's Figure 2 for a description of what should be persistent.
+// before you've implemented snapshots, you should pass nil as the
+// second argument to persister.Save().
+// after you've implemented snapshots, pass the current snapshot
+// (or nil if there's not yet a snapshot).
+func (rf *Raft) persist() {
+	if rf.persister.ReadSnapshot() != nil {
+		rf.persister.Save(rf.encodeState(), rf.persister.ReadSnapshot())
+	} else {
+		rf.persister.Save(rf.encodeState(), nil)
+	}
+}
+
+// restore previously persisted state.
+func (rf *Raft) readPersist(data []byte) {
+	if data == nil || len(data) < 1 { // bootstrap without any state
+		return
+	}
+	r := bytes.NewBuffer(data)
+	d := labgob.NewDecoder(r)
+	var currentTerm int
+	var votedFor int
+	var logs []Entry
+
+	if d.Decode(&currentTerm) != nil || d.Decode(&votedFor) != nil || d.Decode(&logs) != nil {
+		log.Fatal("failed to read persist\n")
+	} else {
+		DPrintf("[%d]: read persist, currentTerm: %d, votedFor: %d, logs: %v\n", rf.me, currentTerm, votedFor, logs)
+		rf.currentTerm = currentTerm
+		rf.votedFor = votedFor
+		rf.logs = logs
+		rf.lastApplied = rf.logs[0].Index
+		rf.commitIndex = rf.logs[0].Index
+	}
+}
+
+// the service says it has created a snapshot that has
+// all info up to and including index. this means the
+// service no longer needs the log through (and including)
+// that index. Raft should now trim its log as much as possible.
+func (rf *Raft) Snapshot(index int, snapshot []byte) {
+	// Your code here (3D).
+	rf.mu.Lock()
+	defer rf.mu.Unlock()
+	// if the snapshot is outdated, just ignore it
+	if rf.logs[0].Index >= index {
+		return
+	}
+	firstLogIndex := rf.logs[0].Index
+	trimLen := index - firstLogIndex
+	// trim the logs
+	rf.logs = append([]Entry{}, rf.logs[trimLen:]...)
+	rf.logs[0].Command = nil
+	rf.persister.Save(rf.encodeState(), snapshot)
+}
+
+// the service using Raft (e.g. a k/v server) wants to start
+// agreement on the next command to be appended to Raft's log. if this
+// server isn't the leader, returns false. otherwise start the
+// agreement and return immediately. there is no guarantee that this
+// command will ever be committed to the Raft log, since the leader
+// may fail or lose an election. even if the Raft instance has been killed,
+// this function should return gracefully.
+//
+// the first return value is the index that the command will appear at
+// if it's ever committed. the second return value is the current
+// term. the third return value is true if this server believes it is
+// the leader.
+func (rf *Raft) Start(command interface{}) (int, int, bool) {
+	rf.mu.Lock()
+	defer rf.mu.Unlock()
+	if rf.state != LEADER {
+		return -1, -1, false
+	}
+	defer DPrintf("(Start) [%d]: command %+v, index:%d, term: %d\n", rf.me, command, rf.logs[len(rf.logs)-1].Index, rf.currentTerm)
+	rf.logs = append(rf.logs, Entry{
+		Term:    rf.currentTerm,
+		Index:   rf.logs[len(rf.logs)-1].Index + 1,
+		Command: command,
+	})
+	rf.broadcastAppendEntries(false)
+	// Your code here (3B). 
+ return rf.logs[len(rf.logs)-1].Index, rf.currentTerm, true +} + +// Warning: this function is not thread-safe +func (rf *Raft) resetNewTermState(targetTerm int) { + DPrintf("(ResetTerm)[%d]: received newer term, set term to %d\n", rf.me, targetTerm) + if rf.currentTerm < targetTerm { + rf.votedFor = -1 + } + rf.currentTerm = targetTerm + rf.state = FOLLOWER // reset to follower +} + +// Reply false if term < currentTerm (§5.1) +// If RPC request contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkRequestTerm(args, reply RaftRPC) bool { + term := args.GetTerm() + defer reply.SetTerm(rf.currentTerm) + if term < rf.currentTerm { + return false + } + if term > rf.currentTerm { + rf.resetNewTermState(term) + } + return true +} + +// If RPC request or response contains term T > currentTerm: +// set currentTerm = T, convert to follower (§5.1) +func (rf *Raft) checkResponseTerm(args, reply RaftRPC, isElection bool) bool { + argsTerm := args.GetTerm() + replyTerm := reply.GetTerm() + if replyTerm > argsTerm { + rf.resetNewTermState(replyTerm) + rf.resetElectionTimer() + return false + } + return isElection || (rf.state == LEADER) +} + +// the tester doesn't halt goroutines created by Raft after each test, +// but it does call the Kill() method. your code can use killed() to +// check whether Kill() has been called. the use of atomic avoids the +// need for a lock. +// +// the issue is that long-running goroutines use memory and may chew +// up CPU time, perhaps causing later tests to fail and generating +// confusing debug output. any goroutine with a long-running loop +// should call killed() to check whether it should stop. +func (rf *Raft) Kill() { + atomic.StoreInt32(&rf.dead, 1) + // Your code here, if desired. 
+}
+
+func (rf *Raft) killed() bool {
+	z := atomic.LoadInt32(&rf.dead)
+	return z == 1
+}
+
+// a dedicated applier goroutine guarantees that each log entry is pushed into applyCh exactly once, so that the service's applying of entries and Raft's committing of entries can proceed in parallel
+func (rf *Raft) applier() {
+	for !rf.killed() {
+		rf.mu.Lock()
+		// if there is no need to apply entries, just release the CPU and wait for another goroutine's signal when new entries are committed
+		for rf.lastApplied >= rf.commitIndex {
+			rf.applierCond.Wait()
+		}
+		firstLogIndex := rf.logs[0].Index
+		commitIndex, lastApplied := rf.commitIndex, rf.lastApplied
+		DPrintf("(applier) [%d]: commitIndex: %d, lastApplied: %d, logFirstIndex: %d, logLastIndex: %d\n", rf.me, commitIndex, lastApplied, firstLogIndex, rf.logs[len(rf.logs)-1].Index)
+		entries := make([]Entry, commitIndex-lastApplied)
+		copy(entries, rf.logs[lastApplied+1-firstLogIndex:commitIndex+1-firstLogIndex])
+		if rf.smsg != nil {
+			msg := rf.smsg
+			rf.smsg = nil
+			rf.mu.Unlock()
+			rf.applyCh <- *msg
+		} else {
+			rf.mu.Unlock()
+		}
+		for _, entry := range entries {
+			DPrintf("(applier) [%d]: apply entry %+v\n", rf.me, entry)
+			rf.applyCh <- ApplyMsg{
+				CommandValid: true,
+				Command:      entry.Command,
+				CommandTerm:  entry.Term,
+				CommandIndex: entry.Index,
+			}
+		}
+		rf.mu.Lock()
+		// use commitIndex rather than rf.commitIndex because rf.commitIndex may change during the Unlock() and Lock()
+		// use Max(rf.lastApplied, commitIndex) rather than commitIndex directly to avoid a concurrent InstallSnapshot RPC rolling lastApplied back
+		if rf.lastApplied < commitIndex {
+			rf.lastApplied = commitIndex
+		}
+		rf.mu.Unlock()
+	}
+}
+
+/**
+ * Let's illustrate the timeline of the ticker function.
+ * e: election timeout
+ * h: heartbeat timeout
+ *
+ * ---- h ---- h ---- h ---- h ---- h ---- ...
+ *
+ * First, the server wakes up every fixed heartbeat timeout. This timeout is
+ * relatively shorter than the election timeout. If the server is not a leader,
+ * it basically does nothing about the heartbeat.
+ *
+ * However, every time the server wakes up, it checks whether the election
+ * timeout has been reached. If it is not a leader, it might start a new election.
+ *
+ *                              v election timeout found!
+ * ---- h1 ---- h2 ---- h3 ---- h ---- h ---- ...
+ * --------- e1 ------ e2 ------------ e ---- ...
+ *
+ * Resetting the election timeout when the server receives a heartbeat from the
+ * leader or grants a vote to another server prevents an election. One
+ * shortcoming of the current implementation is that an election timeout does
+ * not trigger a new election immediately; it waits until the next heartbeat
+ * timeout.
+ */
+func (rf *Raft) ticker() {
+	for !rf.killed() {
+		rf.mu.Lock()
+		if rf.state == LEADER {
+			rf.broadcastAppendEntries(true)
+		} else if rf.isElectionTimeout() {
+			rf.startElection()
+		}
+		rf.mu.Unlock()
+		time.Sleep(rf.heartbeatTimeout)
+	}
+}
+
+// the service or tester wants to create a Raft server. the ports
+// of all the Raft servers (including this one) are in peers[]. this
+// server's port is peers[me]. all the servers' peers[] arrays
+// have the same order. persister is a place for this server to
+// save its persistent state, and also initially holds the most
+// recent saved state, if any. applyCh is a channel on which the
+// tester or service expects Raft to send ApplyMsg messages.
+// Make() must return quickly, so it should start goroutines
+// for any long-running work. 
+func Make(peers []*labrpc.ClientEnd, me int, + persister *Persister, applyCh chan ApplyMsg) *Raft { + rf := &Raft{} + rf.peers = peers + rf.persister = persister + rf.me = me + rf.applyCh = applyCh + rf.heartbeatTimeout = 125 * time.Millisecond + rf.resetElectionTimer() + rf.state = FOLLOWER + rf.votedFor = -1 + rf.logs = make([]Entry, 0) + + // dummy entry to make the index start from 1 + rf.logs = append(rf.logs, Entry{0, 0, nil}) + + rf.commitIndex = 0 + rf.lastApplied = 0 + + rf.applierCond = sync.NewCond(&rf.mu) + rf.broadcasterCond = make([]*sync.Cond, len(peers)) + + rf.nextIndex = make([]int, len(peers)) + rf.matchIndex = make([]int, len(peers)) + + for id := range peers { + rf.nextIndex[id] = 1 + if id != rf.me { + rf.broadcasterCond[id] = sync.NewCond(&sync.Mutex{}) + go rf.broadcaster(id) + } + } + + rf.smsg = nil + + // initialize from state persisted before a crash + rf.readPersist(persister.ReadRaftState()) + + // start ticker goroutine to start elections + go rf.ticker() + + go rf.applier() + + return rf +} diff --git a/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/util.go b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/util.go new file mode 100644 index 0000000..37c7fe6 --- /dev/null +++ b/benchmarks/courselab_bench/data/mit_6_5840_2024/5b_shardkv/starter_files/raft/util.go @@ -0,0 +1,16 @@ +package raft + +import ( + "log" + "os" +) + +// Debugging +var Debug = os.Getenv("DEBUG") == "1" + +func DPrintf(format string, a ...interface{}) { + if !Debug { + return + } + log.Printf(format, a...) +} diff --git a/benchmarks/courselab_bench/prepare_dataset.py b/benchmarks/courselab_bench/prepare_dataset.py index 7721b6a..6d5c31c 100755 --- a/benchmarks/courselab_bench/prepare_dataset.py +++ b/benchmarks/courselab_bench/prepare_dataset.py @@ -28,7 +28,7 @@ def load_task_from_folder(task_folder: Path) -> dict: with evaluate_path.open("r") as f: evaluate_script = f.read() - return { + task_data = { "instance_id": config["instance_id"], "course_id": config["course_id"], "problem_statement": problem_statement, @@ -39,8 +39,16 @@ def load_task_from_folder(task_folder: Path) -> dict: "evaluate_script": evaluate_script, "repo_url": config.get("repo_url"), "base_commit": config.get("base_commit"), + "task_folder": str(task_folder.resolve()), } + if "starter_files" in config: + task_data["starter_files"] = config["starter_files"] + if "output_files" in config: + task_data["output_files"] = config["output_files"] + + return task_data + def prepare_dataset(data_dir: Path, output_file: Path) -> None: if not data_dir.exists(): diff --git a/benchmarks/courselab_bench/run_benchmark.py b/benchmarks/courselab_bench/run_benchmark.py index 40c0bdb..2fd5b33 100644 --- a/benchmarks/courselab_bench/run_benchmark.py +++ b/benchmarks/courselab_bench/run_benchmark.py @@ -60,10 +60,15 @@ def main(): for idx, task in enumerate(tasks, 1): logger.info(f"\n[{idx}/{len(tasks)}] {task['instance_id']}") + task_folder = task.get("task_folder") + if task_folder: + task_folder = Path(task_folder) + env = DockerEnvironment( image=task["docker_image"], timeout=task.get("timeout_minutes", 30) * 60, work_dir="/workspace", + task_folder=task_folder, ) model = LiteLLMModel(model_name=args.model, temperature=0.0, max_tokens=4096) agent = REACTAgent( @@ -71,7 +76,7 @@ def main(): ) try: - result = execute_task(task, agent, env) + result = execute_task(task, agent, env, output_dir=output_dir) result["course_id"] = task["course_id"] result["passed"] = 
evaluate_task(result) From 6989976e774b9f85b27593c7fb0a6f8554d37696 Mon Sep 17 00:00:00 2001 From: Tarek Date: Thu, 18 Dec 2025 15:49:42 -0800 Subject: [PATCH 09/10] feat(courselab_bench): add validation for starter and output files in config Signed-off-by: Tarek --- .../test__simple__echo/config.json | 13 ++++++-- .../courselab_bench/tests/test_data_schema.py | 32 +++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/benchmarks/courselab_bench/data/test_course/test__simple__echo/config.json b/benchmarks/courselab_bench/data/test_course/test__simple__echo/config.json index 428122b..0400e51 100644 --- a/benchmarks/courselab_bench/data/test_course/test__simple__echo/config.json +++ b/benchmarks/courselab_bench/data/test_course/test__simple__echo/config.json @@ -3,5 +3,14 @@ "course_id": "test_course", "docker_image": "xuafeng/swe-go-python:latest", "timeout_minutes": 5, - "tags": ["simple", "test"] -} + "tags": [ + "simple", + "test" + ], + "output_files": [ + { + "src": "/workspace/result.txt", + "dest": "result.txt" + } + ] +} \ No newline at end of file diff --git a/benchmarks/courselab_bench/tests/test_data_schema.py b/benchmarks/courselab_bench/tests/test_data_schema.py index 7b4c3cb..a3b5ec2 100644 --- a/benchmarks/courselab_bench/tests/test_data_schema.py +++ b/benchmarks/courselab_bench/tests/test_data_schema.py @@ -81,6 +81,24 @@ def test_config_optional_fields(self): if "base_commit" in config: assert isinstance(config["base_commit"], (str, type(None))) + if "starter_files" in config: + assert isinstance(config["starter_files"], list) + for item in config["starter_files"]: + assert isinstance(item, dict) + assert "src" in item + assert "dest" in item + assert isinstance(item["src"], str) + assert isinstance(item["dest"], str) + + if "output_files" in config: + assert isinstance(config["output_files"], list) + for item in config["output_files"]: + assert isinstance(item, dict) + assert "src" in item + assert "dest" in item + assert isinstance(item["src"], str) + assert isinstance(item["dest"], str) + def test_scripts_executable(self): task_folders = get_task_folders(DATA_DIR) script_files = ["preprocess.sh", "evaluate.sh"] @@ -102,6 +120,20 @@ def test_instance_ids_unique(self): assert len(instance_ids) == len(set(instance_ids)), "Duplicate instance_ids found" + def test_starter_files_exist(self): + task_folders = get_task_folders(DATA_DIR) + for task_folder in task_folders: + config_path = task_folder / "config.json" + with config_path.open("r") as f: + config = json.load(f) + + if "starter_files" in config: + for item in config["starter_files"]: + src_file = task_folder / "starter_files" / item["src"] + assert ( + src_file.exists() + ), f"{task_folder.name}: starter file not found: {item['src']}" + if __name__ == "__main__": pytest.main([__file__, "-v"]) From 0af69cf6ce1e3b3f7e43696c2ba46641db1d7b16 Mon Sep 17 00:00:00 2001 From: Tarek Date: Tue, 6 Jan 2026 11:13:21 -0800 Subject: [PATCH 10/10] refactor: deprecate old course lab benchmark Signed-off-by: Tarek --- README.md | 4 +- benchmarks/course_lab_bench/Dockerfile | 14 - benchmarks/course_lab_bench/README.md | 140 - benchmarks/course_lab_bench/add_agents.md | 152 - .../data/benchmark/convert_promblems.py | 51 - .../benchmark/course_lab_task_examples.jsonl | 2 - .../course_lab_tasks_mit_65840_2024.jsonl | 26 - .../benchmark/database_systems_mit_65830.json | 647 -- .../data/benchmark/env_setup_examples.jsonl | 3 - .../data/benchmark/lab_exam_data.csv | 5660 ----------------- 
.../data/benchmark/lab_exam_data_20250529.csv | 5621 ---------------- .../data/benchmark/lab_exercise_data.xlsx | Bin 265216 -> 0 bytes .../data/benchmark/lab_exercise_data_old.xlsx | Bin 227840 -> 0 bytes .../data/benchmark/problems/system_lab_1.md | 219 - .../data/benchmark/problems/system_lab_10.md | 122 - .../data/benchmark/problems/system_lab_11.md | 119 - .../data/benchmark/problems/system_lab_12.md | 221 - .../data/benchmark/problems/system_lab_13.md | 67 - .../data/benchmark/problems/system_lab_14.md | 72 - .../data/benchmark/problems/system_lab_15.md | 79 - .../data/benchmark/problems/system_lab_16.md | 65 - .../data/benchmark/problems/system_lab_17.md | 110 - .../data/benchmark/problems/system_lab_18.md | 114 - .../data/benchmark/problems/system_lab_19.md | 133 - .../data/benchmark/problems/system_lab_2.md | 41 - .../data/benchmark/problems/system_lab_20.md | 124 - .../data/benchmark/problems/system_lab_21.md | 79 - .../data/benchmark/problems/system_lab_22.md | 103 - .../data/benchmark/problems/system_lab_23.md | 82 - .../data/benchmark/problems/system_lab_24.md | 166 - .../data/benchmark/problems/system_lab_25.md | 85 - .../data/benchmark/problems/system_lab_26.md | 122 - .../data/benchmark/problems/system_lab_3.md | 67 - .../data/benchmark/problems/system_lab_4.md | 108 - .../data/benchmark/problems/system_lab_5.md | 112 - .../data/benchmark/problems/system_lab_6.md | 131 - .../data/benchmark/problems/system_lab_7.md | 119 - .../data/benchmark/problems/system_lab_8.md | 112 - .../data/benchmark/problems/system_lab_9.md | 81 - .../benchmark/problems/system_lab_test.md | 30 - .../problems/test-repo-problems/1.md | 14 - .../problems/test-repo-problems/1a.md | 14 - .../problems/test-repo-problems/22.md | 10 - .../benchmark/problems/test_problems/1.md | 67 - .../benchmark/problems/test_problems/2.md | 102 - .../benchmark/problems/test_problems/3.md | 75 - .../problems/test_problems/simple.md | 34 - .../problems/test_problems/simple1.md | 49 - .../benchmark/problems/test_problems/tesr.md | 28 - .../example_bench_pretrain_timestamp.jsonl | 1 - .../sft/example_bench_sft_timestamp.jsonl | 2 - benchmarks/course_lab_bench/env.toml | 13 - benchmarks/course_lab_bench/install.sh | 57 - benchmarks/course_lab_bench/requirements.txt | 6 - benchmarks/course_lab_bench/run.sh | 39 - benchmarks/course_lab_bench/src/__init__.py | 1 - .../src/agents/claudecode/install.sh | 8 - .../src/agents/claudecode/runner.sh | 14 - .../src/agents/minisweagent/runner.sh | 20 - .../src/agents/openhand/config.toml | 6 - .../src/agents/openhand/install.sh | 18 - .../src/agents/openhand/runner.sh | 17 - .../course_lab_bench/src/config_aoi.yaml | 122 - .../src/config_aoi_anthropic_tools.yaml | 69 - benchmarks/course_lab_bench/src/main.py | 110 - benchmarks/course_lab_bench/src/main_patch.py | 122 - .../course_lab_bench/src/patch_evaluator.py | 133 - .../course_lab_bench/src/run_eval_in_env.py | 165 - .../course_lab_bench/src/run_eval_sweagent.py | 53 - benchmarks/course_lab_bench/test.sh | 10 - benchmarks/course_lab_bench/tests/__init__.py | 0 .../go-python.Dockerfile | 0 72 files changed, 2 insertions(+), 16580 deletions(-) delete mode 100644 benchmarks/course_lab_bench/Dockerfile delete mode 100644 benchmarks/course_lab_bench/README.md delete mode 100644 benchmarks/course_lab_bench/add_agents.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/convert_promblems.py delete mode 100644 benchmarks/course_lab_bench/data/benchmark/course_lab_task_examples.jsonl delete mode 100644 
benchmarks/course_lab_bench/data/benchmark/course_lab_tasks_mit_65840_2024.jsonl delete mode 100644 benchmarks/course_lab_bench/data/benchmark/database_systems_mit_65830.json delete mode 100644 benchmarks/course_lab_bench/data/benchmark/env_setup_examples.jsonl delete mode 100644 benchmarks/course_lab_bench/data/benchmark/lab_exam_data.csv delete mode 100644 benchmarks/course_lab_bench/data/benchmark/lab_exam_data_20250529.csv delete mode 100644 benchmarks/course_lab_bench/data/benchmark/lab_exercise_data.xlsx delete mode 100644 benchmarks/course_lab_bench/data/benchmark/lab_exercise_data_old.xlsx delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_1.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_10.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_11.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_12.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_13.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_14.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_15.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_16.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_17.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_18.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_19.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_2.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_20.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_21.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_22.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_23.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_24.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_25.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_26.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_3.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_4.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_5.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_6.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_7.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_8.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_9.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/system_lab_test.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/test-repo-problems/1.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/test-repo-problems/1a.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/test-repo-problems/22.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/test_problems/1.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/test_problems/2.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/test_problems/3.md delete mode 100644 
benchmarks/course_lab_bench/data/benchmark/problems/test_problems/simple.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/test_problems/simple1.md delete mode 100644 benchmarks/course_lab_bench/data/benchmark/problems/test_problems/tesr.md delete mode 100644 benchmarks/course_lab_bench/data/pretrain/example_bench_pretrain_timestamp.jsonl delete mode 100644 benchmarks/course_lab_bench/data/sft/example_bench_sft_timestamp.jsonl delete mode 100644 benchmarks/course_lab_bench/env.toml delete mode 100644 benchmarks/course_lab_bench/install.sh delete mode 100644 benchmarks/course_lab_bench/requirements.txt delete mode 100644 benchmarks/course_lab_bench/run.sh delete mode 100644 benchmarks/course_lab_bench/src/__init__.py delete mode 100644 benchmarks/course_lab_bench/src/agents/claudecode/install.sh delete mode 100644 benchmarks/course_lab_bench/src/agents/claudecode/runner.sh delete mode 100644 benchmarks/course_lab_bench/src/agents/minisweagent/runner.sh delete mode 100644 benchmarks/course_lab_bench/src/agents/openhand/config.toml delete mode 100644 benchmarks/course_lab_bench/src/agents/openhand/install.sh delete mode 100644 benchmarks/course_lab_bench/src/agents/openhand/runner.sh delete mode 100644 benchmarks/course_lab_bench/src/config_aoi.yaml delete mode 100644 benchmarks/course_lab_bench/src/config_aoi_anthropic_tools.yaml delete mode 100644 benchmarks/course_lab_bench/src/main.py delete mode 100644 benchmarks/course_lab_bench/src/main_patch.py delete mode 100644 benchmarks/course_lab_bench/src/patch_evaluator.py delete mode 100644 benchmarks/course_lab_bench/src/run_eval_in_env.py delete mode 100644 benchmarks/course_lab_bench/src/run_eval_sweagent.py delete mode 100644 benchmarks/course_lab_bench/test.sh delete mode 100644 benchmarks/course_lab_bench/tests/__init__.py rename benchmarks/{course_lab_bench => courselab_bench}/go-python.Dockerfile (100%) diff --git a/README.md b/README.md index 47300b5..d435127 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,8 @@ The benchmark framework is **still under development**. If you have any question System Intelligence Benchmark currently includes the following example benchmarks. Each benchmark assesses specific capabilities across multiple levels within a given research direction. Some benchmarks are still under development — we're actively updating them. Stay tuned! -- **System Exam Benchmark** ([benchmarks/course_exam_bench/](benchmarks/course_exam_bench/)) - Tests LLM understanding of system concepts through university course exams (54 questions across 4 exams) -- **System Lab Benchmark** ([benchmarks/course_lab_bench/](benchmarks/course_lab_bench/)) - Assesses AI capability on practical system course labs and projects +- **System Exam Benchmark** ([benchmarks/course_exam_bench/](benchmarks/course_exam_bench/)) - Tests LLM understanding of system concepts through university course exams +- **System Lab Benchmark** ([benchmarks/courselab_bench/](benchmarks/courselab_bench/)) - Assesses AI capability on practical system course labs and projects - **System Artifact Benchmark** ([benchmarks/arteval_bench/](benchmarks/arteval_bench/)) - Evaluates AI performance on artifact evaluation - **System Modeling Benchmark** ([benchmarks/sysmobench/](benchmarks/sysmobench/)) - Evaluates an agent's ability to produce correct TLA+ models for real-world concurrent and distributed systems, covering system capabilities across system comprehension, abstraction, and potentially tool fluency. 
- **TopoSense Benchmark** ([benchmarks/toposense_bench/](benchmarks/toposense_bench/)) - Evaluates Semantic-Spatial Sensor Scheduling (S³) capabilities in large-scale IoT digital twins (5,250 queries across 2,510 cameras) diff --git a/benchmarks/course_lab_bench/Dockerfile b/benchmarks/course_lab_bench/Dockerfile deleted file mode 100644 index 3e1536e..0000000 --- a/benchmarks/course_lab_bench/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM ubuntu:24.04 - -WORKDIR /usr/src -COPY . . -RUN apt-get update && apt-get install -y \ - build-essential \ - git \ - wget \ - python3-pip \ - python3-venv - -RUN chmod +x install.sh test.sh && ./install.sh - -ENTRYPOINT ["./test.sh"] diff --git a/benchmarks/course_lab_bench/README.md b/benchmarks/course_lab_bench/README.md deleted file mode 100644 index 3e5a489..0000000 --- a/benchmarks/course_lab_bench/README.md +++ /dev/null @@ -1,140 +0,0 @@ -# Course Lab Benchmark [Under Development] - -## Scenario Description - -This benchmark evaluates AI agents on their ability to complete systems course labs and projects, particularly from MIT's 6.5840 (Distributed Systems). The benchmark tests agents on realistic system assignments that require: - -- Understanding complex codebases written in Go -- Implementing distributed systems concepts (MapReduce, Raft, key-value stores) -- Working with database internals (storage, query execution) -- Writing concurrent, race-free code -- Passing comprehensive test suites - -### Task Details - -- **Input**: JSON/JSONL files containing: - - Task descriptions from course lab assignments - - Repository information (URLs, paths) - - Docker environment specifications - - Test methods and expected results - - Links to original course materials - -- **Output**: - - Implementation code that passes provided test suites - - Evaluation results with pass/fail status - - Execution logs and error reports - - Performance metrics stored in `outputs/` directory - -- **Evaluation**: - - Automated testing via course-provided test scripts - - Binary pass/fail based on test suite results - - Support for multiple test scenarios (sequential, concurrent, crash recovery) - - Evaluation can run in Docker containers or manually - -## Dataset - -The benchmark includes tasks from: -- **6.5840 Distributed Systems Labs**: MapReduce, Raft consensus, fault-tolerant key-value service -- **Environment Setup Tasks**: Project configuration and dependency management - -Files: -- `data/benchmark/course_lab_task_examples.jsonl` - Course lab examples -- `data/benchmark/env_setup_examples.jsonl` - Env Setup examples -- `data/benchmark/course_lab_tasks_mit_65840.jsonl` - System tasks from 6.5840 Distributed Systems 2024/2025 - -## Benchmark Setup - -#### Install Dependencies - -1. Run the `install.sh` script to set up the environment: - - ```sh - ./install.sh - ``` - - This will: - - Install Python 3.12 virtual environment - - Clone and install SWE-agent - - Install required Python packages (pytest, pytest-cov) - - Clone course repositories (6.5840-golabs-2024, xv6-labs-2024, etc.) - -#### Run - -To run the benchmark: - -1. Execute the `run.sh` script with your model: - - ```sh - ./run.sh - # Example: ./run.sh claude-sonnet-4-5-20250929 - ``` - -2. Configure your LLM endpoint in `env.toml`: - - For Azure/OpenAI models: Set `AZURE_API_KEY`, `AZURE_API_BASE`, `AZURE_API_VERSION` - - For Anthropic models: Set `ANTHROPIC_API_KEY` - - For self-hosted models: Configure `OPENAI_API_TYPE` and `OPENAI_BASE_URL` - -3. 
Results will be saved to `outputs/` with timestamp and model information - - -## Supported Agents - -The benchmark supports multiple AI agents: -- **Claude Code**: Anthropic's code assistant -- **OpenHands**: Open-source coding agent - -To add your own agent to the benchmark, see [add_agents.md](add_agents.md). - -## How to Extend the Benchmark - -This section describes how to add additional labs to the benchmark. We show the workflow using the existing [MapReduce lab](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html) as an example: - -### Step 1: Add a row to the CSV file - -Edit `data/benchmark/lab_exam_data_20250529.csv` and add a new row. Here is what each column represents: - -| Column | Value | Description | -| ----------------- | -------------------------------- | ---------------------------------------------------------- | -| `instance_id` | `1` | Unique numeric ID for the task | -| `course` | `6.5840: Distributed Systems` | Course name | -| `year` | `Spring 2024` | Course term/year | -| `index` | `Lab 1: MapReduce` | Lab name | -| `introduction` | `In this lab you'll build...` | Goes into markdown: Problem Context → Introduction | -| `getting_started` | `You need to setup Go...` | Goes into markdown: Getting Started section | -| `The code` | (starter code description) | Goes into markdown: The Code section | -| `description` | `Your job is to implement...` | Goes into markdown: Your Task section | -| `repo` | `6.5840-golabs-2024` | Repository folder name (will be prefixed with `projects/`) | -| `test_method` | `cd src/main && bash test-mr.sh` | Shell command to run tests | -| `test_results` | `*** PASSED ALL TESTS` | Expected test output when solution is correct | -| `difficluty` | `moderate/hard` | Difficulty: `easy`, `moderate`, `moderate/hard`, or `hard` | -| `link` | `http://.../lab-mr.html` | URL to original course lab assignment | - -### Step 2: Run the conversion script - -```bash -cd data/benchmark -python3 convert_promblems.py -``` - -This generates: - -- `problems/system_lab_.md` - Markdown file with task description -- Updates `system_lab_tasks.jsonl` - JSONL with all tasks - -### Step 3: Update `install.sh` (if adding a new repository) - -```bash -if [ -d "6.5840-golabs-2024" ]; then - echo "==> 6.5840-golabs-2024 already exists, skipping clone." -else - echo "==> Cloning 6.5840-golabs-2024..." - git clone git://g.csail.mit.edu/6.5840-golabs-2024 -fi -``` - -### Step 4: Test your addition - -```bash -./install.sh -./run.sh -``` diff --git a/benchmarks/course_lab_bench/add_agents.md b/benchmarks/course_lab_bench/add_agents.md deleted file mode 100644 index d812c4b..0000000 --- a/benchmarks/course_lab_bench/add_agents.md +++ /dev/null @@ -1,152 +0,0 @@ -# Adding a New Agent - -To integrate a new agent into the benchmark, follow these steps: - -## 1. Create Agent Directory - -Create a new directory under `src/agents/` with your agent name: - -```sh -mkdir src/agents/your_agent_name -cd src/agents/your_agent_name -``` - -## 2. Create Required Files - -Each agent requires two files: - -### `install.sh` (optional but recommended) - -Installation script for your agent's dependencies: - -```bash -#!/bin/bash -set -e # Exit immediately on error. - -# Install your agent's dependencies -# Example: pip install your-agent-package -# Example: npm install -g your-agent-cli -``` - -### `runner.sh` (required) - -Execution script that accepts model and task parameters: - -```bash -#!/bin/bash -set -e # Exit immediately on error. 
- -# Validate parameters -if [ $# -ne 2 ]; then - echo "Usage: $0 " - echo "Example: $0 azure/gpt-4 \"implement MapReduce\"" - exit 1 -fi - -# Set API keys (read from env.toml or environment variables) -export YOUR_API_KEY="your_key_here" - -# Run your agent with the provided model and task -# $1 = model_location -# $2 = task_description -your-agent-command -m "$1" -t "$2" -o agent_trajectory.json -``` - -## 3. Agent Integration Points - -Your agent runner will be executed in a Docker container with: - -- **Working directory**: `/repo` (contains the project to work on) -- **Agent directory**: `/agent` (contains your install.sh and runner.sh) -- **Parameters**: - - `$1`: Model name/location (e.g., `anthropic/claude-sonnet-4-5-20250929`) - - `$2`: Task description (multi-line text describing what to implement) - -## 4. Examples - -### Claude Code Agent -```bash -# install.sh -apt-get update -y -apt-get install -y nodejs npm -npm install -g @anthropic-ai/claude-code - -# runner.sh -export ANTHROPIC_API_KEY="sk-ant-..." -claude -p "$2" --model "$1" --output-format json -``` - -### OpenHands Agent -```bash -# install.sh -curl -sSL https://install.python-poetry.org | python3 - -export PATH="$HOME/.local/bin:$PATH" -git clone https://github.com/All-Hands-AI/OpenHands.git -cd OpenHands/ -poetry install - -# runner.sh -cd OpenHands/ -poetry run python -m openhands.core.main \ - --config-file /agent/config.toml \ - --agent-cls CodeActAgent \ - --selected-repo /repo \ - -t "$2" -``` - -## 5. Testing Your Agent - -1. Add your agent path to the evaluation script -2. Run the benchmark: - ```sh - python src/main.py --agent ./src/agents/your_agent_name - ``` - -## 6. Best Practices - -- Make scripts executable: `chmod +x install.sh runner.sh` -- Handle errors gracefully with `set -e` -- Use environment variables for API keys -- Output agent trajectory/logs for debugging -- Test with simple tasks first before running full benchmark -- Ensure your agent can work within the `/repo` directory context - -## 7. Agent Execution Flow - -The benchmark framework executes your agent as follows: - -1. **Setup Phase**: - - Docker container starts with base image `xuafeng/swe-go-python:latest` - - Project files uploaded to `/repo` - - Agent files uploaded to `/agent` - - `/agent/install.sh` executed (if exists) - -2. **Execution Phase**: - - Runner script executed: `/agent/runner.sh "" ""` - - Agent works in `/repo` directory - - Agent should modify files to complete the task - -3. **Evaluation Phase**: - - Test method from task specification executed (e.g., `cd src/main && bash test-mr.sh`) - - Results captured and saved to `outputs/` - - -## 8. 
Troubleshooting - -### Common Issues - -**Agent can't find dependencies**: -- Ensure `install.sh` installs all required packages -- Check Docker image has necessary base dependencies - -**Permission denied errors**: -- Make scripts executable: `chmod +x install.sh runner.sh` -- Check file permissions in Docker container - -**API key not found**: -- Set environment variables in `runner.sh` -- Alternatively, configure in `env.toml` and read from there - -**Agent output not captured**: -- Ensure agent outputs to `agent_trajectory.json` or logs properly -- Check exit codes and error handling diff --git a/benchmarks/course_lab_bench/data/benchmark/convert_promblems.py b/benchmarks/course_lab_bench/data/benchmark/convert_promblems.py deleted file mode 100644 index ccd35d8..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/convert_promblems.py +++ /dev/null @@ -1,51 +0,0 @@ -import csv -import json - - -def covert_to_dict(): - fw_all_tasks = open('system_lab_tasks.jsonl', 'w', encoding='utf-8') - with open('lab_exam_data_20250529.csv', newline='', encoding='latin1') as csvfile: - reader = csv.DictReader(csvfile) - id = 0 - # instance_id,course,year,index,part_name,introduction,getting_started,The code,description,task,hint,rules,repo_location,test_method,test_results,difficluty,link - for row in reader: - if id > 100: # Process up to 100 tasks - break - id += 1 - unique_id = row['instance_id'] + row['course'] + '_' + row['year'] + '_' + row['index'] - task = ( - '# Problem Context\n## Introduction\n' - + row['introduction'] - + '\n## Getiting Started\n' - + row['getting_started'] - + '\n## The Code\n' - + row['The code'] - + '\n# Your Task \n' - + row['description'] - ) - # "\n\n# Your Task\n" + row["task"] + "\n## Hits\n" + row["hint"] - repo_name = 'projects/' + row['repo'] - test_method = row['test_method'] - test_results = row['test_results'] - difficulty = row['difficluty'] - link = row['link'] - task_name = 'problems/system_lab_' + row['instance_id'] + '.md' - task_dict = { - 'task_id': 'system_lab_' + row['instance_id'], - 'task_name': task_name, - 'repo_name': repo_name, - 'task': task, - 'test_method': test_method, - 'test_results': test_results, - 'difficulty': difficulty, - 'link': link, - } - - fw = open('problems/system_lab_' + row['instance_id'] + '.md', 'w', encoding='utf-8') - fw.write(task + '\n') - fw_all_tasks.write(json.dumps(task_dict) + '\n') - - -if __name__ == '__main__': - covert_to_dict() - print('Conversion completed successfully.') diff --git a/benchmarks/course_lab_bench/data/benchmark/course_lab_task_examples.jsonl b/benchmarks/course_lab_bench/data/benchmark/course_lab_task_examples.jsonl deleted file mode 100644 index 705af1f..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/course_lab_task_examples.jsonl +++ /dev/null @@ -1,2 +0,0 @@ -{"task_id": "system_lab_1", "task_name": "problems/system_lab_1.md", "task": "# Problem Context\n## Introduction\nIn this lab you'll build a MapReduce system. You'll implement a worker process that calls application Map and Reduce functions and handles reading and writing files, and a coordinator process that hands out tasks to workers and copes with failed workers. You'll be building something similar to the [MapReduce paper](http://research.google.com/archive/mapreduce-osdi04.pdf). 
(Note: this lab uses \"coordinator\" instead of the paper's \"master\".)\n## Getiting Started\nYou need to [setup Go](http://nil.csail.mit.edu/6.5840/2024/labs/go.html) to do the labs.\n\nFetch the initial lab software with [git](https://git-scm.com/) (a version control system). To learn more about git, look at the [Pro Git book](https://git-scm.com/book/en/v2) or the [git user's manual](http://www.kernel.org/pub/software/scm/git/docs/user-manual.html).\n\n```\n$ git clone git://g.csail.mit.edu/6.5840-golabs-2024 6.5840\n$ cd 6.5840\n$ ls\nMakefile src\n$\n```\n\nWe supply you with a simple sequential mapreduce implementation in `src/main/mrsequential.go`. It runs the maps and reduces one at a time, in a single process. We also provide you with a couple of MapReduce applications: word-count in `mrapps/wc.go`, and a text indexer in `mrapps/indexer.go`. You can run word count sequentially as follows:\n\n```\n$ cd ~/6.5840\n$ cd src/main\n$ go build -buildmode=plugin ../mrapps/wc.go\n$ rm mr-out*\n$ go run mrsequential.go wc.so pg*.txt\n$ more mr-out-0\nA 509\nABOUT 2\nACT 8\n...\n```\n\n`mrsequential.go` leaves its output in the file `mr-out-0`. The input is from the text files named `pg-xxx.txt`.\n\nFeel free to borrow code from `mrsequential.go`. You should also have a look at `mrapps/wc.go` to see what MapReduce application code looks like.\n\nFor this lab and all the others, we might issue updates to the code we provide you. To ensure that you can fetch those updates and easily merge them using `git pull`, it's best to leave the code we provide in the original files. You can add to the code we provide as directed in the lab write-ups; just don't move it. It's OK to put your own new functions in new files.\n## The Code\n\n# Your Task \nYour job is to implement a distributed MapReduce, consisting of two programs, the coordinator and the worker. There will be just one coordinator process, and one or more worker processes executing in parallel. In a real system the workers would run on a bunch of different machines, but for this lab you'll run them all on a single machine. The workers will talk to the coordinator via RPC. Each worker process will, in a loop, ask the coordinator for a task, read the task's input from one or more files, execute the task, write the task's output to one or more files, and again ask the coordinator for a new task. The coordinator should notice if a worker hasn't completed its task in a reasonable amount of time (for this lab, use ten seconds), and give the same task to a different worker.\n\nWe have given you a little code to start you off. The \"main\" routines for the coordinator and worker are in `main/mrcoordinator.go` and `main/mrworker.go`; don't change these files. You should put your implementation in `mr/coordinator.go`, `mr/worker.go`, and `mr/rpc.go`.\n\nHere's how to run your code on the word-count MapReduce application. First, make sure the word-count plugin is freshly built:\n\n```\n$ go build -buildmode=plugin ../mrapps/wc.go\n```\n\nIn the `main` directory, run the coordinator.\n\n```\n$ rm mr-out*\n$ go run mrcoordinator.go pg-*.txt\n```\n\nThe `pg-*.txt` arguments to `mrcoordinator.go` are the input files; each file corresponds to one \"split\", and is the input to one Map task.\n\nIn one or more other windows, run some workers:\n\n```\n$ go run mrworker.go wc.so\n```\n\nWhen the workers and coordinator have finished, look at the output in `mr-out-*`. 
When you've completed the lab, the sorted union of the output files should match the sequential output, like this:\n\n```\n$ cat mr-out-* | sort | more\nA 509\nABOUT 2\nACT 8\n...\n```\n\nWe supply you with a test script in `main/test-mr.sh`. The tests check that the `wc` and `indexer` MapReduce applications produce the correct output when given the `pg-xxx.txt` files as input. The tests also check that your implementation runs the Map and Reduce tasks in parallel, and that your implementation recovers from workers that crash while running tasks.\n\nIf you run the test script now, it will hang because the coordinator never finishes:\n\n```\n$ cd ~/6.5840/src/main\n$ bash test-mr.sh\n*** Starting wc test.\n```\n\nYou can change `ret := false` to true in the Done function in `mr/coordinator.go` so that the coordinator exits immediately. Then:\n\n```\n$ bash test-mr.sh\n*** Starting wc test.\nsort: No such file or directory\ncmp: EOF on mr-wc-all\n--- wc output is not the same as mr-correct-wc.txt\n--- wc test: FAIL\n$\n```\n\nThe test script expects to see output in files named `mr-out-X`, one for each reduce task. The empty implementations of `mr/coordinator.go` and `mr/worker.go` don't produce those files (or do much of anything else), so the test fails.\n\nWhen you've finished, the test script output should look like this:\n\n```\n$ bash test-mr.sh\n*** Starting wc test.\n--- wc test: PASS\n*** Starting indexer test.\n--- indexer test: PASS\n*** Starting map parallelism test.\n--- map parallelism test: PASS\n*** Starting reduce parallelism test.\n--- reduce parallelism test: PASS\n*** Starting job count test.\n--- job count test: PASS\n*** Starting early exit test.\n--- early exit test: PASS\n*** Starting crash test.\n--- crash test: PASS\n*** PASSED ALL TESTS\n$\n```\n\nYou may see some errors from the Go RPC package that look like\n\n```\n2019/12/16 13:27:09 rpc.Register: method \"Done\" has 1 input parameters; needs exactly three\n```\n\nIgnore these messages; registering the coordinator as an [RPC server](https://golang.org/src/net/rpc/server.go) checks if all its methods are suitable for RPCs (have 3 inputs); we know that `Done` is not called via RPC.\n\nAdditionally, depending on your strategy for terminating worker processes, you may see some errors of the form\n\n```\n2024/02/11 16:21:32 dialing:dial unix /var/tmp/5840-mr-501: connect: connection refused\n```\n\nIt is fine to see a handful of these messages per test; they arise when the worker is unable to contact the coordinator RPC server after the coordinator has exited.\n\n### A few rules:\n\n- The map phase should divide the intermediate keys into buckets for `nReduce` reduce tasks, where `nReduce` is the number of reduce tasks -- the argument that `main/mrcoordinator.go` passes to `MakeCoordinator()`. Each mapper should create `nReduce` intermediate files for consumption by the reduce tasks.\n- The worker implementation should put the output of the X'th reduce task in the file `mr-out-X`.\n- A `mr-out-X` file should contain one line per Reduce function output. The line should be generated with the Go `\"%v %v\"` format, called with the key and value. Have a look in `main/mrsequential.go` for the line commented \"this is the correct format\". The test script will fail if your implementation deviates too much from this format.\n- You can modify `mr/worker.go`, `mr/coordinator.go`, and `mr/rpc.go`. 
You can temporarily modify other files for testing, but make sure your code works with the original versions; we'll test with the original versions.\n- The worker should put intermediate Map output in files in the current directory, where your worker can later read them as input to Reduce tasks.\n- `main/mrcoordinator.go` expects `mr/coordinator.go` to implement a `Done()` method that returns true when the MapReduce job is completely finished; at that point, `mrcoordinator.go` will exit.\n- When the job is completely finished, the worker processes should exit. A simple way to implement this is to use the return value from `call()`: if the worker fails to contact the coordinator, it can assume that the coordinator has exited because the job is done, so the worker can terminate too. Depending on your design, you might also find it helpful to have a \"please exit\" pseudo-task that the coordinator can give to workers.\n\n### Hints\n\n- The [Guidance page](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) has some tips on developing and debugging.\n\n- One way to get started is to modify `mr/worker.go`'s `Worker()` to send an RPC to the coordinator asking for a task. Then modify the coordinator to respond with the file name of an as-yet-unstarted map task. Then modify the worker to read that file and call the application Map function, as in `mrsequential.go`.\n\n- The application Map and Reduce functions are loaded at run-time using the Go plugin package, from files whose names end in `.so`.\n\n- If you change anything in the `mr/` directory, you will probably have to re-build any MapReduce plugins you use, with something like `go build -buildmode=plugin ../mrapps/wc.go`\n\n- This lab relies on the workers sharing a file system. That's straightforward when all workers run on the same machine, but would require a global filesystem like GFS if the workers ran on different machines.\n\n- A reasonable naming convention for intermediate files is `mr-X-Y`, where X is the Map task number, and Y is the reduce task number.\n\n- The worker's map task code will need a way to store intermediate key/value pairs in files in a way that can be correctly read back during reduce tasks. One possibility is to use Go's encoding/json package. To write key/value pairs in JSON format to an open file:\n\n ```\n enc := json.NewEncoder(file)\n for _, kv := ... {\n err := enc.Encode(&kv)\n ```\n\n and to read such a file back:\n\n ```\n dec := json.NewDecoder(file)\n for {\n var kv KeyValue\n if err := dec.Decode(&kv); err != nil {\n break\n }\n kva = append(kva, kv)\n }\n ```\n\n- The map part of your worker can use the `ihash(key)` function (in `worker.go`) to pick the reduce task for a given key.\n\n- You can steal some code from `mrsequential.go` for reading Map input files, for sorting intermediate key/value pairs between the Map and Reduce, and for storing Reduce output in files.\n\n- The coordinator, as an RPC server, will be concurrent; don't forget to lock shared data.\n\n- Use Go's race detector, with `go run -race`. `test-mr.sh` has a comment at the start that tells you how to run it with `-race`. When we grade your labs, we will **not** use the race detector. Nevertheless, if your code has races, there's a good chance it will fail when we test it even without the race detector.\n\n- Workers will sometimes need to wait, e.g. reduces can't start until the last map has finished. One possibility is for workers to periodically ask the coordinator for work, sleeping with `time.Sleep()` between each request. 
Another possibility is for the relevant RPC handler in the coordinator to have a loop that waits, either with `time.Sleep()` or `sync.Cond`. Go runs the handler for each RPC in its own thread, so the fact that one handler is waiting needn't prevent the coordinator from processing other RPCs.\n\n- The coordinator can't reliably distinguish between crashed workers, workers that are alive but have stalled for some reason, and workers that are executing but too slowly to be useful. The best you can do is have the coordinator wait for some amount of time, and then give up and re-issue the task to a different worker. For this lab, have the coordinator wait for ten seconds; after that the coordinator should assume the worker has died (of course, it might not have).\n\n- If you choose to implement Backup Tasks (Section 3.6), note that we test that your code doesn't schedule extraneous tasks when workers execute tasks without crashing. Backup tasks should only be scheduled after some relatively long period of time (e.g., 10s).\n\n- To test crash recovery, you can use the `mrapps/crash.go` application plugin. It randomly exits in the Map and Reduce functions.\n\n- To ensure that nobody observes partially written files in the presence of crashes, the MapReduce paper mentions the trick of using a temporary file and atomically renaming it once it is completely written. You can use `ioutil.TempFile` (or `os.CreateTemp` if you are running Go 1.17 or later) to create a temporary file and `os.Rename` to atomically rename it.\n\n- `test-mr.sh` runs all its processes in the sub-directory `mr-tmp`, so if something goes wrong and you want to look at intermediate or output files, look there. Feel free to temporarily modify `test-mr.sh` to `exit` after the failing test, so the script does not continue testing (and overwrite the output files).\n\n- `test-mr-many.sh` runs `test-mr.sh` many times in a row, which you may want to do in order to spot low-probability bugs. It takes as an argument the number of times to run the tests. You should not run several `test-mr.sh` instances in parallel because the coordinator will reuse the same socket, causing conflicts.\n\n- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names.\n\n- When calling the RPC call() function, the reply struct should contain all default values. RPC calls should look like this:\n\n ```\n reply := SomeType{}\n call(..., &reply)\n ```\n\n without setting any fields of reply before the call. 
If you pass reply structures that have non-default fields, the RPC system may silently return incorrect values.\n", "test_method": "cd src/main && bash test-mr.sh", "test_results": "*** Starting wc test.\n--- wc test: PASS\n*** Starting indexer test.\n--- indexer test: PASS\n*** Starting map parallelism test.\n--- map parallelism test: PASS\n*** Starting reduce parallelism test.\n--- reduce parallelism test: PASS\n*** Starting job count test.\n--- job count test: PASS\n*** Starting early exit test.\n--- early exit test: PASS\n*** Starting crash test.\n--- crash test: PASS\n*** PASSED ALL TESTS", "difficulty": "moderate/hard", "link": "http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", "repo_path": "projects/6.5840-golabs-2024"} -{"task_id": "system_lab_2", "task_name": "problems/system_lab_2.md", "task": "# Problem Context\n## Introduction\nIn this lab you will build a key/value server for a single machine that ensures that each operation is executed exactly once despite network failures and that the operations are [linearizable](http://nil.csail.mit.edu/6.5840/2024/papers/linearizability-faq.txt). Later labs will replicate a server like this one to handle server crashes.\n\nClients can send three different RPCs to the key/value server: `Put(key, value)`, `Append(key, arg)`, and `Get(key)`. The server maintains an in-memory map of key/value pairs. Keys and values are strings. `Put(key, value)` installs or replaces the value for a particular key in the map, `Append(key, arg)` appends arg to key's value *and* returns the old value, and `Get(key)` fetches the current value for the key. A `Get` for a non-existent key should return an empty string. An `Append` to a non-existent key should act as if the existing value were a zero-length string. Each client talks to the server through a `Clerk` with Put/Append/Get methods. A `Clerk` manages RPC interactions with the server.\n\nYour server must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If client requests aren't concurrent, each client Get/Put/Append call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts.\n\nLinearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.\n## Getiting Started\nWe supply you with skeleton code and tests in `src/kvsrv`. You will need to modify `kvsrv/client.go`, `kvsrv/server.go`, and `kvsrv/common.go`.\n\nTo get up and running, execute the following commands. 
Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/kvsrv\n$ go test\n...\n$\n```\n\n## The Code\n\n# Your Task \nYour first task is to implement a solution that works when there are no dropped messages.\n\nYou'll need to add RPC-sending code to the Clerk Put/Append/Get methods in `client.go`, and implement `Put`, `Append()` and `Get()` RPC handlers in `server.go`.\n\nYou have completed this task when you pass the first two tests in the test suite: \"one client\" and \"many clients\".\n\n- Check that your code is race-free using `go test -race`.", "test_method": "cd src/kvsr && go test", "test_results": "", "difficulty": "easy", "link": "http://nil.csail.mit.edu/6.5840/2024/labs/lab-kvsrv.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", "repo_path": "projects/6.5840-golabs-2024"} \ No newline at end of file diff --git a/benchmarks/course_lab_bench/data/benchmark/course_lab_tasks_mit_65840_2024.jsonl b/benchmarks/course_lab_bench/data/benchmark/course_lab_tasks_mit_65840_2024.jsonl deleted file mode 100644 index 00ded43..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/course_lab_tasks_mit_65840_2024.jsonl +++ /dev/null @@ -1,26 +0,0 @@ -{"task_id": "system_lab_1", "task_name": "problems/system_lab_1.md", "task": "# Problem Context\n## Introduction\nIn this lab you'll build a MapReduce system. You'll implement a worker process that calls application Map and Reduce functions and handles reading and writing files, and a coordinator process that hands out tasks to workers and copes with failed workers. You'll be building something similar to the [MapReduce paper](http://research.google.com/archive/mapreduce-osdi04.pdf). (Note: this lab uses \"coordinator\" instead of the paper's \"master\".)\n## Getiting Started\nYou need to [setup Go](http://nil.csail.mit.edu/6.5840/2024/labs/go.html) to do the labs.\n\nFetch the initial lab software with [git](https://git-scm.com/) (a version control system). To learn more about git, look at the [Pro Git book](https://git-scm.com/book/en/v2) or the [git user's manual](http://www.kernel.org/pub/software/scm/git/docs/user-manual.html).\n\n```\n$ git clone git://g.csail.mit.edu/6.5840-golabs-2024 6.5840\n$ cd 6.5840\n$ ls\nMakefile src\n$\n```\n\nWe supply you with a simple sequential mapreduce implementation in `src/main/mrsequential.go`. It runs the maps and reduces one at a time, in a single process. We also provide you with a couple of MapReduce applications: word-count in `mrapps/wc.go`, and a text indexer in `mrapps/indexer.go`. You can run word count sequentially as follows:\n\n```\n$ cd ~/6.5840\n$ cd src/main\n$ go build -buildmode=plugin ../mrapps/wc.go\n$ rm mr-out*\n$ go run mrsequential.go wc.so pg*.txt\n$ more mr-out-0\nA 509\nABOUT 2\nACT 8\n...\n```\n\n`mrsequential.go` leaves its output in the file `mr-out-0`. The input is from the text files named `pg-xxx.txt`.\n\nFeel free to borrow code from `mrsequential.go`. You should also have a look at `mrapps/wc.go` to see what MapReduce application code looks like.\n\nFor this lab and all the others, we might issue updates to the code we provide you. To ensure that you can fetch those updates and easily merge them using `git pull`, it's best to leave the code we provide in the original files. You can add to the code we provide as directed in the lab write-ups; just don't move it. 
It's OK to put your own new functions in new files.\n## The Code\n\n# Your Task \nYour job is to implement a distributed MapReduce, consisting of two programs, the coordinator and the worker. There will be just one coordinator process, and one or more worker processes executing in parallel. In a real system the workers would run on a bunch of different machines, but for this lab you'll run them all on a single machine. The workers will talk to the coordinator via RPC. Each worker process will, in a loop, ask the coordinator for a task, read the task's input from one or more files, execute the task, write the task's output to one or more files, and again ask the coordinator for a new task. The coordinator should notice if a worker hasn't completed its task in a reasonable amount of time (for this lab, use ten seconds), and give the same task to a different worker.\n\nWe have given you a little code to start you off. The \"main\" routines for the coordinator and worker are in `main/mrcoordinator.go` and `main/mrworker.go`; don't change these files. You should put your implementation in `mr/coordinator.go`, `mr/worker.go`, and `mr/rpc.go`.\n\nHere's how to run your code on the word-count MapReduce application. First, make sure the word-count plugin is freshly built:\n\n```\n$ go build -buildmode=plugin ../mrapps/wc.go\n```\n\nIn the `main` directory, run the coordinator.\n\n```\n$ rm mr-out*\n$ go run mrcoordinator.go pg-*.txt\n```\n\nThe `pg-*.txt` arguments to `mrcoordinator.go` are the input files; each file corresponds to one \"split\", and is the input to one Map task.\n\nIn one or more other windows, run some workers:\n\n```\n$ go run mrworker.go wc.so\n```\n\nWhen the workers and coordinator have finished, look at the output in `mr-out-*`. When you've completed the lab, the sorted union of the output files should match the sequential output, like this:\n\n```\n$ cat mr-out-* | sort | more\nA 509\nABOUT 2\nACT 8\n...\n```\n\nWe supply you with a test script in `main/test-mr.sh`. The tests check that the `wc` and `indexer` MapReduce applications produce the correct output when given the `pg-xxx.txt` files as input. The tests also check that your implementation runs the Map and Reduce tasks in parallel, and that your implementation recovers from workers that crash while running tasks.\n\nIf you run the test script now, it will hang because the coordinator never finishes:\n\n```\n$ cd ~/6.5840/src/main\n$ bash test-mr.sh\n*** Starting wc test.\n```\n\nYou can change `ret := false` to true in the Done function in `mr/coordinator.go` so that the coordinator exits immediately. Then:\n\n```\n$ bash test-mr.sh\n*** Starting wc test.\nsort: No such file or directory\ncmp: EOF on mr-wc-all\n--- wc output is not the same as mr-correct-wc.txt\n--- wc test: FAIL\n$\n```\n\nThe test script expects to see output in files named `mr-out-X`, one for each reduce task. 
The empty implementations of `mr/coordinator.go` and `mr/worker.go` don't produce those files (or do much of anything else), so the test fails.\n\nWhen you've finished, the test script output should look like this:\n\n```\n$ bash test-mr.sh\n*** Starting wc test.\n--- wc test: PASS\n*** Starting indexer test.\n--- indexer test: PASS\n*** Starting map parallelism test.\n--- map parallelism test: PASS\n*** Starting reduce parallelism test.\n--- reduce parallelism test: PASS\n*** Starting job count test.\n--- job count test: PASS\n*** Starting early exit test.\n--- early exit test: PASS\n*** Starting crash test.\n--- crash test: PASS\n*** PASSED ALL TESTS\n$\n```\n\nYou may see some errors from the Go RPC package that look like\n\n```\n2019/12/16 13:27:09 rpc.Register: method \"Done\" has 1 input parameters; needs exactly three\n```\n\nIgnore these messages; registering the coordinator as an [RPC server](https://golang.org/src/net/rpc/server.go) checks if all its methods are suitable for RPCs (have 3 inputs); we know that `Done` is not called via RPC.\n\nAdditionally, depending on your strategy for terminating worker processes, you may see some errors of the form\n\n```\n2024/02/11 16:21:32 dialing:dial unix /var/tmp/5840-mr-501: connect: connection refused\n```\n\nIt is fine to see a handful of these messages per test; they arise when the worker is unable to contact the coordinator RPC server after the coordinator has exited.\n\n### A few rules:\n\n- The map phase should divide the intermediate keys into buckets for `nReduce` reduce tasks, where `nReduce` is the number of reduce tasks -- the argument that `main/mrcoordinator.go` passes to `MakeCoordinator()`. Each mapper should create `nReduce` intermediate files for consumption by the reduce tasks.\n- The worker implementation should put the output of the X'th reduce task in the file `mr-out-X`.\n- A `mr-out-X` file should contain one line per Reduce function output. The line should be generated with the Go `\"%v %v\"` format, called with the key and value. Have a look in `main/mrsequential.go` for the line commented \"this is the correct format\". The test script will fail if your implementation deviates too much from this format.\n- You can modify `mr/worker.go`, `mr/coordinator.go`, and `mr/rpc.go`. You can temporarily modify other files for testing, but make sure your code works with the original versions; we'll test with the original versions.\n- The worker should put intermediate Map output in files in the current directory, where your worker can later read them as input to Reduce tasks.\n- `main/mrcoordinator.go` expects `mr/coordinator.go` to implement a `Done()` method that returns true when the MapReduce job is completely finished; at that point, `mrcoordinator.go` will exit.\n- When the job is completely finished, the worker processes should exit. A simple way to implement this is to use the return value from `call()`: if the worker fails to contact the coordinator, it can assume that the coordinator has exited because the job is done, so the worker can terminate too. Depending on your design, you might also find it helpful to have a \"please exit\" pseudo-task that the coordinator can give to workers.\n\n### Hints\n\n- The [Guidance page](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) has some tips on developing and debugging.\n\n- One way to get started is to modify `mr/worker.go`'s `Worker()` to send an RPC to the coordinator asking for a task. 
Then modify the coordinator to respond with the file name of an as-yet-unstarted map task. Then modify the worker to read that file and call the application Map function, as in `mrsequential.go`.\n\n- The application Map and Reduce functions are loaded at run-time using the Go plugin package, from files whose names end in `.so`.\n\n- If you change anything in the `mr/` directory, you will probably have to re-build any MapReduce plugins you use, with something like `go build -buildmode=plugin ../mrapps/wc.go`\n\n- This lab relies on the workers sharing a file system. That's straightforward when all workers run on the same machine, but would require a global filesystem like GFS if the workers ran on different machines.\n\n- A reasonable naming convention for intermediate files is `mr-X-Y`, where X is the Map task number, and Y is the reduce task number.\n\n- The worker's map task code will need a way to store intermediate key/value pairs in files in a way that can be correctly read back during reduce tasks. One possibility is to use Go's encoding/json package. To write key/value pairs in JSON format to an open file:\n\n ```\n enc := json.NewEncoder(file)\n for _, kv := ... {\n err := enc.Encode(&kv)\n ```\n\n and to read such a file back:\n\n ```\n dec := json.NewDecoder(file)\n for {\n var kv KeyValue\n if err := dec.Decode(&kv); err != nil {\n break\n }\n kva = append(kva, kv)\n }\n ```\n\n- The map part of your worker can use the `ihash(key)` function (in `worker.go`) to pick the reduce task for a given key.\n\n- You can steal some code from `mrsequential.go` for reading Map input files, for sorting intermediate key/value pairs between the Map and Reduce, and for storing Reduce output in files.\n\n- The coordinator, as an RPC server, will be concurrent; don't forget to lock shared data.\n\n- Use Go's race detector, with `go run -race`. `test-mr.sh` has a comment at the start that tells you how to run it with `-race`. When we grade your labs, we will **not** use the race detector. Nevertheless, if your code has races, there's a good chance it will fail when we test it even without the race detector.\n\n- Workers will sometimes need to wait, e.g. reduces can't start until the last map has finished. One possibility is for workers to periodically ask the coordinator for work, sleeping with `time.Sleep()` between each request. Another possibility is for the relevant RPC handler in the coordinator to have a loop that waits, either with `time.Sleep()` or `sync.Cond`. Go runs the handler for each RPC in its own thread, so the fact that one handler is waiting needn't prevent the coordinator from processing other RPCs.\n\n- The coordinator can't reliably distinguish between crashed workers, workers that are alive but have stalled for some reason, and workers that are executing but too slowly to be useful. The best you can do is have the coordinator wait for some amount of time, and then give up and re-issue the task to a different worker. For this lab, have the coordinator wait for ten seconds; after that the coordinator should assume the worker has died (of course, it might not have).\n\n- If you choose to implement Backup Tasks (Section 3.6), note that we test that your code doesn't schedule extraneous tasks when workers execute tasks without crashing. Backup tasks should only be scheduled after some relatively long period of time (e.g., 10s).\n\n- To test crash recovery, you can use the `mrapps/crash.go` application plugin. 
It randomly exits in the Map and Reduce functions.\n\n- To ensure that nobody observes partially written files in the presence of crashes, the MapReduce paper mentions the trick of using a temporary file and atomically renaming it once it is completely written. You can use `ioutil.TempFile` (or `os.CreateTemp` if you are running Go 1.17 or later) to create a temporary file and `os.Rename` to atomically rename it.\n\n- `test-mr.sh` runs all its processes in the sub-directory `mr-tmp`, so if something goes wrong and you want to look at intermediate or output files, look there. Feel free to temporarily modify `test-mr.sh` to `exit` after the failing test, so the script does not continue testing (and overwrite the output files).\n\n- `test-mr-many.sh` runs `test-mr.sh` many times in a row, which you may want to do in order to spot low-probability bugs. It takes as an argument the number of times to run the tests. You should not run several `test-mr.sh` instances in parallel because the coordinator will reuse the same socket, causing conflicts.\n\n- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names.\n\n- When calling the RPC call() function, the reply struct should contain all default values. RPC calls should look like this:\n\n ```\n reply := SomeType{}\n call(..., &reply)\n ```\n\n without setting any fields of reply before the call. If you pass reply structures that have non-default fields, the RPC system may silently return incorrect values.\n", "test_method": "cd src/main && bash test-mr.sh", "test_results": "*** Starting wc test.\n--- wc test: PASS\n*** Starting indexer test.\n--- indexer test: PASS\n*** Starting map parallelism test.\n--- map parallelism test: PASS\n*** Starting reduce parallelism test.\n--- reduce parallelism test: PASS\n*** Starting job count test.\n--- job count test: PASS\n*** Starting early exit test.\n--- early exit test: PASS\n*** Starting crash test.\n--- crash test: PASS\n*** PASSED ALL TESTS", "difficulty": "moderate/hard", "link": "http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", "repo_path": "projects/6.5840-golabs-2024"} -{"task_id": "system_lab_2", "task_name": "problems/system_lab_2.md", "task": "# Problem Context\n## Introduction\nIn this lab you will build a key/value server for a single machine that ensures that each operation is executed exactly once despite network failures and that the operations are [linearizable](http://nil.csail.mit.edu/6.5840/2024/papers/linearizability-faq.txt). Later labs will replicate a server like this one to handle server crashes.\n\nClients can send three different RPCs to the key/value server: `Put(key, value)`, `Append(key, arg)`, and `Get(key)`. The server maintains an in-memory map of key/value pairs. Keys and values are strings. `Put(key, value)` installs or replaces the value for a particular key in the map, `Append(key, arg)` appends arg to key's value *and* returns the old value, and `Get(key)` fetches the current value for the key. A `Get` for a non-existent key should return an empty string. An `Append` to a non-existent key should act as if the existing value were a zero-length string. Each client talks to the server through a `Clerk` with Put/Append/Get methods. A `Clerk` manages RPC interactions with the server.\n\nYour server must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. 
If client requests aren't concurrent, each client Get/Put/Append call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts.\n\nLinearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.\n## Getiting Started\nWe supply you with skeleton code and tests in `src/kvsrv`. You will need to modify `kvsrv/client.go`, `kvsrv/server.go`, and `kvsrv/common.go`.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/kvsrv\n$ go test\n...\n$\n```\n\n## The Code\n\n# Your Task \nYour first task is to implement a solution that works when there are no dropped messages.\n\nYou'll need to add RPC-sending code to the Clerk Put/Append/Get methods in `client.go`, and implement `Put`, `Append()` and `Get()` RPC handlers in `server.go`.\n\nYou have completed this task when you pass the first two tests in the test suite: \"one client\" and \"many clients\".\n\n- Check that your code is race-free using `go test -race`.", "test_method": "cd src/kvsr && go test", "test_results": "", "difficulty": "easy", "link": "http://nil.csail.mit.edu/6.5840/2024/labs/lab-kvsrv.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", "repo_path": "projects/6.5840-golabs-2024"} -{"task_id": "system_lab_3", "task_name": "problems/system_lab_3.md", "task": "# Problem Context\n## Introduction\nIn this lab you will build a key/value server for a single machine that ensures that each operation is executed exactly once despite network failures and that the operations are [linearizable](http://nil.csail.mit.edu/6.5840/2024/papers/linearizability-faq.txt). Later labs will replicate a server like this one to handle server crashes.\n\nClients can send three different RPCs to the key/value server: `Put(key, value)`, `Append(key, arg)`, and `Get(key)`. The server maintains an in-memory map of key/value pairs. Keys and values are strings. `Put(key, value)` installs or replaces the value for a particular key in the map, `Append(key, arg)` appends arg to key's value *and* returns the old value, and `Get(key)` fetches the current value for the key. A `Get` for a non-existent key should return an empty string. An `Append` to a non-existent key should act as if the existing value were a zero-length string. Each client talks to the server through a `Clerk` with Put/Append/Get methods. A `Clerk` manages RPC interactions with the server.\n\nYour server must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If client requests aren't concurrent, each client Get/Put/Append call should observe the modifications to the state implied by the preceding sequence of calls. 
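As a concrete picture of these single-server semantics, here is a minimal sketch of an in-memory map guarded by a mutex; the type and method signatures are illustrative and are not the skeleton's RPC handler signatures:

```
// Illustrative sketch of the Get/Put/Append semantics on a single server.
// Type and method names are made up; the real skeleton defines its own
// args/reply structs in kvsrv/common.go.
package kvsrv

import "sync"

type KVStore struct {
	mu   sync.Mutex
	data map[string]string
}

func NewKVStore() *KVStore {
	return &KVStore{data: make(map[string]string)}
}

func (kv *KVStore) Get(key string) string {
	kv.mu.Lock()
	defer kv.mu.Unlock()
	return kv.data[key] // a missing key yields "", as required
}

func (kv *KVStore) Put(key, value string) {
	kv.mu.Lock()
	defer kv.mu.Unlock()
	kv.data[key] = value
}

// Append appends arg to the key's value and returns the old value;
// a missing key is treated as the empty string.
func (kv *KVStore) Append(key, arg string) string {
	kv.mu.Lock()
	defer kv.mu.Unlock()
	old := kv.data[key]
	kv.data[key] = old + arg
	return old
}
```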
For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts.\n\nLinearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.\n## Getiting Started\nWe supply you with skeleton code and tests in `src/kvsrv`. You will need to modify `kvsrv/client.go`, `kvsrv/server.go`, and `kvsrv/common.go`.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/kvsrv\n$ go test\n...\n$\n```\n\n## The Code\n\n# Your Task \nNow you should modify your solution to continue in the face of dropped messages (e.g., RPC requests and RPC replies). If a message was lost, then the client's `ck.server.Call()` will return `false` (more precisely, `Call()` waits for a reply message for a timeout interval, and returns false if no reply arrives within that time). One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it succeeds. Each call to `Clerk.Put()` or `Clerk.Append()`, however, should result in just a *single* execution, so you will have to ensure that the re-send doesn't result in the server executing the request twice.\n\nAdd code to `Clerk` to retry if doesn't receive a reply, and to `server.go` to filter duplicates if the operation requires it. These notes include guidance on [duplicate detection](http://nil.csail.mit.edu/6.5840/2024/notes/l-raft-QA.txt).\n\n- You will need to uniquely identify client operations to ensure that the key/value server executes each one just once.\n- You will have to think carefully about what state the server must maintain for handling duplicate `Get()`, `Put()`, and `Append()` requests, if any at all.\n- Your scheme for duplicate detection should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. It's OK to assume that a client will make only one call into a Clerk at a time.\n\nYour code should now pass all tests, like this:\n\n```\n$ go test\nTest: one client ...\n ... Passed -- t 3.8 nrpc 31135 ops 31135\nTest: many clients ...\n ... Passed -- t 4.7 nrpc 102853 ops 102853\nTest: unreliable net, many clients ...\n ... Passed -- t 4.1 nrpc 580 ops 496\nTest: concurrent append to same key, unreliable ...\n ... Passed -- t 0.6 nrpc 61 ops 52\nTest: memory use get ...\n ... Passed -- t 0.4 nrpc 4 ops 0\nTest: memory use put ...\n ... Passed -- t 0.2 nrpc 2 ops 0\nTest: memory use append ...\n ... Passed -- t 0.4 nrpc 2 ops 0\nTest: memory use many puts ...\n ... Passed -- t 11.5 nrpc 100000 ops 0\nTest: memory use many gets ...\n ... 
Passed -- t 12.2 nrpc 100001 ops 0\nPASS\nok 6.5840/kvsrv 39.000s\n```\n\nThe numbers after each `Passed` are real time in seconds, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put/Append calls).\n", "test_method": "cd src/kvsrv && go test", "test_results": "Test: one client ...\n ... Passed -- t 3.8 nrpc 31135 ops 31135\nTest: many clients ...\n ... Passed -- t 4.7 nrpc 102853 ops 102853\nTest: unreliable net, many clients ...\n ... Passed -- t 4.1 nrpc 580 ops 496\nTest: concurrent append to same key, unreliable ...\n ... Passed -- t 0.6 nrpc 61 ops 52\nTest: memory use get ...\n ... Passed -- t 0.4 nrpc 4 ops 0\nTest: memory use put ...\n ... Passed -- t 0.2 nrpc 2 ops 0\nTest: memory use append ...\n ... Passed -- t 0.4 nrpc 2 ops 0\nTest: memory use many puts ...\n ... Passed -- t 11.5 nrpc 100000 ops 0\nTest: memory use many gets ...\n ... Passed -- t 12.2 nrpc 100001 ops 0\nPASS\nok 6.5840/kvsrv 39.000s", "difficulty": "easy", "link": "http://nil.csail.mit.edu/6.5840/2024/labs/lab-kvsrv.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", "repo_path": "projects/6.5840-golabs-2024"} -{"task_id": "system_lab_4", "task_name": "problems/system_lab_4.md", "task": "# Problem Context\n## Introduction\nThis is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will \"shard\" your service over multiple replicated state machines for higher performance.\n\nA replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data.\n\nRaft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again.\n\nIn this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute.\n\nYou should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2.
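Returning briefly to the kvsrv duplicate-detection task above: one common scheme tags each request with a client ID and a per-client sequence number, and the server remembers only the last sequence number and reply per client, which keeps memory bounded. A hedged sketch with made-up names (not the skeleton's):

```
// Illustrative at-most-once filtering for the kvsrv lab. The Clerk would
// retry a request until call() succeeds, re-sending the same ClientID/Seq;
// the server executes it at most once. All names are made up.
package kvsrv

import "sync"

type AppendArgsSketch struct {
	ClientID int64
	Seq      int64 // starts at 1, incremented once per Clerk call
	Key      string
	Arg      string
}

type AppendReplySketch struct {
	OldValue string
}

type DedupServer struct {
	mu      sync.Mutex
	data    map[string]string
	lastSeq map[int64]int64  // highest Seq executed per client
	lastOld map[int64]string // saved reply for that Seq, for re-sends
}

func NewDedupServer() *DedupServer {
	return &DedupServer{
		data:    make(map[string]string),
		lastSeq: make(map[int64]int64),
		lastOld: make(map[int64]string),
	}
}

func (kv *DedupServer) Append(args *AppendArgsSketch, reply *AppendReplySketch) {
	kv.mu.Lock()
	defer kv.mu.Unlock()
	if args.Seq <= kv.lastSeq[args.ClientID] {
		// A re-send of a request we already executed: return the saved reply.
		reply.OldValue = kv.lastOld[args.ClientID]
		return
	}
	old := kv.data[args.Key]
	kv.data[args.Key] = old + args.Arg
	reply.OldValue = old
	kv.lastSeq[args.ClientID] = args.Seq
	kv.lastOld[args.ClientID] = old
}
```

This leans on the stated assumption that a client makes only one call into a Clerk at a time, so a single saved reply per client is enough.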
You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6).\n\nThis lab is due in four parts. You must submit each part on the corresponding due date.\n## Getiting Started\nIf you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html).\n\nWe supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`.\n\nWhen we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/raft\n$ go test\nTest (3A): initial election ...\n--- FAIL: TestInitialElection3A (5.04s)\n config.go:326: expected one leader, got none\nTest (3A): election after network failure ...\n--- FAIL: TestReElection3A (5.03s)\n config.go:326: expected one leader, got none\n...\n$\n```\n## The Code\nImplement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs.\n\nYour implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`.\n\n```\n// create a new Raft server instance:\nrf := Make(peers, me, persister, applyCh)\n\n// start agreement on a new log entry:\nrf.Start(command interface{}) (index, term, isleader)\n\n// ask a Raft for its current term, and whether it thinks it is leader\nrf.GetState() (term, isLeader)\n\n// each time a new entry is committed to the log, each Raft peer\n// should send an ApplyMsg to the service (or tester).\ntype ApplyMsg\n```\n\nA service calls `Make(peers,me,\u00c2\u0085)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`.\n\n`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. 
Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files.\n\nSubsequent labs build on this lab, so it is important to give yourself enough time to write solid code.\n# Your Task \nImplement Raft leader election and heartbeats (`AppendEntries` RPCs with no log entries). The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run `go test -run 3A `to test your 3A code.\n\n- You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. `go test -run 3A `.\n- Follow the paper's Figure 2. At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election,\n- Add the Figure 2 state for leader election to the `Raft` struct in `raft.go`. You'll also need to define a struct to hold information about each log entry.\n- Fill in the `RequestVoteArgs` and `RequestVoteReply` structs. Modify `Make()` to create a background goroutine that will kick off leader election periodically by sending out `RequestVote` RPCs when it hasn't heard from another peer for a while. Implement the `RequestVote()` RPC handler so that servers will vote for one another.\n- To implement heartbeats, define an `AppendEntries` RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an `AppendEntries` RPC handler method.\n- The tester requires that the leader send heartbeat RPCs no more than ten times per second.\n- The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate).\n- The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds.\n- You may find Go's [rand](https://golang.org/pkg/math/rand/) useful.\n- You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls [time.Sleep()](https://golang.org/pkg/time/#Sleep); see the `ticker()` goroutine that `Make()` creates for this purpose. Don't use Go's `time.Timer` or `time.Ticker`, which are difficult to use correctly.\n- If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure.\n- Don't forget to implement `GetState()`.\n- The tester calls your Raft's `rf.Kill()` when it is permanently shutting down an instance. You can check whether `Kill()` has been called using `rf.killed()`. You may want to do this in all loops, to avoid having dead Raft instances print confusing messages.\n- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). 
The `labgob` package will warn you about this; don't ignore the warnings.\n- The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug. Refer to the [Guidance](https://pdos.csail.mit.edu/6.824/labs/guidance.html) page for debugging tips.\n- If you fail a test, the tester produces a file that visualizes a timeline with events marked along it, including network partitions, crashed servers, and checks performed. Here's an [example of the visualization](https://pdos.csail.mit.edu/6.824/labs/vis.html). Further, you can add your own annotations by writing, for example, `tester.Annotate(\"Server 0\", \"short description\", \"details\")`. This is a new feature we added this year, so if you have any feedback regarding the visualizer (e.g., bug reports, what annotation APIs that you think might be helpful, what information you want the visualizer to show, etc.), please let us know!\n\nBe sure you pass the 3A tests before submitting Part 3A, so that you see something like this:\n\n```\n$ go test -run 3A\nTest (3A): initial election (reliable network)...\n ... Passed -- 3.6 3 106 0\nTest (3A): election after network failure (reliable network)...\n ... Passed -- 7.6 3 304 0\nTest (3A): multiple elections (reliable network)...\n ... Passed -- 8.4 7 954 0\nPASS\nok 6.5840/raft1 19.834s\n$\n```\n\nEach \"Passed\" line contains five numbers; these are the time that the test took in seconds, the number of Raft peers, the number of RPCs sent during the test, the total number of bytes in the RPC messages, and the number of log entries that Raft reports were committed. Your numbers will differ from those shown here. You can ignore the numbers if you like, but they may help you sanity-check the number of RPCs that your implementation sends. For all of labs 3, 4, and 5, the grading script will fail your solution if it takes more than 600 seconds for all of the tests (`go test`), or if any individual test takes more than 120 seconds.\n\nWhen we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should make sure that your code consistently passes the tests with the `-race` flag.\n", "test_method": "cd src/raft && go test -run 3A", "test_results": "Test (3A): initial election ...\n ... Passed -- 3.5 3 58 16840 0\nTest (3A): election after network failure ...\n ... Passed -- 5.4 3 118 25269 0\nTest (3A): multiple elections ...\n ... Passed -- 7.3 7 624 138014 0\nPASS\nok \t6.5840/raft\t16.265s", "difficulty": "moderate", "link": "http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", "repo_path": "projects/6.5840-golabs-2024"} -{"task_id": "system_lab_5", "task_name": "problems/system_lab_5.md", "task": "# Problem Context\n## Introduction\nThis is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will \"shard\" your service over multiple replicated state machines for higher performance.\n\nA replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network).
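For the 3A ticker hints above, here is a minimal sketch of a randomized election-timeout loop; the struct, field names, and timeout values are illustrative, not the skeleton's:

```
// Illustrative randomized election-timeout loop, in the spirit of the
// ticker() goroutine the hints mention. The struct, field names, and
// timeout values are made up for this sketch.
package raft

import (
	"math/rand"
	"sync"
	"time"
)

type raftSketch struct {
	mu        sync.Mutex
	state     string    // "follower", "candidate", or "leader"
	lastHeard time.Time // last heartbeat received or vote granted
	dead      bool
}

func (rf *raftSketch) ticker() {
	for {
		// Randomized so peers don't all time out together; well above the
		// heartbeat interval, but small enough that a new leader can be
		// chosen within the tester's five-second window.
		timeout := time.Duration(300+rand.Intn(300)) * time.Millisecond
		time.Sleep(timeout)

		rf.mu.Lock()
		if rf.dead {
			rf.mu.Unlock()
			return
		}
		if rf.state != "leader" && time.Since(rf.lastHeard) >= timeout {
			// No leader contact recently: become a candidate, bump the term,
			// and send RequestVote RPCs (omitted from this sketch).
		}
		rf.mu.Unlock()
	}
}
```

The exact numbers matter less than the two constraints stated in the hints: stay well above the heartbeat rate, and stay small enough to elect a new leader within five seconds.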
The challenge is that failures may cause the replicas to hold differing copies of the data.\n\nRaft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again.\n\nIn this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute.\n\nYou should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6).\n\nThis lab is due in four parts. You must submit each part on the corresponding due date.\n## Getiting Started\nIf you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html).\n\nWe supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`.\n\nWhen we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/raft\n$ go test\nTest (3A): initial election ...\n--- FAIL: TestInitialElection3A (5.04s)\n config.go:326: expected one leader, got none\nTest (3A): election after network failure ...\n--- FAIL: TestReElection3A (5.03s)\n config.go:326: expected one leader, got none\n...\n$\n```\n## The Code\nImplement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs.\n\nYour implementation must support the following interface, which the tester and (eventually) your key/value server will use. 
You'll find more details in comments in `raft.go`.\n\n```\n// create a new Raft server instance:\nrf := Make(peers, me, persister, applyCh)\n\n// start agreement on a new log entry:\nrf.Start(command interface{}) (index, term, isleader)\n\n// ask a Raft for its current term, and whether it thinks it is leader\nrf.GetState() (term, isLeader)\n\n// each time a new entry is committed to the log, each Raft peer\n// should send an ApplyMsg to the service (or tester).\ntype ApplyMsg\n```\n\nA service calls `Make(peers,me,\u00c2\u0085)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`.\n\n`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files.\n\nSubsequent labs build on this lab, so it is important to give yourself enough time to write solid code.\n# Your Task \nImplement the leader and follower code to append new log entries, so that the `go test -run 3B `tests pass.\n\n- Run `git pull` to get the latest lab software.\n- Your first goal should be to pass `TestBasicAgree3B()`. Start by implementing `Start()`, then write the code to send and receive new log entries via `AppendEntries` RPCs, following Figure 2. Send each newly committed entry on `applyCh` on each peer.\n- You will need to implement the election restriction (section 5.4.1 in the paper).\n- Your code may have loops that repeatedly check for certain events. Don't have these loops execute continuously without pausing, since that will slow your implementation enough that it fails tests. Use Go's [condition variables](https://golang.org/pkg/sync/#Cond), or insert a `time.Sleep(10 * time.Millisecond)` in each loop iteration.\n- Do yourself a favor for future labs and write (or re-write) code that's clean and clear. For ideas, re-visit our the [Guidance page](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) with tips on how to develop and debug your code.\n- If you fail a test, look at `test_test.go` and `config.go` to understand what's being tested. `config.go` also illustrates how the tester uses the Raft API.\n\nThe tests for upcoming labs may fail your code if it runs too slowly. You can check how much real time and CPU time your solution uses with the time command. Here's typical output:\n\n```\n$ time go test -run 3B\nTest (3B): basic agreement ...\n ... Passed -- 0.9 3 16 4572 3\nTest (3B): RPC byte count ...\n ... Passed -- 1.7 3 48 114536 11\nTest (3B): agreement after follower reconnects ...\n ... Passed -- 3.6 3 78 22131 7\nTest (3B): no agreement if too many followers disconnect ...\n ... 
Passed -- 3.8 5 172 40935 3\nTest (3B): concurrent Start()s ...\n ... Passed -- 1.1 3 24 7379 6\nTest (3B): rejoin of partitioned leader ...\n ... Passed -- 5.1 3 152 37021 4\nTest (3B): leader backs up quickly over incorrect follower logs ...\n ... Passed -- 17.2 5 2080 1587388 102\nTest (3B): RPC counts aren't too high ...\n ... Passed -- 2.2 3 60 20119 12\nPASS\nok \t6.5840/raft\t35.557s\n\nreal\t0m35.899s\nuser\t0m2.556s\nsys\t0m1.458s\n$\n```\n\nThe \"ok 6.5840/raft 35.557s\" means that Go measured the time taken for the 3B tests to be 35.557 seconds of real (wall-clock) time. The \"user 0m2.556s\" means that the code consumed 2.556 seconds of CPU time, or time spent actually executing instructions (rather than waiting or sleeping). If your solution uses much more than a minute of real time for the 3B tests, or much more than 5 seconds of CPU time, you may run into trouble later on. Look for time spent sleeping or waiting for RPC timeouts, loops that run without sleeping or waiting for conditions or channel messages, or large numbers of RPCs sent.\n\n### ", "test_method": "cd src/raft && time go test -run 3B", "test_results": "Test (3B): basic agreement ...\n ... Passed -- 0.9 3 16 4572 3\nTest (3B): RPC byte count ...\n ... Passed -- 1.7 3 48 114536 11\nTest (3B): agreement after follower reconnects ...\n ... Passed -- 3.6 3 78 22131 7\nTest (3B): no agreement if too many followers disconnect ...\n ... Passed -- 3.8 5 172 40935 3\nTest (3B): concurrent Start()s ...\n ... Passed -- 1.1 3 24 7379 6\nTest (3B): rejoin of partitioned leader ...\n ... Passed -- 5.1 3 152 37021 4\nTest (3B): leader backs up quickly over incorrect follower logs ...\n ... Passed -- 17.2 5 2080 1587388 102\nTest (3B): RPC counts aren't too high ...\n ... Passed -- 2.2 3 60 20119 12\nPASS\nok \t6.5840/raft\t35.557s\n\nreal\t0m35.899s\nuser\t0m2.556s\nsys\t0m1.458s", "difficulty": "hard", "link": "http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", "repo_path": "projects/6.5840-golabs-2024"} -{"task_id": "system_lab_6", "task_name": "problems/system_lab_6.md", "task": "# Problem Context\n## Introduction\nThis is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will \u00c2\u0093shard\u00c2\u0094 your service over multiple replicated state machines for higher performance.\n\nA replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data.\n\nRaft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. 
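On the 3B advice above about loops that wait without spinning, one option is a condition variable; a small illustrative sketch (names made up, not the skeleton's):

```
// Illustrative waiting loop using a condition variable, as an alternative to
// sleeping in a busy loop. Names are made up; real code would send ApplyMsg
// values on applyCh as entries commit.
package raft

import "sync"

type applierSketch struct {
	mu          sync.Mutex
	cond        *sync.Cond // created with sync.NewCond(&a.mu)
	commitIndex int
	lastApplied int
	dead        bool
}

func (a *applierSketch) applier() {
	a.mu.Lock()
	defer a.mu.Unlock()
	for !a.dead {
		for a.lastApplied >= a.commitIndex && !a.dead {
			a.cond.Wait() // releases a.mu while blocked
		}
		for a.lastApplied < a.commitIndex {
			a.lastApplied++
			// Apply the entry at index lastApplied here.
		}
	}
}

// Whoever advances commitIndex should call a.cond.Broadcast() while holding a.mu.
```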
If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again.\n\nIn this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute.\n\nYou should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6).\n\nThis lab is due in four parts. You must submit each part on the corresponding due date.\n## Getiting Started\nIf you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html).\n\nWe supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`.\n\nWhen we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/raft\n$ go test\nTest (3A): initial election ...\n--- FAIL: TestInitialElection3A (5.04s)\n config.go:326: expected one leader, got none\nTest (3A): election after network failure ...\n--- FAIL: TestReElection3A (5.03s)\n config.go:326: expected one leader, got none\n...\n$\n```\n## The Code\nImplement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs.\n\nYour implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`.\n\n```\n// create a new Raft server instance:\nrf := Make(peers, me, persister, applyCh)\n\n// start agreement on a new log entry:\nrf.Start(command interface{}) (index, term, isleader)\n\n// ask a Raft for its current term, and whether it thinks it is leader\nrf.GetState() (term, isLeader)\n\n// each time a new entry is committed to the log, each Raft peer\n// should send an ApplyMsg to the service (or tester).\ntype ApplyMsg\n```\n\nA service calls `Make(peers,me,\u00c2\u0085)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. 
The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`.\n\n`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files.\n\nSubsequent labs build on this lab, so it is important to give yourself enough time to write solid code.\n# Your Task \nIf a Raft-based server reboots it should resume service where it left off. This requires that Raft keep persistent state that survives a reboot. The paper's Figure 2 mentions which state should be persistent.\n\nA real implementation would write Raft's persistent state to disk each time it changed, and would read the state from disk when restarting after a reboot. Your implementation won't use the disk; instead, it will save and restore persistent state from a `Persister` object (see `persister.go`). Whoever calls `Raft.Make()` supplies a `Persister` that initially holds Raft's most recently persisted state (if any). Raft should initialize its state from that `Persister`, and should use it to save its persistent state each time the state changes. Use the `Persister`'s `ReadRaftState()` and `Save()` methods.\n\nComplete the functions `persist()` and `readPersist()` in `raft.go` by adding code to save and restore persistent state. You will need to encode (or \"serialize\") the state as an array of bytes in order to pass it to the `Persister`. Use the `labgob` encoder; see the comments in `persist()` and `readPersist()`. `labgob` is like Go's `gob` encoder but prints error messages if you try to encode structures with lower-case field names. For now, pass `nil` as the second argument to `persister.Save()`. Insert calls to `persist()` at the points where your implementation changes persistent state. Once you've done this, and if the rest of your implementation is correct, you should pass all of the 3C tests.\n\nYou will probably need the optimization that backs up nextIndex by more than one entry at a time. Look at the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf) starting at the bottom of page 7 and top of page 8 (marked by a gray line). The paper is vague about the details; you will need to fill in the gaps. 
One possibility is to have a rejection message include:\n\n```\n XTerm: term in the conflicting entry (if any)\n XIndex: index of first entry with that term (if any)\n XLen: log length\n```\n\nThen the leader's logic can be something like:\n\n```\n Case 1: leader doesn't have XTerm:\n nextIndex = XIndex\n Case 2: leader has XTerm:\n nextIndex = leader's last entry for XTerm\n Case 3: follower's log is too short:\n nextIndex = XLen\n```\n\nA few other hints:\n\n- Run `git pull` to get the latest lab software.\n- The 3C tests are more demanding than those for 3A or 3B, and failures may be caused by problems in your code for 3A or 3B.\n\nYour code should pass all the 3C tests (as shown below), as well as the 3A and 3B tests.\n\n```\n$ go test -run 3C\nTest (3C): basic persistence ...\n ... Passed -- 5.0 3 86 22849 6\nTest (3C): more persistence ...\n ... Passed -- 17.6 5 952 218854 16\nTest (3C): partitioned leader and one follower crash, leader restarts ...\n ... Passed -- 2.0 3 34 8937 4\nTest (3C): Figure 8 ...\n ... Passed -- 31.2 5 580 130675 32\nTest (3C): unreliable agreement ...\n ... Passed -- 1.7 5 1044 366392 246\nTest (3C): Figure 8 (unreliable) ...\n ... Passed -- 33.6 5 10700 33695245 308\nTest (3C): churn ...\n ... Passed -- 16.1 5 8864 44771259 1544\nTest (3C): unreliable churn ...\n ... Passed -- 16.5 5 4220 6414632 906\nPASS\nok \t6.5840/raft\t123.564s\n$\n```\n\nIt is a good idea to run the tests multiple times before submitting and check that each run prints `PASS`.\n\n```\n$ for i in {0..10}; do go test; done\n```", "test_method": "cd src/raft && go test -run 3C", "test_results": "Test (3C): basic persistence ...\n ... Passed -- 5.0 3 86 22849 6\nTest (3C): more persistence ...\n ... Passed -- 17.6 5 952 218854 16\nTest (3C): partitioned leader and one follower crash, leader restarts ...\n ... Passed -- 2.0 3 34 8937 4\nTest (3C): Figure 8 ...\n ... Passed -- 31.2 5 580 130675 32\nTest (3C): unreliable agreement ...\n ... Passed -- 1.7 5 1044 366392 246\nTest (3C): Figure 8 (unreliable) ...\n ... Passed -- 33.6 5 10700 33695245 308\nTest (3C): churn ...\n ... Passed -- 16.1 5 8864 44771259 1544\nTest (3C): unreliable churn ...\n ... Passed -- 16.5 5 4220 6414632 906\nPASS\nok \t6.5840/raft\t123.564s", "difficulty": "hard", "link": "http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", "repo_path": "projects/6.5840-golabs-2024"} -{"task_id": "system_lab_7", "task_name": "problems/system_lab_7.md", "task": "# Problem Context\n## Introduction\nThis is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will \u00c2\u0093shard\u00c2\u0094 your service over multiple replicated state machines for higher performance.\n\nA replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data.\n\nRaft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. 
Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again.\n\nIn this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute.\n\nYou should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6).\n\nThis lab is due in four parts. You must submit each part on the corresponding due date.\n## Getiting Started\nIf you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html).\n\nWe supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`.\n\nWhen we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/raft\n$ go test\nTest (3A): initial election ...\n--- FAIL: TestInitialElection3A (5.04s)\n config.go:326: expected one leader, got none\nTest (3A): election after network failure ...\n--- FAIL: TestReElection3A (5.03s)\n config.go:326: expected one leader, got none\n...\n$\n```\n## The Code\nImplement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs.\n\nYour implementation must support the following interface, which the tester and (eventually) your key/value server will use. 
You'll find more details in comments in `raft.go`.\n\n```\n// create a new Raft server instance:\nrf := Make(peers, me, persister, applyCh)\n\n// start agreement on a new log entry:\nrf.Start(command interface{}) (index, term, isleader)\n\n// ask a Raft for its current term, and whether it thinks it is leader\nrf.GetState() (term, isLeader)\n\n// each time a new entry is committed to the log, each Raft peer\n// should send an ApplyMsg to the service (or tester).\ntype ApplyMsg\n```\n\nA service calls `Make(peers,me,\u00c2\u0085)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`.\n\n`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files.\n\nSubsequent labs build on this lab, so it is important to give yourself enough time to write solid code.\n# Your Task \nAs things stand now, a rebooting server replays the complete Raft log in order to restore its state. However, it's not practical for a long-running service to remember the complete Raft log forever. Instead, you'll modify Raft to cooperate with services that persistently store a \"snapshot\" of their state from time to time, at which point Raft discards log entries that precede the snapshot. The result is a smaller amount of persistent data and faster restart. However, it's now possible for a follower to fall so far behind that the leader has discarded the log entries it needs to catch up; the leader must then send a snapshot plus the log starting at the time of the snapshot. Section 7 of the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf) outlines the scheme; you will have to design the details.\n\nYour Raft must provide the following function that the service can call with a serialized snapshot of its state:\n\n```\nSnapshot(index int, snapshot []byte)\n```\n\nIn Lab 3D, the tester calls `Snapshot()` periodically. In Lab 4, you will write a key/value server that calls `Snapshot()`; the snapshot will contain the complete table of key/value pairs. The service layer calls `Snapshot()` on every peer (not just on the leader).\n\nThe `index` argument indicates the highest log entry that's reflected in the snapshot. Raft should discard its log entries before that point. You'll need to revise your Raft code to operate while storing only the tail of the log.\n\nYou'll need to implement the `InstallSnapshot` RPC discussed in the paper that allows a Raft leader to tell a lagging Raft peer to replace its state with a snapshot. 
You will likely need to think through how InstallSnapshot should interact with the state and rules in Figure 2.\n\nWhen a follower's Raft code receives an InstallSnapshot RPC, it can use the `applyCh` to send the snapshot to the service in an `ApplyMsg`. The `ApplyMsg` struct definition already contains the fields you will need (and which the tester expects). Take care that these snapshots only advance the service's state, and don't cause it to move backwards.\n\nIf a server crashes, it must restart from persisted data. Your Raft should persist both Raft state and the corresponding snapshot. Use the second argument to `persister.Save()` to save the snapshot. If there's no snapshot, pass `nil` as the second argument.\n\nWhen a server restarts, the application layer reads the persisted snapshot and restores its saved state.\n\nImplement `Snapshot()` and the InstallSnapshot RPC, as well as the changes to Raft to support these (e.g., operation with a trimmed log). Your solution is complete when it passes the 3D tests (and all the previous Lab 3 tests).\n\n- `git pull` to make sure you have the latest software.\n- A good place to start is to modify your code so that it is able to store just the part of the log starting at some index X. Initially you can set X to zero and run the 3B/3C tests. Then make `Snapshot(index)` discard the log before `index`, and set X equal to `index`. If all goes well you should now pass the first 3D test.\n- Next: have the leader send an InstallSnapshot RPC if it doesn't have the log entries required to bring a follower up to date.\n- Send the entire snapshot in a single InstallSnapshot RPC. Don't implement Figure 13's `offset` mechanism for splitting up the snapshot.\n- Raft must discard old log entries in a way that allows the Go garbage collector to free and re-use the memory; this requires that there be no reachable references (pointers) to the discarded log entries.\n- A reasonable amount of time to consume for the full set of Lab 3 tests (3A+3B+3C+3D) without `-race` is 6 minutes of real time and one minute of CPU time. When running with `-race`, it is about 10 minutes of real time and two minutes of CPU time.\n\nYour code should pass all the 3D tests (as shown below), as well as the 3A, 3B, and 3C tests.\n\n```\n$ go test -run 3D\nTest (3D): snapshots basic ...\n ... Passed -- 11.6 3 176 61716 192\nTest (3D): install snapshots (disconnect) ...\n ... Passed -- 64.2 3 878 320610 336\nTest (3D): install snapshots (disconnect+unreliable) ...\n ... Passed -- 81.1 3 1059 375850 341\nTest (3D): install snapshots (crash) ...\n ... Passed -- 53.5 3 601 256638 339\nTest (3D): install snapshots (unreliable+crash) ...\n ... Passed -- 63.5 3 687 288294 336\nTest (3D): crash and restart all servers ...\n ... Passed -- 19.5 3 268 81352 58\nPASS\nok 6.5840/raft 293.456s\n```", "test_method": "cd src/raft && go test -run 3D", "test_results": "Test (3D): snapshots basic ...\n ... Passed -- 11.6 3 176 61716 192\nTest (3D): install snapshots (disconnect) ...\n ... Passed -- 64.2 3 878 320610 336\nTest (3D): install snapshots (disconnect+unreliable) ...\n ... Passed -- 81.1 3 1059 375850 341\nTest (3D): install snapshots (crash) ...\n ... Passed -- 53.5 3 601 256638 339\nTest (3D): install snapshots (unreliable+crash) ...\n ... Passed -- 63.5 3 687 288294 336\nTest (3D): crash and restart all servers ...\n ... 
Passed -- 19.5 3 268 81352 58\nPASS\nok 6.5840/raft 293.456s", "difficulty": "hard", "link": "http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", "repo_path": "projects/6.5840-golabs-2024"} -{"task_id": "system_lab_8", "task_name": "problems/system_lab_8.md", "task": "# Problem Context\n## Introduction\nIn this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html). Your key/value service will be a replicated state machine, consisting of several key/value servers that each maintain a database of key/value pairs, as in [Lab 2](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html), but additionally use Raft for replication. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2024/notes/raft_diagram.pdf).\n\nClients will interact with your key/value service in much the same way as Lab 2. In particular, clients can send three different RPCs to the key/value service:\n\n- `Put(key, value)`: replaces the value for a particular key in the database\n- `Append(key, arg)`: appends arg to key's value (treating the existing value as an empty string if the key is non-existent)\n- `Get(key)`: fetches the current value of the key (returning the empty string for non-existent keys)\n\nKeys and values are strings. Note that unlike in Lab 2, neither `Put` nor `Append` should return a value to the client. Each client talks to the service through a `Clerk` with Put/Append/Get methods. The `Clerk` manages RPC interactions with the servers.\n\nYour service must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If called one at a time, the Get/Put/Append methods should act as if the system had only one copy of its state, and each call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts.\n\nProviding linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates.\n\nThis lab has two parts. In part A, you will implement a replicated key/value service using your Raft implementation, but without using snapshots. In part B, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline.\n\nYou should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), in particular Sections 7 and 8. 
For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf)\n\nStart early.\n## Getiting Started\nWe supply you with skeleton code and tests in `src/kvraft`. You will need to modify `kvraft/client.go`, `kvraft/server.go`, and perhaps `kvraft/common.go`.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/kvraft\n$ go test\n...\n$\n```\n\n## The Code\n\n# Your Task \nEach of your key/value servers (\"kvservers\") will have an associated Raft peer. Clerks send `Put()`, `Append()`, and `Get()` RPCs to the kvserver whose associated Raft is the leader. The kvserver code submits the Put/Append/Get operation to Raft, so that the Raft log holds a sequence of Put/Append/Get operations. All of the kvservers execute operations from the Raft log in order, applying the operations to their key/value databases; the intent is for the servers to maintain identical replicas of the key/value database.\n\nA `Clerk` sometimes doesn't know which kvserver is the Raft leader. If the `Clerk` sends an RPC to the wrong kvserver, or if it cannot reach the kvserver, the `Clerk` should re-try by sending to a different kvserver. If the key/value service commits the operation to its Raft log (and hence applies the operation to the key/value state machine), the leader reports the result to the `Clerk` by responding to its RPC. If the operation failed to commit (for example, if the leader was replaced), the server reports an error, and the `Clerk` retries with a different server.\n\nYour kvservers should not directly communicate; they should only interact with each other through Raft.\n\nYour first task is to implement a solution that works when there are no dropped messages, and no failed servers.\n\nFeel free to copy over your client code from Lab 2 (`kvsrv/client.go`) into `kvraft/client.go`. You will need to add logic for deciding which kvserver to send each RPC to. Recall that `Append()` no longer returns a value to the Clerk.\n\nYou'll also need to implement `Put()`, `Append()`, and `Get()` RPC handlers in `server.go`. These handlers should enter an `Op` in the Raft log using `Start()`; you should fill in the `Op` struct definition in `server.go` so that it describes a Put/Append/Get operation. Each server should execute `Op` commands as Raft commits them, i.e. as they appear on the `applyCh`. An RPC handler should notice when Raft commits its `Op`, and then reply to the RPC.\n\nYou have completed this task when you **reliably** pass the first test in the test suite: \"One client\".\n\n- After calling `Start()`, your kvservers will need to wait for Raft to complete agreement. Commands that have been agreed upon arrive on the `applyCh`. Your code will need to keep reading `applyCh` while `Put()`, `Append()`, and `Get()` handlers submit commands to the Raft log using `Start()`. Beware of deadlock between the kvserver and its Raft library.\n- A kvserver should not complete a `Get()` RPC if it is not part of a majority (so that it does not serve stale data). A simple solution is to enter every `Get()` (as well as each `Put()` and `Append()`) in the Raft log. 
You don't have to implement the optimization for read-only operations that is described in Section 8.\n- You should not need to add any fields to the Raft `ApplyMsg`, or to Raft RPCs such as `AppendEntries`, but you are allowed to do so.\n- It's best to add locking from the start because the need to avoid deadlocks sometimes affects overall code design. Check that your code is race-free using `go test -race`.\n\nNow you should modify your solution to continue in the face of network and server failures. One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it finds a kvserver that replies positively. If a leader fails just after committing an entry to the Raft log, the `Clerk` may not receive a reply, and thus may re-send the request to another leader. Each call to `Clerk.Put()` or `Clerk.Append()` should result in just a single execution, so you will have to ensure that the re-send doesn't result in the servers executing the request twice.\n\nAdd code to handle failures, and to cope with duplicate `Clerk` requests, including situations where the `Clerk` sends a request to a kvserver leader in one term, times out waiting for a reply, and re-sends the request to a new leader in another term. The request should execute just once. These notes include guidance on [duplicate detection](http://nil.csail.mit.edu/6.5840/2024/notes/l-raft-QA.txt). Your code should pass the `go test -run 4A` tests.\n\n- Your solution needs to handle a leader that has called Start() for a Clerk's RPC, but loses its leadership before the request is committed to the log. In this case you should arrange for the Clerk to re-send the request to other servers until it finds the new leader. One way to do this is for the server to detect that it has lost leadership, by noticing that Raft's term has changed or a different request has appeared at the index returned by Start(). If the ex-leader is partitioned by itself, it won't know about new leaders; but any client in the same partition won't be able to talk to a new leader either, so it's OK in this case for the server and client to wait indefinitely until the partition heals.\n- You will probably have to modify your Clerk to remember which server turned out to be the leader for the last RPC, and send the next RPC to that server first. This will avoid wasting time searching for the leader on every RPC, which may help you pass some of the tests quickly enough.\n- You should use a duplicate detection scheme similar to Lab 2. It should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. It's OK to assume that a client will make only one call into a Clerk at a time. You may find that you need to make changes to what information you store in your duplicate detection table from Lab 2.\n\nYour code should now pass the Lab 4A tests, like this:\n\n```\n$ go test -run 4A\nTest: one client (4A) ...\n ... Passed -- 15.5 5 4576 903\nTest: ops complete fast enough (4A) ...\n ... Passed -- 15.7 3 3022 0\nTest: many clients (4A) ...\n ... Passed -- 15.9 5 5884 1160\nTest: unreliable net, many clients (4A) ...\n ... Passed -- 19.2 5 3083 441\nTest: concurrent append to same key, unreliable (4A) ...\n ... Passed -- 2.5 3 218 52\nTest: progress in majority (4A) ...\n ... Passed -- 1.7 5 103 2\nTest: no progress in minority (4A) ...\n ... Passed -- 1.0 5 102 3\nTest: completion after heal (4A) ...\n ... Passed -- 1.2 5 70 3\nTest: partitions, one client (4A) ...\n ... 
Passed -- 23.8 5 4501 765\nTest: partitions, many clients (4A) ...\n ... Passed -- 23.5 5 5692 974\nTest: restarts, one client (4A) ...\n ... Passed -- 22.2 5 4721 908\nTest: restarts, many clients (4A) ...\n ... Passed -- 22.5 5 5490 1033\nTest: unreliable net, restarts, many clients (4A) ...\n ... Passed -- 26.5 5 3532 474\nTest: restarts, partitions, many clients (4A) ...\n ... Passed -- 29.7 5 6122 1060\nTest: unreliable net, restarts, partitions, many clients (4A) ...\n ... Passed -- 32.9 5 2967 317\nTest: unreliable net, restarts, partitions, random keys, many clients (4A) ...\n ... Passed -- 35.0 7 8249 746\nPASS\nok \t6.5840/kvraft\t290.184s\n```\n\nThe numbers after each `Passed` are real time in seconds, number of peers, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put/Append calls).", "test_method": "cd src/kvraft && go test -run 4A", "test_results": "Test: one client (4A) ...\n ... Passed -- 15.5 5 4576 903\nTest: ops complete fast enough (4A) ...\n ... Passed -- 15.7 3 3022 0\nTest: many clients (4A) ...\n ... Passed -- 15.9 5 5884 1160\nTest: unreliable net, many clients (4A) ...\n ... Passed -- 19.2 5 3083 441\nTest: concurrent append to same key, unreliable (4A) ...\n ... Passed -- 2.5 3 218 52\nTest: progress in majority (4A) ...\n ... Passed -- 1.7 5 103 2\nTest: no progress in minority (4A) ...\n ... Passed -- 1.0 5 102 3\nTest: completion after heal (4A) ...\n ... Passed -- 1.2 5 70 3\nTest: partitions, one client (4A) ...\n ... Passed -- 23.8 5 4501 765\nTest: partitions, many clients (4A) ...\n ... Passed -- 23.5 5 5692 974\nTest: restarts, one client (4A) ...\n ... Passed -- 22.2 5 4721 908\nTest: restarts, many clients (4A) ...\n ... Passed -- 22.5 5 5490 1033\nTest: unreliable net, restarts, many clients (4A) ...\n ... Passed -- 26.5 5 3532 474\nTest: restarts, partitions, many clients (4A) ...\n ... Passed -- 29.7 5 6122 1060\nTest: unreliable net, restarts, partitions, many clients (4A) ...\n ... Passed -- 32.9 5 2967 317\nTest: unreliable net, restarts, partitions, random keys, many clients (4A) ...\n ... Passed -- 35.0 7 8249 746\nPASS\nok \t6.5840/kvraft\t290.184s", "difficulty": "moderate/hard", "link": "http://nil.csail.mit.edu/6.5840/2024/labs/lab-kvraft.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", "repo_path": "projects/6.5840-golabs-2024"} -{"task_id": "system_lab_9", "task_name": "problems/system_lab_9.md", "task": "# Problem Context\n## Introduction\nIn this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html). Your key/value service will be a replicated state machine, consisting of several key/value servers that each maintain a database of key/value pairs, as in [Lab 2](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html), but additionally use Raft for replication. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2024/notes/raft_diagram.pdf).\n\nClients will interact with your key/value service in much the same way as Lab 2. 
In particular, clients can send three different RPCs to the key/value service:\n\n- `Put(key, value)`: replaces the value for a particular key in the database\n- `Append(key, arg)`: appends arg to key's value (treating the existing value as an empty string if the key is non-existent)\n- `Get(key)`: fetches the current value of the key (returning the empty string for non-existent keys)\n\nKeys and values are strings. Note that unlike in Lab 2, neither `Put` nor `Append` should return a value to the client. Each client talks to the service through a `Clerk` with Put/Append/Get methods. The `Clerk` manages RPC interactions with the servers.\n\nYour service must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If called one at a time, the Get/Put/Append methods should act as if the system had only one copy of its state, and each call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts.\n\nProviding linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates.\n\nThis lab has two parts. In part A, you will implement a replicated key/value service using your Raft implementation, but without using snapshots. In part B, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline.\n\nYou should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), in particular Sections 7 and 8. For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf)\n\nStart early.\n## Getiting Started\nWe supply you with skeleton code and tests in `src/kvraft`. You will need to modify `kvraft/client.go`, `kvraft/server.go`, and perhaps `kvraft/common.go`.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/kvraft\n$ go test\n...\n$\n```\n\n## The Code\n\n# Your Task \nAs things stand now, your key/value server doesn't call your Raft library's `Snapshot()` method, so a rebooting server has to replay the complete persisted Raft log in order to restore its state. Now you'll modify kvserver to cooperate with Raft to save log space, and reduce restart time, using Raft's `Snapshot()` from Lab 3D.\n\nThe tester passes `maxraftstate` to your `StartKVServer()`. `maxraftstate` indicates the maximum allowed size of your persistent Raft state in bytes (including the log, but not including snapshots). You should compare `maxraftstate` to `persister.RaftStateSize()`. Whenever your key/value server detects that the Raft state size is approaching this threshold, it should save a snapshot by calling Raft's `Snapshot`. 
If `maxraftstate` is -1, you do not have to snapshot. `maxraftstate` applies to the GOB-encoded bytes your Raft passes as the first argument to `persister.Save()`.\n\nModify your kvserver so that it detects when the persisted Raft state grows too large, and then hands a snapshot to Raft. When a kvserver restarts, it should read the snapshot from `persister` and restore its state from the snapshot.\n\n- Think about when a kvserver should snapshot its state and what should be included in the snapshot. Raft stores each snapshot in the persister object using `Save()`, along with corresponding Raft state. You can read the latest stored snapshot using `ReadSnapshot()`.\n- Your kvserver must be able to detect duplicated operations in the log across checkpoints, so any state you are using to detect them must be included in the snapshots.\n- Capitalize all fields of structures stored in the snapshot.\n- You may have bugs in your Raft library that this lab exposes. If you make changes to your Raft implementation make sure it continues to pass all of the Lab 3 tests.\n- A reasonable amount of time to take for the Lab 4 tests is 400 seconds of real time and 700 seconds of CPU time. Further, `go test -run TestSnapshotSize` should take less than 20 seconds of real time.\n\nYour code should pass the 4B tests (as in the example here) as well as the 4A tests (and your Raft must continue to pass the Lab 3 tests).\n\n```\n$ go test -run 4B\nTest: InstallSnapshot RPC (4B) ...\n ... Passed -- 4.0 3 289 63\nTest: snapshot size is reasonable (4B) ...\n ... Passed -- 2.6 3 2418 800\nTest: ops complete fast enough (4B) ...\n ... Passed -- 3.2 3 3025 0\nTest: restarts, snapshots, one client (4B) ...\n ... Passed -- 21.9 5 29266 5820\nTest: restarts, snapshots, many clients (4B) ...\n ... Passed -- 21.5 5 33115 6420\nTest: unreliable net, snapshots, many clients (4B) ...\n ... Passed -- 17.4 5 3233 482\nTest: unreliable net, restarts, snapshots, many clients (4B) ...\n ... Passed -- 22.7 5 3337 471\nTest: unreliable net, restarts, partitions, snapshots, many clients (4B) ...\n ... Passed -- 30.4 5 2725 274\nTest: unreliable net, restarts, partitions, snapshots, random keys, many clients (4B) ...\n ... Passed -- 37.7 7 8378 681\nPASS\nok \t6.5840/kvraft\t161.538s\n```", "test_method": "cd src/kvraft && go test -run 4B", "test_results": "Test: InstallSnapshot RPC (4B) ...\n ... Passed -- 4.0 3 289 63\nTest: snapshot size is reasonable (4B) ...\n ... Passed -- 2.6 3 2418 800\nTest: ops complete fast enough (4B) ...\n ... Passed -- 3.2 3 3025 0\nTest: restarts, snapshots, one client (4B) ...\n ... Passed -- 21.9 5 29266 5820\nTest: restarts, snapshots, many clients (4B) ...\n ... Passed -- 21.5 5 33115 6420\nTest: unreliable net, snapshots, many clients (4B) ...\n ... Passed -- 17.4 5 3233 482\nTest: unreliable net, restarts, snapshots, many clients (4B) ...\n ... Passed -- 22.7 5 3337 471\nTest: unreliable net, restarts, partitions, snapshots, many clients (4B) ...\n ... Passed -- 30.4 5 2725 274\nTest: unreliable net, restarts, partitions, snapshots, random keys, many clients (4B) ...\n ... 
Passed -- 37.7 7 8378 681\nPASS\nok \t6.5840/kvraft\t161.538s", "difficulty": "hard", "link": "http://nil.csail.mit.edu/6.5840/2024/labs/lab-kvraft.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", "repo_path": "projects/6.5840-golabs-2024"} -{"task_id": "system_lab_10", "task_name": "problems/system_lab_10.md", "task": "# Problem Context\n## Introduction\nYou can either do a [final project](http://nil.csail.mit.edu/6.5840/2024/project.html) based on your own ideas, or this lab.\n\nIn this lab you'll build a key/value storage system that \"shards,\" or partitions, the keys over a set of replica groups. A shard is a subset of the key/value pairs; for example, all the keys starting with \"a\" might be one shard, all the keys starting with \"b\" another, etc. The reason for sharding is performance. Each replica group handles puts and gets for just a few of the shards, and the groups operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of groups.\n\nYour sharded key/value store will have two main components. First, a set of replica groups. Each replica group is responsible for a subset of the shards, using Raft replication. The second component is the \"shard controller\". The shard controller decides which replica group should serve each shard; this information is called the configuration. The configuration changes over time. Clients consult the shard controller in order to find the replica group for a key, and replica groups consult the controller in order to find out what shards to serve. There is a single shard controller for the whole system, implemented as a fault-tolerant service using Raft.\n\nA sharded storage system must be able to shift shards among replica groups. One reason is that some groups may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that replica groups may join and leave the system: new replica groups may be added to increase capacity, or existing replica groups may be taken offline for repair or retirement.\n\nThe main challenge in this lab will be handling reconfiguration -- changes in the assignment of shards to groups. Within a single replica group, all group members must agree on when a reconfiguration occurs relative to client Put/Append/Get requests. For example, a Put may arrive at about the same time as a reconfiguration that causes the replica group to stop being responsible for the shard holding the Put's key. All replicas in the group must agree on whether the Put occurred before or after the reconfiguration. If before, the Put should take effect and the new owner of the shard will see its effect; if after, the Put won't take effect and client must re-try at the new owner. The recommended approach is to have each replica group use Raft to log not just the sequence of Puts, Appends, and Gets but also the sequence of reconfigurations. You will need to ensure that at most one replica group is serving requests for each shard at any one time.\n\nReconfiguration also requires interaction among the replica groups. For example, in configuration 10 group G1 may be responsible for shard S1. In configuration 11, group G2 may be responsible for shard S1. During the reconfiguration from 10 to 11, G1 and G2 must use RPC to move the contents of shard S1 (the key/value pairs) from G1 to G2.\n\nOnly RPC may be used for interaction among clients and servers. 
For example, different instances of your server are not allowed to share Go variables or files.\n\nThis lab uses \"configuration\" to refer to the assignment of shards to replica groups. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes.\n\nThis lab's general architecture (a configuration service and a set of replica groups) follows the same general pattern as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are very simple; and handoff of shards is slow and doesn't allow concurrent client access.\n\nYour Lab 5 sharded server, Lab 5 shard controller, and Lab 4 kvraft must all use the same Raft implementation.\n## Getiting Started\nDo a `git pull` to get the latest lab software.\n\nWe supply you with skeleton code and tests in `src/shardctrler` and `src/shardkv`.\n\nTo get up and running, execute the following commands:\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/shardctrler\n$ go test\n--- FAIL: TestBasic (0.00s)\n test_test.go:11: wanted 1 groups, got 0\nFAIL\nexit status 1\nFAIL shardctrler 0.008s\n$\n```\n\nWhen you're done, your implementation should pass all the tests in the `src/shardctrler` directory, and all the ones in `src/shardkv`.\n## The Code\n\n# Your Task \nFirst you'll implement the shard controller, in `shardctrler/server.go` and `client.go`, and a sharded key/value server that can handle an unchanging (static) configuration. When you're done, your code should pass all the tests in the `shardctrler/` directory, and the `5A` tests in `shardkv/`.\n\n```\n$ cd ~/6.5840/src/shardctrler\n$ go test\nTest: Basic leave/join ...\n ... Passed\nTest: Historical queries ...\n ... Passed\nTest: Move ...\n ... Passed\nTest: Concurrent leave/join ...\n ... Passed\nTest: Minimal transfers after joins ...\n ... Passed\nTest: Minimal transfers after leaves ...\n ... Passed\nTest: Multi-group join/leave ...\n ... Passed\nTest: Concurrent multi leave/join ...\n ... Passed\nTest: Minimal transfers after multijoins ...\n ... Passed\nTest: Minimal transfers after multileaves ...\n ... Passed\nTest: Check Same config on servers ...\n ... Passed\nPASS\nok \t6.5840/shardctrler\t5.863s\n$\n$ cd ../shardkv\n$ go test -run 5A\nTest (5A): static shards ...\n ... Passed\nTest (5A): rejection ...\n ... Passed\nPASS\nok 6.5840/shardkv 9.262s\n$\n```\n\nThe shardctrler manages a sequence of numbered configurations. Each configuration describes a set of replica groups and an assignment of shards to replica groups. Whenever this assignment needs to change, the shard controller creates a new configuration with the new assignment. Key/value clients and servers contact the shardctrler when they want to know the current (or a past) configuration.\n\nYour implementation must support the RPC interface described in `shardctrler/common.go`, which consists of `Join`, `Leave`, `Move`, and `Query` RPCs. These RPCs are intended to allow an administrator (and the tests) to control the shardctrler: to add new replica groups, to eliminate replica groups, and to move shards between replica groups.\n\nThe `Join` RPC is used by an administrator to add new replica groups. Its argument is a set of mappings from unique, non-zero replica group identifiers (GIDs) to lists of server names. 
The shardctrler should react by creating a new configuration that includes the new replica groups. The new configuration should divide the shards as evenly as possible among the full set of groups, and should move as few shards as possible to achieve that goal. The shardctrler should allow re-use of a GID if it's not part of the current configuration (i.e. a GID should be allowed to Join, then Leave, then Join again).\n\nThe `Leave` RPC's argument is a list of GIDs of previously joined groups. The shardctrler should create a new configuration that does not include those groups, and that assigns those groups' shards to the remaining groups. The new configuration should divide the shards as evenly as possible among the groups, and should move as few shards as possible to achieve that goal.\n\nThe `Move` RPC's arguments are a shard number and a GID. The shardctrler should create a new configuration in which the shard is assigned to the group. The purpose of `Move` is to allow us to test your software. A `Join` or `Leave` following a `Move` will likely un-do the `Move`, since `Join` and `Leave` re-balance.\n\nThe `Query` RPC's argument is a configuration number. The shardctrler replies with the configuration that has that number. If the number is -1 or bigger than the biggest known configuration number, the shardctrler should reply with the latest configuration. The result of `Query(-1)` should reflect every `Join`, `Leave`, or `Move` RPC that the shardctrler finished handling before it received the `Query(-1)` RPC.\n\nThe very first configuration should be numbered zero. It should contain no groups, and all shards should be assigned to GID zero (an invalid GID). The next configuration (created in response to a `Join` RPC) should be numbered 1, &c. There will usually be significantly more shards than groups (i.e., each group will serve more than one shard), in order that load can be shifted at a fairly fine granularity.\n\nYou must implement the interface specified above in `client.go` and `server.go` in the `shardctrler/` directory. Your shardctrler must be fault-tolerant, using your Raft library from Lab 3/4. You have completed this task when you pass all the tests in `shardctrler/`.\n\n- Start with a stripped-down copy of your kvraft server.\n- You should implement duplicate client request detection for RPCs to the shard controller. The shardctrler tests don't test this, but the shardkv tests will later use your shardctrler on an unreliable network; you may have trouble passing the shardkv tests if your shardctrler doesn't filter out duplicate RPCs.\n- The code in your state machine that performs the shard rebalancing needs to be deterministic. In Go, map iteration order is [not deterministic](https://blog.golang.org/maps#TOC_7.).\n- Go maps are references. If you assign one variable of type map to another, both variables refer to the same map. Thus if you want to create a new `Config` based on a previous one, you need to create a new map object (with `make()`) and copy the keys and values individually.\n- The Go race detector (go test -race) may help you find bugs.\n\nNext, in the `shardkv/` directory, implement enough of a sharded key/value server to pass the first two tests in `shardkv/`. Again, start by copying code from your existing `kvraft` server. 
You should be able to get the first test to pass without doing anything special regarding sharding, since the `shardkv/client.go` we give you takes care of sending RPCs to the group that the controller assigns to the key in question.\n\nFor the second `shardkv` test, each k/v replica group must reject requests for keys for shards for which the group is not the assigned group. At this point, it's enough for the k/v servers to periodically ask the controller for the latest configuration, and to check that configuration each time a client Get/Put/Append RPC arrives. Use `key2shard()` (in `client.go`) to find the shard number for a key.\n\nYour server should respond with an `ErrWrongGroup` error to a client RPC with a key that the server isn't responsible for (i.e. for a key whose shard is not assigned to the server's group).\n\nYour server should not call the shard controller's `Join()` handler. The tester will call `Join()` when appropriate.", "test_method": "cd src/shardctrler && go test", "test_results": "Test: Basic leave/join ...\n ... Passed\nTest: Historical queries ...\n ... Passed\nTest: Move ...\n ... Passed\nTest: Concurrent leave/join ...\n ... Passed\nTest: Minimal transfers after joins ...\n ... Passed\nTest: Minimal transfers after leaves ...\n ... Passed\nTest: Multi-group join/leave ...\n ... Passed\nTest: Concurrent multi leave/join ...\n ... Passed\nTest: Minimal transfers after multijoins ...\n ... Passed\nTest: Minimal transfers after multileaves ...\n ... Passed\nTest: Check Same config on servers ...\n ... Passed\nPASS\nok \t6.5840/shardctrler\t5.863s\n$\n$ cd ../shardkv\n$ go test -run 5A\nTest (5A): static shards ...\n ... Passed\nTest (5A): rejection ...\n ... Passed\nPASS\nok 6.5840/shardkv 9.262s", "difficulty": "easy", "link": "http://nil.csail.mit.edu/6.5840/2024/labs/lab-shard.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", "repo_path": "projects/6.5840-golabs-2024"} -{"task_id": "system_lab_11", "task_name": "problems/system_lab_11.md", "task": "# Problem Context\n## Introduction\nYou can either do a [final project](http://nil.csail.mit.edu/6.5840/2024/project.html) based on your own ideas, or this lab.\n\nIn this lab you'll build a key/value storage system that \"shards,\" or partitions, the keys over a set of replica groups. A shard is a subset of the key/value pairs; for example, all the keys starting with \"a\" might be one shard, all the keys starting with \"b\" another, etc. The reason for sharding is performance. Each replica group handles puts and gets for just a few of the shards, and the groups operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of groups.\n\nYour sharded key/value store will have two main components. First, a set of replica groups. Each replica group is responsible for a subset of the shards, using Raft replication. The second component is the \"shard controller\". The shard controller decides which replica group should serve each shard; this information is called the configuration. The configuration changes over time. Clients consult the shard controller in order to find the replica group for a key, and replica groups consult the controller in order to find out what shards to serve. There is a single shard controller for the whole system, implemented as a fault-tolerant service using Raft.\n\nA sharded storage system must be able to shift shards among replica groups. 
One reason is that some groups may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that replica groups may join and leave the system: new replica groups may be added to increase capacity, or existing replica groups may be taken offline for repair or retirement.\n\nThe main challenge in this lab will be handling reconfiguration -- changes in the assignment of shards to groups. Within a single replica group, all group members must agree on when a reconfiguration occurs relative to client Put/Append/Get requests. For example, a Put may arrive at about the same time as a reconfiguration that causes the replica group to stop being responsible for the shard holding the Put's key. All replicas in the group must agree on whether the Put occurred before or after the reconfiguration. If before, the Put should take effect and the new owner of the shard will see its effect; if after, the Put won't take effect and client must re-try at the new owner. The recommended approach is to have each replica group use Raft to log not just the sequence of Puts, Appends, and Gets but also the sequence of reconfigurations. You will need to ensure that at most one replica group is serving requests for each shard at any one time.\n\nReconfiguration also requires interaction among the replica groups. For example, in configuration 10 group G1 may be responsible for shard S1. In configuration 11, group G2 may be responsible for shard S1. During the reconfiguration from 10 to 11, G1 and G2 must use RPC to move the contents of shard S1 (the key/value pairs) from G1 to G2.\n\nOnly RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files.\n\nThis lab uses \"configuration\" to refer to the assignment of shards to replica groups. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes.\n\nThis lab's general architecture (a configuration service and a set of replica groups) follows the same general pattern as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. 
For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are very simple; and handoff of shards is slow and doesn't allow concurrent client access.\n\nYour Lab 5 sharded server, Lab 5 shard controller, and Lab 4 kvraft must all use the same Raft implementation.\n## Getiting Started\nDo a `git pull` to get the latest lab software.\n\nWe supply you with skeleton code and tests in `src/shardctrler` and `src/shardkv`.\n\nTo get up and running, execute the following commands:\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/shardctrler\n$ go test\n--- FAIL: TestBasic (0.00s)\n test_test.go:11: wanted 1 groups, got 0\nFAIL\nexit status 1\nFAIL shardctrler 0.008s\n$\n```\n\nWhen you're done, your implementation should pass all the tests in the `src/shardctrler` directory, and all the ones in `src/shardkv`.\n## The Code\n\n# Your Task \nDo a `git pull` to get the latest lab software.\n\nThe main task in this part of the lab is to move shards among replica groups when the controller changes the sharding, and do it in a way that provides linearizable k/v client operations.\n\nEach of your shards is only required to make progress when a majority of servers in the shard's Raft replica group is alive and can talk to each other, and can talk to a majority of the `shardctrler` servers. Your implementation must operate (serve requests and be able to re-configure as needed) even if a minority of servers in some replica group(s) are dead, temporarily unavailable, or slow.\n\nA shardkv server is a member of only a single replica group. The set of servers in a given replica group will never change.\n\nWe supply you with `client.go` code that sends each RPC to the replica group responsible for the RPC's key. It re-tries if the replica group says it is not responsible for the key; in that case, the client code asks the shard controller for the latest configuration and tries again. You'll have to modify client.go as part of your support for dealing with duplicate client RPCs, much as in the kvraft lab.\n\nWhen you're done your code should pass all the shardkv tests other than the challenge tests:\n\n```\n$ cd ~/6.5840/src/shardkv\n$ go test\nTest (5A): static shards ...\n ... Passed\nTest (5A): rejection ...\n ... Passed\nTest (5B): join then leave ...\n ... Passed\nTest (5B): snapshots, join, and leave ...\nlabgob warning: Decoding into a non-default variable/field Num may not work\n ... Passed\nTest (5B): servers miss configuration changes...\n ... Passed\nTest (5B): concurrent puts and configuration changes...\n ... Passed\nTest (5B): more concurrent puts and configuration changes...\n ... Passed\nTest (5B): concurrent configuration change and restart...\n ... Passed\nTest (5B): unreliable 1...\n ... Passed\nTest (5B): unreliable 2...\n ... Passed\nTest (5B): unreliable 3...\n ... Passed\nTest: shard deletion (challenge 1) ...\n ... Passed\nTest: unaffected shard access (challenge 2) ...\n ... Passed\nTest: partial migration shard access (challenge 2) ...\n ... Passed\nPASS\nok \t6.5840/shardkv\t173.974s\n$\n```\n\nYou will need to make your servers watch for configuration changes, and when one is detected, to start the shard migration process. If a replica group loses a shard, it must stop serving requests to keys in that shard immediately, and start migrating the data for that shard to the replica group that is taking over ownership. 
If a replica group gains a shard, it needs to wait for the previous owner to send over the old shard data before accepting requests for that shard.\n\nImplement shard migration during configuration changes. Make sure that all servers in a replica group do the migration at the same point in the sequence of operations they execute, so that they all either accept or reject concurrent client requests. You should focus on passing the second test (\"join then leave\") before working on the later tests. You are done with this task when you pass all tests up to, but not including, `TestDelete`.\n\nYour server will need to periodically poll the shardctrler to learn about new configurations. The tests expect that your code polls roughly every 100 milliseconds; more often is OK, but much less often may cause problems.\n\nServers will need to send RPCs to each other in order to transfer shards during configuration changes. The shardctrler's `Config` struct contains server names, but you need a `labrpc.ClientEnd` in order to send an RPC. You should use the `make_end()` function passed to `StartServer()` to turn a server name into a `ClientEnd`. `shardkv/client.go` contains code that does this.\n\n- Process re-configurations one at a time, in order.\n- If a test fails, check for gob errors (e.g. \"gob: type not registered for interface ...\"). Go doesn't consider gob errors to be fatal, although they are fatal for the lab.\n- You'll need to provide at-most-once semantics (duplicate detection) for client requests across shard movement.\n- Think about how the shardkv client and server should deal with `ErrWrongGroup`. Should the client change the sequence number if it receives `ErrWrongGroup`? Should the server update the client state if it returns `ErrWrongGroup` when executing a `Get`/`Put` request?\n- After a server has moved to a new configuration, it is acceptable for it to continue to store shards that it no longer owns (though this would be regrettable in a real system). This may help simplify your server implementation.\n- When group G1 needs a shard from G2 during a configuration change, does it matter at what point during its processing of log entries G2 sends the shard to G1?\n- You can send an entire map in an RPC request or reply, which may help keep the code for shard transfer simple.\n- If one of your RPC handlers includes in its reply a map (e.g. a key/value map) that's part of your server's state, you may get bugs due to races. The RPC system has to read the map in order to send it to the caller, but it isn't holding a lock that covers the map. Your server, however, may proceed to modify the same map while the RPC system is reading it. The solution is for the RPC handler to include a copy of the map in the reply.\n- If you put a map or a slice in a Raft log entry, and your key/value server subsequently sees the entry on the `applyCh` and saves a reference to the map/slice in your key/value server's state, you may have a race. Make a copy of the map/slice, and store the copy in your key/value server's state. The race is between your key/value server modifying the map/slice and Raft reading it while persisting its log.\n- During a configuration change, a pair of groups may need to move shards in both directions between them. If you see deadlock, this is a possible source.", "test_method": "cd src/shardkv && go test", "test_results": "Test (5A): static shards ...\n ... Passed\nTest (5A): rejection ...\n ... Passed\nTest (5B): join then leave ...\n ... 
Passed\nTest (5B): snapshots, join, and leave ...\nlabgob warning: Decoding into a non-default variable/field Num may not work\n ... Passed\nTest (5B): servers miss configuration changes...\n ... Passed\nTest (5B): concurrent puts and configuration changes...\n ... Passed\nTest (5B): more concurrent puts and configuration changes...\n ... Passed\nTest (5B): concurrent configuration change and restart...\n ... Passed\nTest (5B): unreliable 1...\n ... Passed\nTest (5B): unreliable 2...\n ... Passed\nTest (5B): unreliable 3...\n ... Passed\nTest: shard deletion (challenge 1) ...\n ... Passed\nTest: unaffected shard access (challenge 2) ...\n ... Passed\nTest: partial migration shard access (challenge 2) ...\n ... Passed\nPASS\nok \t6.5840/shardkv\t173.974s", "difficulty": "hard", "link": "http://nil.csail.mit.edu/6.5840/2024/labs/lab-shard.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2024", "repo_path": "projects/6.5840-golabs-2024"} -{"task_id": "system_lab_12", "task_name": "problems/system_lab_12.md", "task": "# Problem Context\n## Introduction\nIn this lab you'll build a MapReduce system. You'll implement a worker process that calls application Map and Reduce functions and handles reading and writing files, and a coordinator process that hands out tasks to workers and copes with failed workers. You'll be building something similar to the [MapReduce paper](http://research.google.com/archive/mapreduce-osdi04.pdf). (Note: this lab uses \"coordinator\" instead of the paper's \"master\".)\n## Getiting Started\nYou need to [setup Go](http://nil.csail.mit.edu/6.5840/2024/labs/go.html) to do the labs.\n\nFetch the initial lab software with [git](https://git-scm.com/) (a version control system). To learn more about git, look at the [Pro Git book](https://git-scm.com/book/en/v2) or the [git user's manual](http://www.kernel.org/pub/software/scm/git/docs/user-manual.html).\n\n```\n$ git clone git://g.csail.mit.edu/6.5840-golabs-2024 6.5840\n$ cd 6.5840\n$ ls\nMakefile src\n$\n```\n\nWe supply you with a simple sequential mapreduce implementation in `src/main/mrsequential.go`. It runs the maps and reduces one at a time, in a single process. We also provide you with a couple of MapReduce applications: word-count in `mrapps/wc.go`, and a text indexer in `mrapps/indexer.go`. You can run word count sequentially as follows:\n\n```\n$ cd ~/6.5840\n$ cd src/main\n$ go build -buildmode=plugin ../mrapps/wc.go\n$ rm mr-out*\n$ go run mrsequential.go wc.so pg*.txt\n$ more mr-out-0\nA 509\nABOUT 2\nACT 8\n...\n```\n\n`mrsequential.go` leaves its output in the file `mr-out-0`. The input is from the text files named `pg-xxx.txt`.\n\nFeel free to borrow code from `mrsequential.go`. You should also have a look at `mrapps/wc.go` to see what MapReduce application code looks like.\n\nFor this lab and all the others, we might issue updates to the code we provide you. To ensure that you can fetch those updates and easily merge them using `git pull`, it's best to leave the code we provide in the original files. You can add to the code we provide as directed in the lab write-ups; just don't move it. It's OK to put your own new functions in new files.\n## The Code\n\n# Your Task \nYour job is to implement a distributed MapReduce, consisting of two programs, the coordinator and the worker. There will be just one coordinator process, and one or more worker processes executing in parallel. 
In a real system the workers would run on a bunch of different machines, but for this lab you'll run them all on a single machine. The workers will talk to the coordinator via RPC. Each worker process will, in a loop, ask the coordinator for a task, read the task's input from one or more files, execute the task, write the task's output to one or more files, and again ask the coordinator for a new task. The coordinator should notice if a worker hasn't completed its task in a reasonable amount of time (for this lab, use ten seconds), and give the same task to a different worker.\n\nWe have given you a little code to start you off. The \"main\" routines for the coordinator and worker are in `main/mrcoordinator.go` and `main/mrworker.go`; don't change these files. You should put your implementation in `mr/coordinator.go`, `mr/worker.go`, and `mr/rpc.go`.\n\nHere's how to run your code on the word-count MapReduce application. First, make sure the word-count plugin is freshly built:\n\n```\n$ go build -buildmode=plugin ../mrapps/wc.go\n```\n\nIn the `main` directory, run the coordinator.\n\n```\n$ rm mr-out*\n$ go run mrcoordinator.go pg-*.txt\n```\n\nThe `pg-*.txt` arguments to `mrcoordinator.go` are the input files; each file corresponds to one \"split\", and is the input to one Map task.\n\nIn one or more other windows, run some workers:\n\n```\n$ go run mrworker.go wc.so\n```\n\nWhen the workers and coordinator have finished, look at the output in `mr-out-*`. When you've completed the lab, the sorted union of the output files should match the sequential output, like this:\n\n```\n$ cat mr-out-* | sort | more\nA 509\nABOUT 2\nACT 8\n...\n```\n\nWe supply you with a test script in `main/test-mr.sh`. The tests check that the `wc` and `indexer` MapReduce applications produce the correct output when given the `pg-xxx.txt` files as input. The tests also check that your implementation runs the Map and Reduce tasks in parallel, and that your implementation recovers from workers that crash while running tasks.\n\nIf you run the test script now, it will hang because the coordinator never finishes:\n\n```\n$ cd ~/6.5840/src/main\n$ bash test-mr.sh\n*** Starting wc test.\n```\n\nYou can change `ret := false` to true in the Done function in `mr/coordinator.go` so that the coordinator exits immediately. Then:\n\n```\n$ bash test-mr.sh\n*** Starting wc test.\nsort: No such file or directory\ncmp: EOF on mr-wc-all\n--- wc output is not the same as mr-correct-wc.txt\n--- wc test: FAIL\n$\n```\n\nThe test script expects to see output in files named `mr-out-X`, one for each reduce task. 
The empty implementations of `mr/coordinator.go` and `mr/worker.go` don't produce those files (or do much of anything else), so the test fails.\n\nWhen you've finished, the test script output should look like this:\n\n```\n$ bash test-mr.sh\n*** Starting wc test.\n--- wc test: PASS\n*** Starting indexer test.\n--- indexer test: PASS\n*** Starting map parallelism test.\n--- map parallelism test: PASS\n*** Starting reduce parallelism test.\n--- reduce parallelism test: PASS\n*** Starting job count test.\n--- job count test: PASS\n*** Starting early exit test.\n--- early exit test: PASS\n*** Starting crash test.\n--- crash test: PASS\n*** PASSED ALL TESTS\n$\n```\n\nYou may see some errors from the Go RPC package that look like\n\n```\n2019/12/16 13:27:09 rpc.Register: method \"Done\" has 1 input parameters; needs exactly three\n```\n\nIgnore these messages; registering the coordinator as an [RPC server](https://golang.org/src/net/rpc/server.go) checks if all its methods are suitable for RPCs (have 3 inputs); we know that `Done` is not called via RPC.\n\nAdditionally, depending on your strategy for terminating worker processes, you may see some errors of the form\n\n```\n2025/02/11 16:21:32 dialing:dial unix /var/tmp/5840-mr-501: connect: connection refused\n```\n\nIt is fine to see a handful of these messages per test; they arise when the worker is unable to contact the coordinator RPC server after the coordinator has exited.\n\n\n\n### A few rules:\n\n- The map phase should divide the intermediate keys into buckets for `nReduce` reduce tasks, where `nReduce` is the number of reduce tasks -- the argument that `main/mrcoordinator.go` passes to `MakeCoordinator()`. Each mapper should create `nReduce` intermediate files for consumption by the reduce tasks.\n- The worker implementation should put the output of the X'th reduce task in the file `mr-out-X`.\n- A `mr-out-X` file should contain one line per Reduce function output. The line should be generated with the Go `\"%v %v\"` format, called with the key and value. Have a look in `main/mrsequential.go` for the line commented \"this is the correct format\". The test script will fail if your implementation deviates too much from this format.\n- You can modify `mr/worker.go`, `mr/coordinator.go`, and `mr/rpc.go`. You can temporarily modify other files for testing, but make sure your code works with the original versions; we'll test with the original versions.\n- The worker should put intermediate Map output in files in the current directory, where your worker can later read them as input to Reduce tasks.\n- `main/mrcoordinator.go` expects `mr/coordinator.go` to implement a `Done()` method that returns true when the MapReduce job is completely finished; at that point, `mrcoordinator.go` will exit.\n- When the job is completely finished, the worker processes should exit. A simple way to implement this is to use the return value from `call()`: if the worker fails to contact the coordinator, it can assume that the coordinator has exited because the job is done, so the worker can terminate too. Depending on your design, you might also find it helpful to have a \"please exit\" pseudo-task that the coordinator can give to workers.\n\n### Hints\n\n- The [Guidance page](http://nil.csail.mit.edu/6.5840/2025/labs/guidance.html) has some tips on developing and debugging.\n\n- One way to get started is to modify `mr/worker.go`'s `Worker()` to send an RPC to the coordinator asking for a task. 
Then modify the coordinator to respond with the file name of an as-yet-unstarted map task. Then modify the worker to read that file and call the application Map function, as in `mrsequential.go`.\n\n- The application Map and Reduce functions are loaded at run-time using the Go plugin package, from files whose names end in `.so`.\n\n- If you change anything in the `mr/` directory, you will probably have to re-build any MapReduce plugins you use, with something like `go build -buildmode=plugin ../mrapps/wc.go`\n\n- This lab relies on the workers sharing a file system. That's straightforward when all workers run on the same machine, but would require a global filesystem like GFS if the workers ran on different machines.\n\n- A reasonable naming convention for intermediate files is `mr-X-Y`, where X is the Map task number, and Y is the reduce task number.\n\n- The worker's map task code will need a way to store intermediate key/value pairs in files in a way that can be correctly read back during reduce tasks. One possibility is to use Go's encoding/json package. To write key/value pairs in JSON format to an open file:\n\n ```\n enc := json.NewEncoder(file)\n for _, kv := ... {\n err := enc.Encode(&kv)\n ```\n\n and to read such a file back:\n\n ```\n dec := json.NewDecoder(file)\n for {\n var kv KeyValue\n if err := dec.Decode(&kv); err != nil {\n break\n }\n kva = append(kva, kv)\n }\n ```\n\n- The map part of your worker can use the `ihash(key)` function (in `worker.go`) to pick the reduce task for a given key.\n\n- You can steal some code from `mrsequential.go` for reading Map input files, for sorting intermediate key/value pairs between the Map and Reduce, and for storing Reduce output in files.\n\n- The coordinator, as an RPC server, will be concurrent; don't forget to lock shared data.\n\n- Use Go's race detector, with `go run -race`. `test-mr.sh` has a comment at the start that tells you how to run it with `-race`. When we grade your labs, we will **not** use the race detector. Nevertheless, if your code has races, there's a good chance it will fail when we test it even without the race detector.\n\n- Workers will sometimes need to wait, e.g. reduces can't start until the last map has finished. One possibility is for workers to periodically ask the coordinator for work, sleeping with `time.Sleep()` between each request. Another possibility is for the relevant RPC handler in the coordinator to have a loop that waits, either with `time.Sleep()` or `sync.Cond`. Go runs the handler for each RPC in its own thread, so the fact that one handler is waiting needn't prevent the coordinator from processing other RPCs.\n\n- The coordinator can't reliably distinguish between crashed workers, workers that are alive but have stalled for some reason, and workers that are executing but too slowly to be useful. The best you can do is have the coordinator wait for some amount of time, and then give up and re-issue the task to a different worker. For this lab, have the coordinator wait for ten seconds; after that the coordinator should assume the worker has died (of course, it might not have).\n\n- If you choose to implement Backup Tasks (Section 3.6), note that we test that your code doesn't schedule extraneous tasks when workers execute tasks without crashing. Backup tasks should only be scheduled after some relatively long period of time (e.g., 10s).\n\n- To test crash recovery, you can use the `mrapps/crash.go` application plugin. 
It randomly exits in the Map and Reduce functions.\n\n- To ensure that nobody observes partially written files in the presence of crashes, the MapReduce paper mentions the trick of using a temporary file and atomically renaming it once it is completely written. You can use `ioutil.TempFile` (or `os.CreateTemp` if you are running Go 1.17 or later) to create a temporary file and `os.Rename` to atomically rename it.\n\n- `test-mr.sh` runs all its processes in the sub-directory `mr-tmp`, so if something goes wrong and you want to look at intermediate or output files, look there. Feel free to temporarily modify `test-mr.sh` to `exit` after the failing test, so the script does not continue testing (and overwrite the output files).\n\n- `test-mr-many.sh` runs `test-mr.sh` many times in a row, which you may want to do in order to spot low-probability bugs. It takes as an argument the number of times to run the tests. You should not run several `test-mr.sh` instances in parallel because the coordinator will reuse the same socket, causing conflicts.\n\n- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names.\n\n- When calling the RPC call() function, the reply struct should contain all default values. RPC calls should look like this:\n\n ```\n reply := SomeType{}\n call(..., &reply)\n ```\n\n without setting any fields of reply before the call. If you pass reply structures that have non-default fields, the RPC system may silently return incorrect values.\n\n\n\n\n\n### \n", "test_method": "cd src/main && bash test-mr.sh", "test_results": "*** Starting wc test.\n--- wc test: PASS\n*** Starting indexer test.\n--- indexer test: PASS\n*** Starting map parallelism test.\n--- map parallelism test: PASS\n*** Starting reduce parallelism test.\n--- reduce parallelism test: PASS\n*** Starting job count test.\n--- job count test: PASS\n*** Starting early exit test.\n--- early exit test: PASS\n*** Starting crash test.\n--- crash test: PASS\n*** PASSED ALL TESTS", "difficulty": "moderate/hard", "link": "http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2025", "repo_path": "projects/6.5840-golabs-2025"} -{"task_id": "system_lab_13", "task_name": "problems/system_lab_13.md", "task": "# Problem Context\n## Introduction\nIn this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes.\n\n### KV server\n\nEach client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. 
A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`.\n\n`Get(key)` fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`.\n\nMaintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits.\n\nWhen you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background.\n\nLinearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.\n## Getiting Started\nWe supply you with skeleton code and tests in `src/kvsrv1`. `kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. `kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/kvsrv1\n$ go test -v\n=== RUN TestReliablePut\nOne client and reliable Put (reliable network)...\n kvsrv_test.go:25: Put err ErrNoKey\n...\n$\n```\n\n## The Code\n\n# Your Task \nYour first task is to implement a solution that works when there are no dropped messages. You'll need to add RPC-sending code to the Clerk Put/Get methods in `client.go`, and implement `Put` and `Get` RPC handlers in `server.go`.\n\nYou have completed this task when you pass the Reliable tests in the test suite:\n\n```\n$ go test -v -run Reliable\n=== RUN TestReliablePut\nOne client and reliable Put (reliable network)...\n ... Passed -- 0.0 1 5 0\n--- PASS: TestReliablePut (0.00s)\n=== RUN TestPutConcurrentReliable\nTest: many clients racing to put values to the same key (reliable network)...\ninfo: linearizability check timed out, assuming history is ok\n ... 
Passed -- 3.1 1 90171 90171\n--- PASS: TestPutConcurrentReliable (3.07s)\n=== RUN TestMemPutManyClientsReliable\nTest: memory use many put clients (reliable network)...\n ... Passed -- 9.2 1 100000 0\n--- PASS: TestMemPutManyClientsReliable (16.59s)\nPASS\nok \t6.5840/kvsrv1\t19.681s\n```\n\nThe numbers after each `Passed` are real time in seconds, the constant 1, the number of RPCs sent (including client RPCs), and the number of key/value operations executed (`Clerk` `Get` and `Put` calls).\n\n- Check that your code is race-free using `go test -race`.", "test_method": "cd src/kvsrv1 && go test -v -run Reliable", "test_results": "=== RUN TestReliablePut\nOne client and reliable Put (reliable network)...\n ... Passed -- 0.0 1 5 0\n--- PASS: TestReliablePut (0.00s)\n=== RUN TestPutConcurrentReliable\nTest: many clients racing to put values to the same key (reliable network)...\ninfo: linearizability check timed out, assuming history is ok\n ... Passed -- 3.1 1 90171 90171\n--- PASS: TestPutConcurrentReliable (3.07s)\n=== RUN TestMemPutManyClientsReliable\nTest: memory use many put clients (reliable network)...\n ... Passed -- 9.2 1 100000 0\n--- PASS: TestMemPutManyClientsReliable (16.59s)\nPASS\nok \t6.5840/kvsrv1\t19.681s", "difficulty": "easy", "link": "http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2025", "repo_path": "projects/6.5840-golabs-2025"} -{"task_id": "system_lab_14", "task_name": "problems/system_lab_14.md", "task": "# Problem Context\n## Introduction\nIn this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes.\n\n### KV server\n\nEach client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`.\n\n`Get(key)` fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`.\n\nMaintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits.\n\nWhen you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. 
That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background.\n\nLinearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.\n## Getiting Started\nWe supply you with skeleton code and tests in `src/kvsrv1`. `kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. `kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/kvsrv1\n$ go test -v\n=== RUN TestReliablePut\nOne client and reliable Put (reliable network)...\n kvsrv_test.go:25: Put err ErrNoKey\n...\n$\n```\n\n## The Code\n\n# Your Task \nIn many distributed applications, clients running on different machines use a key/value server to coordinate their activities. For example, ZooKeeper and Etcd allow clients to coordinate using a distributed lock, in analogy with how threads in a Go program can coordinate with locks (i.e., `sync.Mutex`). Zookeeper and Etcd implement such a lock with conditional put.\n\nIn this exercise your task is to implement a lock layered on client `Clerk.Put` and `Clerk.Get` calls. The lock supports two methods: `Acquire` and `Release`. The lock's specification is that only one client can successfully acquire the lock at a time; other clients must wait until the first client has released the lock using `Release`.\n\nWe supply you with skeleton code and tests in `src/kvsrv1/lock/`. You will need to modify `src/kvsrv1/lock/lock.go`. Your `Acquire` and `Release` code can talk to your key/value server by calling `lk.ck.Put()` and `lk.ck.Get()`.\n\nIf a client crashes while holding a lock, the lock will never be released. In a design more sophisticated than this lab, the client would attach a [lease](https://en.wikipedia.org/wiki/Lease_(computer_science)#:~:text=Leases are commonly used in,to rely on the resource.) to a lock. When the lease expires, the lock server would release the lock on behalf of the client. In this lab clients don't crash and you can ignore this problem.\n\nImplement `Acquire` and `Release`. 
You have completed this exercise when your code passes the Reliable tests in the test suite in the lock sub-directory:\n\n```\n$ cd lock\n$ go test -v -run Reliable\n=== RUN TestOneClientReliable\nTest: 1 lock clients (reliable network)...\n ... Passed -- 2.0 1 974 0\n--- PASS: TestOneClientReliable (2.01s)\n=== RUN TestManyClientsReliable\nTest: 10 lock clients (reliable network)...\n ... Passed -- 2.1 1 83194 0\n--- PASS: TestManyClientsReliable (2.11s)\nPASS\nok \t6.5840/kvsrv1/lock\t4.120s\n```\n\nIf you haven't implemented the lock yet, the first test will succeed.\n\nThis exercise requires little code but will require a bit more independent thought than the previous exercise.\n\n- You will need a unique identifier for each lock client; call `kvtest.RandValue(8)` to generate a random string.\n- The lock service should use a specific key to store the \"lock state\" (you would have to decide precisely what the lock state is). The key to be used is passed through the parameter `l` of `MakeLock` in `src/kvsrv1/lock/lock.go`.", "test_method": "cd src/kvsrv1/lock && go test -v -run Reliable", "test_results": "=== RUN TestOneClientReliable\nTest: 1 lock clients (reliable network)...\n ... Passed -- 2.0 1 974 0\n--- PASS: TestOneClientReliable (2.01s)\n=== RUN TestManyClientsReliable\nTest: 10 lock clients (reliable network)...\n ... Passed -- 2.1 1 83194 0\n--- PASS: TestManyClientsReliable (2.11s)\nPASS\nok \t6.5840/kvsrv1/lock\t4.120s", "difficulty": "moderate", "link": "http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2025", "repo_path": "projects/6.5840-golabs-2025"} -{"task_id": "system_lab_15", "task_name": "problems/system_lab_15.md", "task": "# Problem Context\n## Introduction\nIn this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes.\n\n### KV server\n\nEach client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`.\n\n`Get(key)` fetches the current value for the key and its associated version. 
If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`.\n\nMaintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits.\n\nWhen you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background.\n\nLinearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.\n## Getiting Started\nWe supply you with skeleton code and tests in `src/kvsrv1`. `kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. `kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/kvsrv1\n$ go test -v\n=== RUN TestReliablePut\nOne client and reliable Put (reliable network)...\n kvsrv_test.go:25: Put err ErrNoKey\n...\n$\n```\n\n## The Code\n\n# Your Task \nThe main challenge in this exercise is that the network may re-order, delay, or discard RPC requests and/or replies. To recover from discarded requests/replies, the Clerk must keep re-trying each RPC until it receives a reply from the server.\n\nIf the network discards an RPC request message, then the client re-sending the request will solve the problem: the server will receive and execute just the re-sent request.\n\nHowever, the network might instead discard an RPC reply message. The client does not know which message was discarded; the client only observes that it received no reply. If it was the reply that was discarded, and the client re-sends the RPC request, then the server will receive two copies of the request. That's OK for a `Get`, since `Get` doesn't modify the server state. 
It is safe to resend a `Put` RPC with the same version number, since the server executes `Put` conditionally on the version number; if the server received and executed a `Put` RPC, it will respond to a re-transmitted copy of that RPC with `rpc.ErrVersion` rather than executing the Put a second time.\n\nA tricky case is if the server replies with an `rpc.ErrVersion` in a response to an RPC that the Clerk retried. In this case, the Clerk cannot know if the Clerk's `Put` was executed by the server or not: the first RPC might have been executed by the server but the network may have discarded the successful response from the server, so that the server sent `rpc.ErrVersion` only for the retransmitted RPC. Or, it might be that another Clerk updated the key before the Clerk's first RPC arrived at the server, so that the server executed neither of the Clerk's RPCs and replied `rpc.ErrVersion` to both. Therefore, if a Clerk receives `rpc.ErrVersion` for a retransmitted Put RPC, `Clerk.Put` must return `rpc.ErrMaybe` to the application instead of `rpc.ErrVersion` since the request may have been executed. It is then up to the application to handle this case. If the server responds to an initial (not retransmitted) Put RPC with `rpc.ErrVersion`, then the Clerk should return `rpc.ErrVersion` to the application, since the RPC was definitely not executed by the server.\n\nIt would be more convenient for application developers if `Put`'s were exactly-once (i.e., no `rpc.ErrMaybe` errors) but that is difficult to guarantee without maintaining state at the server for each Clerk. In the last exercise of this lab, you will implement a lock using your Clerk to explore how to program with at-most-once `Clerk.Put`.\n\nNow you should modify your `kvsrv1/client.go` to continue in the face of dropped RPC requests and replies. A return value of `true` from the client's `ck.clnt.Call()` indicates that the client received an RPC reply from the server; a return value of `false` indicates that it did not receive a reply (more precisely, `Call()` waits for a reply message for a timeout interval, and returns false if no reply arrives within that time). Your `Clerk` should keep re-sending an RPC until it receives a reply. Keep in mind the discussion of `rpc.ErrMaybe` above. Your solution shouldn't require any changes to the server.\n\nAdd code to `Clerk` to retry if it doesn't receive a reply. You have completed this task if your code passes all tests in `kvsrv1/`, like this:\n\n```\n$ go test -v\n=== RUN TestReliablePut\nOne client and reliable Put (reliable network)...\n ... Passed -- 0.0 1 5 0\n--- PASS: TestReliablePut (0.00s)\n=== RUN TestPutConcurrentReliable\nTest: many clients racing to put values to the same key (reliable network)...\ninfo: linearizability check timed out, assuming history is ok\n ... Passed -- 3.1 1 106647 106647\n--- PASS: TestPutConcurrentReliable (3.09s)\n=== RUN TestMemPutManyClientsReliable\nTest: memory use many put clients (reliable network)...\n ... Passed -- 8.0 1 100000 0\n--- PASS: TestMemPutManyClientsReliable (14.61s)\n=== RUN TestUnreliableNet\nOne client (unreliable network)...\n ... Passed -- 7.6 1 251 208\n--- PASS: TestUnreliableNet (7.60s)\nPASS\nok \t6.5840/kvsrv1\t25.319s\n```\n\n- Before the client retries, it should wait a little bit; you can use Go's `time` package and call `time.Sleep(100 * time.Millisecond)`", "test_method": "cd src/kvsrv1 && go test -v", "test_results": "=== RUN TestReliablePut\nOne client and reliable Put (reliable network)...\n ... 
Passed -- 0.0 1 5 0\n--- PASS: TestReliablePut (0.00s)\n=== RUN TestPutConcurrentReliable\nTest: many clients racing to put values to the same key (reliable network)...\ninfo: linearizability check timed out, assuming history is ok\n ... Passed -- 3.1 1 106647 106647\n--- PASS: TestPutConcurrentReliable (3.09s)\n=== RUN TestMemPutManyClientsReliable\nTest: memory use many put clients (reliable network)...\n ... Passed -- 8.0 1 100000 0\n--- PASS: TestMemPutManyClientsReliable (14.61s)\n=== RUN TestUnreliableNet\nOne client (unreliable network)...\n ... Passed -- 7.6 1 251 208\n--- PASS: TestUnreliableNet (7.60s)\nPASS\nok \t6.5840/kvsrv1\t25.319s", "difficulty": "moderate", "link": "http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2025", "repo_path": "projects/6.5840-golabs-2025"} -{"task_id": "system_lab_16", "task_name": "problems/system_lab_16.md", "task": "# Problem Context\n## Introduction\nIn this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes.\n\n### KV server\n\nEach client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`.\n\n`Get(key)` fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`.\n\nMaintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits.\n\nWhen you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. 
See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background.\n\nLinearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.\n## Getiting Started\nWe supply you with skeleton code and tests in `src/kvsrv1`. `kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. `kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/kvsrv1\n$ go test -v\n=== RUN TestReliablePut\nOne client and reliable Put (reliable network)...\n kvsrv_test.go:25: Put err ErrNoKey\n...\n$\n```\n\n## The Code\n\n# Your Task \nModify your lock implementation to work correctly with your modified key/value client when the network is not reliable. You have completed this exercise when your code passes all the `kvsrv1/lock/` tests, including the unreliable ones:\n\n```\n$ cd lock\n$ go test -v\n=== RUN TestOneClientReliable\nTest: 1 lock clients (reliable network)...\n ... Passed -- 2.0 1 968 0\n--- PASS: TestOneClientReliable (2.01s)\n=== RUN TestManyClientsReliable\nTest: 10 lock clients (reliable network)...\n ... Passed -- 2.1 1 10789 0\n--- PASS: TestManyClientsReliable (2.12s)\n=== RUN TestOneClientUnreliable\nTest: 1 lock clients (unreliable network)...\n ... Passed -- 2.3 1 70 0\n--- PASS: TestOneClientUnreliable (2.27s)\n=== RUN TestManyClientsUnreliable\nTest: 10 lock clients (unreliable network)...\n ... Passed -- 3.6 1 908 0\n--- PASS: TestManyClientsUnreliable (3.62s)\nPASS\nok \t6.5840/kvsrv1/lock\t10.033s\n```\n", "test_method": "cd src/kvsrv1/lock && go test -v", "test_results": "=== RUN TestOneClientReliable\nTest: 1 lock clients (reliable network)...\n ... Passed -- 2.0 1 968 0\n--- PASS: TestOneClientReliable (2.01s)\n=== RUN TestManyClientsReliable\nTest: 10 lock clients (reliable network)...\n ... Passed -- 2.1 1 10789 0\n--- PASS: TestManyClientsReliable (2.12s)\n=== RUN TestOneClientUnreliable\nTest: 1 lock clients (unreliable network)...\n ... Passed -- 2.3 1 70 0\n--- PASS: TestOneClientUnreliable (2.27s)\n=== RUN TestManyClientsUnreliable\nTest: 10 lock clients (unreliable network)...\n ... Passed -- 3.6 1 908 0\n--- PASS: TestManyClientsUnreliable (3.62s)\nPASS\nok \t6.5840/kvsrv1/lock\t10.033s", "difficulty": "easy", "link": "http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2025", "repo_path": "projects/6.5840-golabs-2025"} -{"task_id": "system_lab_17", "task_name": "problems/system_lab_17.md", "task": "# Problem Context\n## Introduction\nThis is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. 
In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will \"shard\" your service over multiple replicated state machines for higher performance.\n\nA replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data.\n\nRaft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again.\n\nIn this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute.\n\nYou should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6).\n\nThis lab is due in four parts. You must submit each part on the corresponding due date.\n## Getting Started\nDo a `git pull` to get the latest lab software.\n\nIf you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html).\n\nWe supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`.\n\nWhen we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution.\n\nTo get up and running, execute the following commands. 
Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/raft1\n$ go test\nTest (3A): initial election (reliable network)...\nFatal: expected one leader, got none\n--- FAIL: TestInitialElection3A (4.90s)\nTest (3A): election after network failure (reliable network)...\nFatal: expected one leader, got none\n--- FAIL: TestReElection3A (5.05s)\n...\n$\n```\n\n## The Code\nImplement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs.\n\nYour implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`.\n\n```\n// create a new Raft server instance:\nrf := Make(peers, me, persister, applyCh)\n\n// start agreement on a new log entry:\nrf.Start(command interface{}) (index, term, isleader)\n\n// ask a Raft for its current term, and whether it thinks it is leader\nrf.GetState() (term, isLeader)\n\n// each time a new entry is committed to the log, each Raft peer\n// should send an ApplyMsg to the service (or tester).\ntype ApplyMsg\n```\n\nA service calls `Make(peers,me,\u00c2\u0085)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`.\n\n`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files.\n\nSubsequent labs build on this lab, so it is important to give yourself enough time to write solid code.\n# Your Task \nImplement Raft leader election and heartbeats (`AppendEntries` RPCs with no log entries). The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run `go test -run 3A `to test your 3A code.\n\n- You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. `go test -run 3A `.\n- Follow the paper's Figure 2. At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election,\n- Add the Figure 2 state for leader election to the `Raft` struct in `raft.go`. You'll also need to define a struct to hold information about each log entry.\n- Fill in the `RequestVoteArgs` and `RequestVoteReply` structs. 
Modify `Make()` to create a background goroutine that will kick off leader election periodically by sending out `RequestVote` RPCs when it hasn't heard from another peer for a while. Implement the `RequestVote()` RPC handler so that servers will vote for one another.\n- To implement heartbeats, define an `AppendEntries` RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an `AppendEntries` RPC handler method.\n- The tester requires that the leader send heartbeat RPCs no more than ten times per second.\n- The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate).\n- The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds.\n- You may find Go's [rand](https://golang.org/pkg/math/rand/) useful.\n- You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls [time.Sleep()](https://golang.org/pkg/time/#Sleep); see the `ticker()` goroutine that `Make()` creates for this purpose. Don't use Go's `time.Timer` or `time.Ticker`, which are difficult to use correctly.\n- If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure.\n- Don't forget to implement `GetState()`.\n- The tester calls your Raft's `rf.Kill()` when it is permanently shutting down an instance. You can check whether `Kill()` has been called using `rf.killed()`. You may want to do this in all loops, to avoid having dead Raft instances print confusing messages.\n- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). The `labgob` package will warn you about this; don't ignore the warnings.\n- The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug. Refer to the [Guidance](http://nil.csail.mit.edu/6.5840/2025/labs/guidance.html) page for debugging tips.\n- If you fail a test, the tester produces a file that visualizes a timeline with events marked along it, including network partitions, crashed servers, and checks performed. Here's an [example of the visualization](http://nil.csail.mit.edu/6.5840/2025/labs/vis.html). Further, you can add your own annotations by writing, for example, `tester.Annotate(\"Server 0\", \"short description\", \"details\")`. This is a new feature we added this year, so if you have any feedback regarding the visualizer (e.g., bug reports, what annotation APIs that you think might be helpful, what information you want the visualizer to show, etc.), please let us know!\n\nBe sure you pass the 3A tests before submitting Part 3A, so that you see something like this:\n\n```\n$ go test -run 3A\nTest (3A): initial election (reliable network)...\n ... Passed -- 3.6 3 106 0\nTest (3A): election after network failure (reliable network)...\n ... 
Passed -- 7.6 3 304 0\nTest (3A): multiple elections (reliable network)...\n ... Passed -- 8.4 7 954 0\nPASS\nok 6.5840/raft1 19.834sak\n$\n```\n\nEach \"Passed\" line contains five numbers; these are the time that the test took in seconds, the number of Raft peers, the number of RPCs sent during the test, the total number of bytes in the RPC messages, and the number of log entries that Raft reports were committed. Your numbers will differ from those shown here. You can ignore the numbers if you like, but they may help you sanity-check the number of RPCs that your implementation sends. For all of labs 3, 4, and 5, the grading script will fail your solution if it takes more than 600 seconds for all of the tests (`go test`), or if any individual test takes more than 120 seconds.\n\nWhen we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should make sure that your code consistently passes the tests with the `-race` flag.", "test_method": "cd src/raft1 && go test -run 3A", "test_results": "Test (3A): initial election (reliable network)...\n ... Passed -- 3.6 3 106 0\nTest (3A): election after network failure (reliable network)...\n ... Passed -- 7.6 3 304 0\nTest (3A): multiple elections (reliable network)...\n ... Passed -- 8.4 7 954 0\nPASS\nok 6.5840/raft1 19.834sak", "difficulty": "moderate", "link": "http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2025", "repo_path": "projects/6.5840-golabs-2025"} -{"task_id": "system_lab_18", "task_name": "problems/system_lab_18.md", "task": "# Problem Context\n## Introduction\nThis is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will \u00c2\u0093shard\u00c2\u0094 your service over multiple replicated state machines for higher performance.\n\nA replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data.\n\nRaft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again.\n\nIn this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. 
The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute.\n\nYou should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6).\n\nThis lab is due in four parts. You must submit each part on the corresponding due date.\n## Getiting Started\nDo a `git pull` to get the latest lab software.\n\nIf you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html).\n\nWe supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`.\n\nWhen we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/raft1\n$ go test\nTest (3A): initial election (reliable network)...\nFatal: expected one leader, got none\n--- FAIL: TestInitialElection3A (4.90s)\nTest (3A): election after network failure (reliable network)...\nFatal: expected one leader, got none\n--- FAIL: TestReElection3A (5.05s)\n...\n$\n```\n\n## The Code\nImplement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs.\n\nYour implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`.\n\n```\n// create a new Raft server instance:\nrf := Make(peers, me, persister, applyCh)\n\n// start agreement on a new log entry:\nrf.Start(command interface{}) (index, term, isleader)\n\n// ask a Raft for its current term, and whether it thinks it is leader\nrf.GetState() (term, isLeader)\n\n// each time a new entry is committed to the log, each Raft peer\n// should send an ApplyMsg to the service (or tester).\ntype ApplyMsg\n```\n\nA service calls `Make(peers,me,\u00c2\u0085)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`.\n\n`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. 
While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files.\n\nSubsequent labs build on this lab, so it is important to give yourself enough time to write solid code.\n# Your Task \nImplement the leader and follower code to append new log entries, so that the `go test -run 3B `tests pass.\n\n- Run `git pull` to get the latest lab software.\n- Raft log is 1-indexed, but we suggest that you view it as 0-indexed, and starting out with an entry (at index=0) that has term 0. That allows the very first AppendEntries RPC to contain 0 as PrevLogIndex, and be a valid index into the log.\n- Your first goal should be to pass `TestBasicAgree3B()`. Start by implementing `Start()`, then write the code to send and receive new log entries via `AppendEntries` RPCs, following Figure 2. Send each newly committed entry on `applyCh` on each peer.\n- You will need to implement the election restriction (section 5.4.1 in the paper).\n- Your code may have loops that repeatedly check for certain events. Don't have these loops execute continuously without pausing, since that will slow your implementation enough that it fails tests. Use Go's [condition variables](https://golang.org/pkg/sync/#Cond), or insert a `time.Sleep(10 * time.Millisecond)` in each loop iteration.\n- Do yourself a favor for future labs and write (or re-write) code that's clean and clear. For ideas, re-visit our the [Guidance page](http://nil.csail.mit.edu/6.5840/2025/labs/guidance.html) with tips on how to develop and debug your code.\n- If you fail a test, look at `raft_test.go` and trace the test code from there to understand what's being tested.\n\nThe tests for upcoming labs may fail your code if it runs too slowly. You can check how much real time and CPU time your solution uses with the time command. Here's typical output:\n\n```\n$ time go test -run 3B\nTest (3B): basic agreement (reliable network)...\n ... Passed -- 1.3 3 18 0\nTest (3B): RPC byte count (reliable network)...\n ... Passed -- 2.8 3 56 0\nTest (3B): test progressive failure of followers (reliable network)...\n ... Passed -- 5.3 3 188 0\nTest (3B): test failure of leaders (reliable network)...\n ... Passed -- 6.4 3 378 0\nTest (3B): agreement after follower reconnects (reliable network)...\n ... Passed -- 5.9 3 176 0\nTest (3B): no agreement if too many followers disconnect (reliable network)...\n ... Passed -- 4.3 5 288 0\nTest (3B): concurrent Start()s (reliable network)...\n ... Passed -- 1.5 3 32 0\nTest (3B): rejoin of partitioned leader (reliable network)...\n ... Passed -- 5.3 3 216 0\nTest (3B): leader backs up quickly over incorrect follower logs (reliable network)...\n ... Passed -- 12.1 5 1528 0\nTest (3B): RPC counts aren't too high (reliable network)...\n ... Passed -- 3.1 3 106 0\nPASS\nok 6.5840/raft1 48.353s\ngo test -run 3B 1.37s user 0.74s system 4% cpu 48.865 total\n$\n```\n\nThe \"ok 6.5840/raft 35.557s\" means that Go measured the time taken for the 3B tests to be 35.557 seconds of real (wall-clock) time. The \"user 0m2.556s\" means that the code consumed 2.556 seconds of CPU time, or time spent actually executing instructions (rather than waiting or sleeping). If your solution uses much more than a minute of real time for the 3B tests, or much more than 5 seconds of CPU time, you may run into trouble later on. 
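Relating to the Part 3B hints above (the sentinel entry at index 0 and the `AppendEntries` RPC), here is a minimal sketch. The `AppendEntries` field names follow Figure 2 of the paper; the `LogEntry` type and its initialization are assumptions, since the skeleton leaves them to you.

```
// Sketch following Figure 2. LogEntry is a type you define yourself; the
// sentinel at index 0 (term 0) makes PrevLogIndex = 0 valid for the very
// first AppendEntries RPC.
type LogEntry struct {
	Term    int
	Command interface{}
}

// In Make(), a hypothetical initialization with the sentinel entry:
//   rf.log = []LogEntry{{Term: 0}}

type AppendEntriesArgs struct {
	Term         int        // leader's term
	LeaderId     int        // so followers can redirect clients
	PrevLogIndex int        // index of the entry immediately preceding the new ones
	PrevLogTerm  int        // term of the PrevLogIndex entry
	Entries      []LogEntry // empty for heartbeats
	LeaderCommit int        // leader's commitIndex
}

type AppendEntriesReply struct {
	Term    int  // currentTerm, for the leader to update itself
	Success bool // true if the follower matched PrevLogIndex/PrevLogTerm
}
```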
Look for time spent sleeping or waiting for RPC timeouts, loops that run without sleeping or waiting for conditions or channel messages, or large numbers of RPCs sent.", "test_method": "cd src/raft1 && time go test -run 3B", "test_results": "Test (3B): basic agreement (reliable network)...\n ... Passed -- 1.3 3 18 0\nTest (3B): RPC byte count (reliable network)...\n ... Passed -- 2.8 3 56 0\nTest (3B): test progressive failure of followers (reliable network)...\n ... Passed -- 5.3 3 188 0\nTest (3B): test failure of leaders (reliable network)...\n ... Passed -- 6.4 3 378 0\nTest (3B): agreement after follower reconnects (reliable network)...\n ... Passed -- 5.9 3 176 0\nTest (3B): no agreement if too many followers disconnect (reliable network)...\n ... Passed -- 4.3 5 288 0\nTest (3B): concurrent Start()s (reliable network)...\n ... Passed -- 1.5 3 32 0\nTest (3B): rejoin of partitioned leader (reliable network)...\n ... Passed -- 5.3 3 216 0\nTest (3B): leader backs up quickly over incorrect follower logs (reliable network)...\n ... Passed -- 12.1 5 1528 0\nTest (3B): RPC counts aren't too high (reliable network)...\n ... Passed -- 3.1 3 106 0\nPASS\nok 6.5840/raft1 48.353s\ngo test -run 3B 1.37s user 0.74s system 4% cpu 48.865 total", "difficulty": "hard", "link": "http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2025", "repo_path": "projects/6.5840-golabs-2025"} -{"task_id": "system_lab_19", "task_name": "problems/system_lab_19.md", "task": "# Problem Context\n## Introduction\nThis is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will \u00c2\u0093shard\u00c2\u0094 your service over multiple replicated state machines for higher performance.\n\nA replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data.\n\nRaft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again.\n\nIn this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. 
At that point, your Raft should send the log entry to the larger service for it to execute.\n\nYou should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6).\n\nThis lab is due in four parts. You must submit each part on the corresponding due date.\n## Getiting Started\nDo a `git pull` to get the latest lab software.\n\nIf you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html).\n\nWe supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`.\n\nWhen we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/raft1\n$ go test\nTest (3A): initial election (reliable network)...\nFatal: expected one leader, got none\n--- FAIL: TestInitialElection3A (4.90s)\nTest (3A): election after network failure (reliable network)...\nFatal: expected one leader, got none\n--- FAIL: TestReElection3A (5.05s)\n...\n$\n```\n\n## The Code\nImplement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs.\n\nYour implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`.\n\n```\n// create a new Raft server instance:\nrf := Make(peers, me, persister, applyCh)\n\n// start agreement on a new log entry:\nrf.Start(command interface{}) (index, term, isleader)\n\n// ask a Raft for its current term, and whether it thinks it is leader\nrf.GetState() (term, isLeader)\n\n// each time a new entry is committed to the log, each Raft peer\n// should send an ApplyMsg to the service (or tester).\ntype ApplyMsg\n```\n\nA service calls `Make(peers,me,\u00c2\u0085)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`.\n\n`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. 
While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files.\n\nSubsequent labs build on this lab, so it is important to give yourself enough time to write solid code.\n# Your Task \nIf a Raft-based server reboots it should resume service where it left off. This requires that Raft keep persistent state that survives a reboot. The paper's Figure 2 mentions which state should be persistent.\n\nA real implementation would write Raft's persistent state to disk each time it changed, and would read the state from disk when restarting after a reboot. Your implementation won't use the disk; instead, it will save and restore persistent state from a `Persister` object (see `persister.go`). Whoever calls `Raft.Make()` supplies a `Persister` that initially holds Raft's most recently persisted state (if any). Raft should initialize its state from that `Persister`, and should use it to save its persistent state each time the state changes. Use the `Persister`'s `ReadRaftState()` and `Save()` methods.\n\nComplete the functions `persist()` and `readPersist()` in `raft.go` by adding code to save and restore persistent state. You will need to encode (or \"serialize\") the state as an array of bytes in order to pass it to the `Persister`. Use the `labgob` encoder; see the comments in `persist()` and `readPersist()`. `labgob` is like Go's `gob` encoder but prints error messages if you try to encode structures with lower-case field names. For now, pass `nil` as the second argument to `persister.Save()`. Insert calls to `persist()` at the points where your implementation changes persistent state. Once you've done this, and if the rest of your implementation is correct, you should pass all of the 3C tests.\n\nYou will probably need the optimization that backs up nextIndex by more than one entry at a time. Look at the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf) starting at the bottom of page 7 and top of page 8 (marked by a gray line). The paper is vague about the details; you will need to fill in the gaps. One possibility is to have a rejection message include:\n\n```\n XTerm: term in the conflicting entry (if any)\n XIndex: index of first entry with that term (if any)\n XLen: log length\n```\n\nThen the leader's logic can be something like:\n\n```\n Case 1: leader doesn't have XTerm:\n nextIndex = XIndex\n Case 2: leader has XTerm:\n nextIndex = (index of leader's last entry for XTerm) + 1\n Case 3: follower's log is too short:\n nextIndex = XLen\n```\n\nA few other hints:\n\n- Run `git pull` to get the latest lab software.\n- The 3C tests are more demanding than those for 3A or 3B, and failures may be caused by problems in your code for 3A or 3B.\n\nYour code should pass all the 3C tests (as shown below), as well as the 3A and 3B tests.\n\n```\n$ go test -run 3C\nTest (3C): basic persistence (reliable network)...\n ... Passed -- 6.6 3 110 0\nTest (3C): more persistence (reliable network)...\n ... Passed -- 15.6 5 428 0\nTest (3C): partitioned leader and one follower crash, leader restarts (reliable network)...\n ... Passed -- 3.1 3 50 0\nTest (3C): Figure 8 (reliable network)...\n ... Passed -- 33.7 5 654 0\nTest (3C): unreliable agreement (unreliable network)...\n ... 
Passed -- 2.1 5 1076 0\nTest (3C): Figure 8 (unreliable) (unreliable network)...\n ... Passed -- 31.9 5 4400 0\nTest (3C): churn (reliable network)...\n ... Passed -- 16.8 5 4896 0\nTest (3C): unreliable churn (unreliable network)...\n ... Passed -- 16.1 5 7204 0\nPASS\nok 6.5840/raft1 126.054s\n$\n```\n\nIt is a good idea to run the tests multiple times before submitting and check that each run prints `PASS`.\n\n```\n$ for i in {0..10}; do go test; done\n```\n", "test_method": "cd src/raft1 && go test -run 3C", "test_results": "Test (3C): basic persistence (reliable network)...\n ... Passed -- 6.6 3 110 0\nTest (3C): more persistence (reliable network)...\n ... Passed -- 15.6 5 428 0\nTest (3C): partitioned leader and one follower crash, leader restarts (reliable network)...\n ... Passed -- 3.1 3 50 0\nTest (3C): Figure 8 (reliable network)...\n ... Passed -- 33.7 5 654 0\nTest (3C): unreliable agreement (unreliable network)...\n ... Passed -- 2.1 5 1076 0\nTest (3C): Figure 8 (unreliable) (unreliable network)...\n ... Passed -- 31.9 5 4400 0\nTest (3C): churn (reliable network)...\n ... Passed -- 16.8 5 4896 0\nTest (3C): unreliable churn (unreliable network)...\n ... Passed -- 16.1 5 7204 0\nPASS\nok 6.5840/raft1 126.054s", "difficulty": "hard", "link": "http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2025", "repo_path": "projects/6.5840-golabs-2025"} -{"task_id": "system_lab_20", "task_name": "problems/system_lab_20.md", "task": "# Problem Context\n## Introduction\nThis is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will \u00c2\u0093shard\u00c2\u0094 your service over multiple replicated state machines for higher performance.\n\nA replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data.\n\nRaft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again.\n\nIn this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. 
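Stepping back to the Part 3C task above, here is a hedged sketch of `persist()`/`readPersist()` using `labgob` and `persister.Save()` as described, plus leader-side handling of the XTerm/XIndex/XLen rejection idea. The field names (`currentTerm`, `votedFor`, `log`, `nextIndex`) follow Figure 2, the reply fields follow the hint's naming, and `lastIndexOfTerm()` is a hypothetical helper; the snippet assumes the `bytes` import and the lab's `labgob` package.

```
// Sketch only. Persistent state per Figure 2: currentTerm, votedFor, log.
func (rf *Raft) persist() {
	w := new(bytes.Buffer)
	e := labgob.NewEncoder(w)
	e.Encode(rf.currentTerm)
	e.Encode(rf.votedFor)
	e.Encode(rf.log)
	rf.persister.Save(w.Bytes(), nil) // nil snapshot until Part 3D
}

func (rf *Raft) readPersist(data []byte) {
	if len(data) == 0 {
		return // no persisted state yet (first boot)
	}
	d := labgob.NewDecoder(bytes.NewBuffer(data))
	var term, voted int
	var log []LogEntry
	if d.Decode(&term) == nil && d.Decode(&voted) == nil && d.Decode(&log) == nil {
		rf.currentTerm, rf.votedFor, rf.log = term, voted, log
	}
}

// Leader-side backup of nextIndex using the hint's XTerm/XIndex/XLen reply
// fields (assumed convention: XTerm == -1 means the follower's log was too
// short). lastIndexOfTerm returns -1 if the leader has no entry of that term.
func (rf *Raft) backupNextIndex(peer int, reply *AppendEntriesReply) {
	if reply.XTerm == -1 {
		rf.nextIndex[peer] = reply.XLen // case 3: follower's log too short
	} else if last := rf.lastIndexOfTerm(reply.XTerm); last == -1 {
		rf.nextIndex[peer] = reply.XIndex // case 1: leader lacks XTerm
	} else {
		rf.nextIndex[peer] = last + 1 // case 2: leader has XTerm
	}
}
```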
At that point, your Raft should send the log entry to the larger service for it to execute.\n\nYou should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6).\n\nThis lab is due in four parts. You must submit each part on the corresponding due date.\n## Getiting Started\nDo a `git pull` to get the latest lab software.\n\nIf you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html).\n\nWe supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`.\n\nWhen we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/raft1\n$ go test\nTest (3A): initial election (reliable network)...\nFatal: expected one leader, got none\n--- FAIL: TestInitialElection3A (4.90s)\nTest (3A): election after network failure (reliable network)...\nFatal: expected one leader, got none\n--- FAIL: TestReElection3A (5.05s)\n...\n$\n```\n\n## The Code\nImplement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs.\n\nYour implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`.\n\n```\n// create a new Raft server instance:\nrf := Make(peers, me, persister, applyCh)\n\n// start agreement on a new log entry:\nrf.Start(command interface{}) (index, term, isleader)\n\n// ask a Raft for its current term, and whether it thinks it is leader\nrf.GetState() (term, isLeader)\n\n// each time a new entry is committed to the log, each Raft peer\n// should send an ApplyMsg to the service (or tester).\ntype ApplyMsg\n```\n\nA service calls `Make(peers,me,\u00c2\u0085)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`.\n\n`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. 
While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files.\n\nSubsequent labs build on this lab, so it is important to give yourself enough time to write solid code.\n# Your Task \nAs things stand now, a rebooting server replays the complete Raft log in order to restore its state. However, it's not practical for a long-running service to remember the complete Raft log forever. Instead, you'll modify Raft to cooperate with services that persistently store a \"snapshot\" of their state from time to time, at which point Raft discards log entries that precede the snapshot. The result is a smaller amount of persistent data and faster restart. However, it's now possible for a follower to fall so far behind that the leader has discarded the log entries it needs to catch up; the leader must then send a snapshot plus the log starting at the time of the snapshot. Section 7 of the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf) outlines the scheme; you will have to design the details.\n\nYour Raft must provide the following function that the service can call with a serialized snapshot of its state:\n\n```\nSnapshot(index int, snapshot []byte)\n```\n\nIn Lab 3D, the tester calls `Snapshot()` periodically. In Lab 4, you will write a key/value server that calls `Snapshot()`; the snapshot will contain the complete table of key/value pairs. The service layer calls `Snapshot()` on every peer (not just on the leader).\n\nThe `index` argument indicates the highest log entry that's reflected in the snapshot. Raft should discard its log entries before that point. You'll need to revise your Raft code to operate while storing only the tail of the log.\n\nYou'll need to implement the `InstallSnapshot` RPC discussed in the paper that allows a Raft leader to tell a lagging Raft peer to replace its state with a snapshot. You will likely need to think through how InstallSnapshot should interact with the state and rules in Figure 2.\n\nWhen a follower's Raft code receives an InstallSnapshot RPC, it can use the `applyCh` to send the snapshot to the service in an `ApplyMsg`. The `ApplyMsg` struct definition already contains the fields you will need (and which the tester expects). Take care that these snapshots only advance the service's state, and don't cause it to move backwards.\n\nIf a server crashes, it must restart from persisted data. Your Raft should persist both Raft state and the corresponding snapshot. Use the second argument to `persister.Save()` to save the snapshot. If there's no snapshot, pass `nil` as the second argument.\n\nWhen a server restarts, the application layer reads the persisted snapshot and restores its saved state.\n\nImplement `Snapshot()` and the InstallSnapshot RPC, as well as the changes to Raft to support these (e.g, operation with a trimmed log). Your solution is complete when it passes the 3D tests (and all the previous Lab 3 tests).\n\n- `git pull` to make sure you have the latest software.\n- A good place to start is to modify your code to so that it is able to store just the part of the log starting at some index X. Initially you can set X to zero and run the 3B/3C tests. Then make `Snapshot(index)` discard the log before `index`, and set X equal to `index`. 
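The hint above about keeping only the log from some index X onward is mostly index bookkeeping. Here is one hedged sketch that continues the sentinel-at-slot-0 idea from Part 3B; `lastIncludedIndex`, `lastIncludedTerm`, `rf.snapshot`, `slot()`, and `persistWithSnapshot()` are all assumed names.

```
// Sketch: slot 0 of rf.log is a sentinel whose index/term equal the
// snapshot's lastIncludedIndex/lastIncludedTerm.
func (rf *Raft) slot(index int) int {
	return index - rf.lastIncludedIndex // slot 0 <=> lastIncludedIndex
}

func (rf *Raft) Snapshot(index int, snapshot []byte) {
	rf.mu.Lock()
	defer rf.mu.Unlock()
	if index <= rf.lastIncludedIndex {
		return // an older or duplicate snapshot; nothing to trim
	}
	// Copy the tail into a fresh slice so the discarded entries become
	// unreachable and can be garbage-collected.
	tail := append([]LogEntry{}, rf.log[rf.slot(index):]...)
	rf.lastIncludedIndex = index
	rf.lastIncludedTerm = tail[0].Term
	rf.log = tail
	rf.snapshot = snapshot
	rf.persistWithSnapshot() // hypothetical: Save(raftstate, rf.snapshot)
}
```

With this layout, every place that previously indexed `rf.log[i]` needs to go through `slot(i)`, and the follower side of `InstallSnapshot` resets `lastIncludedIndex`/`lastIncludedTerm` and the sentinel in the same way.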
If all goes well you should now pass the first 3D test.\n- A common reason for failing the first 3D test is that followers take too long to catch up to the leader.\n- Next: have the leader send an InstallSnapshot RPC if it doesn't have the log entries required to bring a follower up to date.\n- Send the entire snapshot in a single InstallSnapshot RPC. Don't implement Figure 13's `offset` mechanism for splitting up the snapshot.\n- Raft must discard old log entries in a way that allows the Go garbage collector to free and re-use the memory; this requires that there be no reachable references (pointers) to the discarded log entries.\n- A reasonable amount of time to consume for the full set of Lab 3 tests (3A+3B+3C+3D) without `-race` is 6 minutes of real time and one minute of CPU time. When running with `-race`, it is about 10 minutes of real time and two minutes of CPU time.\n\nYour code should pass all the 3D tests (as shown below), as well as the 3A, 3B, and 3C tests.\n\n```\n$ go test -run 3D\nTest (3D): snapshots basic (reliable network)...\n ... Passed -- 3.3 3 522 0\nTest (3D): install snapshots (disconnect) (reliable network)...\n ... Passed -- 48.4 3 2710 0\nTest (3D): install snapshots (disconnect) (unreliable network)...\n ... Passed -- 56.1 3 3025 0\nTest (3D): install snapshots (crash) (reliable network)...\n ... Passed -- 33.3 3 1559 0\nTest (3D): install snapshots (crash) (unreliable network)...\n ... Passed -- 38.1 3 1723 0\nTest (3D): crash and restart all servers (unreliable network)...\n ... Passed -- 11.2 3 296 0\nTest (3D): snapshot initialization after crash (unreliable network)...\n ... Passed -- 4.3 3 84 0\nPASS\nok 6.5840/raft1 195.006s\n```\n", "test_method": "cd src/raft1 && go test -run 3D", "test_results": "Test (3D): snapshots basic (reliable network)...\n ... Passed -- 3.3 3 522 0\nTest (3D): install snapshots (disconnect) (reliable network)...\n ... Passed -- 48.4 3 2710 0\nTest (3D): install snapshots (disconnect) (unreliable network)...\n ... Passed -- 56.1 3 3025 0\nTest (3D): install snapshots (crash) (reliable network)...\n ... Passed -- 33.3 3 1559 0\nTest (3D): install snapshots (crash) (unreliable network)...\n ... Passed -- 38.1 3 1723 0\nTest (3D): crash and restart all servers (unreliable network)...\n ... Passed -- 11.2 3 296 0\nTest (3D): snapshot initialization after crash (unreliable network)...\n ... Passed -- 4.3 3 84 0\nPASS\nok 6.5840/raft1 195.006s", "difficulty": "hard", "link": "http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2025", "repo_path": "projects/6.5840-golabs-2025"} -{"task_id": "system_lab_21", "task_name": "problems/system_lab_21.md", "task": "# Problem Context\n## Introduction\nIn this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html). To clients, the service looks similar to the server of [Lab 2](http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html). However, instead of a single server, the service consists of a set of servers that use Raft to help them maintain identical databases. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. 
After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2025/figs/kvraft.pdf).\n\nClients will interact with your key/value service through a Clerk, as in Lab 2. A Clerk implements the `Put` and `Get` methods with the same semantics as Lab 2: Puts are at-most-once and the Puts/Gets must form a linearizable history.\n\nProviding linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates.\n\nThis lab has three parts. In part A, you will implement a replicated-state machine package, `rsm`, using your raft implementation; `rsm` is agnostic of the requests that it replicates. In part B, you will implement a replicated key/value service using `rsm`, but without using snapshots. In part C, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline.\n\nYou should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), in particular Section 7 (but not 8). For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) Start early.\n## Getiting Started\nWe supply you with skeleton code and tests in `src/kvraft1`. The skeleton code uses the skeleton package `src/kvraft1/rsm` to replicate a server. A server must implement the `StateMachine` interface defined in `rsm` to replicate itself using `rsm`. Most of your work will be implementing `rsm` to provide server-agnostic replication. You will also need to modify `kvraft1/client.go` and `kvraft1/server.go` to implement the server-specific parts. This split allows you to re-use `rsm` in the next lab. You may be able to re-use some of your Lab 2 code (e.g., re-using the server code by copying or importing the `\"src/kvsrv1\"` package) but it is not a requirement.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n..\n```\n\n## The Code\n\n# Your Task \n\n\n```\n$ cd src/kvraft1/rsm\n$ go test -v\n=== RUN TestBasic\nTest RSM basic (reliable network)...\n..\n config.go:147: one: took too long\n```\n\nIn the common situation of a client/server service using Raft for replication, the service interacts with Raft in two ways: the service leader submits client operations by calling `raft.Start()`, and all service replicas receive committed operations via Raft's `applyCh`, which they execute. On the leader, these two activities interact. At any given time, some server goroutines are handling client requests, have called `raft.Start()`, and each is waiting for its operation to commit and to find out what the result of executing the operation is. And as committed operations appear on the `applyCh`, each needs to be executed by the service, and the results need to be handed to the goroutine that called `raft.Start()` so that it can return the result to the client.\n\nThe `rsm` package encapsulates the above interaction. It sits as a layer between the service (e.g. a key/value database) and Raft. 
In `rsm/rsm.go` you will need to implement a \"reader\" goroutine that reads the `applyCh`, and a `rsm.Submit()` function that calls `raft.Start()` for a client operation and then waits for the reader goroutine to hand it the result of executing that operation.\n\nThe service that is using `rsm` appears to the `rsm` reader goroutine as a `StateMachine` object providing a `DoOp()` method. The reader goroutine should hand each committed operation to `DoOp()`; `DoOp()`'s return value should be given to the corresponding `rsm.Submit()` call for it to return. `DoOp()`'s argument and return value have type `any`; the actual values should have the same types as the argument and return values that the service passes to `rsm.Submit()`, respectively.\n\nThe service should pass each client operation to `rsm.Submit()`. To help the reader goroutine match `applyCh` messages with waiting calls to `rsm.Submit()`, `Submit()` should wrap each client operation in an `Op` structure along with a unique identifier. `Submit()` should then wait until the operation has committed and been executed, and return the result of execution (the value returned by `DoOp()`). If `raft.Start()` indicates that the current peer is not the Raft leader, `Submit()` should return an `rpc.ErrWrongLeader` error. `Submit()` should detect and handle the situation in which leadership changed just after it called `raft.Start()`, causing the operation to be lost (never committed).\n\nFor Part A, the `rsm` tester acts as the service, submitting operations that it interprets as increments on a state consisting of a single integer. In Part B you'll use `rsm` as part of a key/value service that implements `StateMachine` (and `DoOp()`), and calls `rsm.Submit()`.\n\nIf all goes well, the sequence of events for a client request is:\n\n- The client sends a request to the service leader.\n- The service leader calls `rsm.Submit()` with the request.\n- `rsm.Submit()` calls `raft.Start()` with the request, and then waits.\n- Raft commits the request and sends it on all peers' `applyCh`s.\n- The `rsm` reader goroutine on each peer reads the request from the `applyCh` and passes it to the service's `DoOp()`.\n- On the leader, the `rsm` reader goroutine hands the `DoOp()` return value to the `Submit()` goroutine that originally submitted the request, and `Submit()` returns that value.\n\nYour servers should not directly communicate; they should only interact with each other through Raft.\n\nImplement `rsm.go`: the `Submit()` method and a reader goroutine. You have completed this task if you pass the `rsm` 4A tests:\n\n```\n $ cd src/kvraft1/rsm\n $ go test -v -run 4A\n=== RUN TestBasic4A\nTest RSM basic (reliable network)...\n ... Passed -- 1.2 3 48 0\n--- PASS: TestBasic4A (1.21s)\n=== RUN TestLeaderFailure4A\n ... Passed -- 9223372036.9 3 31 0\n--- PASS: TestLeaderFailure4A (1.50s)\nPASS\nok 6.5840/kvraft1/rsm 2.887s\n```\n\n- You should not need to add any fields to the Raft `ApplyMsg`, or to Raft RPCs such as `AppendEntries`, but you are allowed to do so.\n- Your solution needs to handle an `rsm` leader that has called `Start()` for a request submitted with `Submit()` but loses its leadership before the request is committed to the log. One way to do this is for the `rsm` to detect that it has lost leadership, by noticing that Raft's term has changed or a different request has appeared at the index returned by `Start()`, and return `rpc.ErrWrongLeader` from `Submit()`. 
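A hedged sketch of how `Submit()` and the reader goroutine might fit together, following the sequence of events described above. The `Op` wrapper, the `waiting` map, the `RSM` fields, and `errWrongLeader` are illustrative stand-ins (the real skeleton fixes `Submit`'s signature and provides `rpc.ErrWrongLeader`); the snippet assumes imports of `sync`, `errors`, `math/rand`, and the lab's `raft` package, and that `Op` has been registered with `labgob.Register`.

```
// Illustrative names only; check your skeleton for the real types.
type Op struct {
	Id  int64 // unique id so the reader can match commits to Submit calls
	Req any   // the service-level request passed to Submit
}

type waiter struct {
	id int64
	ch chan any
}

// Minimal stand-in for the skeleton's StateMachine (the real interface
// also has Snapshot/Restore, used in Part 4C).
type StateMachine interface {
	DoOp(req any) any
}

type RSM struct {
	mu      sync.Mutex
	rf      *raft.Raft
	sm      StateMachine
	applyCh chan raft.ApplyMsg
	waiting map[int]waiter // Raft log index -> pending Submit call
}

var errWrongLeader = errors.New("wrong leader") // stand-in for rpc.ErrWrongLeader

// Submit wraps req in an Op, starts Raft agreement, and waits for the
// reader goroutine to hand back DoOp's result for that log index.
func (r *RSM) Submit(req any) (any, error) {
	op := Op{Id: rand.Int63(), Req: req}
	index, _, isLeader := r.rf.Start(op)
	if !isLeader {
		return nil, errWrongLeader
	}
	ch := make(chan any, 1)
	r.mu.Lock()
	r.waiting[index] = waiter{id: op.Id, ch: ch}
	r.mu.Unlock()

	res, ok := <-ch // closed channel means our entry was superseded
	if !ok {
		return nil, errWrongLeader
	}
	return res, nil
}

// reader executes every committed Op and wakes the matching Submit call.
// The range loop exits when Raft closes applyCh at shutdown.
func (r *RSM) reader() {
	for msg := range r.applyCh {
		op := msg.Command.(Op) // Command/CommandIndex: the usual ApplyMsg fields
		res := r.sm.DoOp(op.Req)
		r.mu.Lock()
		if w, ok := r.waiting[msg.CommandIndex]; ok {
			delete(r.waiting, msg.CommandIndex)
			if w.id == op.Id {
				w.ch <- res // our operation committed at this index
			} else {
				close(w.ch) // a different op won this slot: leadership changed
			}
		}
		r.mu.Unlock()
	}
}
```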
If the ex-leader is partitioned by itself, it won't know about new leaders; but any client in the same partition won't be able to talk to a new leader either, so it's OK in this case for the server to wait indefinitely until the partition heals.\n- The tester calls your Raft's `rf.Kill()` when it is shutting down a peer. Raft should close the `applyCh` so that your rsm learns about the shutdown, and can exit out of all loops.", "test_method": "cd src/kvraft1/rsm && go test -v -run 4A", "test_results": "=== RUN TestBasic4A\nTest RSM basic (reliable network)...\n ... Passed -- 1.2 3 48 0\n--- PASS: TestBasic4A (1.21s)\n=== RUN TestLeaderFailure4A\n ... Passed -- 9223372036.9 3 31 0\n--- PASS: TestLeaderFailure4A (1.50s)\nPASS\nok 6.5840/kvraft1/rsm 2.887s", "difficulty": "moderate/hard", "link": "http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvraft1.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2025", "repo_path": "projects/6.5840-golabs-2025"} -{"task_id": "system_lab_22", "task_name": "problems/system_lab_22.md", "task": "# Problem Context\n## Introduction\nIn this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html). To clients, the service looks similar to the server of [Lab 2](http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html). However, instead of a single server, the service consists of a set of servers that use Raft to help them maintain identical databases. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2025/figs/kvraft.pdf).\n\nClients will interact with your key/value service through a Clerk, as in Lab 2. A Clerk implements the `Put` and `Get` methods with the same semantics as Lab 2: Puts are at-most-once and the Puts/Gets must form a linearizable history.\n\nProviding linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates.\n\nThis lab has three parts. In part A, you will implement a replicated-state machine package, `rsm`, using your raft implementation; `rsm` is agnostic of the requests that it replicates. In part B, you will implement a replicated key/value service using `rsm`, but without using snapshots. In part C, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline.\n\nYou should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), in particular Section 7 (but not 8). For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) Start early.\n## Getiting Started\nWe supply you with skeleton code and tests in `src/kvraft1`. The skeleton code uses the skeleton package `src/kvraft1/rsm` to replicate a server. 
A server must implement the `StateMachine` interface defined in `rsm` to replicate itself using `rsm`. Most of your work will be implementing `rsm` to provide server-agnostic replication. You will also need to modify `kvraft1/client.go` and `kvraft1/server.go` to implement the server-specific parts. This split allows you to re-use `rsm` in the next lab. You may be able to re-use some of your Lab 2 code (e.g., re-using the server code by copying or importing the `\"src/kvsrv1\"` package) but it is not a requirement.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n..\n```\n\n## The Code\n\n# Your Task \n\n\n```\n$ cd src/kvraft1\n$ go test -v -run TestBasic4B\n=== RUN TestBasic4B\nTest: one client (4B basic) (reliable network)...\n kvtest.go:62: Wrong error \n$\n```\n\nNow you will use the `rsm` package to replicate a key/value server. Each of the servers (\"kvservers\") will have an associated rsm/Raft peer. Clerks send `Put()` and `Get()` RPCs to the kvserver whose associated Raft is the leader. The kvserver code submits the Put/Get operation to `rsm`, which replicates it using Raft and invokes your server's `DoOp` at each peer, which should apply the operations to the peer's key/value database; the intent is for the servers to maintain identical replicas of the key/value database.\n\nA `Clerk` sometimes doesn't know which kvserver is the Raft leader. If the `Clerk` sends an RPC to the wrong kvserver, or if it cannot reach the kvserver, the `Clerk` should re-try by sending to a different kvserver. If the key/value service commits the operation to its Raft log (and hence applies the operation to the key/value state machine), the leader reports the result to the `Clerk` by responding to its RPC. If the operation failed to commit (for example, if the leader was replaced), the server reports an error, and the `Clerk` retries with a different server.\n\nYour kvservers should not directly communicate; they should only interact with each other through Raft.\n\nYour first task is to implement a solution that works when there are no dropped messages, and no failed servers.\n\nFeel free to copy your client code from Lab 2 (`kvsrv1/client.go`) into `kvraft1/client.go`. You will need to add logic for deciding which kvserver to send each RPC to.\n\nYou'll also need to implement `Put()` and `Get()` RPC handlers in `server.go`. These handlers should submit the request to Raft using `rsm.Submit()`. As the `rsm` package reads commands from `applyCh`, it should invoke the `DoOp` method, which you will have to implement in `server.go`.\n\nYou have completed this task when you **reliably** pass the first test in the test suite, with `go test -v -run TestBasic4B`.\n\n- A kvserver should not complete a `Get()` RPC if it is not part of a majority (so that it does not serve stale data). A simple solution is to enter every `Get()` (as well as each `Put()`) in the Raft log using `Submit()`. You don't have to implement the optimization for read-only operations that is described in Section 8.\n- It's best to add locking from the start because the need to avoid deadlocks sometimes affects overall code design. Check that your code is race-free using `go test -race`.\n\nNow you should modify your solution to continue in the face of network and server failures. One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it finds a kvserver that replies positively. 
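One hedged sketch of the Clerk-side retry behavior described here: cache the index of the last server that acted as leader, and rotate to the next one when an RPC fails or reports the wrong leader. Every type and name below is a placeholder (including the error convention and the RPC plumbing), not the lab's actual API.

```
// Placeholder types for the sketch; use your kvraft1/kvsrv1 types instead.
type GetArgs struct{ Key string }
type GetReply struct {
	Err   string // e.g. "OK" or "ErrWrongLeader" in this sketch's convention
	Value string
}

type Clerk struct {
	servers []string // kvserver addresses
	leader  int      // index of the last server that behaved as leader
	call    func(srv, method string, args, reply any) bool // RPC plumbing
}

// Get keeps retrying until the current Raft leader answers.
func (ck *Clerk) Get(key string) string {
	args := GetArgs{Key: key}
	for {
		var reply GetReply
		if ck.call(ck.servers[ck.leader], "KVServer.Get", &args, &reply) &&
			reply.Err == "OK" {
			return reply.Value
		}
		// Wrong leader, dropped RPC, or an election in progress:
		// rotate to the next server and try again shortly.
		ck.leader = (ck.leader + 1) % len(ck.servers)
		time.Sleep(100 * time.Millisecond)
	}
}
```

Starting each RPC at the cached leader index is what avoids a full leader search on every call, which matters for the speed tests.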
If a leader fails just after committing an entry to the Raft log, the `Clerk` may not receive a reply, and thus may re-send the request to another leader. Each call to `Clerk.Put()` should result in just a single execution for a particular version number.\n\nAdd code to handle failures. Your `Clerk` can use a similar retry plan as in lab 2, including returning `ErrMaybe` if a response to a retried `Put` RPC is lost. You are done when your code reliably passes all the 4B tests, with `go test -v -run 4B`.\n\n- Recall that the rsm leader may lose its leadership and return `rpc.ErrWrongLeader` from `Submit()`. In this case you should arrange for the Clerk to re-send the request to other servers until it finds the new leader.\n- You will probably have to modify your Clerk to remember which server turned out to be the leader for the last RPC, and send the next RPC to that server first. This will avoid wasting time searching for the leader on every RPC, which may help you pass some of the tests quickly enough.\n\nYour code should now pass the Lab 4B tests, like this:\n\n```\n$ cd kvraft1\n$ go test -run 4B\nTest: one client (4B basic) ...\n ... Passed -- 3.2 5 1041 183\nTest: one client (4B speed) ...\n ... Passed -- 15.9 3 3169 0\nTest: many clients (4B many clients) ...\n ... Passed -- 3.9 5 3247 871\nTest: unreliable net, many clients (4B unreliable net, many clients) ...\n ... Passed -- 5.3 5 1035 167\nTest: unreliable net, one client (4B progress in majority) ...\n ... Passed -- 2.9 5 155 3\nTest: no progress in minority (4B) ...\n ... Passed -- 1.6 5 102 3\nTest: completion after heal (4B) ...\n ... Passed -- 1.3 5 67 4\nTest: partitions, one client (4B partitions, one client) ...\n ... Passed -- 6.2 5 958 155\nTest: partitions, many clients (4B partitions, many clients (4B)) ...\n ... Passed -- 6.8 5 3096 855\nTest: restarts, one client (4B restarts, one client 4B ) ...\n ... Passed -- 6.7 5 311 13\nTest: restarts, many clients (4B restarts, many clients) ...\n ... Passed -- 7.5 5 1223 95\nTest: unreliable net, restarts, many clients (4B unreliable net, restarts, many clients ) ...\n ... Passed -- 8.4 5 804 33\nTest: restarts, partitions, many clients (4B restarts, partitions, many clients) ...\n ... Passed -- 10.1 5 1308 105\nTest: unreliable net, restarts, partitions, many clients (4B unreliable net, restarts, partitions, many clients) ...\n ... Passed -- 11.9 5 1040 33\nTest: unreliable net, restarts, partitions, random keys, many clients (4B unreliable net, restarts, partitions, random keys, many clients) ...\n ... Passed -- 12.1 7 2801 93\nPASS\nok 6.5840/kvraft1 103.797s\n```\n\nThe numbers after each `Passed` are real time in seconds, number of peers, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put calls).", "test_method": "cd src/kvraft1 && go test -run 4B", "test_results": "Test: one client (4B basic) ...\n ... Passed -- 3.2 5 1041 183\nTest: one client (4B speed) ...\n ... Passed -- 15.9 3 3169 0\nTest: many clients (4B many clients) ...\n ... Passed -- 3.9 5 3247 871\nTest: unreliable net, many clients (4B unreliable net, many clients) ...\n ... Passed -- 5.3 5 1035 167\nTest: unreliable net, one client (4B progress in majority) ...\n ... Passed -- 2.9 5 155 3\nTest: no progress in minority (4B) ...\n ... Passed -- 1.6 5 102 3\nTest: completion after heal (4B) ...\n ... Passed -- 1.3 5 67 4\nTest: partitions, one client (4B partitions, one client) ...\n ... 
Passed -- 6.2 5 958 155\nTest: partitions, many clients (4B partitions, many clients (4B)) ...\n ... Passed -- 6.8 5 3096 855\nTest: restarts, one client (4B restarts, one client 4B ) ...\n ... Passed -- 6.7 5 311 13\nTest: restarts, many clients (4B restarts, many clients) ...\n ... Passed -- 7.5 5 1223 95\nTest: unreliable net, restarts, many clients (4B unreliable net, restarts, many clients ) ...\n ... Passed -- 8.4 5 804 33\nTest: restarts, partitions, many clients (4B restarts, partitions, many clients) ...\n ... Passed -- 10.1 5 1308 105\nTest: unreliable net, restarts, partitions, many clients (4B unreliable net, restarts, partitions, many clients) ...\n ... Passed -- 11.9 5 1040 33\nTest: unreliable net, restarts, partitions, random keys, many clients (4B unreliable net, restarts, partitions, random keys, many clients) ...\n ... Passed -- 12.1 7 2801 93\nPASS\nok 6.5840/kvraft1 103.797s", "difficulty": "moderate", "link": "http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvraft1.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2025", "repo_path": "projects/6.5840-golabs-2025"} -{"task_id": "system_lab_23", "task_name": "problems/system_lab_23.md", "task": "# Problem Context\n## Introduction\nIn this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html). To clients, the service looks similar to the server of [Lab 2](http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html). However, instead of a single server, the service consists of a set of servers that use Raft to help them maintain identical databases. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2025/figs/kvraft.pdf).\n\nClients will interact with your key/value service through a Clerk, as in Lab 2. A Clerk implements the `Put` and `Get` methods with the same semantics as Lab 2: Puts are at-most-once and the Puts/Gets must form a linearizable history.\n\nProviding linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates.\n\nThis lab has three parts. In part A, you will implement a replicated-state machine package, `rsm`, using your raft implementation; `rsm` is agnostic of the requests that it replicates. In part B, you will implement a replicated key/value service using `rsm`, but without using snapshots. In part C, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline.\n\nYou should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), in particular Section 7 (but not 8). 
For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) Start early.\n## Getiting Started\nWe supply you with skeleton code and tests in `src/kvraft1`. The skeleton code uses the skeleton package `src/kvraft1/rsm` to replicate a server. A server must implement the `StateMachine` interface defined in `rsm` to replicate itself using `rsm`. Most of your work will be implementing `rsm` to provide server-agnostic replication. You will also need to modify `kvraft1/client.go` and `kvraft1/server.go` to implement the server-specific parts. This split allows you to re-use `rsm` in the next lab. You may be able to re-use some of your Lab 2 code (e.g., re-using the server code by copying or importing the `\"src/kvsrv1\"` package) but it is not a requirement.\n\nTo get up and running, execute the following commands. Don't forget the `git pull` to get the latest software.\n\n```\n$ cd ~/6.5840\n$ git pull\n..\n```\n\n## The Code\n\n# Your Task \nAs things stand now, your key/value server doesn't call your Raft library's `Snapshot()` method, so a rebooting server has to replay the complete persisted Raft log in order to restore its state. Now you'll modify kvserver and `rsm` to cooperate with Raft to save log space and reduce restart time, using Raft's `Snapshot()` from Lab 3D.\n\nThe tester passes `maxraftstate` to your `StartKVServer()`, which passes it to `rsm`. `maxraftstate` indicates the maximum allowed size of your persistent Raft state in bytes (including the log, but not including snapshots). You should compare `maxraftstate` to `rf.PersistBytes()`. Whenever your `rsm` detects that the Raft state size is approaching this threshold, it should save a snapshot by calling Raft's `Snapshot`. `rsm` can create this snapshot by calling the `Snapshot` method of the `StateMachine` interface to obtain a snapshot of the kvserver. If `maxraftstate` is -1, you do not have to snapshot. The `maxraftstate` limit applies to the GOB-encoded bytes your Raft passes as the first argument to `persister.Save()`.\n\nYou can find the source for the `persister` object in `tester1/persister.go`.\n\nModify your rsm so that it detects when the persisted Raft state grows too large, and then hands a snapshot to Raft. When a `rsm` server restarts, it should read the snapshot with `persister.ReadSnapshot()` and, if the snapshot's length is greater than zero, pass the snapshot to the `StateMachine`'s `Restore()` method. You complete this task if you pass TestSnapshot4C in `rsm`.\n\n```\n$ cd kvraft1/rsm\n$ go test -run TestSnapshot4C\n=== RUN TestSnapshot4C\n ... Passed -- 9223372036.9 3 230 0\n--- PASS: TestSnapshot4C (3.88s)\nPASS\nok 6.5840/kvraft1/rsm 3.882s\n```\n\n- Think about when `rsm` should snapshot its state and what should be included in the snapshot beyond just the server state. Raft stores each snapshot in the persister object using `Save()`, along with corresponding Raft state. You can read the latest stored snapshot using `ReadSnapshot()`.\n- Capitalize all fields of structures stored in the snapshot.\n\nImplement the `kvraft1/server.go` `Snapshot()` and `Restore()` methods, which `rsm` calls. Modify `rsm` to handle applyCh messages that contain snapshots.\n\n- You may have bugs in your Raft and rsm library that this task exposes. 
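A hedged sketch of the kvserver-side `Snapshot()`/`Restore()` methods that `rsm` calls, plus the size check `rsm` can perform after applying each committed operation. The `kv.data` map, the `ValueVersion` type, and the `r.maxraftstate` field are assumptions; `labgob`, `rf.PersistBytes()`, and `StateMachine.Snapshot()` are the mechanisms named in the task above.

```
// Sketch only. Fields of structures stored in the snapshot are capitalized
// so labgob can encode them.
type ValueVersion struct {
	Value   string
	Version uint64
}

func (kv *KVServer) Snapshot() []byte {
	w := new(bytes.Buffer)
	e := labgob.NewEncoder(w)
	e.Encode(kv.data) // kv.data: hypothetical map[string]ValueVersion
	return w.Bytes()
}

func (kv *KVServer) Restore(data []byte) {
	if len(data) == 0 {
		return // nothing has been snapshotted yet
	}
	d := labgob.NewDecoder(bytes.NewBuffer(data))
	var m map[string]ValueVersion
	if d.Decode(&m) == nil {
		kv.data = m
	}
}

// In rsm's reader loop, after handing a committed op to DoOp (sketch):
//   if r.maxraftstate != -1 && r.rf.PersistBytes() >= r.maxraftstate {
//       r.rf.Snapshot(msg.CommandIndex, r.sm.Snapshot())
//   }
```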
If you make changes to your Raft implementation make sure it continues to pass all of the Lab 3 tests.\n- A reasonable amount of time to take for the Lab 4 tests is 400 seconds of real time and 700 seconds of CPU time.\n\nYour code should pass the 4C tests (as in the example here) as well as the 4A+B tests (and your Raft must continue to pass the Lab 3 tests).\n\n```\n$ go test -run 4C\nTest: snapshots, one client (4C SnapshotsRPC) ...\nTest: InstallSnapshot RPC (4C) ...\n ... Passed -- 4.5 3 241 64\nTest: snapshots, one client (4C snapshot size is reasonable) ...\n ... Passed -- 11.4 3 2526 800\nTest: snapshots, one client (4C speed) ...\n ... Passed -- 14.2 3 3149 0\nTest: restarts, snapshots, one client (4C restarts, snapshots, one client) ...\n ... Passed -- 6.8 5 305 13\nTest: restarts, snapshots, many clients (4C restarts, snapshots, many clients ) ...\n ... Passed -- 9.0 5 5583 795\nTest: unreliable net, snapshots, many clients (4C unreliable net, snapshots, many clients) ...\n ... Passed -- 4.7 5 977 155\nTest: unreliable net, restarts, snapshots, many clients (4C unreliable net, restarts, snapshots, many clients) ...\n ... Passed -- 8.6 5 847 33\nTest: unreliable net, restarts, partitions, snapshots, many clients (4C unreliable net, restarts, partitions, snapshots, many clients) ...\n ... Passed -- 11.5 5 841 33\nTest: unreliable net, restarts, partitions, snapshots, random keys, many clients (4C unreliable net, restarts, partitions, snapshots, random keys, many clients) ...\n ... Passed -- 12.8 7 2903 93\nPASS\nok 6.5840/kvraft1 83.543s\n```\n", "test_method": "cd kvraft1/rsm && go test -run 4C", "test_results": "Test: snapshots, one client (4C SnapshotsRPC) ...\nTest: InstallSnapshot RPC (4C) ...\n ... Passed -- 4.5 3 241 64\nTest: snapshots, one client (4C snapshot size is reasonable) ...\n ... Passed -- 11.4 3 2526 800\nTest: snapshots, one client (4C speed) ...\n ... Passed -- 14.2 3 3149 0\nTest: restarts, snapshots, one client (4C restarts, snapshots, one client) ...\n ... Passed -- 6.8 5 305 13\nTest: restarts, snapshots, many clients (4C restarts, snapshots, many clients ) ...\n ... Passed -- 9.0 5 5583 795\nTest: unreliable net, snapshots, many clients (4C unreliable net, snapshots, many clients) ...\n ... Passed -- 4.7 5 977 155\nTest: unreliable net, restarts, snapshots, many clients (4C unreliable net, restarts, snapshots, many clients) ...\n ... Passed -- 8.6 5 847 33\nTest: unreliable net, restarts, partitions, snapshots, many clients (4C unreliable net, restarts, partitions, snapshots, many clients) ...\n ... Passed -- 11.5 5 841 33\nTest: unreliable net, restarts, partitions, snapshots, random keys, many clients (4C unreliable net, restarts, partitions, snapshots, random keys, many clients) ...\n ... Passed -- 12.8 7 2903 93\nPASS\nok 6.5840/kvraft1 83.543s", "difficulty": "moderate", "link": "http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvraft1.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2025", "repo_path": "projects/6.5840-golabs-2025"} -{"task_id": "system_lab_24", "task_name": "problems/system_lab_24.md", "task": "# Problem Context\n## Introduction\nYou can either do a [final project](http://nil.csail.mit.edu/6.5840/2025/project.html) based on your own ideas, or this lab.\n\nIn this lab you'll build a key/value storage system that \"shards,\" or partitions, the keys over a set of Raft-replicated key/value server groups (shardgrps). 
A shard is a subset of the key/value pairs; for example, all the keys starting with \"a\" might be one shard, all the keys starting with \"b\" another, etc. The reason for sharding is performance. Each shardgrp handles puts and gets for just a few of the shards, and the shardgrps operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of shardgrps.\n\n![shardkv design](http://nil.csail.mit.edu/6.5840/2025/labs/shardkv.png)\n\nThe sharded key/value service has the components shown above. Shardgrps (shown with blue squares) store shards with keys: shardgrp 1 holds a shard storing key \"a\", and shardgrp 2 holds a shard storing key \"b\". Clients of the sharded key/value service interact with the service through a clerk (shown with a green circle), which implements `Get` and `Put` methods. To find the shardgrp for a key passed to `Put`/`Get`, the clerk gets the configuration from the kvsrv (shown with a black square), which you implemented in Lab 2. The configuration (not shown) describes the mapping from shards to shardgrps (e.g., shard 1 is served by shardgrp 3).\n\nAn administrator (i.e., the tester) uses another client, the controller (shown with a purple circle), to add/remove shardgrps from the cluster and update which shardgrp should serve a shard. The controller has one main method: `ChangeConfigTo`, which takes as argument a new configuration and changes the system from the current configuration to the new configuration; this involves moving shards to new shardgrps that are joining the system and moving shards away from shardgrps that are leaving the system. To do so the controller 1) makes RPCs (`FreezeShard`, `InstallShard`, and `DeleteShard`) to shardgrps, and 2) updates the configuration stored in kvsrv.\n\nThe reason for the controller is that a sharded storage system must be able to shift shards among shardgrps. One reason is that some shardgrps may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that shardgrps may join and leave the system: new shardgrps may be added to increase capacity, or existing shardgrps may be taken offline for repair or retirement.\n\nThe main challenges in this lab will be ensuring linearizability of `Get`/`Put` operations while handling 1) changes in the assignment of shards to shardgrps, and 2) recovering from a controller that fails or is partitioned during `ChangeConfigTo`.\n\n1. `ChangeConfigTo` moves shards from one shardgrp to another. A risk is that some clients might use the old shardgrp while other clients use the new shardgrp, which could break linearizability. You will need to ensure that at most one shardgrp is serving requests for each shard at any one time.\n2. If `ChangeConfigTo` fails while reconfiguring, some shards may be inaccessible if they have started but not completed moving from one shardgrp to another. To make forward progress, the tester starts a new controller, and your job is to ensure that the new one completes the reconfiguration that the previous controller started.\n\nThis lab uses \"configuration\" to refer to the assignment of shards to shardgrps. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes.\n\nA shardgrp server is a member of only a single shardgrp. The set of servers in a given shardgrp will never change.\n\nOnly RPC may be used for interaction among clients and servers. 
For example, different instances of your server are not allowed to share Go variables or files.\n\nIn Part A, you will implement a working `shardctrler`, which will store and retrieve configurations in a `kvsrv`. You will also implement the `shardgrp`, replicated with your Raft `rsm` package, and a corresponding `shardgrp` clerk. The `shardctrler` talks to the `shardgrp` clerks to move shards between different groups.\n\nIn Part B, you will modify your `shardctrler` to handle failures and partitions during config changes. In Part C, you will extend your `shardctrler` to allow for concurrent controllers without interfering with each other. Finally, in Part D, you will have the opportunity to extend your solution in any way you like.\n\nThis lab's sharded key/value service follows the same general design as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are simple; and so on.\n\nLab 5 will use your `kvsrv` from Lab 2, and your `rsm` and `Raft` from Lab 4. Your Lab 5 and Lab 4 must use the same `rsm` and `Raft` implementations.\n\nYou may use late hours for Part A, but you may not use late hours for Parts B-D.\n## Getting Started\nDo a `git pull` to get the latest lab software.\n\nWe supply you with tests and skeleton code in `src/shardkv1`:\n\n- `client.go` for the shardkv clerk\n- `shardcfg` package for computing shard configurations\n- `shardgrp` package: for the shardgrp clerk and server.\n- `shardctrler` package, which contains `shardctrler.go` with methods for the controller to change a configuration (`ChangeConfigTo`) and to get a configuration (`Query`)\n\nTo get up and running, execute the following commands:\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/shardkv1\n$ go test -v\n=== RUN TestInitQuery5A\nTest (5A): Init and Query ... (reliable network)...\n shardkv_test.go:46: Static wrong null 0\n...\n```\n\n## The Code\n\n# Your Task \nYour first job is to implement shardgrps and the `InitConfig`, `Query`, and `ChangeConfigTo` methods when there are no failures. We have given you the code for describing a configuration, in `shardkv1/shardcfg`. Each `shardcfg.ShardConfig` has a unique identifying number, a mapping from shard number to group number, and a mapping from group number to the list of servers replicating that group. There will usually be more shards than groups (so that each group serves more than one shard), in order that load can be shifted at a fairly fine granularity.\n\nImplement these two methods in `shardctrler/shardctrler.go`:\n\n- The `InitConfig` method receives the first configuration, passed to it by the tester as a `shardcfg.ShardConfig`. `InitConfig` should store the configuration in an instance of Lab 2's `kvsrv`.\n- The `Query` method returns the current configuration; it should read the configuration from `kvsrv`, previously stored there by `InitConfig`.\n\nImplement `InitConfig` and `Query`, and store the configuration in `kvsrv`. You're done when your code passes the first test. Note this task doesn't require any shardgrps.\n\n```\n$ cd ~/6.5840/src/shardkv1\n$ go test -run TestInitQuery5A \nTest (5A): Init and Query ... (reliable network)...\n ... 
Passed -- time 0.0s #peers 1 #RPCs 3 #Ops 0\nPASS\nok 6.5840/shardkv1 0.197s\n$\n```\n\n- Implement `InitConfig` and `Query` by storing and reading the initial configuration from `kvsrv`: use the `Get`/`Put` methods of `ShardCtrler.IKVClerk` to talk to `kvsrv`, use the `String` method of `ShardConfig` to turn a `ShardConfig` into a string that you can pass to `Put`, and use the `shardcfg.FromString()` function to turn a string into a `ShardConfig`.\n\nImplement an initial version of `shardgrp` in `shardkv1/shardgrp/server.go` and a corresponding clerk in `shardkv1/shardgrp/client.go` by copying code from your Lab 4 `kvraft` solution.\n\nImplement a clerk in `shardkv1/client.go` that uses the `Query` method to find the shardgrp for a key, and then talks to that shardgrp. You're done when your code passes the `Static` test.\n\n```\n$ cd ~/6.5840/src/shardkv1\n$ go test -run Static\nTest (5A): one shard group ... (reliable network)...\n ... Passed -- time 5.4s #peers 1 #RPCs 793 #Ops 180\nPASS\nok 6.5840/shardkv1 5.632s\n$\n```\n\n- Copy code from your `kvraft` client.go and server.go for `Put` and `Get`, and any other code you need from `kvraft`.\n- The code in `shardkv1/client.go` provides the `Put`/`Get` clerk for the overall system: it finds out which shardgrp holds the desired key's shard by invoking the `Query` method, and then talks to the shardgrp that holds that shard.\n- Implement `shardkv1/client.go`, including its `Put`/`Get` methods. Use `shardcfg.Key2Shard()` to find the shard number for a key. The tester passes a `ShardCtrler` object to `MakeClerk` in `shardkv1/client.go`. Retrieve the current configuration using the `Query` method.\n- To put/get a key from a shardgrp, the shardkv clerk should create a shardgrp clerk for the shardgrp by calling `shardgrp.MakeClerk`, passing in the servers found in the configuration and the shardkv clerk's `ck.clnt`. Use the `GidServers()` method from `ShardConfig` to get the group for a shard.\n- `shardkv1/client.go`'s Put must return `ErrMaybe` when the reply was maybe lost, but this Put invokes `shardgrp`'s Put to talk to a particular shardgrp. The inner Put can signal this with an error.\n- Upon creation, the first shardgrp (`shardcfg.Gid1`) should initialize itself to own all shards.\n\nNow you should support movement of shards among groups by implementing the `ChangeConfigTo` method, which changes from an old configuration to a new configuration. The new configuration may include new shardgrps that are not present in the old configuration, and may exclude shardgrps that were present in the old configuration. The controller should move shards (the key/value data) so that the set of shards stored by each shardgrp matches the new configuration.\n\nThe approach we suggest for moving a shard is for `ChangeConfigTo` to first \"freeze\" the shard at the source shardgrp, causing that shardgrp to reject `Put`'s for keys in the moving shard. Then, copy (install) the shard to the destination shardgrp; then delete the frozen shard. Finally, post a new configuration so that clients can find the moved shard. A nice property of this approach is that it avoids any direct interactions among the shardgrps. It also supports serving shards that are not affected by an ongoing configuration change.\n\nTo be able to order changes to the configuration, each configuration has a unique number `Num` (see `shardcfg/shardcfg.go`). 
The tester in Part A invokes `ChangeConfigTo` sequentially, and the configuration passed to `ChangeConfigTo` will have a `Num` one larger than the previous one; thus, a configuration with a higher `Num` is newer than one with a lower `Num`.\n\nThe network may delay RPCs, and RPCs may arrive out of order at the shardgrps. To reject old `FreezeShard`, `InstallShard`, and `DeleteShard` RPCs, they should include `Num` (see `shardgrp/shardrpc/shardrpc.go`), and shardgrps must remember the largest `Num` they have seen for each shard.\n\nImplement `ChangeConfigTo` (in `shardctrler/shardctrler.go`) and extend `shardgrp` to support freeze, install, and delete. `ChangeConfigTo` should always succeed in Part A because the tester doesn't induce failures in this part. You will need to implement `FreezeShard`, `InstallShard`, and `DeleteShard` in `shardgrp/client.go` and `shardgrp/server.go` using the RPCs in the `shardgrp/shardrpc` package, and reject old RPCs based on `Num`. You will also need to modify the shardkv clerk in `shardkv1/client.go` to handle `ErrWrongGroup`, which a shardgrp should return if it isn't responsible for the shard.\n\nYou have completed this task when you pass the `JoinBasic` and `DeleteBasic` tests. These tests focus on adding shardgrps; you don't have to worry about shardgrps leaving just yet.\n\n- A shardgrp should respond with an `ErrWrongGroup` error to a client `Put`/`Get` with a key that the shardgrp isn't responsible for (i.e., for a key whose shard is not assigned to the shardgrp). You will have to modify `shardkv1/client.go` to reread the configuration and retry the `Put`/`Get`.\n- Note that you will have to run `FreezeShard`, `InstallShard`, and `DeleteShard` through your `rsm` package, just like `Put` and `Get`.\n- You can send an entire map as your state in an RPC request or reply, which may help keep the code for shard transfer simple.\n- If one of your RPC handlers includes in its reply a map (e.g. a key/value map) that's part of your server's state, you may get bugs due to races. The RPC system has to read the map in order to send it to the caller, but it isn't holding a lock that covers the map. Your server, however, may proceed to modify the same map while the RPC system is reading it. The solution is for the RPC handler to include a copy of the map in the reply.\n\nExtend `ChangeConfigTo` to handle shard groups that leave; i.e., shardgrps that are present in the current configuration but not in the new one. Your solution should pass `TestJoinLeaveBasic5A` now. (You may have handled this scenario already in the previous task, but the previous tests didn't test for shardgrps leaving.)\n\nMake your solution pass all Part A tests, which check that your sharded key/value service supports many groups joining and leaving, shardgrps restarting from snapshots, processing `Get`s while some shards are offline or involved in a configuration change, and linearizability when many clients interact with the service while the tester concurrently invokes the controller's `ChangeConfigTo` to rebalance shards.\n\n```\n$ cd ~/6.5840/src/shardkv1\n$ go test -run 5A\nTest (5A): Init and Query ... (reliable network)...\n ... Passed -- time 0.0s #peers 1 #RPCs 3 #Ops 0\nTest (5A): one shard group ... (reliable network)...\n ... Passed -- time 5.1s #peers 1 #RPCs 792 #Ops 180\nTest (5A): a group joins... (reliable network)...\n ... Passed -- time 12.9s #peers 1 #RPCs 6300 #Ops 180\nTest (5A): delete ... (reliable network)...\n ... 
Passed -- time 8.4s #peers 1 #RPCs 1533 #Ops 360\nTest (5A): basic groups join/leave ... (reliable network)...\n ... Passed -- time 13.7s #peers 1 #RPCs 5676 #Ops 240\nTest (5A): many groups join/leave ... (reliable network)...\n ... Passed -- time 22.1s #peers 1 #RPCs 3529 #Ops 180\nTest (5A): many groups join/leave ... (unreliable network)...\n ... Passed -- time 54.8s #peers 1 #RPCs 5055 #Ops 180\nTest (5A): shutdown ... (reliable network)...\n ... Passed -- time 11.7s #peers 1 #RPCs 2807 #Ops 180\nTest (5A): progress ... (reliable network)...\n ... Passed -- time 8.8s #peers 1 #RPCs 974 #Ops 82\nTest (5A): progress ... (reliable network)...\n ... Passed -- time 13.9s #peers 1 #RPCs 2443 #Ops 390\nTest (5A): one concurrent clerk reliable... (reliable network)...\n ... Passed -- time 20.0s #peers 1 #RPCs 5326 #Ops 1248\nTest (5A): many concurrent clerks reliable... (reliable network)...\n ... Passed -- time 20.4s #peers 1 #RPCs 21688 #Ops 10500\nTest (5A): one concurrent clerk unreliable ... (unreliable network)...\n ... Passed -- time 25.8s #peers 1 #RPCs 2654 #Ops 176\nTest (5A): many concurrent clerks unreliable... (unreliable network)...\n ... Passed -- time 25.3s #peers 1 #RPCs 7553 #Ops 1896\nPASS\nok 6.5840/shardkv1 243.115s\n$\n```\n\nYour solution must continue serving shards that are not affected by an ongoing configuration change.", "test_method": "cd src/shardkv1 && go test -run 5A", "test_results": "Test (5A): Init and Query ... (reliable network)...\n ... Passed -- time 0.0s #peers 1 #RPCs 3 #Ops 0\nTest (5A): one shard group ... (reliable network)...\n ... Passed -- time 5.1s #peers 1 #RPCs 792 #Ops 180\nTest (5A): a group joins... (reliable network)...\n ... Passed -- time 12.9s #peers 1 #RPCs 6300 #Ops 180\nTest (5A): delete ... (reliable network)...\n ... Passed -- time 8.4s #peers 1 #RPCs 1533 #Ops 360\nTest (5A): basic groups join/leave ... (reliable network)...\n ... Passed -- time 13.7s #peers 1 #RPCs 5676 #Ops 240\nTest (5A): many groups join/leave ... (reliable network)...\n ... Passed -- time 22.1s #peers 1 #RPCs 3529 #Ops 180\nTest (5A): many groups join/leave ... (unreliable network)...\n ... Passed -- time 54.8s #peers 1 #RPCs 5055 #Ops 180\nTest (5A): shutdown ... (reliable network)...\n ... Passed -- time 11.7s #peers 1 #RPCs 2807 #Ops 180\nTest (5A): progress ... (reliable network)...\n ... Passed -- time 8.8s #peers 1 #RPCs 974 #Ops 82\nTest (5A): progress ... (reliable network)...\n ... Passed -- time 13.9s #peers 1 #RPCs 2443 #Ops 390\nTest (5A): one concurrent clerk reliable... (reliable network)...\n ... Passed -- time 20.0s #peers 1 #RPCs 5326 #Ops 1248\nTest (5A): many concurrent clerks reliable... (reliable network)...\n ... Passed -- time 20.4s #peers 1 #RPCs 21688 #Ops 10500\nTest (5A): one concurrent clerk unreliable ... (unreliable network)...\n ... Passed -- time 25.8s #peers 1 #RPCs 2654 #Ops 176\nTest (5A): many concurrent clerks unreliable... (unreliable network)...\n ... 
Passed -- time 25.3s #peers 1 #RPCs 7553 #Ops 1896\nPASS\nok 6.5840/shardkv1 243.115s", "difficulty": "hard", "link": "http://nil.csail.mit.edu/6.5840/2025/labs/lab-shard1.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2025", "repo_path": "projects/6.5840-golabs-2025"} -{"task_id": "system_lab_25", "task_name": "problems/system_lab_25.md", "task": "# Problem Context\n## Introduction\nYou can either do a [final project](http://nil.csail.mit.edu/6.5840/2025/project.html) based on your own ideas, or this lab.\n\nIn this lab you'll build a key/value storage system that \"shards,\" or partitions, the keys over a set of Raft-replicated key/value server groups (shardgrps). A shard is a subset of the key/value pairs; for example, all the keys starting with \"a\" might be one shard, all the keys starting with \"b\" another, etc. The reason for sharding is performance. Each shardgrp handles puts and gets for just a few of the shards, and the shardgrps operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of shardgrps.\n\n![shardkv design](http://nil.csail.mit.edu/6.5840/2025/labs/shardkv.png)\n\nThe sharded key/value service has the components shown above. Shardgrps (shown with blue squares) store shards with keys: shardgrp 1 holds a shard storing key \"a\", and shardgrp 2 holds a shard storing key \"b\". Clients of the sharded key/value service interact with the service through a clerk (shown with a green circle), which implements `Get` and `Put` methods. To find the shardgrp for a key passed to `Put`/`Get`, the clerk gets the configuration from the kvsrv (shown with a black square), which you implemented in Lab 2. The configuration (not shown) describes the mapping from shards to shardgrps (e.g., shard 1 is served by shardgrp 3).\n\nAn administrator (i.e., the tester) uses another client, the controller (shown with a purple circle), to add/remove shardgrps from the cluster and update which shardgrp should serve a shard. The controller has one main method: `ChangeConfigTo`, which takes as argument a new configuration and changes the system from the current configuration to the new configuration; this involves moving shards to new shardgrps that are joining the system and moving shards away from shardgrps that are leaving the system. To do so the controller 1) makes RPCs (`FreezeShard`, `InstallShard`, and `DeleteShard`) to shardgrps, and 2) updates the configuration stored in kvsrv.\n\nThe reason for the controller is that a sharded storage system must be able to shift shards among shardgrps. One reason is that some shardgrps may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that shardgrps may join and leave the system: new shardgrps may be added to increase capacity, or existing shardgrps may be taken offline for repair or retirement.\n\nThe main challenges in this lab will be ensuring linearizability of `Get`/`Put` operations while handling 1) changes in the assignment of shards to shardgrps, and 2) recovering from a controller that fails or is partitioned during `ChangeConfigTo`.\n\n1. `ChangeConfigTo` moves shards from one shardgrp to another. A risk is that some clients might use the old shardgrp while other clients use the new shardgrp, which could break linearizability. You will need to ensure that at most one shardgrp is serving requests for each shard at any one time.\n2. 
If `ChangeConfigTo` fails while reconfiguring, some shards may be inaccessible if they have started but not completed moving from one shardgrp to another. To make forward progress, the tester starts a new controller, and your job is to ensure that the new one completes the reconfiguration that the previous controller started.\n\nThis lab uses \"configuration\" to refer to the assignment of shards to shardgrps. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes.\n\nA shardgrp server is a member of only a single shardgrp. The set of servers in a given shardgrp will never change.\n\nOnly RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files.\n\nIn Part A, you will implement a working `shardctrler`, which will store and retrieve configurations in a `kvsrv`. You will also implement the `shardgrp`, replicated with your Raft `rsm` package, and a corresponding `shardgrp` clerk. The `shardctrler` talks to the `shardgrp` clerks to move shards between different groups.\n\nIn Part B, you will modify your `shardctrler` to handle failures and partitions during config changes. In Part C, you will extend your `shardctrler` to allow for concurrent controllers without interfering with each other. Finally, in Part D, you will have the opportunity to extend your solution in any way you like.\n\nThis lab's sharded key/value service follows the same general design as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are simple; and so on.\n\nLab 5 will use your `kvsrv` from Lab 2, and your `rsm` and `Raft` from Lab 4. Your Lab 5 and Lab 4 must use the same `rsm` and `Raft` implementations.\n\nYou may use late hours for Part A, but you may not use late hours for Parts B-D.\n## Getting Started\nDo a `git pull` to get the latest lab software.\n\nWe supply you with tests and skeleton code in `src/shardkv1`:\n\n- `client.go` for the shardkv clerk\n- `shardcfg` package for computing shard configurations\n- `shardgrp` package: for the shardgrp clerk and server.\n- `shardctrler` package, which contains `shardctrler.go` with methods for the controller to change a configuration (`ChangeConfigTo`) and to get a configuration (`Query`)\n\nTo get up and running, execute the following commands:\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/shardkv1\n$ go test -v\n=== RUN TestInitQuery5A\nTest (5A): Init and Query ... (reliable network)...\n shardkv_test.go:46: Static wrong null 0\n...\n```\n\n## The Code\n\n# Your Task \nThe controller is a short-lived command, which an administrator invokes: it moves shards and then exits. But, it may fail or lose network connectivity while moving shards. The main task in this part of the lab is recovering from a controller that fails to complete `ChangeConfigTo`. The tester starts a new controller and invokes its `ChangeConfigTo` after partitioning the first controller; you have to modify the controller so that the new one finishes the reconfiguration. 
The tester calls `InitController` when starting a controller; you can modify that function to check whether an interrupted configuration change needs to be completed.\n\nA good approach to allowing a controller to finish a reconfiguration that a previous one started is to keep two configurations: a current one and a next one, both stored in the controller's kvsrv. When a controller starts a reconfiguration, it stores the next configuration. Once a controller completes the reconfiguration, it makes the next configuration the current one. Modify `InitController` to first check if there is a stored next configuration with a higher configuration number than the current one, and if so, complete the shard moves necessary to reconfigure to the next one.\n\nModify shardctrler to implement the above approach. A controller that picks up the work from a failed controller may repeat `FreezeShard`, `InstallShard`, and `Delete` RPCs; shardgrps can use `Num` to detect duplicates and reject them. You have completed this task if your solution passes the Part B tests.\n\n```\n$ cd ~/6.5840/src/shardkv1\n$ go test -run 5B\nTest (5B): Join/leave while a shardgrp is down... (reliable network)...\n ... Passed -- time 9.2s #peers 1 #RPCs 899 #Ops 120\nTest (5B): recover controller ... (reliable network)...\n ... Passed -- time 26.4s #peers 1 #RPCs 3724 #Ops 360\nPASS\nok 6.5840/shardkv1 35.805s\n$\n```\n\n- The tester calls `InitController` when starting a controller; you can implement recovery in that method in `shardctrler/shardctrler.go`.", "test_method": "cd src/shardkv1 && go test -run 5B", "test_results": "Test (5B): Join/leave while a shardgrp is down... (reliable network)...\n ... Passed -- time 9.2s #peers 1 #RPCs 899 #Ops 120\nTest (5B): recover controller ... (reliable network)...\n ... Passed -- time 26.4s #peers 1 #RPCs 3724 #Ops 360\nPASS\nok 6.5840/shardkv1 35.805s", "difficulty": "easy", "link": "http://nil.csail.mit.edu/6.5840/2025/labs/lab-shard1.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2025", "repo_path": "projects/6.5840-golabs-2025"} -{"task_id": "system_lab_26", "task_name": "problems/system_lab_26.md", "task": "# Problem Context\n## Introduction\nYou can either do a [final project](http://nil.csail.mit.edu/6.5840/2025/project.html) based on your own ideas, or this lab.\n\nIn this lab you'll build a key/value storage system that \"shards,\" or partitions, the keys over a set of Raft-replicated key/value server groups (shardgrps). A shard is a subset of the key/value pairs; for example, all the keys starting with \"a\" might be one shard, all the keys starting with \"b\" another, etc. The reason for sharding is performance. Each shardgrp handles puts and gets for just a few of the shards, and the shardgrps operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of shardgrps.\n\n![shardkv design](http://nil.csail.mit.edu/6.5840/2025/labs/shardkv.png)\n\nThe sharded key/value service has the components shown above. Shardgrps (shown with blue squares) store shards with keys: shardgrp 1 holds a shard storing key \"a\", and shardgrp 2 holds a shard storing key \"b\". Clients of the sharded key/value service interact with the service through a clerk (shown with a green circle), which implements `Get` and `Put` methods. 
To find the shardgrp for a key passed to `Put`/`Get`, the clerk gets the configuration from the kvsrv (shown with a black square), which you implemented in Lab 2. The configuration (not shown) describes the mapping from shards to shardgrps (e.g., shard 1 is served by shardgrp 3).\n\nAn administrator (i.e., the tester) uses another client, the controller (shown with a purple circle), to add/remove shardgrps from the cluster and update which shardgrp should serve a shard. The controller has one main method: `ChangeConfigTo`, which takes as argument a new configuration and changes the system from the current configuration to the new configuration; this involves moving shards to new shardgrps that are joining the system and moving shards away from shardgrps that are leaving the system. To do so the controller 1) makes RPCs (`FreezeShard`, `InstallShard`, and `DeleteShard`) to shardgrps, and 2) updates the configuration stored in kvsrv.\n\nThe reason for the controller is that a sharded storage system must be able to shift shards among shardgrps. One reason is that some shardgrps may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that shardgrps may join and leave the system: new shardgrps may be added to increase capacity, or existing shardgrps may be taken offline for repair or retirement.\n\nThe main challenges in this lab will be ensuring linearizability of `Get`/`Put` operations while handling 1) changes in the assignment of shards to shardgrps, and 2) recovering from a controller that fails or is partitioned during `ChangeConfigTo`.\n\n1. `ChangeConfigTo` moves shards from one shardgrp to another. A risk is that some clients might use the old shardgrp while other clients use the new shardgrp, which could break linearizability. You will need to ensure that at most one shardgrp is serving requests for each shard at any one time.\n2. If `ChangeConfigTo` fails while reconfiguring, some shards may be inaccessible if they have started but not completed moving from one shardgrp to another. To make forward progress, the tester starts a new controller, and your job is to ensure that the new one completes the reconfiguration that the previous controller started.\n\nThis lab uses \"configuration\" to refer to the assignment of shards to shardgrps. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes.\n\nA shardgrp server is a member of only a single shardgrp. The set of servers in a given shardgrp will never change.\n\nOnly RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files.\n\nIn Part A, you will implement a working `shardctrler`, which will store and retrieve configurations in a `kvsrv`. You will also implement the `shardgrp`, replicated with your Raft `rsm` package, and a corresponding `shardgrp` clerk. The `shardctrler` talks to the `shardgrp` clerks to move shards between different groups.\n\nIn Part B, you will modify your `shardctrler` to handle failures and partitions during config changes. In Part C, you will extend your `shardctrler` to allow for concurrent controllers without interfering with each other. Finally, in Part D, you will have the opportunity to extend your solution in any way you like.\n\nThis lab's sharded key/value service follows the same general design as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. 
These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are simple; and so on.\n\nLab 5 will use your `kvsrv` from Lab 2, and your `rsm` and `Raft` from Lab 4. Your Lab 5 and Lab 4 must use the same `rsm` and `Raft` implementations.\n\nYou may use late hours for Part A, but you may not use late hours for Parts B-D.\n## Getting Started\nDo a `git pull` to get the latest lab software.\n\nWe supply you with tests and skeleton code in `src/shardkv1`:\n\n- `client.go` for the shardkv clerk\n- `shardcfg` package for computing shard configurations\n- `shardgrp` package: for the shardgrp clerk and server.\n- `shardctrler` package, which contains `shardctrler.go` with methods for the controller to change a configuration (`ChangeConfigTo`) and to get a configuration (`Query`)\n\nTo get up and running, execute the following commands:\n\n```\n$ cd ~/6.5840\n$ git pull\n...\n$ cd src/shardkv1\n$ go test -v\n=== RUN TestInitQuery5A\nTest (5A): Init and Query ... (reliable network)...\n shardkv_test.go:46: Static wrong null 0\n...\n```\n\n## The Code\n\n# Your Task \nIn this part of the lab you will modify the controller to allow for concurrent controllers. When a controller crashes or is partitioned, the tester will start a new controller, which must finish any work that the old controller might have in progress (i.e., finishing moving shards like in Part B). This means that several controllers may run concurrently and send RPCs to the shardgrps and the `kvsrv` that stores configurations.\n\nThe main challenge is to ensure these controllers don't step on each other. In Part A you already fenced all the shardgrp RPCs with `Num` so that old RPCs are rejected. Even if several controllers pick up the work of an old controller concurrently, one of them succeeds; the others merely repeat the RPCs, which the shardgrps will ignore.\n\nThus the challenging case left is to ensure that only one controller updates the next configuration, so that two controllers (e.g., a partitioned one and a new one) cannot put different configurations in the next one. To stress this scenario, the tester runs several controllers concurrently and each one computes the next configuration by reading the current configuration and updating it for a shardgrp that left or joined, and then the tester invokes `ChangeConfigTo`; thus multiple controllers may invoke `ChangeConfigTo` with different configurations with the same `Num`. You can use the version number of a key and versioned `Put`s to ensure that only one controller updates the next configuration and that the other invocations return without doing anything.\n\nModify your controller so that only one controller can post a next configuration for a configuration `Num`. The tester will start many controllers but only one should start `ChangeConfigTo` for a new configuration. You have completed this task if you pass the concurrent tests of Part C:\n\n```\n$ cd ~/6.5840/src/shardkv1\n$ go test -run TestConcurrentReliable5C\nTest (5C): Concurrent ctrlers ... (reliable network)...\n ... Passed -- time 8.2s #peers 1 #RPCs 1753 #Ops 120\nPASS\nok 6.5840/shardkv1 8.364s\n$ go test -run TestAcquireLockConcurrentUnreliable5C\nTest (5C): Concurrent ctrlers ... (unreliable network)...\n ... 
Passed -- time 23.8s #peers 1 #RPCs 1850 #Ops 120\nPASS\nok 6.5840/shardkv1 24.008s\n$\n```\n\n- See `concurCtrler` in `test.go` to see how the tester runs controllers concurrently.\n\nIn this exercise you will put recovery of an old controller together with a new controller: a new controller should perform recovery from Part B. If the old controller was partitioned during `ChangeConfigTo`, you will have to make sure that the old controller doesn't interfere with the new controller. If all the controller's updates are already properly fenced with `Num` checks from Part B, you don't have to write extra code. You have completed this task if you pass the `Partition` tests.\n\n```\n$ cd ~/6.5840/src/shardkv1\n$ go test -run Partition\nTest (5C): partition controller in join... (reliable network)...\n ... Passed -- time 7.8s #peers 1 #RPCs 876 #Ops 120\nTest (5C): controllers with leased leadership ... (reliable network)...\n ... Passed -- time 36.8s #peers 1 #RPCs 3981 #Ops 360\nTest (5C): controllers with leased leadership ... (unreliable network)...\n ... Passed -- time 52.4s #peers 1 #RPCs 2901 #Ops 240\nTest (5C): controllers with leased leadership ... (reliable network)...\n ... Passed -- time 60.2s #peers 1 #RPCs 27415 #Ops 11182\nTest (5C): controllers with leased leadership ... (unreliable network)...\n ... Passed -- time 60.5s #peers 1 #RPCs 11422 #Ops 2336\nPASS\nok 6.5840/shardkv1 217.779s\n$\n```\n\nYou have completed implementing a highly-available sharded key/value service with many shard groups for scalability, reconfiguration to handle changes in load, and with a fault-tolerant controller; congrats!\n\nRerun all tests to check that your recent changes to the controller haven't broken earlier tests.\n\nGradescope will rerun the Lab 3A-D and Lab 4A-C tests on your submission, in addition to the 5C tests. Before submitting, double check that your solution works:\n\n```\n$ go test ./raft1\n$ go test ./kvraft1\n$ go test ./shardkv1\n```\n", "test_method": "cd src/shardkv1 && go test -run Partition", "test_results": "Test (5C): partition controller in join... (reliable network)...\n ... Passed -- time 7.8s #peers 1 #RPCs 876 #Ops 120\nTest (5C): controllers with leased leadership ... (reliable network)...\n ... Passed -- time 36.8s #peers 1 #RPCs 3981 #Ops 360\nTest (5C): controllers with leased leadership ... (unreliable network)...\n ... Passed -- time 52.4s #peers 1 #RPCs 2901 #Ops 240\nTest (5C): controllers with leased leadership ... (reliable network)...\n ... Passed -- time 60.2s #peers 1 #RPCs 27415 #Ops 11182\nTest (5C): controllers with leased leadership ... (unreliable network)...\n ... Passed -- time 60.5s #peers 1 #RPCs 11422 #Ops 2336\nPASS\nok 6.5840/shardkv1 217.779s", "difficulty": "moderate", "link": "http://nil.csail.mit.edu/6.5840/2025/labs/lab-shard1.html", "docker_env": "xuafeng/swe-go-python:latest", "repo_url": "git://g.csail.mit.edu/6.5840-golabs-2025", "repo_path": "projects/6.5840-golabs-2025"} diff --git a/benchmarks/course_lab_bench/data/benchmark/database_systems_mit_65830.json b/benchmarks/course_lab_bench/data/benchmark/database_systems_mit_65830.json deleted file mode 100644 index 206115d..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/database_systems_mit_65830.json +++ /dev/null @@ -1,647 +0,0 @@ -[ - { - "instance_id": 1, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 1: GoDB", - "part_name": "2.3. Fields and Tuples", - "exercise": "Exercise 1", - "introduction": "## 2. 
GoDB Architecture and Implementation Guide\n\nGoDB consists of:\n\n* Structures that represent fields, tuples, and tuple schemas;\n* Methods that apply predicates and conditions to tuples;\n* One or more access methods (e.g., heap files) that store relations on disk and\n provide a way to iterate through tuples of those relations;\n* A collection of operator classes (e.g., select, join, insert, delete, etc.)\n that process tuples;\n* A buffer pool that caches active tuples and pages in memory and handles\n concurrency control and transactions (neither of which you need to worry about\n for this lab); and,\n* A catalog that stores information about available tables and their schemas.\n\nGoDB does not include many things that you may think of as being a part of a\n\"database system.\" In particular, GoDB does not have:\n\n* (In this lab), a SQL front end or parser that allows you to type queries\n directly into GoDB. Instead, queries are built up by chaining a set of\n operators together into a hand-built query plan (see [Section\n 2.6](#query_walkthrough)). We will provide a simple parser for use in later\n labs.\n* Views.\n* Data types except integers and fixed length strings.\n* (In this lab) Query optimizer.\n* (In this lab) Indices.\n\nIn the rest of this Section, we describe each of the main components of GoDB\nthat you will need to implement in this lab. You should use the exercises in\nthis discussion to guide your implementation. This document is by no means a\ncomplete specification for GoDB; you will need to make decisions about how\nto design and implement various parts of the system. Note that for Lab 1 you do\nnot need to implement any operators (e.g., select, join, project) except\nsequential scan as a part of the `heap_file.go` file.\nYou will add support for additional operators in future labs.", - "Description": "### 2.3. Fields and Tuples\n\nThe `Tuple` struct in GoDB is used to store the in-memory value of a database tuple. \nThey consist of a collection of fields implementing the `DBValue`\ninterface. Different\ndata types (e.g., `IntField`, `StringField`) implement `DBValue`. `Tuple` objects are created by\nthe underlying access methods (e.g., heap files, or B-trees), as described in\nthe next section. 
Tuples also have a type (or schema), called a _tuple\ndescriptor_, represented by a `TupleDesc` struct, which consists of a\ncollection of `FieldType` objects, one per field in the tuple, each of which\ndescribes the type of the corresponding field.\n\n\n\n### Exercise 1\n\n**Implement the skeleton methods in:**\n\n---\n* tuple.go\n---\n\nAt this point, your code should pass the unit tests in `tuple_test.go`.", - "repo/location": "go get main\ncd godb\ngo get ../godb\ngo test", - "dependency": [], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab1.md", - "codes": [ - { - "code_path": "godb/tuple.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/tuple.go", - "code_content": "package godb\n\n//This file defines methods for working with tuples, including defining\n// the types DBType, FieldType, TupleDesc, DBValue, and Tuple\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n\t\"strconv\"\n\t\"strings\"\n\n)\n\n// DBType is the type of a tuple field, in GoDB, e.g., IntType or StringType\ntype DBType int\n\nconst (\n\tIntType DBType = iota\n\tStringType DBType = iota\n\tUnknownType DBType = iota //used internally, during parsing, because sometimes the type is unknown\n)\n\nfunc (t DBType) String() string {\n\tswitch t {\n\tcase IntType:\n\t\treturn \"int\"\n\tcase StringType:\n\t\treturn \"string\"\n\t}\n\treturn \"unknown\"\n}\n\n// FieldType is the type of a field in a tuple, e.g., its name, table, and [godb.DBType].\n// TableQualifier may or may not be an empty string, depending on whether the table\n// was specified in the query\ntype FieldType struct {\n\tFname string\n\tTableQualifier string\n\tFtype DBType\n}\n\n// TupleDesc is \"type\" of the tuple, e.g., the field names and types\ntype TupleDesc struct {\n\tFields []FieldType\n}\n\n// Compare two tuple descs, and return true iff\n// all of their field objects are equal and they\n// are the same length\nfunc (d1 *TupleDesc) equals(d2 *TupleDesc) bool {\n\t// TODO: some code goes here\n\treturn true\n\n}\n\n// Given a FieldType f and a TupleDesc desc, find the best\n// matching field in desc for f. A match is defined as\n// having the same Ftype and the same name, preferring a match\n// with the same TableQualifier if f has a TableQualifier\n// We have provided this implementation because it's details are\n// idiosyncratic to the behavior of the parser, which we are not\n// asking you to write\nfunc findFieldInTd(field FieldType, desc *TupleDesc) (int, error) {\n\tbest := -1\n\tfor i, f := range desc.Fields {\n\t\tif f.Fname == field.Fname && (f.Ftype == field.Ftype || field.Ftype == UnknownType) {\n\t\t\tif field.TableQualifier == \"\" && best != -1 {\n\t\t\t\treturn 0, GoDBError{AmbiguousNameError, fmt.Sprintf(\"select name %s is ambiguous\", f.Fname)}\n\t\t\t}\n\t\t\tif f.TableQualifier == field.TableQualifier || best == -1 {\n\t\t\t\tbest = i\n\t\t\t}\n\t\t}\n\t}\n\tif best != -1 {\n\t\treturn best, nil\n\t}\n\treturn -1, GoDBError{IncompatibleTypesError, fmt.Sprintf(\"field %s.%s not found\", field.TableQualifier, field.Fname)}\n\n}\n\n// Make a copy of a tuple desc. Note that in go, assignment of a slice to\n// another slice object does not make a copy of the contents of the slice.\n// Look at the built-in function \"copy\".\nfunc (td *TupleDesc) copy() *TupleDesc {\n\t// TODO: some code goes here\n\treturn &TupleDesc{} //replace me\n}\n\n// Assign the TableQualifier of every field in the TupleDesc to be the\n// supplied alias. 
We have provided this function as it is only used\n// by the parser.\nfunc (td *TupleDesc) setTableAlias(alias string) {\n\tfields := make([]FieldType, len(td.Fields))\n\tcopy(fields, td.Fields)\n\tfor i := range fields {\n\t\tfields[i].TableQualifier = alias\n\t}\n\ttd.Fields = fields\n}\n\n// Merge two TupleDescs together. The resulting TupleDesc\n// should consist of the fields of desc2\n// appended onto the fields of desc.\nfunc (desc *TupleDesc) merge(desc2 *TupleDesc) *TupleDesc {\n\t// TODO: some code goes here\n\treturn &TupleDesc{} //replace me\n}\n\n// ================== Tuple Methods ======================\n\n// Interface for tuple field values\ntype DBValue interface {\n\tEvalPred(v DBValue, op BoolOp) bool\n}\n\n// Integer field value\ntype IntField struct {\n\tValue int64\n}\n\n// String field value\ntype StringField struct {\n\tValue string\n}\n\n// Tuple represents the contents of a tuple read from a database\n// It includes the tuple descriptor, and the value of the fields\ntype Tuple struct {\n\tDesc TupleDesc\n\tFields []DBValue\n\tRid recordID //used to track the page and position this page was read from\n}\n\ntype recordID interface {\n}\n\n// Serialize the contents of the tuple into a byte array Since all tuples are of\n// fixed size, this method should simply write the fields in sequential order\n// into the supplied buffer.\n//\n// See the function [binary.Write]. Objects should be serialized in little\n// endian oder.\n//\n// Strings can be converted to byte arrays by casting to []byte. Note that all\n// strings need to be padded to StringLength bytes (set in types.go). For\n// example if StringLength is set to 5, the string 'mit' should be written as\n// 'm', 'i', 't', 0, 0\n//\n// May return an error if the buffer has insufficient capacity to store the\n// tuple.\nfunc (t *Tuple) writeTo(b *bytes.Buffer) error {\n\t// TODO: some code goes here\n\treturn fmt.Errorf(\"writeTo not implemented\") //replace me\n}\n\n// Read the contents of a tuple with the specified [TupleDesc] from the\n// specified buffer, returning a Tuple.\n//\n// See [binary.Read]. Objects should be deserialized in little endian oder.\n//\n// All strings are stored as StringLength byte objects.\n//\n// Strings with length < StringLength will be padded with zeros, and these\n// trailing zeros should be removed from the strings. A []byte can be cast\n// directly to string.\n//\n// May return an error if the buffer has insufficient data to deserialize the\n// tuple.\nfunc readTupleFrom(b *bytes.Buffer, desc *TupleDesc) (*Tuple, error) {\n\t// TODO: some code goes here\n\treturn nil, fmt.Errorf(\"readTupleFrom not implemented\") //replace me\n}\n\n// Compare two tuples for equality. Equality means that the TupleDescs are equal\n// and all of the fields are equal. TupleDescs should be compared with\n// the [TupleDesc.equals] method, but fields can be compared directly with equality\n// operators.\nfunc (t1 *Tuple) equals(t2 *Tuple) bool {\n\t// TODO: some code goes here\n\treturn true\n}\n\n// Merge two tuples together, producing a new tuple with the fields of t2\n// appended to t1. 
The new tuple should have a correct TupleDesc that is created\n// by merging the descriptions of the two input tuples.\nfunc joinTuples(t1 *Tuple, t2 *Tuple) *Tuple {\n\t// TODO: some code goes here\n\treturn &Tuple{}\n}\n\ntype orderByState int\n\nconst (\n\tOrderedLessThan orderByState = iota\n\tOrderedEqual orderByState = iota\n\tOrderedGreaterThan orderByState = iota\n)\n\n// Apply the supplied expression to both t and t2, and compare the results,\n// returning an orderByState value.\n//\n// Takes an arbitrary expressions rather than a field, because, e.g., for an\n// ORDER BY SQL may ORDER BY arbitrary expressions, e.g., substr(name, 1, 2)\n//\n// Note that in most cases Expr will be a [godb.FieldExpr], which simply\n// extracts a named field from a supplied tuple.\n//\n// Calling the [Expr.EvalExpr] method on a tuple will return the value of the\n// expression on the supplied tuple.\n//\n// Note that EvalExpr uses the [Tuple.project] method, so you will need\n// to implement projection before testing compareField.\nfunc (t *Tuple) compareField(t2 *Tuple, field Expr) (orderByState, error) {\n\t// TODO: some code goes here\n\treturn OrderedEqual, fmt.Errorf(\"compareField not implemented\") // replace me\n}\n\n// Project out the supplied fields from the tuple. Should return a new Tuple\n// with just the fields named in fields.\n//\n// Should not require a match on TableQualifier, but should prefer fields that\n// do match on TableQualifier (e.g., a field t1.name in fields should match an\n// entry t2.name in t, but only if there is not an entry t1.name in t)\nfunc (t *Tuple) project(fields []FieldType) (*Tuple, error) {\n\t// TODO: some code goes here\n\treturn nil, fmt.Errorf(\"project not implemented\") //replace me\n}\n\n// Compute a key for the tuple to be used in a map structure\nfunc (t *Tuple) tupleKey() any {\n\tvar buf bytes.Buffer\n\tt.writeTo(&buf)\n\treturn buf.String()\n}\n\nvar winWidth int = 120\n\nfunc fmtCol(v string, ncols int) string {\n\tcolWid := winWidth / ncols\n\tnextLen := len(v) + 3\n\tremLen := colWid - nextLen\n\tif remLen > 0 {\n\t\tspacesRight := remLen / 2\n\t\tspacesLeft := remLen - spacesRight\n\t\treturn strings.Repeat(\" \", spacesLeft) + v + strings.Repeat(\" \", spacesRight) + \" |\"\n\t} else {\n\t\treturn \" \" + v[0:colWid-4] + \" |\"\n\t}\n}\n\n// Return a string representing the header of a table for a tuple with the\n// supplied TupleDesc.\n//\n// Aligned indicates if the tuple should be foramtted in a tabular format\nfunc (d *TupleDesc) HeaderString(aligned bool) string {\n\toutstr := \"\"\n\tfor i, f := range d.Fields {\n\t\ttableName := \"\"\n\t\tif f.TableQualifier != \"\" {\n\t\t\ttableName = f.TableQualifier + \".\"\n\t\t}\n\n\t\tif aligned {\n\t\t\toutstr = fmt.Sprintf(\"%s %s\", outstr, fmtCol(tableName+f.Fname, len(d.Fields)))\n\t\t} else {\n\t\t\tsep := \",\"\n\t\t\tif i == 0 {\n\t\t\t\tsep = \"\"\n\t\t\t}\n\t\t\toutstr = fmt.Sprintf(\"%s%s%s\", outstr, sep, tableName+f.Fname)\n\t\t}\n\t}\n\treturn outstr\n}\n\n// Return a string representing the tuple\n// Aligned indicates if the tuple should be formatted in a tabular format\nfunc (t *Tuple) PrettyPrintString(aligned bool) string {\n\toutstr := \"\"\n\tfor i, f := range t.Fields {\n\t\tstr := \"\"\n\t\tswitch f := f.(type) {\n\t\tcase IntField:\n\t\t\tstr = strconv.FormatInt(f.Value, 10)\n\t\tcase StringField:\n\t\t\tstr = f.Value\n\t\t}\n\t\tif aligned {\n\t\t\toutstr = fmt.Sprintf(\"%s %s\", outstr, fmtCol(str, len(t.Fields)))\n\t\t} else {\n\t\t\tsep := \",\"\n\t\t\tif i == 0 
{\n\t\t\t\tsep = \"\"\n\t\t\t}\n\t\t\toutstr = fmt.Sprintf(\"%s%s%s\", outstr, sep, str)\n\t\t}\n\t}\n\treturn outstr\n}\n" - } - ], - "test_codes": [ - { - "code_name": "tuple_test.go", - "code_path": "godb/tuple_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/tuple_test.go", - "code_content": "package godb\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n\t\"testing\"\n)\n\nfunc CheckIfOutputMatches(f func() (*Tuple, error), ts []*Tuple) error {\n\tn := 0\n\tfor {\n\t\tt1, err := f()\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\t\tif t1 == nil {\n\t\t\tbreak\n\t\t}\n\n\t\tif n >= len(ts) {\n\t\t\treturn fmt.Errorf(\"too many tuples returned. expected %d\", len(ts))\n\t\t}\n\n\t\tt2 := ts[n]\n\t\tif !t1.equals(t2) {\n\t\t\treturn fmt.Errorf(\"tuple %d did not match expected tuple. expected %v, got %v\", n, t2, t1)\n\t\t}\n\t\tn++\n\t}\n\tif n < len(ts) {\n\t\treturn fmt.Errorf(\"too few tuples returned. expected %d, got %d\", len(ts), n)\n\t}\n\treturn nil\n}\n\nfunc CheckIfOutputMatchesUnordered(f func() (*Tuple, error), ts []*Tuple) error {\n\tn := len(ts)\n\tfound := make([]bool, n)\n\n\ti := 0\n\tfor {\n\t\tt1, err := f()\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\t\tif t1 == nil {\n\t\t\tbreak\n\t\t}\n\n\t\tif i >= n {\n\t\t\treturn fmt.Errorf(\"too many tuples returned. expected %d\", n)\n\t\t}\n\n\t\tfound_this := false\n\t\tfor j, t2 := range ts {\n\t\t\tif !found[j] && t1.equals(t2) {\n\t\t\t\tfound[j] = true\n\t\t\t\tfound_this = true\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\n\t\tif !found_this {\n\t\t\treturn fmt.Errorf(\"received unexpected tuple %v\", t1)\n\t\t}\n\t\ti++\n\t}\n\tif i < n {\n\t\treturn fmt.Errorf(\"too few tuples returned. expected %d, got %d\", n, i)\n\t}\n\tfor j, f := range found {\n\t\tif !f {\n\t\t\treturn fmt.Errorf(\"missing tuple %v\", ts[j])\n\t\t}\n\t}\n\treturn nil\n}\n\nfunc makeTupleTestVars() (TupleDesc, Tuple, Tuple) {\n\tvar td = TupleDesc{Fields: []FieldType{\n\t\t{Fname: \"name\", Ftype: StringType},\n\t\t{Fname: \"age\", Ftype: IntType},\n\t}}\n\n\tvar t1 = Tuple{\n\t\tDesc: td,\n\t\tFields: []DBValue{\n\t\t\tStringField{\"sam\"},\n\t\t\tIntField{25},\n\t\t}}\n\n\tvar t2 = Tuple{\n\t\tDesc: td,\n\t\tFields: []DBValue{\n\t\t\tStringField{\"george jones\"},\n\t\t\tIntField{999},\n\t\t}}\n\n\treturn td, t1, t2\n}\n\n// Unit test for Tuple.writeTo() and Tuple.readTupleFrom()\nfunc TestTupleSerialization(t *testing.T) {\n\ttd, t1, _ := makeTupleTestVars()\n\tb := new(bytes.Buffer)\n\tt1.writeTo(b)\n\tt3, err := readTupleFrom(b, &td)\n\tif err != nil {\n\t\tt.Fatalf(\"Error loading tuple from saved buffer: %v\", err.Error())\n\t}\n\tif !t3.equals(&t1) {\n\t\tt.Errorf(\"Serialization / deserialization doesn't result in identical tuple.\")\n\t}\n}\n\n// Unit test for Tuple.compareField()\nfunc TestTupleExpr(t *testing.T) {\n\ttd, t1, t2 := makeTupleTestVars()\n\tft := td.Fields[0]\n\tf := FieldExpr{ft}\n\tresult, err := t1.compareField(&t2, &f) // compare \"sam\" to \"george jones\"\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif result != OrderedGreaterThan {\n\t\tt.Errorf(\"comparison of fields did not return expected result\")\n\t}\n}\n\n// Unit test for Tuple.project()\nfunc TestTupleProject(t *testing.T) {\n\t_, t1, _ := makeTupleTestVars()\n\ttNew, err := t1.project([]FieldType{t1.Desc.Fields[0]})\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif tNew == nil {\n\t\tt.Fatalf(\"new tuple was nil\")\n\t}\n\tif len(tNew.Fields) != 1 {\n\t\tt.Fatalf(\"unexpected number of fields after project\")\n\t}\n\tf, ok 
:= tNew.Fields[0].(StringField)\n\tif !ok || f.Value != \"sam\" {\n\t\tt.Errorf(\"unexpected value after project\")\n\t}\n}\n\n// Unit test for Tuple.project()\nfunc TestTupleProjectQualifier(t *testing.T) {\n\ttd1 := TupleDesc{Fields: []FieldType{{Fname: \"f\", TableQualifier: \"t1\", Ftype: IntType}, {Fname: \"f\", TableQualifier: \"t2\", Ftype: IntType}}}\n\tt1 := Tuple{td1, []DBValue{IntField{1}, IntField{2}}, nil}\n\n\ttNew, err := t1.project([]FieldType{t1.Desc.Fields[1]})\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif tNew == nil {\n\t\tt.Fatalf(\"new tuple was nil\")\n\t}\n\tif len(tNew.Fields) != 1 {\n\t\tt.Fatalf(\"unexpected number of fields after project\")\n\t}\n\tf, ok := tNew.Fields[0].(IntField)\n\tif !ok || f.Value != 2 {\n\t\tt.Errorf(\"failed to select t2.f\")\n\t}\n\n\ttd2 := TupleDesc{Fields: []FieldType{{Fname: \"g\", TableQualifier: \"t1\", Ftype: IntType}, {Fname: \"f\", TableQualifier: \"t2\", Ftype: IntType}}}\n\tt2 := Tuple{td2, []DBValue{IntField{1}, IntField{2}}, nil}\n\n\ttNew, err = t2.project([]FieldType{{Fname: \"f\", TableQualifier: \"t1\", Ftype: IntType}})\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif tNew == nil {\n\t\tt.Fatalf(\"new tuple was nil\")\n\t}\n\tif len(tNew.Fields) != 1 {\n\t\tt.Fatalf(\"unexpected number of fields after project\")\n\t}\n\tf, ok = tNew.Fields[0].(IntField)\n\tif !ok || f.Value != 2 {\n\t\tt.Errorf(\"failed to select t2.f\")\n\t}\n}\n\n// Unit test for Tuple.joinTuples()\nfunc TestTupleJoin(t *testing.T) {\n\t_, t1, t2 := makeTupleTestVars()\n\ttNew := joinTuples(&t1, &t2)\n\tif len(tNew.Fields) != 4 {\n\t\tt.Fatalf(\"unexpected number of fields after join\")\n\t}\n\tif len(tNew.Desc.Fields) != 4 {\n\t\tt.Fatalf(\"unexpected number of fields in description after join\")\n\t}\n\tf, ok := tNew.Fields[0].(StringField)\n\tif !ok || f.Value != \"sam\" {\n\t\tt.Fatalf(\"unexpected value after join\")\n\t}\n\tf, ok = tNew.Fields[2].(StringField)\n\tif !ok || f.Value != \"george jones\" {\n\t\tt.Errorf(\"unexpected value after join\")\n\t}\n\n}\n\nfunc TDAssertEquals(t *testing.T, expected, actual TupleDesc) {\n\tif !(expected.equals(&actual)) {\n\t\tt.Errorf(\"Expected EQUAL, found NOT EQUAL\")\n\t}\n}\n\nfunc TDAssertNotEquals(t *testing.T, expected, actual TupleDesc) {\n\tif expected.equals(&actual) {\n\t\tt.Errorf(\"Expected EQUAL, found NOT EQUAL\")\n\t}\n}\n\nfunc TAssertEquals(t *testing.T, expected, actual Tuple) {\n\tif !(expected.equals(&actual)) {\n\t\tt.Errorf(\"Expected EQUAL, found NOT EQUAL\")\n\t}\n}\n\nfunc TAssertNotEquals(t *testing.T, expected, actual Tuple) {\n\tif expected.equals(&actual) {\n\t\tt.Errorf(\"Expected NOT EQUAL, found EQUAL\")\n\t}\n}\n\nfunc TestTupleDescEquals(t *testing.T) {\n\tsingleInt := TupleDesc{Fields: []FieldType{{Ftype: IntType}}}\n\tsingleInt2 := TupleDesc{Fields: []FieldType{{Ftype: IntType}}}\n\tintString := TupleDesc{Fields: []FieldType{{Ftype: IntType}, {Ftype: StringType}}}\n\tintString2 := TupleDesc{Fields: []FieldType{{Ftype: IntType}, {Ftype: StringType}}}\n\n\tTDAssertEquals(t, singleInt, singleInt)\n\tTDAssertEquals(t, singleInt, singleInt2)\n\tTDAssertEquals(t, singleInt2, singleInt)\n\tTDAssertEquals(t, intString, intString)\n\n\tTDAssertNotEquals(t, singleInt, intString)\n\tTDAssertNotEquals(t, singleInt2, intString)\n\tTDAssertNotEquals(t, intString, singleInt)\n\tTDAssertNotEquals(t, intString, singleInt2)\n\tTDAssertEquals(t, intString, intString2)\n\tTDAssertEquals(t, intString2, intString)\n\n\tstringInt := TupleDesc{Fields: []FieldType{{Ftype: 
StringType}, {Ftype: IntType}}}\n\t_, t1, _ := makeTupleTestVars()\n\tTDAssertNotEquals(t, t1.Desc, stringInt) // diff in only Fname\n}\n\n// Unit test for TupleDesc.copy()\nfunc TestTupleDescCopy(t *testing.T) {\n\tsingleInt := TupleDesc{Fields: []FieldType{{Ftype: IntType}}}\n\tintString := TupleDesc{Fields: []FieldType{{Ftype: IntType}, {Ftype: StringType}}}\n\n\tTDAssertEquals(t, singleInt, *singleInt.copy())\n\tTDAssertEquals(t, intString, *intString.copy())\n\tTDAssertEquals(t, *intString.copy(), *intString.copy())\n\tTDAssertNotEquals(t, *intString.copy(), *singleInt.copy())\n\n\t// tests deep copy\n\ttdCpy := intString.copy()\n\ttdCpy2 := tdCpy.copy()\n\tif tdCpy == nil || len(tdCpy.Fields) == 0 {\n\t\tt.Fatalf(\"tdCpy is nil or fields are empty\")\n\t}\n\tif tdCpy2 == nil || len(tdCpy2.Fields) == 0 {\n\t\tt.Fatalf(\"tdCpy2 is nil or fields are empty\")\n\t}\n\ttdCpy.Fields[0] = intString.Fields[1]\n\tTDAssertNotEquals(t, *tdCpy, *tdCpy2)\n\ttdCpy.Fields[0] = intString.Fields[0]\n\tTDAssertEquals(t, *tdCpy, *tdCpy2)\n}\n\n// Unit test for TupleDesc.merge()\nfunc TestTupleDescMerge(t *testing.T) {\n\tsingleInt := TupleDesc{Fields: []FieldType{{Ftype: IntType}}}\n\tstringInt := TupleDesc{Fields: []FieldType{{Ftype: StringType}, {Ftype: IntType}}}\n\ttd1, td2 := stringInt, stringInt.copy()\n\n\ttdNew := td1.merge(&singleInt).merge(td2)\n\tfinal := TupleDesc{Fields: []FieldType{{Ftype: StringType}, {Ftype: IntType}, {Ftype: IntType}, {Ftype: StringType}, {Ftype: IntType}}}\n\n\tTDAssertEquals(t, final, *tdNew)\n\tTDAssertNotEquals(t, td1, *tdNew)\n}\n\n// Unit test for Tuple.equals()\nfunc TestTupleEquals(t *testing.T) {\n\t_, t1, t2 := makeTupleTestVars()\n\t_, t1Dup, _ := makeTupleTestVars()\n\n\tvar stringTup = Tuple{\n\t\tDesc: TupleDesc{Fields: []FieldType{{Ftype: StringType}}},\n\t\tFields: []DBValue{\n\t\t\tStringField{\"sam\"},\n\t\t},\n\t}\n\n\tTAssertEquals(t, t1, t1)\n\tTAssertEquals(t, t1, t1Dup)\n\n\tTAssertNotEquals(t, t1, t2)\n\tTAssertNotEquals(t, t1, stringTup)\n\tTAssertNotEquals(t, stringTup, t2)\n}\n\nfunc TestJoinTuplesDesc(t *testing.T) {\n\t_, t1, t2 := makeTupleTestVars()\n\ttNew := joinTuples(&t1, &t2)\n\tif len(tNew.Desc.Fields) != 4 {\n\t\tt.Fatalf(\"Expected 4 fields in desc after join\")\n\t}\n\tfields := []string{\"name\", \"age\", \"name\", \"age\"}\n\tfor i, fname := range fields {\n\t\tif tNew.Desc.Fields[i].Fname != fname {\n\t\t\tt.Fatalf(\"expected %dth field to be named %s\", i, fname)\n\t\t}\n\t}\n}\n\nfunc TestTupleJoinDesc(t *testing.T) {\n\tvar td1 = TupleDesc{Fields: []FieldType{\n\t\t{Fname: \"name\", Ftype: StringType},\n\t\t{Fname: \"age\", Ftype: IntType},\n\t}}\n\n\tvar td2 = TupleDesc{Fields: []FieldType{\n\t\t{Fname: \"age2\", Ftype: IntType},\n\t\t{Fname: \"name2\", Ftype: StringType},\n\t}}\n\n\tvar t1 = Tuple{\n\t\tDesc: td1,\n\t\tFields: []DBValue{\n\t\t\tStringField{\"sam\"},\n\t\t\tIntField{25},\n\t\t}}\n\n\tvar t2 = Tuple{\n\t\tDesc: td2,\n\t\tFields: []DBValue{\n\t\t\tIntField{999},\n\t\t\tStringField{\"george jones\"},\n\t\t}}\n\n\ttNew := joinTuples(&t1, &t2)\n\tif len(tNew.Desc.Fields) != 4 {\n\t\tt.Fatalf(\"unexpected number of desc fields after join\")\n\t}\n\n\tvar tdAns = TupleDesc{Fields: []FieldType{\n\t\t{Fname: \"name\", Ftype: StringType},\n\t\t{Fname: \"age\", Ftype: IntType},\n\t\t{Fname: \"age2\", Ftype: IntType},\n\t\t{Fname: \"name2\", Ftype: StringType},\n\t}}\n\n\tif !tNew.Desc.equals(&tdAns) {\n\t\tt.Fatalf(\"unexpected desc after join\")\n\t}\n}\n\nfunc TestTupleProject2(t *testing.T) {\n\tvar td = 
TupleDesc{Fields: []FieldType{\n\t\t{Fname: \"name1\", TableQualifier: \"tq1\", Ftype: StringType},\n\t\t{Fname: \"name2\", TableQualifier: \"tq2\", Ftype: StringType},\n\t\t{Fname: \"name1\", TableQualifier: \"tq2\", Ftype: StringType},\n\t}}\n\n\tvar t1 = Tuple{\n\t\tDesc: td,\n\t\tFields: []DBValue{\n\t\t\tStringField{\"SFname1tq1\"},\n\t\t\tStringField{\"SFname2tq2\"},\n\t\t\tStringField{\"SFname1tq2\"},\n\t\t}}\n\n\tt2, err := t1.project([]FieldType{\n\t\t{Fname: \"name1\", TableQualifier: \"tq1\", Ftype: StringType},\n\t\t{Fname: \"name2\", TableQualifier: \"\", Ftype: StringType},\n\t\t{Fname: \"name1\", TableQualifier: \"tq1\", Ftype: StringType},\n\t\t{Fname: \"name2\", TableQualifier: \"tq2\", Ftype: StringType},\n\t\t{Fname: \"name1\", TableQualifier: \"tq2\", Ftype: StringType},\n\t})\n\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tif t2.Fields[0].(StringField).Value != \"SFname1tq1\" {\n\t\tt.Errorf(\"wrong match 0\")\n\t}\n\tif t2.Fields[1].(StringField).Value != \"SFname2tq2\" {\n\t\tt.Errorf(\"wrong match 1\")\n\t}\n\tif t2.Fields[2].(StringField).Value != \"SFname1tq1\" {\n\t\tt.Errorf(\"wrong match 2\")\n\t}\n\tif t2.Fields[3].(StringField).Value != \"SFname2tq2\" {\n\t\tt.Errorf(\"wrong match 3\")\n\t}\n\tif t2.Fields[4].(StringField).Value != \"SFname1tq2\" {\n\t\tt.Errorf(\"wrong match 4\")\n\t}\n}\n\nfunc TestTupleProject3(t *testing.T) {\n\ttd1 := TupleDesc{Fields: []FieldType{\n\t\t{Fname: \"a\", Ftype: StringType},\n\t\t{Fname: \"b\", Ftype: IntType},\n\t}}\n\n\tt1 := Tuple{\n\t\tDesc: td1,\n\t\tFields: []DBValue{\n\t\t\tStringField{\"sam\"},\n\t\t\tIntField{25},\n\t\t}}\n\n\tft1 := FieldType{\"a\", \"\", StringType}\n\tft2 := FieldType{\"b\", \"\", IntType}\n\toutTup, err := t1.project([]FieldType{ft1})\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif (len(outTup.Fields)) != 1 {\n\t\tt.Fatalf(\"project returned %d fields, expected 1\", len(outTup.Fields))\n\t}\n\tv, ok := outTup.Fields[0].(StringField)\n\n\tif !ok {\n\t\tt.Fatalf(\"project of name didn't return string\")\n\t}\n\tif v.Value != \"sam\" {\n\t\tt.Fatalf(\"project didn't return sam\")\n\n\t}\n\toutTup, _ = t1.project([]FieldType{ft2})\n\tif (len(outTup.Fields)) != 1 {\n\t\tt.Fatalf(\"project returned %d fields, expected 1\", len(outTup.Fields))\n\t}\n\tv2, ok := outTup.Fields[0].(IntField)\n\n\tif !ok {\n\t\tt.Fatalf(\"project of name didn't return int\")\n\t}\n\tif v2.Value != 25 {\n\t\tt.Fatalf(\"project didn't return 25\")\n\t}\n\n\toutTup, _ = t1.project([]FieldType{ft2, ft1})\n\tif (len(outTup.Fields)) != 2 {\n\t\tt.Fatalf(\"project returned %d fields, expected 2\", len(outTup.Fields))\n\t}\n\tv, ok = outTup.Fields[1].(StringField)\n\tif !ok {\n\t\tt.Fatalf(\"project of name didn't return string in second field\")\n\t}\n\tif v.Value != \"sam\" {\n\t\tt.Fatalf(\"project didn't return sam\")\n\n\t}\n\n\tv2, ok = outTup.Fields[0].(IntField)\n\tif !ok {\n\t\tt.Fatalf(\"project of name didn't return int in first field\")\n\t}\n\tif v2.Value != 25 {\n\t\tt.Fatalf(\"project didn't return 25\")\n\t}\n}\n\nfunc TestTupleJoinNil(t *testing.T) {\n\t_, t1, t2 := makeTupleTestVars()\n\ttNew := joinTuples(&t1, nil)\n\tif !tNew.equals(&t1) {\n\t\tt.Fatalf(\"Unexpected output of joinTuple with nil\")\n\t}\n\tif tNew.equals(&t2) {\n\t\tt.Fatalf(\"Unexpected output of joinTuple with nil\")\n\t}\n\ttNew2 := joinTuples(nil, &t2)\n\tif !tNew2.equals(&t2) {\n\t\tt.Fatalf(\"Unexpected output of joinTuple with nil\")\n\t}\n\tif tNew2.equals(&t1) {\n\t\tt.Fatalf(\"Unexpected output of joinTuple with 
nil\")\n\t}\n}\n\nfunc TestTupleJoinDesc2(t *testing.T) {\n\t_, t1, t2 := makeTupleTestVars()\n\ttNew := joinTuples(&t1, &t2)\n\tif len(tNew.Desc.Fields) != 4 {\n\t\tt.Fatalf(\"Expected 4 fields in desc after join\")\n\t}\n\tfields := []string{\"name\", \"age\", \"name\", \"age\"}\n\tfor i, fname := range fields {\n\t\tif tNew.Desc.Fields[i].Fname != fname {\n\t\t\tt.Fatalf(\"expected %dth field to be named %s\", i, fname)\n\t\t}\n\t}\n}\n" - } - ], - "test_command": [ - "go test tuple_test.go" - ] - }, - { - "instance_id": 2, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 1: GoDB", - "part_name": "2.4. BufferPool", - "exercise": "Exercise 2", - "introduction": "## 2. GoDB Architecture and Implementation Guide\n\nGoDB consists of:\n\n* Structures that represent fields, tuples, and tuple schemas;\n* Methods that apply predicates and conditions to tuples;\n* One or more access methods (e.g., heap files) that store relations on disk and\n provide a way to iterate through tuples of those relations;\n* A collection of operator classes (e.g., select, join, insert, delete, etc.)\n that process tuples;\n* A buffer pool that caches active tuples and pages in memory and handles\n concurrency control and transactions (neither of which you need to worry about\n for this lab); and,\n* A catalog that stores information about available tables and their schemas.\n\nGoDB does not include many things that you may think of as being a part of a\n\"database system.\" In particular, GoDB does not have:\n\n* (In this lab), a SQL front end or parser that allows you to type queries\n directly into GoDB. Instead, queries are built up by chaining a set of\n operators together into a hand-built query plan (see [Section\n 2.6](#query_walkthrough)). We will provide a simple parser for use in later\n labs.\n* Views.\n* Data types except integers and fixed length strings.\n* (In this lab) Query optimizer.\n* (In this lab) Indices.\n\nIn the rest of this Section, we describe each of the main components of GoDB\nthat you will need to implement in this lab. You should use the exercises in\nthis discussion to guide your implementation. This document is by no means a\ncomplete specification for GoDB; you will need to make decisions about how\nto design and implement various parts of the system. Note that for Lab 1 you do\nnot need to implement any operators (e.g., select, join, project) except\nsequential scan as a part of the `heap_file.go` file.\nYou will add support for additional operators in future labs.", - "Description": "### 2.4. BufferPool\n\nThe buffer pool (class `BufferPool` in GoDB) is responsible for caching\npages in memory that have been recently read from disk. All operators read and\nwrite pages from various files on disk through the buffer pool. It consists of a\nfixed number of pages, defined by the `numPages` parameter to the `BufferPool`\nconstructor `NewBufferPool`. \n\nFor this lab,\nyou need to implement the constructor and the `BufferPool.getPage()` method\nused by the `HeapFile` iterator, as well as the `BufferPool.flushAllPages()` method.\nThe buffer pool stores structs that implement the `Page` interface; these pages can be read from\nunderlying database files (such as a heap file) which implement the `DBFile` interface using the\n`readPage` method.\nThe BufferPool should store up to `numPages`\npages. `numPages` is passed to the constructor as a parameter. 
If more than `numPages` requests are made for different\npages, you should evict one of them according to an eviction policy of your choice (nothing sophisticated needed).\nNote that you *should not* evict dirty pages (pages where the `Page` method `isDirty()` returns true), for\nreasons we will explain when we discuss transactions later in the class. For Lab 1, if all pages are dirty, return an error.\n\n\n\n### Exercise 2\n\n**Implement the `getPage()`, `flushAllPages`, and constructor method in:**\n\n---\n* `buffer_pool.go`\n---\nThere is a unit test suite `buffer_pool_test.go`, but you will not be able to pass this test\nuntil you implement the heap file and heap page methods below. You will also test the functionality\nof the buffer pool when you implement your heap file iterator.\n\n\n\n", - "repo/location": "go get main\ncd godb\ngo get ../godb\ngo test", - "dependency": [ - "3", - "4" - ], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab1.md", - "codes": [ - { - "code_path": "godb/buffer_pool.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/buffer_pool.go", - "code_content": "package godb\n\n//BufferPool provides methods to cache pages that have been read from disk.\n//It has a fixed capacity to limit the total amount of memory used by GoDB.\n//It is also the primary way in which transactions are enforced, by using page\n//level locking (you will not need to worry about this until lab3).\n\nimport (\n\t\"fmt\"\n)\n\n// Permissions used to when reading / locking pages\ntype RWPerm int\n\nconst (\n\tReadPerm RWPerm = iota\n\tWritePerm RWPerm = iota\n)\n\ntype BufferPool struct {\n\t// TODO: some code goes here\n}\n\n// Create a new BufferPool with the specified number of pages\nfunc NewBufferPool(numPages int) (*BufferPool, error) {\n\treturn &BufferPool{}, fmt.Errorf(\"NewBufferPool not implemented\")\n}\n\n// Testing method -- iterate through all pages in the buffer pool\n// and flush them using [DBFile.flushPage]. Does not need to be thread/transaction safe.\n// Mark pages as not dirty after flushing them.\nfunc (bp *BufferPool) FlushAllPages() {\n\t// TODO: some code goes here\n}\n\n// Abort the transaction, releasing locks. Because GoDB is FORCE/NO STEAL, none\n// of the pages tid has dirtied will be on disk so it is sufficient to just\n// release locks to abort. You do not need to implement this for lab 1.\nfunc (bp *BufferPool) AbortTransaction(tid TransactionID) {\n\t// TODO: some code goes here\n}\n\n// Commit the transaction, releasing locks. Because GoDB is FORCE/NO STEAL, none\n// of the pages tid has dirtied will be on disk, so prior to releasing locks you\n// should iterate through pages and write them to disk. In GoDB lab3 we assume\n// that the system will not crash while doing this, allowing us to avoid using a\n// WAL. You do not need to implement this for lab 1.\nfunc (bp *BufferPool) CommitTransaction(tid TransactionID) {\n\t// TODO: some code goes here\n}\n\n// Begin a new transaction. You do not need to implement this for lab 1.\n//\n// Returns an error if the transaction is already running.\nfunc (bp *BufferPool) BeginTransaction(tid TransactionID) error {\n\t// TODO: some code goes here\n\treturn nil\n}\n\n// Retrieve the specified page from the specified DBFile (e.g., a HeapFile), on\n// behalf of the specified transaction. If a page is not cached in the buffer pool,\n// you can read it from disk uing [DBFile.readPage]. 
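// --- Illustrative sketch (not part of the provided skeleton) -----------------
// One possible in-memory layout for the buffer pool, assuming only the Page
// interface from this package (with isDirty()). The type and field names are
// hypothetical; the lab only requires holding at most numPages pages and never
// evicting a dirty one.
type bufferPoolSketch struct {
	capacity int          // the numPages limit passed to the constructor
	pages    map[any]Page // cached pages, keyed by the DBFile.pageKey value
}

// evictOne drops one clean page (any eviction policy is fine for Lab 1) and
// returns an error if every cached page is dirty, matching the rule above.
func (bp *bufferPoolSketch) evictOne() error {
	for key, pg := range bp.pages {
		if !pg.isDirty() {
			delete(bp.pages, key)
			return nil
		}
	}
	return fmt.Errorf("buffer pool full: all %d cached pages are dirty", len(bp.pages))
}
// -----------------------------------------------------------------------------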
If the buffer pool is full (i.e.,\n// already stores numPages pages), a page should be evicted. Should not evict\n// pages that are dirty, as this would violate NO STEAL. If the buffer pool is\n// full of dirty pages, you should return an error. Before returning the page,\n// attempt to lock it with the specified permission. If the lock is\n// unavailable, should block until the lock is free. If a deadlock occurs, abort\n// one of the transactions in the deadlock. For lab 1, you do not need to\n// implement locking or deadlock detection. You will likely want to store a list\n// of pages in the BufferPool in a map keyed by the [DBFile.pageKey].\nfunc (bp *BufferPool) GetPage(file DBFile, pageNo int, tid TransactionID, perm RWPerm) (Page, error) {\n\treturn nil, fmt.Errorf(\"GetPage not implemented\")\n}\n" - } - ], - "test_codes": [ - { - "code_name": "buffer_pool_test.go", - "code_path": "godb/buffer_pool_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/buffer_pool_test.go", - "code_content": "package godb\n\nimport (\n\t\"os\"\n\t\"testing\"\n)\n\nfunc TestBufferPoolGetPage(t *testing.T) {\n\t_, t1, t2, hf, bp, _ := makeTestVars(t)\n\ttid := NewTID()\n\tfor i := 0; i < 300; i++ {\n\t\tbp.BeginTransaction(tid)\n\t\terr := hf.insertTuple(&t1, tid)\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"%v\", err)\n\t\t}\n\t\terr = hf.insertTuple(&t2, tid)\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"%v\", err)\n\t\t}\n\n\t\t// Force dirty pages to disk. CommitTransaction may not be implemented\n\t\t// yet if this is called in lab 1 or 2.\n\t\tbp.FlushAllPages()\n\n\t\t// commit transaction\n\t\tbp.CommitTransaction(tid)\n\t}\n\tbp.BeginTransaction(tid)\n\t//expect 6 pages\n\tfor i := 0; i < 6; i++ {\n\t\tpg, err := bp.GetPage(hf, i, tid, ReadPerm)\n\t\tif pg == nil || err != nil {\n\t\t\tt.Fatalf(\"failed to get page %d (err = %v)\", i, err)\n\t\t}\n\t}\n\t_, err := bp.GetPage(hf, 7, tid, ReadPerm)\n\tif err == nil {\n\t\tt.Fatalf(\"No error when getting page 7 from a file with 6 pages.\")\n\t}\n}\n\nfunc TestSetDirty(t *testing.T) {\n\t_, t1, _, hf, bp, _ := makeTestVars(t)\n\ttid := NewTID()\n\tbp.BeginTransaction(tid)\n\tfor i := 0; i < 308; i++ {\n\t\terr := hf.insertTuple(&t1, tid)\n\t\tif err != nil && (i == 306 || i == 307) {\n\t\t\treturn\n\t\t} else if err != nil {\n\t\t\tt.Fatalf(\"%v\", err)\n\t\t}\n\t}\n\tbp.CommitTransaction(tid)\n\tt.Fatalf(\"Expected error due to all pages in BufferPool being dirty\")\n}\n\n// Test is only valid up to Lab 4. In Lab 5 we switch from FORCE/NOSTEAL to NOFORCE/STEAL.\nfunc TestBufferPoolHoldsMultipleHeapFiles(t *testing.T) {\n\tif os.Getenv(\"LAB\") == \"5\" {\n\t\tt.Skip(\"This test is only valid up to Lab 4. 
Skipping\")\n\t}\n\n\ttd, t1, t2, hf, bp, tid := makeTestVars(t)\n\tos.Remove(TestingFile2)\n\thf2, err := NewHeapFile(TestingFile2, &td, bp)\n\tif err != nil {\n\t\tprint(\"ERROR MAKING TEST VARS, BLARGH\")\n\t\tpanic(err)\n\t}\n\n\terr1 := hf.insertTuple(&t1, tid)\n\terr2 := hf.insertTuple(&t1, tid)\n\terr3 := hf2.insertTuple(&t2, tid)\n\n\tif err1 != nil || err2 != nil || err3 != nil {\n\t\tt.Errorf(\"The BufferPool should be able to handle multiple files\")\n\t}\n\t// bp contains 2 dirty pages at this point\n\n\thf2TupCntPerPage := 0\n\tfor hf2.NumPages() <= 1 {\n\t\tif err := hf2.insertTuple(&t2, tid); err != nil {\n\t\t\tt.Errorf(\"%v\", err)\n\t\t}\n\t\thf2TupCntPerPage++\n\t}\n\t// bp contains 3 dirty pages at this point\n\n\tfor i := 0; i < hf2TupCntPerPage-1; i++ {\n\t\tif err := hf2.insertTuple(&t2, tid); err != nil {\n\t\t\tt.Errorf(\"%v\", err)\n\t\t}\n\t}\n\n\t// bp contains 3 dirty pages at this point, including 2 full pages of hf2\n\t_ = hf2.insertTuple(&t2, tid)\n\tif err := hf2.insertTuple(&t2, tid); err == nil {\n\t\tt.Errorf(\"should cause bufferpool dirty page overflow here\")\n\t}\n}\n" - } - ], - "test_command": [ - "go test buffer_pool_test.go" - ] - }, - { - "instance_id": 3, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 1: GoDB", - "part_name": "2.5. HeapFile access method", - "exercise": "Exercise 3", - "introduction": "## 2. GoDB Architecture and Implementation Guide\n\nGoDB consists of:\n\n* Structures that represent fields, tuples, and tuple schemas;\n* Methods that apply predicates and conditions to tuples;\n* One or more access methods (e.g., heap files) that store relations on disk and\n provide a way to iterate through tuples of those relations;\n* A collection of operator classes (e.g., select, join, insert, delete, etc.)\n that process tuples;\n* A buffer pool that caches active tuples and pages in memory and handles\n concurrency control and transactions (neither of which you need to worry about\n for this lab); and,\n* A catalog that stores information about available tables and their schemas.\n\nGoDB does not include many things that you may think of as being a part of a\n\"database system.\" In particular, GoDB does not have:\n\n* (In this lab), a SQL front end or parser that allows you to type queries\n directly into GoDB. Instead, queries are built up by chaining a set of\n operators together into a hand-built query plan (see [Section\n 2.6](#query_walkthrough)). We will provide a simple parser for use in later\n labs.\n* Views.\n* Data types except integers and fixed length strings.\n* (In this lab) Query optimizer.\n* (In this lab) Indices.\n\nIn the rest of this Section, we describe each of the main components of GoDB\nthat you will need to implement in this lab. You should use the exercises in\nthis discussion to guide your implementation. This document is by no means a\ncomplete specification for GoDB; you will need to make decisions about how\nto design and implement various parts of the system. Note that for Lab 1 you do\nnot need to implement any operators (e.g., select, join, project) except\nsequential scan as a part of the `heap_file.go` file.\nYou will add support for additional operators in future labs.", - "Description": "### 2.5. `HeapFile` access method\n\nAccess methods provide a way to read or write data from disk that is arranged in\na specific way. 
Common access methods include heap files (unsorted files of\ntuples) and B-trees; for this assignment, you will only implement a heap file\naccess method, and we have written some of the code for you.\n\nA `HeapFile` object is arranged into a set of pages, each of which consists of a\nfixed number of bytes for storing tuples, (defined by the constant\n`PageSize`), including a header. In GoDB, there is one\n`HeapFile` object for each table in the database. Each page in a `HeapFile` is\narranged as a set of slots, each of which can hold one tuple (tuples for a given\ntable in GoDB are all of the same size). \nPages of `HeapFile` objects are of type `HeapPage` which\nimplements the `Page` interface. Pages are stored in the buffer pool but are\nread and written by the `HeapFile` class. Because pages are fixed size, and tuple are fixed\nsize, in GoDB, all pages store the same number of tuples. You are free to choose your\nin-memory implementation of `HeapPage` but a reasonable choice would be a slice\nof `Tuple`s. \n\nGoDB stores heap files on disk as pages of data arranged consecutively on\ndisk. On disk, each page consists of a header, followed\nby the `PageSize` - _header size_ bytes of actual page content.\n The header consists of a 32 bit\n integer with the number of slots (tuples), and a second 32 bit integer with\nthe number of used slots. See the comments at the beginning of `heap_page.go` for\nmore details on the representation.\n\n\n\n### Exercise 3\n\n**Implement the skeleton methods in:**\n\n---\n* heap_page.go\n---\n\nAlthough you are not required to use exactly our interface for `heap_page.go`,\nyou will likely find the methods we have provided to be useful and we recommend\nfollowing our skeleton. \n\nAssuming you follow our outline, there are five non-trivial methods to implement:\n\n1. `insertTuple()` : This method should add a tuple to the page if there is space. Because a heap file is unordered, it\ncan be inserted in any free slot. After inserting a tuple on a page, you should mark it dirty.\n\n2. `deleteTuple()` : Delete a specific tuple from the page.\nNote that this method takes a specific recordID (or \"rid\") to delete. recordID is an empty interface; you are free\nto use any struct you like for the rid, but for a heap file a rid would typically include the page number and the slot number on the page.\nThe page number would typically be the offset in the heap file of the page, and the slot number would likely by the position of the tuple\nin the in-memory slice of tuples on the page. You will set the rid field of the tuples you return from your iterator. Your heap file implementation should use this rid to identify the specific page to delete from, and then pass the rid into this method so that you can delete the appropriate tuple. Note that if you choose to represent a page in memory as a slice of tuples, and the slot in the rid is the position in the slice, you should take care to not cause the rid to change when you perform the deletion. One way to achieve this is to set the position in the slice to nil (rather than creating a new slice with the deleted tuple removed from it), but many implementations are possible. After deleting a tuple from a page, you should mark it dirty.\n\n3. `toBuffer()` : Serialize the pages to a `bytes.Buffer` object for saving to disk, using the `binary.Write()` method to encode the header and the `writeTo()` method from your tuple implementation. 
Note that the header includes the number of used slots, but does not encode which slots are empty and which are not. This is ok, because, in GoDB you do not need to preserve the record ids of records when they are written out (so a particular tuple's rid may change after it is written and then read back.) \n\n4. `initFromBuffer()` : Read the page from the specified buffer by reading the header with the `binary.Read()` method and then the tuples using the `readTupleFrom()` method.\n\n5. `tupleIter()` : Return a function that can be invoked to interate through the tuples of the page. See the note about iterators in [2.2](#22-operators-and-iterators) above.\n\nThere are a few other relatively simpler methods (`setDirty()`, `isDirty()`, `getNumSlots()`, and the `newHeapPage()` constructor) that you will need to implement\n\nAt this point, your code should pass the unit tests in `heap_page_test.go`.\n\nAfter you have implemented `HeapPage`, you will write methods for `HeapFile` that\nread pages from the file, iterate through pages, and insert and delete\nrecords. ", - "repo/location": "go get main\ncd godb\ngo get ../godb\ngo test", - "dependency": [], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab1.md", - "codes": [ - { - "code_path": "godb/heap_page.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/heap_page.go", - "code_content": "package godb\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n)\n\n/* HeapPage implements the Page interface for pages of HeapFiles. We have\nprovided our interface to HeapPage below for you to fill in, but you are not\nrequired to implement these methods except for the three methods that the Page\ninterface requires. You will want to use an interface like what we provide to\nimplement the methods of [HeapFile] that insert, delete, and iterate through\ntuples.\n\nIn GoDB all tuples are fixed length, which means that given a TupleDesc it is\npossible to figure out how many tuple \"slots\" fit on a given page.\n\nIn addition, all pages are PageSize bytes. They begin with a header with a 32\nbit integer with the number of slots (tuples), and a second 32 bit integer with\nthe number of used slots.\n\nEach tuple occupies the same number of bytes. You can use the go function\nunsafe.Sizeof() to determine the size in bytes of an object. So, a GoDB integer\n(represented as an int64) requires unsafe.Sizeof(int64(0)) bytes. For strings,\nwe encode them as byte arrays of StringLength, so they are size\n((int)(unsafe.Sizeof(byte('a')))) * StringLength bytes. The size in bytes of a\ntuple is just the sum of the size in bytes of its fields.\n\nOnce you have figured out how big a record is, you can determine the number of\nslots on on the page as:\n\nremPageSize = PageSize - 8 // bytes after header\nnumSlots = remPageSize / bytesPerTuple //integer division will round down\n\nTo serialize a page to a buffer, you can then:\n\nwrite the number of slots as an int32\nwrite the number of used slots as an int32\nwrite the tuples themselves to the buffer\n\nYou will follow the inverse process to read pages from a buffer.\n\nNote that to process deletions you will likely delete tuples at a specific\nposition (slot) in the heap page. This means that after a page is read from\ndisk, tuples should retain the same slot number. 
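// --- Illustrative sketch (not part of the provided skeleton) -----------------
// The slot arithmetic and header layout described in the comment above, spelled
// out as standalone helpers. The helper names are hypothetical, and this
// assumes the "encoding/binary" and "unsafe" imports in addition to "bytes".
func slotsPerPageSketch(desc *TupleDesc) int {
	bytesPerTuple := 0
	for _, f := range desc.Fields {
		switch f.Ftype {
		case IntType:
			bytesPerTuple += int(unsafe.Sizeof(int64(0))) // 8 bytes per int field
		case StringType:
			bytesPerTuple += StringLength // fixed-length byte encoding
		}
	}
	return (PageSize - 8) / bytesPerTuple // 8 header bytes: two int32 counters
}

// writeHeaderSketch encodes the two 32-bit header fields in little-endian
// order, which is the layout toBuffer is expected to produce.
func writeHeaderSketch(buf *bytes.Buffer, numSlots, usedSlots int32) error {
	if err := binary.Write(buf, binary.LittleEndian, numSlots); err != nil {
		return err
	}
	return binary.Write(buf, binary.LittleEndian, usedSlots)
}
// -----------------------------------------------------------------------------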
Because GoDB will never evict a\ndirty page, it's OK if tuples are renumbered when they are written back to disk.\n\n*/\n\ntype heapPage struct {\n\t// TODO: some code goes here\n}\n\n// Construct a new heap page\nfunc newHeapPage(desc *TupleDesc, pageNo int, f *HeapFile) (*heapPage, error) {\n\t// TODO: some code goes here\n\treturn &heapPage{}, fmt.Errorf(\"newHeapPage is not implemented\") //replace me\n}\n\nfunc (h *heapPage) getNumSlots() int {\n\t// TODO: some code goes here\n\treturn 0 //replace me\n}\n\n// Insert the tuple into a free slot on the page, or return an error if there are\n// no free slots. Set the tuples rid and return it.\nfunc (h *heapPage) insertTuple(t *Tuple) (recordID, error) {\n\t// TODO: some code goes here\n\treturn 0, fmt.Errorf(\"insertTuple not implemented\") //replace me\n}\n\n// Delete the tuple at the specified record ID, or return an error if the ID is\n// invalid.\nfunc (h *heapPage) deleteTuple(rid recordID) error {\n\t// TODO: some code goes here\n\treturn fmt.Errorf(\"deleteTuple not implemented\") //replace me\n}\n\n// Page method - return whether or not the page is dirty\nfunc (h *heapPage) isDirty() bool {\n\t// TODO: some code goes here\n\treturn false //replace me\n}\n\n// Page method - mark the page as dirty\nfunc (h *heapPage) setDirty(tid TransactionID, dirty bool) {\n\t// TODO: some code goes here\n}\n\n// Page method - return the corresponding HeapFile\n// for this page.\nfunc (p *heapPage) getFile() DBFile {\n\t// TODO: some code goes here\n\treturn nil //replace me\n}\n\n// Allocate a new bytes.Buffer and write the heap page to it. Returns an error\n// if the write to the the buffer fails. You will likely want to call this from\n// your [HeapFile.flushPage] method. You should write the page header, using\n// the binary.Write method in LittleEndian order, followed by the tuples of the\n// page, written using the Tuple.writeTo method.\nfunc (h *heapPage) toBuffer() (*bytes.Buffer, error) {\n\t// TODO: some code goes here\n\treturn nil, fmt.Errorf(\"heap_page.toBuffer not implemented\") //replace me\n}\n\n// Read the contents of the HeapPage from the supplied buffer.\nfunc (h *heapPage) initFromBuffer(buf *bytes.Buffer) error {\n\t// TODO: some code goes here\n\treturn fmt.Errorf(\"initFromBuffer not implemented\") //replace me\n}\n\n// Return a function that iterates through the tuples of the heap page. Be sure\n// to set the rid of the tuple to the rid struct of your choosing beforing\n// return it. 
Return nil, nil when the last tuple is reached.\nfunc (p *heapPage) tupleIter() func() (*Tuple, error) {\n\t// TODO: some code goes here\n\treturn func() (*Tuple, error) {\n\treturn nil, fmt.Errorf(\"heap_file.Iterator not implemented\") // replace me\n\t}\n}\n" - } - ], - "test_codes": [ - { - "code_name": "heap_page_test.go", - "code_path": "godb/heap_page_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/heap_page_test.go", - "code_content": "package godb\n\nimport (\n\t\"testing\"\n\t\"unsafe\"\n)\n\nfunc TestHeapPageInsert(t *testing.T) {\n\ttd, t1, t2, hf, _, _ := makeTestVars(t)\n\tpg, err := newHeapPage(&td, 0, hf)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tvar expectedSlots = (PageSize - 8) / (StringLength + int(unsafe.Sizeof(int64(0))))\n\tif pg.getNumSlots() != expectedSlots {\n\t\tt.Fatalf(\"Incorrect number of slots, expected %d, got %d\", expectedSlots, pg.getNumSlots())\n\t}\n\n\t_, err = pg.insertTuple(&t1)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\t_, err = pg.insertTuple(&t2)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\titer := pg.tupleIter()\n\tif iter == nil {\n\t\tt.Fatalf(\"Iterator was nil\")\n\t}\n\n\tcnt := 0\n\tfor {\n\t\ttup, err := iter()\n\t\tif err != nil {\n\t\t\tt.Fatalf(err.Error())\n\t\t}\n\t\tif tup == nil {\n\t\t\tbreak\n\t\t}\n\n\t\tcnt += 1\n\t}\n\tif cnt != 2 {\n\t\tt.Errorf(\"Expected 2 tuples in interator, got %d\", cnt)\n\t}\n}\n\nfunc TestHeapPageDelete(t *testing.T) {\n\ttd, t1, t2, hf, _, _ := makeTestVars(t)\n\tpg, err := newHeapPage(&td, 0, hf)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tpg.insertTuple(&t1)\n\tslotNo, _ := pg.insertTuple(&t2)\n\tpg.deleteTuple(slotNo)\n\n\titer := pg.tupleIter()\n\tif iter == nil {\n\t\tt.Fatalf(\"Iterator was nil\")\n\t}\n\tcnt := 0\n\tfor {\n\n\t\ttup, _ := iter()\n\t\tif tup == nil {\n\t\t\tbreak\n\t\t}\n\n\t\tcnt += 1\n\t}\n\tif cnt != 1 {\n\t\tt.Errorf(\"Expected 1 tuple in interator, got %d\", cnt)\n\t}\n}\n\n// Unit test for insertTuple\nfunc TestHeapPageInsertTuple(t *testing.T) {\n\ttd, t1, _, hf, _, _ := makeTestVars(t)\n\tpage, err := newHeapPage(&td, 0, hf)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tfree := page.getNumSlots()\n\n\tfor i := 0; i < free; i++ {\n\t\tvar addition = Tuple{\n\t\t\tDesc: td,\n\t\t\tFields: []DBValue{\n\t\t\t\tStringField{\"sam\"},\n\t\t\t\tIntField{int64(i)},\n\t\t\t},\n\t\t}\n\t\tpage.insertTuple(&addition)\n\n\t\titer := page.tupleIter()\n\t\tif iter == nil {\n\t\t\tt.Fatalf(\"Iterator was nil\")\n\t\t}\n\t\tcnt, found := 0, false\n\t\tfor {\n\n\t\t\ttup, _ := iter()\n\t\t\tfound = found || addition.equals(tup)\n\t\t\tif tup == nil {\n\t\t\t\tbreak\n\t\t\t}\n\n\t\t\tcnt += 1\n\t\t}\n\t\tif cnt != i+1 {\n\t\t\tt.Errorf(\"Expected %d tuple in interator, got %d\", i+1, cnt)\n\t\t}\n\t\tif !found {\n\t\t\tt.Errorf(\"Expected inserted tuple to be FOUND, got NOT FOUND\")\n\t\t}\n\t}\n\n\t_, err = page.insertTuple(&t1)\n\n\tif err == nil {\n\t\tt.Errorf(\"Expected error due to full page\")\n\t}\n}\n\n// Unit test for deleteTuple\nfunc TestHeapPageDeleteTuple(t *testing.T) {\n\ttd, _, _, hf, _, _ := makeTestVars(t)\n\tpage, err := newHeapPage(&td, 0, hf)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tfree := page.getNumSlots()\n\n\tlist := make([]recordID, free)\n\tfor i := 0; i < free; i++ {\n\t\tvar addition = Tuple{\n\t\t\tDesc: td,\n\t\t\tFields: []DBValue{\n\t\t\t\tStringField{\"sam\"},\n\t\t\t\tIntField{int64(i)},\n\t\t\t},\n\t\t}\n\t\tlist[i], _ = 
page.insertTuple(&addition)\n\t}\n\tif len(list) == 0 {\n\t\tt.Fatalf(\"Rid list is empty.\")\n\t}\n\tfor i, rnd := free-1, 0xdefaced; i > 0; i, rnd = i-1, (rnd*0x7deface1+12354)%0x7deface9 {\n\t\t// Generate a random index j such that 0 <= j <= i.\n\t\tj := rnd % (i + 1)\n\n\t\t// Swap arr[i] and arr[j].\n\t\tlist[i], list[j] = list[j], list[i]\n\t}\n\n\tfor _, rid := range list {\n\t\terr := page.deleteTuple(rid)\n\t\tif err != nil {\n\t\t\tt.Errorf(\"Found error %s\", err.Error())\n\t\t}\n\t}\n\n\terr = page.deleteTuple(list[0])\n\tif err == nil {\n\t\tt.Errorf(\"page should be empty; expected error\")\n\t}\n}\n\n// Unit test for isDirty, setDirty\nfunc TestHeapPageDirty(t *testing.T) {\n\ttd, _, _, hf, _, _ := makeTestVars(t)\n\tpage, err := newHeapPage(&td, 0, hf)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tpage.setDirty(0, true)\n\tif !page.isDirty() {\n\t\tt.Errorf(\"page should be dirty\")\n\t}\n\tpage.setDirty(0, true)\n\tif !page.isDirty() {\n\t\tt.Errorf(\"page should be dirty\")\n\t}\n\tpage.setDirty(-1, false)\n\tif page.isDirty() {\n\t\tt.Errorf(\"page should be not dirty\")\n\t}\n}\n\n// Unit test for toBuffer and initFromBuffer\nfunc TestHeapPageSerialization(t *testing.T) {\n\ttd, _, _, hf, _, _ := makeTestVars(t)\n\tpage, err := newHeapPage(&td, 0, hf)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tfree := page.getNumSlots()\n\n\tfor i := 0; i < free-1; i++ {\n\t\tvar addition = Tuple{\n\t\t\tDesc: td,\n\t\t\tFields: []DBValue{\n\t\t\t\tStringField{\"sam\"},\n\t\t\t\tIntField{int64(i)},\n\t\t\t},\n\t\t}\n\t\tpage.insertTuple(&addition)\n\t}\n\n\tbuf, _ := page.toBuffer()\n\tpage2, err := newHeapPage(&td, 0, hf)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\terr = page2.initFromBuffer(buf)\n\tif err != nil {\n\t\tt.Fatalf(\"Error loading heap page from buffer.\")\n\t}\n\n\titer, iter2 := page.tupleIter(), page2.tupleIter()\n\tif iter == nil {\n\t\tt.Fatalf(\"iter was nil.\")\n\t}\n\tif iter2 == nil {\n\t\tt.Fatalf(\"iter2 was nil.\")\n\t}\n\n\tfindEqCount := func(t0 *Tuple, iter3 func() (*Tuple, error)) int {\n\t\tcnt := 0\n\t\tfor tup, _ := iter3(); tup != nil; tup, _ = iter3() {\n\t\t\tif t0.equals(tup) {\n\t\t\t\tcnt += 1\n\t\t\t}\n\t\t}\n\t\treturn cnt\n\t}\n\n\tfor {\n\t\ttup, _ := iter()\n\t\tif tup == nil {\n\t\t\tbreak\n\t\t}\n\t\tif findEqCount(tup, page.tupleIter()) != findEqCount(tup, page2.tupleIter()) {\n\t\t\tt.Errorf(\"Serialization / deserialization doesn't result in identical heap page.\")\n\t\t}\n\t}\n}\n\nfunc TestHeapPageBufferLen(t *testing.T) {\n\ttd, _, _, hf, _, _ := makeTestVars(t)\n\tpage, err := newHeapPage(&td, 0, hf)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tfree := page.getNumSlots()\n\n\tfor i := 0; i < free-1; i++ {\n\t\tvar addition = Tuple{\n\t\t\tDesc: td,\n\t\t\tFields: []DBValue{\n\t\t\t\tStringField{\"sam\"},\n\t\t\t\tIntField{int64(i)},\n\t\t\t},\n\t\t}\n\t\tpage.insertTuple(&addition)\n\t}\n\n\tbuf, _ := page.toBuffer()\n\n\tif buf.Len() != PageSize {\n\t\tt.Fatalf(\"HeapPage.toBuffer returns buffer of unexpected size; NOTE: This error may be OK, but many implementations that don't write full pages break.\")\n\t}\n}\n" - } - ], - "test_command": [ - "go test heap_page_test.go" - ] - }, - { - "instance_id": 4, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 1: GoDB", - "part_name": "2.5. HeapFile access method", - "exercise": "Exercise 4", - "introduction": "## 2. 
GoDB Architecture and Implementation Guide\n\nGoDB consists of:\n\n* Structures that represent fields, tuples, and tuple schemas;\n* Methods that apply predicates and conditions to tuples;\n* One or more access methods (e.g., heap files) that store relations on disk and\n provide a way to iterate through tuples of those relations;\n* A collection of operator classes (e.g., select, join, insert, delete, etc.)\n that process tuples;\n* A buffer pool that caches active tuples and pages in memory and handles\n concurrency control and transactions (neither of which you need to worry about\n for this lab); and,\n* A catalog that stores information about available tables and their schemas.\n\nGoDB does not include many things that you may think of as being a part of a\n\"database system.\" In particular, GoDB does not have:\n\n* (In this lab), a SQL front end or parser that allows you to type queries\n directly into GoDB. Instead, queries are built up by chaining a set of\n operators together into a hand-built query plan (see [Section\n 2.6](#query_walkthrough)). We will provide a simple parser for use in later\n labs.\n* Views.\n* Data types except integers and fixed length strings.\n* (In this lab) Query optimizer.\n* (In this lab) Indices.\n\nIn the rest of this Section, we describe each of the main components of GoDB\nthat you will need to implement in this lab. You should use the exercises in\nthis discussion to guide your implementation. This document is by no means a\ncomplete specification for GoDB; you will need to make decisions about how\nto design and implement various parts of the system. Note that for Lab 1 you do\nnot need to implement any operators (e.g., select, join, project) except\nsequential scan as a part of the `heap_file.go` file.\nYou will add support for additional operators in future labs.", - "Description": "### 2.5. `HeapFile` access method\n\nAccess methods provide a way to read or write data from disk that is arranged in\na specific way. Common access methods include heap files (unsorted files of\ntuples) and B-trees; for this assignment, you will only implement a heap file\naccess method, and we have written some of the code for you.\n\nA `HeapFile` object is arranged into a set of pages, each of which consists of a\nfixed number of bytes for storing tuples, (defined by the constant\n`PageSize`), including a header. In GoDB, there is one\n`HeapFile` object for each table in the database. Each page in a `HeapFile` is\narranged as a set of slots, each of which can hold one tuple (tuples for a given\ntable in GoDB are all of the same size). \nPages of `HeapFile` objects are of type `HeapPage` which\nimplements the `Page` interface. Pages are stored in the buffer pool but are\nread and written by the `HeapFile` class. Because pages are fixed size, and tuple are fixed\nsize, in GoDB, all pages store the same number of tuples. You are free to choose your\nin-memory implementation of `HeapPage` but a reasonable choice would be a slice\nof `Tuple`s. \n\nGoDB stores heap files on disk as pages of data arranged consecutively on\ndisk. On disk, each page consists of a header, followed\nby the `PageSize` - _header size_ bytes of actual page content.\n The header consists of a 32 bit\n integer with the number of slots (tuples), and a second 32 bit integer with\nthe number of used slots. 
See the comments at the beginning of `heap_page.go` for\nmore details on the representation.\n\n\n### Exercise 4\n\n**Implement the skeleton methods in:**\n\n---\n\n* heap_file.go\n\n---\n\nThere are a number of methods you need to implement; we have provided additional implementation tips in the comments in `heap_file.go`.\n\n1. `NewHeapFile()` - The constructor. It takes a file name that contains the binary encoding of the file (we name these `table.dat` by convention), as well as the TupleDesc that can be used to determine the expected format of the file and a buffer pool object that you will use to retrieve cached pages.\n2. `NumPages()` - Return the number of pages in the heap file; you can use the `File.Stat()` method to determine the size of the heap file in bytes. \n3. `readPage()` - Read a specific page from storage. To read a page from disk, you will first need to calculate the correct offset in\n the file. Hint: you will need random access to the file in order to read and\n write pages at arbitrary offsets -- check out the golang `os.File` type and its `ReadAt()` method.\n You should not call `BufferPool` methods when reading a page from disk in the `readPage()` method, but you will\n use the buffer pool `getPage()` method in your implementations of the heap file `iterator`. Once you have read in the bytes of the page you can create the page using the heap page method `newHeapPage()`. You can convert bytes read from a file to a buffer via the `bytes.NewBuffer()` method.\n4. `flushPage()` - Force a given page object back to disk. The supplied page will be a `HeapPage`; you should cast it and retrieve its bytes via the heap page method `toBuffer()`. You can then write these bytes back to the appropriate location on disk by opening the backing file and using a method like `os.File.WriteAt()`.\n5. `insertTuple()` - Add a tuple to the heap file; because the heap file is unordered, it can be inserted in any free slot in the file\n6. `deleteTuple()` - Remove a specific tuple from the heap file. You should use the rid field of the tuple to determine which page the\n tuple is in, and call the heap page method `deleteTuple()` on the appropriage page.\n7. `Descriptor()`\n8. `Iterator()` - Return a function that iterates through the tuples of the heap file one at a time. You should iterate through the pages and use the `tupleIter()` to iterate through the the tuples of each heap page. See the note above about iterators in GoDB in [2.2](#22-operators-and-iterators) above.\n This method should read pages using the buffer pool method `getPage()` which will eventually be used (in\n a later lab) to implement locking-based concurrency control and recovery. Do\n not load the entire table into memory when the iterator is instantiated -- this will cause an\n out of memory error for very large tables. Instead, you will just load one page at a\n time as the buffer pool accesses them via calls to `readPage()`.\n9. `pageKey()` - Return a struct that can be used as a key for the page. The buffer pool uses this to determine whether the page is cached or not. We have provided an implementation hint in the comment of this function.\n\n\nAt this point, your code should pass the unit tests in `heap_file_test.go` and `buffer_pool_test.go`. This completes the unit tests for this lab. 
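As a rough illustration of the page-at-a-time iteration pattern in item 8 above (a sketch only, not the required implementation: the method name `iteratorSketch` is hypothetical, and it assumes the `bufPool` field and the `heapPage.tupleIter()` helper shown elsewhere in this lab):

```go
// Sketch: walk pages through the buffer pool one at a time, draining each
// page's tupleIter() before asking the buffer pool for the next page.
func (f *HeapFile) iteratorSketch(tid TransactionID) (func() (*Tuple, error), error) {
	pageNo := 0
	var pageIter func() (*Tuple, error)
	return func() (*Tuple, error) {
		for {
			if pageIter != nil {
				t, err := pageIter()
				if err != nil {
					return nil, err
				}
				if t != nil {
					return t, nil
				}
				pageIter = nil // current page exhausted
			}
			if pageNo >= f.NumPages() {
				return nil, nil // no more pages: end of file
			}
			pg, err := f.bufPool.GetPage(f, pageNo, tid, ReadPerm)
			if err != nil {
				return nil, err
			}
			pageIter = pg.(*heapPage).tupleIter()
			pageNo++
		}
	}, nil
}
```

Because each page is fetched lazily through `GetPage`, only one page needs to be resident at a time, which is the memory behavior the iterator description above asks for.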
You should complete the final exercises in the next section.", - "repo/location": "go get main\ncd godb\ngo get ../godb\ngo test", - "dependency": [], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab1.md", - "codes": [ - { - "code_path": "godb/heap_file.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/heap_file.go", - "code_content": "package godb\n\nimport (\n\t\"bufio\"\n\t\"fmt\"\n\t\"os\"\n\t\"strconv\"\n\t\"strings\"\n)\n\n// A HeapFile is an unordered collection of tuples.\n//\n// HeapFile is a public class because external callers may wish to instantiate\n// database tables using the method [LoadFromCSV]\ntype HeapFile struct {\n\t// TODO: some code goes here\n\t// HeapFile should include the fields below; you may want to add\n\t// additional fields\n\tbufPool *BufferPool\n}\n\n// Create a HeapFile.\n// Parameters\n// - fromFile: backing file for the HeapFile. May be empty or a previously created heap file.\n// - td: the TupleDesc for the HeapFile.\n// - bp: the BufferPool that is used to store pages read from the HeapFile\n// May return an error if the file cannot be opened or created.\nfunc NewHeapFile(fromFile string, td *TupleDesc, bp *BufferPool) (*HeapFile, error) {\n\t// TODO: some code goes here\n\treturn &HeapFile{}, fmt.Errorf(\"NewHeapFile not implemented\") //replace me\n}\n\n// Return the name of the backing file\nfunc (f *HeapFile) BackingFile() string {\n\t// TODO: some code goes here\n\treturn \"\" //replace me\n}\n\n// Return the number of pages in the heap file\nfunc (f *HeapFile) NumPages() int {\n\t// TODO: some code goes here\n\treturn 0 //replace me\n}\n\n// Load the contents of a heap file from a specified CSV file. Parameters are as follows:\n// - hasHeader: whether or not the CSV file has a header\n// - sep: the character to use to separate fields\n// - skipLastField: if true, the final field is skipped (some TPC datasets include a trailing separator on each line)\n// Returns an error if the field cannot be opened or if a line is malformed\n// We provide the implementation of this method, but it won't work until\n// [HeapFile.insertTuple] and some other utility functions are implemented\nfunc (f *HeapFile) LoadFromCSV(file *os.File, hasHeader bool, sep string, skipLastField bool) error {\n\tscanner := bufio.NewScanner(file)\n\tcnt := 0\n\tfor scanner.Scan() {\n\t\tline := scanner.Text()\n\t\tfields := strings.Split(line, sep)\n\t\tif skipLastField {\n\t\t\tfields = fields[0 : len(fields)-1]\n\t\t}\n\t\tnumFields := len(fields)\n\t\tcnt++\n\t\tdesc := f.Descriptor()\n\t\tif desc == nil || desc.Fields == nil {\n\t\t\treturn GoDBError{MalformedDataError, \"Descriptor was nil\"}\n\t\t}\n\t\tif numFields != len(desc.Fields) {\n\t\t\treturn GoDBError{MalformedDataError, fmt.Sprintf(\"LoadFromCSV: line %d (%s) does not have expected number of fields (expected %d, got %d)\", cnt, line, len(f.Descriptor().Fields), numFields)}\n\t\t}\n\t\tif cnt == 1 && hasHeader {\n\t\t\tcontinue\n\t\t}\n\t\tvar newFields []DBValue\n\t\tfor fno, field := range fields {\n\t\t\tswitch f.Descriptor().Fields[fno].Ftype {\n\t\t\tcase IntType:\n\t\t\t\tfield = strings.TrimSpace(field)\n\t\t\t\tfloatVal, err := strconv.ParseFloat(field, 64)\n\t\t\t\tif err != nil {\n\t\t\t\t\treturn GoDBError{TypeMismatchError, fmt.Sprintf(\"LoadFromCSV: couldn't convert value %s to int, tuple %d\", field, cnt)}\n\t\t\t\t}\n\t\t\t\tintValue := int(floatVal)\n\t\t\t\tnewFields = append(newFields, IntField{int64(intValue)})\n\t\t\tcase StringType:\n\t\t\t\tif 
len(field) > StringLength {\n\t\t\t\t\tfield = field[0:StringLength]\n\t\t\t\t}\n\t\t\t\tnewFields = append(newFields, StringField{field})\n\t\t\t}\n\t\t}\n\t\tnewT := Tuple{*f.Descriptor(), newFields, nil}\n\t\ttid := NewTID()\n\t\tbp := f.bufPool\n\t\tf.insertTuple(&newT, tid)\n\n\t\t// Force dirty pages to disk. CommitTransaction may not be implemented\n\t\t// yet if this is called in lab 1 or 2.\n\t\tbp.FlushAllPages()\n\n\t}\n\treturn nil\n}\n\n// Read the specified page number from the HeapFile on disk. This method is\n// called by the [BufferPool.GetPage] method when it cannot find the page in its\n// cache.\n//\n// This method will need to open the file supplied to the constructor, seek to\n// the appropriate offset, read the bytes in, and construct a [heapPage] object,\n// using the [heapPage.initFromBuffer] method.\nfunc (f *HeapFile) readPage(pageNo int) (Page, error) {\n\t// TODO: some code goes here\n\treturn nil, fmt.Errorf(\"readPage not implemented\")\n}\n\n// Add the tuple to the HeapFile. This method should search through pages in the\n// heap file, looking for empty slots and adding the tuple in the first empty\n// slot if finds.\n//\n// If none are found, it should create a new [heapPage] and insert the tuple\n// there, and write the heapPage to the end of the HeapFile (e.g., using the\n// [flushPage] method.)\n//\n// To iterate through pages, it should use the [BufferPool.GetPage method]\n// rather than directly reading pages itself. For lab 1, you do not need to\n// worry about concurrent transactions modifying the Page or HeapFile. We will\n// add support for concurrent modifications in lab 3.\n//\n// The page the tuple is inserted into should be marked as dirty.\nfunc (f *HeapFile) insertTuple(t *Tuple, tid TransactionID) error {\n\t// TODO: some code goes here\n\treturn fmt.Errorf(\"insertTuple not implemented\") //replace me\n}\n\n// Remove the provided tuple from the HeapFile.\n//\n// This method should use the [Tuple.Rid] field of t to determine which tuple to\n// remove. The Rid field should be set when the tuple is read using the\n// [Iterator] method, or is otherwise created (as in tests). Note that Rid is an\n// empty interface, so you can supply any object you wish. You will likely want\n// to identify the heap page and slot within the page that the tuple came from.\n//\n// The page the tuple is deleted from should be marked as dirty.\nfunc (f *HeapFile) deleteTuple(t *Tuple, tid TransactionID) error {\n\t// TODO: some code goes here\n\treturn fmt.Errorf(\"deleteTuple not implemented\") //replace me\n}\n\n// Method to force the specified page back to the backing file at the\n// appropriate location. This will be called by BufferPool when it wants to\n// evict a page. 
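// --- Illustrative sketch (not part of the provided skeleton) -----------------
// The offset arithmetic behind readPage and flushPage: page i occupies bytes
// [i*PageSize, (i+1)*PageSize) in the backing file, so ReadAt/WriteAt with that
// offset move exactly one page. Helper names are hypothetical, and this assumes
// the "bytes" import in addition to "os".
func readPageSketch(path string, pageNo int, td *TupleDesc, hf *HeapFile) (*heapPage, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	raw := make([]byte, PageSize)
	if _, err := file.ReadAt(raw, int64(pageNo*PageSize)); err != nil {
		return nil, err
	}
	pg, err := newHeapPage(td, pageNo, hf)
	if err != nil {
		return nil, err
	}
	return pg, pg.initFromBuffer(bytes.NewBuffer(raw))
}

// writePageSketch is the mirror image used when flushing: serialize the page
// with toBuffer and write it back at the same fixed offset.
func writePageSketch(path string, pageNo int, pg *heapPage) error {
	buf, err := pg.toBuffer()
	if err != nil {
		return err
	}
	file, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0644)
	if err != nil {
		return err
	}
	defer file.Close()
	_, err = file.WriteAt(buf.Bytes(), int64(pageNo*PageSize))
	return err
}
// -----------------------------------------------------------------------------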
The Page object should store information about its offset on\n// disk (e.g., that it is the ith page in the heap file), so you can determine\n// where to write it back.\nfunc (f *HeapFile) flushPage(p Page) error {\n\t// TODO: some code goes here\n\treturn fmt.Errorf(\"flushPage not implemented\") //replace me\n}\n\n// [Operator] descriptor method -- return the TupleDesc for this HeapFile\n// Supplied as argument to NewHeapFile.\nfunc (f *HeapFile) Descriptor() *TupleDesc {\n\t// TODO: some code goes here\n\treturn nil //replace me\n\n}\n\n// [Operator] iterator method\n// Return a function that iterates through the records in the heap file\n// Note that this method should read pages from the HeapFile using the\n// BufferPool method GetPage, rather than reading pages directly,\n// since the BufferPool caches pages and manages page-level locking state for\n// transactions\n// You should esnure that Tuples returned by this method have their Rid object\n// set appropriate so that [deleteTuple] will work (see additional comments there).\n// Make sure to set the returned tuple's TupleDescriptor to the TupleDescriptor of\n// the HeapFile. This allows it to correctly capture the table qualifier.\nfunc (f *HeapFile) Iterator(tid TransactionID) (func() (*Tuple, error), error) {\n\t// TODO: some code goes here\n\treturn func() (*Tuple, error) {\n\treturn nil, fmt.Errorf(\"heap_file.Iterator not implemented\")\n\t}, nil\n}\n\n// internal strucuture to use as key for a heap page\ntype heapHash struct {\n\tFileName string\n\tPageNo int\n}\n\n// This method returns a key for a page to use in a map object, used by\n// BufferPool to determine if a page is cached or not. We recommend using a\n// heapHash struct as the key for a page, although you can use any struct that\n// does not contain a slice or a map that uniquely identifies the page.\nfunc (f *HeapFile) pageKey(pgNo int) any {\n\t// TODO: some code goes here\n\treturn nil\n}\n" - } - ], - "test_codes": [ - { - "code_name": "heap_file_test.go", - "code_path": "godb/heap_file_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/heap_file_test.go", - "code_content": "package godb\n\nimport (\n\t\"os\"\n\t\"testing\"\n)\n\nconst TestingFile string = \"test.dat\"\nconst TestingFile2 string = \"test2.dat\"\n\nfunc makeTestFile(t *testing.T, bufferPoolSize int) (*BufferPool, *HeapFile) {\n\tos.Remove(TestingFile)\n\n\tbp, c, err := MakeTestDatabase(bufferPoolSize, \"catalog.txt\")\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\ttd, _, _ := makeTupleTestVars()\n\ttbl, err := c.addTable(\"test\", td)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\treturn bp, tbl.(*HeapFile)\n}\n\nfunc makeTestVars(t *testing.T) (TupleDesc, Tuple, Tuple, *HeapFile, *BufferPool, TransactionID) {\n\tbp, hf := makeTestFile(t, 3)\n\ttd, t1, t2 := makeTupleTestVars()\n\ttid := NewTID()\n\tbp.BeginTransaction(tid)\n\treturn td, t1, t2, hf, bp, tid\n}\n\nfunc TestHeapFileCreateAndInsert(t *testing.T) {\n\t_, t1, t2, hf, _, tid := makeTestVars(t)\n\terr := hf.insertTuple(&t1, tid)\n\n\thf.insertTuple(&t2, tid)\n\titer, err := hf.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\ti := 0\n\tfor {\n\t\ttup, err := iter()\n\t\tif err != nil {\n\t\t\tt.Fatalf(err.Error())\n\t\t}\n\n\t\tif tup == nil {\n\t\t\tbreak\n\t\t}\n\t\ti = i + 1\n\t}\n\tif i != 2 {\n\t\tt.Fatalf(\"HeapFile iterator expected 2 tuples, got %d\", i)\n\t}\n}\n\nfunc TestHeapFileDelete(t *testing.T) {\n\t_, t1, t2, hf, _, tid := makeTestVars(t)\n\terr := 
hf.insertTuple(&t1, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\terr = hf.insertTuple(&t2, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\terr = hf.deleteTuple(&t1, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\titer, err := hf.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tt3, err := iter()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif t3 == nil {\n\t\tt.Fatalf(\"HeapFile iterator expected 1 tuple\")\n\t}\n\n\terr = hf.deleteTuple(&t2, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\titer, err = hf.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tt3, err = iter()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tif t3 != nil {\n\t\tt.Fatalf(\"HeapFile iterator expected 0 tuple\")\n\t}\n}\n\nfunc testSerializeN(t *testing.T, n int) {\n\tbp, hf := makeTestFile(t, max(1, n/50))\n\t_, t1, t2 := makeTupleTestVars()\n\n\ttid := NewTID()\n\tbp.BeginTransaction(tid)\n\tfor i := 0; i < n; i++ {\n\t\tif err := hf.insertTuple(&t1, tid); err != nil {\n\t\t\tt.Fatalf(err.Error())\n\t\t}\n\n\t\tif err := hf.insertTuple(&t2, tid); err != nil {\n\t\t\tt.Fatalf(err.Error())\n\t\t}\n\t}\n\tbp.CommitTransaction(tid)\n\tbp.FlushAllPages()\n\n\tbp2, catalog, err := MakeTestDatabase(1, \"catalog.txt\")\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\thf2, err := catalog.addTable(\"test\", *hf.Descriptor())\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\ttid = NewTID()\n\tbp2.BeginTransaction(tid)\n\n\titer, err := hf2.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\ti := 0\n\tfor tup, err := iter(); tup != nil; tup, err = iter() {\n\t\tif err != nil {\n\t\t\tt.Fatalf(err.Error())\n\t\t}\n\t\ti = i + 1\n\t}\n\tif i != 2*n {\n\t\tt.Fatalf(\"HeapFile iterator expected %d tuples, got %d\", 2*n, i)\n\t}\n\n}\nfunc TestHeapFileSerializeSmall(t *testing.T) {\n\ttestSerializeN(t, 2)\n}\n\nfunc TestHeapFileSerializeLarge(t *testing.T) {\n\ttestSerializeN(t, 2000)\n}\n\nfunc TestHeapFileSerializeVeryLarge(t *testing.T) {\n\ttestSerializeN(t, 4000)\n}\n\nfunc TestHeapFileLoadCSV(t *testing.T) {\n\t_, _, _, hf, _, tid := makeTestVars(t)\n\tf, err := os.Open(\"test_heap_file.csv\")\n\tif err != nil {\n\t\tt.Fatalf(\"Couldn't open test_heap_file.csv\")\n\t}\n\terr = hf.LoadFromCSV(f, true, \",\", false)\n\tif err != nil {\n\t\tt.Fatalf(\"Load failed, %s\", err)\n\t}\n\t//should have 384 records\n\titer, _ := hf.Iterator(tid)\n\ti := 0\n\tfor {\n\t\tt, _ := iter()\n\t\tif t == nil {\n\t\t\tbreak\n\t\t}\n\t\ti = i + 1\n\t}\n\tif i != 384 {\n\t\tt.Fatalf(\"HeapFile iterator expected 384 tuples, got %d\", i)\n\t}\n}\n\nfunc TestHeapFilePageKey(t *testing.T) {\n\ttd, t1, _, hf, bp, tid := makeTestVars(t)\n\n\tos.Remove(TestingFile2)\n\thf2, err := NewHeapFile(TestingFile2, &td, bp)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tfor hf.NumPages() < 2 {\n\t\terr = hf.insertTuple(&t1, tid)\n\t\tif err != nil {\n\t\t\tt.Fatalf(err.Error())\n\t\t}\n\n\t\terr = hf2.insertTuple(&t1, tid)\n\t\tif err != nil {\n\t\t\tt.Fatalf(err.Error())\n\t\t}\n\n\t\tif hf.NumPages() == 0 {\n\t\t\tt.Fatalf(\"Heap file should have at least one page after insertion.\")\n\t\t}\n\n\t\tbp.FlushAllPages()\n\t}\n\n\tif hf.NumPages() != hf2.NumPages() || hf.NumPages() != 2 {\n\t\tt.Fatalf(\"Should be two pages here\")\n\t}\n\n\tfor i := 0; i < hf.NumPages(); i++ {\n\t\tif hf.pageKey(i) != hf.pageKey(i) {\n\t\t\tt.Fatalf(\"Expected equal pageKey\")\n\t\t}\n\t\tif hf.pageKey(i) == 
hf.pageKey((i+1)%hf.NumPages()) {\n\t\t\tt.Fatalf(\"Expected non-equal pageKey for different pages\")\n\t\t}\n\t\tif hf.pageKey(i) == hf2.pageKey(i) {\n\t\t\tt.Fatalf(\"Expected non-equal pageKey for different heapfiles\")\n\t\t}\n\t}\n}\n\nfunc TestHeapFileSize(t *testing.T) {\n\t_, t1, _, hf, bp, _ := makeTestVars(t)\n\n\ttid := NewTID()\n\tbp.BeginTransaction(tid)\n\thf.insertTuple(&t1, tid)\n\tpage, err := bp.GetPage(hf, 0, tid, ReadPerm)\n\tif err != nil {\n\t\tt.Fatalf(\"unexpected error, getPage, %s\", err.Error())\n\t}\n\thf.flushPage(page)\n\tinfo, err := os.Stat(TestingFile)\n\tif err != nil {\n\t\tt.Fatalf(\"unexpected error, stat, %s\", err.Error())\n\t}\n\tif info.Size() != int64(PageSize) {\n\t\tt.Fatalf(\"heap file page is not %d bytes; NOTE: This error may be OK, but many implementations that don't write full pages break.\", PageSize)\n\t}\n}\n\nfunc TestHeapFileSetDirty(t *testing.T) {\n\tif os.Getenv(\"LAB\") == \"5\" {\n\t\tt.Skip(\"This test is only valid up to Lab 4. Skipping\")\n\t}\n\n\t_, t1, _, hf, bp, tid := makeTestVars(t)\n\tfor i := 0; i < 308; i++ {\n\t\terr := hf.insertTuple(&t1, tid)\n\t\tif err != nil && (i == 306 || i == 307) {\n\t\t\treturn\n\t\t} else if err != nil {\n\t\t\tt.Fatalf(\"%v\", err)\n\t\t}\n\t}\n\tbp.CommitTransaction(tid)\n\tt.Fatalf(\"Expected error due to all pages in BufferPool being dirty\")\n}\n\nfunc TestHeapFileDirtyBit(t *testing.T) {\n\t_, t1, _, hf, bp, _ := makeTestVars(t)\n\n\ttid := NewTID()\n\tbp.BeginTransaction(tid)\n\thf.insertTuple(&t1, tid)\n\thf.insertTuple(&t1, tid)\n\tpage, _ := bp.GetPage(hf, 0, tid, ReadPerm)\n\tif !page.isDirty() {\n\t\tt.Fatalf(\"Expected page to be dirty\")\n\t}\n}\n\nfunc TestHeapFileIteratorExtra(t *testing.T) {\n\t_, t1, _, hf, bp, _ := makeTestVars(t)\n\ttid := NewTID()\n\tbp.BeginTransaction(tid)\n\n\tit, err := hf.Iterator(tid)\n\t_, err = it()\n\tif err != nil {\n\t\tt.Fatalf(\"Empty heap file iterator should return nil,nil\")\n\t}\n\thf.insertTuple(&t1, tid)\n\tit, err = hf.Iterator(tid)\n\tpg, err := it()\n\tif err != nil {\n\t\tt.Fatalf(\"Iterating over heap file with one tuple returned error %s\", err.Error())\n\t}\n\tif pg == nil {\n\t\tt.Fatalf(\"Should have gotten 1 page in heap file iterator\")\n\t}\n\tpg, err = it()\n\tif pg != nil {\n\t\tt.Fatalf(\"More than 1 page in heap file iterator!\")\n\t}\n\tif err != nil {\n\t\tt.Fatalf(\"Iterator returned error at end, expected nil, nil, got nil, %s\", err.Error())\n\t}\n}\n" - } - ], - "test_command": [ - "go test heap_file_test.go" - ] - }, - { - "instance_id": 5, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 1: GoDB", - "part_name": "2.6. A simple query", - "exercise": "Exercise 5", - "introduction": "## 2. 
GoDB Architecture and Implementation Guide\n\nGoDB consists of:\n\n* Structures that represent fields, tuples, and tuple schemas;\n* Methods that apply predicates and conditions to tuples;\n* One or more access methods (e.g., heap files) that store relations on disk and\n provide a way to iterate through tuples of those relations;\n* A collection of operator classes (e.g., select, join, insert, delete, etc.)\n that process tuples;\n* A buffer pool that caches active tuples and pages in memory and handles\n concurrency control and transactions (neither of which you need to worry about\n for this lab); and,\n* A catalog that stores information about available tables and their schemas.\n\nGoDB does not include many things that you may think of as being a part of a\n\"database system.\" In particular, GoDB does not have:\n\n* (In this lab), a SQL front end or parser that allows you to type queries\n directly into GoDB. Instead, queries are built up by chaining a set of\n operators together into a hand-built query plan (see [Section\n 2.6](#query_walkthrough)). We will provide a simple parser for use in later\n labs.\n* Views.\n* Data types except integers and fixed length strings.\n* (In this lab) Query optimizer.\n* (In this lab) Indices.\n\nIn the rest of this Section, we describe each of the main components of GoDB\nthat you will need to implement in this lab. You should use the exercises in\nthis discussion to guide your implementation. This document is by no means a\ncomplete specification for GoDB; you will need to make decisions about how\nto design and implement various parts of the system. Note that for Lab 1 you do\nnot need to implement any operators (e.g., select, join, project) except\nsequential scan as a part of the `heap_file.go` file.\nYou will add support for additional operators in future labs.", - "Description": "### 2.6. A simple query\n\nIn the next lab, you will implement \"Operators\" that will allow you to run actual SQL queries against GoDB. For the final test in this lab, we ask you to implement a simple query in go logic. This method takes the name of a CSV file and a `TupleDesc` and a field name and return the sum of the supplied field name. 
You can use the `HeapFile.LoadFromCSV` method to load the CSV file, and the `findFieldInTd` method\nto find the field number in the `TupleDesc`, if it exists.\n\n### Exercise 5\n\n**Implement the skeleton method in:**\n\n---\n* lab1_query.go\n---\n\nWe have supplied a simple test case for you for this method in `lab1_query_test.go`, although we will also test it with other files to confirm your implementation is working.", - "repo/location": "go get main\ncd godb\ngo get ../godb\ngo test", - "dependency": [], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab1.md", - "codes": [ - { - "code_path": "godb/lab1_query.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/lab1_query.go", - "code_content": "package godb\n\nimport (\n\t\"fmt\"\n)\n\n/*\ncomputeFieldSum should (1) load the csv file named fileName into a heap file\n(see [HeapFile.LoadFromCSV]), (2) compute the sum of the integer field named\nsumField string and, (3) return its value as an int.\n\nThe supplied csv file is comma delimited and has a header.\n\nIf the file doesn't exist, can't be opened, the field doesn't exist, or the\nfield is not an integer, you should return an error.\n\nNote that when you create a HeapFile, you will need to supply a file name;\nyou can supply a non-existent file, in which case it will be created.\nHowever, subsequent invocations of this method will result in tuples being\nreinserted into this file unless you delete (e.g., with [os.Remove] it before\ncalling NewHeapFile.\n\nNote that you should NOT pass fileName into NewHeapFile -- fileName is a CSV\nfile that you should call LoadFromCSV on.\n*/\nfunc computeFieldSum(bp *BufferPool, fileName string, td TupleDesc, sumField string) (int, error) {\n\treturn 0, fmt.Errorf(\"computeFieldSum not implemented\") // replace me\n}\n" - } - ], - "test_codes": [ - { - "code_name": "lab1_query_test.go", - "code_path": "godb/lab1_query_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/lab1_query_test.go", - "code_content": "package godb\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"testing\"\n)\n\nfunc TestLab1Query(t *testing.T) {\n\tif os.Getenv(\"LAB\") == \"5\" {\n\t\tt.Skip(\"This test is only valid up to Lab 4. Skipping\")\n\t}\n\tbp, _, err := MakeTestDatabase(10, \"catalog.txt\")\n\tif err != nil {\n\t\tt.Fatalf(\"Failed to initialize test database\")\n\t}\n\tf1 := FieldType{\"name\", \"\", StringType}\n\tf2 := FieldType{\"age\", \"\", IntType}\n\ttd := TupleDesc{[]FieldType{f1, f2}}\n\tsum, err := computeFieldSum(bp, \"lab1_test.csv\", td, \"age\")\n\tif err != nil {\n\t\tfmt.Println(err)\n\t}\n\tif sum != 1111 {\n\t\tt.Fatalf(\"expected sum of 1111, got %d\", sum)\n\t}\n}\n" - } - ], - "test_command": [ - "go test lab1_query_test.go" - ] - }, - { - "instance_id": 6, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 2: GoDB Operators", - "part_name": "2.1. Filter and Join", - "exercise": "Exercise 1", - "introduction": "In this lab assignment, you will write a set of operators for GoDB to\nimplement table modifications (e.g., insert and delete records), filters,\njoins, aggregates, etc. 
These will build on top of the foundation that you wrote\nin Lab 1 to provide you with a database system that can perform simple queries\nover multiple tables.\n\nYou do not need to implement transactions or locking in this lab.\n\nThe remainder of this document gives some suggestions about how to start coding,\ndescribes a set of exercises to help you work through the lab, and discusses how\nto hand in your code. This lab requires you to write a fair amount of code, so\nwe encourage you to **start early**!", - "Description": "### 2.1. Filter and Join\n\nRecall that GoDB OpIterator classes implement the operations of the\nrelational algebra. You will now implement two operators that will enable you to\nperform queries that are slightly more interesting than a table scan.\n\n* *Filter*: This operator only returns tuples that satisfy a predicate that is\n specified at construction. Hence, it filters out any tuples that\n do not match the predicate.\n\n* *Join*: This operator joins tuples from its two children according to an equality predicate that is specified at construction. Our current version of godb only supports joining with equality predicates `EqualityJoin`.\nWe only require\n a simple nested loop join, but you may explore more interesting join\n implementations. In particular, we will give a small amount of extra credit to those satisfying a stricter time-out requirement. Describe your implementation in your lab writeup.\n\nFor both of these operators, we have given you constructors so that you don't have to deal with the complexities of Go generics\nand constructors for both integer and string fields. You will need to implement the `Descriptor()` and `Iterator()` methods.\n\nNote that both filters and joins take `Expr` objects that, in the case of joins or the left side of a filter, extract the field to be compared, or in the case of the right side of a filter, evaluate to a constant value. We saw expressions in lab 1, but as a reminder, \nthe idea here is that either side of a predicate can be an arbitrary arithmetic expression, e.g. a join expression can be:\n\n`(t1.x + 7) = (t2.y * 4)`\n\nTo handle this, you will need to evaluate\nthe expression over the tuple and then use the `getter` function to extract the value.\nHere the getter takes a `DBValue` type and extracts either an `int64` or a `string`, depending\non the type of the filter or join (this way, you don't need to have different Iterator() implementations\nfor different types.)\nFor example, for the right field of the `joinOp` of type in the EqualityJoin's `Iterator()` implementation, you can get\nthe value for the right side of the join using:\n\n```\nv, _ := joinOp.rightField.EvalExpr(curT)\nrightFieldVal := joinOp.getter(v)\n```\n\n**Exercise 1.**\n\nImplement the skeleton methods in:\n\n------\n\n* godb/filter_op.go\n* godb/join_op.go\n\n------\n\nNote that the implementation of `Iterator()`, particularly for join, is a bit tricky because your iterator will have to store the current progress of the iterator after returning each tuple. You had to deal with this a bit in your heap file iterator in lab 1, but it is more complicated here. 
Your implementation should not pre-compute the entire join or filter result; instead, it should resume iterating through the child operator(s) at the point it left off after returning the previous tuple.\n\nRecall that the result of a call to an `Iterator()` is a function that does the iteration and that this function can \"capture\" variables that are defined in the outer portion of the iterator. To understand this, it may be helpful to look at the discussion of iterators and closures in lab 1, or review code such as [this example](https://go.dev/tour/moretypes/25). Note that in this example, the `adder()`\nfunction returns a function that captures a unique value of `sum` for each invocation of `adder()` -- so the to `adder` objects in `main()` will operate on different `sum` objects. Your `Iterator()` implementation will want to capture the state of the iterator (how far it has iterated through the child iterators) outside of the function you return in a similar way.\n\nAt this point, your code should pass the unit tests in `filter_op_test.go` and the test `TestJoin` in `join_op_test.go`. You do not need to pass the test `TestBigJoinOptional` (this test will timeout and fail internally after 10 seconds).", - "repo/location": "$ cd go-db-hw-2024\n$ git pull upstream main", - "dependency": [ - "1", - "2", - "3", - "4", - "5" - ], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab2.md", - "codes": [ - { - "code_path": "godb/filter_op.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/filter_op.go", - "code_content": "package godb\n\nimport (\n\"fmt\"\n)\n\ntype Filter struct {\n\top BoolOp\n\tleft Expr\n\tright Expr\n\tchild Operator\n}\n\n// Construct a filter operator on ints.\nfunc NewFilter(constExpr Expr, op BoolOp, field Expr, child Operator) (*Filter, error) {\n\treturn &Filter{op, field, constExpr, child}, nil\n}\n\n// Return a TupleDescriptor for this filter op.\nfunc (f *Filter) Descriptor() *TupleDesc {\n\t// TODO: some code goes here\n\treturn &TupleDesc{} // replace me\n}\n\n// Filter operator implementation. This function should iterate over the results\n// of the child iterator and return a tuple if it satisfies the predicate.\n//\n// HINT: you can use [types.evalPred] to compare two values.\nfunc (f *Filter) Iterator(tid TransactionID) (func() (*Tuple, error), error) {\n\t// TODO: some code goes here\n\treturn nil, fmt.Errorf(\"Filter.Iterator not implemented\") // replace me\n}\n" - }, - { - "code_path": "godb/join_op.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/join_op.go", - "code_content": "package godb\n\nimport (\n\"fmt\"\n)\n\ntype EqualityJoin struct {\n\t// Expressions that when applied to tuples from the left or right operators,\n\t// respectively, return the value of the left or right side of the join\n\tleftField, rightField Expr\n\n\tleft, right *Operator // Operators for the two inputs of the join\n\n\t// The maximum number of records of intermediate state that the join should\n\t// use (only required for optional exercise).\n\tmaxBufferSize int\n}\n\n// Constructor for a join of integer expressions.\n//\n// Returns an error if either the left or right expression is not an integer.\nfunc NewJoin(left Operator, leftField Expr, right Operator, rightField Expr, maxBufferSize int) (*EqualityJoin, error) {\n\treturn &EqualityJoin{leftField, rightField, &left, &right, maxBufferSize}, nil\n}\n\n// Return a TupleDesc for this join. 
The returned descriptor should contain the\n// union of the fields in the descriptors of the left and right operators.\n//\n// HINT: use [TupleDesc.merge].\nfunc (hj *EqualityJoin) Descriptor() *TupleDesc {\n\t// TODO: some code goes here\n\treturn &TupleDesc{} // replace me\n}\n\n// Join operator implementation. This function should iterate over the results\n// of the join. The join should be the result of joining joinOp.left and\n// joinOp.right, applying the joinOp.leftField and joinOp.rightField expressions\n// to the tuples of the left and right iterators respectively, and joining them\n// using an equality predicate.\n//\n// HINT: When implementing the simple nested loop join, you should keep in mind\n// that you only iterate through the left iterator once (outer loop) but iterate\n// through the right iterator once for every tuple in the left iterator (inner\n// loop).\n//\n// HINT: You can use [Tuple.joinTuples] to join two tuples.\n//\n// OPTIONAL EXERCISE: the operator implementation should not use more than\n// maxBufferSize records, and should pass the testBigJoin test without timing\n// out. To pass this test, you will need to use something other than a nested\n// loops join.\nfunc (joinOp *EqualityJoin) Iterator(tid TransactionID) (func() (*Tuple, error), error) {\n\t// TODO: some code goes here\n\treturn nil, fmt.Errorf(\"EqualityJoin.Iterator not implemented\") // replace me\n}\n" - } - ], - "test_codes": [ - { - "code_name": "filter_op_test.go", - "code_path": "godb/filter_op_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/filter_op_test.go", - "code_content": "package godb\n\nimport (\n\t\"testing\"\n)\n\n// This function is for _testing only_! It is not part of the godb API.\nfunc insertTupleForTest(t *testing.T, hf DBFile, tup *Tuple, tid TransactionID) {\n\tt.Helper()\n\terr := hf.insertTuple(tup, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n}\n\nfunc TestFilterInt(t *testing.T) {\n\t_, t1, t2, hf, _, tid := makeTestVars(t)\n\n\tinsertTupleForTest(t, hf, &t1, tid)\n\tinsertTupleForTest(t, hf, &t2, tid)\n\n\tvar f FieldType = FieldType{\"age\", \"\", IntType}\n\tfilt, err := NewFilter(&ConstExpr{IntField{25}, IntType}, OpGt, &FieldExpr{f}, hf)\n\tif err != nil {\n\t\tt.Errorf(err.Error())\n\t}\n\titer, err := filt.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif iter == nil {\n\t\tt.Fatalf(\"Iterator was nil\")\n\t}\n\n\tcnt := 0\n\tfor {\n\t\ttup, _ := iter()\n\t\tif tup == nil {\n\t\t\tbreak\n\t\t}\n\t\tt.Logf(\"filter passed tup %d: %v\\n\", cnt, tup)\n\t\tcnt++\n\t}\n\tif cnt != 1 {\n\t\tt.Errorf(\"unexpected number of results\")\n\t}\n}\n\nfunc TestFilterString(t *testing.T) {\n\t_, t1, t2, hf, _, tid := makeTestVars(t)\n\tinsertTupleForTest(t, hf, &t1, tid)\n\tinsertTupleForTest(t, hf, &t2, tid)\n\tvar f FieldType = FieldType{\"name\", \"\", StringType}\n\tfilt, err := NewFilter(&ConstExpr{StringField{\"sam\"}, StringType}, OpEq, &FieldExpr{f}, hf)\n\tif err != nil {\n\t\tt.Errorf(err.Error())\n\t}\n\titer, err := filt.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif iter == nil {\n\t\tt.Fatalf(\"Iterator was nil\")\n\t}\n\n\tcnt := 0\n\tfor {\n\t\ttup, _ := iter()\n\t\tif tup == nil {\n\t\t\tbreak\n\t\t}\n\t\tt.Logf(\"filter passed tup %d: %v\\n\", cnt, tup)\n\t\tcnt++\n\t}\n\tif cnt != 1 {\n\t\tt.Errorf(\"unexpected number of results\")\n\t}\n}\n" - }, - { - "code_name": "join_op_test.go", - "code_path": "godb/join_op_test.go", - "code_url": 
"https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/join_op_test.go", - "code_content": "package godb\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"testing\"\n\t\"time\"\n)\n\nconst JoinTestFile string = \"JoinTestFile.dat\"\n\nfunc TestJoin(t *testing.T) {\n\ttd, t1, t2, hf, bp, tid := makeTestVars(t)\n\tinsertTupleForTest(t, hf, &t1, tid)\n\tinsertTupleForTest(t, hf, &t2, tid)\n\tinsertTupleForTest(t, hf, &t2, tid)\n\n\tos.Remove(JoinTestFile)\n\thf2, _ := NewHeapFile(JoinTestFile, &td, bp)\n\tinsertTupleForTest(t, hf2, &t1, tid)\n\tinsertTupleForTest(t, hf2, &t2, tid)\n\tinsertTupleForTest(t, hf2, &t2, tid)\n\n\toutT1 := joinTuples(&t1, &t1)\n\toutT2 := joinTuples(&t2, &t2)\n\n\tleftField := FieldExpr{td.Fields[1]}\n\tjoin, err := NewJoin(hf, &leftField, hf2, &leftField, 100)\n\tif err != nil {\n\t\tt.Errorf(\"unexpected error initializing join\")\n\t\treturn\n\t}\n\titer, err := join.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif iter == nil {\n\t\tt.Fatalf(\"iter was nil\")\n\t}\n\tcnt := 0\n\tcntOut1 := 0\n\tcntOut2 := 0\n\tfor {\n\t\tt, _ := iter()\n\t\tif t == nil {\n\t\t\tbreak\n\t\t}\n\t\tif t.equals(outT1) {\n\t\t\tcntOut1++\n\t\t} else if t.equals(outT2) {\n\t\t\tcntOut2++\n\t\t}\n\t\t//fmt.Printf(\"got tuple %v: %v\\n\", cnt, t)\n\t\tcnt++\n\t}\n\tif cnt != 5 {\n\t\tt.Errorf(\"unexpected number of join results (%d, expected 5)\", cnt)\n\t}\n\tif cntOut1 != 1 {\n\t\tt.Errorf(\"unexpected number of t1 results (%d, expected 1)\", cntOut1)\n\t}\n\tif cntOut2 != 4 {\n\t\tt.Errorf(\"unexpected number of t2 results (%d, expected 4)\", cntOut2)\n\t}\n\n}\n\nconst BigJoinFile1 string = \"jointest1.dat\"\nconst BigJoinFile2 string = \"jointest2.dat\"\n\n//This test joins two large heap files (each containing ntups tuples). 
A simple\n//nested loops join will take a LONG time to complete this join, so we've added\n//a timeout that will cause the join to fail after 10 seconds.\n//\n//Note that this test is optional; passing it will give extra credit, as\n//describe in the lab 2 assignment.\n\nfunc TestJoinBigOptional(t *testing.T) {\n\ttimeout := time.After(20 * time.Second)\n\n\tdone := make(chan bool)\n\n\tgo func() {\n\t\tfail := func(err error) {\n\t\t\tdone <- true\n\t\t\tt.Errorf(err.Error())\n\t\t}\n\t\tntups := 314159\n\n\t\tif err := os.Remove(BigJoinFile1); err != nil && !os.IsNotExist(err) {\n\t\t\tfail(fmt.Errorf(\"removing file1: %w\", err))\n\t\t\treturn\n\t\t}\n\t\tif err := os.Remove(BigJoinFile2); err != nil && !os.IsNotExist(err) {\n\t\t\tfail(fmt.Errorf(\"removing file2: %w\", err))\n\t\t}\n\n\t\tbp, c, err := MakeTestDatabase(100, \"big_join_catalog.txt\")\n\t\tif err != nil {\n\t\t\tfail(fmt.Errorf(\"making database: %w\", err))\n\t\t\treturn\n\t\t}\n\n\t\thf1, err := c.GetTable(\"jointest1\")\n\t\tif err != nil {\n\t\t\tfail(fmt.Errorf(\"getting jointest1: %w\", err))\n\t\t\treturn\n\t\t}\n\t\thf2, err := c.GetTable(\"jointest2\")\n\t\tif err != nil {\n\t\t\tfail(fmt.Errorf(\"getting jointest2: %w\", err))\n\t\t\treturn\n\t\t}\n\n\t\ttid := NewTID()\n\t\tbp.BeginTransaction(tid)\n\t\tfor i := 0; i < ntups; i++ {\n\n\t\t\tif i > 0 && i%5000 == 0 {\n\t\t\t\tbp.FlushAllPages()\n\t\t\t\t// commit transaction\n\t\t\t\tbp.CommitTransaction(tid)\n\n\t\t\t\ttid = NewTID()\n\t\t\t\tbp.BeginTransaction(tid)\n\t\t\t}\n\n\t\t\ttup := Tuple{*hf1.Descriptor(), []DBValue{IntField{int64(i)}}, nil}\n\t\t\terr := hf1.insertTuple(&tup, tid)\n\t\t\tif err != nil {\n\t\t\t\tfail(fmt.Errorf(\"inserting tuple1: %w\", err))\n\t\t\t\treturn\n\t\t\t}\n\n\t\t\terr = hf2.insertTuple(&tup, tid)\n\t\t\tif err != nil {\n\t\t\t\tfail(fmt.Errorf(\"inserting tuple2: %w\", err))\n\t\t\t\treturn\n\t\t\t}\n\t\t}\n\t\tbp.CommitTransaction(tid)\n\n\t\ttid = NewTID()\n\t\tbp.BeginTransaction(tid)\n\t\tleftField := FieldExpr{hf1.Descriptor().Fields[0]}\n\t\tjoin, err := NewJoin(hf1, &leftField, hf2, &leftField, 100000)\n\t\tif err != nil {\n\t\t\tt.Errorf(\"unexpected error initializing join\")\n\t\t\tdone <- true\n\t\t\treturn\n\t\t}\n\t\titer, err := join.Iterator(tid)\n\t\tif err != nil {\n\t\t\tfail(err)\n\t\t\treturn\n\t\t}\n\n\t\tif iter == nil {\n\t\t\tt.Errorf(\"iter was nil\")\n\t\t\tdone <- true\n\t\t\treturn\n\t\t}\n\t\tcnt := 0\n\t\tfor {\n\t\t\ttup, err := iter()\n\t\t\tif err != nil {\n\t\t\t\tfail(err)\n\t\t\t\treturn\n\t\t\t}\n\t\t\tif tup == nil {\n\t\t\t\tbreak\n\t\t\t}\n\t\t\tcnt++\n\t\t}\n\t\tif cnt != ntups {\n\t\t\tt.Errorf(\"unexpected number of join results (%d, expected %d)\", cnt, ntups)\n\t\t}\n\t\tdone <- true\n\t}()\n\n\tselect {\n\tcase <-timeout:\n\t\tt.Fatal(\"Test didn't finish in time\")\n\tcase <-done:\n\t}\n}\n\nfunc makeJoinOrderingVars(t *testing.T) (*HeapFile, *HeapFile, Tuple, Tuple, *BufferPool) {\n\tvar td1 = TupleDesc{Fields: []FieldType{\n\t\t{Fname: \"a\", Ftype: StringType},\n\t\t{Fname: \"b\", Ftype: IntType},\n\t}}\n\tvar td2 = TupleDesc{Fields: []FieldType{\n\t\t{Fname: \"c\", Ftype: StringType},\n\t\t{Fname: \"d\", Ftype: IntType},\n\t}}\n\n\tvar t1 = Tuple{\n\t\tDesc: td1,\n\t\tFields: []DBValue{\n\t\t\tStringField{\"sam\"},\n\t\t\tIntField{25},\n\t\t}}\n\n\tvar t2 = Tuple{\n\t\tDesc: td2,\n\t\tFields: []DBValue{\n\t\t\tStringField{\"george jones\"},\n\t\t\tIntField{25},\n\t\t}}\n\n\tbp, err := NewBufferPool(3)\n\tif err != nil 
{\n\t\tt.Fatalf(err.Error())\n\t}\n\tos.Remove(TestingFile)\n\thf1, err := NewHeapFile(TestingFile, &td1, bp)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tos.Remove(TestingFile2)\n\thf2, err := NewHeapFile(TestingFile2, &td2, bp)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\treturn hf1, hf2, t1, t2, bp\n}\n\nfunc TestJoinFieldOrder(t *testing.T) {\n\tbp, c, err := MakeTestDatabase(3, \"join_test_catalog.txt\")\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\thf1, err := c.GetTable(\"test\")\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\thf2, err := c.GetTable(\"test2\")\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tvar t1 = Tuple{\n\t\tDesc: *hf1.Descriptor(),\n\t\tFields: []DBValue{\n\t\t\tStringField{\"sam\"},\n\t\t\tIntField{25},\n\t\t}}\n\n\tvar t2 = Tuple{\n\t\tDesc: *hf2.Descriptor(),\n\t\tFields: []DBValue{\n\t\t\tStringField{\"george jones\"},\n\t\t\tIntField{25},\n\t\t}}\n\n\ttid := NewTID()\n\tbp.BeginTransaction(tid)\n\n\tinsertTupleForTest(t, hf1, &t1, tid)\n\tinsertTupleForTest(t, hf2, &t2, tid)\n\n\tleftField := FieldExpr{t1.Desc.Fields[1]}\n\trightField := FieldExpr{t2.Desc.Fields[1]}\n\n\tjoin, err := NewJoin(hf1, &leftField, hf2, &rightField, 100)\n\tif err != nil {\n\t\tt.Errorf(\"unexpected error initializing join\")\n\t\treturn\n\t}\n\titer, err := join.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif iter == nil {\n\t\tt.Fatalf(\"iter was nil\")\n\t}\n\n\tvar tdExpected = TupleDesc{Fields: []FieldType{\n\t\t{Fname: \"a\", Ftype: StringType},\n\t\t{Fname: \"b\", Ftype: IntType},\n\t\t{Fname: \"c\", Ftype: StringType},\n\t\t{Fname: \"d\", Ftype: IntType},\n\t}}\n\n\ttj, err := iter()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tif !tdExpected.equals(&tj.Desc) {\n\t\tt.Fatalf(\"Unexpected descriptor of joined tuple\")\n\t}\n}\n\nfunc TestJoinTupleNil(t *testing.T) {\n\t_, t1, t2, _, _, _ := makeTestVars(t)\n\ttNew := joinTuples(&t1, nil)\n\tif !tNew.equals(&t1) {\n\t\tt.Fatalf(\"Unexpected output of joinTuple with nil\")\n\t}\n\ttNew2 := joinTuples(nil, &t2)\n\tif !tNew2.equals(&t2) {\n\t\tt.Fatalf(\"Unexpected output of joinTuple with nil\")\n\t}\n}\n" - } - ], - "test_command": [ - "go test filter_op_test.go", - "go test join_op_test.go" - ] - }, - { - "instance_id": 7, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 2: GoDB Operators", - "part_name": "2.2. Aggregates", - "exercise": "Exercise 2", - "introduction": "In this lab assignment, you will write a set of operators for GoDB to\nimplement table modifications (e.g., insert and delete records), filters,\njoins, aggregates, etc. These will build on top of the foundation that you wrote\nin Lab 1 to provide you with a database system that can perform simple queries\nover multiple tables.\n\nYou do not need to implement transactions or locking in this lab.\n\nThe remainder of this document gives some suggestions about how to start coding,\ndescribes a set of exercises to help you work through the lab, and discusses how\nto hand in your code. This lab requires you to write a fair amount of code, so\nwe encourage you to **start early**!", - "Description": "### 2.2. Aggregates\n\nThe aggregate operator implements basic SQL aggregates with a `GROUP BY` clause. 
You will need to implement the five SQL aggregates (`COUNT`, `SUM`, `AVG`,\n`MIN`, `MAX`) and support grouping over zero or more fields.\n\nIn order to calculate aggregates, we use an `AggState` interface, which merges\na new tuple into the existing calculation of an aggregate. The `AggState` is\ntold during construction what operation it should use for aggregation.\nSubsequently, the client code should call `AggState.addTuple()` for\nevery tuple in the child iterator. After all tuples have been merged, the client\ncan retrieve an iterator of aggregation results. Each tuple in the result is a\npair of the form `(groupValue, aggregateValue)` unless the value of the group\nby field was `Aggregator.groupByFields = nil`, in which case the result is a single\ntuple of the form `(aggregateValue)`.\n\nNote that this implementation requires space linear in the number of distinct\ngroups. For the purposes of this lab, you do not need to worry about the\nsituation where the number of groups exceeds available memory.\n\nSimilar to Exercise 1, we have provided the construction methods and fields for the `Aggregator` operator so that you only have to worry about the `Descriptor()` and `Iterator()` methods. \n\nNotice that in the fields of the `Aggregator` operator, `groupByFields` is an array of `Expr` objects. This is to support grouping by more than one `Expr`. Analogously, `newAggState` being an array of `AggState` is to support multiple aggregations per group at the same time (think `SELECT MAX(salary), AVG(salary), MIN(salary) FROM employees GROUP BY office_location;`).\n\nAs for `AggState`, the purpose is to maintain some running value for the aggregation operation (one of `COUNT`, `SUM`, `AVG`,\n`MIN`, or `MAX`) when you go through the child iterators. For example, for the `SUM` operator, you will probably want to maintain some number representing the running sum up to the current tuple. Every aggregation operation needs to implement the interface methods: `Init`, `Copy`, `AddTuple`, `Finalize`, and `GetTupleDesc`. In general, we `Init`-ialize the aggregation state at the beginning, `AddTuple` of all relevant child tuples, and then call `Finalize` at the end to retrieve the aggregation results. This intuition should hint at how to implement the five aggregation operations and which fields to maintain. Furthermore, we have provided our implementation of the `COUNT` aggregation state, which may help you understand how some methods work. You will need to complete the implementation of other aggregation states.\n\n\n**Exercise 2.**\n\nImplement the skeleton methods in:\n\n------\n\n* godb/agg_state.go\n* godb/agg_op.go\n\n------\n\nAgain, for implementing the `Iterator()` method, you will want to make use of the \"capture\" functionality to store internal states such as how many result tuples have been iterated through. The logic of one possible implementation, of which we have provided a skeleton code, is as follows: on the first iterator call, firstly, we iterate through all the child tuples to collect aggregation results of all groups. Then, we create a `finalizedIter` iterator for iterating through the results of each group. Subsequent calls to the function will then simply be all redirected to `finalizedIter`. 
Our implementation uses three helper functions which you will have to implement: `extractGroupByKeyTuple` (given a tuple `t` from a child, return a tuple that identifies `t`'s group; this tuple may contain multiple fields, one per group by attribute), `addTupleToGrpAggState` (given a tuple `t` from child and a pointer to an array of AggState `grpAggState`, add `t` into all aggregation states in the array), and `getFinalizedTuplesIterator` (given that all child tuples have been added, create an iterator that iterates through the finalized aggregate result of each group). We also handled the no group-by case for you, so you can assume there's always grouping when these helper functions are called. If you prefer, you may implement `Iterator()` in some other way that doesn't use our overall skeleton; we don't test the three helper methods, only the overall `Iterator()` method.\n\nAt this point, your code should pass the unit tests in `agg_op_test.go`.", - "repo/location": "$ cd go-db-hw-2024\n$ git pull upstream main", - "dependency": [ - "1", - "2", - "3", - "4", - "5" - ], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab2.md", - "codes": [ - { - "code_path": "godb/agg_state.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/agg_state.go", - "code_content": "package godb\n\nimport (\n\"fmt\"\n)\n\n// interface for an aggregation state\ntype AggState interface {\n\t// Initializes an aggregation state. Is supplied with an alias, an expr to\n\t// evaluate an input tuple into a DBValue, and a getter to extract from the\n\t// DBValue its int or string field's value.\n\tInit(alias string, expr Expr) error\n\n\t// Makes an copy of the aggregation state.\n\tCopy() AggState\n\n\t// Adds an tuple to the aggregation state.\n\tAddTuple(*Tuple)\n\n\t// Returns the final result of the aggregation as a tuple.\n\tFinalize() *Tuple\n\n\t// Gets the tuple description of the tuple that Finalize() returns.\n\tGetTupleDesc() *TupleDesc\n}\n\n// Implements the aggregation state for COUNT\n// We are supplying the implementation of CountAggState as an example. 
You need to\n// implement the rest of the aggregation states.\ntype CountAggState struct {\n\talias string\n\texpr Expr\n\tcount int\n}\n\nfunc (a *CountAggState) Copy() AggState {\n\treturn &CountAggState{a.alias, a.expr, a.count}\n}\n\nfunc (a *CountAggState) Init(alias string, expr Expr) error {\n\ta.count = 0\n\ta.expr = expr\n\ta.alias = alias\n\treturn nil\n}\n\nfunc (a *CountAggState) AddTuple(t *Tuple) {\n\ta.count++\n}\n\nfunc (a *CountAggState) Finalize() *Tuple {\n\ttd := a.GetTupleDesc()\n\tf := IntField{int64(a.count)}\n\tfs := []DBValue{f}\n\tt := Tuple{*td, fs, nil}\n\treturn &t\n}\n\nfunc (a *CountAggState) GetTupleDesc() *TupleDesc {\n\tft := FieldType{a.alias, \"\", IntType}\n\tfts := []FieldType{ft}\n\ttd := TupleDesc{}\n\ttd.Fields = fts\n\treturn &td\n}\n\n// Implements the aggregation state for SUM\ntype SumAggState struct {\n\t// TODO: some code goes here\n}\n\nfunc (a *SumAggState) Copy() AggState {\n\t// TODO: some code goes here\n\treturn nil // replace me\n}\n\nfunc intAggGetter(v DBValue) any {\n\t// TODO: some code goes here\n\treturn nil // replace me\n}\n\nfunc stringAggGetter(v DBValue) any {\n\t// TODO: some code goes here\n\treturn nil // replace me\n}\n\nfunc (a *SumAggState) Init(alias string, expr Expr) error {\n\t// TODO: some code goes here\n\treturn fmt.Errorf(\"SumAggState.Init not implemented\") // replace me\n}\n\nfunc (a *SumAggState) AddTuple(t *Tuple) {\n\t// TODO: some code goes here\n}\n\nfunc (a *SumAggState) GetTupleDesc() *TupleDesc {\n\t// TODO: some code goes here\n\treturn &TupleDesc{} // replace me\n}\n\nfunc (a *SumAggState) Finalize() *Tuple {\n\t// TODO: some code goes here\n\treturn &Tuple{} // replace me\n}\n\n// Implements the aggregation state for AVG\n// Note that we always AddTuple() at least once before Finalize()\n// so no worries for divide-by-zero\ntype AvgAggState struct {\n\t// TODO: some code goes here\n}\n\nfunc (a *AvgAggState) Copy() AggState {\n\t// TODO: some code goes here\n\treturn nil // replace me\n}\n\nfunc (a *AvgAggState) Init(alias string, expr Expr) error {\n\t// TODO: some code goes here\n\treturn fmt.Errorf(\"AvgAggState.Init not implemented\") // replace me\n}\n\nfunc (a *AvgAggState) AddTuple(t *Tuple) {\n\t// TODO: some code goes here\n}\n\nfunc (a *AvgAggState) GetTupleDesc() *TupleDesc {\n\t// TODO: some code goes here\n\treturn &TupleDesc{} // replace me\n}\n\nfunc (a *AvgAggState) Finalize() *Tuple {\n\t// TODO: some code goes here\n\treturn &Tuple{} // replace me\n}\n\n// Implements the aggregation state for MAX\n// Note that we always AddTuple() at least once before Finalize()\n// so no worries for NaN max\ntype MaxAggState struct {\n\t// TODO: some code goes here\n}\n\nfunc (a *MaxAggState) Copy() AggState {\n\t// TODO: some code goes here\n\treturn nil // replace me\n}\n\nfunc (a *MaxAggState) Init(alias string, expr Expr) error {\n\t// TODO: some code goes here\n\treturn fmt.Errorf(\"MaxAggState.Init not implemented\") // replace me\n}\n\nfunc (a *MaxAggState) AddTuple(t *Tuple) {\n\t// TODO: some code goes here\n}\n\nfunc (a *MaxAggState) GetTupleDesc() *TupleDesc {\n\t// TODO: some code goes here\n\treturn &TupleDesc{} // replace me\n}\n\nfunc (a *MaxAggState) Finalize() *Tuple {\n\t// TODO: some code goes here\n\treturn &Tuple{} // replace me\n}\n\n// Implements the aggregation state for MIN\n// Note that we always AddTuple() at least once before Finalize()\n// so no worries for NaN min\ntype MinAggState struct {\n\t// TODO: some code goes here\n}\n\nfunc (a *MinAggState) Copy() AggState 
{\n\t// TODO: some code goes here\n\treturn nil // replace me\n}\n\nfunc (a *MinAggState) Init(alias string, expr Expr) error {\n\t// TODO: some code goes here\n\treturn fmt.Errorf(\"MinAggState.Init not implemented\") // replace me\n}\n\nfunc (a *MinAggState) AddTuple(t *Tuple) {\n\t// TODO: some code goes here\n}\n\nfunc (a *MinAggState) GetTupleDesc() *TupleDesc {\n\t// TODO: some code goes here\n\treturn &TupleDesc{} // replace me\n}\n\nfunc (a *MinAggState) Finalize() *Tuple {\n\t// TODO: some code goes here\n\treturn &Tuple{} // replace me\n}\n" - }, - { - "code_path": "godb/agg_op.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/agg_op.go", - "code_content": "package godb\n\nimport (\n\"fmt\"\n)\n\ntype Aggregator struct {\n\t// Expressions that when applied to tuples from the child operators,\n\t// respectively, return the value of the group by key tuple\n\tgroupByFields []Expr\n\n\t// Aggregation states that serves as a template as to which types of\n\t// aggregations in which order are to be computed for every group.\n\tnewAggState []AggState\n\n\tchild Operator // the child operator for the inputs to aggregate\n}\n\ntype AggType int\n\nconst (\n\tIntAggregator AggType = iota\n\tStringAggregator AggType = iota\n)\n\nconst DefaultGroup int = 0 // for handling the case of no group-by\n\n// Construct an aggregator with a group-by.\nfunc NewGroupedAggregator(emptyAggState []AggState, groupByFields []Expr, child Operator) *Aggregator {\n\treturn &Aggregator{groupByFields, emptyAggState, child}\n}\n\n// Construct an aggregator with no group-by.\nfunc NewAggregator(emptyAggState []AggState, child Operator) *Aggregator {\n\treturn &Aggregator{nil, emptyAggState, child}\n}\n\n// Return a TupleDescriptor for this aggregation.\n//\n// If the aggregator has no group-by, the returned descriptor should contain the\n// union of the fields in the descriptors of the aggregation states. If the\n// aggregator has a group-by, the returned descriptor will additionally start\n// with the group-by fields, and then the aggregation states descriptors like\n// that without group-by.\n//\n// HINT: for groupByFields, you can use [Expr.GetExprType] to get the FieldType.\n//\n// HINT: use [TupleDesc.merge] to merge the two [TupleDesc]s.\nfunc (a *Aggregator) Descriptor() *TupleDesc {\n\t// TODO: some code goes here\n\treturn &TupleDesc{} //replace me\n}\n\n// Returns an iterator over the results of the aggregate. The aggregate should\n// be the result of aggregating each group's tuples and the iterator should\n// iterate through each group's result. 
In the case where there is no group-by,\n// the iterator simply iterates through only one tuple, representing the\n// aggregation of all child tuples.\nfunc (a *Aggregator) Iterator(tid TransactionID) (func() (*Tuple, error), error) {\n\t// the child iterator\n\tchildIter, err := a.child.Iterator(tid)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\tif childIter == nil {\n\t\treturn nil, GoDBError{MalformedDataError, \"child iter unexpectedly nil\"}\n\t}\n\n\t// the map that stores the aggregation state of each group\n\taggState := make(map[any]*[]AggState)\n\tif a.groupByFields == nil {\n\t\tvar newAggState []AggState\n\t\tfor _, as := range a.newAggState {\n\t\t\tcopy := as.Copy()\n\t\t\tif copy == nil {\n\t\t\t\treturn nil, GoDBError{MalformedDataError, \"aggState Copy unexpectedly returned nil\"}\n\t\t\t}\n\t\t\tnewAggState = append(newAggState, copy)\n\t\t}\n\n\t\taggState[DefaultGroup] = &newAggState\n\t}\n\n\t// the list of group key tuples\n\tvar groupByList []*Tuple\n\t// the iterator for iterating thru the finalized aggregation results for each group\n\tvar finalizedIter func() (*Tuple, error)\n\n\treturn func() (*Tuple, error) {\n\t\t// iterates thru all child tuples\n\t\tfor t, err := childIter(); t != nil || err != nil; t, err = childIter() {\n\t\t\tif err != nil {\n\t\t\t\treturn nil, err\n\t\t\t}\n\t\t\tif t == nil {\n\t\t\t\treturn nil, nil\n\t\t\t}\n\n\t\t\tif a.groupByFields == nil { // adds tuple to the aggregation in the case of no group-by\n\t\t\t\tfor i := 0; i < len(a.newAggState); i++ {\n\t\t\t\t\t(*aggState[DefaultGroup])[i].AddTuple(t)\n\t\t\t\t}\n\t\t\t} else { // adds tuple to the aggregation with grouping\n\t\t\t\tkeygenTup, err := extractGroupByKeyTuple(a, t)\n\t\t\t\tif err != nil {\n\t\t\t\t\treturn nil, err\n\t\t\t\t}\n\n\t\t\t\tkey := keygenTup.tupleKey()\n\t\t\t\tif aggState[key] == nil {\n\t\t\t\t\tasNew := make([]AggState, len(a.newAggState))\n\t\t\t\t\taggState[key] = &asNew\n\t\t\t\t\tgroupByList = append(groupByList, keygenTup)\n\t\t\t\t}\n\n\t\t\t\taddTupleToGrpAggState(a, t, aggState[key])\n\t\t\t}\n\t\t}\n\n\t\tif finalizedIter == nil { // builds the iterator for iterating thru the finalized aggregation results for each group\n\t\t\tif a.groupByFields == nil {\n\t\t\t\tvar tup *Tuple\n\t\t\t\tfor i := 0; i < len(a.newAggState); i++ {\n\t\t\t\t\tnewTup := (*aggState[DefaultGroup])[i].Finalize()\n\t\t\t\t\ttup = joinTuples(tup, newTup)\n\t\t\t\t}\n\t\t\t\tfinalizedIter = func() (*Tuple, error) { return nil, nil }\n\t\t\t\treturn tup, nil\n\t\t\t} else {\n\t\t\t\tfinalizedIter = getFinalizedTuplesIterator(a, groupByList, aggState)\n\t\t\t}\n\t\t}\n\t\treturn finalizedIter()\n\t}, nil\n}\n\n// Given a tuple t from a child iterator, return a tuple that identifies t's\n// group. The returned tuple should contain the fields from the groupByFields\n// list passed into the aggregator constructor. The ith field can be extracted\n// from the supplied tuple using the EvalExpr method on the ith expression of\n// groupByFields.\n//\n// If there is any error during expression evaluation, return the error.\nfunc extractGroupByKeyTuple(a *Aggregator, t *Tuple) (*Tuple, error) {\n\t// TODO: some code goes here\n\treturn &Tuple{}, fmt.Errorf(\"extractGroupByKeyTuple not implemented.\") // replace me\n}\n\n// Given a tuple t from child and (a pointer to) the array of partially computed\n// aggregates grpAggState, add t into all partial aggregations using\n// [AggState.AddTuple]. 
If any of the array elements is of grpAggState is null\n// (i.e., because this is the first invocation of this method, create a new\n// aggState using [aggState.Copy] on appropriate element of the a.newAggState\n// field and add the new aggState to grpAggState.\nfunc addTupleToGrpAggState(a *Aggregator, t *Tuple, grpAggState *[]AggState) {\n\t// TODO: some code goes here\n}\n\n// Given that all child tuples have been added, return an iterator that iterates\n// through the finalized aggregate result one group at a time. The returned\n// tuples should be structured according to the TupleDesc returned from the\n// Descriptor() method.\n//\n// HINT: you can call [aggState.Finalize] to get the field for each AggState.\n// Then, you should get the groupByTuple and merge it with each of the AggState\n// tuples using the joinTuples function in tuple.go you wrote in lab 1.\nfunc getFinalizedTuplesIterator(a *Aggregator, groupByList []*Tuple, aggState map[any]*[]AggState) func() (*Tuple, error) {\n\t// TODO: some code goes here\n\treturn func() (*Tuple, error) {\n\t\t// TODO: some code goes here\n\t\treturn nil, fmt.Errorf(\"getFinalizedTuplesIterator not implemented.\") // replace me\n\t}\n}\n" - } - ], - "test_codes": [ - { - "code_name": "agg_op_test.go", - "code_path": "godb/agg_op_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/agg_op_test.go", - "code_content": "package godb\n\nimport (\n\t\"testing\"\n)\n\nfunc TestAggSimpleSum(t *testing.T) {\n\t_, t1, t2, hf, _, tid := makeTestVars(t)\n\n\terr := hf.insertTuple(&t1, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\terr = hf.insertTuple(&t2, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tsa := SumAggState{}\n\texpr := FieldExpr{t1.Desc.Fields[1]}\n\terr = sa.Init(\"sum\", &expr)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tagg := NewAggregator([]AggState{&sa}, hf)\n\titer, err := agg.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif iter == nil {\n\t\tt.Fatalf(\"Iterator was nil\")\n\t}\n\ttup, err := iter()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif tup == nil {\n\t\tt.Fatalf(\"Expected non-null tuple\")\n\t}\n\tsum := tup.Fields[0].(IntField).Value\n\tif sum != 1024 {\n\t\tt.Errorf(\"unexpected sum\")\n\t}\n}\n\nfunc TestAggMinStringAgg(t *testing.T) {\n\t_, t1, t2, hf, _, tid := makeTestVars(t)\n\terr := hf.insertTuple(&t1, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\terr = hf.insertTuple(&t2, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tsa := MinAggState{}\n\texpr := FieldExpr{t1.Desc.Fields[0]}\n\terr = sa.Init(\"min\", &expr)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tagg := NewAggregator([]AggState{&sa}, hf)\n\titer, err := agg.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif iter == nil {\n\t\tt.Fatalf(\"Iterator was nil\")\n\t}\n\ttup, err := iter()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif tup == nil {\n\t\tt.Fatalf(\"Expected non-null tuple\")\n\t}\n\tmin := tup.Fields[0].(StringField).Value\n\tif min != \"george jones\" {\n\t\tt.Errorf(\"incorrect min\")\n\t}\n}\n\nfunc TestAggSimpleCount(t *testing.T) {\n\t_, t1, t2, hf, _, tid := makeTestVars(t)\n\terr := hf.insertTuple(&t1, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\terr = hf.insertTuple(&t2, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tsa := CountAggState{}\n\texpr := FieldExpr{t1.Desc.Fields[0]}\n\terr = sa.Init(\"count\", &expr)\n\tif err != nil 
{\n\t\tt.Fatalf(err.Error())\n\t}\n\tagg := NewAggregator([]AggState{&sa}, hf)\n\titer, err := agg.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif iter == nil {\n\t\tt.Fatalf(\"Iterator was nil\")\n\t}\n\ttup, err := iter()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif tup == nil {\n\t\tt.Fatalf(\"Expected non-null tuple\")\n\t}\n\tcnt := tup.Fields[0].(IntField).Value\n\tif cnt != 2 {\n\t\tt.Errorf(\"unexpected count\")\n\t}\n}\n\nfunc TestAggMulti(t *testing.T) {\n\t_, t1, t2, hf, _, tid := makeTestVars(t)\n\terr := hf.insertTuple(&t1, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\terr = hf.insertTuple(&t2, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tca := CountAggState{}\n\texpr := FieldExpr{t1.Desc.Fields[0]}\n\terr = ca.Init(\"count\", &expr)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tsa := SumAggState{}\n\texpr = FieldExpr{t1.Desc.Fields[1]}\n\terr = sa.Init(\"sum\", &expr)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tagg := NewAggregator([]AggState{&ca, &sa}, hf)\n\titer, err := agg.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif iter == nil {\n\t\tt.Fatalf(\"Iterator was nil\")\n\t}\n\ttup, err := iter()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif tup == nil {\n\t\tt.Fatalf(\"Expected non-null tuple\")\n\t}\n\tcnt := tup.Fields[0].(IntField).Value\n\tif cnt != 2 {\n\t\tt.Errorf(\"unexpected count\")\n\t}\n\tsum := tup.Fields[1].(IntField).Value\n\tif sum != 1024 {\n\t\tt.Errorf(\"unexpected sum\")\n\t}\n}\n\nfunc TestAggGbyCount(t *testing.T) {\n\t_, t1, t2, hf, _, tid := makeTestVars(t)\n\terr := hf.insertTuple(&t1, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\terr = hf.insertTuple(&t2, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\terr = hf.insertTuple(&t2, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\terr = hf.insertTuple(&t2, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tgbyFields := []Expr{&FieldExpr{hf.Descriptor().Fields[0]}}\n\tsa := CountAggState{}\n\texpr := FieldExpr{t1.Desc.Fields[0]}\n\terr = sa.Init(\"count\", &expr)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tagg := NewGroupedAggregator([]AggState{&sa}, gbyFields, hf)\n\titer, _ := agg.Iterator(tid)\n\tfields := []FieldType{\n\t\t{\"name\", \"\", StringType},\n\t\t{\"count\", \"\", IntType},\n\t}\n\toutt1 := Tuple{TupleDesc{fields},\n\t\t[]DBValue{\n\t\t\tStringField{\"sam\"},\n\t\t\tIntField{1},\n\t\t},\n\t\tnil,\n\t}\n\toutt2 := Tuple{\n\t\tTupleDesc{fields},\n\t\t[]DBValue{\n\t\t\tStringField{\"george jones\"},\n\t\t\tIntField{3},\n\t\t},\n\t\tnil,\n\t}\n\tts := []*Tuple{&outt1, &outt2}\n\terr = CheckIfOutputMatches(iter, ts)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n}\n\nfunc TestAggGbySum(t *testing.T) {\n\t_, t1, t2, hf, _, tid := makeTestVars(t)\n\terr := hf.insertTuple(&t1, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\terr = hf.insertTuple(&t2, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\terr = hf.insertTuple(&t1, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\terr = hf.insertTuple(&t2, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\t//gbyFields := hf.td.Fields[0:1]\n\tgbyFields := []Expr{&FieldExpr{hf.Descriptor().Fields[0]}}\n\n\tsa := SumAggState{}\n\texpr := FieldExpr{t1.Desc.Fields[1]}\n\terr = sa.Init(\"sum\", &expr)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tagg := NewGroupedAggregator([]AggState{&sa}, gbyFields, hf)\n\titer, _ := 
agg.Iterator(tid)\n\n\tfields := []FieldType{\n\t\t{\"name\", \"\", StringType},\n\t\t{\"sum\", \"\", IntType},\n\t}\n\toutt1 := Tuple{TupleDesc{fields},\n\t\t[]DBValue{\n\t\t\tStringField{\"sam\"},\n\t\t\tIntField{50},\n\t\t}, nil,\n\t}\n\toutt2 := Tuple{\n\t\tTupleDesc{fields},\n\t\t[]DBValue{\n\t\t\tStringField{\"george jones\"},\n\t\t\tIntField{1998},\n\t\t}, nil,\n\t}\n\tts := []*Tuple{&outt1, &outt2}\n\terr = CheckIfOutputMatches(iter, ts)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n}\n\nfunc TestAggFilterCount(t *testing.T) {\n\t_, t1, t2, hf, _, tid := makeTestVars(t)\n\terr := hf.insertTuple(&t1, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\terr = hf.insertTuple(&t2, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tvar f FieldType = FieldType{\"age\", \"\", IntType}\n\tfilt, err := NewFilter(&ConstExpr{IntField{25}, IntType}, OpGt, &FieldExpr{f}, hf)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif filt == nil {\n\t\tt.Fatalf(\"Filter returned nil\")\n\t}\n\n\tsa := CountAggState{}\n\texpr := FieldExpr{t1.Desc.Fields[0]}\n\terr = sa.Init(\"count\", &expr)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tagg := NewAggregator([]AggState{&sa}, filt)\n\titer, err := agg.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif iter == nil {\n\t\tt.Fatalf(\"Iterator was nil\")\n\t}\n\ttup, err := iter()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif tup == nil {\n\t\tt.Fatalf(\"Expected non-null tuple\")\n\t}\n\tcnt := tup.Fields[0].(IntField).Value\n\tif cnt != 1 {\n\t\tt.Errorf(\"unexpected count\")\n\t}\n}\n\nfunc TestAggRepeatedIteration(t *testing.T) {\n\t_, t1, t2, hf, _, tid := makeTestVars(t)\n\terr := hf.insertTuple(&t1, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\terr = hf.insertTuple(&t2, tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tsa := CountAggState{}\n\texpr := FieldExpr{t1.Desc.Fields[0]}\n\terr = sa.Init(\"count\", &expr)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tagg := NewAggregator([]AggState{&sa}, hf)\n\titer, err := agg.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif iter == nil {\n\t\tt.Fatalf(\"Iterator was nil\")\n\t}\n\ttup, err := iter()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif tup == nil {\n\t\tt.Fatalf(\"Expected non-null tuple\")\n\t}\n\tcnt := tup.Fields[0].(IntField).Value\n\tif cnt != 2 {\n\t\tt.Errorf(\"unexpected count\")\n\t}\n\titer, err = agg.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif iter == nil {\n\t\tt.Fatalf(\"Iterator was nil\")\n\t}\n\ttup, err = iter()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif tup == nil {\n\t\tt.Fatalf(\"Expected non-null tuple\")\n\t}\n\tcnt2 := tup.Fields[0].(IntField).Value\n\tif cnt != cnt2 {\n\t\tt.Errorf(\"count changed on repeated iteration\")\n\t}\n}\n" - } - ], - "test_command": [ - "go test agg_op_test.go" - ] - }, - { - "instance_id": 8, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 2: GoDB Operators", - "part_name": "2.3. Insertion and deletion", - "exercise": "Exercise 3", - "introduction": "In this lab assignment, you will write a set of operators for GoDB to\nimplement table modifications (e.g., insert and delete records), filters,\njoins, aggregates, etc. 
These will build on top of the foundation that you wrote\nin Lab 1 to provide you with a database system that can perform simple queries\nover multiple tables.\n\nYou do not need to implement transactions or locking in this lab.\n\nThe remainder of this document gives some suggestions about how to start coding,\ndescribes a set of exercises to help you work through the lab, and discusses how\nto hand in your code. This lab requires you to write a fair amount of code, so\nwe encourage you to **start early**!", - "Description": "### 2.3. Insertion and deletion\n\nNow that you have written all of the aggregations, you will implement the `InsertOp` and `DeleteOp` operators.\n\nFor plans that implement `insert` and `delete` queries, the top most operator is\na special `InsertOp` or `DeleteOp` operator that modifies the pages of a specific `DBFile`. These operators\nreturn the number of affected tuples. This is implemented by returning a single\ntuple with one integer field, containing the count.\n\n* *Insert*: This operator adds the tuples it reads from its child operator to\n the `insertFile` specified in its constructor. It should use the\n `insertFile.insertTuple()` method to do this.\n\n* *Delete*: This operator deletes the tuples it reads from its child operator\n from the `deleteFile` specified in its constructor. It should use the\n `deleteFile.deleteTuple()` method to do this.\n\n Both of these operators should perform all of the inserts or deletes on the first invocation of the iterator, and then return the number of records inserted or deleted. The returned tuple should have a single field \"count\" of type integer.\n You will need to implement the constructors, `Descriptor()` and `Iterator()` for `InsertOp` and `DeleteOp` operators.\n The `Descriptor()` method should also return a descriptor with a single \"count\" field.\n\n**Exercise 3.**\n\nImplement the skeleton methods in:\n\n------\n\n* godb/insert_op.go\n* godb/delete_op.go\n\n------\n\nNote that the correctness of this exercise heavily depends on the correctness of your code for lab1, especially, `heapFile.insertTuple()` and `heapFile.deleteTuple()`.\nAt this point, your code should pass the unit tests in `insert_op_test.go` and `delete_op_test.go`.", - "repo/location": "$ cd go-db-hw-2024\n$ git pull upstream main", - "dependency": [ - "1", - "2", - "3", - "4", - "5" - ], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab2.md", - "codes": [ - { - "code_path": "godb/insert_op.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/insert_op.go", - "code_content": "package godb\n\nimport \"fmt\"\n\ntype InsertOp struct {\n\t// TODO: some code goes here\n}\n\n// Construct an insert operator that inserts the records in the child Operator\n// into the specified DBFile.\nfunc NewInsertOp(insertFile DBFile, child Operator) *InsertOp {\n\t// TODO: some code goes here\n\treturn nil\n}\n\n// The insert TupleDesc is a one column descriptor with an integer field named \"count\"\nfunc (i *InsertOp) Descriptor() *TupleDesc {\n\t// TODO: some code goes here\n\treturn nil\n}\n\n// Return an iterator function that inserts all of the tuples from the child\n// iterator into the DBFile passed to the constuctor and then returns a\n// one-field tuple with a \"count\" field indicating the number of tuples that\n// were inserted. 
Tuples should be inserted using the [DBFile.insertTuple]\n// method.\nfunc (iop *InsertOp) Iterator(tid TransactionID) (func() (*Tuple, error), error) {\n\t// TODO: some code goes here\n\treturn nil, fmt.Errorf(\"InsertOp.Iterator not implemented\")\n}\n" - }, - { - "code_path": "godb/delete_op.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/delete_op.go", - "code_content": "package godb\n\nimport (\n\"fmt\"\n)\n\ntype DeleteOp struct {\n\t// TODO: some code goes here\n}\n\n// Construct a delete operator. The delete operator deletes the records in the\n// child Operator from the specified DBFile.\nfunc NewDeleteOp(deleteFile DBFile, child Operator) *DeleteOp {\n\t// TODO: some code goes here\n\treturn nil // replace me\n}\n\n// The delete TupleDesc is a one column descriptor with an integer field named\n// \"count\".\nfunc (i *DeleteOp) Descriptor() *TupleDesc {\n\t// TODO: some code goes here\n\treturn &TupleDesc{} // replace me\n\n}\n\n// Return an iterator that deletes all of the tuples from the child iterator\n// from the DBFile passed to the constructor and then returns a one-field tuple\n// with a \"count\" field indicating the number of tuples that were deleted.\n// Tuples should be deleted using the [DBFile.deleteTuple] method.\nfunc (dop *DeleteOp) Iterator(tid TransactionID) (func() (*Tuple, error), error) {\n\t// TODO: some code goes here\n\treturn nil, fmt.Errorf(\"DeleteOp.Iterator not implemented\") // replace me\n}\n" - } - ], - "test_codes": [ - { - "code_name": "insert_op_test.go", - "code_path": "godb/insert_op_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/insert_op_test.go", - "code_content": "package godb\n\nimport (\n\t\"os\"\n\t\"testing\"\n)\n\nconst InsertTestFile string = \"InsertTestFile.dat\"\n\nfunc TestInsert(t *testing.T) {\n\ttd, t1, _, hf, bp, tid := makeTestVars(t)\n\thf.insertTuple(&t1, tid)\n\thf.insertTuple(&t1, tid)\n\tbp.CommitTransaction(tid)\n\tos.Remove(InsertTestFile)\n\thf2, _ := NewHeapFile(InsertTestFile, &td, bp)\n\tif hf2 == nil {\n\t\tt.Fatalf(\"hf was nil\")\n\t}\n\ttid = NewTID()\n\tbp.BeginTransaction(tid)\n\tins := NewInsertOp(hf2, hf)\n\titer, _ := ins.Iterator(tid)\n\tif iter == nil {\n\t\tt.Fatalf(\"iter was nil\")\n\t}\n\ttup, err := iter()\n\tif err != nil {\n\t\tt.Errorf(err.Error())\n\t\treturn\n\t}\n\tif tup == nil {\n\t\tt.Errorf(\"insert did not return tuple\")\n\t\treturn\n\t}\n\tintField, ok := tup.Fields[0].(IntField)\n\tif !ok || len(tup.Fields) != 1 || intField.Value != 2 {\n\t\tt.Errorf(\"invalid output tuple\")\n\t\treturn\n\t}\n\tbp.CommitTransaction(tid)\n\ttid = NewTID()\n\tbp.BeginTransaction(tid)\n\n\tcnt := 0\n\titer, _ = hf2.Iterator(tid)\n\tfor {\n\t\ttup, err := iter()\n\n\t\tif err != nil {\n\t\t\tt.Errorf(err.Error())\n\t\t}\n\t\tif tup == nil {\n\t\t\tbreak\n\t\t}\n\t\tcnt = cnt + 1\n\t}\n\tif cnt != 2 {\n\t\tt.Errorf(\"insert failed, expected 2 tuples, got %d\", cnt)\n\t}\n}\n" - }, - { - "code_name": "delete_op_test.go", - "code_path": "godb/delete_op_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/delete_op_test.go", - "code_content": "package godb\n\nimport (\n\t\"testing\"\n)\n\n// This function is for _testing only_! 
It is not part of the godb API.\nfunc BeginTransactionForTest(t *testing.T, bp *BufferPool) TransactionID {\n\tt.Helper()\n\ttid := NewTID()\n\terr := bp.BeginTransaction(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\treturn tid\n}\n\nfunc TestDelete(t *testing.T) {\n\t_, t1, t2, hf, bp, tid := makeTestVars(t)\n\n\tinsertTupleForTest(t, hf, &t1, tid)\n\tinsertTupleForTest(t, hf, &t2, tid)\n\n\tbp.CommitTransaction(tid)\n\tvar f FieldType = FieldType{\"age\", \"\", IntType}\n\tfilt, err := NewFilter(&ConstExpr{IntField{25}, IntType}, OpGt, &FieldExpr{f}, hf)\n\tif err != nil {\n\t\tt.Errorf(err.Error())\n\t}\n\tdop := NewDeleteOp(hf, filt)\n\tif dop == nil {\n\t\tt.Fatalf(\"delete op was nil\")\n\t}\n\n\ttid = BeginTransactionForTest(t, bp)\n\titer, _ := dop.Iterator(tid)\n\tif iter == nil {\n\t\tt.Fatalf(\"iter was nil\")\n\t}\n\ttup, err := iter()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif tup == nil {\n\t\tt.Fatalf(\"insert did not return tuple\")\n\t}\n\tintField, ok := tup.Fields[0].(IntField)\n\tif !ok || len(tup.Fields) != 1 || intField.Value != 1 {\n\t\tt.Fatalf(\"invalid output tuple\")\n\t}\n\tbp.CommitTransaction(tid)\n\n\ttid = BeginTransactionForTest(t, bp)\n\n\titer, _ = hf.Iterator(tid)\n\n\tcnt := 0\n\tfor {\n\t\ttup, _ := iter()\n\t\tif tup == nil {\n\t\t\tbreak\n\t\t}\n\t\tcnt++\n\t}\n\tif cnt != 1 {\n\t\tt.Errorf(\"unexpected number of results after deletion\")\n\t}\n}\n" - } - ], - "test_command": [ - "go test insert_op_test.go", - "go test delete_op_test.go" - ] - }, - { - "instance_id": 9, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 2: GoDB Operators", - "part_name": "2.4. Projection", - "exercise": "Exercise 4", - "introduction": "In this lab assignment, you will write a set of operators for GoDB to\nimplement table modifications (e.g., insert and delete records), filters,\njoins, aggregates, etc. These will build on top of the foundation that you wrote\nin Lab 1 to provide you with a database system that can perform simple queries\nover multiple tables.\n\nYou do not need to implement transactions or locking in this lab.\n\nThe remainder of this document gives some suggestions about how to start coding,\ndescribes a set of exercises to help you work through the lab, and discusses how\nto hand in your code. This lab requires you to write a fair amount of code, so\nwe encourage you to **start early**!", - "Description": "### 2.4. Projection\n\n\nYou will now implement the projection operation. Project iterates through its child, selects some of each tuple's fields, and returns them. Optionally, you will need to support the `DISTINCT` keyword, meaning that identical tuples should be returned only once. 
For example, given a dataset like:\n\n```\nsam, 25, $100,000\ntim, 30, $75,000\nmike, 35, $50,000\nsam, 50, $150,000\n```\n\nIf the query is:\n```\nSELECT name FROM table\n```\n\nThe result should be:\n```\nsam\ntim\nmike\nsam\n```\n\nBut the following query:\n```\nSELECT DISTINCT name FROM table\n```\n\nShould instead produce:\n```\nsam\ntim\nmike\n```\n\n\nThe list of fields to select, their names to be outputted by, whether the operation is `DISTINCT`, and the child operator is provided to the `NewProjectOp` constructor:\n```\nfunc NewProjectOp(selectFields []Expr, outputNames []string, distinct bool, child Operator) (Operator, error) \n```\nHere, `selectFields` is a list of expressions that can be extracted from the child operator's tuples (as in previous operators), and `outputNames` records the names that will populate the `Fname` fields in the tuple descriptor of the projection operation.\n\n**Exercise 4.**\n\nImplement the skeleton methods in:\n\n------\n\n* godb/project_op.go\n\n------\n\nAt this point, your code should pass the unit tests in `project_op_test.go`. Passing `TestProjectDistinctOptional` is optional; if you pass it, we will offer 5% additional extra credit on the lab. Please be sure to describe how you implemented support for distinct in your writeup.", - "repo/location": "$ cd go-db-hw-2024\n$ git pull upstream main", - "dependency": [ - "1", - "2", - "3", - "4", - "5" - ], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab2.md", - "codes": [ - { - "code_path": "godb/project_op.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/project_op.go", - "code_content": "package godb\n\nimport \"fmt\"\n\ntype Project struct {\n\tselectFields []Expr // required fields for parser\n\toutputNames []string\n\tchild Operator\n\t// You may want to add additional fields here\n\t// TODO: some code goes here\n}\n\n// Construct a projection operator. It saves the list of selected field, child,\n// and the child op. Here, selectFields is a list of expressions that represents\n// the fields to be selected, outputNames are names by which the selected fields\n// are named (should be same length as selectFields; throws error if not),\n// distinct is for noting whether the projection reports only distinct results,\n// and child is the child operator.\nfunc NewProjectOp(selectFields []Expr, outputNames []string, distinct bool, child Operator) (Operator, error) {\n\t// TODO: some code goes here\n\treturn nil, fmt.Errorf(\"NewProjectOp not implemented.\") // replace me\n}\n\n// Return a TupleDescriptor for this projection. The returned descriptor should\n// contain fields for each field in the constructor selectFields list with\n// outputNames as specified in the constructor.\n//\n// HINT: you can use expr.GetExprType() to get the field type\nfunc (p *Project) Descriptor() *TupleDesc {\n\t// TODO: some code goes here\n\treturn &TupleDesc{} // replace me\n\n}\n\n// Project operator implementation. This function should iterate over the\n// results of the child iterator, projecting out the fields from each tuple. In\n// the case of distinct projection, duplicate tuples should be removed. To\n// implement this you will need to record in some data structure with the\n// distinct tuples seen so far. 
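One common way to support the optional `DISTINCT` behaviour described above is to keep a set keyed by a canonical encoding of each projected tuple and skip any tuple whose key has already been seen. The sketch below shows the idea with plain string rows rather than GoDB tuples; the `row` type and its `key` helper are assumptions for illustration, and a real implementation would build the key from the projected field values.

```go
package main

import (
	"fmt"
	"strings"
)

// row stands in for a projected tuple: just the selected field values.
type row []string

// key builds a canonical encoding of the projected values so that equal rows
// collide in the map. Joining with an unlikely separator is enough for this
// sketch; a real implementation would encode each field value unambiguously.
func (r row) key() string { return strings.Join(r, "\x00") }

// distinct drops duplicate rows while preserving first-seen order, the way a
// DISTINCT projection returns each identical result tuple only once.
func distinct(in []row) []row {
	seen := make(map[string]struct{})
	var out []row
	for _, r := range in {
		k := r.key()
		if _, dup := seen[k]; dup {
			continue
		}
		seen[k] = struct{}{}
		out = append(out, r)
	}
	return out
}

func main() {
	rows := []row{{"sam"}, {"tim"}, {"mike"}, {"sam"}}
	for _, r := range distinct(rows) {
		fmt.Println(r[0]) // sam, tim, mike
	}
}
```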
Note that support for the distinct keyword is\n// optional as specified in the lab 2 assignment.\nfunc (p *Project) Iterator(tid TransactionID) (func() (*Tuple, error), error) {\n\t// TODO: some code goes here\n\treturn nil, fmt.Errorf(\"Project.Iterator not implemented\") // replace me\n}\n" - } - ], - "test_codes": [ - { - "code_name": "project_op_test.go", - "code_path": "godb/project_op_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/project_op_test.go", - "code_content": "package godb\n\nimport (\n\t\"testing\"\n)\n\nfunc TestProject(t *testing.T) {\n\t_, t1, t2, hf, _, tid := makeTestVars(t)\n\thf.insertTuple(&t1, tid)\n\thf.insertTuple(&t2, tid)\n\t//fs := make([]FieldType, 1)\n\t//fs[0] = t1.Desc.Fields[0]\n\tvar outNames []string = make([]string, 1)\n\toutNames[0] = \"outf\"\n\tfieldExpr := FieldExpr{t1.Desc.Fields[0]}\n\tproj, _ := NewProjectOp([]Expr{&fieldExpr}, outNames, false, hf)\n\tif proj == nil {\n\t\tt.Fatalf(\"project was nil\")\n\t}\n\titer, _ := proj.Iterator(tid)\n\tif iter == nil {\n\t\tt.Fatalf(\"iter was nil\")\n\t}\n\ttup, err := iter()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tif len(tup.Fields) != 1 || tup.Desc.Fields[0].Fname != \"outf\" {\n\t\tt.Errorf(\"invalid output tuple\")\n\t}\n\n}\n\nfunc TestProjectDistinctOptional(t *testing.T) {\n\t_, t1, t2, hf, _, tid := makeTestVars(t)\n\thf.insertTuple(&t1, tid)\n\thf.insertTuple(&t2, tid)\n\thf.insertTuple(&t1, tid)\n\thf.insertTuple(&t2, tid)\n\n\t//fs := make([]FieldType, 1)\n\t//fs[0] = t1.Desc.Fields[0]\n\tvar outNames []string = make([]string, 1)\n\toutNames[0] = \"outf\"\n\tfieldExpr := FieldExpr{t1.Desc.Fields[0]}\n\tproj, _ := NewProjectOp([]Expr{&fieldExpr}, outNames, true, hf)\n\tif proj == nil {\n\t\tt.Fatalf(\"project was nil\")\n\t}\n\titer, _ := proj.Iterator(tid)\n\tif iter == nil {\n\t\tt.Fatalf(\"iter was nil\")\n\t}\n\tcnt := 0\n\tfor {\n\t\ttup, err := iter()\n\t\tif err != nil {\n\t\t\tt.Fatalf(err.Error())\n\t\t}\n\t\tif tup == nil {\n\t\t\tbreak\n\t\t}\n\t\tcnt = cnt + 1\n\t}\n\tif cnt != 2 {\n\t\tt.Errorf(\"expected two names, got %d\", cnt)\n\n\t}\n}\n\nfunc TestProjectOrdering(t *testing.T) {\n\thf, tup, td, bp, err := makeOrderByOrderingVars()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\ttid := NewTID()\n\tbp.BeginTransaction(tid)\n\thf.insertTuple(&tup, tid)\n\n\tvar outNames = []string{\"out1\", \"out2\"}\n\texprs := []Expr{&FieldExpr{td.Fields[2]}, &FieldExpr{td.Fields[0]}}\n\n\tproj, _ := NewProjectOp(exprs, outNames, false, hf)\n\tif proj == nil {\n\t\tt.Fatalf(\"project was nil\")\n\t}\n\titer, _ := proj.Iterator(tid)\n\tif iter == nil {\n\t\tt.Fatalf(\"iter was nil\")\n\t}\n\n\ttupOut, err := iter()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tvar expectedDesc = TupleDesc{Fields: []FieldType{\n\t\t{Fname: \"out1\", Ftype: IntType},\n\t\t{Fname: \"out2\", Ftype: StringType},\n\t}}\n\n\tif !expectedDesc.equals(&tupOut.Desc) {\n\t\tt.Fatalf(\"Unexpected descriptor of projected tuple\")\n\t}\n\n}\n\nfunc TestProjectExtra(t *testing.T) {\n\t_, _, t1, _, _ := makeJoinOrderingVars(t)\n\tft1 := FieldType{\"a\", \"\", StringType}\n\tft2 := FieldType{\"b\", \"\", IntType}\n\toutTup, _ := t1.project([]FieldType{ft1})\n\tif (len(outTup.Fields)) != 1 {\n\t\tt.Fatalf(\"project returned %d fields, expected 1\", len(outTup.Fields))\n\t}\n\tv, ok := outTup.Fields[0].(StringField)\n\n\tif !ok {\n\t\tt.Fatalf(\"project of name didn't return string\")\n\t}\n\tif v.Value != \"sam\" {\n\t\tt.Fatalf(\"project didn't return 
sam\")\n\n\t}\n\toutTup, _ = t1.project([]FieldType{ft2})\n\tif (len(outTup.Fields)) != 1 {\n\t\tt.Fatalf(\"project returned %d fields, expected 1\", len(outTup.Fields))\n\t}\n\tv2, ok := outTup.Fields[0].(IntField)\n\n\tif !ok {\n\t\tt.Fatalf(\"project of name didn't return int\")\n\t}\n\tif v2.Value != 25 {\n\t\tt.Fatalf(\"project didn't return 25\")\n\n\t}\n\n\toutTup, _ = t1.project([]FieldType{ft2, ft1})\n\tif (len(outTup.Fields)) != 2 {\n\t\tt.Fatalf(\"project returned %d fields, expected 2\", len(outTup.Fields))\n\t}\n\tv, ok = outTup.Fields[1].(StringField)\n\tif !ok {\n\t\tt.Fatalf(\"project of name didn't return string in second field\")\n\t}\n\tif v.Value != \"sam\" {\n\t\tt.Fatalf(\"project didn't return sam\")\n\n\t}\n\n\tv2, ok = outTup.Fields[0].(IntField)\n\tif !ok {\n\t\tt.Fatalf(\"project of name didn't return int in first field\")\n\t}\n\tif v2.Value != 25 {\n\t\tt.Fatalf(\"project didn't return 25\")\n\n\t}\n\n}\n\nfunc TestTupleProjectExtra(t *testing.T) {\n\tvar td = TupleDesc{Fields: []FieldType{\n\t\t{Fname: \"name1\", TableQualifier: \"tq1\", Ftype: StringType},\n\t\t{Fname: \"name2\", TableQualifier: \"tq2\", Ftype: StringType},\n\t\t{Fname: \"name1\", TableQualifier: \"tq2\", Ftype: StringType},\n\t}}\n\n\tvar t1 = Tuple{\n\t\tDesc: td,\n\t\tFields: []DBValue{\n\t\t\tStringField{\"SFname1tq1\"},\n\t\t\tStringField{\"SFname2tq2\"},\n\t\t\tStringField{\"SFname1tq2\"},\n\t\t}}\n\n\tt2, err := t1.project([]FieldType{\n\t\t{Fname: \"name1\", TableQualifier: \"tq1\", Ftype: StringType},\n\t\t{Fname: \"name2\", TableQualifier: \"\", Ftype: StringType},\n\t\t{Fname: \"name1\", TableQualifier: \"tq1\", Ftype: StringType},\n\t\t{Fname: \"name2\", TableQualifier: \"tq2\", Ftype: StringType},\n\t\t{Fname: \"name1\", TableQualifier: \"tq2\", Ftype: StringType},\n\t})\n\n\tif err != nil {\n\t\tt.Fatalf(\"%v\", err)\n\t}\n\n\tif t2.Fields[0].(StringField).Value != \"SFname1tq1\" {\n\t\tt.Fatalf(\"tuple project extra wrong match\")\n\t}\n\n\tif t2.Fields[1].(StringField).Value != \"SFname2tq2\" {\n\t\tt.Fatalf(\"tuple project extra wrong match\")\n\t}\n\n\tif t2.Fields[2].(StringField).Value != \"SFname1tq1\" {\n\t\tt.Fatalf(\"tuple project extra wrong match\")\n\t}\n\tif t2.Fields[3].(StringField).Value != \"SFname2tq2\" {\n\t\tt.Fatalf(\"tuple project extra wrong match\")\n\t}\n\tif t2.Fields[4].(StringField).Value != \"SFname1tq2\" {\n\t\tt.Fatalf(\"tuple project extra wrong match\")\n\t}\n\n}\n" - } - ], - "test_command": [ - "go test project_op_test.go" - ] - }, - { - "instance_id": 10, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 2: GoDB Operators", - "part_name": "2.5. Order By", - "exercise": "Exercise 5", - "introduction": "In this lab assignment, you will write a set of operators for GoDB to\nimplement table modifications (e.g., insert and delete records), filters,\njoins, aggregates, etc. These will build on top of the foundation that you wrote\nin Lab 1 to provide you with a database system that can perform simple queries\nover multiple tables.\n\nYou do not need to implement transactions or locking in this lab.\n\nThe remainder of this document gives some suggestions about how to start coding,\ndescribes a set of exercises to help you work through the lab, and discusses how\nto hand in your code. This lab requires you to write a fair amount of code, so\nwe encourage you to **start early**!", - "Description": "### 2.5. Order By\n\n\nYou will now implement the \"order by\" operation. 
It iterates through its child in a particular order. It needs to support ordering by more than one field, with each field in either ascending or descending order. For example, consider the query:\n\n```\nSELECT name, age, salary\nFROM table\nORDER BY name ASC, age DESC\n```\n\nGiven a dataset like:\n```\nsam, 25, $100,000\ntim, 30, $75,000\nmike, 35, $50,000\nsam, 50, $150,000\n```\n\nThe above query should produce the result:\n```\nmike, 35, $50,000\nsam, 50, $150,000\nsam, 25, $100,000\ntim, 30, $75,000\n```\n\nThe list of fields to order by and the ascending/descending for each field provided to the `NewOrderBy` constructor:\n```\nfunc NewOrderBy(orderByFields []Expr, child Operator, ascending []bool) (*OrderBy, error) {\n```\nHere, `orderByFields` is a list of expressions that can be extracted from the child operator's tuples (as in previous operators), and the ascending bitmap indicates whether the *i*th field in the `orderByFields` list should be ascending (true) or descending(false).\n\n**Exercise 5.**\n\nImplement the skeleton methods in:\n\n------\n\n* godb/order_by_op.go\n\n------\n\nAt this point, your code should pass the unit tests in `order_by_test.go`.", - "repo/location": "$ cd go-db-hw-2024\n$ git pull upstream main", - "dependency": [ - "1", - "2", - "3", - "4", - "5" - ], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab2.md", - "codes": [ - { - "code_path": "godb/order_by_op.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/order_by_op.go", - "code_content": "package godb\n\nimport (\n\t\"fmt\"\n)\n\ntype OrderBy struct {\n\torderBy []Expr // OrderBy should include these two fields (used by parser)\n\tchild Operator\n\t// TODO: You may want to add additional fields here\n}\n\n// Construct an order by operator. Saves the list of field, child, and ascending\n// values for use in the Iterator() method. Here, orderByFields is a list of\n// expressions that can be extracted from the child operator's tuples, and the\n// ascending bitmap indicates whether the ith field in the orderByFields list\n// should be in ascending (true) or descending (false) order.\nfunc NewOrderBy(orderByFields []Expr, child Operator, ascending []bool) (*OrderBy, error) {\n\t// TODO: some code goes here\n\treturn nil, fmt.Errorf(\"NewOrderBy not implemented.\") //replace me\n\n}\n\n// Return the tuple descriptor.\n//\n// Note that the order by just changes the order of the child tuples, not the\n// fields that are emitted.\nfunc (o *OrderBy) Descriptor() *TupleDesc {\n\t// TODO: some code goes here\n\treturn &TupleDesc{} // replace me\n}\n\n// Return a function that iterates through the results of the child iterator in\n// ascending/descending order, as specified in the constructor. This sort is\n// \"blocking\" -- it should first construct an in-memory sorted list of results\n// to return, and then iterate through them one by one on each subsequent\n// invocation of the iterator function.\n//\n// Although you are free to implement your own sorting logic, you may wish to\n// leverage the go sort package and the [sort.Sort] method for this purpose. To\n// use this you will need to implement three methods: Len, Swap, and Less that\n// the sort algorithm will invoke to produce a sorted list. 
See the first\n// example, example of SortMultiKeys, and documentation at:\n// https://pkg.go.dev/sort\nfunc (o *OrderBy) Iterator(tid TransactionID) (func() (*Tuple, error), error) {\n\t// TODO: some code goes here\n\treturn nil, fmt.Errorf(\"OrderBy.Iterator not implemented\") // replace me\n}\n" - } - ], - "test_codes": [ - { - "code_name": "order_by_test.go", - "code_path": "godb/order_by_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/order_by_test.go", - "code_content": "package godb\n\nimport (\n\t\"os\"\n\t\"testing\"\n)\n\nfunc makeOrderByOrderingVars() (DBFile, Tuple, TupleDesc, *BufferPool, error) {\n\tvar td = TupleDesc{Fields: []FieldType{\n\t\t{Fname: \"a\", Ftype: StringType},\n\t\t{Fname: \"b\", Ftype: IntType},\n\t\t{Fname: \"c\", Ftype: IntType},\n\t}}\n\n\tvar t = Tuple{\n\t\tDesc: td,\n\t\tFields: []DBValue{\n\t\t\tStringField{\"sam\"},\n\t\t\tIntField{25},\n\t\t\tIntField{5},\n\t\t}}\n\n\tbp, c, err := MakeTestDatabase(3, \"catalog.txt\")\n\tif err != nil {\n\t\treturn nil, t, td, nil, err\n\t}\n\n\tos.Remove(\"test.dat\")\n\thf, err := c.addTable(\"test\", td)\n\tif err != nil {\n\t\treturn hf, t, td, nil, err\n\t}\n\n\treturn hf, t, td, bp, nil\n}\n\n// test the order by operator, by asking it to sort the test database\n// in ascending and descending order and verifying the result\nfunc TestOrderBy(t *testing.T) {\n\t_, t1, t2, hf, _, tid := makeTestVars(t)\n\thf.insertTuple(&t1, tid)\n\thf.insertTuple(&t2, tid)\n\tbs := make([]bool, 2)\n\tfor i := range bs {\n\t\tbs[i] = false\n\t}\n\t//order by name and then age, descending\n\texprs := make([]Expr, len(t1.Desc.Fields))\n\tfor i, f := range t1.Desc.Fields {\n\t\texprs[i] = &FieldExpr{f}\n\t}\n\toby, err := NewOrderBy(exprs, hf, bs)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\titer, _ := oby.Iterator(tid)\n\tif iter == nil {\n\t\tt.Fatalf(\"iter was nil\")\n\t}\n\tvar last string\n\tfor {\n\t\ttup, _ := iter()\n\t\tif tup == nil {\n\t\t\tbreak\n\t\t}\n\t\tfval := tup.Fields[0].(StringField).Value\n\t\tif last != \"\" {\n\t\t\tif fval > last {\n\t\t\t\tt.Fatalf(\"data was not descending, as expected\")\n\t\t\t}\n\t\t}\n\t\tlast = fval\n\t}\n\n\tfor i := range bs {\n\t\tbs[i] = true\n\t}\n\t//order by name and then age, ascending\n\toby, err = NewOrderBy(exprs, hf, bs)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\titer, _ = oby.Iterator(tid)\n\tlast = \"\"\n\tfor {\n\t\ttup, _ := iter()\n\t\tif tup == nil {\n\t\t\tbreak\n\t\t}\n\t\tfval := tup.Fields[0].(StringField).Value\n\t\tif last != \"\" {\n\t\t\tif fval < last {\n\t\t\t\tt.Fatalf(\"data was not ascending, as expected\")\n\t\t\t}\n\t\t}\n\t\tlast = fval\n\t}\n}\n\n// harder order by test that inserts 4 tuples, and alternates ascending vs descending\nfunc TestOrderByMultiField(t *testing.T) {\n\tvar td = TupleDesc{Fields: []FieldType{\n\t\t{Fname: \"name\", Ftype: StringType},\n\t\t{Fname: \"age\", Ftype: IntType},\n\t}}\n\n\tvar t1 = Tuple{\n\t\tDesc: td,\n\t\tFields: []DBValue{StringField{\"sam\"}, IntField{25}},\n\t\tRid: nil,\n\t}\n\n\tvar t2 = Tuple{\n\t\tDesc: td,\n\t\tFields: []DBValue{StringField{\"tim\"}, IntField{44}},\n\t\tRid: nil,\n\t}\n\n\tvar t3 = Tuple{\n\t\tDesc: td,\n\t\tFields: []DBValue{StringField{\"mike\"}, IntField{88}},\n\t\tRid: nil,\n\t}\n\n\tvar t4 = Tuple{\n\t\tDesc: td,\n\t\tFields: []DBValue{StringField{\"sam\"}, IntField{26}},\n\t\tRid: nil,\n\t}\n\n\tbp, c, err := MakeTestDatabase(2, \"catalog.txt\")\n\tif err != nil 
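As a rough, self-contained illustration of the blocking multi-key sort the `OrderBy` iterator needs, the sketch below sorts a small record type on (name, age) with a per-field ascending flag, reproducing the `ORDER BY name ASC, age DESC` example from the description above. It uses `sort.SliceStable` instead of a hand-written `sort.Interface` (`Len`/`Swap`/`Less`), which is an equally valid route; the `rec` type and `orderBy` helper are illustrative only.

```go
package main

import (
	"fmt"
	"sort"
)

// rec stands in for a tuple with the two fields being ordered on.
type rec struct {
	name string
	age  int
}

// orderBy sorts records on (name, age) with a per-field ascending flag,
// mirroring the orderByFields / ascending pairing in NewOrderBy: the first
// field that differs decides the order, later fields only break ties.
func orderBy(rs []rec, ascending [2]bool) {
	sort.SliceStable(rs, func(i, j int) bool {
		if rs[i].name != rs[j].name {
			if ascending[0] {
				return rs[i].name < rs[j].name
			}
			return rs[i].name > rs[j].name
		}
		if rs[i].age != rs[j].age {
			if ascending[1] {
				return rs[i].age < rs[j].age
			}
			return rs[i].age > rs[j].age
		}
		return false // fully equal: keep stable order
	})
}

func main() {
	rs := []rec{{"sam", 25}, {"tim", 30}, {"mike", 35}, {"sam", 50}}
	// ORDER BY name ASC, age DESC, as in the example query above.
	orderBy(rs, [2]bool{true, false})
	for _, r := range rs {
		fmt.Println(r.name, r.age)
	}
	// Prints: mike 35, sam 50, sam 25, tim 30
}
```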
{\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tos.Remove(\"test.dat\")\n\thf, err := c.addTable(\"test\", td)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\ttid := NewTID()\n\tbp.BeginTransaction(tid)\n\thf.insertTuple(&t1, tid)\n\thf.insertTuple(&t2, tid)\n\thf.insertTuple(&t3, tid)\n\thf.insertTuple(&t4, tid)\n\n\t//order by name and then age, descending\n\tascDescs := [][]bool{{false, false}, {true, false}}\n\texpectedAnswers := [][]Tuple{{t2, t4, t1, t3}, {t3, t4, t1, t2}}\n\texprs := make([]Expr, len(t1.Desc.Fields))\n\tfor i, f := range t1.Desc.Fields {\n\t\texprs[i] = &FieldExpr{f}\n\t}\n\n\tfor i := 0; i < len(ascDescs); i++ {\n\t\tascDesc := ascDescs[i]\n\t\texpected := expectedAnswers[i]\n\t\tresult := []Tuple{}\n\t\toby, err := NewOrderBy(exprs, hf, ascDesc)\n\t\tif err != nil {\n\t\t\tt.Fatalf(err.Error())\n\t\t}\n\t\titer, _ := oby.Iterator(tid)\n\t\tif iter == nil {\n\t\t\tt.Fatalf(\"iter was nil\")\n\t\t}\n\n\t\tfor {\n\t\t\ttup, _ := iter()\n\t\t\tif tup == nil {\n\t\t\t\tbreak\n\t\t\t}\n\t\t\tresult = append(result, *tup)\n\n\t\t}\n\t\tif len(result) != len(expected) {\n\t\t\tt.Fatalf(\"order by test %d produced different number of results than expected (%d got, expected %d)\", i, len(result), len(expected))\n\t\t}\n\t\tfor j, tup := range result {\n\t\t\tif !tup.equals(&expected[j]) {\n\t\t\t\tt.Fatalf(\"order by test %d got wrong tuple at position %d (expected %v, got %v)\", i, j, expected[j].Fields, tup.Fields)\n\t\t\t}\n\t\t}\n\t}\n\n\tbp.CommitTransaction(tid)\n}\n\nfunc TestOrderByFieldsOrder(t *testing.T) {\n\thf, tup, td, bp, err := makeOrderByOrderingVars()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\ttid := NewTID()\n\tbp.BeginTransaction(tid)\n\thf.insertTuple(&tup, tid)\n\n\tbs := make([]bool, 2)\n\tfor i := range bs {\n\t\tbs[i] = false\n\t}\n\n\texprs := []Expr{&FieldExpr{td.Fields[0]}, &FieldExpr{td.Fields[2]}}\n\n\toby, err := NewOrderBy(exprs, hf, bs)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\titer, _ := oby.Iterator(tid)\n\tif iter == nil {\n\t\tt.Fatalf(\"iter was nil\")\n\t}\n\n\tvar expectedDesc = TupleDesc{Fields: []FieldType{\n\t\t{Fname: \"a\", Ftype: StringType},\n\t\t{Fname: \"b\", Ftype: IntType},\n\t\t{Fname: \"c\", Ftype: IntType},\n\t}}\n\n\ttupOut, err := iter()\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\tif !expectedDesc.equals(&tupOut.Desc) {\n\t\tt.Fatalf(\"Unexpected descriptor of ordered tuple\")\n\t}\n}\n" - } - ], - "test_command": [ - "go test order_by_test.go" - ] - }, - { - "instance_id": 11, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 2: GoDB Operators", - "part_name": "2.6. Limit", - "exercise": "Exercise 6", - "introduction": "In this lab assignment, you will write a set of operators for GoDB to\nimplement table modifications (e.g., insert and delete records), filters,\njoins, aggregates, etc. These will build on top of the foundation that you wrote\nin Lab 1 to provide you with a database system that can perform simple queries\nover multiple tables.\n\nYou do not need to implement transactions or locking in this lab.\n\nThe remainder of this document gives some suggestions about how to start coding,\ndescribes a set of exercises to help you work through the lab, and discusses how\nto hand in your code. This lab requires you to write a fair amount of code, so\nwe encourage you to **start early**!", - "Description": "### 2.6. Limit\n\nYou will now implement the limit operation. Limit iterates through its child and selects the first `n` tuples it sees. 
If the child returns `m < n` tuples, the limit operator only returns `m` tuples.\n\n**Exercise 6.**\n\nImplement the skeleton methods in:\n\n------\n\n* godb/limit_op.go\n\n------\n\nAt this point, your code should pass the unit tests in `limit_op_test.go`. ", - "repo/location": "$ cd go-db-hw-2024\n$ git pull upstream main", - "dependency": [ - "1", - "2", - "3", - "4", - "5" - ], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab2.md", - "codes": [ - { - "code_path": "godb/limit_op.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/limit_op.go", - "code_content": "package godb\n\nimport (\n\"fmt\"\n)\n\ntype LimitOp struct {\n\t// Required fields for parser\n\tchild Operator\n\tlimitTups Expr\n\t// Add additional fields here, if needed\n}\n\n// Construct a new limit operator. lim is how many tuples to return and child is\n// the child operator.\nfunc NewLimitOp(lim Expr, child Operator) *LimitOp {\n\treturn &LimitOp{child, lim}\n}\n\n// Return a TupleDescriptor for this limit.\nfunc (l *LimitOp) Descriptor() *TupleDesc {\n\t// TODO: some code goes here\n\treturn &TupleDesc{} // replace me\n}\n\n// Limit operator implementation. This function should iterate over the results\n// of the child iterator, and limit the result set to the first [lim] tuples it\n// sees (where lim is specified in the constructor).\nfunc (l *LimitOp) Iterator(tid TransactionID) (func() (*Tuple, error), error) {\n\t// TODO: some code goes here\n\treturn nil, fmt.Errorf(\"LimitOp.Iterator not implemented\") // replace me\n}\n" - } - ], - "test_codes": [ - { - "code_name": "limit_op_test.go", - "code_path": "godb/limit_op_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/limit_op_test.go", - "code_content": "package godb\n\nimport (\n\t\"testing\"\n)\n\nfunc testLimitCount(t *testing.T, n int) {\n\tt.Helper()\n\t_, t1, t2, hf, bp, _ := makeTestVars(t)\n\n\tfor i := 0; i < n; i++ {\n\t\ttid := NewTID()\n\t\tbp.BeginTransaction(tid)\n\t\terr := hf.insertTuple(&t1, tid)\n\t\tif err != nil {\n\t\t\tt.Errorf(err.Error())\n\t\t\treturn\n\t\t}\n\t\terr = hf.insertTuple(&t2, tid)\n\t\tif err != nil {\n\t\t\tt.Errorf(err.Error())\n\t\t\treturn\n\t\t}\n\n\t\t// hack to force dirty pages to disk\n\t\t// because CommitTransaction may not be implemented\n\t\t// yet if this is called in lab 2\n\t\tif i%10 == 0 {\n\t\t\tbp.FlushAllPages()\n\t\t}\n\n\t\t//commit frequently to prevent buffer pool from filling\n\t\tbp.CommitTransaction(tid)\n\t}\n\n\t// check results\n\ttid := NewTID()\n\tbp.BeginTransaction(tid)\n\tlim := NewLimitOp(&ConstExpr{IntField{int64(n)}, IntType}, hf)\n\tif lim == nil {\n\t\tt.Fatalf(\"Op was nil\")\n\t\treturn\n\t}\n\titer, err := lim.Iterator(tid)\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t\treturn\n\t}\n\tif iter == nil {\n\t\tt.Fatalf(\"Iterator was nil\")\n\t\treturn\n\t}\n\n\tcnt := 0\n\tfor {\n\t\ttup, _ := iter()\n\t\tif tup == nil {\n\t\t\tbreak\n\t\t}\n\t\tcnt++\n\t}\n\tif cnt != n {\n\t\tt.Errorf(\"unexpected number of results\")\n\t}\n\n\tbp.CommitTransaction(tid)\n}\n\nfunc TestLimit5(t *testing.T) {\n\ttestLimitCount(t, 5)\n}\n\nfunc TestLimit50(t *testing.T) {\n\ttestLimitCount(t, 50)\n}\n\nfunc TestLimit100(t *testing.T) {\n\ttestLimitCount(t, 100)\n}\n" - } - ], - "test_command": [ - "go test limit_op_test.go" - ] - }, - { - "instance_id": 12, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 2: GoDB Operators", - "part_name": "2.8. 
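The limit operator's iterator is a thin wrapper around its child: forward tuples until `lim` have been emitted or the child runs dry, whichever comes first. A minimal sketch of that wrapper, with ints standing in for tuples; the names `next`, `sliceIter`, and `limit` are used purely for illustration and are not GoDB API.

```go
package main

import "fmt"

// next models a child operator's iterator.
type next func() (int, bool)

// sliceIter builds a child iterator over a slice, standing in for a scan.
func sliceIter(vals []int) next {
	i := 0
	return func() (int, bool) {
		if i >= len(vals) {
			return 0, false
		}
		v := vals[i]
		i++
		return v, true
	}
}

// limit forwards at most lim values from child. If the child is exhausted
// earlier (m < n in the text above), it simply stops when the child does.
func limit(child next, lim int) next {
	emitted := 0
	return func() (int, bool) {
		if emitted >= lim {
			return 0, false
		}
		v, ok := child()
		if !ok {
			return 0, false
		}
		emitted++
		return v, true
	}
}

func main() {
	it := limit(sliceIter([]int{1, 2, 3, 4, 5}), 3)
	for v, ok := it(); ok; v, ok = it() {
		fmt.Println(v) // 1, 2, 3
	}
}
```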
Query Parser", - "exercise": "Exercise 7", - "introduction": "In this lab assignment, you will write a set of operators for GoDB to\nimplement table modifications (e.g., insert and delete records), filters,\njoins, aggregates, etc. These will build on top of the foundation that you wrote\nin Lab 1 to provide you with a database system that can perform simple queries\nover multiple tables.\n\nYou do not need to implement transactions or locking in this lab.\n\nThe remainder of this document gives some suggestions about how to start coding,\ndescribes a set of exercises to help you work through the lab, and discusses how\nto hand in your code. This lab requires you to write a fair amount of code, so\nwe encourage you to **start early**!", - "Description": "### 2.8. Query Parser\n\nBecause it's very cumbersome to compose operators to make queries like this, we've provided a parser for you. This allows you to input SQL queries and get a result set. We've also built a query shell that allows you to interact with the parser. To run it, type `go run main.go` from the top-level godb directory in your terminal (if an error message pops up, yu may need to run some `go get` as the error message suggested).\nThis will display:\n\n```-bash ~/godb % go run main.go\nWelcome to\n\n \u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\n \u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\n \u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\n \u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\n \u2593\u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2593\u2593\u2593\u2593\n \u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591 \u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2593\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2593\u2593\u2593\u2593\n 
\u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\n \u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2593\u2593\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2593\u2593\u2593\u2593\n \u2593\u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2593\u2593\u2593\n \u2593\u2593\u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2593\u2593\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2591\u2593\u2593\u2593\u2593\u2593\u2593\u2593\n \u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\u2593\n\nType \\h for help\n\n>\n```\n\nTyping `\\h` will give a list of commands you can input; for example, `\\d` lists the tables and their schemas. Tables are, by default, loaded from the file `catalog.txt`, but you can point to another catalog file. Note that each table in the catalog is stored in a file called `.dat`, where tablename is the name of the table. From this terminal, you can run `DROP`, `CREATE`, `INSERT`, `BEGIN`, `COMMIT/ROLLBACK`, and `SELECT` statements. You can also load a CSV file into a table using the `\\l` command.\n\nThe parser supports most of SQL with some limitations, including:\n\n* No CTEs, window functions, recursive queries, or other SQL99 or later features (arbitrarily nested subqueries are fully supported)\n* No OUTER joins (all joins are INNER)\n* No USING clause for join predicates (you should write this to ON)\n* No correlated subqueries\n* No UPDATEs\n\n\nWhen you first run the console, it will load a small test catalog containing two identical tables of people and ages. You can see the schemas of these tables using the `\\d` command. If you have fully implemented the operators from the previous exercises (including DISTINCT) you should be able to pass this test. Because these test queries use DISTINCT, we will not grade you on these particular queries but may have hidden test cases that run a few SQL queries against your lab, so you should be sure to confirm that at least simple queries run. \n\nAs an example, we have loaded the ps1 mbta dataset into GoDB format. You can download it from [here](https://www.dropbox.com/scl/fi/l27l17fg6mo3d4jjihmls/transitdb.zip?rlkey=890c1omvwevm6n4us10d7m11j). 
Note that all columns are either strings or ints; floats have been cast to ints in this database.\nDownload the `transitdb` folder (you may be asked to login or create Dropdox account) and put it in your top-level godb directory.\nAfterward, you can connect to over the console using the `\\c`\ncommand:\n```\n> \\c transitdb/transitdb.catalog\nLoaded transitdb/transitdb.catalog\ngated_station_entries (service_date string, time string, station_id string, line_id string, gated_entries int)\nlines (line_id string, line_name string)\nroutes (route_id int, line_id string, first_station_id string, last_station_id string, direction int, direction_desc string, route_name string)\nstations (station_id string, station_name string)\nrail_ridership (season string, line_id string, direction int, time_period_id string, station_id string, total_ons int, total_offs int, number_service_days int, average_ons int, average_offs int, average_flow int)\nstation_orders (route_id int, station_id string, stop_order int, distance_from_last_station_miles int)\ntime_periods (time_period_id string, day_type string, time_period string, period_start_time string, period_end_time string)\n```\n\nOnce it is loaded, you should be able to run a query. For example, to find the first and last station of each line, you can write:\n``` \n> SELECT line_name,\n> direction_desc,\n> s1.station_name AS first_station,\n> s2.station_name AS last_station\n> FROM routes\n> JOIN lines ON lines.line_id = routes.line_id\n> JOIN stations s1 ON first_station_id = s1.station_id\n> JOIN stations s2 ON last_station_id = s2.station_id\n> ORDER BY line_name ASC, direction_desc ASC, first_station ASC, last_station ASC;\n line_name | direction_desc | first_station | last_station |\n \"Blue Line\" | East | Bowdoin | Wonderland |\n \"Blue Line\" | West | Wonderland | Bowdoin |\n \"Green Line\" | East | \"Boston College\" | \"Government Center\" |\n \"Green Line\" | East | \"Cleveland Circle\" | \"Government Center\" |\n \"Green Line\" | East | \"Heath Street\" | Lechmere |\n \"Green Line\" | East | Riverside | \"North Station\" |\n \"Green Line\" | West | \"Government Center\" | \"Boston College\" |\n \"Green Line\" | West | \"Government Center\" | \"Cleveland Circle\" |\n \"Green Line\" | West | \"North Station\" | Riverside |\n \"Green Line\" | West | Lechmere | \"Heath Street\" |\n \"Mattapan Trolley\" | Inbound | Mattapan | Ashmont |\n \"Mattapan Trolley\" | Outbound | Ashmont | Mattapan |\n \"Orange Line\" | North | \"Forest Hills\" | \"Oak Grove\" |\n \"Orange Line\" | South | \"Oak Grove\" | \"Forest Hills\" |\n \"Red Line\" | North | Ashmont | Alewife |\n \"Red Line\" | North | Braintree | Alewife |\n \"Red Line\" | South | Alewife | Ashmont |\n \"Red Line\" | South | Alewife | Braintree |\n(18 results)\n57.01075ms\n```\n\nYou can also view the query plan generated for the query by appending the \"EXPLAIN\" keyword to a query, e.g.:\n```\n> explain SELECT line_name,\n> direction_desc,\n> s1.station_name AS first_station,\n> s2.station_name AS last_station\n> FROM routes\n> JOIN lines ON lines.line_id = routes.line_id\n> JOIN stations s1 ON first_station_id = s1.station_id\n> JOIN stations s2 ON last_station_id = s2.station_id\n> ORDER BY line_name ASC, direction_desc ASC, first_station ASC, last_station ASC;\n\nOrder By line_name,direction_desc,first_station,last_station,\n Project lines.line_name,routes.direction_desc,s1.station_name,s2.station_name, -> [line_name direction_desc first_station last_station]\n Join, routes.last_station_id == 
s2.station_id\n Join, routes.first_station_id == s1.station_id\n Join, lines.line_id == routes.line_id\n Heap Scan transitdb/lines.dat\n Heap Scan transitdb/routes.dat\n Heap Scan transitdb/stations.dat\n Heap Scan transitdb/stations.dat\n```\n\n**Exercise 7.**\n\nRun a few queries against the transitdb to make sure your operator implementations are working. \n\nYou should also be able to pass `TestParseEasy` in `easy_parser_test.go`. This test runs a few SQL queries against the `catalog.txt` catalog that we have provided. Note that it works by comparing your results to a set of saved CSV files in the `savedresults` directory.", - "repo/location": "$ cd go-db-hw-2024\n$ git pull upstream main", - "dependency": [ - "1", - "2", - "3", - "4", - "5" - ], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab2.md", - "codes": [], - "test_codes": [ - { - "code_name": "easy_parser_test.go", - "code_path": "godb/easy_parser_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/easy_parser_test.go", - "code_content": "package godb\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"testing\"\n)\n\ntype Query struct {\n\tSQL string\n\tOrdered bool\n}\n\nfunc TestParseEasy(t *testing.T) {\n\tqueries := []Query{\n\t\t{SQL: \"select sum(age) as s from t group by t.name having s > 30\", Ordered: false},\n\t\t{SQL: \"select sum(age + 10) , sum(age) from t\", Ordered: false},\n\t\t{SQL: \"select min(age) + max(age) from t\", Ordered: false},\n\t\t{SQL: \"select * from t order by t.age, t.name limit 1+2\", Ordered: true},\n\t\t{SQL: \"select t.name, t.age from t join t2 on t.name = t2.name, t2 as t3 where t.age < 50 and t3.age = t.age order by t.age asc, t.name asc\", Ordered: true},\n\t\t{SQL: \"select sq(sq(5)) from t\", Ordered: false},\n\t\t{SQL: \"select 1, name from t\", Ordered: false},\n\t\t{SQL: \"select age, name from t\", Ordered: false},\n\t\t{SQL: \"select t.name, sum(age) totage from t group by t.name\", Ordered: false},\n\t\t{SQL: \"select t.name, t.age from t join t2 on t.name = t2.name where t.age < 50\", Ordered: false},\n\t\t{SQL: \"select name from (select x.name from (select t.name from t) x)y order by name asc\", Ordered: true},\n\t\t{SQL: \"select age, count(*) from t group by age\", Ordered: false},\n\t}\n\tsave := false //set save to true to save the output of the current test run as the correct answer\n\tprintOutput := false //print the result set during testing\n\n\tbp, c, err := MakeParserTestDatabase(10)\n\tif err != nil {\n\t\tt.Fatalf(\"failed to create test database, %s\", err.Error())\n\t}\n\n\tqNo := 0\n\tfor _, query := range queries {\n\t\ttid := BeginTransactionForTest(t, bp)\n\t\tqNo++\n\n\t\tqType, plan, err := Parse(c, query.SQL)\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"failed to parse, q=%s, %s\", query.SQL, err.Error())\n\t\t}\n\t\tif plan == nil {\n\t\t\tt.Fatalf(\"plan was nil\")\n\t\t}\n\t\tif qType != IteratorType {\n\t\t\tcontinue\n\t\t}\n\n\t\tvar outfile *HeapFile\n\t\tvar outfile_csv *os.File\n\t\tvar resultSet []*Tuple\n\t\tfname := fmt.Sprintf(\"savedresults/q%d-easy-result.csv\", qNo)\n\n\t\tif save {\n\t\t\tos.Remove(fname)\n\t\t\toutfile_csv, err = os.OpenFile(fname, os.O_RDWR|os.O_CREATE, 0644)\n\t\t\tif err != nil {\n\t\t\t\tt.Fatalf(\"failed to open CSV file (%s)\", err.Error())\n\t\t\t}\n\t\t\t//outfile, _ = NewHeapFile(fname, plan.Descriptor(), bp)\n\t\t} else {\n\t\t\tfname_bin := fmt.Sprintf(\"savedresults/q%d-easy-result.dat\", qNo)\n\t\t\tos.Remove(fname_bin)\n\t\t\tdesc := plan.Descriptor()\n\t\t\tif desc == nil 
{\n\t\t\t\tt.Fatalf(\"descriptor was nil\")\n\t\t\t}\n\n\t\t\toutfile, _ = NewHeapFile(fname_bin, desc, bp)\n\t\t\tif outfile == nil {\n\t\t\t\tt.Fatalf(\"heapfile was nil\")\n\t\t\t}\n\t\t\tf, err := os.Open(fname)\n\t\t\tif err != nil {\n\t\t\t\tt.Fatalf(\"csv file with results was nil (%s)\", err.Error())\n\t\t\t}\n\t\t\terr = outfile.LoadFromCSV(f, true, \",\", false)\n\t\t\tif err != nil {\n\t\t\t\tt.Fatalf(err.Error())\n\t\t\t}\n\n\t\t\tresultIter, err := outfile.Iterator(tid)\n\t\t\tif err != nil {\n\t\t\t\tt.Fatalf(err.Error())\n\t\t\t}\n\t\t\tfor {\n\t\t\t\ttup, err := resultIter()\n\t\t\t\tif err != nil {\n\t\t\t\t\tt.Fatalf(err.Error())\n\t\t\t\t}\n\n\t\t\t\tif tup != nil {\n\t\t\t\t\tresultSet = append(resultSet, tup)\n\t\t\t\t} else {\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tif printOutput || save {\n\t\t\tfmt.Printf(\"Doing %s\\n\", query.SQL)\n\t\t\titer, err := plan.Iterator(tid)\n\t\t\tif err != nil {\n\t\t\t\tt.Fatalf(\"%s\", err.Error())\n\n\t\t\t}\n\t\t\tnresults := 0\n\t\t\tif save {\n\t\t\t\tfmt.Fprintf(outfile_csv, \"%s\\n\", plan.Descriptor().HeaderString(false))\n\t\t\t}\n\t\t\tfmt.Printf(\"%s\\n\", plan.Descriptor().HeaderString(true))\n\t\t\tfor {\n\t\t\t\ttup, err := iter()\n\t\t\t\tif err != nil {\n\t\t\t\t\tt.Errorf(\"%s\", err.Error())\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t\tif tup == nil {\n\t\t\t\t\tbreak\n\t\t\t\t} else {\n\t\t\t\t\tfmt.Printf(\"%s\\n\", tup.PrettyPrintString(true))\n\t\t\t\t}\n\t\t\t\tnresults++\n\t\t\t\tif save {\n\t\t\t\t\tfmt.Fprintf(outfile_csv, \"%s\\n\", tup.PrettyPrintString(false))\n\t\t\t\t\t//outfile.insertTuple(tup, tid)\n\t\t\t\t}\n\t\t\t}\n\t\t\tfmt.Printf(\"(%d results)\\n\\n\", nresults)\n\t\t}\n\t\tif save {\n\t\t\tbp.FlushAllPages()\n\t\t\toutfile.bufPool.CommitTransaction(tid)\n\t\t\toutfile_csv.Close()\n\t\t} else {\n\t\t\titer, err := plan.Iterator(tid)\n\t\t\tif err != nil {\n\t\t\t\tt.Fatalf(\"%s\", err.Error())\n\t\t\t}\n\t\t\tif query.Ordered {\n\t\t\t\terr = CheckIfOutputMatches(iter, resultSet)\n\t\t\t} else {\n\t\t\t\terr = CheckIfOutputMatchesUnordered(iter, resultSet)\n\t\t\t}\n\t\t\tif err != nil {\n\t\t\t\tt.Errorf(\"query '%s' did not match expected result set: %v\", query.SQL, err)\n\t\t\t\tverbose := true\n\t\t\t\tif verbose {\n\t\t\t\t\tfmt.Print(\"Expected: \\n\")\n\t\t\t\t\tfor _, r := range resultSet {\n\t\t\t\t\t\tfmt.Printf(\"%s\\n\", r.PrettyPrintString(true))\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n}\n" - } - ], - "test_command": [ - "go test easy_parser_test.go" - ] - }, - { - "instance_id": 13, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 3: GoDB Transactions", - "part_name": "2.6. Lock Lifetime", - "exercise": "Exercise 1", - "introduction": "In this lab, you will implement a simple locking-based transaction system in GoDB. You will need to add lock and unlock calls at the appropriate places in your code, as well as code to track the locks held by each transaction and grant locks to transactions as they are needed.\n\nThe remainder of this document describes what is involved in adding transaction support and provides a basic outline of how you might add this support to your database.", - "Description": "### 2.6. Lock Lifetime\n\nYou will need to implement strict two-phase locking. 
This means that transactions should acquire the appropriate type of lock on any object before accessing that object and shouldn't release any locks until after the transaction is committed.\n\nFortunately, the GoDB design is such that it is possible to obtain locks on pages in `BufferPool.GetPage()` before you read or modify them. So, rather than adding calls to locking routines in each of your operators, you should acquire locks in `GetPage()`. You will implement releasing of locks when you implement `CommitTransaction()` and `AbortTransaction()` below.\n\nYou will need to acquire a _shared_ lock on any page (or tuple) before you read it, and you will need to acquire an _exclusive_ lock on any page (or tuple) before you write it. You will notice that we are already passing around `RWPerm` variables in the BufferPool; these variables indicate the type of lock that the caller would like to have on the object being accessed (we have given you code for the `RWPerm` type.)\n\n\n----------\n\nOur testing system relies on the godb implementation returning an error when the transaction is aborted (due to deadlocks etc.). If the transaction is aborted, your implementation is not responsible for restarting the transaction. Simply return an error and the test suite (called/user of the system) will restarte the transaction. Before you start implementing anything for this lab, check that `TestTransactionTid` passes. This test relies on the implementation we have provided you for `NewTID()` which the rest of the system and the tests depend on. If it does not pass, contact the course staff. At this point, `TestTransaction` will not terminate since `insertTuple` returns an error for a buffer pool full with dirty pages, not an aborted transaction (see `readXaction` and `writeXaction` in `transaction_test.go`). This part will execute normally after you implement exercises 1 and 2.\n\nBefore you start working on lab3, you may also find that the following tests already pass. This is normal as they should work for a sequential implementation. After you add transaction related features, these tests should still pass. There is a small portion of credit assigned for them.\n* `TestLockingAcquireReadLocksOnSampePage`\n* `TestLockingAcquireReadWriteLocksOnTwoPages`\n* `TestLockingAcquireWriteLocksOnTwoPages`\n* `TestLockingAcquireReadLocksOnTwoPages`\n* `TestLockingUpgrade`\n* `TestLockingAcquireWriteAndReadLocks`\n* `TestTransactionTwice`\n* `TestTransactionCommit`\n* `TestTransactionSingleThread`\n\n**Exercise 1.**\n\nWrite the methods that acquire transactional locks in BufferPool. Assuming you are using page-level locking, you will need to modify `GetPage` to block and acquire the desired lock (specified by `RWPerm`) before returning a page. `GetPage` receives a `TransactionID` that is attempting to acquire the lock. You will want to allocate data structures that keep track of the shared and exclusive locks each transaction is currently holding. \n\nPlease note that unlike in previous tests, there will be multiple threads concurrently calling `GetPage()` during this test. Use `sync.Mutex` or the `sync.Map` construct to prevent race conditions. Think about what happens if two threads simultaneously try to read or evict a page. The simplest approach (which we recommend) is:\n\n- Associate a `Mutex` with your buffer pool. 
\n - Acquire this mutex before you access any of the data structures you used to keep track of which pages are locked; this will ensure only one thread is trying to use the data structures in the buffer pool to acquire a page lock at a time. \n - If you successfully acquire the page lock, you should release the buffer pool mutex after lock acquisition. \n - If you fail to acquire the lock, you will block. \n - You will need to release the mutex before blocking (to allow another thread/transaction to attempt to acquire the lock)\n - Attempt to re-acquire the mutex before trying to re-acquire the lock.", - "repo/location": "$ cd go-db-2024\n$ git pull upstream main", - "dependency": [ - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "10", - "11", - "12" - ], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab3.md", - "codes": [ - { - "code_path": "godb/buffer_pool.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/buffer_pool.go", - "code_content": "package godb\n\n//BufferPool provides methods to cache pages that have been read from disk.\n//It has a fixed capacity to limit the total amount of memory used by GoDB.\n//It is also the primary way in which transactions are enforced, by using page\n//level locking (you will not need to worry about this until lab3).\n\nimport (\n\t\"fmt\"\n)\n\n// Permissions used to when reading / locking pages\ntype RWPerm int\n\nconst (\n\tReadPerm RWPerm = iota\n\tWritePerm RWPerm = iota\n)\n\ntype BufferPool struct {\n\t// TODO: some code goes here\n}\n\n// Create a new BufferPool with the specified number of pages\nfunc NewBufferPool(numPages int) (*BufferPool, error) {\n\treturn &BufferPool{}, fmt.Errorf(\"NewBufferPool not implemented\")\n}\n\n// Testing method -- iterate through all pages in the buffer pool\n// and flush them using [DBFile.flushPage]. Does not need to be thread/transaction safe.\n// Mark pages as not dirty after flushing them.\nfunc (bp *BufferPool) FlushAllPages() {\n\t// TODO: some code goes here\n}\n\n// Abort the transaction, releasing locks. Because GoDB is FORCE/NO STEAL, none\n// of the pages tid has dirtied will be on disk so it is sufficient to just\n// release locks to abort. You do not need to implement this for lab 1.\nfunc (bp *BufferPool) AbortTransaction(tid TransactionID) {\n\t// TODO: some code goes here\n}\n\n// Commit the transaction, releasing locks. Because GoDB is FORCE/NO STEAL, none\n// of the pages tid has dirtied will be on disk, so prior to releasing locks you\n// should iterate through pages and write them to disk. In GoDB lab3 we assume\n// that the system will not crash while doing this, allowing us to avoid using a\n// WAL. You do not need to implement this for lab 1.\nfunc (bp *BufferPool) CommitTransaction(tid TransactionID) {\n\t// TODO: some code goes here\n}\n\n// Begin a new transaction. You do not need to implement this for lab 1.\n//\n// Returns an error if the transaction is already running.\nfunc (bp *BufferPool) BeginTransaction(tid TransactionID) error {\n\t// TODO: some code goes here\n\treturn nil\n}\n\n// Retrieve the specified page from the specified DBFile (e.g., a HeapFile), on\n// behalf of the specified transaction. If a page is not cached in the buffer pool,\n// you can read it from disk uing [DBFile.readPage]. If the buffer pool is full (i.e.,\n// already stores numPages pages), a page should be evicted. Should not evict\n// pages that are dirty, as this would violate NO STEAL. 
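For the lock bookkeeping Exercise 1 asks for, the sketch below implements a tiny page-level shared/exclusive lock table guarded by one mutex, using a `sync.Cond` so that a blocked caller releases the mutex while it waits and re-acquires it before retrying, in the spirit of the recommendation above. Everything here is illustrative (plain `int` transaction ids, `string` page keys, a hypothetical `lockTable` type) rather than GoDB's `BufferPool` API, and deadlock handling, which the exercise also requires, is deliberately omitted.

```go
package main

import (
	"fmt"
	"sync"
)

// pageLock records who holds a lock on one page: any number of readers
// (shared) or exactly one writer (exclusive). Plain ints stand in for
// GoDB's TransactionID; 0 means "no writer".
type pageLock struct {
	readers map[int]bool
	writer  int
}

// lockTable sketches the bookkeeping: one mutex guards the table, and a
// condition variable lets a blocked caller drop the mutex while waiting.
type lockTable struct {
	mu    sync.Mutex
	cond  *sync.Cond
	pages map[string]*pageLock
}

func newLockTable() *lockTable {
	lt := &lockTable{pages: map[string]*pageLock{}}
	lt.cond = sync.NewCond(&lt.mu)
	return lt
}

func (lt *lockTable) page(key string) *pageLock {
	pl, ok := lt.pages[key]
	if !ok {
		pl = &pageLock{readers: map[int]bool{}}
		lt.pages[key] = pl
	}
	return pl
}

// LockShared blocks until tid may read the page: no other transaction
// holds the exclusive lock.
func (lt *lockTable) LockShared(tid int, key string) {
	lt.mu.Lock()
	defer lt.mu.Unlock()
	pl := lt.page(key)
	for pl.writer != 0 && pl.writer != tid {
		lt.cond.Wait() // releases lt.mu while blocked, re-acquires on wake
	}
	pl.readers[tid] = true
}

// LockExclusive blocks until tid may write the page: no other writer and no
// other readers. A transaction that is the sole reader upgrades its own lock.
func (lt *lockTable) LockExclusive(tid int, key string) {
	lt.mu.Lock()
	defer lt.mu.Unlock()
	pl := lt.page(key)
	for {
		otherWriter := pl.writer != 0 && pl.writer != tid
		otherReader := len(pl.readers) > 1 || (len(pl.readers) == 1 && !pl.readers[tid])
		if !otherWriter && !otherReader {
			break
		}
		lt.cond.Wait()
	}
	pl.writer = tid
}

// ReleaseAll drops every lock tid holds, as CommitTransaction or
// AbortTransaction would, and wakes blocked waiters so they can retry.
func (lt *lockTable) ReleaseAll(tid int) {
	lt.mu.Lock()
	defer lt.mu.Unlock()
	for _, pl := range lt.pages {
		delete(pl.readers, tid)
		if pl.writer == tid {
			pl.writer = 0
		}
	}
	lt.cond.Broadcast()
}

func main() {
	lt := newLockTable()
	lt.LockShared(1, "p0")
	lt.LockShared(2, "p0") // shared locks on the same page coexist
	lt.ReleaseAll(2)
	lt.LockExclusive(1, "p0") // sole remaining reader upgrades
	fmt.Println("tid 1 now holds the exclusive lock on p0")
	lt.ReleaseAll(1)
}
```

A blocked `LockExclusive` caller is woken by the `Broadcast` in `ReleaseAll` and re-checks its condition in a loop, which is the standard `sync.Cond` usage pattern.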
If the buffer pool is\n// full of dirty pages, you should return an error. Before returning the page,\n// attempt to lock it with the specified permission. If the lock is\n// unavailable, should block until the lock is free. If a deadlock occurs, abort\n// one of the transactions in the deadlock. For lab 1, you do not need to\n// implement locking or deadlock detection. You will likely want to store a list\n// of pages in the BufferPool in a map keyed by the [DBFile.pageKey].\nfunc (bp *BufferPool) GetPage(file DBFile, pageNo int, tid TransactionID, perm RWPerm) (Page, error) {\n\treturn nil, fmt.Errorf(\"GetPage not implemented\")\n}\n" - } - ], - "test_codes": [ - { - "code_name": "locking_test.go", - "code_path": "godb/locking_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/locking_test.go", - "code_content": "package godb\n\nimport (\n\t\"sync\"\n\t\"testing\"\n\t\"time\"\n)\n\ntype LockGrabber struct {\n\tbp *BufferPool\n\ttid TransactionID\n\tfile DBFile\n\tpgNo int\n\tperm RWPerm\n\n\tacq bool\n\terr error\n\talock, elock sync.Mutex\n}\n\nfunc NewLockGrabber(bp *BufferPool, tid TransactionID, file DBFile, pgNo int, perm RWPerm) *LockGrabber {\n\treturn &LockGrabber{bp, tid, file, pgNo, perm,\n\t\tfalse, nil, sync.Mutex{}, sync.Mutex{}}\n}\n\nfunc (lg *LockGrabber) run() {\n\t// Try to get the page from the buffer pool.\n\t_, err := lg.bp.GetPage(lg.file, lg.pgNo, lg.tid, lg.perm)\n\tif err == nil {\n\t\tlg.alock.Lock()\n\t\tlg.acq = true\n\t\tlg.alock.Unlock()\n\t} else {\n\t\tlg.elock.Lock()\n\t\tlg.err = err\n\t\tlg.elock.Unlock()\n\n\t\tlg.bp.AbortTransaction(lg.tid)\n\t}\n}\n\nfunc (lg *LockGrabber) acquired() bool {\n\tlg.alock.Lock()\n\tdefer lg.alock.Unlock()\n\treturn lg.acq\n}\n\nfunc (lg *LockGrabber) getError() error {\n\tlg.elock.Lock()\n\tdefer lg.elock.Unlock()\n\treturn lg.err\n}\n\nfunc startGrabber(bp *BufferPool, tid TransactionID, file DBFile, pgNo int, perm RWPerm) *LockGrabber {\n\tlg := NewLockGrabber(bp, tid, file, pgNo, perm)\n\tgo lg.run()\n\treturn lg\n}\n\nfunc grabLock(t *testing.T,\n\tbp *BufferPool, tid TransactionID, file DBFile, pgNo int, perm RWPerm,\n\texpected bool) {\n\n\tlg := startGrabber(bp, tid, file, pgNo, perm)\n\n\ttime.Sleep(100 * time.Millisecond)\n\n\tvar acquired bool = lg.acquired()\n\tif expected != acquired {\n\t\tt.Errorf(\"Expected %t, found %t\", expected, acquired)\n\t}\n\n\t// TODO how to kill stalling lg?\n}\n\nfunc metaLockTester(t *testing.T, bp *BufferPool,\n\ttid1 TransactionID, file1 DBFile, pgNo1 int, perm1 RWPerm,\n\ttid2 TransactionID, file2 DBFile, pgNo2 int, perm2 RWPerm,\n\texpected bool) {\n\tbp.GetPage(file1, pgNo1, tid1, perm1)\n\tgrabLock(t, bp, tid2, file2, pgNo2, perm2, expected)\n}\n\nfunc lockingTestSetUp(t *testing.T) (*BufferPool, *HeapFile, TransactionID, TransactionID) {\n\tbp, hf, tid1, tid2, _ := transactionTestSetUp(t)\n\treturn bp, hf, tid1, tid2\n}\n\nfunc TestAcquireReadLocksOnSamePage(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, ReadPerm,\n\t\ttid2, hf, 0, ReadPerm,\n\t\ttrue)\n}\n\nfunc TestAcquireReadWriteLocksOnSamePage(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, ReadPerm,\n\t\ttid2, hf, 0, WritePerm,\n\t\tfalse)\n}\n\nfunc TestAcquireWriteReadLocksOnSamePage(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, WritePerm,\n\t\ttid2, hf, 0, ReadPerm,\n\t\tfalse)\n}\n\nfunc 
TestAcquireReadWriteLocksOnTwoPages(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, ReadPerm,\n\t\ttid2, hf, 1, WritePerm,\n\t\ttrue)\n}\n\nfunc TestAcquireWriteLocksOnTwoPages(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, WritePerm,\n\t\ttid2, hf, 1, WritePerm,\n\t\ttrue)\n}\n\nfunc TestAcquireReadLocksOnTwoPages(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, ReadPerm,\n\t\ttid2, hf, 1, ReadPerm,\n\t\ttrue)\n}\n\nfunc TestLockUpgrade(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, ReadPerm,\n\t\ttid1, hf, 0, WritePerm,\n\t\ttrue)\n\tmetaLockTester(t, bp,\n\t\ttid2, hf, 1, ReadPerm,\n\t\ttid2, hf, 1, WritePerm,\n\t\ttrue)\n}\n\nfunc TestAcquireWriteAndReadLocks(t *testing.T) {\n\tbp, hf, tid1, _ := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, WritePerm,\n\t\ttid1, hf, 0, ReadPerm,\n\t\ttrue)\n}\n" - } - ], - "test_command": [ - "go test locking_test.go" - ] - }, - { - "instance_id": 14, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 3: GoDB Transactions", - "part_name": "2.6. Lock Lifetime", - "exercise": "Exercise 2", - "introduction": "In this lab, you will implement a simple locking-based transaction system in GoDB. You will need to add lock and unlock calls at the appropriate places in your code, as well as code to track the locks held by each transaction and grant locks to transactions as they are needed.\n\nThe remainder of this document describes what is involved in adding transaction support and provides a basic outline of how you might add this support to your database.", - "Description": "### 2.6. Lock Lifetime\n\nYou will need to implement strict two-phase locking. This means that transactions should acquire the appropriate type of lock on any object before accessing that object and shouldn't release any locks until after the transaction is committed.\n\nFortunately, the GoDB design is such that it is possible to obtain locks on pages in `BufferPool.GetPage()` before you read or modify them. So, rather than adding calls to locking routines in each of your operators, you should acquire locks in `GetPage()`. You will implement releasing of locks when you implement `CommitTransaction()` and `AbortTransaction()` below.\n\nYou will need to acquire a _shared_ lock on any page (or tuple) before you read it, and you will need to acquire an _exclusive_ lock on any page (or tuple) before you write it. You will notice that we are already passing around `RWPerm` variables in the BufferPool; these variables indicate the type of lock that the caller would like to have on the object being accessed (we have given you code for the `RWPerm` type.)\n\n\n----------\n\nOur testing system relies on the godb implementation returning an error when the transaction is aborted (due to deadlocks etc.). If the transaction is aborted, your implementation is not responsible for restarting the transaction. Simply return an error and the test suite (called/user of the system) will restarte the transaction. Before you start implementing anything for this lab, check that `TestTransactionTid` passes. This test relies on the implementation we have provided you for `NewTID()` which the rest of the system and the tests depend on. If it does not pass, contact the course staff. 
At this point, `TestTransaction` will not terminate since `insertTuple` returns an error for a buffer pool full with dirty pages, not an aborted transaction (see `readXaction` and `writeXaction` in `transaction_test.go`). This part will execute normally after you implement exercises 1 and 2.\n\nBefore you start working on lab3, you may also find that the following tests already pass. This is normal as they should work for a sequential implementation. After you add transaction related features, these tests should still pass. There is a small portion of credit assigned for them.\n* `TestLockingAcquireReadLocksOnSampePage`\n* `TestLockingAcquireReadWriteLocksOnTwoPages`\n* `TestLockingAcquireWriteLocksOnTwoPages`\n* `TestLockingAcquireReadLocksOnTwoPages`\n* `TestLockingUpgrade`\n* `TestLockingAcquireWriteAndReadLocks`\n* `TestTransactionTwice`\n* `TestTransactionCommit`\n* `TestTransactionSingleThread`\n\n\n**Exercise 2.**\n\nImplement the `BeginTransaction()`, `CommitTransaction()` and `AbortTransaction()` methods in `BufferPool`. \n\n`BeginTransaction()` may or may not need to do anything depending on your design choices -- you may want to store the transaction id in a list of running transactions.\n\nWhen you commit, you should flush dirty pages associated with the transaction to disk. When you abort, you should revert any changes made by the transaction by restoring the page to its on-disk state (which can be done simply by discarding the page from memory since we never write dirty pages back to disk).\n\nWhether the transaction commits or aborts, you should also release any state the `BufferPool` keeps regarding the transaction, including releasing any locks that the transaction held.\n\nAs with previous methods, you will need to use mutexes or other synchronization to ensure correctness when two transactions simultaneously attempt to abort or commit. In our implementation, we used the `Mutex` associated with our buffer pool to protect the entire body of each of these three methods.\n\nAt this point, your code should pass the tests in `locking_test.go`, `TestTransactionTwice`, and `TestTransaction{Commit, Abort}` unit tests and the `TestAbortEviction` system test. 
You may find the `Test{One, Two, Five}Threads` and `TestAllDirtyFails` system tests illustrative, but they will likely fail until you complete the next exercises.", - "repo/location": "$ cd go-db-2024\n$ git pull upstream main", - "dependency": [ - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "10", - "11", - "12" - ], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab3.md", - "codes": [ - { - "code_path": "godb/buffer_pool.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/buffer_pool.go", - "code_content": "package godb\n\n//BufferPool provides methods to cache pages that have been read from disk.\n//It has a fixed capacity to limit the total amount of memory used by GoDB.\n//It is also the primary way in which transactions are enforced, by using page\n//level locking (you will not need to worry about this until lab3).\n\nimport (\n\t\"fmt\"\n)\n\n// Permissions used to when reading / locking pages\ntype RWPerm int\n\nconst (\n\tReadPerm RWPerm = iota\n\tWritePerm RWPerm = iota\n)\n\ntype BufferPool struct {\n\t// TODO: some code goes here\n}\n\n// Create a new BufferPool with the specified number of pages\nfunc NewBufferPool(numPages int) (*BufferPool, error) {\n\treturn &BufferPool{}, fmt.Errorf(\"NewBufferPool not implemented\")\n}\n\n// Testing method -- iterate through all pages in the buffer pool\n// and flush them using [DBFile.flushPage]. Does not need to be thread/transaction safe.\n// Mark pages as not dirty after flushing them.\nfunc (bp *BufferPool) FlushAllPages() {\n\t// TODO: some code goes here\n}\n\n// Abort the transaction, releasing locks. Because GoDB is FORCE/NO STEAL, none\n// of the pages tid has dirtied will be on disk so it is sufficient to just\n// release locks to abort. You do not need to implement this for lab 1.\nfunc (bp *BufferPool) AbortTransaction(tid TransactionID) {\n\t// TODO: some code goes here\n}\n\n// Commit the transaction, releasing locks. Because GoDB is FORCE/NO STEAL, none\n// of the pages tid has dirtied will be on disk, so prior to releasing locks you\n// should iterate through pages and write them to disk. In GoDB lab3 we assume\n// that the system will not crash while doing this, allowing us to avoid using a\n// WAL. You do not need to implement this for lab 1.\nfunc (bp *BufferPool) CommitTransaction(tid TransactionID) {\n\t// TODO: some code goes here\n}\n\n// Begin a new transaction. You do not need to implement this for lab 1.\n//\n// Returns an error if the transaction is already running.\nfunc (bp *BufferPool) BeginTransaction(tid TransactionID) error {\n\t// TODO: some code goes here\n\treturn nil\n}\n\n// Retrieve the specified page from the specified DBFile (e.g., a HeapFile), on\n// behalf of the specified transaction. If a page is not cached in the buffer pool,\n// you can read it from disk uing [DBFile.readPage]. If the buffer pool is full (i.e.,\n// already stores numPages pages), a page should be evicted. Should not evict\n// pages that are dirty, as this would violate NO STEAL. If the buffer pool is\n// full of dirty pages, you should return an error. Before returning the page,\n// attempt to lock it with the specified permission. If the lock is\n// unavailable, should block until the lock is free. If a deadlock occurs, abort\n// one of the transactions in the deadlock. For lab 1, you do not need to\n// implement locking or deadlock detection. 
You will likely want to store a list\n// of pages in the BufferPool in a map keyed by the [DBFile.pageKey].\nfunc (bp *BufferPool) GetPage(file DBFile, pageNo int, tid TransactionID, perm RWPerm) (Page, error) {\n\treturn nil, fmt.Errorf(\"GetPage not implemented\")\n}\n" - } - ], - "test_codes": [ - { - "code_name": "locking_test.go", - "code_path": "godb/locking_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/locking_test.go", - "code_content": "package godb\n\nimport (\n\t\"sync\"\n\t\"testing\"\n\t\"time\"\n)\n\ntype LockGrabber struct {\n\tbp *BufferPool\n\ttid TransactionID\n\tfile DBFile\n\tpgNo int\n\tperm RWPerm\n\n\tacq bool\n\terr error\n\talock, elock sync.Mutex\n}\n\nfunc NewLockGrabber(bp *BufferPool, tid TransactionID, file DBFile, pgNo int, perm RWPerm) *LockGrabber {\n\treturn &LockGrabber{bp, tid, file, pgNo, perm,\n\t\tfalse, nil, sync.Mutex{}, sync.Mutex{}}\n}\n\nfunc (lg *LockGrabber) run() {\n\t// Try to get the page from the buffer pool.\n\t_, err := lg.bp.GetPage(lg.file, lg.pgNo, lg.tid, lg.perm)\n\tif err == nil {\n\t\tlg.alock.Lock()\n\t\tlg.acq = true\n\t\tlg.alock.Unlock()\n\t} else {\n\t\tlg.elock.Lock()\n\t\tlg.err = err\n\t\tlg.elock.Unlock()\n\n\t\tlg.bp.AbortTransaction(lg.tid)\n\t}\n}\n\nfunc (lg *LockGrabber) acquired() bool {\n\tlg.alock.Lock()\n\tdefer lg.alock.Unlock()\n\treturn lg.acq\n}\n\nfunc (lg *LockGrabber) getError() error {\n\tlg.elock.Lock()\n\tdefer lg.elock.Unlock()\n\treturn lg.err\n}\n\nfunc startGrabber(bp *BufferPool, tid TransactionID, file DBFile, pgNo int, perm RWPerm) *LockGrabber {\n\tlg := NewLockGrabber(bp, tid, file, pgNo, perm)\n\tgo lg.run()\n\treturn lg\n}\n\nfunc grabLock(t *testing.T,\n\tbp *BufferPool, tid TransactionID, file DBFile, pgNo int, perm RWPerm,\n\texpected bool) {\n\n\tlg := startGrabber(bp, tid, file, pgNo, perm)\n\n\ttime.Sleep(100 * time.Millisecond)\n\n\tvar acquired bool = lg.acquired()\n\tif expected != acquired {\n\t\tt.Errorf(\"Expected %t, found %t\", expected, acquired)\n\t}\n\n\t// TODO how to kill stalling lg?\n}\n\nfunc metaLockTester(t *testing.T, bp *BufferPool,\n\ttid1 TransactionID, file1 DBFile, pgNo1 int, perm1 RWPerm,\n\ttid2 TransactionID, file2 DBFile, pgNo2 int, perm2 RWPerm,\n\texpected bool) {\n\tbp.GetPage(file1, pgNo1, tid1, perm1)\n\tgrabLock(t, bp, tid2, file2, pgNo2, perm2, expected)\n}\n\nfunc lockingTestSetUp(t *testing.T) (*BufferPool, *HeapFile, TransactionID, TransactionID) {\n\tbp, hf, tid1, tid2, _ := transactionTestSetUp(t)\n\treturn bp, hf, tid1, tid2\n}\n\nfunc TestAcquireReadLocksOnSamePage(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, ReadPerm,\n\t\ttid2, hf, 0, ReadPerm,\n\t\ttrue)\n}\n\nfunc TestAcquireReadWriteLocksOnSamePage(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, ReadPerm,\n\t\ttid2, hf, 0, WritePerm,\n\t\tfalse)\n}\n\nfunc TestAcquireWriteReadLocksOnSamePage(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, WritePerm,\n\t\ttid2, hf, 0, ReadPerm,\n\t\tfalse)\n}\n\nfunc TestAcquireReadWriteLocksOnTwoPages(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, ReadPerm,\n\t\ttid2, hf, 1, WritePerm,\n\t\ttrue)\n}\n\nfunc TestAcquireWriteLocksOnTwoPages(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, WritePerm,\n\t\ttid2, hf, 1, 
WritePerm,\n\t\ttrue)\n}\n\nfunc TestAcquireReadLocksOnTwoPages(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, ReadPerm,\n\t\ttid2, hf, 1, ReadPerm,\n\t\ttrue)\n}\n\nfunc TestLockUpgrade(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, ReadPerm,\n\t\ttid1, hf, 0, WritePerm,\n\t\ttrue)\n\tmetaLockTester(t, bp,\n\t\ttid2, hf, 1, ReadPerm,\n\t\ttid2, hf, 1, WritePerm,\n\t\ttrue)\n}\n\nfunc TestAcquireWriteAndReadLocks(t *testing.T) {\n\tbp, hf, tid1, _ := lockingTestSetUp(t)\n\tmetaLockTester(t, bp,\n\t\ttid1, hf, 0, WritePerm,\n\t\ttid1, hf, 0, ReadPerm,\n\t\ttrue)\n}\n" - } - ], - "test_command": [ - "go test locking_test.go" - ] - }, - { - "instance_id": 15, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 3: GoDB Transactions", - "part_name": "2.7. Changes to Methods Outside of Buffer Pool", - "exercise": "Exercise 3", - "introduction": "In this lab, you will implement a simple locking-based transaction system in GoDB. You will need to add lock and unlock calls at the appropriate places in your code, as well as code to track the locks held by each transaction and grant locks to transactions as they are needed.\n\nThe remainder of this document describes what is involved in adding transaction support and provides a basic outline of how you might add this support to your database.", - "Description": "### 2.7. Changes to Methods Outside of Buffer Pool\n\nDouble check that your implementation of `HeapFile.insertTuple()` and `HeapFile.deleteTuple()`, as well as the implementation of the iterator returned by `HeapFile.Iterator()` access pages using `BufferPool.GetPage()`. Double check that these different uses of `GetPage()` pass the correct permissions object (e.g., `WritePerm` or `ReadPerm`). You may also wish to double check that your implementation of `HeapFile.insertTuple()` and `HeapFile.deleteTuple()` call `setDirty()` on any of the pages they access (you should have done this when you implemented this code in lab 1.)\n\nYou will also need to ensure that your methods behave properly under concurrency. Transactional locking will prevent methods like `insertTuple` or `deleteTuple` from being called on the same `HeapPage` object by two different transactions (and hence two different threads), but your `HeapFile` itself may have shared variables that need to be protected with mutexes. For example, your heap file implementation may use a variable to track the number of pages or the next page to insert into; you will want to ensure that threads are isolated from each other when one or both of them are updating these variables. There also may be race conditions that you will need to think through. For example, in your implementation, you will want to ensure that two threads do not simultaneously try to insert a new tuple that adds a new page to the HeapFile (e.g. because two transactions try to do an insert into a heap file with no empty slots on any pages). \n\n\n----------\n\n\n**Exercise 3.**\n\nAdd synchronization primitives like mutexes throughout GoDB. For most implementations, the primary code to be concerned about is in HeapFile. Some (but not necessarily all) actions that you should verify work properly:\n\n- Reading tuples off of pages during your Iterator. 
Note that it is okay for two threads to read the same variable at the same time -- its concurrent modification by both threads or modification by one and reading by another that is problematic. Also, recall that transactional locking will prevent one transaction from inserting into a page while another is reading from it.\n- Inserting and deleting tuples through HeapFile methods. \n- Adding a new page to a `HeapFile`. When do you physically write the page to disk? Are there race conditions with other transactions (on other threads) that might need special attention at the HeapFile level, regardless of page-level locking?\n- Looking for an empty slot into which you can insert tuples. \n\nIn the staff implementation, we also added a `Mutex` `m` to our HeapFile. We then locked and unlocked `m` in `insertTuple` and `deleteTuple` as we needed. This is because we want to avoid two inserts/deletes modifying shared heapFile variables, for example, variables that keep track of the last page inserted into and the total number of pages. We didn't need to acquire the mutex during our iterator because we know that no other transaction will modify a page while we are scanning it, thanks to the page locks.\n\nThere are no specific test cases for this exercise because the places where synchronization needs to be added are dependent on your implementation.", - "repo/location": "$ cd go-db-2024\n$ git pull upstream main", - "dependency": [ - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "10", - "11", - "12" - ], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab3.md", - "codes": [ - { - "code_path": "godb/heap_file.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/heap_file.go", - "code_content": "package godb\n\nimport (\n\t\"bufio\"\n\t\"fmt\"\n\t\"os\"\n\t\"strconv\"\n\t\"strings\"\n)\n\n// A HeapFile is an unordered collection of tuples.\n//\n// HeapFile is a public class because external callers may wish to instantiate\n// database tables using the method [LoadFromCSV]\ntype HeapFile struct {\n\t// TODO: some code goes here\n\t// HeapFile should include the fields below; you may want to add\n\t// additional fields\n\tbufPool *BufferPool\n}\n\n// Create a HeapFile.\n// Parameters\n// - fromFile: backing file for the HeapFile. May be empty or a previously created heap file.\n// - td: the TupleDesc for the HeapFile.\n// - bp: the BufferPool that is used to store pages read from the HeapFile\n// May return an error if the file cannot be opened or created.\nfunc NewHeapFile(fromFile string, td *TupleDesc, bp *BufferPool) (*HeapFile, error) {\n\t// TODO: some code goes here\n\treturn &HeapFile{}, fmt.Errorf(\"NewHeapFile not implemented\") //replace me\n}\n\n// Return the name of the backing file\nfunc (f *HeapFile) BackingFile() string {\n\t// TODO: some code goes here\n\treturn \"\" //replace me\n}\n\n// Return the number of pages in the heap file\nfunc (f *HeapFile) NumPages() int {\n\t// TODO: some code goes here\n\treturn 0 //replace me\n}\n\n// Load the contents of a heap file from a specified CSV file. 
Parameters are as follows:\n// - hasHeader: whether or not the CSV file has a header\n// - sep: the character to use to separate fields\n// - skipLastField: if true, the final field is skipped (some TPC datasets include a trailing separator on each line)\n// Returns an error if the field cannot be opened or if a line is malformed\n// We provide the implementation of this method, but it won't work until\n// [HeapFile.insertTuple] and some other utility functions are implemented\nfunc (f *HeapFile) LoadFromCSV(file *os.File, hasHeader bool, sep string, skipLastField bool) error {\n\tscanner := bufio.NewScanner(file)\n\tcnt := 0\n\tfor scanner.Scan() {\n\t\tline := scanner.Text()\n\t\tfields := strings.Split(line, sep)\n\t\tif skipLastField {\n\t\t\tfields = fields[0 : len(fields)-1]\n\t\t}\n\t\tnumFields := len(fields)\n\t\tcnt++\n\t\tdesc := f.Descriptor()\n\t\tif desc == nil || desc.Fields == nil {\n\t\t\treturn GoDBError{MalformedDataError, \"Descriptor was nil\"}\n\t\t}\n\t\tif numFields != len(desc.Fields) {\n\t\t\treturn GoDBError{MalformedDataError, fmt.Sprintf(\"LoadFromCSV: line %d (%s) does not have expected number of fields (expected %d, got %d)\", cnt, line, len(f.Descriptor().Fields), numFields)}\n\t\t}\n\t\tif cnt == 1 && hasHeader {\n\t\t\tcontinue\n\t\t}\n\t\tvar newFields []DBValue\n\t\tfor fno, field := range fields {\n\t\t\tswitch f.Descriptor().Fields[fno].Ftype {\n\t\t\tcase IntType:\n\t\t\t\tfield = strings.TrimSpace(field)\n\t\t\t\tfloatVal, err := strconv.ParseFloat(field, 64)\n\t\t\t\tif err != nil {\n\t\t\t\t\treturn GoDBError{TypeMismatchError, fmt.Sprintf(\"LoadFromCSV: couldn't convert value %s to int, tuple %d\", field, cnt)}\n\t\t\t\t}\n\t\t\t\tintValue := int(floatVal)\n\t\t\t\tnewFields = append(newFields, IntField{int64(intValue)})\n\t\t\tcase StringType:\n\t\t\t\tif len(field) > StringLength {\n\t\t\t\t\tfield = field[0:StringLength]\n\t\t\t\t}\n\t\t\t\tnewFields = append(newFields, StringField{field})\n\t\t\t}\n\t\t}\n\t\tnewT := Tuple{*f.Descriptor(), newFields, nil}\n\t\ttid := NewTID()\n\t\tbp := f.bufPool\n\t\tf.insertTuple(&newT, tid)\n\n\t\t// Force dirty pages to disk. CommitTransaction may not be implemented\n\t\t// yet if this is called in lab 1 or 2.\n\t\tbp.FlushAllPages()\n\n\t}\n\treturn nil\n}\n\n// Read the specified page number from the HeapFile on disk. This method is\n// called by the [BufferPool.GetPage] method when it cannot find the page in its\n// cache.\n//\n// This method will need to open the file supplied to the constructor, seek to\n// the appropriate offset, read the bytes in, and construct a [heapPage] object,\n// using the [heapPage.initFromBuffer] method.\nfunc (f *HeapFile) readPage(pageNo int) (Page, error) {\n\t// TODO: some code goes here\n\treturn nil, fmt.Errorf(\"readPage not implemented\")\n}\n\n// Add the tuple to the HeapFile. This method should search through pages in the\n// heap file, looking for empty slots and adding the tuple in the first empty\n// slot if finds.\n//\n// If none are found, it should create a new [heapPage] and insert the tuple\n// there, and write the heapPage to the end of the HeapFile (e.g., using the\n// [flushPage] method.)\n//\n// To iterate through pages, it should use the [BufferPool.GetPage method]\n// rather than directly reading pages itself. For lab 1, you do not need to\n// worry about concurrent transactions modifying the Page or HeapFile. 
We will\n// add support for concurrent modifications in lab 3.\n//\n// The page the tuple is inserted into should be marked as dirty.\nfunc (f *HeapFile) insertTuple(t *Tuple, tid TransactionID) error {\n\t// TODO: some code goes here\n\treturn fmt.Errorf(\"insertTuple not implemented\") //replace me\n}\n\n// Remove the provided tuple from the HeapFile.\n//\n// This method should use the [Tuple.Rid] field of t to determine which tuple to\n// remove. The Rid field should be set when the tuple is read using the\n// [Iterator] method, or is otherwise created (as in tests). Note that Rid is an\n// empty interface, so you can supply any object you wish. You will likely want\n// to identify the heap page and slot within the page that the tuple came from.\n//\n// The page the tuple is deleted from should be marked as dirty.\nfunc (f *HeapFile) deleteTuple(t *Tuple, tid TransactionID) error {\n\t// TODO: some code goes here\n\treturn fmt.Errorf(\"deleteTuple not implemented\") //replace me\n}\n\n// Method to force the specified page back to the backing file at the\n// appropriate location. This will be called by BufferPool when it wants to\n// evict a page. The Page object should store information about its offset on\n// disk (e.g., that it is the ith page in the heap file), so you can determine\n// where to write it back.\nfunc (f *HeapFile) flushPage(p Page) error {\n\t// TODO: some code goes here\n\treturn fmt.Errorf(\"flushPage not implemented\") //replace me\n}\n\n// [Operator] descriptor method -- return the TupleDesc for this HeapFile\n// Supplied as argument to NewHeapFile.\nfunc (f *HeapFile) Descriptor() *TupleDesc {\n\t// TODO: some code goes here\n\treturn nil //replace me\n\n}\n\n// [Operator] iterator method\n// Return a function that iterates through the records in the heap file\n// Note that this method should read pages from the HeapFile using the\n// BufferPool method GetPage, rather than reading pages directly,\n// since the BufferPool caches pages and manages page-level locking state for\n// transactions\n// You should esnure that Tuples returned by this method have their Rid object\n// set appropriate so that [deleteTuple] will work (see additional comments there).\n// Make sure to set the returned tuple's TupleDescriptor to the TupleDescriptor of\n// the HeapFile. This allows it to correctly capture the table qualifier.\nfunc (f *HeapFile) Iterator(tid TransactionID) (func() (*Tuple, error), error) {\n\t// TODO: some code goes here\n\treturn func() (*Tuple, error) {\n\treturn nil, fmt.Errorf(\"heap_file.Iterator not implemented\")\n\t}, nil\n}\n\n// internal strucuture to use as key for a heap page\ntype heapHash struct {\n\tFileName string\n\tPageNo int\n}\n\n// This method returns a key for a page to use in a map object, used by\n// BufferPool to determine if a page is cached or not. We recommend using a\n// heapHash struct as the key for a page, although you can use any struct that\n// does not contain a slice or a map that uniquely identifies the page.\nfunc (f *HeapFile) pageKey(pgNo int) any {\n\t// TODO: some code goes here\n\treturn nil\n}\n" - } - ], - "test_codes": [], - "test_command": [] - }, - { - "instance_id": 16, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 3: GoDB Transactions", - "part_name": "2.8. Implementing NO STEAL", - "exercise": "Exercise 4", - "introduction": "In this lab, you will implement a simple locking-based transaction system in GoDB. 
You will need to add lock and unlock calls at the appropriate places in your code, as well as code to track the locks held by each transaction and grant locks to transactions as they are needed.\n\nThe remainder of this document describes what is involved in adding transaction support and provides a basic outline of how you might add this support to your database.", - "Description": "### 2.8. Implementing NO STEAL\n\nModifications from a transaction are written to disk only after it commits. This means we can abort a transaction by discarding the dirty pages and rereading them from the disk. Thus, we must not evict dirty pages. This policy is called NO STEAL.\n\n----------\n\n**Exercise 4.**\n\nDouble-check that you don't evict dirty pages from the buffer pool. We will test this later in `TestAllDirtyFails` but you probably cannot pass this test case yet.", - "repo/location": "$ cd go-db-2024\n$ git pull upstream main", - "dependency": [ - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "10", - "11", - "12" - ], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab3.md", - "codes": [], - "test_codes": [ - { - "code_name": "transaction_test.go", - "code_path": "godb/transaction_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/transaction_test.go", - "code_content": "package godb\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"os\"\n\t\"sync\"\n\t\"testing\"\n\t\"time\"\n)\n\n// This test does not have credit and serves merely as a sanity check since\n// NewTID() should have been implemented for you.\nfunc TestTransactionTid(t *testing.T) {\n\ttid := NewTID()\n\ttid2 := NewTID()\n\tvar tid3 = tid\n\tif tid == tid2 {\n\t\tt.Errorf(\"different transactions have same id\")\n\t}\n\tif tid != tid3 {\n\t\tt.Errorf(\"same transactions have different id\")\n\t}\n}\n\nconst numConcurrentThreads int = 20\n\nvar c chan int = make(chan int, numConcurrentThreads*2)\n\nfunc readXaction(hf DBFile, bp *BufferPool, wg *sync.WaitGroup) {\n\tfor {\n\tstart:\n\t\ttid := NewTID()\n\t\tbp.BeginTransaction(tid)\n\t\tpgCnt1 := hf.NumPages()\n\t\tit, _ := hf.Iterator(tid)\n\t\tcnt1 := 0\n\n\t\tfor {\n\t\t\tt, err := it()\n\t\t\tif err != nil {\n\t\t\t\t// Assume this is because of a deadlock, restart txn\n\t\t\t\ttime.Sleep(time.Duration(rand.Intn(8)) * 100 * time.Microsecond)\n\t\t\t\tgoto start\n\t\t\t}\n\t\t\tif t == nil {\n\t\t\t\tbreak\n\t\t\t}\n\t\t\tcnt1++\n\t\t}\n\n\t\tit, _ = hf.Iterator(tid)\n\t\tcnt2 := 0\n\t\tfor {\n\t\t\tt, err := it()\n\t\t\tif err != nil {\n\t\t\t\t// Assume this is because of a deadlock, restart txn\n\t\t\t\ttime.Sleep(time.Duration(rand.Intn(8)) * 100 * time.Microsecond)\n\t\t\t\tgoto start\n\t\t\t}\n\t\t\tif t == nil {\n\t\t\t\tbreak\n\t\t\t}\n\t\t\tcnt2++\n\t\t}\n\t\tif cnt1 == cnt2 || pgCnt1 != hf.NumPages() {\n\t\t\t//fmt.Printf(\"read same number of tuples both iterators (%d)\\n\", cnt1)\n\t\t\tc <- 1\n\t\t} else {\n\t\t\tfmt.Printf(\"ERROR: read different number of tuples both iterators (%d, %d)\\n\", cnt1, cnt2)\n\t\t\tc <- 0\n\t\t}\n\t\tbp.CommitTransaction(tid)\n\t\twg.Done()\n\t\treturn\n\t}\n}\n\nfunc writeXaction(hf DBFile, bp *BufferPool, writeTuple Tuple, wg *sync.WaitGroup) {\n\tfor {\n\tstart:\n\t\ttid := NewTID()\n\t\tbp.BeginTransaction(tid)\n\t\tfor i := 0; i < 10; i++ {\n\t\t\terr := hf.insertTuple(&writeTuple, tid)\n\t\t\tif err != nil {\n\t\t\t\t// Assume this is because of a deadlock, restart txn\n\t\t\t\ttime.Sleep(time.Duration(rand.Intn(8)) * 100 * time.Microsecond)\n\t\t\t\tgoto 
start\n\t\t\t}\n\t\t}\n\t\tbp.CommitTransaction(tid)\n\t\tbreak\n\t}\n\tc <- 1\n\twg.Done()\n}\n\nfunc TestTransactions(t *testing.T) {\n\t_, t1, t2, _, _, _ := makeTestVars(t)\n\tbp, catalog, err := MakeTestDatabase(20, \"catalog.txt\")\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\n\ttid := NewTID()\n\tbp.BeginTransaction(tid)\n\thf, err := catalog.GetTable(\"t\")\n\tif err != nil {\n\t\tt.Fatalf(err.Error())\n\t}\n\tvar wg sync.WaitGroup\n\n\tfor i := 0; i < 1000; i++ {\n\t\terr := hf.insertTuple(&t1, tid)\n\t\tif err != nil {\n\t\t\tfmt.Print(err.Error())\n\t\t\tt.Errorf(\"transaction test failed\")\n\t\t}\n\t\terr = hf.insertTuple(&t2, tid)\n\t\tif err != nil {\n\t\t\tfmt.Print(err.Error())\n\t\t\tt.Errorf(\"transaction test failed\")\n\t\t}\n\t}\n\tbp.CommitTransaction(tid)\n\n\twg.Add(numConcurrentThreads * 2)\n\n\tfor i := 0; i < numConcurrentThreads; i++ {\n\t\tgo readXaction(hf, bp, &wg)\n\t\t//time.Sleep(2 * time.Millisecond)\n\t\tgo writeXaction(hf, bp, t1, &wg)\n\t\ttime.Sleep(10 * time.Millisecond)\n\t}\n\n\twg.Wait()\n\n\tfor i := 0; i < numConcurrentThreads*2; i++ {\n\t\tval := <-c\n\t\tif val == 0 {\n\t\t\tt.Errorf(\"transaction test failed\")\n\t\t}\n\t}\n\n\twg.Add(1)\n\tgo readXaction(hf, bp, &wg)\n\twg.Wait()\n}\n\nfunc transactionTestSetUpVarLen(t *testing.T, tupCnt int, pgCnt int) (*BufferPool, *HeapFile, TransactionID, TransactionID, Tuple, Tuple) {\n\t_, t1, t2, hf, bp, _ := makeTestVars(t)\n\n\tcsvFile, err := os.Open(fmt.Sprintf(\"txn_test_%d_%d.csv\", tupCnt, pgCnt))\n\tif err != nil {\n\t\tt.Fatalf(\"error opening test file\")\n\t}\n\thf.LoadFromCSV(csvFile, false, \",\", false)\n\tif hf.NumPages() != pgCnt {\n\t\tt.Fatalf(\"error making test vars; unexpected number of pages\")\n\t}\n\n\ttid1 := NewTID()\n\tbp.BeginTransaction(tid1)\n\ttid2 := NewTID()\n\tbp.BeginTransaction(tid2)\n\treturn bp, hf, tid1, tid2, t1, t2\n}\n\nfunc transactionTestSetUp(t *testing.T) (*BufferPool, *HeapFile, TransactionID, TransactionID, Tuple) {\n\tbp, hf, tid1, tid2, t1, _ := transactionTestSetUpVarLen(t, 300, 3)\n\treturn bp, hf, tid1, tid2, t1\n}\n\nfunc TestTransactionTwice(t *testing.T) {\n\tbp, hf, tid1, tid2, _ := transactionTestSetUp(t)\n\tbp.GetPage(hf, 0, tid1, ReadPerm)\n\tbp.GetPage(hf, 1, tid1, WritePerm)\n\tbp.CommitTransaction(tid1)\n\n\tbp.GetPage(hf, 0, tid2, WritePerm)\n\tbp.GetPage(hf, 1, tid2, WritePerm)\n}\n\nfunc testTransactionComplete(t *testing.T, commit bool) {\n\tbp, hf, tid1, tid2, t1 := transactionTestSetUp(t)\n\n\tpg, _ := bp.GetPage(hf, 2, tid1, WritePerm)\n\theapp := pg.(*heapPage)\n\theapp.insertTuple(&t1)\n\theapp.setDirty(tid1, true)\n\n\tif commit {\n\t\tbp.CommitTransaction(tid1)\n\t} else {\n\t\tbp.AbortTransaction(tid1)\n\t}\n\n\tbp.FlushAllPages()\n\n\tpg, _ = bp.GetPage(hf, 2, tid2, WritePerm)\n\theapp = pg.(*heapPage)\n\titer := heapp.tupleIter()\n\n\tfound := false\n\tfor tup, err := iter(); tup != nil || err != nil; tup, err = iter() {\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"Iterator error\")\n\t\t}\n\t\tif t1.equals(tup) {\n\t\t\tfound = true\n\t\t\tbreak\n\t\t}\n\t}\n\n\tif found != commit {\n\t\tt.Errorf(\"Expected %t, found %t\", commit, found)\n\t}\n}\n\nfunc TestTransactionCommit(t *testing.T) {\n\ttestTransactionComplete(t, true)\n}\n\nfunc TestTransactionAbort(t *testing.T) {\n\ttestTransactionComplete(t, false)\n}\n\n// placeholder op for a singleton tuple\ntype Singleton struct {\n\ttup Tuple\n\tran bool\n}\n\nfunc (i *Singleton) Descriptor() *TupleDesc {\n\treturn &i.tup.Desc\n}\n\nfunc (i *Singleton) Iterator(tid 
TransactionID) (func() (*Tuple, error), error) {\n\treturn func() (*Tuple, error) {\n\t\tif i.ran {\n\t\t\treturn nil, nil\n\t\t}\n\t\ti.ran = true\n\t\treturn &i.tup, nil\n\t}, nil\n}\n\n// Run threads transactions, each each of which reads a single tuple from a\n// page, deletes the tuple, and re-inserts it with an incremented value. There\n// will be deadlocks, so your deadlock handling will have to be correct to allow\n// all transactions to be committed and the value to be incremented threads\n// times.\nfunc validateTransactions(t *testing.T, threads int) {\n\tbp, hf, _, _, _, t2 := transactionTestSetUpVarLen(t, 1, 1)\n\n\tvar startWg, readyWg sync.WaitGroup\n\tstartChan := make(chan struct{})\n\n\t// sleep for an increasingly long time after deadlocks. this backoff helps avoid starvation\n\tnDeadlocks := 0\n\tvar nDeadlocksMutex sync.Mutex\n\tsleepAfterDeadlock := func(thrId int, err error) {\n\t\tnDeadlocksMutex.Lock()\n\t\tnDeadlocks++\n\t\tt.Logf(\"thread %d operation failed: %v deadlock #%v\", thrId, err, nDeadlocks)\n\t\tsleepTime := time.Duration(rand.Intn(int(nDeadlocks) + 1))\n\t\tnDeadlocksMutex.Unlock()\n\t\ttime.Sleep(sleepTime * time.Millisecond)\n\t}\n\n\tincrementer := func(thrId int) {\n\t\t// Signal that this goroutine is ready\n\t\treadyWg.Done()\n\n\t\t// Wait for the signal to start\n\t\t<-startChan\n\n\t\tfor tid := TransactionID(0); ; bp.AbortTransaction(tid) {\n\t\t\ttid = NewTID()\n\t\t\tbp.BeginTransaction(tid)\n\t\t\titer1, err := hf.Iterator(tid)\n\t\t\tif err != nil {\n\t\t\t\tcontinue\n\t\t\t}\n\n\t\t\treadTup, err := iter1()\n\t\t\tif err != nil {\n\t\t\t\tsleepAfterDeadlock(thrId, err)\n\t\t\t\tcontinue\n\t\t\t}\n\n\t\t\tvar writeTup = Tuple{\n\t\t\t\tDesc: readTup.Desc,\n\t\t\t\tFields: []DBValue{\n\t\t\t\t\treadTup.Fields[0],\n\t\t\t\t\tIntField{readTup.Fields[1].(IntField).Value + 1},\n\t\t\t\t}}\n\n\t\t\tdop := NewDeleteOp(hf, hf)\n\t\t\titerDel, err := dop.Iterator(tid)\n\t\t\tif err != nil {\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tdelCnt, err := iterDel()\n\t\t\tif err != nil {\n\t\t\t\tsleepAfterDeadlock(thrId, err)\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tif delCnt.Fields[0].(IntField).Value != 1 {\n\t\t\t\tt.Errorf(\"Delete Op should return 1\")\n\t\t\t}\n\t\t\tiop := NewInsertOp(hf, &Singleton{writeTup, false})\n\t\t\titerIns, err := iop.Iterator(tid)\n\t\t\tif err != nil {\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tinsCnt, err := iterIns()\n\t\t\tif err != nil {\n\t\t\t\tsleepAfterDeadlock(thrId, err)\n\t\t\t\tcontinue\n\t\t\t}\n\n\t\t\tif insCnt.Fields[0].(IntField).Value != 1 {\n\t\t\t\tt.Errorf(\"Insert Op should return 1\")\n\t\t\t}\n\n\t\t\tbp.CommitTransaction(tid)\n\t\t\tbreak //exit on success, so we don't do terminal abort\n\t\t}\n\t\tstartWg.Done()\n\t}\n\n\t// Prepare goroutines\n\treadyWg.Add(threads)\n\tstartWg.Add(threads)\n\tfor i := 0; i < threads; i++ {\n\t\tgo incrementer(i)\n\t}\n\n\t// Wait for all goroutines to be ready\n\treadyWg.Wait()\n\n\t// Start all goroutines at once\n\tclose(startChan)\n\n\t// Wait for all goroutines to finish\n\tstartWg.Wait()\n\n\ttid := NewTID()\n\tbp.BeginTransaction(tid)\n\titer, _ := hf.Iterator(tid)\n\ttup, _ := iter()\n\n\tdiff := tup.Fields[1].(IntField).Value - t2.Fields[1].(IntField).Value\n\tif diff != int64(threads) {\n\t\tt.Errorf(\"Expected #increments = %d, found %d\", threads, diff)\n\t}\n}\n\nfunc TestTransactionSingleThread(t *testing.T) {\n\tvalidateTransactions(t, 1)\n}\n\nfunc TestTransactionTwoThreads(t *testing.T) {\n\tvalidateTransactions(t, 2)\n}\n\nfunc TestTransactionFiveThreads(t 
*testing.T) {\n\tvalidateTransactions(t, 5)\n}\n\nfunc TestTransactionAllDirtyFails(t *testing.T) {\n\tif os.Getenv(\"LAB\") == \"5\" {\n\t\tt.Skip(\"Test is valid up through Lab 4. Skipping.\")\n\t}\n\ttd, t1, _, hf, bp, tid := makeTestVars(t)\n\n\tfor hf.NumPages() < 3 {\n\t\thf.insertTuple(&t1, tid)\n\t\tif hf.NumPages() == 0 {\n\t\t\tt.Fatalf(\"Heap file should have at least one page after insertion.\")\n\t\t}\n\t}\n\tbp.CommitTransaction(tid) // make three clean pages\n\n\tos.Remove(TestingFile2)\n\thf2, _ := NewHeapFile(TestingFile2, &td, bp)\n\ttid2 := NewTID()\n\tbp.BeginTransaction(tid2)\n\n\tfor hf2.NumPages() < 3 { // make three dirty pages\n\t\thf2.insertTuple(&t1, tid2)\n\t\tif hf2.NumPages() == 0 {\n\t\t\tt.Fatalf(\"Heap file should have at least one page after insertion.\")\n\t\t}\n\t}\n\n\t_, err := bp.GetPage(hf, 0, tid2, ReadPerm) // since bp capacity = 3, should return error due to all dirty pages\n\tif err == nil {\n\t\tt.Errorf(\"Expected error due to all dirty pages\")\n\t}\n}\n\nfunc TestTransactionAbortEviction(t *testing.T) {\n\ttupExists := func(t0 Tuple, tid TransactionID, hf *HeapFile) (bool, error) {\n\t\titer, err := hf.Iterator(tid)\n\t\tif err != nil {\n\t\t\treturn false, err\n\t\t}\n\t\tfor tup, err := iter(); tup != nil; tup, err = iter() {\n\t\t\tif err != nil {\n\t\t\t\treturn false, err\n\t\t\t}\n\t\t\tif t0.equals(tup) {\n\t\t\t\treturn true, nil\n\t\t\t}\n\t\t}\n\t\treturn false, nil\n\t}\n\n\t_, t1, _, hf, bp, tid := makeTestVars(t)\n\thf.insertTuple(&t1, tid)\n\tif exists, err := tupExists(t1, tid, hf); !(exists == true && err == nil) {\n\t\tt.Errorf(\"Tuple should exist\")\n\t}\n\tbp.AbortTransaction(tid)\n\n\ttid2 := NewTID()\n\tbp.BeginTransaction(tid2)\n\n\t// tuple should not exist after abort\n\tif exists, err := tupExists(t1, tid2, hf); !(exists == false && err == nil) {\n\t\tt.Errorf(\"Tuple should not exist\")\n\t}\n}\n" - } - ], - "test_command": [ - "go test transaction_test.go" - ] - }, - { - "instance_id": 17, - "course": "6.5830/6.5831: Database Systems", - "year": "Fall 2024", - "index": "Lab 3: GoDB Transactions", - "part_name": "2.9. Deadlocks and Aborts", - "exercise": "Exercise 5", - "introduction": "In this lab, you will implement a simple locking-based transaction system in GoDB. You will need to add lock and unlock calls at the appropriate places in your code, as well as code to track the locks held by each transaction and grant locks to transactions as they are needed.\n\nThe remainder of this document describes what is involved in adding transaction support and provides a basic outline of how you might add this support to your database.", - "Description": "### 2.9. Deadlocks and Aborts\n\nIt is possible for transactions in GoDB to deadlock -- if you do not understand why, we recommend reading about deadlocks in the reading on Concurrency Control and Recovery (i.e., the reading for Lecture 10 and 11). You will need to detect this situation and return an error.\n\nThere are many possible ways to detect a deadlock. A simple method would be to implement a timeout policy that aborts a transaction if it has not been completed after a given period of time. For a better solution, you may implement cycle-detection in a dependency graph data structure as shown in lecture. In this scheme, you would check for cycles in a dependency graph periodically or whenever you attempt to grant a new lock, and abort something if a cycle exists. After you have detected that a deadlock exists, you must decide how to improve the situation. 
Assume you have detected a deadlock while transaction _t_ is waiting for a lock. In theory, you could abort **all** transactions that _t_ is waiting for; this may result in a large amount of work being undone, but you can guarantee that _t_ will make progress. Alternately, you may decide to abort _t_ to give other transactions a chance to make progress. This means that the end-user will have to retry transaction _t_.\n\nAnother approach is to use global orderings of transactions to avoid building the wait-for graph. This is sometimes preferred for performance reasons, but transactions that could have succeeded can be aborted by mistake under this scheme. Examples include the WAIT-DIE and WOUND-WAIT schemes.\n\n----------\n\n**Exercise 5.**\n\nImplement deadlock detection or prevention in `BufferPool.GetPage()`. You have many design decisions for your deadlock handling system, but it is not necessary to do something highly sophisticated. We expect you to do better than a simple timeout on each transaction. A good starting point will be to implement cycle-detection in a wait-for graph before every lock request, and you will receive full credit for such an implementation. Please describe your choices in the lab writeup and list the pros and cons of your choice compared to the alternatives.\n\nYou should ensure that your code aborts transactions properly when a deadlock occurs, which means calling `AbortTransaction()` and returning an error. You are not expected to automatically restart a transaction which fails due to a deadlock -- you can assume that higher-level code will take care of this. \n\nYou will need to be careful about acquiring and releasing mutexes here -- if `AbortTransaction` also acquires the buffer pool mutex, your `GetPage` will need to release the mutex before calling `AbortTransaction.`\n\nWe have provided some (not-so-unit) tests in `deadlock_test.go`. They are a bit involved, so they may take more than a few seconds to run (depending on your policy). If they seem to hang indefinitely, then you probably have an unresolved deadlock. These tests construct simple deadlock situations that your code should be able to escape.\n\nNote that there are two timing parameters near the top of `deadlock_test.go`; these determine the frequency at which the test checks if locks have been acquired and the waiting time before an aborted transaction is restarted. You may observe different performance characteristics by tweaking these parameters if you use a timeout-based detection method.\n\nYour code should now should pass the `Test{One, Two, Five}Threads` and `TestAllDirtyFails` tests (which may also run for quite a long time depending on your implementation).\n\nAt this point, you should have a recoverable database, in the sense that if the database system crashes (at a point other than `CommitTransaction()` or `AbortTransaction()`) or if the user explicitly aborts a transaction, the effects of any running transaction will not be visible after the system restarts (or the transaction aborts.) 
You may wish to verify this by running some transactions and explicitly killing the database server.", - "repo/location": "$ cd go-db-2024\n$ git pull upstream main", - "dependency": [ - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "10", - "11", - "12" - ], - "link": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/lab3.md", - "codes": [ - { - "code_path": "godb/buffer_pool.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/buffer_pool.go", - "code_content": "package godb\n\n//BufferPool provides methods to cache pages that have been read from disk.\n//It has a fixed capacity to limit the total amount of memory used by GoDB.\n//It is also the primary way in which transactions are enforced, by using page\n//level locking (you will not need to worry about this until lab3).\n\nimport (\n\t\"fmt\"\n)\n\n// Permissions used to when reading / locking pages\ntype RWPerm int\n\nconst (\n\tReadPerm RWPerm = iota\n\tWritePerm RWPerm = iota\n)\n\ntype BufferPool struct {\n\t// TODO: some code goes here\n}\n\n// Create a new BufferPool with the specified number of pages\nfunc NewBufferPool(numPages int) (*BufferPool, error) {\n\treturn &BufferPool{}, fmt.Errorf(\"NewBufferPool not implemented\")\n}\n\n// Testing method -- iterate through all pages in the buffer pool\n// and flush them using [DBFile.flushPage]. Does not need to be thread/transaction safe.\n// Mark pages as not dirty after flushing them.\nfunc (bp *BufferPool) FlushAllPages() {\n\t// TODO: some code goes here\n}\n\n// Abort the transaction, releasing locks. Because GoDB is FORCE/NO STEAL, none\n// of the pages tid has dirtied will be on disk so it is sufficient to just\n// release locks to abort. You do not need to implement this for lab 1.\nfunc (bp *BufferPool) AbortTransaction(tid TransactionID) {\n\t// TODO: some code goes here\n}\n\n// Commit the transaction, releasing locks. Because GoDB is FORCE/NO STEAL, none\n// of the pages tid has dirtied will be on disk, so prior to releasing locks you\n// should iterate through pages and write them to disk. In GoDB lab3 we assume\n// that the system will not crash while doing this, allowing us to avoid using a\n// WAL. You do not need to implement this for lab 1.\nfunc (bp *BufferPool) CommitTransaction(tid TransactionID) {\n\t// TODO: some code goes here\n}\n\n// Begin a new transaction. You do not need to implement this for lab 1.\n//\n// Returns an error if the transaction is already running.\nfunc (bp *BufferPool) BeginTransaction(tid TransactionID) error {\n\t// TODO: some code goes here\n\treturn nil\n}\n\n// Retrieve the specified page from the specified DBFile (e.g., a HeapFile), on\n// behalf of the specified transaction. If a page is not cached in the buffer pool,\n// you can read it from disk uing [DBFile.readPage]. If the buffer pool is full (i.e.,\n// already stores numPages pages), a page should be evicted. Should not evict\n// pages that are dirty, as this would violate NO STEAL. If the buffer pool is\n// full of dirty pages, you should return an error. Before returning the page,\n// attempt to lock it with the specified permission. If the lock is\n// unavailable, should block until the lock is free. If a deadlock occurs, abort\n// one of the transactions in the deadlock. For lab 1, you do not need to\n// implement locking or deadlock detection. 
You will likely want to store a list\n// of pages in the BufferPool in a map keyed by the [DBFile.pageKey].\nfunc (bp *BufferPool) GetPage(file DBFile, pageNo int, tid TransactionID, perm RWPerm) (Page, error) {\n\treturn nil, fmt.Errorf(\"GetPage not implemented\")\n}\n" - } - ], - "test_codes": [ - { - "code_name": "deadlock_test.go", - "code_path": "godb/deadlock_test.go", - "code_url": "https://github.com/MIT-DB-Class/go-db-2024/blob/main/godb/deadlock_test.go", - "code_content": "package godb\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"testing\"\n\t\"time\"\n)\n\nconst POLL_INTERVAL = 100 * time.Millisecond\nconst WAIT_INTERVAL = 200 * time.Millisecond\n\n/**\n* Not-so-unit test to construct a deadlock situation.\n* t1 acquires p0.read; t2 acquires p1.read; t1 attempts p1.write; t2\n* attempts p0.write. Rinse and repeat.\n */\nfunc TestDeadlockReadWrite(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\n\tlg1Read := startGrabber(bp, tid1, hf, 0, ReadPerm)\n\tlg2Read := startGrabber(bp, tid2, hf, 1, ReadPerm)\n\n\ttime.Sleep(POLL_INTERVAL)\n\n\tlg1Write := startGrabber(bp, tid1, hf, 1, WritePerm)\n\tlg2Write := startGrabber(bp, tid2, hf, 0, WritePerm)\n\n\tfor {\n\t\ttime.Sleep(POLL_INTERVAL)\n\n\t\tif lg1Write.acquired() && lg2Write.acquired() {\n\t\t\tt.Errorf(\"Should not both get write lock\")\n\t\t}\n\t\tif lg1Write.acquired() != lg2Write.acquired() {\n\t\t\tbreak\n\t\t}\n\n\t\tif lg1Write.getError() != nil {\n\t\t\tbp.AbortTransaction(tid1) // at most abort twice; should be able to abort twice\n\t\t\ttime.Sleep(time.Duration((float64(WAIT_INTERVAL) * rand.Float64())))\n\n\t\t\ttid1 = NewTID()\n\t\t\tlg1Read = startGrabber(bp, tid1, hf, 0, ReadPerm)\n\t\t\ttime.Sleep(POLL_INTERVAL)\n\t\t\tlg1Write = startGrabber(bp, tid1, hf, 1, WritePerm)\n\t\t}\n\n\t\tif lg2Write.getError() != nil {\n\t\t\tbp.AbortTransaction(tid2) // at most abort twice; should be able to abort twice\n\t\t\ttime.Sleep(time.Duration((float64(WAIT_INTERVAL) * rand.Float64())))\n\n\t\t\ttid2 = NewTID()\n\t\t\tlg2Read = startGrabber(bp, tid2, hf, 1, ReadPerm)\n\t\t\ttime.Sleep(POLL_INTERVAL)\n\t\t\tlg2Write = startGrabber(bp, tid2, hf, 0, WritePerm)\n\t\t}\n\t}\n\n\tif lg1Read == nil || lg2Read == nil {\n\t\tfmt.Println(\"should not be nil\")\n\t}\n}\n\n/**\n * Not-so-unit test to construct a deadlock situation.\n * t1 acquires p0.write; t2 acquires p1.write; t1 attempts p1.write; t2\n * attempts p0.write.\n */\nfunc TestDeadlockWriteWrite(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\n\tlg1WriteA := startGrabber(bp, tid1, hf, 0, WritePerm)\n\tlg2WriteA := startGrabber(bp, tid2, hf, 1, WritePerm)\n\n\ttime.Sleep(POLL_INTERVAL)\n\n\tlg1WriteB := startGrabber(bp, tid1, hf, 1, WritePerm)\n\tlg2WriteB := startGrabber(bp, tid2, hf, 0, WritePerm)\n\n\tfor {\n\t\ttime.Sleep(POLL_INTERVAL)\n\n\t\tif lg1WriteB.acquired() && lg2WriteB.acquired() {\n\t\t\tt.Errorf(\"Should not both get write lock\")\n\t\t}\n\t\tif lg1WriteB.acquired() != lg2WriteB.acquired() {\n\t\t\tbreak\n\t\t}\n\n\t\tif lg1WriteB.getError() != nil {\n\t\t\tbp.AbortTransaction(tid1) // at most abort twice; should be able to abort twice\n\t\t\ttime.Sleep(time.Duration((float64(WAIT_INTERVAL) * rand.Float64())))\n\n\t\t\ttid1 = NewTID()\n\t\t\tlg1WriteA = startGrabber(bp, tid1, hf, 0, WritePerm)\n\t\t\ttime.Sleep(POLL_INTERVAL)\n\t\t\tlg1WriteB = startGrabber(bp, tid1, hf, 1, WritePerm)\n\t\t}\n\n\t\tif lg2WriteB.getError() != nil {\n\t\t\tbp.AbortTransaction(tid2) // at most abort twice; should be able to abort 
twice\n\t\t\ttime.Sleep(time.Duration((float64(WAIT_INTERVAL) * rand.Float64())))\n\n\t\t\ttid2 = NewTID()\n\t\t\tlg2WriteA = startGrabber(bp, tid2, hf, 1, WritePerm)\n\t\t\ttime.Sleep(POLL_INTERVAL)\n\t\t\tlg2WriteB = startGrabber(bp, tid2, hf, 0, WritePerm)\n\t\t}\n\t}\n\n\tif lg1WriteA == nil || lg2WriteA == nil {\n\t\tfmt.Println(\"should not be nil\")\n\t}\n}\n\n/**\n * Not-so-unit test to construct a deadlock situation.\n * t1 acquires p0.read; t2 acquires p0.read; t1 attempts to upgrade to\n * p0.write; t2 attempts to upgrade to p0.write\n */\nfunc TestDeadlockUpgradeWrite(t *testing.T) {\n\tbp, hf, tid1, tid2 := lockingTestSetUp(t)\n\n\tlg1Read := startGrabber(bp, tid1, hf, 0, ReadPerm)\n\tlg2Read := startGrabber(bp, tid2, hf, 0, ReadPerm)\n\n\ttime.Sleep(POLL_INTERVAL)\n\n\tlg1Write := startGrabber(bp, tid1, hf, 0, WritePerm)\n\tlg2Write := startGrabber(bp, tid2, hf, 0, WritePerm)\n\n\tfor {\n\t\ttime.Sleep(POLL_INTERVAL)\n\n\t\tif lg1Write.acquired() && lg2Write.acquired() {\n\t\t\tt.Errorf(\"Should not both get write lock\")\n\t\t}\n\t\tif lg1Write.acquired() != lg2Write.acquired() {\n\t\t\tbreak\n\t\t}\n\n\t\tif lg1Write.getError() != nil {\n\t\t\tbp.AbortTransaction(tid1) // at most abort twice; should be able to abort twice\n\t\t\ttime.Sleep(time.Duration((float64(WAIT_INTERVAL) * rand.Float64())))\n\n\t\t\ttid1 = NewTID()\n\t\t\tlg1Read = startGrabber(bp, tid1, hf, 0, ReadPerm)\n\t\t\ttime.Sleep(POLL_INTERVAL)\n\t\t\tlg1Write = startGrabber(bp, tid1, hf, 0, WritePerm)\n\t\t}\n\n\t\tif lg2Write.getError() != nil {\n\t\t\tbp.AbortTransaction(tid2) // at most abort twice; should be able to abort twice\n\t\t\ttime.Sleep(time.Duration((float64(WAIT_INTERVAL) * rand.Float64())))\n\n\t\t\ttid2 = NewTID()\n\t\t\tlg2Read = startGrabber(bp, tid2, hf, 0, ReadPerm)\n\t\t\ttime.Sleep(POLL_INTERVAL)\n\t\t\tlg2Write = startGrabber(bp, tid2, hf, 0, WritePerm)\n\t\t}\n\t}\n\n\tif lg1Read == nil || lg2Read == nil {\n\t\tfmt.Println(\"should not be nil\")\n\t}\n}\n" - } - ], - "test_command": [ - "go test deadlock_test.go" - ] - } -] \ No newline at end of file diff --git a/benchmarks/course_lab_bench/data/benchmark/env_setup_examples.jsonl b/benchmarks/course_lab_bench/data/benchmark/env_setup_examples.jsonl deleted file mode 100644 index 4ce7adf..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/env_setup_examples.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -{"task_id": "env_setup_1", "task_name": "problems/test-repo-problems/1.md", "task": "set up the java environment", "repo_name": "projects/test-repo", "repo_url": "https://github.com/SWE-agent/test-repo.git", "test_method": "java -version", "test_results": "", "difficulty": "easy", "docker_env": "xuafeng/swe-go-python:latest"} -{"task_id": "env_setup_2", "task_name": "problems/test-repo-problems/2.md", "task": "set up the rust environment", "repo_name": "projects/test-repo", "repo_url": "https://github.com/SWE-agent/test-repo.git", "test_method": "rustc --version", "test_results": "", "difficulty": "easy", "docker_env": "xuafeng/swe-go-python:latest"} -{"task_id": "env_setup_3", "task_name": "problems/test-repo-problems/3.md", "task": "set up the nodejs environment", "repo_name": "projects/test-repo", "repo_url": "https://github.com/SWE-agent/test-repo.git", "test_method": "node -v", "test_results": "", "difficulty": "easy", "docker_env": "xuafeng/swe-go-python:latest"} \ No newline at end of file diff --git a/benchmarks/course_lab_bench/data/benchmark/lab_exam_data.csv b/benchmarks/course_lab_bench/data/benchmark/lab_exam_data.csv deleted 
file mode 100644 index 1fbe73b..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/lab_exam_data.csv +++ /dev/null @@ -1,5660 +0,0 @@ -instance_id,course,year,index,part_name,introduction,getting_started,The code,description,task,hint,rules,repo_location,branch,repo,test_method,test_results,difficluty,link -1,6.5840: Distributed Systems,Spring 2024,Lab 1: MapReduce,Your Job,"In this lab you'll build a MapReduce system. You'll implement a worker process that calls application Map and Reduce functions and handles reading and writing files, and a coordinator process that hands out tasks to workers and copes with failed workers. You'll be building something similar to the [MapReduce paper](http://research.google.com/archive/mapreduce-osdi04.pdf). (Note: this lab uses ""coordinator"" instead of the paper's ""master"".)","You need to [setup Go](http://nil.csail.mit.edu/6.5840/2024/labs/go.html) to do the labs. - -Fetch the initial lab software with [git](https://git-scm.com/) (a version control system). To learn more about git, look at the [Pro Git book](https://git-scm.com/book/en/v2) or the [git user's manual](http://www.kernel.org/pub/software/scm/git/docs/user-manual.html). - -``` -$ git clone git://g.csail.mit.edu/6.5840-golabs-2024 6.5840 -$ cd 6.5840 -$ ls -Makefile src -$ -``` - -We supply you with a simple sequential mapreduce implementation in `src/main/mrsequential.go`. It runs the maps and reduces one at a time, in a single process. We also provide you with a couple of MapReduce applications: word-count in `mrapps/wc.go`, and a text indexer in `mrapps/indexer.go`. You can run word count sequentially as follows: - -``` -$ cd ~/6.5840 -$ cd src/main -$ go build -buildmode=plugin ../mrapps/wc.go -$ rm mr-out* -$ go run mrsequential.go wc.so pg*.txt -$ more mr-out-0 -A 509 -ABOUT 2 -ACT 8 -... -``` - -`mrsequential.go` leaves its output in the file `mr-out-0`. The input is from the text files named `pg-xxx.txt`. - -Feel free to borrow code from `mrsequential.go`. You should also have a look at `mrapps/wc.go` to see what MapReduce application code looks like. - -For this lab and all the others, we might issue updates to the code we provide you. To ensure that you can fetch those updates and easily merge them using `git pull`, it's best to leave the code we provide in the original files. You can add to the code we provide as directed in the lab write-ups; just don't move it. It's OK to put your own new functions in new files.",,"Your job is to implement a distributed MapReduce, consisting of two programs, the coordinator and the worker. There will be just one coordinator process, and one or more worker processes executing in parallel. In a real system the workers would run on a bunch of different machines, but for this lab you'll run them all on a single machine. The workers will talk to the coordinator via RPC. Each worker process will, in a loop, ask the coordinator for a task, read the task's input from one or more files, execute the task, write the task's output to one or more files, and again ask the coordinator for a new task. The coordinator should notice if a worker hasn't completed its task in a reasonable amount of time (for this lab, use ten seconds), and give the same task to a different worker. - -We have given you a little code to start you off. The ""main"" routines for the coordinator and worker are in `main/mrcoordinator.go` and `main/mrworker.go`; don't change these files. You should put your implementation in `mr/coordinator.go`, `mr/worker.go`, and `mr/rpc.go`. 
- -Here's how to run your code on the word-count MapReduce application. First, make sure the word-count plugin is freshly built: - -``` -$ go build -buildmode=plugin ../mrapps/wc.go -``` - -In the `main` directory, run the coordinator. - -``` -$ rm mr-out* -$ go run mrcoordinator.go pg-*.txt -``` - -The `pg-*.txt` arguments to `mrcoordinator.go` are the input files; each file corresponds to one ""split"", and is the input to one Map task. - -In one or more other windows, run some workers: - -``` -$ go run mrworker.go wc.so -``` - -When the workers and coordinator have finished, look at the output in `mr-out-*`. When you've completed the lab, the sorted union of the output files should match the sequential output, like this: - -``` -$ cat mr-out-* | sort | more -A 509 -ABOUT 2 -ACT 8 -... -``` - -We supply you with a test script in `main/test-mr.sh`. The tests check that the `wc` and `indexer` MapReduce applications produce the correct output when given the `pg-xxx.txt` files as input. The tests also check that your implementation runs the Map and Reduce tasks in parallel, and that your implementation recovers from workers that crash while running tasks. - -If you run the test script now, it will hang because the coordinator never finishes: - -``` -$ cd ~/6.5840/src/main -$ bash test-mr.sh -*** Starting wc test. -``` - -You can change `ret := false` to true in the Done function in `mr/coordinator.go` so that the coordinator exits immediately. Then: - -``` -$ bash test-mr.sh -*** Starting wc test. -sort: No such file or directory -cmp: EOF on mr-wc-all ---- wc output is not the same as mr-correct-wc.txt ---- wc test: FAIL -$ -``` - -The test script expects to see output in files named `mr-out-X`, one for each reduce task. The empty implementations of `mr/coordinator.go` and `mr/worker.go` don't produce those files (or do much of anything else), so the test fails. - -When you've finished, the test script output should look like this: - -``` -$ bash test-mr.sh -*** Starting wc test. ---- wc test: PASS -*** Starting indexer test. ---- indexer test: PASS -*** Starting map parallelism test. ---- map parallelism test: PASS -*** Starting reduce parallelism test. ---- reduce parallelism test: PASS -*** Starting job count test. ---- job count test: PASS -*** Starting early exit test. ---- early exit test: PASS -*** Starting crash test. ---- crash test: PASS -*** PASSED ALL TESTS -$ -``` - -You may see some errors from the Go RPC package that look like - -``` -2019/12/16 13:27:09 rpc.Register: method ""Done"" has 1 input parameters; needs exactly three -``` - -Ignore these messages; registering the coordinator as an [RPC server](https://golang.org/src/net/rpc/server.go) checks if all its methods are suitable for RPCs (have 3 inputs); we know that `Done` is not called via RPC. - -Additionally, depending on your strategy for terminating worker processes, you may see some errors of the form - -``` -2024/02/11 16:21:32 dialing:dial unix /var/tmp/5840-mr-501: connect: connection refused -``` - -It is fine to see a handful of these messages per test; they arise when the worker is unable to contact the coordinator RPC server after the coordinator has exited.","Your job is to implement a distributed MapReduce, consisting of two programs, the coordinator and the worker. There will be just one coordinator process, and one or more worker processes executing in parallel. In a real system the workers would run on a bunch of different machines, but for this lab you'll run them all on a single machine. 
The workers will talk to the coordinator via RPC. Each worker process will, in a loop, ask the coordinator for a task, read the task's input from one or more files, execute the task, write the task's output to one or more files, and again ask the coordinator for a new task. The coordinator should notice if a worker hasn't completed its task in a reasonable amount of time (for this lab, use ten seconds), and give the same task to a different worker. - -We have given you a little code to start you off. The ""main"" routines for the coordinator and worker are in `main/mrcoordinator.go` and `main/mrworker.go`; don't change these files. You should put your implementation in `mr/coordinator.go`, `mr/worker.go`, and `mr/rpc.go`. - -Here's how to run your code on the word-count MapReduce application. First, make sure the word-count plugin is freshly built: - -``` -$ go build -buildmode=plugin ../mrapps/wc.go -``` - -In the `main` directory, run the coordinator. - -``` -$ rm mr-out* -$ go run mrcoordinator.go pg-*.txt -``` - -The `pg-*.txt` arguments to `mrcoordinator.go` are the input files; each file corresponds to one ""split"", and is the input to one Map task. - -In one or more other windows, run some workers: - -``` -$ go run mrworker.go wc.so -``` - -When the workers and coordinator have finished, look at the output in `mr-out-*`. When you've completed the lab, the sorted union of the output files should match the sequential output, like this: - -``` -$ cat mr-out-* | sort | more -A 509 -ABOUT 2 -ACT 8 -... -``` - -We supply you with a test script in `main/test-mr.sh`. The tests check that the `wc` and `indexer` MapReduce applications produce the correct output when given the `pg-xxx.txt` files as input. The tests also check that your implementation runs the Map and Reduce tasks in parallel, and that your implementation recovers from workers that crash while running tasks. - -If you run the test script now, it will hang because the coordinator never finishes: - -``` -$ cd ~/6.5840/src/main -$ bash test-mr.sh -*** Starting wc test. -``` - -You can change `ret := false` to true in the Done function in `mr/coordinator.go` so that the coordinator exits immediately. Then: - -``` -$ bash test-mr.sh -*** Starting wc test. -sort: No such file or directory -cmp: EOF on mr-wc-all ---- wc output is not the same as mr-correct-wc.txt ---- wc test: FAIL -$ -``` - -The test script expects to see output in files named `mr-out-X`, one for each reduce task. The empty implementations of `mr/coordinator.go` and `mr/worker.go` don't produce those files (or do much of anything else), so the test fails. - -When you've finished, the test script output should look like this: - -``` -$ bash test-mr.sh -*** Starting wc test. ---- wc test: PASS -*** Starting indexer test. ---- indexer test: PASS -*** Starting map parallelism test. ---- map parallelism test: PASS -*** Starting reduce parallelism test. ---- reduce parallelism test: PASS -*** Starting job count test. ---- job count test: PASS -*** Starting early exit test. ---- early exit test: PASS -*** Starting crash test. ---- crash test: PASS -*** PASSED ALL TESTS -$ -``` - -You may see some errors from the Go RPC package that look like - -``` -2019/12/16 13:27:09 rpc.Register: method ""Done"" has 1 input parameters; needs exactly three -``` - -Ignore these messages; registering the coordinator as an [RPC server](https://golang.org/src/net/rpc/server.go) checks if all its methods are suitable for RPCs (have 3 inputs); we know that `Done` is not called via RPC. 
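The hints further down this row give two fragments for storing intermediate key/value pairs with Go's `encoding/json`. For reference, here is a self-contained version of that encode/decode round trip; the `KeyValue` struct mirrors the one in `mr/worker.go`, and the `mr-0-0` file name is just an example of the `mr-X-Y` naming convention:

```go
package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// KeyValue mirrors the struct declared in mr/worker.go.
type KeyValue struct {
	Key   string
	Value string
}

func main() {
	// Map side: write intermediate pairs, one JSON object per line.
	f, err := os.Create("mr-0-0")
	if err != nil {
		panic(err)
	}
	enc := json.NewEncoder(f)
	for _, kv := range []KeyValue{{"a", "1"}, {"b", "1"}} {
		if err := enc.Encode(&kv); err != nil {
			panic(err)
		}
	}
	f.Close()

	// Reduce side: read the pairs back until EOF.
	f, err = os.Open("mr-0-0")
	if err != nil {
		panic(err)
	}
	defer f.Close()
	var kva []KeyValue
	dec := json.NewDecoder(f)
	for {
		var kv KeyValue
		if err := dec.Decode(&kv); err != nil {
			break // io.EOF (or a corrupt file) ends the loop
		}
		kva = append(kva, kv)
	}
	fmt.Println(kva)
}
```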
- -Additionally, depending on your strategy for terminating worker processes, you may see some errors of the form - -``` -2024/02/11 16:21:32 dialing:dial unix /var/tmp/5840-mr-501: connect: connection refused -``` - -It is fine to see a handful of these messages per test; they arise when the worker is unable to contact the coordinator RPC server after the coordinator has exited. -","- The [Guidance page](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) has some tips on developing and debugging. - -- One way to get started is to modify `mr/worker.go`'s `Worker()` to send an RPC to the coordinator asking for a task. Then modify the coordinator to respond with the file name of an as-yet-unstarted map task. Then modify the worker to read that file and call the application Map function, as in `mrsequential.go`. - -- The application Map and Reduce functions are loaded at run-time using the Go plugin package, from files whose names end in `.so`. - -- If you change anything in the `mr/` directory, you will probably have to re-build any MapReduce plugins you use, with something like `go build -buildmode=plugin ../mrapps/wc.go` - -- This lab relies on the workers sharing a file system. That's straightforward when all workers run on the same machine, but would require a global filesystem like GFS if the workers ran on different machines. - -- A reasonable naming convention for intermediate files is `mr-X-Y`, where X is the Map task number, and Y is the reduce task number. - -- The worker's map task code will need a way to store intermediate key/value pairs in files in a way that can be correctly read back during reduce tasks. One possibility is to use Go's `encoding/json` package. To write key/value pairs in JSON format to an open file: - - ``` - enc := json.NewEncoder(file) - for _, kv := ... { - err := enc.Encode(&kv) - ``` - - and to read such a file back: - - ``` - dec := json.NewDecoder(file) - for { - var kv KeyValue - if err := dec.Decode(&kv); err != nil { - break - } - kva = append(kva, kv) - } - ``` - -- The map part of your worker can use the `ihash(key)` function (in `worker.go`) to pick the reduce task for a given key. - -- You can steal some code from `mrsequential.go` for reading Map input files, for sorting intermediate key/value pairs between the Map and Reduce, and for storing Reduce output in files. - -- The coordinator, as an RPC server, will be concurrent; don't forget to lock shared data. - -- Use Go's race detector, with `go run -race`. `test-mr.sh` has a comment at the start that tells you how to run it with `-race`. When we grade your labs, we will **not** use the race detector. Nevertheless, if your code has races, there's a good chance it will fail when we test it even without the race detector. - -- Workers will sometimes need to wait, e.g. reduces can't start until the last map has finished. One possibility is for workers to periodically ask the coordinator for work, sleeping with `time.Sleep()` between each request. Another possibility is for the relevant RPC handler in the coordinator to have a loop that waits, either with `time.Sleep()` or `sync.Cond`. Go runs the handler for each RPC in its own thread, so the fact that one handler is waiting needn't prevent the coordinator from processing other RPCs. - -- The coordinator can't reliably distinguish between crashed workers, workers that are alive but have stalled for some reason, and workers that are executing but too slowly to be useful. 
The best you can do is have the coordinator wait for some amount of time, and then give up and re-issue the task to a different worker. For this lab, have the coordinator wait for ten seconds; after that the coordinator should assume the worker has died (of course, it might not have). - -- If you choose to implement Backup Tasks (Section 3.6), note that we test that your code doesn't schedule extraneous tasks when workers execute tasks without crashing. Backup tasks should only be scheduled after some relatively long period of time (e.g., 10s). - -- To test crash recovery, you can use the `mrapps/crash.go` application plugin. It randomly exits in the Map and Reduce functions. - -- To ensure that nobody observes partially written files in the presence of crashes, the MapReduce paper mentions the trick of using a temporary file and atomically renaming it once it is completely written. You can use `ioutil.TempFile` (or `os.CreateTemp` if you are running Go 1.17 or later) to create a temporary file and `os.Rename` to atomically rename it. - -- `test-mr.sh` runs all its processes in the sub-directory `mr-tmp`, so if something goes wrong and you want to look at intermediate or output files, look there. Feel free to temporarily modify `test-mr.sh` to `exit` after the failing test, so the script does not continue testing (and overwrite the output files). - -- `test-mr-many.sh` runs `test-mr.sh` many times in a row, which you may want to do in order to spot low-probability bugs. It takes as an argument the number of times to run the tests. You should not run several `test-mr.sh` instances in parallel because the coordinator will reuse the same socket, causing conflicts. - -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names. - -- When calling the RPC `call()` function, the reply struct should contain all default values. RPC calls should look like this: - - ``` - reply := SomeType{} - call(..., &reply) - ``` - - without setting any fields of reply before the call. If you pass reply structures that have non-default fields, the RPC system may silently return incorrect values.","- The map phase should divide the intermediate keys into buckets for `nReduce` reduce tasks, where `nReduce` is the number of reduce tasks -- the argument that `main/mrcoordinator.go` passes to `MakeCoordinator()`. Each mapper should create `nReduce` intermediate files for consumption by the reduce tasks. -- The worker implementation should put the output of the X'th reduce task in the file `mr-out-X`. -- A `mr-out-X` file should contain one line per Reduce function output. The line should be generated with the Go `""%v %v""` format, called with the key and value. Have a look in `main/mrsequential.go` for the line commented ""this is the correct format"". The test script will fail if your implementation deviates too much from this format. -- You can modify `mr/worker.go`, `mr/coordinator.go`, and `mr/rpc.go`. You can temporarily modify other files for testing, but make sure your code works with the original versions; we'll test with the original versions. -- The worker should put intermediate Map output in files in the current directory, where your worker can later read them as input to Reduce tasks. -- `main/mrcoordinator.go` expects `mr/coordinator.go` to implement a `Done()` method that returns true when the MapReduce job is completely finished; at that point, `mrcoordinator.go` will exit. 
-- When the job is completely finished, the worker processes should exit. A simple way to implement this is to use the return value from `call()`: if the worker fails to contact the coordinator, it can assume that the coordinator has exited because the job is done, so the worker can terminate too. Depending on your design, you might also find it helpful to have a ""please exit"" pseudo-task that the coordinator can give to workers.",git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/main && bash test-mr.sh,"*** Starting wc test. ---- wc test: PASS -*** Starting indexer test. ---- indexer test: PASS -*** Starting map parallelism test. ---- map parallelism test: PASS -*** Starting reduce parallelism test. ---- reduce parallelism test: PASS -*** Starting job count test. ---- job count test: PASS -*** Starting early exit test. ---- early exit test: PASS -*** Starting crash test. ---- crash test: PASS -*** PASSED ALL TESTS",moderate/hard,http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html -2,6.5840: Distributed Systems,Spring 2024,Lab 2: Key/Value Server,Key/value server with no network failures,"In this lab you will build a key/value server for a single machine that ensures that each operation is executed exactly once despite network failures and that the operations are [linearizable](http://nil.csail.mit.edu/6.5840/2024/papers/linearizability-faq.txt). Later labs will replicate a server like this one to handle server crashes. - -Clients can send three different RPCs to the key/value server: `Put(key, value)`, `Append(key, arg)`, and `Get(key)`. The server maintains an in-memory map of key/value pairs. Keys and values are strings. `Put(key, value)` installs or replaces the value for a particular key in the map, `Append(key, arg)` appends arg to key's value *and* returns the old value, and `Get(key)` fetches the current value for the key. A `Get` for a non-existent key should return an empty string. An `Append` to a non-existent key should act as if the existing value were a zero-length string. Each client talks to the server through a `Clerk` with Put/Append/Get methods. A `Clerk` manages RPC interactions with the server. - -Your server must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If client requests aren't concurrent, each client Get/Put/Append call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.","We supply you with skeleton code and tests in `src/kvsrv`. You will need to modify `kvsrv/client.go`, `kvsrv/server.go`, and `kvsrv/common.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... 
-$ cd src/kvsrv -$ go test -... -$ -``` - -### Key/value server with no network failures ([easy](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html)) - -Your first task is to implement a solution that works when there are no dropped messages. - -You'll need to add RPC-sending code to the Clerk Put/Append/Get methods in `client.go`, and implement `Put`, `Append()` and `Get()` RPC handlers in `server.go`. - -You have completed this task when you pass the first two tests in the test suite: ""one client"" and ""many clients"". - -- Check that your code is race-free using `go test -race`.",,"Your first task is to implement a solution that works when there are no dropped messages. - -You'll need to add RPC-sending code to the Clerk Put/Append/Get methods in `client.go`, and implement `Put`, `Append()` and `Get()` RPC handlers in `server.go`. - -You have completed this task when you pass the first two tests in the test suite: ""one client"" and ""many clients"". - -- Check that your code is race-free using `go test -race`.","Your first task is to implement a solution that works when there are no dropped messages. - -You'll need to add RPC-sending code to the Clerk Put/Append/Get methods in `client.go`, and implement `Put`, `Append()` and `Get()` RPC handlers in `server.go`. - -You have completed this task when you pass the first two tests in the test suite: ""one client"" and ""many clients"".",- Check that your code is race-free using `go test -race`.[@rules],,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/kvsr && go test,,easy,http://nil.csail.mit.edu/6.5840/2024/labs/lab-kvsrv.html -3,6.5840: Distributed Systems,Spring 2024,Lab 2: Key/Value Server,Key/value server with dropped messages,"In this lab you will build a key/value server for a single machine that ensures that each operation is executed exactly once despite network failures and that the operations are [linearizable](http://nil.csail.mit.edu/6.5840/2024/papers/linearizability-faq.txt). Later labs will replicate a server like this one to handle server crashes. - -Clients can send three different RPCs to the key/value server: `Put(key, value)`, `Append(key, arg)`, and `Get(key)`. The server maintains an in-memory map of key/value pairs. Keys and values are strings. `Put(key, value)` installs or replaces the value for a particular key in the map, `Append(key, arg)` appends arg to key's value *and* returns the old value, and `Get(key)` fetches the current value for the key. A `Get` for a non-existent key should return an empty string. An `Append` to a non-existent key should act as if the existing value were a zero-length string. Each client talks to the server through a `Clerk` with Put/Append/Get methods. A `Clerk` manages RPC interactions with the server. - -Your server must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If client requests aren't concurrent, each client Get/Put/Append call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. 
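For the no-network-failures task just above, `server.go` is essentially an in-memory map guarded by a mutex. A minimal sketch follows; the args/reply shapes and the constructor name are illustrative stand-ins for whatever `kvsrv/common.go` and the skeleton actually declare. Note that `Append` hands back the previous value, as the spec requires.

```go
package kvsrv

import "sync"

// Hypothetical argument/reply shapes; the real definitions belong in
// kvsrv/common.go and may differ.
type PutAppendArgs struct {
	Key   string
	Value string
}
type PutAppendReply struct {
	Value string // old value, returned by Append
}
type GetArgs struct{ Key string }
type GetReply struct{ Value string }

// KVServer sketch for the no-failures case: a map plus a mutex so that
// concurrent RPC handlers stay race-free under `go test -race`.
type KVServer struct {
	mu   sync.Mutex
	data map[string]string
}

// StartKVServer is an illustrative constructor.
func StartKVServer() *KVServer {
	return &KVServer{data: make(map[string]string)}
}

func (kv *KVServer) Get(args *GetArgs, reply *GetReply) {
	kv.mu.Lock()
	defer kv.mu.Unlock()
	reply.Value = kv.data[args.Key] // missing key yields ""
}

func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) {
	kv.mu.Lock()
	defer kv.mu.Unlock()
	kv.data[args.Key] = args.Value
}

func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) {
	kv.mu.Lock()
	defer kv.mu.Unlock()
	old := kv.data[args.Key]
	kv.data[args.Key] = old + args.Value
	reply.Value = old // Append returns the value as it was before the append
}
```

The dropped-messages part of this lab (described below) layers duplicate detection on top of this: a client ID plus per-client sequence number in the args, and a remembered last reply per client on the server.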
- -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.","We supply you with skeleton code and tests in `src/kvsrv`. You will need to modify `kvsrv/client.go`, `kvsrv/server.go`, and `kvsrv/common.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv -$ go test -... -$ -``` - -### Key/value server with no network failures ([easy](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html)) - -Your first task is to implement a solution that works when there are no dropped messages. - -You'll need to add RPC-sending code to the Clerk Put/Append/Get methods in `client.go`, and implement `Put`, `Append()` and `Get()` RPC handlers in `server.go`. - -You have completed this task when you pass the first two tests in the test suite: ""one client"" and ""many clients"". - -- Check that your code is race-free using `go test -race`.",,"Now you should modify your solution to continue in the face of dropped messages (e.g., RPC requests and RPC replies). If a message was lost, then the client's `ck.server.Call()` will return `false` (more precisely, `Call()` waits for a reply message for a timeout interval, and returns false if no reply arrives within that time). One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it succeeds. Each call to `Clerk.Put()` or `Clerk.Append()`, however, should result in just a *single* execution, so you will have to ensure that the re-send doesn't result in the server executing the request twice. - -Add code to `Clerk` to retry if doesn't receive a reply, and to `server.go` to filter duplicates if the operation requires it. These notes include guidance on [duplicate detection](http://nil.csail.mit.edu/6.5840/2024/notes/l-raft-QA.txt). - -- You will need to uniquely identify client operations to ensure that the key/value server executes each one just once. -- You will have to think carefully about what state the server must maintain for handling duplicate `Get()`, `Put()`, and `Append()` requests, if any at all. -- Your scheme for duplicate detection should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. It's OK to assume that a client will make only one call into a Clerk at a time. - -Your code should now pass all tests, like this: - -``` -$ go test -Test: one client ... - ... Passed -- t 3.8 nrpc 31135 ops 31135 -Test: many clients ... - ... Passed -- t 4.7 nrpc 102853 ops 102853 -Test: unreliable net, many clients ... - ... Passed -- t 4.1 nrpc 580 ops 496 -Test: concurrent append to same key, unreliable ... - ... Passed -- t 0.6 nrpc 61 ops 52 -Test: memory use get ... - ... Passed -- t 0.4 nrpc 4 ops 0 -Test: memory use put ... - ... Passed -- t 0.2 nrpc 2 ops 0 -Test: memory use append ... - ... Passed -- t 0.4 nrpc 2 ops 0 -Test: memory use many puts ... - ... Passed -- t 11.5 nrpc 100000 ops 0 -Test: memory use many gets ... - ... 
Passed -- t 12.2 nrpc 100001 ops 0 -PASS -ok 6.5840/kvsrv 39.000s -``` - -The numbers after each `Passed` are real time in seconds, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put/Append calls).","Now you should modify your solution to continue in the face of dropped messages (e.g., RPC requests and RPC replies). If a message was lost, then the client's `ck.server.Call()` will return `false` (more precisely, `Call()` waits for a reply message for a timeout interval, and returns false if no reply arrives within that time). One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it succeeds. Each call to `Clerk.Put()` or `Clerk.Append()`, however, should result in just a *single* execution, so you will have to ensure that the re-send doesn't result in the server executing the request twice. - -Add code to `Clerk` to retry if doesn't receive a reply, and to `server.go` to filter duplicates if the operation requires it. These notes include guidance on [duplicate detection](http://nil.csail.mit.edu/6.5840/2024/notes/l-raft-QA.txt). - -- You will need to uniquely identify client operations to ensure that the key/value server executes each one just once. -- You will have to think carefully about what state the server must maintain for handling duplicate `Get()`, `Put()`, and `Append()` requests, if any at all. -- Your scheme for duplicate detection should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. It's OK to assume that a client will make only one call into a Clerk at a time. - -Your code should now pass all tests, like this: - -``` -$ go test -Test: one client ... - ... Passed -- t 3.8 nrpc 31135 ops 31135 -Test: many clients ... - ... Passed -- t 4.7 nrpc 102853 ops 102853 -Test: unreliable net, many clients ... - ... Passed -- t 4.1 nrpc 580 ops 496 -Test: concurrent append to same key, unreliable ... - ... Passed -- t 0.6 nrpc 61 ops 52 -Test: memory use get ... - ... Passed -- t 0.4 nrpc 4 ops 0 -Test: memory use put ... - ... Passed -- t 0.2 nrpc 2 ops 0 -Test: memory use append ... - ... Passed -- t 0.4 nrpc 2 ops 0 -Test: memory use many puts ... - ... Passed -- t 11.5 nrpc 100000 ops 0 -Test: memory use many gets ... - ... Passed -- t 12.2 nrpc 100001 ops 0 -PASS -ok 6.5840/kvsrv 39.000s -``` - -The numbers after each `Passed` are real time in seconds, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put/Append calls).","- You will need to uniquely identify client operations to ensure that the key/value server executes each one just once. -- You will have to think carefully about what state the server must maintain for handling duplicate `Get()`, `Put()`, and `Append()` requests, if any at all. -- Your scheme for duplicate detection should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. It's OK to assume that a client will make only one call into a Clerk at a time.[@[repo/location]]",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/kvsr && go test,"Test: one client ... - ... Passed -- t 3.8 nrpc 31135 ops 31135 -Test: many clients ... - ... Passed -- t 4.7 nrpc 102853 ops 102853 -Test: unreliable net, many clients ... - ... Passed -- t 4.1 nrpc 580 ops 496 -Test: concurrent append to same key, unreliable ... - ... Passed -- t 0.6 nrpc 61 ops 52 -Test: memory use get ... - ... 
Passed -- t 0.4 nrpc 4 ops 0 -Test: memory use put ... - ... Passed -- t 0.2 nrpc 2 ops 0 -Test: memory use append ... - ... Passed -- t 0.4 nrpc 2 ops 0 -Test: memory use many puts ... - ... Passed -- t 11.5 nrpc 100000 ops 0 -Test: memory use many gets ... - ... Passed -- t 12.2 nrpc 100001 ops 0 -PASS -ok 6.5840/kvsrv 39.000s",easy,http://nil.csail.mit.edu/6.5840/2024/labs/lab-kvsrv.html -4,6.5840: Distributed Systems,Spring 2024,Lab 3: Raft,Part 3A: leader election,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will shard your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date.","If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). 
However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft -$ go test -Test (3A): initial election ... ---- FAIL: TestInitialElection3A (5.04s) - config.go:326: expected one leader, got none -Test (3A): election after network failure ... ---- FAIL: TestReElection3A (5.03s) - config.go:326: expected one leader, got none -... -$ -```","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.",,"Implement Raft leader election and heartbeats (`AppendEntries` RPCs with no log entries). The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run `go test -run 3A `to test your 3A code. - -- You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. `go test -run 3A `. -- Follow the paper's Figure 2. At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election, -- Add the Figure 2 state for leader election to the `Raft` struct in `raft.go`. You'll also need to define a struct to hold information about each log entry. -- Fill in the `RequestVoteArgs` and `RequestVoteReply` structs. 
Modify `Make()` to create a background goroutine that will kick off leader election periodically by sending out `RequestVote` RPCs when it hasn't heard from another peer for a while. Implement the `RequestVote()` RPC handler so that servers will vote for one another. -- To implement heartbeats, define an `AppendEntries` RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an `AppendEntries` RPC handler method. -- The tester requires that the leader send heartbeat RPCs no more than ten times per second. -- The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate). -- The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds. -- You may find Go's [rand](https://golang.org/pkg/math/rand/) useful. -- You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls [time.Sleep()](https://golang.org/pkg/time/#Sleep); see the `ticker()` goroutine that `Make()` creates for this purpose. Don't use Go's `time.Timer` or `time.Ticker`, which are difficult to use correctly. -- If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure. -- Don't forget to implement `GetState()`. -- The tester calls your Raft's `rf.Kill()` when it is permanently shutting down an instance. You can check whether `Kill()` has been called using `rf.killed()`. You may want to do this in all loops, to avoid having dead Raft instances print confusing messages. -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). The `labgob` package will warn you about this; don't ignore the warnings. -- The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug. Refer to the [Guidance](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) page for debugging tips. - -Be sure you pass the 3A tests before submitting Part 3A, so that you see something like this: - -``` -$ go test -run 3A -Test (3A): initial election ... - ... Passed -- 3.5 3 58 16840 0 -Test (3A): election after network failure ... - ... Passed -- 5.4 3 118 25269 0 -Test (3A): multiple elections ... - ... Passed -- 7.3 7 624 138014 0 -PASS -ok 6.5840/raft 16.265s -$ -``` - -Each ""Passed"" line contains five numbers; these are the time that the test took in seconds, the number of Raft peers, the number of RPCs sent during the test, the total number of bytes in the RPC messages, and the number of log entries that Raft reports were committed. Your numbers will differ from those shown here. You can ignore the numbers if you like, but they may help you sanity-check the number of RPCs that your implementation sends. 
For all of labs 3, 4, and 5, the grading script will fail your solution if it takes more than 600 seconds for all of the tests (`go test`), or if any individual test takes more than 120 seconds. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should make sure that your code consistently passes the tests with the `-race` flag.",,,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/raft && go test -run 3A,"Test (3A): initial election ... - ... Passed -- 3.5 3 58 16840 0 -Test (3A): election after network failure ... - ... Passed -- 5.4 3 118 25269 0 -Test (3A): multiple elections ... - ... Passed -- 7.3 7 624 138014 0 -PASS -ok 6.5840/raft 16.265s",moderate,http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html -5,6.5840: Distributed Systems,Spring 2024,Lab 3: Raft,Part 3B: log,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will shard your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date.","If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html). 
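The Part 3A hints above describe a ticker goroutine that uses `time.Sleep()` and a randomized election timeout rather than `time.Timer`/`time.Ticker`. A minimal sketch of that loop, under stated assumptions: `raft.go` already declares the real `Raft` struct, `ticker()`, and `killed()`, and the `state`/`lastHeard` fields and `startElection()` stub here are illustrative additions, not part of the handout.

```go
package raft

import (
	"math/rand"
	"sync"
	"sync/atomic"
	"time"
)

// Illustrative subset of the Raft struct.
type serverState int

const (
	Follower serverState = iota
	Candidate
	Leader
)

type Raft struct {
	mu        sync.Mutex
	state     serverState
	lastHeard time.Time // last heartbeat received or vote granted
	dead      int32
}

func (rf *Raft) killed() bool { return atomic.LoadInt32(&rf.dead) == 1 }

// startElection would increment currentTerm, vote for itself, and send
// RequestVote RPCs to all peers in parallel; omitted in this sketch.
func (rf *Raft) startElection() {}

// ticker sleeps briefly, then starts an election if nothing has been
// heard within a randomized timeout. 300-600 ms sits comfortably above
// the tester's ten-heartbeats-per-second limit and well under the
// five-second re-election requirement.
func (rf *Raft) ticker() {
	for !rf.killed() {
		timeout := time.Duration(300+rand.Intn(300)) * time.Millisecond

		rf.mu.Lock()
		elapsed := time.Since(rf.lastHeard)
		leader := rf.state == Leader
		rf.mu.Unlock()

		if !leader && elapsed >= timeout {
			rf.startElection()
		}
		time.Sleep(50 * time.Millisecond)
	}
}
```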
- -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft -$ go test -Test (3A): initial election ... ---- FAIL: TestInitialElection3A (5.04s) - config.go:326: expected one leader, got none -Test (3A): election after network failure ... ---- FAIL: TestReElection3A (5.03s) - config.go:326: expected one leader, got none -... -$ -```","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.","Implement the leader and follower code to append new log entries, so that the `go test -run 3B `tests pass. - -- Run `git pull` to get the latest lab software. -- Your first goal should be to pass `TestBasicAgree3B()`. Start by implementing `Start()`, then write the code to send and receive new log entries via `AppendEntries` RPCs, following Figure 2. Send each newly committed entry on `applyCh` on each peer. -- You will need to implement the election restriction (section 5.4.1 in the paper). -- Your code may have loops that repeatedly check for certain events. 
Don't have these loops execute continuously without pausing, since that will slow your implementation enough that it fails tests. Use Go's [condition variables](https://golang.org/pkg/sync/#Cond), or insert a `time.Sleep(10 * time.Millisecond)` in each loop iteration. -- Do yourself a favor for future labs and write (or re-write) code that's clean and clear. For ideas, re-visit our the [Guidance page](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) with tips on how to develop and debug your code. -- If you fail a test, look at `test_test.go` and `config.go` to understand what's being tested. `config.go` also illustrates how the tester uses the Raft API. - -The tests for upcoming labs may fail your code if it runs too slowly. You can check how much real time and CPU time your solution uses with the time command. Here's typical output: - -``` -$ time go test -run 3B -Test (3B): basic agreement ... - ... Passed -- 0.9 3 16 4572 3 -Test (3B): RPC byte count ... - ... Passed -- 1.7 3 48 114536 11 -Test (3B): agreement after follower reconnects ... - ... Passed -- 3.6 3 78 22131 7 -Test (3B): no agreement if too many followers disconnect ... - ... Passed -- 3.8 5 172 40935 3 -Test (3B): concurrent Start()s ... - ... Passed -- 1.1 3 24 7379 6 -Test (3B): rejoin of partitioned leader ... - ... Passed -- 5.1 3 152 37021 4 -Test (3B): leader backs up quickly over incorrect follower logs ... - ... Passed -- 17.2 5 2080 1587388 102 -Test (3B): RPC counts aren't too high ... - ... Passed -- 2.2 3 60 20119 12 -PASS -ok 6.5840/raft 35.557s - -real 0m35.899s -user 0m2.556s -sys 0m1.458s -$ -``` - -The ""ok 6.5840/raft 35.557s"" means that Go measured the time taken for the 3B tests to be 35.557 seconds of real (wall-clock) time. The ""user 0m2.556s"" means that the code consumed 2.556 seconds of CPU time, or time spent actually executing instructions (rather than waiting or sleeping). If your solution uses much more than a minute of real time for the 3B tests, or much more than 5 seconds of CPU time, you may run into trouble later on. Look for time spent sleeping or waiting for RPC timeouts, loops that run without sleeping or waiting for conditions or channel messages, or large numbers of RPCs sent. - -### ","Implement the leader and follower code to append new log entries, so that the `go test -run 3B `tests pass.","- Run `git pull` to get the latest lab software. -- Your first goal should be to pass `TestBasicAgree3B()`. Start by implementing `Start()`, then write the code to send and receive new log entries via `AppendEntries` RPCs, following Figure 2. Send each newly committed entry on `applyCh` on each peer. -- You will need to implement the election restriction (section 5.4.1 in the paper). -- Your code may have loops that repeatedly check for certain events. Don't have these loops execute continuously without pausing, since that will slow your implementation enough that it fails tests. Use Go's [condition variables](https://golang.org/pkg/sync/#Cond), or insert a `time.Sleep(10 * time.Millisecond)` in each loop iteration. -- Do yourself a favor for future labs and write (or re-write) code that's clean and clear. For ideas, re-visit our the [Guidance page](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) with tips on how to develop and debug your code. -- If you fail a test, look at `test_test.go` and `config.go` to understand what's being tested. 
`config.go` also illustrates how the tester uses the Raft API.[@task]",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/raft && time go test -run 3B,"Test (3B): basic agreement ... - ... Passed -- 0.9 3 16 4572 3 -Test (3B): RPC byte count ... - ... Passed -- 1.7 3 48 114536 11 -Test (3B): agreement after follower reconnects ... - ... Passed -- 3.6 3 78 22131 7 -Test (3B): no agreement if too many followers disconnect ... - ... Passed -- 3.8 5 172 40935 3 -Test (3B): concurrent Start()s ... - ... Passed -- 1.1 3 24 7379 6 -Test (3B): rejoin of partitioned leader ... - ... Passed -- 5.1 3 152 37021 4 -Test (3B): leader backs up quickly over incorrect follower logs ... - ... Passed -- 17.2 5 2080 1587388 102 -Test (3B): RPC counts aren't too high ... - ... Passed -- 2.2 3 60 20119 12 -PASS -ok 6.5840/raft 35.557s - -real 0m35.899s -user 0m2.556s -sys 0m1.458s",hard,http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html -6,6.5840: Distributed Systems,Spring 2024,Lab 3: Raft,Part 3C: persistence,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will shard your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date.","If you have done Lab 1, you already have a copy of the lab source code. 
If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft -$ go test -Test (3A): initial election ... ---- FAIL: TestInitialElection3A (5.04s) - config.go:326: expected one leader, got none -Test (3A): election after network failure ... ---- FAIL: TestReElection3A (5.03s) - config.go:326: expected one leader, got none -... -$ -```","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.","If a Raft-based server reboots it should resume service where it left off. This requires that Raft keep persistent state that survives a reboot. The paper's Figure 2 mentions which state should be persistent. - -A real implementation would write Raft's persistent state to disk each time it changed, and would read the state from disk when restarting after a reboot. 
Your implementation won't use the disk; instead, it will save and restore persistent state from a `Persister` object (see `persister.go`). Whoever calls `Raft.Make()` supplies a `Persister` that initially holds Raft's most recently persisted state (if any). Raft should initialize its state from that `Persister`, and should use it to save its persistent state each time the state changes. Use the `Persister`'s `ReadRaftState()` and `Save()` methods. - -Complete the functions `persist()` and `readPersist()` in `raft.go` by adding code to save and restore persistent state. You will need to encode (or ""serialize"") the state as an array of bytes in order to pass it to the `Persister`. Use the `labgob` encoder; see the comments in `persist()` and `readPersist()`. `labgob` is like Go's `gob` encoder but prints error messages if you try to encode structures with lower-case field names. For now, pass `nil` as the second argument to `persister.Save()`. Insert calls to `persist()` at the points where your implementation changes persistent state. Once you've done this, and if the rest of your implementation is correct, you should pass all of the 3C tests. - -You will probably need the optimization that backs up nextIndex by more than one entry at a time. Look at the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf) starting at the bottom of page 7 and top of page 8 (marked by a gray line). The paper is vague about the details; you will need to fill in the gaps. One possibility is to have a rejection message include: - -``` - XTerm: term in the conflicting entry (if any) - XIndex: index of first entry with that term (if any) - XLen: log length -``` - -Then the leader's logic can be something like: - -``` - Case 1: leader doesn't have XTerm: - nextIndex = XIndex - Case 2: leader has XTerm: - nextIndex = leader's last entry for XTerm - Case 3: follower's log is too short: - nextIndex = XLen -``` - -A few other hints: - -- Run `git pull` to get the latest lab software. -- The 3C tests are more demanding than those for 3A or 3B, and failures may be caused by problems in your code for 3A or 3B. - -Your code should pass all the 3C tests (as shown below), as well as the 3A and 3B tests. - -``` -$ go test -run 3C -Test (3C): basic persistence ... - ... Passed -- 5.0 3 86 22849 6 -Test (3C): more persistence ... - ... Passed -- 17.6 5 952 218854 16 -Test (3C): partitioned leader and one follower crash, leader restarts ... - ... Passed -- 2.0 3 34 8937 4 -Test (3C): Figure 8 ... - ... Passed -- 31.2 5 580 130675 32 -Test (3C): unreliable agreement ... - ... Passed -- 1.7 5 1044 366392 246 -Test (3C): Figure 8 (unreliable) ... - ... Passed -- 33.6 5 10700 33695245 308 -Test (3C): churn ... - ... Passed -- 16.1 5 8864 44771259 1544 -Test (3C): unreliable churn ... - ... Passed -- 16.5 5 4220 6414632 906 -PASS -ok 6.5840/raft 123.564s -$ -``` - -It is a good idea to run the tests multiple times before submitting and check that each run prints `PASS`. - -``` -$ for i in {0..10}; do go test; done -```","Complete the functions `persist()` and `readPersist()` in `raft.go` by adding code to save and restore persistent state. You will need to encode (or ""serialize"") the state as an array of bytes in order to pass it to the `Persister`. Use the `labgob` encoder; see the comments in `persist()` and `readPersist()`. `labgob` is like Go's `gob` encoder but prints error messages if you try to encode structures with lower-case field names. 
For now, pass `nil` as the second argument to `persister.Save()`. Insert calls to `persist()` at the points where your implementation changes persistent state. Once you've done this, and if the rest of your implementation is correct, you should pass all of the 3C tests.","- Run `git pull` to get the latest lab software. -- The 3C tests are more demanding than those for 3A or 3B, and failures may be caused by problems in your code for 3A or 3B.[@task]",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/raft && go test -run 3C,"Test (3C): basic persistence ... - ... Passed -- 5.0 3 86 22849 6 -Test (3C): more persistence ... - ... Passed -- 17.6 5 952 218854 16 -Test (3C): partitioned leader and one follower crash, leader restarts ... - ... Passed -- 2.0 3 34 8937 4 -Test (3C): Figure 8 ... - ... Passed -- 31.2 5 580 130675 32 -Test (3C): unreliable agreement ... - ... Passed -- 1.7 5 1044 366392 246 -Test (3C): Figure 8 (unreliable) ... - ... Passed -- 33.6 5 10700 33695245 308 -Test (3C): churn ... - ... Passed -- 16.1 5 8864 44771259 1544 -Test (3C): unreliable churn ... - ... Passed -- 16.5 5 4220 6414632 906 -PASS -ok 6.5840/raft 123.564s",hard,http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html -7,6.5840: Distributed Systems,Spring 2024,Lab 3: Raft,Part 3D: log compaction,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will shard your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2. 
You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date.","If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft -$ go test -Test (3A): initial election ... ---- FAIL: TestInitialElection3A (5.04s) - config.go:326: expected one leader, got none -Test (3A): election after network failure ... ---- FAIL: TestReElection3A (5.03s) - config.go:326: expected one leader, got none -... -$ -```","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. 
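As a concrete picture of the `applyCh` contract described above, here is a minimal sketch of an apply loop. The `commitIndex`, `lastApplied`, `log`, and `mu` fields are assumptions in the style of Figure 2, not names fixed by the handout.

```
// Sketch only: delivers each newly committed entry to the service, in order.
func (rf *Raft) applier(applyCh chan ApplyMsg) {
	for !rf.killed() {
		rf.mu.Lock()
		for rf.lastApplied < rf.commitIndex {
			rf.lastApplied++
			msg := ApplyMsg{
				CommandValid: true,
				Command:      rf.log[rf.lastApplied].Command,
				CommandIndex: rf.lastApplied,
			}
			rf.mu.Unlock()
			applyCh <- msg // never send on applyCh while holding the lock
			rf.mu.Lock()
		}
		rf.mu.Unlock()
		time.Sleep(10 * time.Millisecond) // or wake this loop with a sync.Cond
	}
}
```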
- -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.","As things stand now, a rebooting server replays the complete Raft log in order to restore its state. However, it's not practical for a long-running service to remember the complete Raft log forever. Instead, you'll modify Raft to cooperate with services that persistently store a ""snapshot"" of their state from time to time, at which point Raft discards log entries that precede the snapshot. The result is a smaller amount of persistent data and faster restart. However, it's now possible for a follower to fall so far behind that the leader has discarded the log entries it needs to catch up; the leader must then send a snapshot plus the log starting at the time of the snapshot. Section 7 of the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf) outlines the scheme; you will have to design the details. - -Your Raft must provide the following function that the service can call with a serialized snapshot of its state: - -``` -Snapshot(index int, snapshot []byte) -``` - -In Lab 3D, the tester calls `Snapshot()` periodically. In Lab 4, you will write a key/value server that calls `Snapshot()`; the snapshot will contain the complete table of key/value pairs. The service layer calls `Snapshot()` on every peer (not just on the leader). - -The `index` argument indicates the highest log entry that's reflected in the snapshot. Raft should discard its log entries before that point. You'll need to revise your Raft code to operate while storing only the tail of the log. - -You'll need to implement the `InstallSnapshot` RPC discussed in the paper that allows a Raft leader to tell a lagging Raft peer to replace its state with a snapshot. You will likely need to think through how InstallSnapshot should interact with the state and rules in Figure 2. - -When a follower's Raft code receives an InstallSnapshot RPC, it can use the `applyCh` to send the snapshot to the service in an `ApplyMsg`. The `ApplyMsg` struct definition already contains the fields you will need (and which the tester expects). Take care that these snapshots only advance the service's state, and don't cause it to move backwards. - -If a server crashes, it must restart from persisted data. Your Raft should persist both Raft state and the corresponding snapshot. Use the second argument to `persister.Save()` to save the snapshot. If there's no snapshot, pass `nil` as the second argument. - -When a server restarts, the application layer reads the persisted snapshot and restores its saved state. - -Implement `Snapshot()` and the InstallSnapshot RPC, as well as the changes to Raft to support these (e.g, operation with a trimmed log). Your solution is complete when it passes the 3D tests (and all the previous Lab 3 tests). - -- `git pull` to make sure you have the latest software. -- A good place to start is to modify your code to so that it is able to store just the part of the log starting at some index X. Initially you can set X to zero and run the 3B/3C tests. Then make `Snapshot(index)` discard the log before `index`, and set X equal to `index`. If all goes well you should now pass the first 3D test. -- Next: have the leader send an InstallSnapshot RPC if it doesn't have the log entries required to bring a follower up to date. -- Send the entire snapshot in a single InstallSnapshot RPC. Don't implement Figure 13's `offset` mechanism for splitting up the snapshot. 
-- Raft must discard old log entries in a way that allows the Go garbage collector to free and re-use the memory; this requires that there be no reachable references (pointers) to the discarded log entries. -- A reasonable amount of time to consume for the full set of Lab 3 tests (3A+3B+3C+3D) without `-race` is 6 minutes of real time and one minute of CPU time. When running with `-race`, it is about 10 minutes of real time and two minutes of CPU time. - -Your code should pass all the 3D tests (as shown below), as well as the 3A, 3B, and 3C tests. - -``` -$ go test -run 3D -Test (3D): snapshots basic ... - ... Passed -- 11.6 3 176 61716 192 -Test (3D): install snapshots (disconnect) ... - ... Passed -- 64.2 3 878 320610 336 -Test (3D): install snapshots (disconnect+unreliable) ... - ... Passed -- 81.1 3 1059 375850 341 -Test (3D): install snapshots (crash) ... - ... Passed -- 53.5 3 601 256638 339 -Test (3D): install snapshots (unreliable+crash) ... - ... Passed -- 63.5 3 687 288294 336 -Test (3D): crash and restart all servers ... - ... Passed -- 19.5 3 268 81352 58 -PASS -ok 6.5840/raft 293.456s -```","Implement `Snapshot()` and the InstallSnapshot RPC, as well as the changes to Raft to support these (e.g, operation with a trimmed log). Your solution is complete when it passes the 3D tests (and all the previous Lab 3 tests).","- `git pull` to make sure you have the latest software. -- A good place to start is to modify your code to so that it is able to store just the part of the log starting at some index X. Initially you can set X to zero and run the 3B/3C tests. Then make `Snapshot(index)` discard the log before `index`, and set X equal to `index`. If all goes well you should now pass the first 3D test. -- Next: have the leader send an InstallSnapshot RPC if it doesn't have the log entries required to bring a follower up to date. -- Send the entire snapshot in a single InstallSnapshot RPC. Don't implement Figure 13's `offset` mechanism for splitting up the snapshot. -- Raft must discard old log entries in a way that allows the Go garbage collector to free and re-use the memory; this requires that there be no reachable references (pointers) to the discarded log entries. -- A reasonable amount of time to consume for the full set of Lab 3 tests (3A+3B+3C+3D) without `-race` is 6 minutes of real time and one minute of CPU time. When running with `-race`, it is about 10 minutes of real time and two minutes of CPU time.[@rules]",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/raft && go test -run 3D,"Test (3D): snapshots basic ... - ... Passed -- 11.6 3 176 61716 192 -Test (3D): install snapshots (disconnect) ... - ... Passed -- 64.2 3 878 320610 336 -Test (3D): install snapshots (disconnect+unreliable) ... - ... Passed -- 81.1 3 1059 375850 341 -Test (3D): install snapshots (crash) ... - ... Passed -- 53.5 3 601 256638 339 -Test (3D): install snapshots (unreliable+crash) ... - ... Passed -- 63.5 3 687 288294 336 -Test (3D): crash and restart all servers ... - ... Passed -- 19.5 3 268 81352 58 -PASS -ok 6.5840/raft 293.456s",hard,http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html -8,6.5840: Distributed Systems,Spring 2024,Lab 4: Fault-tolerant Key/Value Service,Part A: Key/value service without snapshots,"In this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html). 
Your key/value service will be a replicated state machine, consisting of several key/value servers that each maintain a database of key/value pairs, as in [Lab 2](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html), but additionally use Raft for replication. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2024/notes/raft_diagram.pdf). - -Clients will interact with your key/value service in much the same way as Lab 2. In particular, clients can send three different RPCs to the key/value service: - -- `Put(key, value)`: replaces the value for a particular key in the database -- `Append(key, arg)`: appends arg to key's value (treating the existing value as an empty string if the key is non-existent) -- `Get(key)`: fetches the current value of the key (returning the empty string for non-existent keys) - -Keys and values are strings. Note that unlike in Lab 2, neither `Put` nor `Append` should return a value to the client. Each client talks to the service through a `Clerk` with Put/Append/Get methods. The `Clerk` manages RPC interactions with the servers. - -Your service must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If called one at a time, the Get/Put/Append methods should act as if the system had only one copy of its state, and each call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. - -Providing linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. - -This lab has two parts. In part A, you will implement a replicated key/value service using your Raft implementation, but without using snapshots. In part B, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. - -You should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), in particular Sections 7 and 8. For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) - -Start early.","We supply you with skeleton code and tests in `src/kvraft`. You will need to modify `kvraft/client.go`, `kvraft/server.go`, and perhaps `kvraft/common.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvraft -$ go test -... -$ -``` -",,"Each of your key/value servers (""kvservers"") will have an associated Raft peer. 
Clerks send `Put()`, `Append()`, and `Get()` RPCs to the kvserver whose associated Raft is the leader. The kvserver code submits the Put/Append/Get operation to Raft, so that the Raft log holds a sequence of Put/Append/Get operations. All of the kvservers execute operations from the Raft log in order, applying the operations to their key/value databases; the intent is for the servers to maintain identical replicas of the key/value database. - -A `Clerk` sometimes doesn't know which kvserver is the Raft leader. If the `Clerk` sends an RPC to the wrong kvserver, or if it cannot reach the kvserver, the `Clerk` should re-try by sending to a different kvserver. If the key/value service commits the operation to its Raft log (and hence applies the operation to the key/value state machine), the leader reports the result to the `Clerk` by responding to its RPC. If the operation failed to commit (for example, if the leader was replaced), the server reports an error, and the `Clerk` retries with a different server. - -Your kvservers should not directly communicate; they should only interact with each other through Raft. - -Your first task is to implement a solution that works when there are no dropped messages, and no failed servers. - -Feel free to copy over your client code from Lab 2 (`kvsrv/client.go`) into `kvraft/client.go`. You will need to add logic for deciding which kvserver to send each RPC to. Recall that `Append()` no longer returns a value to the Clerk. - -You'll also need to implement `Put()`, `Append()`, and `Get()` RPC handlers in `server.go`. These handlers should enter an `Op` in the Raft log using `Start()`; you should fill in the `Op` struct definition in `server.go` so that it describes a Put/Append/Get operation. Each server should execute `Op` commands as Raft commits them, i.e. as they appear on the `applyCh`. An RPC handler should notice when Raft commits its `Op`, and then reply to the RPC. - -You have completed this task when you **reliably** pass the first test in the test suite: ""One client"". - -- After calling `Start()`, your kvservers will need to wait for Raft to complete agreement. Commands that have been agreed upon arrive on the `applyCh`. Your code will need to keep reading `applyCh` while `Put()`, `Append()`, and `Get()` handlers submit commands to the Raft log using `Start()`. Beware of deadlock between the kvserver and its Raft library. -- A kvserver should not complete a `Get()` RPC if it is not part of a majority (so that it does not serve stale data). A simple solution is to enter every `Get()` (as well as each `Put()` and `Append()`) in the Raft log. You don't have to implement the optimization for read-only operations that is described in Section 8. -- You should not need to add any fields to to the Raft `ApplyMsg`, or to Raft RPCs such as `AppendEntries`, but you are allowed to do so. -- It's best to add locking from the start because the need to avoid deadlocks sometimes affects overall code design. Check that your code is race-free using `go test -race`. - -Now you should modify your solution to continue in the face of network and server failures. One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it finds a kvserver that replies positively. If a leader fails just after committing an entry to the Raft log, the `Clerk` may not receive a reply, and thus may re-send the request to another leader. 
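One way to structure the `Start()`/`applyCh` interaction from the first task above is to have each RPC handler submit an `Op` and then wait on a per-index channel that the apply loop signals. A minimal sketch follows; the `Op` fields, the `kv.waitCh map[int]chan Op` field, and the `ClientId`/`Seq` additions to the args are assumptions, not part of the skeleton.

```
// Sketch only: Op carries enough to apply the request and detect duplicates.
type Op struct {
	Kind     string // "Put", "Append", or "Get"
	Key      string
	Value    string
	ClientId int64
	Seq      int64
}

func (kv *KVServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) {
	op := Op{Kind: args.Op, Key: args.Key, Value: args.Value,
		ClientId: args.ClientId, Seq: args.Seq}
	index, _, isLeader := kv.rf.Start(op)
	if !isLeader {
		reply.Err = ErrWrongLeader // Err constants follow common.go; adjust to yours
		return
	}
	kv.mu.Lock()
	ch := make(chan Op, 1)
	kv.waitCh[index] = ch // the apply loop sends the committed Op for this index
	kv.mu.Unlock()

	select {
	case applied := <-ch:
		if applied.ClientId != op.ClientId || applied.Seq != op.Seq {
			reply.Err = ErrWrongLeader // a different entry won this index
		} else {
			reply.Err = OK
		}
	case <-time.After(500 * time.Millisecond):
		reply.Err = ErrWrongLeader // let the Clerk retry another server
	}
	// (cleanup of kv.waitCh[index] omitted for brevity)
}
```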
Each call to `Clerk.Put()` or `Clerk.Append()` should result in just a single execution, so you will have to ensure that the re-send doesn't result in the servers executing the request twice. - -Add code to handle failures, and to cope with duplicate `Clerk` requests, including situations where the `Clerk` sends a request to a kvserver leader in one term, times out waiting for a reply, and re-sends the request to a new leader in another term. The request should execute just once. These notes include guidance on [duplicate detection](http://nil.csail.mit.edu/6.5840/2024/notes/l-raft-QA.txt). Your code should pass the `go test -run 4A` tests. - -- Your solution needs to handle a leader that has called Start() for a Clerk's RPC, but loses its leadership before the request is committed to the log. In this case you should arrange for the Clerk to re-send the request to other servers until it finds the new leader. One way to do this is for the server to detect that it has lost leadership, by noticing that Raft's term has changed or a different request has appeared at the index returned by Start(). If the ex-leader is partitioned by itself, it won't know about new leaders; but any client in the same partition won't be able to talk to a new leader either, so it's OK in this case for the server and client to wait indefinitely until the partition heals. -- You will probably have to modify your Clerk to remember which server turned out to be the leader for the last RPC, and send the next RPC to that server first. This will avoid wasting time searching for the leader on every RPC, which may help you pass some of the tests quickly enough. -- You should use a duplicate detection scheme similar to Lab 2. It should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. It's OK to assume that a client will make only one call into a Clerk at a time. You may find that you need to make changes to what information you store in your duplicate detection table from Lab 2. - -Your code should now pass the Lab 4A tests, like this: - -``` -$ go test -run 4A -Test: one client (4A) ... - ... Passed -- 15.5 5 4576 903 -Test: ops complete fast enough (4A) ... - ... Passed -- 15.7 3 3022 0 -Test: many clients (4A) ... - ... Passed -- 15.9 5 5884 1160 -Test: unreliable net, many clients (4A) ... - ... Passed -- 19.2 5 3083 441 -Test: concurrent append to same key, unreliable (4A) ... - ... Passed -- 2.5 3 218 52 -Test: progress in majority (4A) ... - ... Passed -- 1.7 5 103 2 -Test: no progress in minority (4A) ... - ... Passed -- 1.0 5 102 3 -Test: completion after heal (4A) ... - ... Passed -- 1.2 5 70 3 -Test: partitions, one client (4A) ... - ... Passed -- 23.8 5 4501 765 -Test: partitions, many clients (4A) ... - ... Passed -- 23.5 5 5692 974 -Test: restarts, one client (4A) ... - ... Passed -- 22.2 5 4721 908 -Test: restarts, many clients (4A) ... - ... Passed -- 22.5 5 5490 1033 -Test: unreliable net, restarts, many clients (4A) ... - ... Passed -- 26.5 5 3532 474 -Test: restarts, partitions, many clients (4A) ... - ... Passed -- 29.7 5 6122 1060 -Test: unreliable net, restarts, partitions, many clients (4A) ... - ... Passed -- 32.9 5 2967 317 -Test: unreliable net, restarts, partitions, random keys, many clients (4A) ... - ... 
Passed -- 35.0 7 8249 746 -PASS -ok 6.5840/kvraft 290.184s -``` - -The numbers after each `Passed` are real time in seconds, number of peers, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put/Append calls).","1. task1 - - Your first task is to implement a solution that works when there are no dropped messages, and no failed servers. - - Feel free to copy over your client code from Lab 2 (`kvsrv/client.go`) into `kvraft/client.go`. You will need to add logic for deciding which kvserver to send each RPC to. Recall that `Append()` no longer returns a value to the Clerk. - - You'll also need to implement `Put()`, `Append()`, and `Get()` RPC handlers in `server.go`. These handlers should enter an `Op` in the Raft log using `Start()`; you should fill in the `Op` struct definition in `server.go` so that it describes a Put/Append/Get operation. Each server should execute `Op` commands as Raft commits them, i.e. as they appear on the `applyCh`. An RPC handler should notice when Raft commits its `Op`, and then reply to the RPC. - - You have completed this task when you **reliably** pass the first test in the test suite: ""One client"". - -2. task2 - - Add code to handle failures, and to cope with duplicate `Clerk` requests, including situations where the `Clerk` sends a request to a kvserver leader in one term, times out waiting for a reply, and re-sends the request to a new leader in another term. The request should execute just once. These notes include guidance on [duplicate detection](http://nil.csail.mit.edu/6.5840/2024/notes/l-raft-QA.txt). Your code should pass the `go test -run 4A` tests.","1. hint1 - - After calling `Start()`, your kvservers will need to wait for Raft to complete agreement. Commands that have been agreed upon arrive on the `applyCh`. Your code will need to keep reading `applyCh` while `Put()`, `Append()`, and `Get()` handlers submit commands to the Raft log using `Start()`. Beware of deadlock between the kvserver and its Raft library. - - A kvserver should not complete a `Get()` RPC if it is not part of a majority (so that it does not serve stale data). A simple solution is to enter every `Get()` (as well as each `Put()` and `Append()`) in the Raft log. You don't have to implement the optimization for read-only operations that is described in Section 8. - - You should not need to add any fields to to the Raft `ApplyMsg`, or to Raft RPCs such as `AppendEntries`, but you are allowed to do so. - - It's best to add locking from the start because the need to avoid deadlocks sometimes affects overall code design. Check that your code is race-free using `go test -race`. -2. hint2 - - Your solution needs to handle a leader that has called Start() for a Clerk's RPC, but loses its leadership before the request is committed to the log. In this case you should arrange for the Clerk to re-send the request to other servers until it finds the new leader. One way to do this is for the server to detect that it has lost leadership, by noticing that Raft's term has changed or a different request has appeared at the index returned by Start(). If the ex-leader is partitioned by itself, it won't know about new leaders; but any client in the same partition won't be able to talk to a new leader either, so it's OK in this case for the server and client to wait indefinitely until the partition heals. 
- - You will probably have to modify your Clerk to remember which server turned out to be the leader for the last RPC, and send the next RPC to that server first. This will avoid wasting time searching for the leader on every RPC, which may help you pass some of the tests quickly enough. - - You should use a duplicate detection scheme similar to Lab 2. It should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. It's OK to assume that a client will make only one call into a Clerk at a time. You may find that you need to make changes to what information you store in your duplicate detection table from Lab 2.",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/kvraft && go test -run 4A,"Test: one client (4A) ... - ... Passed -- 15.5 5 4576 903 -Test: ops complete fast enough (4A) ... - ... Passed -- 15.7 3 3022 0 -Test: many clients (4A) ... - ... Passed -- 15.9 5 5884 1160 -Test: unreliable net, many clients (4A) ... - ... Passed -- 19.2 5 3083 441 -Test: concurrent append to same key, unreliable (4A) ... - ... Passed -- 2.5 3 218 52 -Test: progress in majority (4A) ... - ... Passed -- 1.7 5 103 2 -Test: no progress in minority (4A) ... - ... Passed -- 1.0 5 102 3 -Test: completion after heal (4A) ... - ... Passed -- 1.2 5 70 3 -Test: partitions, one client (4A) ... - ... Passed -- 23.8 5 4501 765 -Test: partitions, many clients (4A) ... - ... Passed -- 23.5 5 5692 974 -Test: restarts, one client (4A) ... - ... Passed -- 22.2 5 4721 908 -Test: restarts, many clients (4A) ... - ... Passed -- 22.5 5 5490 1033 -Test: unreliable net, restarts, many clients (4A) ... - ... Passed -- 26.5 5 3532 474 -Test: restarts, partitions, many clients (4A) ... - ... Passed -- 29.7 5 6122 1060 -Test: unreliable net, restarts, partitions, many clients (4A) ... - ... Passed -- 32.9 5 2967 317 -Test: unreliable net, restarts, partitions, random keys, many clients (4A) ... - ... Passed -- 35.0 7 8249 746 -PASS -ok 6.5840/kvraft 290.184s",moderate/hard,http://nil.csail.mit.edu/6.5840/2024/labs/lab-kvraft.html -9,6.5840: Distributed Systems,Spring 2024,Lab 4: Fault-tolerant Key/Value Service,Part B: Key/value service with snapshots,"In this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html). Your key/value service will be a replicated state machine, consisting of several key/value servers that each maintain a database of key/value pairs, as in [Lab 2](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html), but additionally use Raft for replication. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2024/notes/raft_diagram.pdf). - -Clients will interact with your key/value service in much the same way as Lab 2. In particular, clients can send three different RPCs to the key/value service: - -- `Put(key, value)`: replaces the value for a particular key in the database -- `Append(key, arg)`: appends arg to key's value (treating the existing value as an empty string if the key is non-existent) -- `Get(key)`: fetches the current value of the key (returning the empty string for non-existent keys) - -Keys and values are strings. 
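A minimal `Clerk` sketch that follows the Part A hints above: cache the last server that answered as leader, and tag each request with a client id and per-client sequence number for duplicate detection. The `ClientId`/`Seq` fields are additions to the skeleton's args structs, and the RPC name assumes the server type is `KVServer` as in the skeleton; `Get()` follows the same pattern.

```
// Sketch only.
type Clerk struct {
	servers  []*labrpc.ClientEnd
	clientId int64 // e.g. a random 64-bit id chosen at MakeClerk time
	seq      int64 // incremented once per Put/Append/Get
	leader   int   // index of the last server that answered as leader
}

func (ck *Clerk) PutAppend(key string, value string, op string) {
	ck.seq++
	args := PutAppendArgs{Key: key, Value: value, Op: op,
		ClientId: ck.clientId, Seq: ck.seq}
	for i := ck.leader; ; i = (i + 1) % len(ck.servers) {
		var reply PutAppendReply
		ok := ck.servers[i].Call("KVServer.PutAppend", &args, &reply)
		if ok && reply.Err == OK {
			ck.leader = i
			return
		}
		// wrong leader, lost RPC, or timeout: try the next server
		time.Sleep(50 * time.Millisecond)
	}
}
```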
Note that unlike in Lab 2, neither `Put` nor `Append` should return a value to the client. Each client talks to the service through a `Clerk` with Put/Append/Get methods. The `Clerk` manages RPC interactions with the servers. - -Your service must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If called one at a time, the Get/Put/Append methods should act as if the system had only one copy of its state, and each call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. - -Providing linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. - -This lab has two parts. In part A, you will implement a replicated key/value service using your Raft implementation, but without using snapshots. In part B, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. - -You should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), in particular Sections 7 and 8. For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) - -Start early.","We supply you with skeleton code and tests in `src/kvraft`. You will need to modify `kvraft/client.go`, `kvraft/server.go`, and perhaps `kvraft/common.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvraft -$ go test -... -$ -``` -",,"As things stand now, your key/value server doesn't call your Raft library's `Snapshot()` method, so a rebooting server has to replay the complete persisted Raft log in order to restore its state. Now you'll modify kvserver to cooperate with Raft to save log space, and reduce restart time, using Raft's `Snapshot()` from Lab 3D. - -The tester passes `maxraftstate` to your `StartKVServer()`. `maxraftstate` indicates the maximum allowed size of your persistent Raft state in bytes (including the log, but not including snapshots). You should compare `maxraftstate` to `persister.RaftStateSize()`. Whenever your key/value server detects that the Raft state size is approaching this threshold, it should save a snapshot by calling Raft's `Snapshot`. If `maxraftstate` is -1, you do not have to snapshot. `maxraftstate` applies to the GOB-encoded bytes your Raft passes as the first argument to to `persister.Save()`. - -Modify your kvserver so that it detects when the persisted Raft state grows too large, and then hands a snapshot to Raft. When a kvserver server restarts, it should read the snapshot from `persister` and restore its state from the snapshot. 
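A minimal sketch of the size check described above, as it might look right after the kvserver's apply loop handles an entry at index `lastApplied`. The `kv.data`/`kv.lastSeq` fields are assumptions, and the sketch assumes you kept a reference to the `Persister` passed to `StartKVServer()`.

```
// Sketch only: hand Raft a snapshot whenever persisted Raft state nears
// maxraftstate (you may want to trigger a bit below the limit).
func (kv *KVServer) maybeSnapshot(lastApplied int) {
	if kv.maxraftstate == -1 {
		return // snapshotting disabled
	}
	if kv.persister.RaftStateSize() < kv.maxraftstate {
		return
	}
	w := new(bytes.Buffer)
	e := labgob.NewEncoder(w)
	e.Encode(kv.data)    // the key/value table
	e.Encode(kv.lastSeq) // duplicate-detection state must be in the snapshot too
	kv.rf.Snapshot(lastApplied, w.Bytes())
}
```

On restart, decode `persister.ReadSnapshot()` the same way before serving requests.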
- -- Think about when a kvserver should snapshot its state and what should be included in the snapshot. Raft stores each snapshot in the persister object using `Save()`, along with corresponding Raft state. You can read the latest stored snapshot using `ReadSnapshot()`. -- Your kvserver must be able to detect duplicated operations in the log across checkpoints, so any state you are using to detect them must be included in the snapshots. -- Capitalize all fields of structures stored in the snapshot. -- You may have bugs in your Raft library that this lab exposes. If you make changes to your Raft implementation make sure it continues to pass all of the Lab 3 tests. -- A reasonable amount of time to take for the Lab 4 tests is 400 seconds of real time and 700 seconds of CPU time. Further, `go test -run TestSnapshotSize` should take less than 20 seconds of real time. - -Your code should pass the 4B tests (as in the example here) as well as the 4A tests (and your Raft must continue to pass the Lab 3 tests). - -``` -$ go test -run 4B -Test: InstallSnapshot RPC (4B) ... - ... Passed -- 4.0 3 289 63 -Test: snapshot size is reasonable (4B) ... - ... Passed -- 2.6 3 2418 800 -Test: ops complete fast enough (4B) ... - ... Passed -- 3.2 3 3025 0 -Test: restarts, snapshots, one client (4B) ... - ... Passed -- 21.9 5 29266 5820 -Test: restarts, snapshots, many clients (4B) ... - ... Passed -- 21.5 5 33115 6420 -Test: unreliable net, snapshots, many clients (4B) ... - ... Passed -- 17.4 5 3233 482 -Test: unreliable net, restarts, snapshots, many clients (4B) ... - ... Passed -- 22.7 5 3337 471 -Test: unreliable net, restarts, partitions, snapshots, many clients (4B) ... - ... Passed -- 30.4 5 2725 274 -Test: unreliable net, restarts, partitions, snapshots, random keys, many clients (4B) ... - ... Passed -- 37.7 7 8378 681 -PASS -ok 6.5840/kvraft 161.538s -```","Modify your kvserver so that it detects when the persisted Raft state grows too large, and then hands a snapshot to Raft. When a kvserver server restarts, it should read the snapshot from `persister` and restore its state from the snapshot.","- Think about when a kvserver should snapshot its state and what should be included in the snapshot. Raft stores each snapshot in the persister object using `Save()`, along with corresponding Raft state. You can read the latest stored snapshot using `ReadSnapshot()`. -- Your kvserver must be able to detect duplicated operations in the log across checkpoints, so any state you are using to detect them must be included in the snapshots. -- Capitalize all fields of structures stored in the snapshot. -- You may have bugs in your Raft library that this lab exposes. If you make changes to your Raft implementation make sure it continues to pass all of the Lab 3 tests. -- A reasonable amount of time to take for the Lab 4 tests is 400 seconds of real time and 700 seconds of CPU time. Further, `go test -run TestSnapshotSize` should take less than 20 seconds of real time.",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/kvraft && go test -run 4B,"Test: InstallSnapshot RPC (4B) ... - ... Passed -- 4.0 3 289 63 -Test: snapshot size is reasonable (4B) ... - ... Passed -- 2.6 3 2418 800 -Test: ops complete fast enough (4B) ... - ... Passed -- 3.2 3 3025 0 -Test: restarts, snapshots, one client (4B) ... - ... Passed -- 21.9 5 29266 5820 -Test: restarts, snapshots, many clients (4B) ... - ... Passed -- 21.5 5 33115 6420 -Test: unreliable net, snapshots, many clients (4B) ... - ... 
Passed -- 17.4 5 3233 482 -Test: unreliable net, restarts, snapshots, many clients (4B) ... - ... Passed -- 22.7 5 3337 471 -Test: unreliable net, restarts, partitions, snapshots, many clients (4B) ... - ... Passed -- 30.4 5 2725 274 -Test: unreliable net, restarts, partitions, snapshots, random keys, many clients (4B) ... - ... Passed -- 37.7 7 8378 681 -PASS -ok 6.5840/kvraft 161.538s",hard,http://nil.csail.mit.edu/6.5840/2024/labs/lab-kvraft.html -10,6.5840: Distributed Systems,Spring 2024,Lab 5: Sharded Key/Value Service,Part A: The Controller and Static Sharding,"You can either do a [final project](http://nil.csail.mit.edu/6.5840/2024/project.html) based on your own ideas, or this lab. - -In this lab you'll build a key/value storage system that ""shards,"" or partitions, the keys over a set of replica groups. A shard is a subset of the key/value pairs; for example, all the keys starting with ""a"" might be one shard, all the keys starting with ""b"" another, etc. The reason for sharding is performance. Each replica group handles puts and gets for just a few of the shards, and the groups operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of groups. - -Your sharded key/value store will have two main components. First, a set of replica groups. Each replica group is responsible for a subset of the shards, using Raft replication. The second component is the ""shard controller"". The shard controller decides which replica group should serve each shard; this information is called the configuration. The configuration changes over time. Clients consult the shard controller in order to find the replica group for a key, and replica groups consult the controller in order to find out what shards to serve. There is a single shard controller for the whole system, implemented as a fault-tolerant service using Raft. - -A sharded storage system must be able to shift shards among replica groups. One reason is that some groups may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that replica groups may join and leave the system: new replica groups may be added to increase capacity, or existing replica groups may be taken offline for repair or retirement. - -The main challenge in this lab will be handling reconfiguration -- changes in the assignment of shards to groups. Within a single replica group, all group members must agree on when a reconfiguration occurs relative to client Put/Append/Get requests. For example, a Put may arrive at about the same time as a reconfiguration that causes the replica group to stop being responsible for the shard holding the Put's key. All replicas in the group must agree on whether the Put occurred before or after the reconfiguration. If before, the Put should take effect and the new owner of the shard will see its effect; if after, the Put won't take effect and client must re-try at the new owner. The recommended approach is to have each replica group use Raft to log not just the sequence of Puts, Appends, and Gets but also the sequence of reconfigurations. You will need to ensure that at most one replica group is serving requests for each shard at any one time. - -Reconfiguration also requires interaction among the replica groups. For example, in configuration 10 group G1 may be responsible for shard S1. In configuration 11, group G2 may be responsible for shard S1. 
During the reconfiguration from 10 to 11, G1 and G2 must use RPC to move the contents of shard S1 (the key/value pairs) from G1 to G2. - -Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. - -This lab uses ""configuration"" to refer to the assignment of shards to replica groups. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. - -This lab's general architecture (a configuration service and a set of replica groups) follows the same general pattern as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are very simple; and handoff of shards is slow and doesn't allow concurrent client access. - -Your Lab 5 sharded server, Lab 5 shard controller, and Lab 4 kvraft must all use the same Raft implementation.","Do a `git pull` to get the latest lab software. - -We supply you with skeleton code and tests in `src/shardctrler` and `src/shardkv`. - -To get up and running, execute the following commands: - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/shardctrler -$ go test ---- FAIL: TestBasic (0.00s) - test_test.go:11: wanted 1 groups, got 0 -FAIL -exit status 1 -FAIL shardctrler 0.008s -$ -``` - -When you're done, your implementation should pass all the tests in the `src/shardctrler` directory, and all the ones in `src/shardkv`.",,"First you'll implement the shard controller, in `shardctrler/server.go` and `client.go`, and a sharded key/value server that can handle an unchanging (static) configuration. When you're done, your code should pass all the tests in the `shardctrler/` directory, and the `5A` tests in `shardkv/`. - -``` -$ cd ~/6.5840/src/shardctrler -$ go test -Test: Basic leave/join ... - ... Passed -Test: Historical queries ... - ... Passed -Test: Move ... - ... Passed -Test: Concurrent leave/join ... - ... Passed -Test: Minimal transfers after joins ... - ... Passed -Test: Minimal transfers after leaves ... - ... Passed -Test: Multi-group join/leave ... - ... Passed -Test: Concurrent multi leave/join ... - ... Passed -Test: Minimal transfers after multijoins ... - ... Passed -Test: Minimal transfers after multileaves ... - ... Passed -Test: Check Same config on servers ... - ... Passed -PASS -ok 6.5840/shardctrler 5.863s -$ -$ cd ../shardkv -$ go test -run 5A -Test (5A): static shards ... - ... Passed -Test (5A): rejection ... - ... Passed -PASS -ok 6.5840/shardkv 9.262s -$ -``` - -The shardctrler manages a sequence of numbered configurations. Each configuration describes a set of replica groups and an assignment of shards to replica groups. Whenever this assignment needs to change, the shard controller creates a new configuration with the new assignment. Key/value clients and servers contact the shardctrler when they want to know the current (or a past) configuration. - -Your implementation must support the RPC interface described in `shardctrler/common.go`, which consists of `Join`, `Leave`, `Move`, and `Query` RPCs. These RPCs are intended to allow an administrator (and the tests) to control the shardctrler: to add new replica groups, to eliminate replica groups, and to move shards between replica groups. 
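For orientation, the configuration and argument types in `shardctrler/common.go` look roughly like the following (check the file itself for the authoritative definitions; the comments here are added):

```
const NShards = 10 // illustrative; use the constant defined in common.go

type Config struct {
	Num    int              // config number
	Shards [NShards]int     // shard -> gid
	Groups map[int][]string // gid -> servers[]
}

type JoinArgs struct{ Servers map[int][]string } // new gid -> servers[]
type LeaveArgs struct{ GIDs []int }
type MoveArgs struct {
	Shard int
	GID   int
}
type QueryArgs struct{ Num int } // desired config number; -1 means latest
```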
- -The `Join` RPC is used by an administrator to add new replica groups. Its argument is a set of mappings from unique, non-zero replica group identifiers (GIDs) to lists of server names. The shardctrler should react by creating a new configuration that includes the new replica groups. The new configuration should divide the shards as evenly as possible among the full set of groups, and should move as few shards as possible to achieve that goal. The shardctrler should allow re-use of a GID if it's not part of the current configuration (i.e. a GID should be allowed to Join, then Leave, then Join again). - -The `Leave` RPC's argument is a list of GIDs of previously joined groups. The shardctrler should create a new configuration that does not include those groups, and that assigns those groups' shards to the remaining groups. The new configuration should divide the shards as evenly as possible among the groups, and should move as few shards as possible to achieve that goal. - -The `Move` RPC's arguments are a shard number and a GID. The shardctrler should create a new configuration in which the shard is assigned to the group. The purpose of `Move` is to allow us to test your software. A `Join` or `Leave` following a `Move` will likely un-do the `Move`, since `Join` and `Leave` re-balance. - -The `Query` RPC's argument is a configuration number. The shardctrler replies with the configuration that has that number. If the number is -1 or bigger than the biggest known configuration number, the shardctrler should reply with the latest configuration. The result of `Query(-1)` should reflect every `Join`, `Leave`, or `Move` RPC that the shardctrler finished handling before it received the `Query(-1)` RPC. - -The very first configuration should be numbered zero. It should contain no groups, and all shards should be assigned to GID zero (an invalid GID). The next configuration (created in response to a `Join` RPC) should be numbered 1, &c. There will usually be significantly more shards than groups (i.e., each group will serve more than one shard), in order that load can be shifted at a fairly fine granularity. - -You must implement the interface specified above in `client.go` and `server.go` in the `shardctrler/` directory. Your shardctrler must be fault-tolerant, using your Raft library from Lab 3/4. You have completed this task when you pass all the tests in `shardctrler/`. - -- Start with a stripped-down copy of your kvraft server. -- You should implement duplicate client request detection for RPCs to the shard controller. The shardctrler tests don't test this, but the shardkv tests will later use your shardctrler on an unreliable network; you may have trouble passing the shardkv tests if your shardctrler doesn't filter out duplicate RPCs. -- The code in your state machine that performs the shard rebalancing needs to be deterministic. In Go, map iteration order is [not deterministic](https://blog.golang.org/maps#TOC_7.). -- Go maps are references. If you assign one variable of type map to another, both variables refer to the same map. Thus if you want to create a new `Config` based on a previous one, you need to create a new map object (with `make()`) and copy the keys and values individually. -- The Go race detector (go test -race) may help you find bugs. - -Next, in the `shardkv/` directory, implement enough of a sharded key/value server to pass the first two tests in `shardkv/`. Again, start by copying code from your existing `kvraft` server. 
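The shardctrler hints above about determinism and map aliasing come together when you build the next configuration; here is a minimal sketch with illustrative helper names.

```
// Sketch only: start the new Config from the previous one by deep-copying the
// Groups map (the Shards array is copied by value on assignment), then
// rebalance by iterating gids in sorted order so every replica computes the
// same result.
func nextConfig(prev Config) Config {
	next := Config{Num: prev.Num + 1, Shards: prev.Shards,
		Groups: map[int][]string{}}
	for gid, servers := range prev.Groups {
		next.Groups[gid] = append([]string{}, servers...) // copy, don't alias
	}
	return next
}

func sortedGIDs(groups map[int][]string) []int {
	gids := make([]int, 0, len(groups))
	for gid := range groups {
		gids = append(gids, gid)
	}
	sort.Ints(gids)
	return gids // rebalance by ranging over this slice, never over the map
}
```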
You should be able to get the first test to pass without doing anything special regarding sharding, since the `shardkv/client.go` we give you takes care of sending RPCs to the group that the controller assigns to the key in question. - -For the second `shardkv` test, each k/v replica group must reject requests for keys for shards for which the group is not the assigned group. At this point, it's enough for the k/v servers to periodically ask the controller for the latest configuration, and to check that configuration each time a client Get/Put/Append RPC arrives. Use `key2shard()` (in `client.go`) to find the shard number for a key. - -Your server should respond with an `ErrWrongGroup` error to a client RPC with a key that the server isn't responsible for (i.e. for a key whose shard is not assigned to the server's group). - -Your server should not call the shard controller's `Join()` handler. The tester will call `Join()` when appropriate.","You must implement the interface specified above in `client.go` and `server.go` in the `shardctrler/` directory. Your shardctrler must be fault-tolerant, using your Raft library from Lab 3/4. You have completed this task when you pass all the tests in `shardctrler/`.","- Start with a stripped-down copy of your kvraft server. -- You should implement duplicate client request detection for RPCs to the shard controller. The shardctrler tests don't test this, but the shardkv tests will later use your shardctrler on an unreliable network; you may have trouble passing the shardkv tests if your shardctrler doesn't filter out duplicate RPCs. -- The code in your state machine that performs the shard rebalancing needs to be deterministic. In Go, map iteration order is [not deterministic](https://blog.golang.org/maps#TOC_7.). -- Go maps are references. If you assign one variable of type map to another, both variables refer to the same map. Thus if you want to create a new `Config` based on a previous one, you need to create a new map object (with `make()`) and copy the keys and values individually. -- The Go race detector (go test -race) may help you find bugs.",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/shardctrler && go test,"Test: Basic leave/join ... - ... Passed -Test: Historical queries ... - ... Passed -Test: Move ... - ... Passed -Test: Concurrent leave/join ... - ... Passed -Test: Minimal transfers after joins ... - ... Passed -Test: Minimal transfers after leaves ... - ... Passed -Test: Multi-group join/leave ... - ... Passed -Test: Concurrent multi leave/join ... - ... Passed -Test: Minimal transfers after multijoins ... - ... Passed -Test: Minimal transfers after multileaves ... - ... Passed -Test: Check Same config on servers ... - ... Passed -PASS -ok 6.5840/shardctrler 5.863s -$ -$ cd ../shardkv -$ go test -run 5A -Test (5A): static shards ... - ... Passed -Test (5A): rejection ... - ... Passed -PASS -ok 6.5840/shardkv 9.262s",easy,http://nil.csail.mit.edu/6.5840/2024/labs/lab-shard.html -11,6.5840: Distributed Systems,Spring 2024,Lab 5: Sharded Key/Value Service,Part B: Shard Movement,"You can either do a [final project](http://nil.csail.mit.edu/6.5840/2024/project.html) based on your own ideas, or this lab. - -In this lab you'll build a key/value storage system that ""shards,"" or partitions, the keys over a set of replica groups. A shard is a subset of the key/value pairs; for example, all the keys starting with ""a"" might be one shard, all the keys starting with ""b"" another, etc. 
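A minimal sketch of the Part A rejection check described above: on every client RPC, map the key to its shard and compare against the configuration the group last fetched from the controller. The `kv.config` field is an assumption; `key2shard()` and the `Err` constants come from the supplied `shardkv` code.

```
// Sketch only: reject keys whose shard is not assigned to this group.
func (kv *ShardKV) checkShard(key string) Err {
	shard := key2shard(key) // provided in shardkv/client.go
	kv.mu.Lock()
	defer kv.mu.Unlock()
	if kv.config.Shards[shard] != kv.gid {
		return ErrWrongGroup
	}
	return OK
}
```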
The reason for sharding is performance. Each replica group handles puts and gets for just a few of the shards, and the groups operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of groups. - -Your sharded key/value store will have two main components. First, a set of replica groups. Each replica group is responsible for a subset of the shards, using Raft replication. The second component is the ""shard controller"". The shard controller decides which replica group should serve each shard; this information is called the configuration. The configuration changes over time. Clients consult the shard controller in order to find the replica group for a key, and replica groups consult the controller in order to find out what shards to serve. There is a single shard controller for the whole system, implemented as a fault-tolerant service using Raft. - -A sharded storage system must be able to shift shards among replica groups. One reason is that some groups may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that replica groups may join and leave the system: new replica groups may be added to increase capacity, or existing replica groups may be taken offline for repair or retirement. - -The main challenge in this lab will be handling reconfiguration -- changes in the assignment of shards to groups. Within a single replica group, all group members must agree on when a reconfiguration occurs relative to client Put/Append/Get requests. For example, a Put may arrive at about the same time as a reconfiguration that causes the replica group to stop being responsible for the shard holding the Put's key. All replicas in the group must agree on whether the Put occurred before or after the reconfiguration. If before, the Put should take effect and the new owner of the shard will see its effect; if after, the Put won't take effect and client must re-try at the new owner. The recommended approach is to have each replica group use Raft to log not just the sequence of Puts, Appends, and Gets but also the sequence of reconfigurations. You will need to ensure that at most one replica group is serving requests for each shard at any one time. - -Reconfiguration also requires interaction among the replica groups. For example, in configuration 10 group G1 may be responsible for shard S1. In configuration 11, group G2 may be responsible for shard S1. During the reconfiguration from 10 to 11, G1 and G2 must use RPC to move the contents of shard S1 (the key/value pairs) from G1 to G2. - -Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. - -This lab uses ""configuration"" to refer to the assignment of shards to replica groups. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. - -This lab's general architecture (a configuration service and a set of replica groups) follows the same general pattern as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are very simple; and handoff of shards is slow and doesn't allow concurrent client access. 
- -Your Lab 5 sharded server, Lab 5 shard controller, and Lab 4 kvraft must all use the same Raft implementation.","Do a `git pull` to get the latest lab software. - -We supply you with skeleton code and tests in `src/shardctrler` and `src/shardkv`. - -To get up and running, execute the following commands: - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/shardctrler -$ go test ---- FAIL: TestBasic (0.00s) - test_test.go:11: wanted 1 groups, got 0 -FAIL -exit status 1 -FAIL shardctrler 0.008s -$ -``` - -When you're done, your implementation should pass all the tests in the `src/shardctrler` directory, and all the ones in `src/shardkv`.",,"Do a `git pull` to get the latest lab software. - -The main task in this part of the lab is to move shards among replica groups when the controller changes the sharding, and do it in a way that provides linearizable k/v client operations. - -Each of your shards is only required to make progress when a majority of servers in the shard's Raft replica group is alive and can talk to each other, and can talk to a majority of the `shardctrler` servers. Your implementation must operate (serve requests and be able to re-configure as needed) even if a minority of servers in some replica group(s) are dead, temporarily unavailable, or slow. - -A shardkv server is a member of only a single replica group. The set of servers in a given replica group will never change. - -We supply you with `client.go` code that sends each RPC to the replica group responsible for the RPC's key. It re-tries if the replica group says it is not responsible for the key; in that case, the client code asks the shard controller for the latest configuration and tries again. You'll have to modify client.go as part of your support for dealing with duplicate client RPCs, much as in the kvraft lab. - -When you're done your code should pass all the shardkv tests other than the challenge tests: - -``` -$ cd ~/6.5840/src/shardkv -$ go test -Test (5A): static shards ... - ... Passed -Test (5A): rejection ... - ... Passed -Test (5B): join then leave ... - ... Passed -Test (5B): snapshots, join, and leave ... -labgob warning: Decoding into a non-default variable/field Num may not work - ... Passed -Test (5B): servers miss configuration changes... - ... Passed -Test (5B): concurrent puts and configuration changes... - ... Passed -Test (5B): more concurrent puts and configuration changes... - ... Passed -Test (5B): concurrent configuration change and restart... - ... Passed -Test (5B): unreliable 1... - ... Passed -Test (5B): unreliable 2... - ... Passed -Test (5B): unreliable 3... - ... Passed -Test: shard deletion (challenge 1) ... - ... Passed -Test: unaffected shard access (challenge 2) ... - ... Passed -Test: partial migration shard access (challenge 2) ... - ... Passed -PASS -ok 6.5840/shardkv 173.974s -$ -``` - -You will need to make your servers watch for configuration changes, and when one is detected, to start the shard migration process. If a replica group loses a shard, it must stop serving requests to keys in that shard immediately, and start migrating the data for that shard to the replica group that is taking over ownership. If a replica group gains a shard, it needs to wait for the previous owner to send over the old shard data before accepting requests for that shard. - -Implement shard migration during configuration changes. 
Make sure that all servers in a replica group do the migration at the same point in the sequence of operations they execute, so that they all either accept or reject concurrent client requests. You should focus on passing the second test (""join then leave"") before working on the later tests. You are done with this task when you pass all tests up to, but not including, `TestDelete`. - -Your server will need to periodically poll the shardctrler to learn about new configurations. The tests expect that your code polls roughly every 100 milliseconds; more often is OK, but much less often may cause problems. - -Servers will need to send RPCs to each other in order to transfer shards during configuration changes. The shardctrler's `Config` struct contains server names, but you need a `labrpc.ClientEnd` in order to send an RPC. You should use the `make_end()` function passed to `StartServer()` to turn a server name into a `ClientEnd`. `shardkv/client.go` contains code that does this. - -- Process re-configurations one at a time, in order. -- If a test fails, check for gob errors (e.g. ""gob: type not registered for interface ...""). Go doesn't consider gob errors to be fatal, although they are fatal for the lab. -- You'll need to provide at-most-once semantics (duplicate detection) for client requests across shard movement. -- Think about how the shardkv client and server should deal with `ErrWrongGroup`. Should the client change the sequence number if it receives `ErrWrongGroup`? Should the server update the client state if it returns `ErrWrongGroup` when executing a `Get`/`Put` request? -- After a server has moved to a new configuration, it is acceptable for it to continue to store shards that it no longer owns (though this would be regrettable in a real system). This may help simplify your server implementation. -- When group G1 needs a shard from G2 during a configuration change, does it matter at what point during its processing of log entries G2 sends the shard to G1? -- You can send an entire map in an RPC request or reply, which may help keep the code for shard transfer simple. -- If one of your RPC handlers includes in its reply a map (e.g. a key/value map) that's part of your server's state, you may get bugs due to races. The RPC system has to read the map in order to send it to the caller, but it isn't holding a lock that covers the map. Your server, however, may proceed to modify the same map while the RPC system is reading it. The solution is for the RPC handler to include a copy of the map in the reply. -- If you put a map or a slice in a Raft log entry, and your key/value server subsequently sees the entry on the `applyCh` and saves a reference to the map/slice in your key/value server's state, you may have a race. Make a copy of the map/slice, and store the copy in your key/value server's state. The race is between your key/value server modifying the map/slice and Raft reading it while persisting its log. -- During a configuration change, a pair of groups may need to move shards in both directions between them. If you see deadlock, this is a possible source.","Implement shard migration during configuration changes. Make sure that all servers in a replica group do the migration at the same point in the sequence of operations they execute, so that they all either accept or reject concurrent client requests. You should focus on passing the second test (""join then leave"") before working on the later tests. 
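A minimal sketch of the polling loop from the hints above: fetch configurations one at a time, in order, and replicate the reconfiguration itself through Raft so every member of the group switches at the same log position. The `Op{Kind: "Config", ...}` wrapping and the `kv.mck` shardctrler clerk field are assumptions.

```
// Sketch only: only the leader proposes config changes, and configs are
// consumed strictly as Num+1, never skipping ahead.
func (kv *ShardKV) configPoller() {
	for {
		if _, isLeader := kv.rf.GetState(); isLeader {
			kv.mu.Lock()
			nextNum := kv.config.Num + 1
			kv.mu.Unlock()
			newConfig := kv.mck.Query(nextNum) // clerk for the shardctrler
			if newConfig.Num == nextNum {
				// Entering the new config into the Raft log is what makes all
				// replicas do the migration at the same point.
				kv.rf.Start(Op{Kind: "Config", Config: newConfig})
			}
		}
		time.Sleep(100 * time.Millisecond)
	}
}
```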
You are done with this task when you pass all tests up to, but not including, `TestDelete`.","Your server will need to periodically poll the shardctrler to learn about new configurations. The tests expect that your code polls roughly every 100 milliseconds; more often is OK, but much less often may cause problems. - -Servers will need to send RPCs to each other in order to transfer shards during configuration changes. The shardctrler's `Config` struct contains server names, but you need a `labrpc.ClientEnd` in order to send an RPC. You should use the `make_end()` function passed to `StartServer()` to turn a server name into a `ClientEnd`. `shardkv/client.go` contains code that does this. - -- Process re-configurations one at a time, in order. -- If a test fails, check for gob errors (e.g. ""gob: type not registered for interface ...""). Go doesn't consider gob errors to be fatal, although they are fatal for the lab. -- You'll need to provide at-most-once semantics (duplicate detection) for client requests across shard movement. -- Think about how the shardkv client and server should deal with `ErrWrongGroup`. Should the client change the sequence number if it receives `ErrWrongGroup`? Should the server update the client state if it returns `ErrWrongGroup` when executing a `Get`/`Put` request? -- After a server has moved to a new configuration, it is acceptable for it to continue to store shards that it no longer owns (though this would be regrettable in a real system). This may help simplify your server implementation. -- When group G1 needs a shard from G2 during a configuration change, does it matter at what point during its processing of log entries G2 sends the shard to G1? -- You can send an entire map in an RPC request or reply, which may help keep the code for shard transfer simple. -- If one of your RPC handlers includes in its reply a map (e.g. a key/value map) that's part of your server's state, you may get bugs due to races. The RPC system has to read the map in order to send it to the caller, but it isn't holding a lock that covers the map. Your server, however, may proceed to modify the same map while the RPC system is reading it. The solution is for the RPC handler to include a copy of the map in the reply. -- If you put a map or a slice in a Raft log entry, and your key/value server subsequently sees the entry on the `applyCh` and saves a reference to the map/slice in your key/value server's state, you may have a race. Make a copy of the map/slice, and store the copy in your key/value server's state. The race is between your key/value server modifying the map/slice and Raft reading it while persisting its log. -- During a configuration change, a pair of groups may need to move shards in both directions between them. If you see deadlock, this is a possible source.",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/shardkv && $ go test,"Test (5A): static shards ... - ... Passed -Test (5A): rejection ... - ... Passed -Test (5B): join then leave ... - ... Passed -Test (5B): snapshots, join, and leave ... -labgob warning: Decoding into a non-default variable/field Num may not work - ... Passed -Test (5B): servers miss configuration changes... - ... Passed -Test (5B): concurrent puts and configuration changes... - ... Passed -Test (5B): more concurrent puts and configuration changes... - ... Passed -Test (5B): concurrent configuration change and restart... - ... Passed -Test (5B): unreliable 1... - ... Passed -Test (5B): unreliable 2... - ... 
Passed -Test (5B): unreliable 3... - ... Passed -Test: shard deletion (challenge 1) ... - ... Passed -Test: unaffected shard access (challenge 2) ... - ... Passed -Test: partial migration shard access (challenge 2) ... - ... Passed -PASS -ok 6.5840/shardkv 173.974s",hard,http://nil.csail.mit.edu/6.5840/2024/labs/lab-shard.html -12,6.5840: Distributed Systems,Spring 2025,Lab 1: MapReduce,Your Job,"In this lab you'll build a MapReduce system. You'll implement a worker process that calls application Map and Reduce functions and handles reading and writing files, and a coordinator process that hands out tasks to workers and copes with failed workers. You'll be building something similar to the [MapReduce paper](http://research.google.com/archive/mapreduce-osdi04.pdf). (Note: this lab uses ""coordinator"" instead of the paper's ""master"".)","You need to [setup Go](http://nil.csail.mit.edu/6.5840/2024/labs/go.html) to do the labs. - -Fetch the initial lab software with [git](https://git-scm.com/) (a version control system). To learn more about git, look at the [Pro Git book](https://git-scm.com/book/en/v2) or the [git user's manual](http://www.kernel.org/pub/software/scm/git/docs/user-manual.html). - -``` -$ git clone git://g.csail.mit.edu/6.5840-golabs-2024 6.5840 -$ cd 6.5840 -$ ls -Makefile src -$ -``` - -We supply you with a simple sequential mapreduce implementation in `src/main/mrsequential.go`. It runs the maps and reduces one at a time, in a single process. We also provide you with a couple of MapReduce applications: word-count in `mrapps/wc.go`, and a text indexer in `mrapps/indexer.go`. You can run word count sequentially as follows: - -``` -$ cd ~/6.5840 -$ cd src/main -$ go build -buildmode=plugin ../mrapps/wc.go -$ rm mr-out* -$ go run mrsequential.go wc.so pg*.txt -$ more mr-out-0 -A 509 -ABOUT 2 -ACT 8 -... -``` - -`mrsequential.go` leaves its output in the file `mr-out-0`. The input is from the text files named `pg-xxx.txt`. - -Feel free to borrow code from `mrsequential.go`. You should also have a look at `mrapps/wc.go` to see what MapReduce application code looks like. - -For this lab and all the others, we might issue updates to the code we provide you. To ensure that you can fetch those updates and easily merge them using `git pull`, it's best to leave the code we provide in the original files. You can add to the code we provide as directed in the lab write-ups; just don't move it. It's OK to put your own new functions in new files.",,"Your job is to implement a distributed MapReduce, consisting of two programs, the coordinator and the worker. There will be just one coordinator process, and one or more worker processes executing in parallel. In a real system the workers would run on a bunch of different machines, but for this lab you'll run them all on a single machine. The workers will talk to the coordinator via RPC. Each worker process will, in a loop, ask the coordinator for a task, read the task's input from one or more files, execute the task, write the task's output to one or more files, and again ask the coordinator for a new task. The coordinator should notice if a worker hasn't completed its task in a reasonable amount of time (for this lab, use ten seconds), and give the same task to a different worker. - -We have given you a little code to start you off. The ""main"" routines for the coordinator and worker are in `main/mrcoordinator.go` and `main/mrworker.go`; don't change these files. 
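The worker loop described above is mostly bookkeeping around one RPC exchange. The sketch below shows one possible shape for it; the task-kind constants and the `RequestTask` structs are placeholders you would define in `mr/rpc.go`, and only `call()` and `KeyValue` come from the supplied skeleton.

```
// Placeholder task kinds and RPC types; the real ones belong in mr/rpc.go.
type TaskKind int

const (
	MapTask TaskKind = iota
	ReduceTask
	WaitTask
	ExitTask
)

type RequestTaskArgs struct{}

type RequestTaskReply struct {
	Kind    TaskKind
	File    string // input split for a map task
	TaskNum int
	NReduce int
}

func Worker(mapf func(string, string) []KeyValue,
	reducef func(string, []string) string) {
	for {
		args := RequestTaskArgs{}
		reply := RequestTaskReply{}
		// call() is the RPC helper already present in mr/worker.go; a
		// false return usually means the coordinator has exited.
		if !call("Coordinator.RequestTask", &args, &reply) {
			return
		}
		switch reply.Kind {
		case MapTask:
			// read reply.File, call mapf, write reply.NReduce
			// intermediate files named mr-<TaskNum>-<r>
		case ReduceTask:
			// read the mr-*-<TaskNum> files, call reducef,
			// write mr-out-<TaskNum>
		case WaitTask:
			time.Sleep(500 * time.Millisecond)
		case ExitTask:
			return
		}
		// also report completion to the coordinator with another RPC
	}
}
```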
You should put your implementation in `mr/coordinator.go`, `mr/worker.go`, and `mr/rpc.go`. - -Here's how to run your code on the word-count MapReduce application. First, make sure the word-count plugin is freshly built: - -``` -$ go build -buildmode=plugin ../mrapps/wc.go -``` - -In the `main` directory, run the coordinator. - -``` -$ rm mr-out* -$ go run mrcoordinator.go pg-*.txt -``` - -The `pg-*.txt` arguments to `mrcoordinator.go` are the input files; each file corresponds to one ""split"", and is the input to one Map task. - -In one or more other windows, run some workers: - -``` -$ go run mrworker.go wc.so -``` - -When the workers and coordinator have finished, look at the output in `mr-out-*`. When you've completed the lab, the sorted union of the output files should match the sequential output, like this: - -``` -$ cat mr-out-* | sort | more -A 509 -ABOUT 2 -ACT 8 -... -``` - -We supply you with a test script in `main/test-mr.sh`. The tests check that the `wc` and `indexer` MapReduce applications produce the correct output when given the `pg-xxx.txt` files as input. The tests also check that your implementation runs the Map and Reduce tasks in parallel, and that your implementation recovers from workers that crash while running tasks. - -If you run the test script now, it will hang because the coordinator never finishes: - -``` -$ cd ~/6.5840/src/main -$ bash test-mr.sh -*** Starting wc test. -``` - -You can change `ret := false` to true in the Done function in `mr/coordinator.go` so that the coordinator exits immediately. Then: - -``` -$ bash test-mr.sh -*** Starting wc test. -sort: No such file or directory -cmp: EOF on mr-wc-all ---- wc output is not the same as mr-correct-wc.txt ---- wc test: FAIL -$ -``` - -The test script expects to see output in files named `mr-out-X`, one for each reduce task. The empty implementations of `mr/coordinator.go` and `mr/worker.go` don't produce those files (or do much of anything else), so the test fails. - -When you've finished, the test script output should look like this: - -``` -$ bash test-mr.sh -*** Starting wc test. ---- wc test: PASS -*** Starting indexer test. ---- indexer test: PASS -*** Starting map parallelism test. ---- map parallelism test: PASS -*** Starting reduce parallelism test. ---- reduce parallelism test: PASS -*** Starting job count test. ---- job count test: PASS -*** Starting early exit test. ---- early exit test: PASS -*** Starting crash test. ---- crash test: PASS -*** PASSED ALL TESTS -$ -``` - -You may see some errors from the Go RPC package that look like - -``` -2019/12/16 13:27:09 rpc.Register: method ""Done"" has 1 input parameters; needs exactly three -``` - -Ignore these messages; registering the coordinator as an [RPC server](https://golang.org/src/net/rpc/server.go) checks if all its methods are suitable for RPCs (have 3 inputs); we know that `Done` is not called via RPC. - -Additionally, depending on your strategy for terminating worker processes, you may see some errors of the form - -``` -2025/02/11 16:21:32 dialing:dial unix /var/tmp/5840-mr-501: connect: connection refused -``` - -It is fine to see a handful of these messages per test; they arise when the worker is unable to contact the coordinator RPC server after the coordinator has exited.","Your job is to implement a distributed MapReduce, consisting of two programs, the coordinator and the worker. There will be just one coordinator process, and one or more worker processes executing in parallel. 
In a real system the workers would run on a bunch of different machines, but for this lab you'll run them all on a single machine. The workers will talk to the coordinator via RPC. Each worker process will, in a loop, ask the coordinator for a task, read the task's input from one or more files, execute the task, write the task's output to one or more files, and again ask the coordinator for a new task. The coordinator should notice if a worker hasn't completed its task in a reasonable amount of time (for this lab, use ten seconds), and give the same task to a different worker. - -We have given you a little code to start you off. The ""main"" routines for the coordinator and worker are in `main/mrcoordinator.go` and `main/mrworker.go`; don't change these files. You should put your implementation in `mr/coordinator.go`, `mr/worker.go`, and `mr/rpc.go`. - -Here's how to run your code on the word-count MapReduce application. First, make sure the word-count plugin is freshly built: - -``` -$ go build -buildmode=plugin ../mrapps/wc.go -``` - -In the `main` directory, run the coordinator. - -``` -$ rm mr-out* -$ go run mrcoordinator.go pg-*.txt -``` - -The `pg-*.txt` arguments to `mrcoordinator.go` are the input files; each file corresponds to one ""split"", and is the input to one Map task. - -In one or more other windows, run some workers: - -``` -$ go run mrworker.go wc.so -``` - -When the workers and coordinator have finished, look at the output in `mr-out-*`. When you've completed the lab, the sorted union of the output files should match the sequential output, like this: - -``` -$ cat mr-out-* | sort | more -A 509 -ABOUT 2 -ACT 8 -... -``` - -We supply you with a test script in `main/test-mr.sh`. The tests check that the `wc` and `indexer` MapReduce applications produce the correct output when given the `pg-xxx.txt` files as input. The tests also check that your implementation runs the Map and Reduce tasks in parallel, and that your implementation recovers from workers that crash while running tasks. - -If you run the test script now, it will hang because the coordinator never finishes: - -``` -$ cd ~/6.5840/src/main -$ bash test-mr.sh -*** Starting wc test. -``` - -You can change `ret := false` to true in the Done function in `mr/coordinator.go` so that the coordinator exits immediately. Then: - -``` -$ bash test-mr.sh -*** Starting wc test. -sort: No such file or directory -cmp: EOF on mr-wc-all ---- wc output is not the same as mr-correct-wc.txt ---- wc test: FAIL -$ -``` - -The test script expects to see output in files named `mr-out-X`, one for each reduce task. The empty implementations of `mr/coordinator.go` and `mr/worker.go` don't produce those files (or do much of anything else), so the test fails. - -When you've finished, the test script output should look like this: - -``` -$ bash test-mr.sh -*** Starting wc test. ---- wc test: PASS -*** Starting indexer test. ---- indexer test: PASS -*** Starting map parallelism test. ---- map parallelism test: PASS -*** Starting reduce parallelism test. ---- reduce parallelism test: PASS -*** Starting job count test. ---- job count test: PASS -*** Starting early exit test. ---- early exit test: PASS -*** Starting crash test. 
---- crash test: PASS -*** PASSED ALL TESTS -$ -``` - -You may see some errors from the Go RPC package that look like - -``` -2019/12/16 13:27:09 rpc.Register: method ""Done"" has 1 input parameters; needs exactly three -``` - -Ignore these messages; registering the coordinator as an [RPC server](https://golang.org/src/net/rpc/server.go) checks if all its methods are suitable for RPCs (have 3 inputs); we know that `Done` is not called via RPC. - -Additionally, depending on your strategy for terminating worker processes, you may see some errors of the form - -``` -2025/02/11 16:21:32 dialing:dial unix /var/tmp/5840-mr-501: connect: connection refused -``` - -It is fine to see a handful of these messages per test; they arise when the worker is unable to contact the coordinator RPC server after the coordinator has exited.","- The [Guidance page](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) has some tips on developing and debugging. - -- One way to get started is to modify `mr/worker.go`'s `Worker()` to send an RPC to the coordinator asking for a task. Then modify the coordinator to respond with the file name of an as-yet-unstarted map task. Then modify the worker to read that file and call the application Map function, as in `mrsequential.go`. - -- The application Map and Reduce functions are loaded at run-time using the Go plugin package, from files whose names end in `.so`. - -- If you change anything in the `mr/` directory, you will probably have to re-build any MapReduce plugins you use, with something like `go build -buildmode=plugin ../mrapps/wc.go` - -- This lab relies on the workers sharing a file system. That's straightforward when all workers run on the same machine, but would require a global filesystem like GFS if the workers ran on different machines. - -- A reasonable naming convention for intermediate files is `mr-X-Y`, where X is the Map task number, and Y is the reduce task number. - -- The worker's map task code will need a way to store intermediate key/value pairs in files in a way that can be correctly read back during reduce tasks. One possibility is to use Go's `encoding/json` package. To write key/value pairs in JSON format to an open file: - - ``` - enc := json.NewEncoder(file) - for _, kv := ... { - err := enc.Encode(&kv) - ``` - - and to read such a file back: - - ``` - dec := json.NewDecoder(file) - for { - var kv KeyValue - if err := dec.Decode(&kv); err != nil { - break - } - kva = append(kva, kv) - } - ``` - -- The map part of your worker can use the `ihash(key)` function (in `worker.go`) to pick the reduce task for a given key. - -- You can steal some code from `mrsequential.go` for reading Map input files, for sorting intermediate key/value pairs between the Map and Reduce, and for storing Reduce output in files. - -- The coordinator, as an RPC server, will be concurrent; don't forget to lock shared data. - -- Use Go's race detector, with `go run -race`. `test-mr.sh` has a comment at the start that tells you how to run it with `-race`. When we grade your labs, we will **not** use the race detector. Nevertheless, if your code has races, there's a good chance it will fail when we test it even without the race detector. - -- Workers will sometimes need to wait, e.g. reduces can't start until the last map has finished. One possibility is for workers to periodically ask the coordinator for work, sleeping with `time.Sleep()` between each request. 
Another possibility is for the relevant RPC handler in the coordinator to have a loop that waits, either with `time.Sleep()` or `sync.Cond`. Go runs the handler for each RPC in its own thread, so the fact that one handler is waiting needn't prevent the coordinator from processing other RPCs. - -- The coordinator can't reliably distinguish between crashed workers, workers that are alive but have stalled for some reason, and workers that are executing but too slowly to be useful. The best you can do is have the coordinator wait for some amount of time, and then give up and re-issue the task to a different worker. For this lab, have the coordinator wait for ten seconds; after that the coordinator should assume the worker has died (of course, it might not have). - -- If you choose to implement Backup Tasks (Section 3.6), note that we test that your code doesn't schedule extraneous tasks when workers execute tasks without crashing. Backup tasks should only be scheduled after some relatively long period of time (e.g., 10s). - -- To test crash recovery, you can use the `mrapps/crash.go` application plugin. It randomly exits in the Map and Reduce functions. - -- To ensure that nobody observes partially written files in the presence of crashes, the MapReduce paper mentions the trick of using a temporary file and atomically renaming it once it is completely written. You can use `ioutil.TempFile` (or `os.CreateTemp` if you are running Go 1.17 or later) to create a temporary file and `os.Rename` to atomically rename it. - -- `test-mr.sh` runs all its processes in the sub-directory `mr-tmp`, so if something goes wrong and you want to look at intermediate or output files, look there. Feel free to temporarily modify `test-mr.sh` to `exit` after the failing test, so the script does not continue testing (and overwrite the output files). - -- `test-mr-many.sh` runs `test-mr.sh` many times in a row, which you may want to do in order to spot low-probability bugs. It takes as an argument the number of times to run the tests. You should not run several `test-mr.sh` instances in parallel because the coordinator will reuse the same socket, causing conflicts. - -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names. - -- When calling the RPC `call()` function, the reply struct should contain all default values. RPC calls should look like this: - - ``` - reply := SomeType{} - call(..., &reply) - ``` - - without setting any fields of reply before the call. If you pass reply structures that have non-default fields, the RPC system may silently return incorrect values.","- The map phase should divide the intermediate keys into buckets for `nReduce` reduce tasks, where `nReduce` is the number of reduce tasks -- the argument that `main/mrcoordinator.go` passes to `MakeCoordinator()`. Each mapper should create `nReduce` intermediate files for consumption by the reduce tasks. -- The worker implementation should put the output of the X'th reduce task in the file `mr-out-X`. -- A `mr-out-X` file should contain one line per Reduce function output. The line should be generated with the Go `""%v %v""` format, called with the key and value. Have a look in `main/mrsequential.go` for the line commented ""this is the correct format"". The test script will fail if your implementation deviates too much from this format. -- You can modify `mr/worker.go`, `mr/coordinator.go`, and `mr/rpc.go`. 
You can temporarily modify other files for testing, but make sure your code works with the original versions; we'll test with the original versions. -- The worker should put intermediate Map output in files in the current directory, where your worker can later read them as input to Reduce tasks. -- `main/mrcoordinator.go` expects `mr/coordinator.go` to implement a `Done()` method that returns true when the MapReduce job is completely finished; at that point, `mrcoordinator.go` will exit. -- When the job is completely finished, the worker processes should exit. A simple way to implement this is to use the return value from `call()`: if the worker fails to contact the coordinator, it can assume that the coordinator has exited because the job is done, so the worker can terminate too. Depending on your design, you might also find it helpful to have a ""please exit"" pseudo-task that the coordinator can give to workers.",git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/main && bash test-mr.sh,"*** Starting wc test. ---- wc test: PASS -*** Starting indexer test. ---- indexer test: PASS -*** Starting map parallelism test. ---- map parallelism test: PASS -*** Starting reduce parallelism test. ---- reduce parallelism test: PASS -*** Starting job count test. ---- job count test: PASS -*** Starting early exit test. ---- early exit test: PASS -*** Starting crash test. ---- crash test: PASS -*** PASSED ALL TESTS",moderate/hard,http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html -13,6.5840: Distributed Systems,Spring 2025,Lab 2: Key/Value Server,Key/value server with reliable network,"In this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes. - -### KV server - -Each client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`. - -`Get(key)` fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`. - -Maintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits. - -When you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. 
That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.","We supply you with skeleton code and tests in `src/kvsrv1`. `kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. `kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv1 -$ go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - kvsrv_test.go:25: Put err ErrNoKey -... -$ -``` -",,"Your first task is to implement a solution that works when there are no dropped messages. You'll need to add RPC-sending code to the Clerk Put/Get methods in `client.go`, and implement `Put` and `Get` RPC handlers in `server.go`. - -You have completed this task when you pass the Reliable tests in the test suite: - -``` -$ go test -v -run Reliable -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - ... Passed -- 0.0 1 5 0 ---- PASS: TestReliablePut (0.00s) -=== RUN TestPutConcurrentReliable -Test: many clients racing to put values to the same key (reliable network)... -info: linearizability check timed out, assuming history is ok - ... Passed -- 3.1 1 90171 90171 ---- PASS: TestPutConcurrentReliable (3.07s) -=== RUN TestMemPutManyClientsReliable -Test: memory use many put clients (reliable network)... - ... Passed -- 9.2 1 100000 0 ---- PASS: TestMemPutManyClientsReliable (16.59s) -PASS -ok 6.5840/kvsrv1 19.681s -``` - -The numbers after each `Passed` are real time in seconds, the constant 1, the number of RPCs sent (including client RPCs), and the number of key/value operations executed (`Clerk` `Get` and `Put` calls). - -- Check that your code is race-free using `go test -race`.","Your first task is to implement a solution that works when there are no dropped messages. You'll need to add RPC-sending code to the Clerk Put/Get methods in `client.go`, and implement `Put` and `Get` RPC handlers in `server.go`. 
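The version rules in the lab description map almost line for line onto the `Put` handler. Here is a minimal sketch, assuming a `store` map guarded by a mutex; the `valver` struct is illustrative, and names such as `rpc.PutArgs`, `rpc.PutReply`, `rpc.Tversion`, and `rpc.OK` are assumptions about `kvsrv1/rpc/rpc.go` rather than guaranteed identifiers.

```
// One (value, version) entry per key; the struct name is illustrative.
type valver struct {
	value   string
	version rpc.Tversion
}

func (kv *KVServer) Put(args *rpc.PutArgs, reply *rpc.PutReply) {
	kv.mu.Lock()
	defer kv.mu.Unlock()

	cur, ok := kv.store[args.Key]
	switch {
	case !ok && args.Version == 0:
		// Version 0 on a missing key creates it at version 1.
		kv.store[args.Key] = valver{args.Value, 1}
		reply.Err = rpc.OK
	case !ok:
		reply.Err = rpc.ErrNoKey
	case args.Version == cur.version:
		// Matching version: install the value and bump the version.
		kv.store[args.Key] = valver{args.Value, cur.version + 1}
		reply.Err = rpc.OK
	default:
		reply.Err = rpc.ErrVersion
	}
}
```

`Get` is simpler: return the stored value and version, or `rpc.ErrNoKey` if the key is absent.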
- -You have completed this task when you pass the Reliable tests in the test suite:",- Check that your code is race-free using `go test -race`.,,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/kvsrv1 && go test -v -run Reliable,"=== RUN TestReliablePut -One client and reliable Put (reliable network)... - ... Passed -- 0.0 1 5 0 ---- PASS: TestReliablePut (0.00s) -=== RUN TestPutConcurrentReliable -Test: many clients racing to put values to the same key (reliable network)... -info: linearizability check timed out, assuming history is ok - ... Passed -- 3.1 1 90171 90171 ---- PASS: TestPutConcurrentReliable (3.07s) -=== RUN TestMemPutManyClientsReliable -Test: memory use many put clients (reliable network)... - ... Passed -- 9.2 1 100000 0 ---- PASS: TestMemPutManyClientsReliable (16.59s) -PASS -ok 6.5840/kvsrv1 19.681s",easy,http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html -14,6.5840: Distributed Systems,Spring 2025,Lab 2: Key/Value Server,Implementing a lock using key/value clerk,"In this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes. - -### KV server - -Each client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`. - -`Get(key)` fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`. - -Maintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits. - -When you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. 
See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.","We supply you with skeleton code and tests in `src/kvsrv1`. `kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. `kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv1 -$ go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - kvsrv_test.go:25: Put err ErrNoKey -... -$ -``` -",,"In many distributed applications, clients running on different machines use a key/value server to coordinate their activities. For example, ZooKeeper and Etcd allow clients to coordinate using a distributed lock, in analogy with how threads in a Go program can coordinate with locks (i.e., `sync.Mutex`). Zookeeper and Etcd implement such a lock with conditional put. - -In this exercise your task is to implement a lock layered on client `Clerk.Put` and `Clerk.Get` calls. The lock supports two methods: `Acquire` and `Release`. The lock's specification is that only one client can successfully acquire the lock at a time; other clients must wait until the first client has released the lock using `Release`. - -We supply you with skeleton code and tests in `src/kvsrv1/lock/`. You will need to modify `src/kvsrv1/lock/lock.go`. Your `Acquire` and `Release` code can talk to your key/value server by calling `lk.ck.Put()` and `lk.ck.Get()`. - -If a client crashes while holding a lock, the lock will never be released. In a design more sophisticated than this lab, the client would attach a [lease](https://en.wikipedia.org/wiki/Lease_(computer_science)#:~:text=Leases are commonly used in,to rely on the resource.) to a lock. When the lease expires, the lock server would release the lock on behalf of the client. In this lab clients don't crash and you can ignore this problem. - -Implement `Acquire` and `Release`. You have completed this exercise when your code passes the Reliable tests in the test suite in the lock sub-directory: - -``` -$ cd lock -$ go test -v -run Reliable -=== RUN TestOneClientReliable -Test: 1 lock clients (reliable network)... - ... Passed -- 2.0 1 974 0 ---- PASS: TestOneClientReliable (2.01s) -=== RUN TestManyClientsReliable -Test: 10 lock clients (reliable network)... - ... Passed -- 2.1 1 83194 0 ---- PASS: TestManyClientsReliable (2.11s) -PASS -ok 6.5840/kvsrv1/lock 4.120s -``` - -If you haven't implemented the lock yet, the first test will succeed. - -This exercise requires little code but will require a bit more independent thought than the previous exercise. 
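One common shape for the lock is to store the holder's identity (or the empty string when free) under the lock's key, and to claim it with a conditional `Put` at the version just observed. A minimal sketch follows, assuming Clerk methods shaped like `Get(key) (value, version, err)` and `Put(key, value, version) err`; the `lk.key` and `lk.id` fields are assumed to hold the key passed to `MakeLock` and a random client identifier (see the hints below), and `rpc.OK` is an assumed success value.

```
// Acquire spins until this client owns the lock key.
func (lk *Lock) Acquire() {
	for {
		val, ver, err := lk.ck.Get(lk.key)
		switch {
		case err == rpc.ErrNoKey:
			// Nobody has ever held the lock: try to create it as ours.
			if lk.ck.Put(lk.key, lk.id, 0) == rpc.OK {
				return
			}
		case val == "":
			// Lock exists but is free: claim it at the observed version.
			// A losing racer gets rpc.ErrVersion and simply retries.
			if lk.ck.Put(lk.key, lk.id, ver) == rpc.OK {
				return
			}
		}
		time.Sleep(10 * time.Millisecond)
	}
}

// Release marks the lock free; only the holder should call this.
func (lk *Lock) Release() {
	_, ver, _ := lk.ck.Get(lk.key)
	lk.ck.Put(lk.key, "", ver)
}
```

In the later unreliable-network exercise, a retried claim that returns `rpc.ErrMaybe` needs extra care, since the Put may or may not have taken effect; one option is to `Get` the key again and check whether this client's identifier is now recorded as the holder.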
- -- You will need a unique identifier for each lock client; call `kvtest.RandValue(8)` to generate a random string. -- The lock service should use a specific key to store the ""lock state"" (you would have to decide precisely what the lock state is). The key to be used is passed through the parameter `l` of `MakeLock` in `src/kvsrv1/lock/lock.go`.",Implement `Acquire` and `Release`. You have completed this exercise when your code passes the Reliable tests in the test suite in the lock sub-directory:,"- You will need a unique identifier for each lock client; call `kvtest.RandValue(8)` to generate a random string. -- The lock service should use a specific key to store the ""lock state"" (you would have to decide precisely what the lock state is). The key to be used is passed through the parameter `l` of `MakeLock` in `src/kvsrv1/lock/lock.go`.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/kvsrv1/lock && go test -v -run Reliable,"=== RUN TestOneClientReliable -Test: 1 lock clients (reliable network)... - ... Passed -- 2.0 1 974 0 ---- PASS: TestOneClientReliable (2.01s) -=== RUN TestManyClientsReliable -Test: 10 lock clients (reliable network)... - ... Passed -- 2.1 1 83194 0 ---- PASS: TestManyClientsReliable (2.11s) -PASS -ok 6.5840/kvsrv1/lock 4.120s",moderate,http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html -15,6.5840: Distributed Systems,Spring 2025,Lab 2: Key/Value Server,Key/value server with dropped messages,"In this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes. - -### KV server - -Each client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`. - -`Get(key)` fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`. - -Maintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits. - -When you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. 
For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.","We supply you with skeleton code and tests in `src/kvsrv1`. `kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. `kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv1 -$ go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - kvsrv_test.go:25: Put err ErrNoKey -... -$ -``` -",,"The main challenge in this exercise is that the network may re-order, delay, or discard RPC requests and/or replies. To recover from discarded requests/replies, the Clerk must keep re-trying each RPC until it receives a reply from the server. - -If the network discards an RPC request message, then the client re-sending the request will solve the problem: the server will receive and execute just the re-sent request. - -However, the network might instead discard an RPC reply message. The client does not know which message was discarded; the client only observes that it received no reply. If it was the reply that was discarded, and the client re-sends the RPC request, then the server will receive two copies of the request. That's OK for a `Get`, since `Get` doesn't modify the server state. It is safe to resend a `Put` RPC with the same version number, since the server executes `Put` conditionally on the version number; if the server received and executed a `Put` RPC, it will respond to a re-transmitted copy of that RPC with `rpc.ErrVersion` rather than executing the Put a second time. - -A tricky case is if the server replies with an `rpc.ErrVersion` in a response to an RPC that the Clerk retried. In this case, the Clerk cannot know if the Clerk's `Put` was executed by the server or not: the first RPC might have been executed by the server but the network may have discarded the successful response from the server, so that the server sent `rpc.ErrVersion` only for the retransmitted RPC. Or, it might be that another Clerk updated the key before the Clerk's first RPC arrived at the server, so that the server executed neither of the Clerk's RPCs and replied `rpc.ErrVersion` to both. 
Therefore, if a Clerk receives `rpc.ErrVersion` for a retransmitted Put RPC, `Clerk.Put` must return `rpc.ErrMaybe` to the application instead of `rpc.ErrVersion` since the request may have been executed. It is then up to the application to handle this case. If the server responds to an initial (not retransmitted) Put RPC with `rpc.ErrVersion`, then the Clerk should return `rpc.ErrVersion` to the application, since the RPC was definitely not executed by the server. - -It would be more convenient for application developers if `Put`'s were exactly-once (i.e., no `rpc.ErrMaybe` errors) but that is difficult to guarantee without maintaining state at the server for each Clerk. In the last exercise of this lab, you will implement a lock using your Clerk to explore how to program with at-most-once `Clerk.Put`. - -Now you should modify your `kvsrv1/client.go` to continue in the face of dropped RPC requests and replies. A return value of `true` from the client's `ck.clnt.Call()` indicates that the client received an RPC reply from the server; a return value of `false` indicates that it did not receive a reply (more precisely, `Call()` waits for a reply message for a timeout interval, and returns false if no reply arrives within that time). Your `Clerk` should keep re-sending an RPC until it receives a reply. Keep in mind the discussion of `rpc.ErrMaybe` above. Your solution shouldn't require any changes to the server. - -Add code to `Clerk` to retry if doesn't receive a reply. Your have completed this task if your code passes all tests in `kvsrv1/`, like this: - -``` -$ go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - ... Passed -- 0.0 1 5 0 ---- PASS: TestReliablePut (0.00s) -=== RUN TestPutConcurrentReliable -Test: many clients racing to put values to the same key (reliable network)... -info: linearizability check timed out, assuming history is ok - ... Passed -- 3.1 1 106647 106647 ---- PASS: TestPutConcurrentReliable (3.09s) -=== RUN TestMemPutManyClientsReliable -Test: memory use many put clients (reliable network)... - ... Passed -- 8.0 1 100000 0 ---- PASS: TestMemPutManyClientsReliable (14.61s) -=== RUN TestUnreliableNet -One client (unreliable network)... - ... Passed -- 7.6 1 251 208 ---- PASS: TestUnreliableNet (7.60s) -PASS -ok 6.5840/kvsrv1 25.319s -``` - -- Before the client retries, it should wait a little bit; you can use go's `time` package and call `time.Sleep(100 * time.Millisecond)`","Add code to `Clerk` to retry if doesn't receive a reply. Your have completed this task if your code passes all tests in `kvsrv1/`, like this:","- Before the client retries, it should wait a little bit; you can use go's `time` package and call `time.Sleep(100 * time.Millisecond)`",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/kvsrv1 && go test -v,"=== RUN TestReliablePut -One client and reliable Put (reliable network)... - ... Passed -- 0.0 1 5 0 ---- PASS: TestReliablePut (0.00s) -=== RUN TestPutConcurrentReliable -Test: many clients racing to put values to the same key (reliable network)... -info: linearizability check timed out, assuming history is ok - ... Passed -- 3.1 1 106647 106647 ---- PASS: TestPutConcurrentReliable (3.09s) -=== RUN TestMemPutManyClientsReliable -Test: memory use many put clients (reliable network)... - ... Passed -- 8.0 1 100000 0 ---- PASS: TestMemPutManyClientsReliable (14.61s) -=== RUN TestUnreliableNet -One client (unreliable network)... - ... 
Passed -- 7.6 1 251 208 ---- PASS: TestUnreliableNet (7.60s) -PASS -ok 6.5840/kvsrv1 25.319s",moderate,http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html -16,6.5840: Distributed Systems,Spring 2025,Lab 2: Key/Value Server,Implementing a lock using key/value clerk and unreliable network,"In this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes. - -### KV server - -Each client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`. - -`Get(key)` fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`. - -Maintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits. - -When you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.","We supply you with skeleton code and tests in `src/kvsrv1`. `kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. 
`kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv1 -$ go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - kvsrv_test.go:25: Put err ErrNoKey -... -$ -``` -",,"Modify your lock implementation to work correctly with your modified key/value client when the network is not reliable. You have completed this exercise when your code passes all the `kvsrv1/lock/` tests, including the unreliable ones: - -``` -$ cd lock -$ go test -v -=== RUN TestOneClientReliable -Test: 1 lock clients (reliable network)... - ... Passed -- 2.0 1 968 0 ---- PASS: TestOneClientReliable (2.01s) -=== RUN TestManyClientsReliable -Test: 10 lock clients (reliable network)... - ... Passed -- 2.1 1 10789 0 ---- PASS: TestManyClientsReliable (2.12s) -=== RUN TestOneClientUnreliable -Test: 1 lock clients (unreliable network)... - ... Passed -- 2.3 1 70 0 ---- PASS: TestOneClientUnreliable (2.27s) -=== RUN TestManyClientsUnreliable -Test: 10 lock clients (unreliable network)... - ... Passed -- 3.6 1 908 0 ---- PASS: TestManyClientsUnreliable (3.62s) -PASS -ok 6.5840/kvsrv1/lock 10.033s -``` -","Modify your lock implementation to work correctly with your modified key/value client when the network is not reliable. You have completed this exercise when your code passes all the `kvsrv1/lock/` tests, including the unreliable ones:",,,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/kvsrv1/lock && go test -v,"=== RUN TestOneClientReliable -Test: 1 lock clients (reliable network)... - ... Passed -- 2.0 1 968 0 ---- PASS: TestOneClientReliable (2.01s) -=== RUN TestManyClientsReliable -Test: 10 lock clients (reliable network)... - ... Passed -- 2.1 1 10789 0 ---- PASS: TestManyClientsReliable (2.12s) -=== RUN TestOneClientUnreliable -Test: 1 lock clients (unreliable network)... - ... Passed -- 2.3 1 70 0 ---- PASS: TestOneClientUnreliable (2.27s) -=== RUN TestManyClientsUnreliable -Test: 10 lock clients (unreliable network)... - ... Passed -- 3.6 1 908 0 ---- PASS: TestManyClientsUnreliable (3.62s) -PASS -ok 6.5840/kvsrv1/lock 10.033s",easy,http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html -17,6.5840: Distributed Systems,Spring 2025,Lab 3: Raft,Part 3A: leader election,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will shard your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. 
Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date.","Do a `git pull` to get the latest lab software. - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft1 -$ go test -Test (3A): initial election (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestInitialElection3A (4.90s) -Test (3A): election after network failure (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestReElection3A (5.05s) -... -$ -``` -","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). 
-type ApplyMsg -``` - -A service calls `Make(peers,me,)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.","Implement Raft leader election and heartbeats (`AppendEntries` RPCs with no log entries). The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run `go test -run 3A `to test your 3A code. - -- You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. `go test -run 3A `. -- Follow the paper's Figure 2. At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election, -- Add the Figure 2 state for leader election to the `Raft` struct in `raft.go`. You'll also need to define a struct to hold information about each log entry. -- Fill in the `RequestVoteArgs` and `RequestVoteReply` structs. Modify `Make()` to create a background goroutine that will kick off leader election periodically by sending out `RequestVote` RPCs when it hasn't heard from another peer for a while. Implement the `RequestVote()` RPC handler so that servers will vote for one another. -- To implement heartbeats, define an `AppendEntries` RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an `AppendEntries` RPC handler method. -- The tester requires that the leader send heartbeat RPCs no more than ten times per second. -- The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate). -- The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds. -- You may find Go's [rand](https://golang.org/pkg/math/rand/) useful. 
-- You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls [time.Sleep()](https://golang.org/pkg/time/#Sleep); see the `ticker()` goroutine that `Make()` creates for this purpose. Don't use Go's `time.Timer` or `time.Ticker`, which are difficult to use correctly. -- If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure. -- Don't forget to implement `GetState()`. -- The tester calls your Raft's `rf.Kill()` when it is permanently shutting down an instance. You can check whether `Kill()` has been called using `rf.killed()`. You may want to do this in all loops, to avoid having dead Raft instances print confusing messages. -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). The `labgob` package will warn you about this; don't ignore the warnings. -- The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug. Refer to the [Guidance](http://nil.csail.mit.edu/6.5840/2025/labs/guidance.html) page for debugging tips. -- If you fail a test, the tester produces a file that visualizes a timeline with events marked along it, including network partitions, crashed servers, and checks performed. Here's an [example of the visualization](http://nil.csail.mit.edu/6.5840/2025/labs/vis.html). Further, you can add your own annotations by writing, for example, `tester.Annotate(""Server 0"", ""short description"", ""details"")`. This is a new feature we added this year, so if you have any feedback regarding the visualizer (e.g., bug reports, what annotation APIs that you think might be helpful, what information you want the visualizer to show, etc.), please let us know! - -Be sure you pass the 3A tests before submitting Part 3A, so that you see something like this: - -``` -$ go test -run 3A -Test (3A): initial election (reliable network)... - ... Passed -- 3.6 3 106 0 -Test (3A): election after network failure (reliable network)... - ... Passed -- 7.6 3 304 0 -Test (3A): multiple elections (reliable network)... - ... Passed -- 8.4 7 954 0 -PASS -ok 6.5840/raft1 19.834sak -$ -``` - -Each ""Passed"" line contains five numbers; these are the time that the test took in seconds, the number of Raft peers, the number of RPCs sent during the test, the total number of bytes in the RPC messages, and the number of log entries that Raft reports were committed. Your numbers will differ from those shown here. You can ignore the numbers if you like, but they may help you sanity-check the number of RPCs that your implementation sends. For all of labs 3, 4, and 5, the grading script will fail your solution if it takes more than 600 seconds for all of the tests (`go test`), or if any individual test takes more than 120 seconds. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should make sure that your code consistently passes the tests with the `-race` flag.","Implement Raft leader election and heartbeats (`AppendEntries` RPCs with no log entries). The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. 
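As a concrete illustration of the hints above about the `ticker()` goroutine and randomized timeouts, here is a minimal sketch. It is not the course's reference design: the `lastHeartbeat` and `dead` fields and the `startElection()` helper are made-up names for illustration, and the timeout values are only one plausible tuning.

```
package raft

import (
	"math/rand"
	"sync"
	"sync/atomic"
	"time"
)

// Illustrative fields only; your Raft struct in raft.go will also hold the
// Figure 2 state (currentTerm, votedFor, log, ...).
type Raft struct {
	mu            sync.Mutex
	dead          int32
	lastHeartbeat time.Time // reset when a valid AppendEntries or vote grant arrives
}

func (rf *Raft) killed() bool { return atomic.LoadInt32(&rf.dead) == 1 }

// startElection is a placeholder: increment currentTerm, vote for self, and
// send RequestVote RPCs to all peers in parallel.
func (rf *Raft) startElection() {}

// ticker sleeps in a loop and starts an election if no heartbeat has arrived
// for a full election timeout. The timeout is randomized so peers don't all
// time out together; it is larger than the paper's 150-300 ms because the
// tester limits heartbeats to roughly ten per second.
func (rf *Raft) ticker() {
	for !rf.killed() {
		timeout := time.Duration(600+rand.Intn(400)) * time.Millisecond

		rf.mu.Lock()
		expired := time.Since(rf.lastHeartbeat) >= timeout
		rf.mu.Unlock()

		if expired {
			rf.startElection()
		}
		time.Sleep(50 * time.Millisecond)
	}
}
```

Resetting `lastHeartbeat` in the `AppendEntries` and `RequestVote` handlers (only when the RPC is accepted) is what keeps followers from starting needless elections.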
Run `go test -run 3A `to test your 3A code.","- You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. `go test -run 3A `. -- Follow the paper's Figure 2. At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election, -- Add the Figure 2 state for leader election to the `Raft` struct in `raft.go`. You'll also need to define a struct to hold information about each log entry. -- Fill in the `RequestVoteArgs` and `RequestVoteReply` structs. Modify `Make()` to create a background goroutine that will kick off leader election periodically by sending out `RequestVote` RPCs when it hasn't heard from another peer for a while. Implement the `RequestVote()` RPC handler so that servers will vote for one another. -- To implement heartbeats, define an `AppendEntries` RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an `AppendEntries` RPC handler method. -- The tester requires that the leader send heartbeat RPCs no more than ten times per second. -- The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate). -- The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds. -- You may find Go's [rand](https://golang.org/pkg/math/rand/) useful. -- You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls [time.Sleep()](https://golang.org/pkg/time/#Sleep); see the `ticker()` goroutine that `Make()` creates for this purpose. Don't use Go's `time.Timer` or `time.Ticker`, which are difficult to use correctly. -- If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure. -- Don't forget to implement `GetState()`. -- The tester calls your Raft's `rf.Kill()` when it is permanently shutting down an instance. You can check whether `Kill()` has been called using `rf.killed()`. You may want to do this in all loops, to avoid having dead Raft instances print confusing messages. -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). The `labgob` package will warn you about this; don't ignore the warnings. -- The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug. Refer to the [Guidance](http://nil.csail.mit.edu/6.5840/2025/labs/guidance.html) page for debugging tips. -- If you fail a test, the tester produces a file that visualizes a timeline with events marked along it, including network partitions, crashed servers, and checks performed. Here's an [example of the visualization](http://nil.csail.mit.edu/6.5840/2025/labs/vis.html). 
Further, you can add your own annotations by writing, for example, `tester.Annotate(""Server 0"", ""short description"", ""details"")`. This is a new feature we added this year, so if you have any feedback regarding the visualizer (e.g., bug reports, what annotation APIs that you think might be helpful, what information you want the visualizer to show, etc.), please let us know!",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/raft1 && go test -run 3A,"Test (3A): initial election (reliable network)... - ... Passed -- 3.6 3 106 0 -Test (3A): election after network failure (reliable network)... - ... Passed -- 7.6 3 304 0 -Test (3A): multiple elections (reliable network)... - ... Passed -- 8.4 7 954 0 -PASS -ok 6.5840/raft1 19.834sak",moderate,http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html -18,6.5840: Distributed Systems,Spring 2025,Lab 3: Raft,Part 3B: log,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will shard your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date.","Do a `git pull` to get the latest lab software. - -If you have done Lab 1, you already have a copy of the lab source code. 
If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft1 -$ go test -Test (3A): initial election (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestInitialElection3A (4.90s) -Test (3A): election after network failure (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestReElection3A (5.05s) -... -$ -``` -","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.","Implement the leader and follower code to append new log entries, so that the `go test -run 3B `tests pass. - -- Run `git pull` to get the latest lab software. -- Raft log is 1-indexed, but we suggest that you view it as 0-indexed, and starting out with an entry (at index=0) that has term 0. That allows the very first AppendEntries RPC to contain 0 as PrevLogIndex, and be a valid index into the log. 
-- Your first goal should be to pass `TestBasicAgree3B()`. Start by implementing `Start()`, then write the code to send and receive new log entries via `AppendEntries` RPCs, following Figure 2. Send each newly committed entry on `applyCh` on each peer. -- You will need to implement the election restriction (section 5.4.1 in the paper). -- Your code may have loops that repeatedly check for certain events. Don't have these loops execute continuously without pausing, since that will slow your implementation enough that it fails tests. Use Go's [condition variables](https://golang.org/pkg/sync/#Cond), or insert a `time.Sleep(10 * time.Millisecond)` in each loop iteration. -- Do yourself a favor for future labs and write (or re-write) code that's clean and clear. For ideas, re-visit the [Guidance page](http://nil.csail.mit.edu/6.5840/2025/labs/guidance.html) with tips on how to develop and debug your code. -- If you fail a test, look at `raft_test.go` and trace the test code from there to understand what's being tested. - -The tests for upcoming labs may fail your code if it runs too slowly. You can check how much real time and CPU time your solution uses with the `time` command. Here's typical output: - -``` -$ time go test -run 3B -Test (3B): basic agreement (reliable network)... - ... Passed -- 1.3 3 18 0 -Test (3B): RPC byte count (reliable network)... - ... Passed -- 2.8 3 56 0 -Test (3B): test progressive failure of followers (reliable network)... - ... Passed -- 5.3 3 188 0 -Test (3B): test failure of leaders (reliable network)... - ... Passed -- 6.4 3 378 0 -Test (3B): agreement after follower reconnects (reliable network)... - ... Passed -- 5.9 3 176 0 -Test (3B): no agreement if too many followers disconnect (reliable network)... - ... Passed -- 4.3 5 288 0 -Test (3B): concurrent Start()s (reliable network)... - ... Passed -- 1.5 3 32 0 -Test (3B): rejoin of partitioned leader (reliable network)... - ... Passed -- 5.3 3 216 0 -Test (3B): leader backs up quickly over incorrect follower logs (reliable network)... - ... Passed -- 12.1 5 1528 0 -Test (3B): RPC counts aren't too high (reliable network)... - ... Passed -- 3.1 3 106 0 -PASS -ok 6.5840/raft1 48.353s -go test -run 3B 1.37s user 0.74s system 4% cpu 48.865 total -$ -``` - -The ""ok 6.5840/raft1 48.353s"" means that Go measured the time taken for the 3B tests to be 48.353 seconds of real (wall-clock) time. The ""1.37s user"" means that the code consumed 1.37 seconds of CPU time, or time spent actually executing instructions (rather than waiting or sleeping). If your solution uses much more than a minute of real time for the 3B tests, or much more than 5 seconds of CPU time, you may run into trouble later on. Look for time spent sleeping or waiting for RPC timeouts, loops that run without sleeping or waiting for conditions or channel messages, or large numbers of RPCs sent.","Implement the leader and follower code to append new log entries, so that the `go test -run 3B` tests pass.","- Run `git pull` to get the latest lab software. -- Raft log is 1-indexed, but we suggest that you view it as 0-indexed, and starting out with an entry (at index=0) that has term 0. That allows the very first AppendEntries RPC to contain 0 as PrevLogIndex, and be a valid index into the log. -- Your first goal should be to pass `TestBasicAgree3B()`. Start by implementing `Start()`, then write the code to send and receive new log entries via `AppendEntries` RPCs, following Figure 2. Send each newly committed entry on `applyCh` on each peer.
-- You will need to implement the election restriction (section 5.4.1 in the paper). -- Your code may have loops that repeatedly check for certain events. Don't have these loops execute continuously without pausing, since that will slow your implementation enough that it fails tests. Use Go's [condition variables](https://golang.org/pkg/sync/#Cond), or insert a `time.Sleep(10 * time.Millisecond)` in each loop iteration. -- Do yourself a favor for future labs and write (or re-write) code that's clean and clear. For ideas, re-visit our the [Guidance page](http://nil.csail.mit.edu/6.5840/2025/labs/guidance.html) with tips on how to develop and debug your code. -- If you fail a test, look at `raft_test.go` and trace the test code from there to understand what's being tested.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/raft1 && time go test -run 3B,"Test (3B): basic agreement (reliable network)... - ... Passed -- 1.3 3 18 0 -Test (3B): RPC byte count (reliable network)... - ... Passed -- 2.8 3 56 0 -Test (3B): test progressive failure of followers (reliable network)... - ... Passed -- 5.3 3 188 0 -Test (3B): test failure of leaders (reliable network)... - ... Passed -- 6.4 3 378 0 -Test (3B): agreement after follower reconnects (reliable network)... - ... Passed -- 5.9 3 176 0 -Test (3B): no agreement if too many followers disconnect (reliable network)... - ... Passed -- 4.3 5 288 0 -Test (3B): concurrent Start()s (reliable network)... - ... Passed -- 1.5 3 32 0 -Test (3B): rejoin of partitioned leader (reliable network)... - ... Passed -- 5.3 3 216 0 -Test (3B): leader backs up quickly over incorrect follower logs (reliable network)... - ... Passed -- 12.1 5 1528 0 -Test (3B): RPC counts aren't too high (reliable network)... - ... Passed -- 3.1 3 106 0 -PASS -ok 6.5840/raft1 48.353s -go test -run 3B 1.37s user 0.74s system 4% cpu 48.865 total",hard,http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html -19,6.5840: Distributed Systems,Spring 2025,Lab 3: Raft,Part 3C: persistence,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will shard your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. 
- -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date.","Do a `git pull` to get the latest lab software. - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft1 -$ go test -Test (3A): initial election (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestInitialElection3A (4.90s) -Test (3A): election after network failure (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestReElection3A (5.05s) -... -$ -``` -","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. 
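To make the `applyCh` contract concrete, here is a hedged sketch of the service side. The `ApplyMsg` field names follow the usual skeleton convention but are assumptions here, as are the `Service` and `PutOp` types; only the channel-of-committed-entries idea comes from the text above.

```
package kvservice

// Assumed field names; check the ApplyMsg definition in the skeleton's raft.go.
type ApplyMsg struct {
	CommandValid bool
	Command      any
	CommandIndex int
	// snapshot-related fields omitted until Lab 3D
}

// PutOp is a hypothetical command type the service might log.
type PutOp struct {
	Key, Value string
}

type Service struct {
	applyCh chan ApplyMsg
	state   map[string]string // example service state: a key/value table
}

// applier runs as a goroutine and applies committed entries in log order,
// which is what keeps all replicas' state identical.
func (s *Service) applier() {
	for msg := range s.applyCh { // the loop ends when Raft closes applyCh
		if !msg.CommandValid {
			continue
		}
		if op, ok := msg.Command.(PutOp); ok {
			s.state[op.Key] = op.Value
		}
	}
}
```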
- -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.","If a Raft-based server reboots it should resume service where it left off. This requires that Raft keep persistent state that survives a reboot. The paper's Figure 2 mentions which state should be persistent. - -A real implementation would write Raft's persistent state to disk each time it changed, and would read the state from disk when restarting after a reboot. Your implementation won't use the disk; instead, it will save and restore persistent state from a `Persister` object (see `persister.go`). Whoever calls `Raft.Make()` supplies a `Persister` that initially holds Raft's most recently persisted state (if any). Raft should initialize its state from that `Persister`, and should use it to save its persistent state each time the state changes. Use the `Persister`'s `ReadRaftState()` and `Save()` methods. - -Complete the functions `persist()` and `readPersist()` in `raft.go` by adding code to save and restore persistent state. You will need to encode (or ""serialize"") the state as an array of bytes in order to pass it to the `Persister`. Use the `labgob` encoder; see the comments in `persist()` and `readPersist()`. `labgob` is like Go's `gob` encoder but prints error messages if you try to encode structures with lower-case field names. For now, pass `nil` as the second argument to `persister.Save()`. Insert calls to `persist()` at the points where your implementation changes persistent state. Once you've done this, and if the rest of your implementation is correct, you should pass all of the 3C tests. - -You will probably need the optimization that backs up nextIndex by more than one entry at a time. Look at the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf) starting at the bottom of page 7 and top of page 8 (marked by a gray line). The paper is vague about the details; you will need to fill in the gaps. One possibility is to have a rejection message include: - -``` - XTerm: term in the conflicting entry (if any) - XIndex: index of first entry with that term (if any) - XLen: log length -``` - -Then the leader's logic can be something like: - -``` - Case 1: leader doesn't have XTerm: - nextIndex = XIndex - Case 2: leader has XTerm: - nextIndex = (index of leader's last entry for XTerm) + 1 - Case 3: follower's log is too short: - nextIndex = XLen -``` - -A few other hints: - -- Run `git pull` to get the latest lab software. -- The 3C tests are more demanding than those for 3A or 3B, and failures may be caused by problems in your code for 3A or 3B. - -Your code should pass all the 3C tests (as shown below), as well as the 3A and 3B tests. - -``` -$ go test -run 3C -Test (3C): basic persistence (reliable network)... - ... Passed -- 6.6 3 110 0 -Test (3C): more persistence (reliable network)... - ... 
Passed -- 15.6 5 428 0 -Test (3C): partitioned leader and one follower crash, leader restarts (reliable network)... - ... Passed -- 3.1 3 50 0 -Test (3C): Figure 8 (reliable network)... - ... Passed -- 33.7 5 654 0 -Test (3C): unreliable agreement (unreliable network)... - ... Passed -- 2.1 5 1076 0 -Test (3C): Figure 8 (unreliable) (unreliable network)... - ... Passed -- 31.9 5 4400 0 -Test (3C): churn (reliable network)... - ... Passed -- 16.8 5 4896 0 -Test (3C): unreliable churn (unreliable network)... - ... Passed -- 16.1 5 7204 0 -PASS -ok 6.5840/raft1 126.054s -$ -``` - -It is a good idea to run the tests multiple times before submitting and check that each run prints `PASS`. - -``` -$ for i in {0..10}; do go test; done -``` -","Complete the functions `persist()` and `readPersist()` in `raft.go` by adding code to save and restore persistent state. You will need to encode (or ""serialize"") the state as an array of bytes in order to pass it to the `Persister`. Use the `labgob` encoder; see the comments in `persist()` and `readPersist()`. `labgob` is like Go's `gob` encoder but prints error messages if you try to encode structures with lower-case field names. For now, pass `nil` as the second argument to `persister.Save()`. Insert calls to `persist()` at the points where your implementation changes persistent state. Once you've done this, and if the rest of your implementation is correct, you should pass all of the 3C tests.","- Run `git pull` to get the latest lab software. -- The 3C tests are more demanding than those for 3A or 3B, and failures may be caused by problems in your code for 3A or 3B.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/raft1 && go test -run 3C,"Test (3C): basic persistence (reliable network)... - ... Passed -- 6.6 3 110 0 -Test (3C): more persistence (reliable network)... - ... Passed -- 15.6 5 428 0 -Test (3C): partitioned leader and one follower crash, leader restarts (reliable network)... - ... Passed -- 3.1 3 50 0 -Test (3C): Figure 8 (reliable network)... - ... Passed -- 33.7 5 654 0 -Test (3C): unreliable agreement (unreliable network)... - ... Passed -- 2.1 5 1076 0 -Test (3C): Figure 8 (unreliable) (unreliable network)... - ... Passed -- 31.9 5 4400 0 -Test (3C): churn (reliable network)... - ... Passed -- 16.8 5 4896 0 -Test (3C): unreliable churn (unreliable network)... - ... Passed -- 16.1 5 7204 0 -PASS -ok 6.5840/raft1 126.054s",hard,http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html -20,6.5840: Distributed Systems,Spring 2025,Lab 3: Raft,Part 3D: log compaction,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will shard your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. 
Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date.","Do a `git pull` to get the latest lab software. - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft1 -$ go test -Test (3A): initial election (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestInitialElection3A (4.90s) -Test (3A): election after network failure (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestReElection3A (5.05s) -... -$ -``` -","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,)` to create a Raft peer. 
The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.","As things stand now, a rebooting server replays the complete Raft log in order to restore its state. However, it's not practical for a long-running service to remember the complete Raft log forever. Instead, you'll modify Raft to cooperate with services that persistently store a ""snapshot"" of their state from time to time, at which point Raft discards log entries that precede the snapshot. The result is a smaller amount of persistent data and faster restart. However, it's now possible for a follower to fall so far behind that the leader has discarded the log entries it needs to catch up; the leader must then send a snapshot plus the log starting at the time of the snapshot. Section 7 of the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf) outlines the scheme; you will have to design the details. - -Your Raft must provide the following function that the service can call with a serialized snapshot of its state: - -``` -Snapshot(index int, snapshot []byte) -``` - -In Lab 3D, the tester calls `Snapshot()` periodically. In Lab 4, you will write a key/value server that calls `Snapshot()`; the snapshot will contain the complete table of key/value pairs. The service layer calls `Snapshot()` on every peer (not just on the leader). - -The `index` argument indicates the highest log entry that's reflected in the snapshot. Raft should discard its log entries before that point. You'll need to revise your Raft code to operate while storing only the tail of the log. - -You'll need to implement the `InstallSnapshot` RPC discussed in the paper that allows a Raft leader to tell a lagging Raft peer to replace its state with a snapshot. You will likely need to think through how InstallSnapshot should interact with the state and rules in Figure 2. - -When a follower's Raft code receives an InstallSnapshot RPC, it can use the `applyCh` to send the snapshot to the service in an `ApplyMsg`. The `ApplyMsg` struct definition already contains the fields you will need (and which the tester expects). Take care that these snapshots only advance the service's state, and don't cause it to move backwards. - -If a server crashes, it must restart from persisted data. Your Raft should persist both Raft state and the corresponding snapshot. 
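For example, a `persist()` along the following lines would save both pieces together. This is a hedged sketch: it assumes `labgob` mirrors `encoding/gob`'s `NewEncoder` API as suggested by the skeleton's comments, and that the durable fields are Figure 2's `currentTerm`, `votedFor`, and `log` plus a `snapshot []byte` kept by your Raft; adapt the names to your own struct.

```
// Sketch only: the field names (currentTerm, votedFor, log, snapshot) and the
// labgob.NewEncoder call are assumptions based on the lab skeleton.
// Requires "bytes" and the lab's labgob package.
func (rf *Raft) persist() {
	w := new(bytes.Buffer)
	e := labgob.NewEncoder(w)
	e.Encode(rf.currentTerm)
	e.Encode(rf.votedFor)
	e.Encode(rf.log)
	raftstate := w.Bytes()

	// The second argument is the snapshot; pass nil if none has been taken.
	rf.persister.Save(raftstate, rf.snapshot)
}
```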
Use the second argument to `persister.Save()` to save the snapshot. If there's no snapshot, pass `nil` as the second argument. - -When a server restarts, the application layer reads the persisted snapshot and restores its saved state. - -Implement `Snapshot()` and the InstallSnapshot RPC, as well as the changes to Raft to support these (e.g., operation with a trimmed log). Your solution is complete when it passes the 3D tests (and all the previous Lab 3 tests). - -- `git pull` to make sure you have the latest software. -- A good place to start is to modify your code so that it is able to store just the part of the log starting at some index X. Initially you can set X to zero and run the 3B/3C tests. Then make `Snapshot(index)` discard the log before `index`, and set X equal to `index`. If all goes well you should now pass the first 3D test. -- A common reason for failing the first 3D test is that followers take too long to catch up to the leader. -- Next: have the leader send an InstallSnapshot RPC if it doesn't have the log entries required to bring a follower up to date. -- Send the entire snapshot in a single InstallSnapshot RPC. Don't implement Figure 13's `offset` mechanism for splitting up the snapshot. -- Raft must discard old log entries in a way that allows the Go garbage collector to free and re-use the memory; this requires that there be no reachable references (pointers) to the discarded log entries. -- A reasonable amount of time to consume for the full set of Lab 3 tests (3A+3B+3C+3D) without `-race` is 6 minutes of real time and one minute of CPU time. When running with `-race`, it is about 10 minutes of real time and two minutes of CPU time. - -Your code should pass all the 3D tests (as shown below), as well as the 3A, 3B, and 3C tests. - -``` -$ go test -run 3D -Test (3D): snapshots basic (reliable network)... - ... Passed -- 3.3 3 522 0 -Test (3D): install snapshots (disconnect) (reliable network)... - ... Passed -- 48.4 3 2710 0 -Test (3D): install snapshots (disconnect) (unreliable network)... - ... Passed -- 56.1 3 3025 0 -Test (3D): install snapshots (crash) (reliable network)... - ... Passed -- 33.3 3 1559 0 -Test (3D): install snapshots (crash) (unreliable network)... - ... Passed -- 38.1 3 1723 0 -Test (3D): crash and restart all servers (unreliable network)... - ... Passed -- 11.2 3 296 0 -Test (3D): snapshot initialization after crash (unreliable network)... - ... Passed -- 4.3 3 84 0 -PASS -ok 6.5840/raft1 195.006s -``` -","Implement `Snapshot()` and the InstallSnapshot RPC, as well as the changes to Raft to support these (e.g., operation with a trimmed log). Your solution is complete when it passes the 3D tests (and all the previous Lab 3 tests).","- `git pull` to make sure you have the latest software. -- A good place to start is to modify your code so that it is able to store just the part of the log starting at some index X. Initially you can set X to zero and run the 3B/3C tests. Then make `Snapshot(index)` discard the log before `index`, and set X equal to `index`. If all goes well you should now pass the first 3D test. -- A common reason for failing the first 3D test is that followers take too long to catch up to the leader. -- Next: have the leader send an InstallSnapshot RPC if it doesn't have the log entries required to bring a follower up to date. -- Send the entire snapshot in a single InstallSnapshot RPC. Don't implement Figure 13's `offset` mechanism for splitting up the snapshot.
-- Raft must discard old log entries in a way that allows the Go garbage collector to free and re-use the memory; this requires that there be no reachable references (pointers) to the discarded log entries. -- A reasonable amount of time to consume for the full set of Lab 3 tests (3A+3B+3C+3D) without `-race` is 6 minutes of real time and one minute of CPU time. When running with `-race`, it is about 10 minutes of real time and two minutes of CPU time.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/raft1 && go test -run 3D,"Test (3D): snapshots basic (reliable network)... - ... Passed -- 3.3 3 522 0 -Test (3D): install snapshots (disconnect) (reliable network)... - ... Passed -- 48.4 3 2710 0 -Test (3D): install snapshots (disconnect) (unreliable network)... - ... Passed -- 56.1 3 3025 0 -Test (3D): install snapshots (crash) (reliable network)... - ... Passed -- 33.3 3 1559 0 -Test (3D): install snapshots (crash) (unreliable network)... - ... Passed -- 38.1 3 1723 0 -Test (3D): crash and restart all servers (unreliable network)... - ... Passed -- 11.2 3 296 0 -Test (3D): snapshot initialization after crash (unreliable network)... - ... Passed -- 4.3 3 84 0 -PASS -ok 6.5840/raft1 195.006s",hard,http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html -21,6.5840: Distributed Systems,Spring 2025,Lab 4: Fault-tolerant Key/Value Service,Part A: replicated state machine (RSM),"In this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html). To clients, the service looks similar to the server of [Lab 2](http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html). However, instead of a single server, the service consists of a set of servers that use Raft to help them maintain identical databases. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2025/figs/kvraft.pdf). - -Clients will interact with your key/value service through a Clerk, as in Lab 2. A Clerk implements the `Put` and `Get` methods with the same semantics as Lab 2: Puts are at-most-once and the Puts/Gets must form a linearizable history. - -Providing linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. - -This lab has three parts. In part A, you will implement a replicated-state machine package, `rsm`, using your raft implementation; `rsm` is agnostic of the requests that it replicates. In part B, you will implement a replicated key/value service using `rsm`, but without using snapshots. In part C, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. - -You should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), in particular Section 7 (but not 8). 
For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) Start early.","We supply you with skeleton code and tests in `src/kvraft1`. The skeleton code uses the skeleton package `src/kvraft1/rsm` to replicate a server. A server must implement the `StateMachine` interface defined in `rsm` to replicate itself using `rsm`. Most of your work will be implementing `rsm` to provide server-agnostic replication. You will also need to modify `kvraft1/client.go` and `kvraft1/server.go` to implement the server-specific parts. This split allows you to re-use `rsm` in the next lab. You may be able to re-use some of your Lab 2 code (e.g., re-using the server code by copying or importing the `""src/kvsrv1""` package) but it is not a requirement. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -.. -``` -",," - -``` -$ cd src/kvraft1/rsm -$ go test -v -=== RUN TestBasic -Test RSM basic (reliable network)... -.. - config.go:147: one: took too long -``` - -In the common situation of a client/server service using Raft for replication, the service interacts with Raft in two ways: the service leader submits client operations by calling `raft.Start()`, and all service replicas receive committed operations via Raft's `applyCh`, which they execute. On the leader, these two activities interact. At any given time, some server goroutines are handling client requests, have called `raft.Start()`, and each is waiting for its operation to commit and to find out what the result of executing the operation is. And as committed operations appear on the `applyCh`, each needs to be executed by the service, and the results need to be handed to the goroutine that called `raft.Start()` so that it can return the result to the client. - -The `rsm` package encapsulates the above interaction. It sits as a layer between the service (e.g. a key/value database) and Raft. In `rsm/rsm.go` you will need to implement a ""reader"" goroutine that reads the `applyCh`, and a `rsm.Submit()` function that calls `raft.Start()` for a client operation and then waits for the reader goroutine to hand it the result of executing that operation. - -The service that is using `rsm` appears to the `rsm` reader goroutine as a `StateMachine` object providing a `DoOp()` method. The reader goroutine should hand each committed operation to `DoOp()`; `DoOp()`'s return value should be given to the corresponding `rsm.Submit()` call for it to return. `DoOp()`'s argument and return value have type `any`; the actual values should have the same types as the argument and return values that the service passes to `rsm.Submit()`, respectively. - -The service should pass each client operation to `rsm.Submit()`. To help the reader goroutine match `applyCh` messages with waiting calls to `rsm.Submit()`, `Submit()` should wrap each client operation in an `Op` structure along with a unique identifier. `Submit()` should then wait until the operation has committed and been executed, and return the result of execution (the value returned by `DoOp()`). If `raft.Start()` indicates that the current peer is not the Raft leader, `Submit()` should return an `rpc.ErrWrongLeader` error. 
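One possible shape for this machinery is sketched below. It is not the required design: the `Op` wrapper fields, the `pending` map keyed by log index, the 100 ms poll, and the `(rpc.Err, any)` return shape are assumptions; only `raft.Start()`, `GetState()`, and `rpc.ErrWrongLeader` are named by the lab text.

```
// Hedged sketch of rsm.Submit(). The reader goroutine (not shown) applies each
// committed Op via DoOp(), then looks up pending[index]; it should compare the
// Op's Id and deliver a failure if a different operation committed there.
type Op struct {
	Id  int64 // unique per Submit() call, so results can be matched up
	Req any   // the service's request, passed through to DoOp()
}

func (r *RSM) Submit(req any) (rpc.Err, any) {
	op := Op{Id: nrand(), Req: req} // nrand(): hypothetical unique-id helper

	index, startTerm, isLeader := r.rf.Start(op)
	if !isLeader {
		return rpc.ErrWrongLeader, nil
	}

	ch := make(chan any, 1)
	r.mu.Lock()
	r.pending[index] = ch // the reader goroutine sends DoOp()'s result here
	r.mu.Unlock()

	for {
		select {
		case res := <-ch:
			return rpc.OK, res // rpc.OK is an assumed "no error" value
		case <-time.After(100 * time.Millisecond):
			// If the term moved on, our entry may have been overwritten by a
			// new leader and will never commit; let the client retry elsewhere.
			if term, _ := r.rf.GetState(); term != startTerm {
				return rpc.ErrWrongLeader, nil
			}
		}
	}
}
```

The term check in the timeout branch is one way to notice the lost-leadership case discussed next.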
`Submit()` should detect and handle the situation in which leadership changed just after it called `raft.Start()`, causing the operation to be lost (never committed). - -For Part A, the `rsm` tester acts as the service, submitting operations that it interprets as increments on a state consisting of a single integer. In Part B you'll use `rsm` as part of a key/value service that implements `StateMachine` (and `DoOp()`), and calls `rsm.Submit()`. - -If all goes well, the sequence of events for a client request is: - -- The client sends a request to the service leader. -- The service leader calls `rsm.Submit()` with the request. -- `rsm.Submit()` calls `raft.Start()` with the request, and then waits. -- Raft commits the request and sends it on all peers' `applyCh`s. -- The `rsm` reader goroutine on each peer reads the request from the `applyCh` and passes it to the service's `DoOp()`. -- On the leader, the `rsm` reader goroutine hands the `DoOp()` return value to the `Submit()` goroutine that originally submitted the request, and `Submit()` returns that value. - -Your servers should not directly communicate; they should only interact with each other through Raft. - -Implement `rsm.go`: the `Submit()` method and a reader goroutine. You have completed this task if you pass the `rsm` 4A tests: - -``` - $ cd src/kvraft1/rsm - $ go test -v -run 4A -=== RUN TestBasic4A -Test RSM basic (reliable network)... - ... Passed -- 1.2 3 48 0 ---- PASS: TestBasic4A (1.21s) -=== RUN TestLeaderFailure4A - ... Passed -- 9223372036.9 3 31 0 ---- PASS: TestLeaderFailure4A (1.50s) -PASS -ok 6.5840/kvraft1/rsm 2.887s -``` - -- You should not need to add any fields to the Raft `ApplyMsg`, or to Raft RPCs such as `AppendEntries`, but you are allowed to do so. -- Your solution needs to handle an `rsm` leader that has called `Start()` for a request submitted with `Submit()` but loses its leadership before the request is committed to the log. One way to do this is for the `rsm` to detect that it has lost leadership, by noticing that Raft's term has changed or a different request has appeared at the index returned by `Start()`, and return `rpc.ErrWrongLeader` from `Submit()`. If the ex-leader is partitioned by itself, it won't know about new leaders; but any client in the same partition won't be able to talk to a new leader either, so it's OK in this case for the server to wait indefinitely until the partition heals. -- The tester calls your Raft's `rf.Kill()` when it is shutting down a peer. Raft should close the `applyCh` so that your rsm learns about the shutdown, and can exit out of all loops.",Implement `rsm.go`: the `Submit()` method and a reader goroutine. You have completed this task if you pass the `rsm` 4A tests:,"- You should not need to add any fields to the Raft `ApplyMsg`, or to Raft RPCs such as `AppendEntries`, but you are allowed to do so. -- Your solution needs to handle an `rsm` leader that has called `Start()` for a request submitted with `Submit()` but loses its leadership before the request is committed to the log. One way to do this is for the `rsm` to detect that it has lost leadership, by noticing that Raft's term has changed or a different request has appeared at the index returned by `Start()`, and return `rpc.ErrWrongLeader` from `Submit()`. If the ex-leader is partitioned by itself, it won't know about new leaders; but any client in the same partition won't be able to talk to a new leader either, so it's OK in this case for the server to wait indefinitely until the partition heals. 
-- The tester calls your Raft's `rf.Kill()` when it is shutting down a peer. Raft should close the `applyCh` so that your rsm learns about the shutdown, and can exit out of all loops.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/kvraft1/rsm && go test -v -run 4A,"=== RUN TestBasic4A -Test RSM basic (reliable network)... - ... Passed -- 1.2 3 48 0 ---- PASS: TestBasic4A (1.21s) -=== RUN TestLeaderFailure4A - ... Passed -- 9223372036.9 3 31 0 ---- PASS: TestLeaderFailure4A (1.50s) -PASS -ok 6.5840/kvraft1/rsm 2.887s",moderate/hard,http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvraft1.html -22,6.5840: Distributed Systems,Spring 2025,Lab 4: Fault-tolerant Key/Value Service,Part B: Key/value service without snapshots,"In this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html). To clients, the service looks similar to the server of [Lab 2](http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html). However, instead of a single server, the service consists of a set of servers that use Raft to help them maintain identical databases. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2025/figs/kvraft.pdf). - -Clients will interact with your key/value service through a Clerk, as in Lab 2. A Clerk implements the `Put` and `Get` methods with the same semantics as Lab 2: Puts are at-most-once and the Puts/Gets must form a linearizable history. - -Providing linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. - -This lab has three parts. In part A, you will implement a replicated-state machine package, `rsm`, using your raft implementation; `rsm` is agnostic of the requests that it replicates. In part B, you will implement a replicated key/value service using `rsm`, but without using snapshots. In part C, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. - -You should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), in particular Section 7 (but not 8). For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) Start early.","We supply you with skeleton code and tests in `src/kvraft1`. The skeleton code uses the skeleton package `src/kvraft1/rsm` to replicate a server. A server must implement the `StateMachine` interface defined in `rsm` to replicate itself using `rsm`. Most of your work will be implementing `rsm` to provide server-agnostic replication. You will also need to modify `kvraft1/client.go` and `kvraft1/server.go` to implement the server-specific parts. This split allows you to re-use `rsm` in the next lab.
You may be able to re-use some of your Lab 2 code (e.g., re-using the server code by copying or importing the `""src/kvsrv1""` package) but it is not a requirement. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -.. -``` -",," - -``` -$ cd src/kvraft1 -$ go test -v -run TestBasic4B -=== RUN TestBasic4B -Test: one client (4B basic) (reliable network)... - kvtest.go:62: Wrong error -$ -``` - -Now you will use the `rsm` package to replicate a key/value server. Each of the servers (""kvservers"") will have an associated rsm/Raft peer. Clerks send `Put()` and `Get()` RPCs to the kvserver whose associated Raft is the leader. The kvserver code submits the Put/Get operation to `rsm`, which replicates it using Raft and invokes your server's `DoOp` at each peer, which should apply the operations to the peer's key/value database; the intent is for the servers to maintain identical replicas of the key/value database. - -A `Clerk` sometimes doesn't know which kvserver is the Raft leader. If the `Clerk` sends an RPC to the wrong kvserver, or if it cannot reach the kvserver, the `Clerk` should re-try by sending to a different kvserver. If the key/value service commits the operation to its Raft log (and hence applies the operation to the key/value state machine), the leader reports the result to the `Clerk` by responding to its RPC. If the operation failed to commit (for example, if the leader was replaced), the server reports an error, and the `Clerk` retries with a different server. - -Your kvservers should not directly communicate; they should only interact with each other through Raft. - -Your first task is to implement a solution that works when there are no dropped messages, and no failed servers. - -Feel free to copy your client code from Lab 2 (`kvsrv1/client.go`) into `kvraft1/client.go`. You will need to add logic for deciding which kvserver to send each RPC to. - -You'll also need to implement `Put()` and `Get()` RPC handlers in `server.go`. These handlers should submit the request to Raft using `rsm.Submit()`. As the `rsm` package reads commands from `applyCh`, it should invoke the `DoOp` method, which you will have to implement in `server.go`. - -You have completed this task when you **reliably** pass the first test in the test suite, with `go test -v -run TestBasic4B`. - -- A kvserver should not complete a `Get()` RPC if it is not part of a majority (so that it does not serve stale data). A simple solution is to enter every `Get()` (as well as each `Put()`) in the Raft log using `Submit()`. You don't have to implement the optimization for read-only operations that is described in Section 8. -- It's best to add locking from the start because the need to avoid deadlocks sometimes affects overall code design. Check that your code is race-free using `go test -race`. - -Now you should modify your solution to continue in the face of network and server failures. One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it finds a kvserver that replies positively. If a leader fails just after committing an entry to the Raft log, the `Clerk` may not receive a reply, and thus may re-send the request to another leader. Each call to `Clerk.Put()` should result in just a single execution for a particular version number. - -Add code to handle failures. 
Your `Clerk` can use a similar retry plan as in lab 2, including returning `ErrMaybe` if a response to a retried `Put` RPC is lost. You are done when your code reliably passes all the 4B tests, with `go test -v -run 4B`. - -- Recall that the rsm leader may lose its leadership and return `rpc.ErrWrongLeader` from `Submit()`. In this case you should arrange for the Clerk to re-send the request to other servers until it finds the new leader. -- You will probably have to modify your Clerk to remember which server turned out to be the leader for the last RPC, and send the next RPC to that server first. This will avoid wasting time searching for the leader on every RPC, which may help you pass some of the tests quickly enough. - -Your code should now pass the Lab 4B tests, like this: - -``` -$ cd kvraft1 -$ go test -run 4B -Test: one client (4B basic) ... - ... Passed -- 3.2 5 1041 183 -Test: one client (4B speed) ... - ... Passed -- 15.9 3 3169 0 -Test: many clients (4B many clients) ... - ... Passed -- 3.9 5 3247 871 -Test: unreliable net, many clients (4B unreliable net, many clients) ... - ... Passed -- 5.3 5 1035 167 -Test: unreliable net, one client (4B progress in majority) ... - ... Passed -- 2.9 5 155 3 -Test: no progress in minority (4B) ... - ... Passed -- 1.6 5 102 3 -Test: completion after heal (4B) ... - ... Passed -- 1.3 5 67 4 -Test: partitions, one client (4B partitions, one client) ... - ... Passed -- 6.2 5 958 155 -Test: partitions, many clients (4B partitions, many clients (4B)) ... - ... Passed -- 6.8 5 3096 855 -Test: restarts, one client (4B restarts, one client 4B ) ... - ... Passed -- 6.7 5 311 13 -Test: restarts, many clients (4B restarts, many clients) ... - ... Passed -- 7.5 5 1223 95 -Test: unreliable net, restarts, many clients (4B unreliable net, restarts, many clients ) ... - ... Passed -- 8.4 5 804 33 -Test: restarts, partitions, many clients (4B restarts, partitions, many clients) ... - ... Passed -- 10.1 5 1308 105 -Test: unreliable net, restarts, partitions, many clients (4B unreliable net, restarts, partitions, many clients) ... - ... Passed -- 11.9 5 1040 33 -Test: unreliable net, restarts, partitions, random keys, many clients (4B unreliable net, restarts, partitions, random keys, many clients) ... - ... Passed -- 12.1 7 2801 93 -PASS -ok 6.5840/kvraft1 103.797s -``` - -The numbers after each `Passed` are real time in seconds, number of peers, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put calls).","1. task1 - - Your first task is to implement a solution that works when there are no dropped messages, and no failed servers. - - Feel free to copy your client code from Lab 2 (`kvsrv1/client.go`) into `kvraft1/client.go`. You will need to add logic for deciding which kvserver to send each RPC to. - - You'll also need to implement `Put()` and `Get()` RPC handlers in `server.go`. These handlers should submit the request to Raft using `rsm.Submit()`. As the `rsm` package reads commands from `applyCh`, it should invoke the `DoOp` method, which you will have to implement in `server.go`. - - You have completed this task when you **reliably** pass the first test in the test suite, with `go test -v -run TestBasic4B`. - -2. task2 - - Add code to handle failures. Your `Clerk` can use a similar retry plan as in lab 2, including returning `ErrMaybe` if a response to a retried `Put` RPC is lost. You are done when your code reliably passes all the 4B tests, with `go test -v -run 4B`. 
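The retry behaviour described above can be pictured with a minimal, self-contained sketch. The error names `ErrWrongLeader` and `ErrMaybe` come from the lab text; the transport callback, reply shape, and leader-caching field below are illustrative assumptions, not the real labrpc interface.

```
package main

import "fmt"

// Error strings named in the lab text; the real codes live in the rpc package.
const (
	OK             = "OK"
	ErrWrongLeader = "ErrWrongLeader"
	ErrMaybe       = "ErrMaybe"
)

// sendPut stands in for an RPC to one kvserver's Put handler; the bool reports
// whether any reply came back (false models a lost reply).
type sendPut func(server int, key, value string) (err string, replied bool)

// Clerk remembers which server acted as leader last time and tries it first.
type Clerk struct {
	nservers int
	leader   int
	call     sendPut
}

// Put retries until some server accepts the request: it skips servers that
// answer ErrWrongLeader or don't answer at all, caches the server that finally
// replied, and reports ErrMaybe when an earlier attempt's reply was lost (that
// attempt might already have executed the Put).
func (ck *Clerk) Put(key, value string) string {
	lostReply := false
	for i := 0; ; i++ {
		srv := (ck.leader + i) % ck.nservers
		err, replied := ck.call(srv, key, value)
		if !replied {
			lostReply = true
			continue
		}
		if err == ErrWrongLeader {
			continue
		}
		ck.leader = srv // remember the leader for the next RPC
		if err == OK && lostReply {
			return ErrMaybe
		}
		return err
	}
}

func main() {
	// Toy transport: only server 2 is the leader and accepts the Put.
	ck := &Clerk{nservers: 3, call: func(s int, k, v string) (string, bool) {
		if s == 2 {
			return OK, true
		}
		return ErrWrongLeader, true
	}}
	fmt.Println(ck.Put("k", "v"), ck.leader) // OK 2
}
```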
- - - - -","- A kvserver should not complete a `Get()` RPC if it is not part of a majority (so that it does not serve stale data). A simple solution is to enter every `Get()` (as well as each `Put()`) in the Raft log using `Submit()`. You don't have to implement the optimization for read-only operations that is described in Section 8. -- It's best to add locking from the start because the need to avoid deadlocks sometimes affects overall code design. Check that your code is race-free using `go test -race`. - -- Recall that the rsm leader may lose its leadership and return `rpc.ErrWrongLeader` from `Submit()`. In this case you should arrange for the Clerk to re-send the request to other servers until it finds the new leader. -- You will probably have to modify your Clerk to remember which server turned out to be the leader for the last RPC, and send the next RPC to that server first. This will avoid wasting time searching for the leader on every RPC, which may help you pass some of the tests quickly enough.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/kvraft1 && go test -run 4B,"Test: one client (4B basic) ... - ... Passed -- 3.2 5 1041 183 -Test: one client (4B speed) ... - ... Passed -- 15.9 3 3169 0 -Test: many clients (4B many clients) ... - ... Passed -- 3.9 5 3247 871 -Test: unreliable net, many clients (4B unreliable net, many clients) ... - ... Passed -- 5.3 5 1035 167 -Test: unreliable net, one client (4B progress in majority) ... - ... Passed -- 2.9 5 155 3 -Test: no progress in minority (4B) ... - ... Passed -- 1.6 5 102 3 -Test: completion after heal (4B) ... - ... Passed -- 1.3 5 67 4 -Test: partitions, one client (4B partitions, one client) ... - ... Passed -- 6.2 5 958 155 -Test: partitions, many clients (4B partitions, many clients (4B)) ... - ... Passed -- 6.8 5 3096 855 -Test: restarts, one client (4B restarts, one client 4B ) ... - ... Passed -- 6.7 5 311 13 -Test: restarts, many clients (4B restarts, many clients) ... - ... Passed -- 7.5 5 1223 95 -Test: unreliable net, restarts, many clients (4B unreliable net, restarts, many clients ) ... - ... Passed -- 8.4 5 804 33 -Test: restarts, partitions, many clients (4B restarts, partitions, many clients) ... - ... Passed -- 10.1 5 1308 105 -Test: unreliable net, restarts, partitions, many clients (4B unreliable net, restarts, partitions, many clients) ... - ... Passed -- 11.9 5 1040 33 -Test: unreliable net, restarts, partitions, random keys, many clients (4B unreliable net, restarts, partitions, random keys, many clients) ... - ... Passed -- 12.1 7 2801 93 -PASS -ok 6.5840/kvraft1 103.797s",moderate,http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvraft1.html -23,6.5840: Distributed Systems,Spring 2025,Lab 4: Fault-tolerant Key/Value Service,Part C: Key/value service with snapshots,"In this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html). To clients, the service looks similar to the server of [Lab 2](http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html). However, instead of a single server, the service consists of a set of servers that use Raft to help them maintain identical databases. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. 
After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2025/figs/kvraft.pdf). - -Clients will interact with your key/value service through a Clerk, as in Lab 2. A Clerk implements the `Put` and `Get` methods with the same semantics as Lab 2: Puts are at-most-once and the Puts/Gets must form a linearizable history. - -Providing linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. - -This lab has three parts. In part A, you will implement a replicated-state machine package, `rsm`, using your raft implementation; `rsm` is agnostic of the requests that it replicates. In part B, you will implement a replicated key/value service using `rsm`, but without using snapshots. In part C, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. - -You should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), in particular Section 7 (but not 8). For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) Start early.","We supply you with skeleton code and tests in `src/kvraft1`. The skeleton code uses the skeleton package `src/kvraft1/rsm` to replicate a server. A server must implement the `StateMachine` interface defined in `rsm` to replicate itself using `rsm`. Most of your work will be implementing `rsm` to provide server-agnostic replication. You will also need to modify `kvraft1/client.go` and `kvraft1/server.go` to implement the server-specific parts. This split allows you to re-use `rsm` in the next lab. You may be able to re-use some of your Lab 2 code (e.g., re-using the server code by copying or importing the `""src/kvsrv1""` package) but it is not a requirement. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -.. -``` -",,"As things stand now, your key/value server doesn't call your Raft library's `Snapshot()` method, so a rebooting server has to replay the complete persisted Raft log in order to restore its state. Now you'll modify kvserver and `rsm` to cooperate with Raft to save log space and reduce restart time, using Raft's `Snapshot()` from Lab 3D. - -The tester passes `maxraftstate` to your `StartKVServer()`, which passes it to `rsm`. `maxraftstate` indicates the maximum allowed size of your persistent Raft state in bytes (including the log, but not including snapshots). You should compare `maxraftstate` to `rf.PersistBytes()`. Whenever your `rsm` detects that the Raft state size is approaching this threshold, it should save a snapshot by calling Raft's `Snapshot`. `rsm` can create this snapshot by calling the `Snapshot` method of the `StateMachine` interface to obtain a snapshot of the kvserver. If `maxraftstate` is -1, you do not have to snapshot. The `maxraftstate` limit applies to the GOB-encoded bytes your Raft passes as the first argument to `persister.Save()`. 
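A minimal sketch of that size check follows. The names `maxraftstate`, `PersistBytes`, and `Snapshot` come from the lab text, but the interfaces, signatures, and the 90% headroom threshold below are assumptions made only for illustration, not the real `rsm`/Raft APIs.

```
package main

import "fmt"

// Stand-ins for the raft handle and the StateMachine that rsm talks to; the
// exact signatures are assumptions for this sketch.
type raftHandle interface {
	PersistBytes() int               // size of the persisted Raft state, in bytes
	Snapshot(index int, snap []byte) // hand a snapshot to Raft at a log index
}

type stateMachine interface {
	Snapshot() []byte // serialize the server's replicated state
}

// maybeSnapshot is a check rsm could run after applying each committed entry:
// once the persisted Raft state approaches maxraftstate, ask the state machine
// for a snapshot and pass it to Raft so old log entries can be discarded.
// A maxraftstate of -1 disables snapshotting.
func maybeSnapshot(rf raftHandle, sm stateMachine, maxraftstate, lastApplied int) bool {
	if maxraftstate < 0 {
		return false
	}
	if rf.PersistBytes() < maxraftstate*9/10 { // leave a little headroom
		return false
	}
	rf.Snapshot(lastApplied, sm.Snapshot())
	return true
}

// Tiny in-memory fakes so the sketch runs on its own.
type fakeRaft struct{ size int }

func (f *fakeRaft) PersistBytes() int            { return f.size }
func (f *fakeRaft) Snapshot(index int, s []byte) { f.size = len(s) }

type fakeSM struct{}

func (fakeSM) Snapshot() []byte { return []byte("state") }

func main() {
	rf := &fakeRaft{size: 950}
	fmt.Println(maybeSnapshot(rf, fakeSM{}, 1000, 42)) // true: snapshot handed to Raft
	fmt.Println(rf.PersistBytes())                     // 5
}
```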
- -You can find the source for the `persister` object in `tester1/persister.go`. - -Modify your rsm so that it detects when the persisted Raft state grows too large, and then hands a snapshot to Raft. When a `rsm` server restarts, it should read the snapshot with `persister.ReadSnapshot()` and, if the snapshot's length is greater than zero, pass the snapshot to the `StateMachine`'s `Restore()` method. You complete this task if you pass TestSnapshot4C in `rsm`. - -``` -$ cd kvraft1/rsm -$ go test -run TestSnapshot4C -=== RUN TestSnapshot4C - ... Passed -- 9223372036.9 3 230 0 ---- PASS: TestSnapshot4C (3.88s) -PASS -ok 6.5840/kvraft1/rsm 3.882s -``` - -- Think about when `rsm` should snapshot its state and what should be included in the snapshot beyond just the server state. Raft stores each snapshot in the persister object using `Save()`, along with corresponding Raft state. You can read the latest stored snapshot using `ReadSnapshot()`. -- Capitalize all fields of structures stored in the snapshot. - -Implement the `kvraft1/server.go` `Snapshot()` and `Restore()` methods, which `rsm` calls. Modify `rsm` to handle applyCh messages that contain snapshots. - -- You may have bugs in your Raft and rsm library that this task exposes. If you make changes to your Raft implementation make sure it continues to pass all of the Lab 3 tests. -- A reasonable amount of time to take for the Lab 4 tests is 400 seconds of real time and 700 seconds of CPU time. - -Your code should pass the 4C tests (as in the example here) as well as the 4A+B tests (and your Raft must continue to pass the Lab 3 tests). - -``` -$ go test -run 4C -Test: snapshots, one client (4C SnapshotsRPC) ... -Test: InstallSnapshot RPC (4C) ... - ... Passed -- 4.5 3 241 64 -Test: snapshots, one client (4C snapshot size is reasonable) ... - ... Passed -- 11.4 3 2526 800 -Test: snapshots, one client (4C speed) ... - ... Passed -- 14.2 3 3149 0 -Test: restarts, snapshots, one client (4C restarts, snapshots, one client) ... - ... Passed -- 6.8 5 305 13 -Test: restarts, snapshots, many clients (4C restarts, snapshots, many clients ) ... - ... Passed -- 9.0 5 5583 795 -Test: unreliable net, snapshots, many clients (4C unreliable net, snapshots, many clients) ... - ... Passed -- 4.7 5 977 155 -Test: unreliable net, restarts, snapshots, many clients (4C unreliable net, restarts, snapshots, many clients) ... - ... Passed -- 8.6 5 847 33 -Test: unreliable net, restarts, partitions, snapshots, many clients (4C unreliable net, restarts, partitions, snapshots, many clients) ... - ... Passed -- 11.5 5 841 33 -Test: unreliable net, restarts, partitions, snapshots, random keys, many clients (4C unreliable net, restarts, partitions, snapshots, random keys, many clients) ... - ... Passed -- 12.8 7 2903 93 -PASS -ok 6.5840/kvraft1 83.543s -``` -","1. task1 - - Modify your rsm so that it detects when the persisted Raft state grows too large, and then hands a snapshot to Raft. When a `rsm` server restarts, it should read the snapshot with `persister.ReadSnapshot()` and, if the snapshot's length is greater than zero, pass the snapshot to the `StateMachine`'s `Restore()` method. You complete this task if you pass TestSnapshot4C in `rsm`. - -2. task2 - - Implement the `kvraft1/server.go` `Snapshot()` and `Restore()` methods, which `rsm` calls. Modify `rsm` to handle applyCh messages that contain snapshots.","1. hint - - Think about when `rsm` should snapshot its state and what should be included in the snapshot beyond just the server state. 
Raft stores each snapshot in the persister object using `Save()`, along with corresponding Raft state. You can read the latest stored snapshot using `ReadSnapshot()`. - - Capitalize all fields of structures stored in the snapshot. -2. hint2 - - You may have bugs in your Raft and rsm library that this task exposes. If you make changes to your Raft implementation make sure it continues to pass all of the Lab 3 tests. - - A reasonable amount of time to take for the Lab 4 tests is 400 seconds of real time and 700 seconds of CPU time.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd kvraft1/rsm && go test -run 4C,"Test: snapshots, one client (4C SnapshotsRPC) ... -Test: InstallSnapshot RPC (4C) ... - ... Passed -- 4.5 3 241 64 -Test: snapshots, one client (4C snapshot size is reasonable) ... - ... Passed -- 11.4 3 2526 800 -Test: snapshots, one client (4C speed) ... - ... Passed -- 14.2 3 3149 0 -Test: restarts, snapshots, one client (4C restarts, snapshots, one client) ... - ... Passed -- 6.8 5 305 13 -Test: restarts, snapshots, many clients (4C restarts, snapshots, many clients ) ... - ... Passed -- 9.0 5 5583 795 -Test: unreliable net, snapshots, many clients (4C unreliable net, snapshots, many clients) ... - ... Passed -- 4.7 5 977 155 -Test: unreliable net, restarts, snapshots, many clients (4C unreliable net, restarts, snapshots, many clients) ... - ... Passed -- 8.6 5 847 33 -Test: unreliable net, restarts, partitions, snapshots, many clients (4C unreliable net, restarts, partitions, snapshots, many clients) ... - ... Passed -- 11.5 5 841 33 -Test: unreliable net, restarts, partitions, snapshots, random keys, many clients (4C unreliable net, restarts, partitions, snapshots, random keys, many clients) ... - ... Passed -- 12.8 7 2903 93 -PASS -ok 6.5840/kvraft1 83.543s",moderate,http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvraft1.html -24,6.5840: Distributed Systems,Spring 2025,Lab 5: Sharded Key/Value Service,Part A: Moving shards,"You can either do a [final project](http://nil.csail.mit.edu/6.5840/2025/project.html) based on your own ideas, or this lab. - -In this lab you'll build a key/value storage system that ""shards,"" or partitions, the keys over a set of Raft-replicated key/value server groups (shardgrps). A shard is a subset of the key/value pairs; for example, all the keys starting with ""a"" might be one shard, all the keys starting with ""b"" another, etc. The reason for sharding is performance. Each shardgrp handles puts and gets for just a few of the shards, and the shardgrps operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of shardgrps. - -![shardkv design](http://nil.csail.mit.edu/6.5840/2025/labs/shardkv.png) - -The sharded key/value service has the components shown above. Shardgrps (shown with blue squares) store shards with keys: shardgrp 1 holds a shard storing key ""a"", and shardgrp 2 holds a shard storing key ""b"". Clients of the sharded key/value service interact with the service through a clerk (shown with a green circle), which implements `Get` and `Put` methods. To find the shardgrp for a key passed to `Put`/`Get`, the clerk gets the configuration from the kvsrv (shown with a black square), which you implemented in Lab 2. The configuration (not shown) describes the mapping from shards to shardgrps (e.g., shard 1 is served by shardgrp 3). 
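To make the shard-to-group lookup concrete, here is a small self-contained sketch. `Key2Shard` and the group lookup mirror the `shardcfg` helpers the lab text mentions, but the `Config` struct, shard count, and hash below are simplified stand-ins rather than the real `shardcfg` package.

```
package main

import "fmt"

const NShards = 12 // illustrative; the real shard count lives in shardcfg

// Simplified stand-in for shardcfg.ShardConfig: which group serves each shard,
// and which servers replicate each group.
type Config struct {
	Num    int
	Shards [NShards]int     // shard number -> group id
	Groups map[int][]string // group id -> replica server names
}

// key2shard plays the role of shardcfg.Key2Shard: map a key to a shard number.
// The real hash may differ; this is only for illustration.
func key2shard(key string) int {
	if len(key) == 0 {
		return 0
	}
	return int(key[0]) % NShards
}

// serversFor is the lookup a clerk performs before each Put/Get: consult the
// current configuration to find the group responsible for the key's shard,
// then talk to that group's servers.
func serversFor(cfg Config, key string) (gid int, servers []string) {
	gid = cfg.Shards[key2shard(key)]
	return gid, cfg.Groups[gid]
}

func main() {
	cfg := Config{Num: 1, Groups: map[int][]string{1: {"grp1-a", "grp1-b", "grp1-c"}}}
	for s := range cfg.Shards {
		cfg.Shards[s] = 1 // initially one group owns every shard
	}
	gid, srvs := serversFor(cfg, "a")
	fmt.Println(gid, srvs) // 1 [grp1-a grp1-b grp1-c]
}
```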
- -An administrator (i.e., the tester) uses another client, the controller (shown with a purple circle), to add/remove shardgrps from the cluster and update which shardgrp should serve a shard. The controller has one main method: `ChangeConfigTo`, which takes as argument a new configuration and changes the system from the current configuration to the new configuration; this involves moving shards to new shardgrps that are joining the system and moving shards away from shardgrps that are leaving the system. To do so the controller 1) makes RPCs (`FreezeShard`, `InstallShard`, and `DeleteShard`) to shardgrps, and 2) updates the configuration stored in kvsrv. - -The reason for the controller is that a sharded storage system must be able to shift shards among shardgrps. One reason is that some shardgrps may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that shardgrps may join and leave the system: new shardgrps may be added to increase capacity, or existing shardgrps may be taken offline for repair or retirement. - -The main challenges in this lab will be ensuring linearizability of `Get`/`Put` operations while handling 1) changes in the assignment of shards to shardgrps, and 2) recovering from a controller that fails or is partitioned during `ChangeConfigTo`. - -1. `ChangeConfigTo` moves shards from one shardgrp to another. A risk is that some clients might use the old shardgrp while other clients use the new shardgrp, which could break linearizability. You will need to ensure that at most one shardgrp is serving requests for each shard at any one time. -2. If `ChangeConfigTo` fails while reconfiguring, some shards may be inaccessible if they have started but not completed moving from one shardgrp to another. To make forward progress, the tester starts a new controller, and your job is to ensure that the new one completes the reconfiguration that the previous controller started. - -This lab uses ""configuration"" to refer to the assignment of shards to shardgrps. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. - -A shardgrp server is a member of only a single shardgrp. The set of servers in a given shardgrp will never change. - -Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. - -In Part A, you will implement a working `shardctrler`, which will store and retrieve configurations in a `kvsrv`. You will also implement the `shardgrp`, replicated with your Raft `rsm` package, and a corresponding `shardgrp` clerk. The `shardctrler` talks to the `shardgrp` clerks to move shards between different groups. - -In Part B, you will modify your `shardctrler` to handle failures and partitions during config changes. In Part C, you will extend your `shardctrler` to allow for concurrent controllers without interfering with each other. Finally, in Part D, you will have the opportunity to extend your solution in any way you like. - -This lab's sharded key/value service follows the same general design as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are simple; and so on. 
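The controller's shard-move RPCs named above (`FreezeShard`, `InstallShard`, `DeleteShard`) can be sketched as plain method calls, with each call carrying a configuration number so that stale or duplicate requests are ignored. Everything below, including the RPC shapes, the in-memory group, and the fencing rule, is a simplified assumption for illustration; the real RPCs go through `shardrpc` and each group's `rsm`/Raft log.

```
package main

import "fmt"

// Hypothetical per-shard RPC surface of a shardgrp, reduced to method calls.
type shardgrp interface {
	FreezeShard(shard, num int) (kv map[string]string, ok bool)
	InstallShard(shard int, kv map[string]string, num int) bool
	DeleteShard(shard, num int) bool
}

// moveShard is one way a controller could move a single shard: freeze it at
// the source (so further Puts are rejected), install the frozen copy at the
// destination, then delete it from the source. Every call carries the new
// configuration's number so the groups can reject anything older.
func moveShard(src, dst shardgrp, shard, num int) bool {
	kv, ok := src.FreezeShard(shard, num)
	if !ok {
		return false
	}
	if !dst.InstallShard(shard, kv, num) {
		return false
	}
	return src.DeleteShard(shard, num)
}

// memgrp is a toy in-memory group so the sketch runs; it remembers the largest
// configuration number seen per shard and ignores anything older.
type memgrp struct {
	shards map[int]map[string]string // shard number -> key/value data
	seen   map[int]int               // largest config number seen per shard
}

func newgrp() *memgrp {
	return &memgrp{shards: map[int]map[string]string{}, seen: map[int]int{}}
}

func (g *memgrp) fence(shard, num int) bool {
	if num < g.seen[shard] {
		return false // stale request from an older configuration
	}
	g.seen[shard] = num
	return true
}

func (g *memgrp) FreezeShard(shard, num int) (map[string]string, bool) {
	if !g.fence(shard, num) {
		return nil, false
	}
	return g.shards[shard], true
}

func (g *memgrp) InstallShard(shard int, kv map[string]string, num int) bool {
	if !g.fence(shard, num) {
		return false
	}
	g.shards[shard] = kv
	return true
}

func (g *memgrp) DeleteShard(shard, num int) bool {
	if !g.fence(shard, num) {
		return false
	}
	delete(g.shards, shard)
	return true
}

func main() {
	g1, g2 := newgrp(), newgrp()
	g1.shards[3] = map[string]string{"a": "1"}
	fmt.Println(moveShard(g1, g2, 3, 2)) // true: shard 3 now lives on the second group
	fmt.Println(g2.shards[3]["a"])       // 1
}
```

A nice property of this per-shard structure is that groups never talk to each other directly and shards that are not being moved can keep serving requests.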
- -Lab 5 will use your `kvsrv` from Lab 2, and your `rsm` and `Raft` from Lab 4. Your Lab 5 and Lab 4 must use the same `rsm` and `Raft` implementations. - -You may use late hours for Part A, but you may not use late hours for Parts B-D.","Do a `git pull` to get the latest lab software. - -We supply you with tests and skeleton code in `src/shardkv1`: - -- `client.go` for the shardkv clerk -- `shardcfg` package for computing shard configurations -- `shardgrp` package: for the shardgrp clerk and server. -- `shardctrler` package, which contains `shardctrler.go` with methods for the controller to change a configuration (`ChangeConfigTo`) and to get a configuration (`Query`) - -To get up and running, execute the following commands: - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/shardkv1 -$ go test -v -=== RUN TestInitQuery5A -Test (5A): Init and Query ... (reliable network)... - shardkv_test.go:46: Static wrong null 0 -... -``` -",,"Your first job is to implement shardgrps and the `InitConfig`, `Query`, and `ChangeConfigTo` methods when there are no failures. We have given you the code for describing a configuration, in `shardkv1/shardcfg`. Each `shardcfg.ShardConfig` has a unique identifying number, a mapping from shard number to group number, and a mapping from group number to the list of servers replicating that group. There will usually be more shards than groups (so that each group serves more than one shard), in order that load can be shifted at a fairly fine granularity. - -Implement these two methods in `shardctrler/shardctrler.go`: - -- The `InitConfig` method receives the first configuration, passed to it by the tester as a `shardcfg.ShardConfig`. `InitConfig` should store the configuration in an instance of Lab 2's `kvsrv`. -- The `Query` method returns the current configuration; it should read the configuration from `kvsrv`, previously stored there by `InitConfig`. - -Implement `InitConfig` and `Query`, and store the configuration in `kvsrv`. You're done when your code passes the first test. Note this task doesn't require any shardgrps. - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run TestInitQuery5A -Test (5A): Init and Query ... (reliable network)... - ... Passed -- time 0.0s #peers 1 #RPCs 3 #Ops 0 -PASS -ok 6.5840/shardkv1 0.197s -$ -``` - -- Implement `InitConfig` and `Query` by storing and reading the initial configuration from `kvsrv`: use the `Get`/`Put` methods of `ShardCtrler.IKVClerk` to talk to `kvsrv`, use the `String` method of `ShardConfig` to turn a `ShardConfig` into a string that you can pass to `Put`, and use the `shardcfg.FromString()` function to turn a string into a `ShardConfig`. - -Implement an initial version of `shardgrp` in `shardkv1/shardgrp/server.go` and a corresponding clerk in `shardkv1/shardgrp/client.go` by copying code from your Lab 4 `kvraft` solution. - -Implement a clerk in `shardkv1/client.go` that uses the `Query` method to find the shardgrp for a key, and then talks to that shardgrp. You're done when your code passes the `Static` test. - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run Static -Test (5A): one shard group ... (reliable network)... - ... Passed -- time 5.4s #peers 1 #RPCs 793 #Ops 180 -PASS -ok 6.5840/shardkv1 5.632s -$ -``` - -- Copy code from your `kvraft` client.go and server.go for `Put` and `Get`, and any other code you need from `kvraft`. 
-- The code in `shardkv1/client.go` provides the `Put`/`Get` clerk for the overall system: it finds out which shardgrp holds the desired key's shard by invoking the `Query` method, and then talks to the shardgrp that holds that shard. -- Implement `shardkv1/client.go`, including its `Put`/`Get` methods. Use `shardcfg.Key2Shard()` to find the shard number for a key. The tester passes a `ShardCtrler` object to `MakeClerk` in `shardkv1/client.go`. Retrieve the current configuration using the `Query` method. -- To put/get a key from a shardgrp, the shardkv clerk should create a shardgrp clerk for the shardgrp by calling `shardgrp.MakeClerk`, passing in the servers found in the configuration and the shardkv clerk's `ck.clnt`. Use the `GidServers()` method from `ShardConfig` to get the group for a shard. -- `shardkv1/client.go`'s Put must return `ErrMaybe` when the reply was maybe lost, but this Put invokes `shardgrp`'s Put to talk a particular shardgrp. The inner Put can signal this with an error. -- Upon creation, the first shardgrp (`shardcfg.Gid1`) should initialize itself to own all shards. - -Now you should support movement of shards among groups by implementing the `ChangeConfigTo` method, which changes from an old configuration to a new configuration. The new configuration may include new shardgrps that are not present in the old configuration, and may exclude shardgrps that were present in the old configuration. The controller should move shards (the key/value data) so that the set of shards stored by each shardgrp matches the new configuration. - -The approach we suggest for moving a shard is for `ChangeConfigTo` to first ""freeze"" the shard at the source shardgrp, causing that shardgrp to reject `Put`'s for keys in the moving shard. Then, copy (install) the shard to the destination shardgrp; then delete the frozen shard. Finally, post a new configuration so that clients can find the moved shard. A nice property of this approach is that it avoids any direct interactions among the shardgrps. It also supports serving shards that are not affected by an ongoing configuration change. - -To be able to order changes to the configuration, each configuration has a unique number `Num` (see `shardcfg/shardcfg.go`). The tester in Part A invokes `ChangeConfigTo` sequentially, and the configuration passed to `ChangeConfigTo` will have a `Num` one larger than the previous one; thus, a configuration with a higher `Num` is newer than one with a lower `Num`. - -The network may delay RPCs, and RPCs may arrive out of order at the shardgrps. To reject old `FreezeShard`, `InstallShard`, and `DeleteShard` RPCs, they should include `Num` (see `shardgrp/shardrpc/shardrpc.go`), and shardgrps must remember the largest `Num` they have seen for each shard. - -Implement `ChangeConfigTo` (in `shardctrler/shardctrler.go`) and extend `shardgrp` to support freeze, install, and delete. `ChangeConfigTo` should always succeed in Part A because the tester doesn't induce failures in this part. You will need to implement `FreezeShard`, `InstallShard`, and `DeleteShard` in `shardgrp/client.go` and `shardgrp/server.go` using the RPCs in the `shardgrp/shardrpc` package, and reject old RPCs based on `Num`. You will also need modify the shardkv clerk in `shardkv1/client.go` to handle `ErrWrongGroup`, which a shardgrp should return if it isn't responsible for the shard. - -You have completed this task when you pass the `JoinBasic` and `DeleteBasic` tests. 
These tests focus on adding shardgrps; you don't have to worry about shardgrps leaving just yet. - -- A shardgrp should respond with an `ErrWrongGroup` error to a client `Put`/`Get` with a key that the shardgrp isn't responsible for (i.e., for a key whose shard is not assigned to the shardgrp). You will have to modify `shardkv1/client.go` to reread the configuration and retry the `Put`/`Get`. -- Note that you will have to run `FreezeShard`, `InstallShard`, and `DeleteShard` through your `rsm` package, just like `Put` and `Get`. -- You can send an entire map as your state in an RPC request or reply, which may help keep the code for shard transfer simple. -- If one of your RPC handlers includes in its reply a map (e.g. a key/value map) that's part of your server's state, you may get bugs due to races. The RPC system has to read the map in order to send it to the caller, but it isn't holding a lock that covers the map. Your server, however, may proceed to modify the same map while the RPC system is reading it. The solution is for the RPC handler to include a copy of the map in the reply. - -Extend `ChangeConfigTo` to handle shard groups that leave; i.e., shardgrps that are present in the current configuration but not in the new one. Your solution should pass `TestJoinLeaveBasic5A` now. (You may have handled this scenario already in the previous task, but the previous tests didn't test for shardgrps leaving.) - -Make your solution pass all Part A tests, which check that your sharded key/value service supports many groups joining and leaving, shardgrps restarting from snapshots, processing `Get`s while some shards are offline or involved in a configuration change, and linearizability when many clients interact with the service while the tester concurrently invokes the controller's `ChangeConfigTo` to rebalance shards. - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run 5A -Test (5A): Init and Query ... (reliable network)... - ... Passed -- time 0.0s #peers 1 #RPCs 3 #Ops 0 -Test (5A): one shard group ... (reliable network)... - ... Passed -- time 5.1s #peers 1 #RPCs 792 #Ops 180 -Test (5A): a group joins... (reliable network)... - ... Passed -- time 12.9s #peers 1 #RPCs 6300 #Ops 180 -Test (5A): delete ... (reliable network)... - ... Passed -- time 8.4s #peers 1 #RPCs 1533 #Ops 360 -Test (5A): basic groups join/leave ... (reliable network)... - ... Passed -- time 13.7s #peers 1 #RPCs 5676 #Ops 240 -Test (5A): many groups join/leave ... (reliable network)... - ... Passed -- time 22.1s #peers 1 #RPCs 3529 #Ops 180 -Test (5A): many groups join/leave ... (unreliable network)... - ... Passed -- time 54.8s #peers 1 #RPCs 5055 #Ops 180 -Test (5A): shutdown ... (reliable network)... - ... Passed -- time 11.7s #peers 1 #RPCs 2807 #Ops 180 -Test (5A): progress ... (reliable network)... - ... Passed -- time 8.8s #peers 1 #RPCs 974 #Ops 82 -Test (5A): progress ... (reliable network)... - ... Passed -- time 13.9s #peers 1 #RPCs 2443 #Ops 390 -Test (5A): one concurrent clerk reliable... (reliable network)... - ... Passed -- time 20.0s #peers 1 #RPCs 5326 #Ops 1248 -Test (5A): many concurrent clerks reliable... (reliable network)... - ... Passed -- time 20.4s #peers 1 #RPCs 21688 #Ops 10500 -Test (5A): one concurrent clerk unreliable ... (unreliable network)... - ... Passed -- time 25.8s #peers 1 #RPCs 2654 #Ops 176 -Test (5A): many concurrent clerks unreliable... (unreliable network)... - ... 
Passed -- time 25.3s #peers 1 #RPCs 7553 #Ops 1896 -PASS -ok 6.5840/shardkv1 243.115s -$ -``` - -Your solution must continue serving shards that are not affected by an ongoing configuration change.","1. task1 - - Implement `InitConfig` and `Query`, and store the configuration in `kvsrv`. You're done when your code passes the first test. Note this task doesn't require any shardgrps. - -2. task2 - - Implement an initial version of `shardgrp` in `shardkv1/shardgrp/server.go` and a corresponding clerk in `shardkv1/shardgrp/client.go` by copying code from your Lab 4 `kvraft` solution. - - Implement a clerk in `shardkv1/client.go` that uses the `Query` method to find the shardgrp for a key, and then talks to that shardgrp. You're done when your code passes the `Static` test. - -3. task3 - - Implement `ChangeConfigTo` (in `shardctrler/shardctrler.go`) and extend `shardgrp` to support freeze, install, and delete. `ChangeConfigTo` should always succeed in Part A because the tester doesn't induce failures in this part. You will need to implement `FreezeShard`, `InstallShard`, and `DeleteShard` in `shardgrp/client.go` and `shardgrp/server.go` using the RPCs in the `shardgrp/shardrpc` package, and reject old RPCs based on `Num`. You will also need modify the shardkv clerk in `shardkv1/client.go` to handle `ErrWrongGroup`, which a shardgrp should return if it isn't responsible for the shard. - - You have completed this task when you pass the `JoinBasic` and `DeleteBasic` tests. These tests focus on adding shardgrps; you don't have to worry about shardgrps leaving just yet. - -4. task4 - - Extend `ChangeConfigTo` to handle shard groups that leave; i.e., shardgrps that are present in the current configuration but not in the new one. Your solution should pass `TestJoinLeaveBasic5A` now. (You may have handled this scenario already in the previous task, but the previous tests didn't test for shardgrps leaving.) - -5. task5 - - Make your solution pass all Part A tests, which check that your sharded key/value service supports many groups joining and leaving, shardgrps restarting from snapshots, processing `Get`s while some shards are offline or involved in a configuration change, and linearizability when many clients interact with the service while the tester concurrently invokes the controller's `ChangeConfigTo` to rebalance shards.","1. hint1 - - Implement `InitConfig` and `Query` by storing and reading the initial configuration from `kvsrv`: use the `Get`/`Put` methods of `ShardCtrler.IKVClerk` to talk to `kvsrv`, use the `String` method of `ShardConfig` to turn a `ShardConfig` into a string that you can pass to `Put`, and use the `shardcfg.FromString()` function to turn a string into a `ShardConfig`. -2. hint2 - - Copy code from your `kvraft` client.go and server.go for `Put` and `Get`, and any other code you need from `kvraft`. - - The code in `shardkv1/client.go` provides the `Put`/`Get` clerk for the overall system: it finds out which shardgrp holds the desired key's shard by invoking the `Query` method, and then talks to the shardgrp that holds that shard. - - Implement `shardkv1/client.go`, including its `Put`/`Get` methods. Use `shardcfg.Key2Shard()` to find the shard number for a key. The tester passes a `ShardCtrler` object to `MakeClerk` in `shardkv1/client.go`. Retrieve the current configuration using the `Query` method. 
- - To put/get a key from a shardgrp, the shardkv clerk should create a shardgrp clerk for the shardgrp by calling `shardgrp.MakeClerk`, passing in the servers found in the configuration and the shardkv clerk's `ck.clnt`. Use the `GidServers()` method from `ShardConfig` to get the group for a shard. - - `shardkv1/client.go`'s Put must return `ErrMaybe` when the reply was maybe lost, but this Put invokes `shardgrp`'s Put to talk a particular shardgrp. The inner Put can signal this with an error. - - Upon creation, the first shardgrp (`shardcfg.Gid1`) should initialize itself to own all shards. -3. hint3 - - A shardgrp should respond with an `ErrWrongGroup` error to a client `Put`/`Get` with a key that the shardgrp isn't responsible for (i.e., for a key whose shard is not assigned to the shardgrp). You will have to modify `shardkv1/client.go` to reread the configuration and retry the `Put`/`Get`. - - Note that you will have to run `FreezeShard`, `InstallShard`, and `DeleteShard` through your `rsm` package, just like `Put` and `Get`. - - You can send an entire map as your state in an RPC request or reply, which may help keep the code for shard transfer simple. - - If one of your RPC handlers includes in its reply a map (e.g. a key/value map) that's part of your server's state, you may get bugs due to races. The RPC system has to read the map in order to send it to the caller, but it isn't holding a lock that covers the map. Your server, however, may proceed to modify the same map while the RPC system is reading it. The solution is for the RPC handler to include a copy of the map in the reply.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/shardkv1 && go test -run 5A,"Test (5A): Init and Query ... (reliable network)... - ... Passed -- time 0.0s #peers 1 #RPCs 3 #Ops 0 -Test (5A): one shard group ... (reliable network)... - ... Passed -- time 5.1s #peers 1 #RPCs 792 #Ops 180 -Test (5A): a group joins... (reliable network)... - ... Passed -- time 12.9s #peers 1 #RPCs 6300 #Ops 180 -Test (5A): delete ... (reliable network)... - ... Passed -- time 8.4s #peers 1 #RPCs 1533 #Ops 360 -Test (5A): basic groups join/leave ... (reliable network)... - ... Passed -- time 13.7s #peers 1 #RPCs 5676 #Ops 240 -Test (5A): many groups join/leave ... (reliable network)... - ... Passed -- time 22.1s #peers 1 #RPCs 3529 #Ops 180 -Test (5A): many groups join/leave ... (unreliable network)... - ... Passed -- time 54.8s #peers 1 #RPCs 5055 #Ops 180 -Test (5A): shutdown ... (reliable network)... - ... Passed -- time 11.7s #peers 1 #RPCs 2807 #Ops 180 -Test (5A): progress ... (reliable network)... - ... Passed -- time 8.8s #peers 1 #RPCs 974 #Ops 82 -Test (5A): progress ... (reliable network)... - ... Passed -- time 13.9s #peers 1 #RPCs 2443 #Ops 390 -Test (5A): one concurrent clerk reliable... (reliable network)... - ... Passed -- time 20.0s #peers 1 #RPCs 5326 #Ops 1248 -Test (5A): many concurrent clerks reliable... (reliable network)... - ... Passed -- time 20.4s #peers 1 #RPCs 21688 #Ops 10500 -Test (5A): one concurrent clerk unreliable ... (unreliable network)... - ... Passed -- time 25.8s #peers 1 #RPCs 2654 #Ops 176 -Test (5A): many concurrent clerks unreliable... (unreliable network)... - ... 
Passed -- time 25.3s #peers 1 #RPCs 7553 #Ops 1896 -PASS -ok 6.5840/shardkv1 243.115s",hard,http://nil.csail.mit.edu/6.5840/2025/labs/lab-shard1.html -25,6.5840: Distributed Systems,Spring 2025,Lab 5: Sharded Key/Value Service,Part B: Handling a failed controller,"You can either do a [final project](http://nil.csail.mit.edu/6.5840/2025/project.html) based on your own ideas, or this lab. - -In this lab you'll build a key/value storage system that ""shards,"" or partitions, the keys over a set of Raft-replicated key/value server groups (shardgrps). A shard is a subset of the key/value pairs; for example, all the keys starting with ""a"" might be one shard, all the keys starting with ""b"" another, etc. The reason for sharding is performance. Each shardgrp handles puts and gets for just a few of the shards, and the shardgrps operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of shardgrps. - -![shardkv design](http://nil.csail.mit.edu/6.5840/2025/labs/shardkv.png) - -The sharded key/value service has the components shown above. Shardgrps (shown with blue squares) store shards with keys: shardgrp 1 holds a shard storing key ""a"", and shardgrp 2 holds a shard storing key ""b"". Clients of the sharded key/value service interact with the service through a clerk (shown with a green circle), which implements `Get` and `Put` methods. To find the shardgrp for a key passed to `Put`/`Get`, the clerk gets the configuration from the kvsrv (shown with a black square), which you implemented in Lab 2. The configuration (not shown) describes the mapping from shards to shardgrps (e.g., shard 1 is served by shardgrp 3). - -An administrator (i.e., the tester) uses another client, the controller (shown with a purple circle), to add/remove shardgrps from the cluster and update which shardgrp should serve a shard. The controller has one main method: `ChangeConfigTo`, which takes as argument a new configuration and changes the system from the current configuration to the new configuration; this involves moving shards to new shardgrps that are joining the system and moving shards away from shardgrps that are leaving the system. To do so the controller 1) makes RPCs (`FreezeShard`, `InstallShard`, and `DeleteShard`) to shardgrps, and 2) updates the configuration stored in kvsrv. - -The reason for the controller is that a sharded storage system must be able to shift shards among shardgrps. One reason is that some shardgrps may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that shardgrps may join and leave the system: new shardgrps may be added to increase capacity, or existing shardgrps may be taken offline for repair or retirement. - -The main challenges in this lab will be ensuring linearizability of `Get`/`Put` operations while handling 1) changes in the assignment of shards to shardgrps, and 2) recovering from a controller that fails or is partitioned during `ChangeConfigTo`. - -1. `ChangeConfigTo` moves shards from one shardgrp to another. A risk is that some clients might use the old shardgrp while other clients use the new shardgrp, which could break linearizability. You will need to ensure that at most one shardgrp is serving requests for each shard at any one time. -2. If `ChangeConfigTo` fails while reconfiguring, some shards may be inaccessible if they have started but not completed moving from one shardgrp to another. 
To make forward progress, the tester starts a new controller, and your job is to ensure that the new one completes the reconfiguration that the previous controller started. - -This lab uses ""configuration"" to refer to the assignment of shards to shardgrps. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. - -A shardgrp server is a member of only a single shardgrp. The set of servers in a given shardgrp will never change. - -Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. - -In Part A, you will implement a working `shardctrler`, which will store and retrieve configurations in a `kvsrv`. You will also implement the `shardgrp`, replicated with your Raft `rsm` package, and a corresponding `shardgrp` clerk. The `shardctrler` talks to the `shardgrp` clerks to move shards between different groups. - -In Part B, you will modify your `shardctrler` to handle failures and partitions during config changes. In Part C, you will extend your `shardctrler` to allow for concurrent controllers without interfering with each other. Finally, in Part D, you will have the opportunity to extend your solution in any way you like. - -This lab's sharded key/value service follows the same general design as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are simple; and so on. - -Lab 5 will use your `kvsrv` from Lab 2, and your `rsm` and `Raft` from Lab 4. Your Lab 5 and Lab 4 must use the same `rsm` and `Raft` implementations. - -You may use late hours for Part A, but you may not use late hours for Parts B-D.","Do a `git pull` to get the latest lab software. - -We supply you with tests and skeleton code in `src/shardkv1`: - -- `client.go` for the shardkv clerk -- `shardcfg` package for computing shard configurations -- `shardgrp` package: for the shardgrp clerk and server. -- `shardctrler` package, which contains `shardctrler.go` with methods for the controller to change a configuration (`ChangeConfigTo`) and to get a configuration (`Query`) - -To get up and running, execute the following commands: - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/shardkv1 -$ go test -v -=== RUN TestInitQuery5A -Test (5A): Init and Query ... (reliable network)... - shardkv_test.go:46: Static wrong null 0 -... -``` -",,"The controller is a short-lived command, which an administrator invokes: it moves shards and then exits. But, it may fail or lose network connectivity while moving shards. The main task in this part of the lab is recovering from a controller that fails to complete `ChangeConfigTo`. The tester starts a new controller and invokes its `ChangeConfigTo` after partitioning the first controller; you have to modify the controller so that the new one finishes the reconfiguration. The tester calls `InitController` when starting a controller; you can modify that function to check whether an interrupted configuration change needs to be completed. - -A good approach to allowing a controller to finish a reconfiguration that a previous one started is to keep two configurations: a current one and a next one, both stored in the controller's kvsrv. 
When a controller starts a reconfiguration, it stores the next configuration. Once a controller completes the reconfiguration, it makes the next configuration the current one. Modify `InitController` to first check if there is a stored next configuration with a higher configuration number than the current one, and if so, complete the shard moves necessary to reconfigure to the next one. - -Modify shardctrler to implement the above approach. A controller that picks up the work from a failed controller may repeat `FreezeShard`, `InstallShard`, and `Delete` RPCs; shardgrps can use `Num` to detect duplicates and reject them. You have completed this task if your solution passes the Part B tests. - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run 5B -Test (5B): Join/leave while a shardgrp is down... (reliable network)... - ... Passed -- time 9.2s #peers 1 #RPCs 899 #Ops 120 -Test (5B): recover controller ... (reliable network)... - ... Passed -- time 26.4s #peers 1 #RPCs 3724 #Ops 360 -PASS -ok 6.5840/shardkv1 35.805s -$ -``` - -- The tester calls `InitController` when starting a controller; you can implement recovery in that method in `shardctrler/shardctrler.go`.","Modify shardctrler to implement the above approach. A controller that picks up the work from a failed controller may repeat `FreezeShard`, `InstallShard`, and `Delete` RPCs; shardgrps can use `Num` to detect duplicates and reject them. You have completed this task if your solution passes the Part B tests.",- The tester calls `InitController` when starting a controller; you can implement recovery in that method in `shardctrler/shardctrler.go`.,,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/shardkv1 && go test -run 5B,"Test (5B): Join/leave while a shardgrp is down... (reliable network)... - ... Passed -- time 9.2s #peers 1 #RPCs 899 #Ops 120 -Test (5B): recover controller ... (reliable network)... - ... Passed -- time 26.4s #peers 1 #RPCs 3724 #Ops 360 -PASS -ok 6.5840/shardkv1 35.805s",easy,http://nil.csail.mit.edu/6.5840/2025/labs/lab-shard1.html -26,6.5840: Distributed Systems,Spring 2025,Lab 5: Sharded Key/Value Service,Part C: Concurrent configuration changes,"You can either do a [final project](http://nil.csail.mit.edu/6.5840/2025/project.html) based on your own ideas, or this lab. - -In this lab you'll build a key/value storage system that ""shards,"" or partitions, the keys over a set of Raft-replicated key/value server groups (shardgrps). A shard is a subset of the key/value pairs; for example, all the keys starting with ""a"" might be one shard, all the keys starting with ""b"" another, etc. The reason for sharding is performance. Each shardgrp handles puts and gets for just a few of the shards, and the shardgrps operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of shardgrps. - -![shardkv design](http://nil.csail.mit.edu/6.5840/2025/labs/shardkv.png) - -The sharded key/value service has the components shown above. Shardgrps (shown with blue squares) store shards with keys: shardgrp 1 holds a shard storing key ""a"", and shardgrp 2 holds a shard storing key ""b"". Clients of the sharded key/value service interact with the service through a clerk (shown with a green circle), which implements `Get` and `Put` methods. To find the shardgrp for a key passed to `Put`/`Get`, the clerk gets the configuration from the kvsrv (shown with a black square), which you implemented in Lab 2. 
The configuration (not shown) describes the mapping from shards to shardgrps (e.g., shard 1 is served by shardgrp 3). - -An administrator (i.e., the tester) uses another client, the controller (shown with a purple circle), to add/remove shardgrps from the cluster and update which shardgrp should serve a shard. The controller has one main method: `ChangeConfigTo`, which takes as argument a new configuration and changes the system from the current configuration to the new configuration; this involves moving shards to new shardgrps that are joining the system and moving shards away from shardgrps that are leaving the system. To do so the controller 1) makes RPCs (`FreezeShard`, `InstallShard`, and `DeleteShard`) to shardgrps, and 2) updates the configuration stored in kvsrv. - -The reason for the controller is that a sharded storage system must be able to shift shards among shardgrps. One reason is that some shardgrps may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that shardgrps may join and leave the system: new shardgrps may be added to increase capacity, or existing shardgrps may be taken offline for repair or retirement. - -The main challenges in this lab will be ensuring linearizability of `Get`/`Put` operations while handling 1) changes in the assignment of shards to shardgrps, and 2) recovering from a controller that fails or is partitioned during `ChangeConfigTo`. - -1. `ChangeConfigTo` moves shards from one shardgrp to another. A risk is that some clients might use the old shardgrp while other clients use the new shardgrp, which could break linearizability. You will need to ensure that at most one shardgrp is serving requests for each shard at any one time. -2. If `ChangeConfigTo` fails while reconfiguring, some shards may be inaccessible if they have started but not completed moving from one shardgrp to another. To make forward progress, the tester starts a new controller, and your job is to ensure that the new one completes the reconfiguration that the previous controller started. - -This lab uses ""configuration"" to refer to the assignment of shards to shardgrps. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. - -A shardgrp server is a member of only a single shardgrp. The set of servers in a given shardgrp will never change. - -Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. - -In Part A, you will implement a working `shardctrler`, which will store and retrieve configurations in a `kvsrv`. You will also implement the `shardgrp`, replicated with your Raft `rsm` package, and a corresponding `shardgrp` clerk. The `shardctrler` talks to the `shardgrp` clerks to move shards between different groups. - -In Part B, you will modify your `shardctrler` to handle failures and partitions during config changes. In Part C, you will extend your `shardctrler` to allow for concurrent controllers without interfering with each other. Finally, in Part D, you will have the opportunity to extend your solution in any way you like. - -This lab's sharded key/value service follows the same general design as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. 
For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are simple; and so on. - -Lab 5 will use your `kvsrv` from Lab 2, and your `rsm` and `Raft` from Lab 4. Your Lab 5 and Lab 4 must use the same `rsm` and `Raft` implementations. - -You may use late hours for Part A, but you may not use late hours for Parts B-D.","Do a `git pull` to get the latest lab software. - -We supply you with tests and skeleton code in `src/shardkv1`: - -- `client.go` for the shardkv clerk -- `shardcfg` package for computing shard configurations -- `shardgrp` package: for the shardgrp clerk and server. -- `shardctrler` package, which contains `shardctrler.go` with methods for the controller to change a configuration (`ChangeConfigTo`) and to get a configuration (`Query`) - -To get up and running, execute the following commands: - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/shardkv1 -$ go test -v -=== RUN TestInitQuery5A -Test (5A): Init and Query ... (reliable network)... - shardkv_test.go:46: Static wrong null 0 -... -``` -",,"In this part of the lab you will modify the controller to allow for concurrent controllers. When a controller crashes or is partitioned, the tester will start a new controller, which must finish any work that the old controller might have in progress (i.e., finishing moving shards like in Part B). This means that several controllers may run concurrently and send RPCs to the shardgrps and the `kvsrv` that stores configurations. - -The main challenge is to ensure these controllers don't step on each other. In Part A you already fenced all the shardgrp RPCs with `Num` so that old RPCs are rejected. Even if several controllers pick up the work of an old controller concurrently, one of them succeeds and the others repeat all the RPCs, the shardgrps will ignore them. - -Thus the challenging case left is to ensure that only one controller updates the next configuration to avoid that two controllers (e.g., a partitioned one and a new one) put different configurations in the next one. To stress this scenario, the tester runs several controllers concurrently and each one computes the next configuration by reading the current configuration and updating it for a shardgrp that left or joined, and then the tester invokes `ChangeConfigTo`; thus multiple controllers may invoke `ChangeConfigTo` with different configuration with the same `Num`. You can use the version number of a key and versioned `Put`s to ensure that only one controller updates the next configuration and that the other invocations return without doing anything. - -Modify your controller so that only one controller can post a next configuration for a configuration `Num`. The tester will start many controllers but only one should start `ChangeConfigTo` for a new configuation. You have completed this task if you pass the concurrent tests of Part C: - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run TestConcurrentReliable5C -Test (5C): Concurrent ctrlers ... (reliable network)... - ... Passed -- time 8.2s #peers 1 #RPCs 1753 #Ops 120 -PASS -ok 6.5840/shardkv1 8.364s -$ go test -run TestAcquireLockConcurrentUnreliable5C -Test (5C): Concurrent ctrlers ... (unreliable network)... - ... Passed -- time 23.8s #peers 1 #RPCs 1850 #Ops 120 -PASS -ok 6.5840/shardkv1 24.008s -$ -``` - -- See `concurCtrler` in `test.go` to see how the tester runs controllers concurrently. 
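One way to picture the versioned-Put idea described above is the small self-contained sketch below. The clerk interface, its method signatures, and the key name `next-config` are assumptions made for illustration; only the idea of rejecting a Put whose version no longer matches comes from the lab text.

```
package main

import "fmt"

// Stand-in for the clerk the controller uses to store configurations. The
// method shapes below are assumptions used only to illustrate versioned Puts.
type kvClerk interface {
	Get(key string) (value string, version int)
	Put(key, value string, version int) bool // false if the version doesn't match
}

// postNext is what each controller could do before starting a reconfiguration:
// read the stored configuration's version, then publish the next configuration
// with a versioned Put. Only the controller whose Put matches the version it
// read succeeds; concurrent duplicates are rejected and should do nothing.
func postNext(kv kvClerk, version int, nextCfg string) bool {
	return kv.Put("next-config", nextCfg, version)
}

// In-memory fake so the sketch runs on its own.
type fakeKV struct {
	val string
	ver int
}

func (f *fakeKV) Get(key string) (string, int) { return f.val, f.ver }
func (f *fakeKV) Put(key, value string, version int) bool {
	if version != f.ver {
		return false // versioned Put: reject a stale writer
	}
	f.val, f.ver = value, f.ver+1
	return true
}

func main() {
	kv := &fakeKV{}
	// Two controllers read the same version, then race to post configuration #2.
	_, ver := kv.Get("next-config")
	fmt.Println(postNext(kv, ver, "cfg #2")) // true: this controller won
	fmt.Println(postNext(kv, ver, "cfg #2")) // false: the other must back off
}
```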
- -In this exercise you will put recovery of an old controller together with a new controller: a new controller should perform recovery from Part B. If the old controller was partitioned during `ChangeConfigTo`, you will have to make sure that the old controller doesn't interfere with the new controller. If all the controller's updates are already properly fenced with `Num` checks from Part B, you don't have to write extra code. You have completed this task if you pass the `Partition` tests. - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run Partition -Test (5C): partition controller in join... (reliable network)... - ... Passed -- time 7.8s #peers 1 #RPCs 876 #Ops 120 -Test (5C): controllers with leased leadership ... (reliable network)... - ... Passed -- time 36.8s #peers 1 #RPCs 3981 #Ops 360 -Test (5C): controllers with leased leadership ... (unreliable network)... - ... Passed -- time 52.4s #peers 1 #RPCs 2901 #Ops 240 -Test (5C): controllers with leased leadership ... (reliable network)... - ... Passed -- time 60.2s #peers 1 #RPCs 27415 #Ops 11182 -Test (5C): controllers with leased leadership ... (unreliable network)... - ... Passed -- time 60.5s #peers 1 #RPCs 11422 #Ops 2336 -PASS -ok 6.5840/shardkv1 217.779s -$ -``` - -You have completed implementing a highly-available sharded key/value service with many shard groups for scalability, reconfiguration to handle changes in load, and with a fault-tolerant controller; congrats! - -Rerun all tests to check that your recent changes to the controller haven't broken earlier tests. - -Gradescope will rerun the Lab 3A-D and Lab 4A-C tests on your submission, in addition to the 5C tests. Before submitting, double check that your solution works: - -``` -$ go test ./raft1 -$ go test ./kvraft1 -$ go test ./shardkv1 -``` -","1. task1 - - Modify your controller so that only one controller can post a next configuration for a configuration `Num`. The tester will start many controllers but only one should start `ChangeConfigTo` for a new configuation. You have completed this task if you pass the concurrent tests of Part C: - -2. task2 - - In this exercise you will put recovery of an old controller together with a new controller: a new controller should perform recovery from Part B. If the old controller was partitioned during `ChangeConfigTo`, you will have to make sure that the old controller doesn't interfere with the new controller. If all the controller's updates are already properly fenced with `Num` checks from Part B, you don't have to write extra code. You have completed this task if you pass the `Partition` tests. - -3. task3 - - Rerun all tests to check that your recent changes to the controller haven't broken earlier tests.",- See `concurCtrler` in `test.go` to see how the tester runs controllers concurrently.,,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/shardkv1 && go test -run Partition,"Test (5C): partition controller in join... (reliable network)... - ... Passed -- time 7.8s #peers 1 #RPCs 876 #Ops 120 -Test (5C): controllers with leased leadership ... (reliable network)... - ... Passed -- time 36.8s #peers 1 #RPCs 3981 #Ops 360 -Test (5C): controllers with leased leadership ... (unreliable network)... - ... Passed -- time 52.4s #peers 1 #RPCs 2901 #Ops 240 -Test (5C): controllers with leased leadership ... (reliable network)... - ... Passed -- time 60.2s #peers 1 #RPCs 27415 #Ops 11182 -Test (5C): controllers with leased leadership ... (unreliable network)... - ... 
Passed -- time 60.5s #peers 1 #RPCs 11422 #Ops 2336 -PASS -ok 6.5840/shardkv1 217.779s",moderate,http://nil.csail.mit.edu/6.5840/2025/labs/lab-shard1.html -27,6.1810: Operating System Engineering,2024,Lab: Xv6 and Unix utilities,Boot xv6,This lab will familiarize you with xv6 and its system calls.,,,"Have a look at the [lab tools page](https://pdos.csail.mit.edu/6.1810/2024/tools.html) for information about how to set up your computer to run these labs. - -Fetch the git repository for the xv6 source for the lab: - -``` -$ git clone git://g.csail.mit.edu/xv6-labs-2024 -Cloning into 'xv6-labs-2024'... -... -$ cd xv6-labs-2024 -``` - -The files you will need for this and subsequent labs are distributed using the [Git](http://www.git-scm.com/) version control system. For each of the labs you will check out a version of xv6 tailored for that lab. To learn more about Git, take a look at the [Git user's manual](http://www.kernel.org/pub/software/scm/git/docs/user-manual.html), or this [CS-oriented overview of Git](http://eagain.net/articles/git-for-computer-scientists/). Git allows you to keep track of the changes you make to the code. For example, if you are finished with one of the exercises, and want to checkpoint your progress, you can *commit* your changes by running: - -``` -$ git commit -am 'my solution for util lab exercise 1' -Created commit 60d2135: my solution for util lab exercise 1 - 1 files changed, 1 insertions(+), 0 deletions(-) -$ -``` - -You can view your changes with git diff, which displays changes since your last commit. git diff origin/util displays changes relative to the initial `util` code. `origin/util` is the name of the git branch for this lab. - -Build and run xv6: - -``` -$ make qemu -riscv64-unknown-elf-gcc -c -o kernel/entry.o kernel/entry.S -riscv64-unknown-elf-gcc -Wall -Werror -O -fno-omit-frame-pointer -ggdb -DSOL_UTIL -MD -mcmodel=medany -ffreestanding -fno-common -nostdlib -mno-relax -I. -fno-stack-protector -fno-pie -no-pie -c -o kernel/start.o kernel/start.c -... -riscv64-unknown-elf-ld -z max-page-size=4096 -N -e main -Ttext 0 -o user/_zombie user/zombie.o user/ulib.o user/usys.o user/printf.o user/umalloc.o -riscv64-unknown-elf-objdump -S user/_zombie > user/zombie.asm -riscv64-unknown-elf-objdump -t user/_zombie | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$/d' > user/zombie.sym -mkfs/mkfs fs.img README user/xargstest.sh user/_cat user/_echo user/_forktest user/_grep user/_init user/_kill user/_ln user/_ls user/_mkdir user/_rm user/_sh user/_stressfs user/_usertests user/_grind user/_wc user/_zombie -nmeta 46 (boot, super, log blocks 30 inode blocks 13, bitmap blocks 1) blocks 954 total 1000 -balloc: first 591 blocks have been allocated -balloc: write bitmap block at sector 45 -qemu-system-riscv64 -machine virt -bios none -kernel kernel/kernel -m 128M -smp 3 -nographic -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 - -xv6 kernel is booting - -hart 2 starting -hart 1 starting -init: starting sh -$ -``` - -If you type `ls` at the prompt, you should see output similar to the following: - -``` -$ ls -. 1 1 1024 -.. 
1 1 1024 -README 2 2 2227 -xargstest.sh 2 3 93 -cat 2 4 32864 -echo 2 5 31720 -forktest 2 6 15856 -grep 2 7 36240 -init 2 8 32216 -kill 2 9 31680 -ln 2 10 31504 -ls 2 11 34808 -mkdir 2 12 31736 -rm 2 13 31720 -sh 2 14 54168 -stressfs 2 15 32608 -usertests 2 16 178800 -grind 2 17 47528 -wc 2 18 33816 -zombie 2 19 31080 -console 3 20 0 -``` - -These are the files that `mkfs` includes in the initial file system; most are programs you can run. You just ran one of them: `ls`. - -xv6 has no `ps` command, but, if you type Ctrl-p, the kernel will print information about each process. If you try it now, you'll see two lines: one for `init`, and one for `sh`. - -To quit qemu type: Ctrl-a x (press Ctrl and a at the same time, followed by x).",,,,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,make qemu && ls,"riscv64-unknown-elf-gcc -c -o kernel/entry.o kernel/entry.S -riscv64-unknown-elf-gcc -Wall -Werror -O -fno-omit-frame-pointer -ggdb -DSOL_UTIL -MD -mcmodel=medany -ffreestanding -fno-common -nostdlib -mno-relax -I. -fno-stack-protector -fno-pie -no-pie -c -o kernel/start.o kernel/start.c -... -riscv64-unknown-elf-ld -z max-page-size=4096 -N -e main -Ttext 0 -o user/_zombie user/zombie.o user/ulib.o user/usys.o user/printf.o user/umalloc.o -riscv64-unknown-elf-objdump -S user/_zombie > user/zombie.asm -riscv64-unknown-elf-objdump -t user/_zombie | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$/d' > user/zombie.sym -mkfs/mkfs fs.img README user/xargstest.sh user/_cat user/_echo user/_forktest user/_grep user/_init user/_kill user/_ln user/_ls user/_mkdir user/_rm user/_sh user/_stressfs user/_usertests user/_grind user/_wc user/_zombie -nmeta 46 (boot, super, log blocks 30 inode blocks 13, bitmap blocks 1) blocks 954 total 1000 -balloc: first 591 blocks have been allocated -balloc: write bitmap block at sector 45 -qemu-system-riscv64 -machine virt -bios none -kernel kernel/kernel -m 128M -smp 3 -nographic -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 - -xv6 kernel is booting - -hart 2 starting -hart 1 starting -init: starting sh",easy,https://pdos.csail.mit.edu/6.1810/2024/labs/util.html -28,6.1810: Operating System Engineering,2024,Lab: Xv6 and Unix utilities,sleep,This lab will familiarize you with xv6 and its system calls.,,,"Implement a user-level `sleep` program for xv6, along the lines of the UNIX sleep command. Your `sleep` should pause for a user-specified number of ticks. A tick is a notion of time defined by the xv6 kernel, namely the time between two interrupts from the timer chip. Your solution should be in the file `user/sleep.c`. - -Some hints: - -- Before you start coding, read Chapter 1 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf). -- Put your code in `user/sleep.c`. Look at some of the other programs in `user/` (e.g., `user/echo.c`, `user/grep.c`, and `user/rm.c`) to see how command-line arguments are passed to a program. -- Add your `sleep` program to `UPROGS` in Makefile; once you've done that, `make qemu` will compile your program and you'll be able to run it from the xv6 shell. -- If the user forgets to pass an argument, sleep should print an error message. -- The command-line argument is passed as a string; you can convert it to an integer using `atoi` (see user/ulib.c). -- Use the system call `sleep`. 
-- See `kernel/sysproc.c` for the xv6 kernel code that implements the `sleep` system call (look for `sys_sleep`), `user/user.h` for the C definition of `sleep` callable from a user program, and `user/usys.S` for the assembler code that jumps from user code into the kernel for `sleep`. -- sleep's `main` should call `exit(0)` when it is done. -- Look at Kernighan and Ritchie's book *The C programming language (second edition)* (K&R) to learn about C. - -Run the program from the xv6 shell: - -``` - $ make qemu - ... - init: starting sh - $ sleep 10 - (nothing happens for a little while) - $ - -``` - -Your program should pause when run as shown above. Run make grade in your command line (outside of qemu) to see if you pass the sleep tests. - -Note that make grade runs all tests, including the ones for the tasks below. If you want to run the grade tests for one task, type: - -``` - $ ./grade-lab-util sleep - -``` - -This will run the grade tests that match ""sleep"". Or, you can type: - -``` - $ make GRADEFLAGS=sleep grade - -``` - -which does the same.","Implement a user-level `sleep` program for xv6, along the lines of the UNIX sleep command. Your `sleep` should pause for a user-specified number of ticks. A tick is a notion of time defined by the xv6 kernel, namely the time between two interrupts from the timer chip. Your solution should be in the file `user/sleep.c`.","- Before you start coding, read Chapter 1 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf). -- Put your code in `user/sleep.c`. Look at some of the other programs in `user/` (e.g., `user/echo.c`, `user/grep.c`, and `user/rm.c`) to see how command-line arguments are passed to a program. -- Add your `sleep` program to `UPROGS` in Makefile; once you've done that, `make qemu` will compile your program and you'll be able to run it from the xv6 shell. -- If the user forgets to pass an argument, sleep should print an error message. -- The command-line argument is passed as a string; you can convert it to an integer using `atoi` (see user/ulib.c). -- Use the system call `sleep`. -- See `kernel/sysproc.c` for the xv6 kernel code that implements the `sleep` system call (look for `sys_sleep`), `user/user.h` for the C definition of `sleep` callable from a user program, and `user/usys.S` for the assembler code that jumps from user code into the kernel for `sleep`. -- sleep's `main` should call `exit(0)` when it is done. -- Look at Kernighan and Ritchie's book *The C programming language (second edition)* (K&R) to learn about C.",,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,make qemu && sleep 10,(nothing happens for a little while),easy,https://pdos.csail.mit.edu/6.1810/2024/labs/util.html -29,6.1810: Operating System Engineering,2024,Lab: Xv6 and Unix utilities,pingpong,This lab will familiarize you with xv6 and its system calls.,,,"Write a user-level program that uses xv6 system calls to ''ping-pong'' a byte between two processes over a pair of pipes, one for each direction. The parent should send a byte to the child; the child should print "": received ping"", where is its process ID, write the byte on the pipe to the parent, and exit; the parent should read the byte from the child, print "": received pong"", and exit. Your solution should be in the file `user/pingpong.c`. - -Some hints: - -- Add the program to `UPROGS` in Makefile. -- You'll need to use the `pipe`, `fork`, `write`, `read`, and `getpid` system calls. -- User programs on xv6 have a limited set of library functions available to them. 
You can see the list in `user/user.h`; the source (other than for system calls) is in `user/ulib.c`, `user/printf.c`, and `user/umalloc.c`. - -Run the program from the xv6 shell and it should produce the following output: - -``` - $ make qemu - ... - init: starting sh - $ pingpong - 4: received ping - 3: received pong - $ - -``` - -Your program should exchange a byte between two processes and produces output as shown above. Run make grade to check.","Write a user-level program that uses xv6 system calls to ''ping-pong'' a byte between two processes over a pair of pipes, one for each direction. The parent should send a byte to the child; the child should print "": received ping"", where is its process ID, write the byte on the pipe to the parent, and exit; the parent should read the byte from the child, print "": received pong"", and exit. Your solution should be in the file `user/pingpong.c`.","- Add the program to `UPROGS` in Makefile. -- You'll need to use the `pipe`, `fork`, `write`, `read`, and `getpid` system calls. -- User programs on xv6 have a limited set of library functions available to them. You can see the list in `user/user.h`; the source (other than for system calls) is in `user/ulib.c`, `user/printf.c`, and `user/umalloc.c`.",,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,make qemu && pingpong,"4: received ping -3: received pong",easy,https://pdos.csail.mit.edu/6.1810/2024/labs/util.html -30,6.1810: Operating System Engineering,2024,Lab: Xv6 and Unix utilities,primes,This lab will familiarize you with xv6 and its system calls.,,,"Write a concurrent prime sieve program for xv6 using pipes and the design illustrated in the picture halfway down [this page](http://swtch.com/~rsc/thread/) and the surrounding text. This idea is due to Doug McIlroy, inventor of Unix pipes. Your solution should be in the file `user/primes.c`. - -Your goal is to use `pipe` and `fork` to set up the pipeline. The first process feeds the numbers 2 through 280 into the pipeline. For each prime number, you will arrange to create one process that reads from its left neighbor over a pipe and writes to its right neighbor over another pipe. Since xv6 has limited number of file descriptors and processes, the first process can stop at 280. - -Some hints: - -- Be careful to close file descriptors that a process doesn't need, because otherwise your program will run xv6 out of resources before the first process reaches 280. -- Once the first process reaches 280, it should wait until the entire pipeline terminates, including all children, grandchildren, &c. Thus the main primes process should only exit after all the output has been printed, and after all the other primes processes have exited. -- Hint: `read` returns zero when the write-side of a pipe is closed. -- It's simplest to directly write 32-bit (4-byte) `int`s to the pipes, rather than using formatted ASCII I/O. -- You should create the processes in the pipeline only as they are needed. -- Add the program to `UPROGS` in Makefile. -- If you get an infinite recursion error from the compiler for the function `primes`, you may have to declare `void primes(int) __attribute__((noreturn));` to indicate that `primes` doesn't return. - -Your solution should implement a pipe-based sieve and produce the following output: - -``` - $ make qemu - ... - init: starting sh - $ primes - prime 2 - prime 3 - prime 5 - prime 7 - prime 11 - prime 13 - prime 17 - prime 19 - prime 23 - prime 29 - prime 31 - ... 
- $ - -```","Write a concurrent prime sieve program for xv6 using pipes and the design illustrated in the picture halfway down [this page](http://swtch.com/~rsc/thread/) and the surrounding text. This idea is due to Doug McIlroy, inventor of Unix pipes. Your solution should be in the file `user/primes.c`. - -Your goal is to use `pipe` and `fork` to set up the pipeline. The first process feeds the numbers 2 through 280 into the pipeline. For each prime number, you will arrange to create one process that reads from its left neighbor over a pipe and writes to its right neighbor over another pipe. Since xv6 has limited number of file descriptors and processes, the first process can stop at 280.","- Be careful to close file descriptors that a process doesn't need, because otherwise your program will run xv6 out of resources before the first process reaches 280. -- Once the first process reaches 280, it should wait until the entire pipeline terminates, including all children, grandchildren, &c. Thus the main primes process should only exit after all the output has been printed, and after all the other primes processes have exited. -- Hint: `read` returns zero when the write-side of a pipe is closed. -- It's simplest to directly write 32-bit (4-byte) `int`s to the pipes, rather than using formatted ASCII I/O. -- You should create the processes in the pipeline only as they are needed. -- Add the program to `UPROGS` in Makefile. -- If you get an infinite recursion error from the compiler for the function `primes`, you may have to declare `void primes(int) __attribute__((noreturn));` to indicate that `primes` doesn't return.",,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,make qemu && primes,"... -init: starting sh -$ primes -prime 2 -prime 3 -prime 5 -prime 7 -prime 11 -prime 13 -prime 17 -prime 19 -prime 23 -prime 29 -prime 31 -",moderate/hard,https://pdos.csail.mit.edu/6.1810/2024/labs/util.html -31,6.1810: Operating System Engineering,2024,Lab: Xv6 and Unix utilities,find,This lab will familiarize you with xv6 and its system calls.,,,"Write a simple version of the UNIX find program for xv6: find all the files in a directory tree with a specific name. Your solution should be in the file `user/find.c`. - -Some hints: - -- Look at user/ls.c to see how to read directories. -- Use recursion to allow find to descend into sub-directories. -- Don't recurse into ""."" and "".."". -- Changes to the file system persist across runs of qemu; to get a clean file system run make clean and then make qemu. -- You'll need to use C strings. Have a look at K&R (the C book), for example Section 5.5. -- Note that == does not compare strings like in Python. Use strcmp() instead. -- Add the program to `UPROGS` in Makefile. - -Your solution should produce the following output (when the file system contains the files `b`, `a/b` and `a/aa/b`): - -``` - $ make qemu - ... - init: starting sh - $ echo > b - $ mkdir a - $ echo > a/b - $ mkdir a/aa - $ echo > a/aa/b - $ find . b - ./b - ./a/b - ./a/aa/b - $ - -``` - -Run `make grade` to see what our tests think.",Write a simple version of the UNIX find program for xv6: find all the files in a directory tree with a specific name. Your solution should be in the file `user/find.c`.,"- Look at user/ls.c to see how to read directories. -- Use recursion to allow find to descend into sub-directories. -- Don't recurse into ""."" and "".."". -- Changes to the file system persist across runs of qemu; to get a clean file system run make clean and then make qemu. 
-- You'll need to use C strings. Have a look at K&R (the C book), for example Section 5.5. -- Note that == does not compare strings like in Python. Use strcmp() instead. -- Add the program to `UPROGS` in Makefile.",,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,make qemu && echo > b && mkdir a && echo > a/b && mkdir a/aa && echo > a/aa/b && find . b,"./b -./a/b -./a/aa/b",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/util.html -32,6.1810: Operating System Engineering,2024,Lab: Xv6 and Unix utilities,xargs,This lab will familiarize you with xv6 and its system calls.,,,"Write a simple version of the UNIX xargs program for xv6: its arguments describe a command to run, it reads lines from the standard input, and it runs the command for each line, appending the line to the command's arguments. Your solution should be in the file `user/xargs.c`. - -The following example illustrates xargs's behavior: - -``` - $ echo hello too | xargs echo bye - bye hello too - $ - -``` - -Note that the command here is ""echo bye"" and the additional arguments are ""hello too"", making the command ""echo bye hello too"", which outputs ""bye hello too"". - -Please note that xargs on UNIX makes an optimization where it will feed more than one argument to the command at a time. We don't expect you to make this optimization. To make xargs on UNIX behave the way we want it to for this lab, please run it with the -n option set to 1. For instance - -``` - $ (echo 1 ; echo 2) | xargs -n 1 echo - 1 - 2 - $ - -``` - -Some hints: - -- Use `fork` and `exec` to invoke the command on each line of input. Use `wait` in the parent to wait for the child to complete the command. -- To read individual lines of input, read a character at a time until a newline ('\n') appears. -- kernel/param.h declares MAXARG, which may be useful if you need to declare an argv array. -- Add the program to `UPROGS` in Makefile. -- Changes to the file system persist across runs of qemu; to get a clean file system run make clean and then make qemu. - -xargs, find, and grep combine well: - -``` - $ find . b | xargs grep hello - -``` - -will run ""grep hello"" on each file named b in the directories below ""."". - -To test your solution for xargs, run the shell script xargstest.sh. Your solution should produce the following output: - -``` - $ make qemu - ... - init: starting sh - $ sh < xargstest.sh - $ $ $ $ $ $ hello - hello - hello - $ $ - -``` - -You may have to go back and fix bugs in your find program. The output has many `$` because the xv6 shell doesn't realize it is processing commands from a file instead of from the console, and prints a `$` for each command in the file.","Write a simple version of the UNIX xargs program for xv6: its arguments describe a command to run, it reads lines from the standard input, and it runs the command for each line, appending the line to the command's arguments. Your solution should be in the file `user/xargs.c`.","- Use `fork` and `exec` to invoke the command on each line of input. Use `wait` in the parent to wait for the child to complete the command. -- To read individual lines of input, read a character at a time until a newline ('\n') appears. -- kernel/param.h declares MAXARG, which may be useful if you need to declare an argv array. -- Add the program to `UPROGS` in Makefile. -- Changes to the file system persist across runs of qemu; to get a clean file system run make clean and then make qemu.",,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,make qemu && sh < xargstest.sh,"...
-init: starting sh -$ sh < xargstest.sh -$ $ $ $ $ $ hello -hello -hello",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/util.html -33,6.1810: Operating System Engineering,2024,Lab: system calls,Using gdb,"In the last lab you used system calls to write a few utilities. In this lab you will add some new system calls to xv6, which will help you understand how they work and will expose you to some of the internals of the xv6 kernel. You will add more system calls in later labs. - -Before you start coding, read Chapter 2 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and Sections 4.3 and 4.4 of Chapter 4, and related source files: - -- The user-space ""stubs"" that route system calls into the kernel are in `user/usys.S`, which is generated by `user/usys.pl` when you run `make`. Declarations are in `user/user.h` -- The kernel-space code that routes a system call to the kernel function that implements it is in `kernel/syscall.c` and `kernel/syscall.h`. -- Process-related code is `kernel/proc.h` and `kernel/proc.c`. - -To start the lab, switch to the syscall branch: - -``` - $ git fetch - $ git checkout syscall - $ make clean - -``` - -If you run `make grade` you will see that the grading script cannot exec `trace`. Your job is to add the necessary system calls and stubs to make `trace` work. Furthermore, you will notice `attacktest` fails.",,,"In many cases, print statements will be sufficient to debug your kernel, but sometimes it is useful to single step through code or get a stack back-trace. The GDB debugger can help. - -To help you become familiar with gdb, run make qemu-gdb and then fire up gdb in another window (see the gdb material on the [guidance page](https://pdos.csail.mit.edu/6.1810/2024/labs/guidance.html)). Once you have two windows open, type in the gdb window: - -``` -(gdb) b syscall -Breakpoint 1 at 0x80002142: file kernel/syscall.c, line 243. -(gdb) c -Continuing. -[Switching to Thread 1.2] - -Thread 2 hit Breakpoint 1, syscall () at kernel/syscall.c:243 -243 { -(gdb) layout src -(gdb) backtrace -``` - -The `layout` command splits the window in two, showing where gdb is in the source code. `backtrace` prints a stack backtrace. - -Answer the following questions in `answers-syscall.txt`. - -Looking at the backtrace output, which function called `syscall`? - -Type n a few times to step past `struct proc *p = myproc();` Once past this statement, type p /x *p, which prints the current process's `proc struct` (see `kernel/proc.h>`) in hex. - -What is the value of `p->trapframe->a7` and what does that value represent? (Hint: look `user/initcode.S`, the first user program xv6 starts.) - -The processor is running in supervisor mode, and we can print privileged registers such as `sstatus` (see [RISC-V privileged instructions](https://github.com/riscv/riscv-isa-manual/releases/download/Priv-v1.12/riscv-privileged-20211203.pdf) for a description): - -``` - (gdb) p /x $sstatus - -``` - -What was the previous mode that the CPU was in? - -The xv6 kernel code contains consistency checks whose failure causes the kernel to panic; you may find that your kernel modifications cause panics. For example, replace the statement `num = p->trapframe->a7;` with `num = * (int *) 0;` at the beginning of `syscall`, run make qemu, and you will see something similar to: - -``` -xv6 kernel is booting - -hart 2 starting -hart 1 starting -scause=0xd sepc=0x80001bfe stval=0x0 -panic: kerneltrap - -``` - -Quit out of `qemu`. 
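For orientation, the dispatch function in `kernel/syscall.c` looks roughly like the sketch below (paraphrased, so details may differ slightly in your checkout); the commented-out line is the one that the experiment above replaces with the faulting load:

```
// Paraphrased sketch of the syscall dispatch path; not verbatim xv6 source.
// It assumes the usual xv6 kernel headers (types.h, riscv.h, proc.h, defs.h,
// syscall.h) and the syscalls[] table of handler function pointers.
void
syscall(void)
{
  int num;
  struct proc *p = myproc();

  // num = p->trapframe->a7;   // normal case: syscall number saved in a7
  num = * (int *) 0;           // the experiment's replacement: address 0 is
                               // not mapped in the kernel address space, so
                               // this load page-faults (scause=0xd) and the
                               // kernel panics

  if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
    p->trapframe->a0 = syscalls[num]();   // run handler; return value into a0
  } else {
    printf("%d %s: unknown sys call %d\n", p->pid, p->name, num);
    p->trapframe->a0 = -1;
  }
}
```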
- -To track down the source of a kernel page-fault panic, search for the `sepc` value printed for the panic you just saw in the file `kernel/kernel.asm`, which contains the assembly for the compiled kernel. - -Write down the assembly instruction the kernel is panicing at. Which register corresponds to the variable `num`? - -To inspect the state of the processor and the kernel at the faulting instruction, fire up gdb, and set a breakpoint at the faulting `epc`, like this: - -``` -(gdb) b *0x80001bfe -Breakpoint 1 at 0x80001bfe: file kernel/syscall.c, line 138. -(gdb) layout asm -(gdb) c -Continuing. -[Switching to Thread 1.3] - -Thread 3 hit Breakpoint 1, syscall () at kernel/syscall.c:138 -``` - -Confirm that the faulting assembly instruction is the same as the one you found above. - -Why does the kernel crash? Hint: look at figure 3-3 in the text; is address 0 mapped in the kernel address space? Is that confirmed by the value in `scause` above? (See description of `scause` in [RISC-V privileged instructions](https://pdos.csail.mit.edu/6.1810/2024/labs/n//github.com/riscv/riscv-isa-manual/releases/download/Priv-v1.12/riscv-privileged-20211203.pdf)) - -Note that `scause` was printed by the kernel panic above, but often you need to look at additional info to track down the problem that caused the panic. For example, to find out which user process was running when the kernel paniced, you can print the process's name: - -``` - (gdb) p p->name - -``` - -What is the name of the process that was running when the kernel paniced? What is its process id (`pid`)? - -You may want to revisit [Using the GNU Debugger](https://pdos.csail.mit.edu/6.828/2019/lec/gdb_slides.pdf) as needed. The [guidance page](https://pdos.csail.mit.edu/6.1810/2024/labs/guidance.html) also has debugging tips.",,,,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,,,easy,https://pdos.csail.mit.edu/6.1810/2024/labs/syscall.html -34,6.1810: Operating System Engineering,2024,Lab: system calls,System call tracing,"In the last lab you used system calls to write a few utilities. In this lab you will add some new system calls to xv6, which will help you understand how they work and will expose you to some of the internals of the xv6 kernel. You will add more system calls in later labs. - -Before you start coding, read Chapter 2 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and Sections 4.3 and 4.4 of Chapter 4, and related source files: - -- The user-space ""stubs"" that route system calls into the kernel are in `user/usys.S`, which is generated by `user/usys.pl` when you run `make`. Declarations are in `user/user.h` -- The kernel-space code that routes a system call to the kernel function that implements it is in `kernel/syscall.c` and `kernel/syscall.h`. -- Process-related code is `kernel/proc.h` and `kernel/proc.c`. - -To start the lab, switch to the syscall branch: - -``` - $ git fetch - $ git checkout syscall - $ make clean - -``` - -If you run `make grade` you will see that the grading script cannot exec `trace`. Your job is to add the necessary system calls and stubs to make `trace` work. Furthermore, you will notice `attacktest` fails.",,,"In this assignment you will add a system call tracing feature that may help you when debugging later labs. You'll create a new `trace` system call that will control tracing. It should take one argument, an integer ""mask"", whose bits specify which system calls to trace. 
For example, to trace the fork system call, a program calls `trace(1 << SYS_fork)`, where `SYS_fork` is a syscall number from `kernel/syscall.h`. You have to modify the xv6 kernel to print a line when each system call is about to return, if the system call's number is set in the mask. The line should contain the process id, the name of the system call and the return value; you don't need to print the system call arguments. The `trace` system call should enable tracing for the process that calls it and any children that it subsequently forks, but should not affect other processes. - -We provide a `trace` user-level program that runs another program with tracing enabled (see `user/trace.c`). When you're done, you should see output like this: - -``` -$ trace 32 grep hello README -3: syscall read -> 1023 -3: syscall read -> 966 -3: syscall read -> 70 -3: syscall read -> 0 -$ -$ trace 2147483647 grep hello README -4: syscall trace -> 0 -4: syscall exec -> 3 -4: syscall open -> 3 -4: syscall read -> 1023 -4: syscall read -> 966 -4: syscall read -> 70 -4: syscall read -> 0 -4: syscall close -> 0 -$ -$ grep hello README -$ -$ trace 2 usertests forkforkfork -usertests starting -test forkforkfork: 407: syscall fork -> 408 -408: syscall fork -> 409 -409: syscall fork -> 410 -410: syscall fork -> 411 -409: syscall fork -> 412 -410: syscall fork -> 413 -409: syscall fork -> 414 -411: syscall fork -> 415 -... -$ -``` - -In the first example above, trace invokes grep tracing just the read system call. The 32 is `1< 408 -408: syscall fork -> 409 -409: syscall fork -> 410 -410: syscall fork -> 411 -409: syscall fork -> 412 -410: syscall fork -> 413 -409: syscall fork -> 414 -411: syscall fork -> 415 -",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/syscall.html -35,6.1810: Operating System Engineering,2024,Lab: system calls,Attack xv6,"In the last lab you used system calls to write a few utilities. In this lab you will add some new system calls to xv6, which will help you understand how they work and will expose you to some of the internals of the xv6 kernel. You will add more system calls in later labs. - -Before you start coding, read Chapter 2 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and Sections 4.3 and 4.4 of Chapter 4, and related source files: - -- The user-space ""stubs"" that route system calls into the kernel are in `user/usys.S`, which is generated by `user/usys.pl` when you run `make`. Declarations are in `user/user.h` -- The kernel-space code that routes a system call to the kernel function that implements it is in `kernel/syscall.c` and `kernel/syscall.h`. -- Process-related code is `kernel/proc.h` and `kernel/proc.c`. - -To start the lab, switch to the syscall branch: - -``` - $ git fetch - $ git checkout syscall - $ make clean - -``` - -If you run `make grade` you will see that the grading script cannot exec `trace`. Your job is to add the necessary system calls and stubs to make `trace` work. Furthermore, you will notice `attacktest` fails.",,,"The xv6 kernel isolates user programs from each other and isolates the kernel from user programs. As you saw in the above assignments, an application cannot directly call a function in the kernel or in another user program; instead, interactions occur only through system calls. However, if there is a bug in the implementation of a system call, an attacker may be able to exploit that bug to break the isolation boundaries. 
To get a sense for how bugs can be exploited, we have introduced a bug into xv6 and your goal is to exploit that bug to trick xv6 into revealing a secret from another process. - -The bug is that the call to `memset(mem, 0, sz)` at line 272 in `kernel/vm.c` to clear a newly-allocated page is omitted when compiling this lab. Similarly, when compiling `kernel/kalloc.c` for this lab the two lines that use `memset` to put garbage into free pages are omitted. The net effect of omitting these 3 lines (all marked by `ifndef LAB_SYSCALL`) is that newly allocated memory retains the contents from its previous use. - -`user/secret.c` writes an 8-byte secret in its memory and then exits (which frees its memory). Your goal is to add a few lines of code to `user/attack.c` to find the secret that a previous execution of `secret.c` wrote to memory, and write the 8 secret bytes to file descriptor 2. You'll receive full credit if `attacktest` prints: ""OK: secret is ebb.ebb"". (Note: the secret may be different for each run of `attacktest`.) - -You are allowed to modify `user/attack.c`, but you cannot make any other changes: you cannot modify the xv6 kernel sources, secret.c, attacktest.c, etc. - -Some hints: - -- Run `attacktest` in the xv6 shell. It should the following output: - - ``` - $ attacktest - FAIL: no/incorrect secret - ``` - - Note that despite the 3 deleted lines, xv6 appears to work correctly: it started the shell and it ran `attacktest`. In fact, if you run `usertests` most of them pass! - -- Read `user/attacktest.c`. It generates a random 8-byte string, which it passes to the program `secret`, which writes it into its memory. After `secret` exits, `attacktest` spawns `attack` and waits for `attack` to write the secret string to file descriptor 2. - -- Read `user/secret.c` and think about how you could trick xv6 into revealing the secret to `attack.c`. - -- Test your exploit by running `attacktest` in the xv6 shell. - -`user/secret.c` copies the secret bytes to memory whose address is 32 bytes after the start of a page. Change the 32 to 0 and you should see that your attack doesn't work anymore; why not? - -Small bugs that do not directly affect correctness but still can be exploited to break security (like the one above) make kernel programming challenging. xv6 is likely to have such bugs, although we try to not have them. Real kernels, which have many more lines of code than xv6, have a long history of such bugs. For example, see the public [Linux vulnerabilities](https://www.opencve.io/cve?vendor=linux&product=linux_kernel) and [how to report vulnerabilities](https://docs.kernel.org/process/security-bugs.html).","`user/secret.c` writes an 8-byte secret in its memory and then exits (which frees its memory). Your goal is to add a few lines of code to `user/attack.c` to find the secret that a previous execution of `secret.c` wrote to memory, and write the 8 secret bytes to file descriptor 2. You'll receive full credit if `attacktest` prints: ""OK: secret is ebb.ebb"". (Note: the secret may be different for each run of `attacktest`.)","- Run `attacktest` in the xv6 shell. It should the following output: - - ``` - $ attacktest - FAIL: no/incorrect secret - ``` - - Note that despite the 3 deleted lines, xv6 appears to work correctly: it started the shell and it ran `attacktest`. In fact, if you run `usertests` most of them pass! - -- Read `user/attacktest.c`. It generates a random 8-byte string, which it passes to the program `secret`, which writes it into its memory. 
After `secret` exits, `attacktest` spawns `attack` and waits for `attack` to write the secret string to file descriptor 2. - -- Read `user/secret.c` and think about how you could trick xv6 into revealing the secret to `attack.c`. - -- Test your exploit by running `attacktest` in the xv6 shell.",,git://g.csail.mit.edu/xv6-labs-2024,syscall,xv6-labs-2024,make qemu && attacktest,FAIL: no/incorrect secret,moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/syscall.html -36,6.1810: Operating System Engineering,2024,Lab: page tables,Inspect a user-process page table,"In this lab you will explore page tables and modify them to implement common OS features. - -Before you start coding, read Chapter 3 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and related files: - -- `kernel/memlayout.h`, which captures the layout of memory. -- `kernel/vm.c`, which contains most virtual memory (VM) code. -- `kernel/kalloc.c`, which contains code for allocating and freeing physical memory. - -It may also help to consult the [RISC-V privileged architecture manual](https://drive.google.com/file/d/17GeetSnT5wW3xNuAHI95-SI1gPGd5sJ_/view?usp=drive_link). - -To start the lab, switch to the pgtbl branch: - -``` - $ git fetch - $ git checkout pgtbl - $ make clean - -``` -",,,"To help you understand RISC-V page tables, your first task is to explain the page table for a user process. - -Run `make qemu` and run the user program `pgtbltest`. The `print_pgtbl` functions prints out the page-table entries for the first 10 and last 10 pages of the `pgtbltest` process using the `pgpte` system call that we added to xv6 for this lab. The output looks as follows: - -``` -va 0 pte 0x21FCF45B pa 0x87F3D000 perm 0x5B -va 1000 pte 0x21FCE85B pa 0x87F3A000 perm 0x5B -... -va 0xFFFFD000 pte 0x0 pa 0x0 perm 0x0 -va 0xFFFFE000 pte 0x21FD80C7 pa 0x87F60000 perm 0xC7 -va 0xFFFFF000 pte 0x20001C4B pa 0x80007000 perm 0x4B - -``` - -For every page table entry in the `print_pgtbl` output, explain what it logically contains and what its permission bits are. Figure 3.4 in the xv6 book might be helpful, although note that the figure might have a slightly different set of pages than process that's being inspected here. Note that xv6 doesn't place the virtual pages consecutively in physical memory.",,,,git://g.csail.mit.edu/xv6-labs-2024,pgtbl,xv6-labs-2024,make qemu && pgtbltest,"va 0 pte 0x21FCF45B pa 0x87F3D000 perm 0x5B -va 1000 pte 0x21FCE85B pa 0x87F3A000 perm 0x5B -... -va 0xFFFFD000 pte 0x0 pa 0x0 perm 0x0 -va 0xFFFFE000 pte 0x21FD80C7 pa 0x87F60000 perm 0xC7 -va 0xFFFFF000 pte 0x20001C4B pa 0x80007000 perm 0x4B - ",easy,https://pdos.csail.mit.edu/6.1810/2024/labs/pgtbl.html -37,6.1810: Operating System Engineering,2024,Lab: page tables,Speed up system calls,"In this lab you will explore page tables and modify them to implement common OS features. - -Before you start coding, read Chapter 3 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and related files: - -- `kernel/memlayout.h`, which captures the layout of memory. -- `kernel/vm.c`, which contains most virtual memory (VM) code. -- `kernel/kalloc.c`, which contains code for allocating and freeing physical memory. - -It may also help to consult the [RISC-V privileged architecture manual](https://drive.google.com/file/d/17GeetSnT5wW3xNuAHI95-SI1gPGd5sJ_/view?usp=drive_link). 
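To make the `perm` values printed by `print_pgtbl` above easier to read, here is a small, hypothetical standalone C helper (it runs on your host, not inside xv6; the bit positions follow the Sv39 PTE layout in the privileged manual, and the low five bits correspond to the PTE_V/PTE_R/PTE_W/PTE_X/PTE_U macros in `kernel/riscv.h`):

```
// Hypothetical helper, not part of xv6: decode the low flag bits of a
// RISC-V Sv39 PTE, e.g. the perm values printed by print_pgtbl.
// Bits 0..7 are V, R, W, X, U, G, A, D.
#include <stdio.h>
#include <stdint.h>

static void decode_perm(uint64_t perm)
{
  static const char *flags[8] = {"V", "R", "W", "X", "U", "G", "A", "D"};
  printf("perm 0x%llx =", (unsigned long long)perm);
  for (int bit = 0; bit < 8; bit++)
    if (perm & (1ULL << bit))
      printf(" %s", flags[bit]);
  printf("\n");
}

int main(void)
{
  decode_perm(0x5B);   // prints: perm 0x5b = V R X U A
  decode_perm(0xC7);   // prints: perm 0xc7 = V R W A D
  decode_perm(0x4B);   // prints: perm 0x4b = V R X A
  return 0;
}
```

Read this way, 0x5B marks a valid, user-accessible, readable and executable page, while 0xC7 marks a valid readable/writable page with no U bit, i.e. one that user code cannot touch directly.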
- -To start the lab, switch to the pgtbl branch: - -``` - $ git fetch - $ git checkout pgtbl - $ make clean - -``` -",,,"Some operating systems (e.g., Linux) speed up certain system calls by sharing data in a read-only region between userspace and the kernel. This eliminates the need for kernel crossings when performing these system calls. To help you learn how to insert mappings into a page table, your first task is to implement this optimization for the `getpid()` system call in xv6. - -When each process is created, map one read-only page at USYSCALL (a virtual address defined in `memlayout.h`). At the start of this page, store a `struct usyscall` (also defined in `memlayout.h`), and initialize it to store the PID of the current process. For this lab, `ugetpid()` has been provided on the userspace side and will automatically use the USYSCALL mapping. You will receive full credit for this part of the lab if the `ugetpid` test case passes when running `pgtbltest`. - -Some hints: - -- Choose permission bits that allow userspace to only read the page. -- There are a few things that need to be done over the lifecycle of a new page. For inspiration, understand the trapframe handling in `kernel/proc.c`. - -Which other xv6 system call(s) could be made faster using this shared page? Explain how.",,,,git://g.csail.mit.edu/xv6-labs-2024,pgtbl,xv6-labs-2024,,,easy,https://pdos.csail.mit.edu/6.1810/2024/labs/pgtbl.html -38,6.1810: Operating System Engineering,2024,Lab: page tables,Print a page table,"In this lab you will explore page tables and modify them to implement common OS features. - -Before you start coding, read Chapter 3 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and related files: - -- `kernel/memlayout.h`, which captures the layout of memory. -- `kernel/vm.c`, which contains most virtual memory (VM) code. -- `kernel/kalloc.c`, which contains code for allocating and freeing physical memory. - -It may also help to consult the [RISC-V privileged architecture manual](https://drive.google.com/file/d/17GeetSnT5wW3xNuAHI95-SI1gPGd5sJ_/view?usp=drive_link). - -To start the lab, switch to the pgtbl branch: - -``` - $ git fetch - $ git checkout pgtbl - $ make clean - -``` -",,,"To help you visualize RISC-V page tables, and perhaps to aid future debugging, your next task is to write a function that prints the contents of a page table. - -We added a system call `kpgtbl()`, which calls `vmprint()` in `vm.c`. It takes a `pagetable_t` argument, and your job is to print that pagetable in the format described below. - -When you run `print_kpgtbl()` test, your implementation should print the following output: - -``` -page table 0x0000000087f22000 - ..0x0000000000000000: pte 0x0000000021fc7801 pa 0x0000000087f1e000 - .. ..0x0000000000000000: pte 0x0000000021fc7401 pa 0x0000000087f1d000 - .. .. ..0x0000000000000000: pte 0x0000000021fc7c5b pa 0x0000000087f1f000 - .. .. ..0x0000000000001000: pte 0x0000000021fc70d7 pa 0x0000000087f1c000 - .. .. ..0x0000000000002000: pte 0x0000000021fc6c07 pa 0x0000000087f1b000 - .. .. ..0x0000000000003000: pte 0x0000000021fc68d7 pa 0x0000000087f1a000 - ..0xffffffffc0000000: pte 0x0000000021fc8401 pa 0x0000000087f21000 - .. ..0xffffffffffe00000: pte 0x0000000021fc8001 pa 0x0000000087f20000 - .. .. ..0xffffffffffffd000: pte 0x0000000021fd4c13 pa 0x0000000087f53000 - .. .. ..0xffffffffffffe000: pte 0x0000000021fd00c7 pa 0x0000000087f40000 - .. .. 
..0xfffffffffffff000: pte 0x000000002000184b pa 0x0000000080006000 - -``` - -The first line displays the argument to `vmprint`. After that there is a line for each PTE, including PTEs that refer to page-table pages deeper in the tree. Each PTE line is indented by a number of `"" ..""` that indicates its depth in the tree. Each PTE line shows its virtual addresss, the pte bits, and the physical address extracted from the PTE. Don't print PTEs that are not valid. In the above example, the top-level page-table page has mappings for entries 0 and 255. The next level down for entry 0 has only index 0 mapped, and the bottom-level for that index 0 has a few entries mapped. - -Your code might emit different physical addresses than those shown above. The number of entries and the virtual addresses should be the same. - -Some hints: - -- Use the macros at the end of the file kernel/riscv.h. -- The function `freewalk` may be inspirational. -- Use `%p` in your printf calls to print out full 64-bit hex PTEs and addresses as shown in the example. - -For every leaf page in the `vmprint` output, explain what it logically contains and what its permission bits are, and how it relates to the output of the earlier `print_pgtbl()` exercise above. Figure 3.4 in the xv6 book might be helpful, although note that the figure might have a slightly different set of pages than the process that's being inspected here.",,"- Use the macros at the end of the file kernel/riscv.h. -- The function `freewalk` may be inspirational. -- Use `%p` in your printf calls to print out full 64-bit hex PTEs and addresses as shown in the example.",,git://g.csail.mit.edu/xv6-labs-2024,pgtbl,xv6-labs-2024,print_kpgtbl(),"page table 0x0000000087f22000 - ..0x0000000000000000: pte 0x0000000021fc7801 pa 0x0000000087f1e000 - .. ..0x0000000000000000: pte 0x0000000021fc7401 pa 0x0000000087f1d000 - .. .. ..0x0000000000000000: pte 0x0000000021fc7c5b pa 0x0000000087f1f000 - .. .. ..0x0000000000001000: pte 0x0000000021fc70d7 pa 0x0000000087f1c000 - .. .. ..0x0000000000002000: pte 0x0000000021fc6c07 pa 0x0000000087f1b000 - .. .. ..0x0000000000003000: pte 0x0000000021fc68d7 pa 0x0000000087f1a000 - ..0xffffffffc0000000: pte 0x0000000021fc8401 pa 0x0000000087f21000 - .. ..0xffffffffffe00000: pte 0x0000000021fc8001 pa 0x0000000087f20000 - .. .. ..0xffffffffffffd000: pte 0x0000000021fd4c13 pa 0x0000000087f53000 - .. .. ..0xffffffffffffe000: pte 0x0000000021fd00c7 pa 0x0000000087f40000 - .. .. ..0xfffffffffffff000: pte 0x000000002000184b pa 0x0000000080006000 - ",easy,https://pdos.csail.mit.edu/6.1810/2024/labs/pgtbl.html -39,6.1810: Operating System Engineering,2024,Lab: page tables,Use superpages,"In this lab you will explore page tables and modify them to implement common OS features. - -Before you start coding, read Chapter 3 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and related files: - -- `kernel/memlayout.h`, which captures the layout of memory. -- `kernel/vm.c`, which contains most virtual memory (VM) code. -- `kernel/kalloc.c`, which contains code for allocating and freeing physical memory. - -It may also help to consult the [RISC-V privileged architecture manual](https://drive.google.com/file/d/17GeetSnT5wW3xNuAHI95-SI1gPGd5sJ_/view?usp=drive_link). - -To start the lab, switch to the pgtbl branch: - -``` - $ git fetch - $ git checkout pgtbl - $ make clean - -``` -",,,"The RISC-V paging hardware supports two-megabyte pages as well as ordinary 4096-byte pages. 
The general idea of larger pages is called superpages, and (since RISC-V supports more than one size) 2M pages are called megapages. The operating system creates a superpage by setting the PTE_V and PTE_R bits in the level-1 PTE, and setting the physical page number to point to the start of a two-megabyte region of physical memory. This physical address must be two-mega-byte aligned (i.e., a multiple of two megabytes). You can read about this in the RISC-V privileged manual by searching for megapage and superpage; in particular, the top of page 112. Use of superpages decreases the amount of physical memory used by the page table, and can decrease misses in the TLB cache. For some programs this leads to large increases in performance. - -Your job is to modify the xv6 kernel to use superpages. In particular, if a user program calls sbrk() with a size of 2 megabytes or more, and the newly created address range includes one or more areas that are two-megabyte-aligned and at least two megabytes in size, the kernel should use a single superpage (instead of hundreds of ordinary pages). You will receive full credit for this part of the lab if the `superpg_test` test case passes when running `pgtbltest`. - -Some hints: - -- Read `superpg_test` in `user/pgtbltest.c`. -- A good place to start is `sys_sbrk` in `kernel/sysproc.c`, which is invoked by the `sbrk` system call. Follow the code path to the function that allocates memory for `sbrk`. -- Your kernel will need to be able to allocate and free two-megabyte regions. Modify kalloc.c to set aside a few two-megabyte areas of physical memory, and create superalloc() and superfree() functions. You'll only need a handful of two-megabyte chunks of memory. -- Superpages must be allocated when a process with superpages forks, and freed when it exits; you'll need to modify `uvmcopy()` and `uvmunmap()`. - -Real operating systems dynamically promote a collection of pages to a superpage. The following reference explains why that is a good idea and what is hard in a more serious design: [Juan Navarro, Sitaram Iyer, Peter Druschel, and Alan Cox. Practical, transparent operating system support for superpages. SIGOPS Oper. Syst. Rev., 36(SI):89-104, December 2002.](https://www.usenix.org/conference/osdi-02/practical-transparent-operating-system-support-superpages) This reference summarizes superpage-implementations for different OSes: [A comprehensive analysis of superpage management mechanism and policies](https://www.usenix.org/conference/atc20/presentation/zhu-weixi). -","Your job is to modify the xv6 kernel to use superpages. In particular, if a user program calls sbrk() with a size of 2 megabytes or more, and the newly created address range includes one or more areas that are two-megabyte-aligned and at least two megabytes in size, the kernel should use a single superpage (instead of hundreds of ordinary pages). You will receive full credit for this part of the lab if the `superpg_test` test case passes when running `pgtbltest`.","- Read `superpg_test` in `user/pgtbltest.c`. -- A good place to start is `sys_sbrk` in `kernel/sysproc.c`, which is invoked by the `sbrk` system call. Follow the code path to the function that allocates memory for `sbrk`. -- Your kernel will need to be able to allocate and free two-megabyte regions. Modify kalloc.c to set aside a few two-megabyte areas of physical memory, and create superalloc() and superfree() functions. You'll only need a handful of two-megabyte chunks of memory. 
-- Superpages must be allocated when a process with superpages forks, and freed when it exits; you'll need to modify `uvmcopy()` and `uvmunmap()`.",,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,,,moderate/hard,https://pdos.csail.mit.edu/6.1810/2024/labs/pgtbl.html -40,6.1810: Operating System Engineering,2024,Lab: traps,RISC-V assembly,"This lab explores how system calls are implemented using traps. You will first do a warm-up exercises with stacks and then you will implement an example of user-level trap handling. - -Before you start coding, read Chapter 4 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and related source files: - -- `kernel/trampoline.S`: the assembly involved in changing from user space to kernel space and back -- `kernel/trap.c`: code handling all interrupts - -To start the lab, switch to the trap branch: - -``` - $ git fetch - $ git checkout traps - $ make clean - -``` -",,,"It will be important to understand a bit of RISC-V assembly, which you were exposed to in 6.1910 (6.004). There is a file `user/call.c` in your xv6 repo. make fs.img compiles it and also produces a readable assembly version of the program in `user/call.asm`. - -Read the code in call.asm for the functions `g`, `f`, and `main`. The instruction manual for RISC-V is on the [reference page](https://pdos.csail.mit.edu/6.1810/2024/reference.html). Answer the following questions in `answers-traps.txt`: - -Which registers contain arguments to functions? For example, which register holds 13 in main's call to `printf`? - -Where is the call to function `f` in the assembly code for main? Where is the call to `g`? (Hint: the compiler may inline functions.) - -At what address is the function `printf` located? - -What value is in the register `ra` just after the `jalr` to `printf` in `main`? - -Run the following code. - -``` - unsigned int i = 0x00646c72; - printf(""H%x Wo%s"", 57616, (char *) &i); - -``` - -What is the output? [Here's an ASCII table](https://www.asciitable.com/) that maps bytes to characters. - -The output depends on that fact that the RISC-V is little-endian. If the RISC-V were instead big-endian what would you set `i` to in order to yield the same output? Would you need to change `57616` to a different value? - -[Here's a description of little- and big-endian](http://www.webopedia.com/TERM/b/big_endian.html) and [a more whimsical description](https://www.rfc-editor.org/ien/ien137.txt). - -In the following code, what is going to be printed after `'y='`? (note: the answer is not a specific value.) Why does this happen? - -``` - printf(""x=%d y=%d"", 3); - -``` -",,,," $ git fetch - $ git checkout traps - $ make clean",,xv6-labs-2024,,,easy,https://pdos.csail.mit.edu/6.1810/2024/labs/traps.html -41,6.1810: Operating System Engineering,2024,Lab: traps,Backtrace,"This lab explores how system calls are implemented using traps. You will first do a warm-up exercises with stacks and then you will implement an example of user-level trap handling. 
- -Before you start coding, read Chapter 4 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and related source files: - -- `kernel/trampoline.S`: the assembly involved in changing from user space to kernel space and back -- `kernel/trap.c`: code handling all interrupts - -To start the lab, switch to the trap branch: - -``` - $ git fetch - $ git checkout traps - $ make clean - -``` -",,,"For debugging it is often useful to have a backtrace: a list of the function calls on the stack above the point at which the error occurred. To help with backtraces, the compiler generates machine code that maintains a stack frame on the stack corresponding to each function in the current call chain. Each stack frame consists of the return address and a ""frame pointer"" to the caller's stack frame. Register `s0` contains a pointer to the current stack frame (it actually points to the the address of the saved return address on the stack plus 8). Your `backtrace` should use the frame pointers to walk up the stack and print the saved return address in each stack frame. - -Implement a `backtrace()` function in `kernel/printf.c`. Insert a call to this function in `sys_sleep`, and then run bttest, which calls `sys_sleep`. Your output should be a list of return addresses with this form (but the numbers will likely be different): - -``` - backtrace: - 0x0000000080002cda - 0x0000000080002bb6 - 0x0000000080002898 - -``` - -After `bttest` exit qemu. In a terminal window: run `addr2line -e kernel/kernel` (or `riscv64-unknown-elf-addr2line -e kernel/kernel`) and cut-and-paste the addresses from your backtrace, like this: - -``` - $ addr2line -e kernel/kernel - 0x0000000080002de2 - 0x0000000080002f4a - 0x0000000080002bfc - Ctrl-D - -``` - -You should see something like this: - -``` - kernel/sysproc.c:74 - kernel/syscall.c:224 - kernel/trap.c:85 - -``` - -Some hints: - -- Add the prototype for your `backtrace()` to `kernel/defs.h` so that you can invoke `backtrace` in `sys_sleep`. - -- The GCC compiler stores the frame pointer of the currently executing function in the register s0 . In the section marked by #ifndef __ASSEMBLER__ ... #endif, add the following function to `kernel/riscv.h` : - - ``` - static inline uint64 - r_fp() - { - uint64 x; - asm volatile(""mv %0, s0"" : ""=r"" (x) ); - return x; - } - ``` - - and call this function in `backtrace` to read the current frame pointer. `r_fp()` uses in-line assembly to read s0 . - -- These [lecture notes](https://pdos.csail.mit.edu/6.1810/2023/lec/l-riscv.txt) have a picture of the layout of stack frames. Note that the return address lives at a fixed offset (-8) from the frame pointer of a stackframe, and that the saved frame pointer lives at fixed offset (-16) from the frame pointer. - -- Your `backtrace()` will need a way to recognize that it has seen the last stack frame, and should stop. A useful fact is that the memory allocated for each kernel stack consists of a single page-aligned page, so that all the stack frames for a given stack are on the same page. You can use `PGROUNDDOWN(fp)` (see `kernel/riscv.h`) to identify the page that a frame pointer refers to. - -Once your backtrace is working, call it from `panic` in `kernel/printf.c` so that you see the kernel's backtrace when it panics.","Implement a `backtrace()` function in `kernel/printf.c`. Insert a call to this function in `sys_sleep`, and then run bttest, which calls `sys_sleep`. 
Your output should be a list of return addresses with this form (but the numbers will likely be different): - -``` -backtrace: -0x0000000080002cda -0x0000000080002bb6 -0x0000000080002898 - -``` - -After `bttest` exit qemu. In a terminal window: run `addr2line -e kernel/kernel` (or `riscv64-unknown-elf-addr2line -e kernel/kernel`) and cut-and-paste the addresses from your backtrace, like this: - -``` -$ addr2line -e kernel/kernel -0x0000000080002de2 -0x0000000080002f4a -0x0000000080002bfc -Ctrl-D - -``` - -You should see something like this: - -``` -kernel/sysproc.c:74 -kernel/syscall.c:224 -kernel/trap.c:85 - -``` -","- Add the prototype for your `backtrace()` to `kernel/defs.h` so that you can invoke `backtrace` in `sys_sleep`. - -- The GCC compiler stores the frame pointer of the currently executing function in the register s0 . In the section marked by #ifndef __ASSEMBLER__ ... #endif, add the following function to `kernel/riscv.h` : - - ``` - static inline uint64 - r_fp() - { - uint64 x; - asm volatile(""mv %0, s0"" : ""=r"" (x) ); - return x; - } - ``` - - and call this function in `backtrace` to read the current frame pointer. `r_fp()` uses in-line assembly to read s0 . - -- These [lecture notes](https://pdos.csail.mit.edu/6.1810/2023/lec/l-riscv.txt) have a picture of the layout of stack frames. Note that the return address lives at a fixed offset (-8) from the frame pointer of a stackframe, and that the saved frame pointer lives at fixed offset (-16) from the frame pointer. - -- Your `backtrace()` will need a way to recognize that it has seen the last stack frame, and should stop. A useful fact is that the memory allocated for each kernel stack consists of a single page-aligned page, so that all the stack frames for a given stack are on the same page. You can use `PGROUNDDOWN(fp)` (see `kernel/riscv.h`) to identify the page that a frame pointer refers to.",," $ git fetch - $ git checkout traps - $ make clean",,xv6-labs-2024,addr2line -e kernel/kernel,"0x0000000080002de2 -0x0000000080002f4a -0x0000000080002bfc -Ctrl-D",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/traps.html -42,6.1810: Operating System Engineering,2024,Lab: traps,Alarm,"This lab explores how system calls are implemented using traps. You will first do a warm-up exercises with stacks and then you will implement an example of user-level trap handling. - -Before you start coding, read Chapter 4 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and related source files: - -- `kernel/trampoline.S`: the assembly involved in changing from user space to kernel space and back -- `kernel/trap.c`: code handling all interrupts - -To start the lab, switch to the trap branch: - -``` - $ git fetch - $ git checkout traps - $ make clean - -``` -",,,"In this exercise you'll add a feature to xv6 that periodically alerts a process as it uses CPU time. This might be useful for compute-bound processes that want to limit how much CPU time they chew up, or for processes that want to compute but also want to take some periodic action. More generally, you'll be implementing a primitive form of user-level interrupt/fault handlers; you could use something similar to handle page faults in the application, for example. Your solution is correct if it passes alarmtest and 'usertests -q' - -You should add a new `sigalarm(interval, handler)` system call. 
If an application calls `sigalarm(n, fn)`, then after every `n` ""ticks"" of CPU time that the program consumes, the kernel should cause application function `fn` to be called. When `fn` returns, the application should resume where it left off. A tick is a fairly arbitrary unit of time in xv6, determined by how often a hardware timer generates interrupts. If an application calls `sigalarm(0, 0)`, the kernel should stop generating periodic alarm calls. - -You'll find a file `user/alarmtest.c` in your xv6 repository. Add it to the Makefile. It won't compile correctly until you've added `sigalarm` and `sigreturn` system calls (see below). - -`alarmtest` calls `sigalarm(2, periodic)` in `test0` to ask the kernel to force a call to `periodic()` every 2 ticks, and then spins for a while. You can see the assembly code for alarmtest in user/alarmtest.asm, which may be handy for debugging. Your solution is correct when `alarmtest` produces output like this and usertests -q also runs correctly: - -``` -$ alarmtest -test0 start -........alarm! -test0 passed -test1 start -...alarm! -..alarm! -...alarm! -..alarm! -...alarm! -..alarm! -...alarm! -..alarm! -...alarm! -..alarm! -test1 passed -test2 start -................alarm! -test2 passed -test3 start -test3 passed -$ usertest -q -... -ALL TESTS PASSED -$ -``` - -When you're done, your solution will be only a few lines of code, but it may be tricky to get it right. We'll test your code with the version of alarmtest.c in the original repository. You can modify alarmtest.c to help you debug, but make sure the original alarmtest says that all the tests pass. - -### test0: invoke handler - -Get started by modifying the kernel to jump to the alarm handler in user space, which will cause test0 to print ""alarm!"". Don't worry yet what happens after the ""alarm!"" output; it's OK for now if your program crashes after printing ""alarm!"". Here are some hints: - -- You'll need to modify the Makefile to cause `alarmtest.c` to be compiled as an xv6 user program. - -- The right declarations to put in user/user.h are: - - ``` - int sigalarm(int ticks, void (*handler)()); - int sigreturn(void); - ``` - -- Update user/usys.pl (which generates user/usys.S), kernel/syscall.h, and kernel/syscall.c to allow `alarmtest` to invoke the sigalarm and sigreturn system calls. - -- For now, your `sys_sigreturn` should just return zero. - -- Your `sys_sigalarm()` should store the alarm interval and the pointer to the handler function in new fields in the `proc` structure (in `kernel/proc.h`). - -- You'll need to keep track of how many ticks have passed since the last call (or are left until the next call) to a process's alarm handler; you'll need a new field in `struct proc` for this too. You can initialize `proc` fields in `allocproc()` in `proc.c`. - -- Every tick, the hardware clock forces an interrupt, which is handled in `usertrap()` in `kernel/trap.c`. - -- You only want to manipulate a process's alarm ticks if there's a timer interrupt; you want something like - - ``` - if(which_dev == 2) ... - ``` - -- Only invoke the alarm function if the process has a timer outstanding. Note that the address of the user's alarm function might be 0 (e.g., in user/alarmtest.asm, `periodic` is at address 0). - -- You'll need to modify `usertrap()` so that when a process's alarm interval expires, the user process executes the handler function. When a trap on the RISC-V returns to user space, what determines the instruction address at which user-space code resumes execution? 
- -- It will be easier to look at traps with gdb if you tell qemu to use only one CPU, which you can do by running - - ``` - make CPUS=1 qemu-gdb - ``` - -- You've succeeded if alarmtest prints ""alarm!"". - -### test1/test2()/test3(): resume interrupted code - -Chances are that alarmtest crashes in test0 or test1 after it prints ""alarm!"", or that alarmtest (eventually) prints ""test1 failed"", or that alarmtest exits without printing ""test1 passed"". To fix this, you must ensure that, when the alarm handler is done, control returns to the instruction at which the user program was originally interrupted by the timer interrupt. You must ensure that the register contents are restored to the values they held at the time of the interrupt, so that the user program can continue undisturbed after the alarm. Finally, you should ""re-arm"" the alarm counter after each time it goes off, so that the handler is called periodically. - -As a starting point, we've made a design decision for you: user alarm handlers are required to call the `sigreturn` system call when they have finished. Have a look at `periodic` in `alarmtest.c` for an example. This means that you can add code to `usertrap` and `sys_sigreturn` that cooperate to cause the user process to resume properly after it has handled the alarm. - -Some hints: - -- Your solution will require you to save and restore registers---what registers do you need to save and restore to resume the interrupted code correctly? (Hint: it will be many). -- Have `usertrap` save enough state in `struct proc` when the timer goes off that `sigreturn` can correctly return to the interrupted user code. -- Prevent re-entrant calls to the handler----if a handler hasn't returned yet, the kernel shouldn't call it again. `test2` tests this. -- Make sure to restore a0. `sigreturn` is a system call, and its return value is stored in a0. - -Once you pass `test0`, `test1`, `test2`, and `test3` run `usertests -q` to make sure you didn't break any other parts of the kernel.","In this exercise you'll add a feature to xv6 that periodically alerts a process as it uses CPU time. This might be useful for compute-bound processes that want to limit how much CPU time they chew up, or for processes that want to compute but also want to take some periodic action. More generally, you'll be implementing a primitive form of user-level interrupt/fault handlers; you could use something similar to handle page faults in the application, for example. Your solution is correct if it passes alarmtest and 'usertests -q'","- You'll need to modify the Makefile to cause `alarmtest.c` to be compiled as an xv6 user program. - -- The right declarations to put in user/user.h are: - - ``` - int sigalarm(int ticks, void (*handler)()); - int sigreturn(void); - ``` - -- Update user/usys.pl (which generates user/usys.S), kernel/syscall.h, and kernel/syscall.c to allow `alarmtest` to invoke the sigalarm and sigreturn system calls. - -- For now, your `sys_sigreturn` should just return zero. - -- Your `sys_sigalarm()` should store the alarm interval and the pointer to the handler function in new fields in the `proc` structure (in `kernel/proc.h`). - -- You'll need to keep track of how many ticks have passed since the last call (or are left until the next call) to a process's alarm handler; you'll need a new field in `struct proc` for this too. You can initialize `proc` fields in `allocproc()` in `proc.c`. - -- Every tick, the hardware clock forces an interrupt, which is handled in `usertrap()` in `kernel/trap.c`. 
- -- You only want to manipulate a process's alarm ticks if there's a timer interrupt; you want something like - - ``` - if(which_dev == 2) ... - ``` - -- Only invoke the alarm function if the process has a timer outstanding. Note that the address of the user's alarm function might be 0 (e.g., in user/alarmtest.asm, `periodic` is at address 0). - -- You'll need to modify `usertrap()` so that when a process's alarm interval expires, the user process executes the handler function. When a trap on the RISC-V returns to user space, what determines the instruction address at which user-space code resumes execution? - -- It will be easier to look at traps with gdb if you tell qemu to use only one CPU, which you can do by running - - ``` - make CPUS=1 qemu-gdb - ``` - -- You've succeeded if alarmtest prints ""alarm!"".",," $ git fetch - $ git checkout traps - $ make clean",,xv6-labs-2024,alarmtest,"test0 start -........alarm! -test0 passed -test1 start -...alarm! -..alarm! -...alarm! -..alarm! -...alarm! -..alarm! -...alarm! -..alarm! -...alarm! -..alarm! -test1 passed -test2 start -................alarm! -test2 passed -test3 start -test3 passed -$ usertest -q -... -ALL TESTS PASSED",hard,https://pdos.csail.mit.edu/6.1810/2024/labs/traps.html -43,6.1810: Operating System Engineering,2024,Lab: Copy-on-Write Fork for xv6,Implement copy-on-write fork,"Virtual memory provides a level of indirection: the kernel can intercept memory references by marking PTEs invalid or read-only, leading to page faults, and can change what addresses mean by modifying PTEs. There is a saying in computer systems that any systems problem can be solved with a level of indirection. This lab explores an example: copy-on-write fork. - -To start the lab, switch to the cow branch: - -``` -$ git fetch -$ git checkout cow -$ make clean -``` - -## The problem - -The fork() system call in xv6 copies all of the parent process's user-space memory into the child. If the parent is large, copying can take a long time. Worse, the work is often largely wasted: fork() is commonly followed by exec() in the child, which discards the copied memory, usually without using most of it. On the other hand, if both parent and child use a copied page, and one or both writes it, the copy is truly needed. - -## The solution - -Your goal in implementing copy-on-write (COW) fork() is to defer allocating and copying physical memory pages until the copies are actually needed, if ever. - -COW fork() creates just a pagetable for the child, with PTEs for user memory pointing to the parent's physical pages. COW fork() marks all the user PTEs in both parent and child as read-only. When either process tries to write one of these COW pages, the CPU will force a page fault. The kernel page-fault handler detects this case, allocates a page of physical memory for the faulting process, copies the original page into the new page, and modifies the relevant PTE in the faulting process to refer to the new page, this time with the PTE marked writeable. When the page fault handler returns, the user process will be able to write its copy of the page. - -COW fork() makes freeing of the physical pages that implement user memory a little trickier. A given physical page may be referred to by multiple processes' page tables, and should be freed only when the last reference disappears. 
In a simple kernel like xv6 this bookkeeping is reasonably straightforward, but in production kernels this can be difficult to get right; see, for example, [Patching until the COWs come home](https://lwn.net/Articles/849638/).",,,"Your task is to implement copy-on-write fork in the xv6 kernel. You are done if your modified kernel executes both the cowtest and 'usertests -q' programs successfully. - -To help you test your implementation, we've provided an xv6 program called cowtest (source in user/cowtest.c). cowtest runs various tests, but even the first will fail on unmodified xv6. Thus, initially, you will see: - -``` -$ cowtest -simple: fork() failed -$ -``` - -The ""simple"" test allocates more than half of available physical memory, and then fork()s. The fork fails because there is not enough free physical memory to give the child a complete copy of the parent's memory. - -When you are done, your kernel should pass all the tests in both cowtest and usertests -q. That is: - -``` -$ cowtest -simple: ok -simple: ok -three: ok -three: ok -three: ok -file: ok -forkfork: ok -ALL COW TESTS PASSED -$ usertests -q -... -ALL TESTS PASSED -$ -``` - -Here's a reasonable plan of attack. - -1. Modify uvmcopy() to map the parent's physical pages into the child, instead of allocating new pages. Clear `PTE_W` in the PTEs of both child and parent for pages that have `PTE_W` set. -2. Modify usertrap() to recognize page faults. When a write page-fault occurs on a COW page that was originally writeable, allocate a new page with kalloc(), copy the old page to the new page, and install the new page in the PTE with `PTE_W` set. Pages that were originally read-only (not mapped `PTE_W`, like pages in the text segment) should remain read-only and shared between parent and child; a process that tries to write such a page should be killed. -3. Ensure that each physical page is freed when the last PTE reference to it goes away -- but not before. A good way to do this is to keep, for each physical page, a ""reference count"" of the number of user page tables that refer to that page. Set a page's reference count to one when `kalloc()` allocates it. Increment a page's reference count when fork causes a child to share the page, and decrement a page's count each time any process drops the page from its page table. `kfree()` should only place a page back on the free list if its reference count is zero. It's OK to to keep these counts in a fixed-size array of integers. You'll have to work out a scheme for how to index the array and how to choose its size. For example, you could index the array with the page's physical address divided by 4096, and give the array a number of elements equal to highest physical address of any page placed on the free list by `kinit()` in kalloc.c. Feel free to modify kalloc.c (e.g., `kalloc()` and `kfree()`) to maintain the reference counts. -4. Modify copyout() to use the same scheme as page faults when it encounters a COW page. - -Some hints: - -- It may be useful to have a way to record, for each PTE, whether it is a COW mapping. You can use the RSW (reserved for software) bits in the RISC-V PTE for this. -- `usertests -q` explores scenarios that `cowtest` does not test, so don't forget to check that all tests pass for both. -- Some helpful macros and definitions for page table flags are at the end of `kernel/riscv.h`. -- If a COW page fault occurs and there's no free memory, the process should be killed.",Your task is to implement copy-on-write fork in the xv6 kernel. 
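As a concrete illustration of the reference-count bookkeeping in step 3 of the plan above, here is a hedged sketch; the struct layout and the helper names `refcnt_inc`/`refcnt_dec` are assumptions made for illustration, not part of the provided code:

```
// Sketch only: a fixed-size reference count per physical page, indexed by
// physical address / PGSIZE and protected by its own lock (initialize the
// lock with initlock() during boot, e.g. from kinit()).
struct {
  struct spinlock lock;
  int count[PHYSTOP / PGSIZE];
} refcnt;

void
refcnt_inc(uint64 pa)
{
  acquire(&refcnt.lock);
  refcnt.count[pa / PGSIZE]++;
  release(&refcnt.lock);
}

// Returns the new count, so kfree() can decide whether to really free the page.
int
refcnt_dec(uint64 pa)
{
  acquire(&refcnt.lock);
  int c = --refcnt.count[pa / PGSIZE];
  release(&refcnt.lock);
  return c;
}
```

In this scheme `kalloc()` sets a page's count to one, the COW path in `uvmcopy()` calls `refcnt_inc()` for each shared page, and `kfree()` puts a page back on the free list only when `refcnt_dec()` reports zero.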
You are done if your modified kernel executes both the cowtest and 'usertests -q' programs successfully.,"- It may be useful to have a way to record, for each PTE, whether it is a COW mapping. You can use the RSW (reserved for software) bits in the RISC-V PTE for this. -- `usertests -q` explores scenarios that `cowtest` does not test, so don't forget to check that all tests pass for both. -- Some helpful macros and definitions for page table flags are at the end of `kernel/riscv.h`. -- If a COW page fault occurs and there's no free memory, the process should be killed.",,"$ git fetch -$ git checkout cow -$ make clean",,xv6-labs-2024,usertests -q,"... -ALL TESTS PASSED",hard,https://pdos.csail.mit.edu/6.1810/2024/labs/cow.html -44,6.1810: Operating System Engineering,2024,Lab: networking,Part One: NIC,"In this lab you will write an xv6 device driver for a network interface card (NIC), and then write the receive half of an ethernet/IP/UDP protocol processing stack. - -Fetch the xv6 source for the lab and check out the `net` branch: - -``` - $ git fetch - $ git checkout net - $ make clean -``` - -## Background - -Before writing code, you may find it helpful to review ""Chapter 5: Interrupts and device drivers"" in the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf). - -You'll use a network device called the E1000 to handle network communication. To xv6 (and the driver you write), the E1000 looks like a real piece of hardware connected to a real Ethernet local area network (LAN). In fact, the E1000 your driver will talk to is an emulation provided by qemu, connected to a LAN that is also emulated by qemu. On this emulated LAN, xv6 (the ""guest"") has an IP address of 10.0.2.15. Qemu arranges for the computer running qemu (the ""host"") to appear on the LAN with IP address 10.0.2.2. When xv6 uses the E1000 to send a packet to 10.0.2.2, qemu delivers the packet to the appropriate application on the host. - -You will use QEMU's ""user-mode network stack"". QEMU's documentation has more about the user-mode stack [here](https://wiki.qemu.org/Documentation/Networking#User_Networking_.28SLIRP.29). We've updated the Makefile to enable QEMU's user-mode network stack and E1000 network card emulation. - -The Makefile configures QEMU to record all incoming and outgoing packets to the file `packets.pcap` in your lab directory. It may be helpful to review these recordings to confirm that xv6 is transmitting and receiving the packets you expect. To display the recorded packets: - -``` -tcpdump -XXnr packets.pcap -``` - -We've added some files to the xv6 repository for this lab. The file `kernel/e1000.c` contains initialization code for the E1000 as well as empty functions for transmitting and receiving packets, which you'll fill in. `kernel/e1000_dev.h` contains definitions for registers and flag bits defined by the E1000 and described in the Intel E1000 [Software Developer's Manual](https://pdos.csail.mit.edu/6.1810/2024/readings/8254x_GBe_SDM.pdf). `kernel/net.c` and `kernel/net.h` contain simple network stack that implements the [IP](https://en.wikipedia.org/wiki/Internet_Protocol), [UDP](https://en.wikipedia.org/wiki/User_Datagram_Protocol), and [ARP](https://en.wikipedia.org/wiki/Address_Resolution_Protocol) protocols; `net.c` has complete code for user processes to send UDP packets, but lacks most of the code to receive packets and deliver them to user space. 
Finally, `kernel/pci.c` contains code that searches for an E1000 card on the PCI bus when xv6 boots.",,,"Your job is to complete `e1000_transmit()` and `e1000_recv()`, both in `kernel/e1000.c`, so that the driver can transmit and receive packets. You are done with this part when `make grade` says your solution passes the ""txone"" and ""rxone"" tests. - -While writing your code, you'll find yourself referring to the E1000 [Software Developer's Manual](https://pdos.csail.mit.edu/6.1810/2024/readings/8254x_GBe_SDM.pdf). Of particular help may be the following sections: - -- Section 2 is essential and gives an overview of the entire device. -- Section 3.2 gives an overview of packet receiving. -- Section 3.3 gives an overview of packet transmission, alongside section 3.4. -- Section 13 gives an overview of the registers used by the E1000. -- Section 14 may help you understand the init code that we've provided. - -Browse the E1000 [Software Developer's Manual](https://pdos.csail.mit.edu/6.1810/2024/readings/8254x_GBe_SDM.pdf). This manual covers several closely related Ethernet controllers. QEMU emulates the 82540EM. Skim Chapter 2 now to get a feel for the device. To write your driver, you'll need to be familiar with Chapters 3 and 14, as well as 4.1 (though not 4.1's subsections). You'll also need to use Chapter 13 as a reference. The other chapters mostly cover components of the E1000 that your driver won't have to interact with. Don't worry about the details at first; just get a feel for how the document is structured so you can find things later. The E1000 has many advanced features, most of which you can ignore. Only a small set of basic features is needed to complete this lab. - -The `e1000_init()` function we provide you in `e1000.c` configures the E1000 to read packets to be transmitted from RAM, and to write received packets to RAM. This technique is called DMA, for direct memory access, referring to the fact that the E1000 hardware directly writes and reads packets to/from RAM. - -Because bursts of packets might arrive faster than the driver can process them, `e1000_init()` provides the E1000 with multiple buffers into which the E1000 can write packets. The E1000 requires these buffers to be described by an array of ""descriptors"" in RAM; each descriptor contains an address in RAM where the E1000 can write a received packet. `struct rx_desc` describes the descriptor format. The array of descriptors is called the receive ring, or receive queue. It's a circular ring in the sense that when the card or driver reaches the end of the array, it wraps back to the beginning. `e1000_init()` allocates packet buffers with `kalloc()` for the E1000 to DMA into. There is also a transmit ring into which the driver should place packets it wants the E1000 to send. `e1000_init()` configures the two rings to have size `RX_RING_SIZE` and `TX_RING_SIZE`. - -When the network stack in `net.c` needs to send a packet, it calls `e1000_transmit()` with a pointer to a buffer that holds the packet to be sent; `net.c` allocates this buffer with `kalloc()`. Your transmit code must place a pointer to the packet data in a descriptor in the TX (transmit) ring. `struct tx_desc` describes the descriptor format. You will need to ensure that each buffer is eventually passed to `kfree()`, but only after the E1000 has finished transmitting the packet (the E1000 sets the `E1000_TXD_STAT_DD` bit in the descriptor to indicate this). 
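To make the transmit path concrete before the detailed hints later in this lab, here is a hedged sketch of what `e1000_transmit()` might look like; the function signature, the descriptor field names, the `E1000_TXD_CMD_*` flags, and the `tx_ring`/`tx_bufs`/`e1000_lock` variables are assumptions based on the lab text and the provided skeleton, not a verified solution:

```
// Sketch only: queue one packet on the TX ring, freeing the buffer that was
// previously sent from the chosen slot. Names of the ring/lock variables and
// command flags are assumed, not checked against the provided e1000.c.
int
e1000_transmit(char *buf, int len)
{
  acquire(&e1000_lock);

  uint32 i = regs[E1000_TDT];                  // slot the card expects next
  if((tx_ring[i].status & E1000_TXD_STAT_DD) == 0){
    release(&e1000_lock);                      // previous send not finished: ring full
    return -1;
  }
  if(tx_bufs[i])
    kfree(tx_bufs[i]);                         // buffer already transmitted from this slot

  tx_ring[i].addr = (uint64) buf;
  tx_ring[i].length = len;
  tx_ring[i].cmd = E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS;  // end of packet, report status
  tx_bufs[i] = buf;                            // remember it so it can be freed later

  regs[E1000_TDT] = (i + 1) % TX_RING_SIZE;    // hand the slot to the hardware
  release(&e1000_lock);
  return 0;
}
```

Returning -1 on a full ring lets the caller free the buffer itself, as the hints below require.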
- -When the E1000 receives each packet from the ethernet, it DMAs the packet to the memory pointed to by `addr` in the next RX (receive) ring descriptor. If an E1000 interrupt is not already pending, the E1000 asks the PLIC to deliver one as soon as interrupts are enabled. Your `e1000_recv()` code must scan the RX ring and deliver each new packet to the network stack (in `net.c`) by calling `net_rx()`. You will then need to allocate a new buffer and place it into the descriptor, so that when the E1000 reaches that point in the RX ring again it finds a fresh buffer into which to DMA a new packet. - -In addition to reading and writing the descriptor rings in RAM, your driver will need to interact with the E1000 through its memory-mapped control registers, to detect when received packets are available and to inform the E1000 that the driver has filled in some TX descriptors with packets to send. The global variable `regs` holds a pointer to the E1000's first control register; your driver can get at the other registers by indexing `regs` as an array. You'll need to use indices `E1000_RDT` and `E1000_TDT` in particular. - -To test e1000_transmit() sending a single packet, run `python3 nettest.py txone` in one window, and in another window run `make qemu` and then run `nettest txone` in xv6, which sends a single packet. `nettest.py` will print `txone: OK` if all went well (i.e. qemu's e1000 emulator saw the packet on the DMA ring and forwarded it outside of qemu). - -If transmitting worked, tcpdump -XXnr packets.pcap shold produce output like this: - -``` -reading from file packets.pcap, link-type EN10MB (Ethernet) -21:27:31.688123 IP 10.0.2.15.2000 > 10.0.2.2.25603: UDP, length 5 - 0x0000: 5255 0a00 0202 5254 0012 3456 0800 4500 RU....RT..4V..E. - 0x0010: 0021 0000 0000 6411 3ebc 0a00 020f 0a00 .!....d.>....... - 0x0020: 0202 07d0 6403 000d 0000 7478 6f6e 65 ....d.....txone -``` - -To test e1000_recv() receiving two packets (an ARP query, then a IP/UDP packet), run `make qemu` in one window, and `python3 nettest.py rxone` in another window. `nettest.py rxone` sends a single UDP packet via qemu to xv6; qemu actually first sends an ARP request to xv6, and (after xv6 returns an ARP reply) qemu forwards the UDP packet to xv6. If e1000_recv() works correctly and passes those packets to `net_rx()`, `net.c` should print - -``` -arp_rx: received an ARP packet -ip_rx: received an IP packet -``` - -`net.c` already contains the code to detect qemu's ARP request and call `e1000_transmit()` to send its reply. This test requires that both e1000_transmit() and e1000_recv() work. In addition, if all went well, tcpdump -XXnr packets.pcap should produce output like this: - -``` -reading from file packets.pcap, link-type EN10MB (Ethernet) -21:29:16.893600 ARP, Request who-has 10.0.2.15 tell 10.0.2.2, length 28 - 0x0000: ffff ffff ffff 5255 0a00 0202 0806 0001 ......RU........ - 0x0010: 0800 0604 0001 5255 0a00 0202 0a00 0202 ......RU........ - 0x0020: 0000 0000 0000 0a00 020f .......... -21:29:16.894543 ARP, Reply 10.0.2.15 is-at 52:54:00:12:34:56, length 28 - 0x0000: 5255 0a00 0202 5254 0012 3456 0806 0001 RU....RT..4V.... - 0x0010: 0800 0604 0002 5254 0012 3456 0a00 020f ......RT..4V.... - 0x0020: 5255 0a00 0202 0a00 0202 RU........ -21:29:16.902656 IP 10.0.2.2.61350 > 10.0.2.15.2000: UDP, length 3 - 0x0000: 5254 0012 3456 5255 0a00 0202 0800 4500 RT..4VRU......E. - 0x0010: 001f 0000 0000 4011 62be 0a00 0202 0a00 ......@.b....... 
- 0x0020: 020f efa6 07d0 000b fdd6 7879 7a ..........xyz -``` - -Your output will look somewhat different, but it should contain the strings ""ARP, Request"", ""ARP, Reply"", ""UDP"", and ""....xyz"". - -If both of the above tests work, then `make grade` should show that the first two tests pass. - -## e1000 hints - -Start by adding print statements to `e1000_transmit()` and `e1000_recv()`, and running (in xv6) `nettest txone`. You should see from your print statements that `nettest txone` generates a call to `e1000_transmit`. - -Some hints for implementing `e1000_transmit`: - -- First ask the E1000 for the TX ring index at which it's expecting the next packet, by reading the `E1000_TDT` control register. -- Then check if the the ring is overflowing. If `E1000_TXD_STAT_DD` is not set in the descriptor indexed by `E1000_TDT`, the E1000 hasn't finished the corresponding previous transmission request, so return an error. -- Otherwise, use `kfree()` to free the last buffer that was transmitted from that descriptor (if there was one). -- Then fill in the descriptor. Set the necessary cmd flags (look at Section 3.3 in the E1000 manual) and stash away a pointer to the buffer for later freeing. -- Finally, update the ring position by adding one to `E1000_TDT` modulo `TX_RING_SIZE`. -- If `e1000_transmit()` added the packet successfully to the ring, return 0. On failure (e.g., there is no descriptor available), return -1 so that the caller knows to free the buffer. - -Some hints for implementing `e1000_recv`: - -- First ask the E1000 for the ring index at which the next waiting received packet (if any) is located, by fetching the `E1000_RDT` control register and adding one modulo `RX_RING_SIZE`. -- Then check if a new packet is available by checking for the `E1000_RXD_STAT_DD` bit in the `status` portion of the descriptor. If not, stop. -- Deliver the packet buffer to the network stack by calling `net_rx()`. -- Then allocate a new buffer using `kalloc()` to replace the one just given to `net_rx()`. Clear the descriptor's status bits to zero. -- Finally, update the `E1000_RDT` register to be the index of the last ring descriptor processed. -- `e1000_init()` initializes the RX ring with buffers, and you'll want to look at how it does that and perhaps borrow code. -- At some point the total number of packets that have ever arrived will exceed the ring size (16); make sure your code can handle that. -- The e1000 can deliver more than one packet per interrupt; your `e1000_recv` should handle that situation. - -You'll need locks to cope with the possibility that xv6 might use the E1000 from more than one process, or might be using the E1000 in a kernel thread when an interrupt arrives.","Your job is to complete `e1000_transmit()` and `e1000_recv()`, both in `kernel/e1000.c`, so that the driver can transmit and receive packets. You are done with this part when `make grade` says your solution passes the ""txone"" and ""rxone"" tests.","Start by adding print statements to `e1000_transmit()` and `e1000_recv()`, and running (in xv6) `nettest txone`. You should see from your print statements that `nettest txone` generates a call to `e1000_transmit`. - -Some hints for implementing `e1000_transmit`: - -- First ask the E1000 for the TX ring index at which it's expecting the next packet, by reading the `E1000_TDT` control register. -- Then check if the the ring is overflowing. 
If `E1000_TXD_STAT_DD` is not set in the descriptor indexed by `E1000_TDT`, the E1000 hasn't finished the corresponding previous transmission request, so return an error. -- Otherwise, use `kfree()` to free the last buffer that was transmitted from that descriptor (if there was one). -- Then fill in the descriptor. Set the necessary cmd flags (look at Section 3.3 in the E1000 manual) and stash away a pointer to the buffer for later freeing. -- Finally, update the ring position by adding one to `E1000_TDT` modulo `TX_RING_SIZE`. -- If `e1000_transmit()` added the packet successfully to the ring, return 0. On failure (e.g., there is no descriptor available), return -1 so that the caller knows to free the buffer. - -Some hints for implementing `e1000_recv`: - -- First ask the E1000 for the ring index at which the next waiting received packet (if any) is located, by fetching the `E1000_RDT` control register and adding one modulo `RX_RING_SIZE`. -- Then check if a new packet is available by checking for the `E1000_RXD_STAT_DD` bit in the `status` portion of the descriptor. If not, stop. -- Deliver the packet buffer to the network stack by calling `net_rx()`. -- Then allocate a new buffer using `kalloc()` to replace the one just given to `net_rx()`. Clear the descriptor's status bits to zero. -- Finally, update the `E1000_RDT` register to be the index of the last ring descriptor processed. -- `e1000_init()` initializes the RX ring with buffers, and you'll want to look at how it does that and perhaps borrow code. -- At some point the total number of packets that have ever arrived will exceed the ring size (16); make sure your code can handle that. -- The e1000 can deliver more than one packet per interrupt; your `e1000_recv` should handle that situation. - -You'll need locks to cope with the possibility that xv6 might use the E1000 from more than one process, or might be using the E1000 in a kernel thread when an interrupt arrives.",,"$ git fetch -$ git checkout net -$ make clean",,xv6-labs-2024, tcpdump -XXnr packets.pcap,"reading from file packets.pcap, link-type EN10MB (Ethernet) -21:29:16.893600 ARP, Request who-has 10.0.2.15 tell 10.0.2.2, length 28 - 0x0000: ffff ffff ffff 5255 0a00 0202 0806 0001 ......RU........ - 0x0010: 0800 0604 0001 5255 0a00 0202 0a00 0202 ......RU........ - 0x0020: 0000 0000 0000 0a00 020f .......... -21:29:16.894543 ARP, Reply 10.0.2.15 is-at 52:54:00:12:34:56, length 28 - 0x0000: 5255 0a00 0202 5254 0012 3456 0806 0001 RU....RT..4V.... - 0x0010: 0800 0604 0002 5254 0012 3456 0a00 020f ......RT..4V.... - 0x0020: 5255 0a00 0202 0a00 0202 RU........ -21:29:16.902656 IP 10.0.2.2.61350 > 10.0.2.15.2000: UDP, length 3 - 0x0000: 5254 0012 3456 5255 0a00 0202 0800 4500 RT..4VRU......E. - 0x0010: 001f 0000 0000 4011 62be 0a00 0202 0a00 ......@.b....... - 0x0020: 020f efa6 07d0 000b fdd6 7879 7a ..........xyz",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/net.html -45,6.1810: Operating System Engineering,2024,Lab: networking,Part Two: UDP Receive,"In this lab you will write an xv6 device driver for a network interface card (NIC), and then write the receive half of an ethernet/IP/UDP protocol processing stack. - -Fetch the xv6 source for the lab and check out the `net` branch: - -``` - $ git fetch - $ git checkout net - $ make clean -``` - -## Background - -Before writing code, you may find it helpful to review ""Chapter 5: Interrupts and device drivers"" in the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf). 
- -You'll use a network device called the E1000 to handle network communication. To xv6 (and the driver you write), the E1000 looks like a real piece of hardware connected to a real Ethernet local area network (LAN). In fact, the E1000 your driver will talk to is an emulation provided by qemu, connected to a LAN that is also emulated by qemu. On this emulated LAN, xv6 (the ""guest"") has an IP address of 10.0.2.15. Qemu arranges for the computer running qemu (the ""host"") to appear on the LAN with IP address 10.0.2.2. When xv6 uses the E1000 to send a packet to 10.0.2.2, qemu delivers the packet to the appropriate application on the host. - -You will use QEMU's ""user-mode network stack"". QEMU's documentation has more about the user-mode stack [here](https://wiki.qemu.org/Documentation/Networking#User_Networking_.28SLIRP.29). We've updated the Makefile to enable QEMU's user-mode network stack and E1000 network card emulation. - -The Makefile configures QEMU to record all incoming and outgoing packets to the file `packets.pcap` in your lab directory. It may be helpful to review these recordings to confirm that xv6 is transmitting and receiving the packets you expect. To display the recorded packets: - -``` -tcpdump -XXnr packets.pcap -``` - -We've added some files to the xv6 repository for this lab. The file `kernel/e1000.c` contains initialization code for the E1000 as well as empty functions for transmitting and receiving packets, which you'll fill in. `kernel/e1000_dev.h` contains definitions for registers and flag bits defined by the E1000 and described in the Intel E1000 [Software Developer's Manual](https://pdos.csail.mit.edu/6.1810/2024/readings/8254x_GBe_SDM.pdf). `kernel/net.c` and `kernel/net.h` contain simple network stack that implements the [IP](https://en.wikipedia.org/wiki/Internet_Protocol), [UDP](https://en.wikipedia.org/wiki/User_Datagram_Protocol), and [ARP](https://en.wikipedia.org/wiki/Address_Resolution_Protocol) protocols; `net.c` has complete code for user processes to send UDP packets, but lacks most of the code to receive packets and deliver them to user space. Finally, `kernel/pci.c` contains code that searches for an E1000 card on the PCI bus when xv6 boots.",,,"UDP, the User Datagram Protocol, allows user processes on different Internet hosts to exchange individual packets (datagrams). UDP is layered on top of IP. A user process indicates which host it wants to send a packet to by specifying a 32-bit IP address. Each UDP packet contains a source port number and a destination port number; processes can request to receive packets that arrive addressed to particular port numbers, and can specify the destination port number when sending. Thus two processes on different hosts can communicate with UDP if they know each others' IP addresses and the port numbers each is listening for. For example, Google operates a DNS name server on the host with IP address 8.8.8.8, listening on UDP port 53. - -In this task, you'll add code to `kernel/net.c` to receive UDP packets, queue them, and allow user processes to read them. `net.c` already contains the code required for user processes to transmit UDP packets (with the exception of e1000_transmit(), which you provide). - -Your job is to implement `ip_rx()`, `sys_recv()`, and `sys_bind()` in `kernel/net.c`. You are done when `make grade` says your solution passes all of the tests. 
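For orientation, here is a hedged sketch of how a user program might exercise this API once the kernel side exists, using the system-call signatures specified below; the port number, buffer size, and output format are arbitrary choices for the example:

```
// Sketch only: a user-level receiver for the UDP API specified below.
// Assumes bind() and recv() have been declared in user/user.h as part of
// the lab work; they do not exist until you implement them.
#include "kernel/types.h"
#include "user/user.h"

int
main(void)
{
  char buf[128];
  int src;
  short sport;

  bind(2000);   // start queuing packets addressed to port 2000
  int n = recv(2000, &src, &sport, buf, sizeof(buf));
  if(n >= 0)
    printf("got %d payload bytes from port %d\n", n, (int) sport);
  exit(0);
}
```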
- -You can run the same tests that `make grade` runs by running `python3 nettest.py grade` in one window, and (in another window) then running `nettest grade` inside xv6. If all goes well, `nettest.py` should print `txone: OK`, and you should see this in the xv6 window: - -``` -$ nettest grade -txone: sending one packet -arp_rx: received an ARP packet -ip_rx: received an IP packet -ping0: starting -ping0: OK -ping1: starting -ping1: OK -ping2: starting -ping2: OK -ping3: starting -ping3: OK -dns: starting -DNS arecord for pdos.csail.mit.edu. is 128.52.129.126 -dns: OK -``` - -The system-call API specification for UDP looks like this: - -- `send(short sport, int dst, short dport, char *buf, int len)`: This system call sends a UDP packet to the host with IP address `dst`, and (on that host) the process listening to port `dport`. The packet's source port number will be `sport` (this port number is reported to the receiving process, so that it can reply to the sender). The content (""payload"") of the UDP packet will the `len` bytes at address `buf`. The return value is 0 on success, and -1 on failure. -- `recv(short dport, int *src, short *sport, char *buf, int maxlen)`: This system call returns the payload of a UDP packet that arrives with destination port `dport`. If one or more packets arrived before the call to `recv()`, it should return right away with the earliest waiting packet. If no packets are waiting, `recv()` should wait until a packet for `dport` arrives. `recv()` should see arriving packets for a given port in arrival order. `recv()` copies the packet's 32-bit source IP address to `*src`, copies the packet's 16-bit UDP source port number to `*sport`, copies at most `maxlen` bytes of the packet's UDP payload to `buf`, and removes the packet from the queue. The system call returns the number of bytes of the UDP payload copied, or -1 if there was an error. -- `bind(short port)`: A process should call `bind(port)` before it calls `recv(port, ...)`. If a UDP packet arrives with a destination port that hasn't been passed to `bind()`, `net.c` should discard that packet. The reason for this system call is to initialize any structures `net.c` needs in order to store arriving packets for a subsequent `recv()` call. -- `unbind(short port)`: You do not need to implement this system call, since the test code does not use it. But you can if you like in order to provide symmetry with `bind()`. - -All the addresses and port numbers passed as arguments to these system calls, and returned by them, must be in host byte order (see below). - -You'll need to provide the kernel implementations of the system calls, with the exception of `send()`. The program `user/nettest.c` uses this API. - -To make `recv()` work, you'll need to add code to `ip_rx()`, which `net_rx()` calls for each received IP packet. `ip_rx()` should decide if the arriving packet is UDP, and whether its destination port has been passed to `bind()`; if both are true, it should save the packet where `recv()` can find it. However, for any given port, no more than 16 packets should be saved; if 16 are already waiting for `recv()`, an incoming packet for that port should be dropped. The point of this rule is to prevent a fast or abusive sender from forcing xv6 to run out of memory. Furthermore, if packets are being dropped for one port because it already has 16 packets waiting, that should not affect packets arriving for other ports. 
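One possible shape for that per-port bookkeeping, sketched under the assumption that `ip_rx()` queues whole packet buffers and that `recv()` re-parses the headers described next to extract the source address and port; the struct and names are illustrative, not part of the provided `net.c`:

```
// Sketch only: one queue of waiting packets per bound port.
#define NPORT   16   // max simultaneously bound ports (arbitrary for the sketch)
#define NPACKET 16   // per-port queue limit required by the lab

struct udpq {
  struct spinlock lock;
  short port;            // bound destination port; 0 marks an unused slot
  char *bufs[NPACKET];   // whole packet buffers, in arrival order
  int lens[NPACKET];     // total length of each queued packet
  int head;              // index of the next packet recv() should return
  int count;             // how many packets are currently queued
};

struct udpq udpqs[NPORT];
```

With something like this, `ip_rx()` appends under the lock and drops (and frees) an incoming packet when `count == NPACKET`, while `sys_recv()` sleeps on the queue until `count > 0`, copies the payload out to user space, and frees the buffer.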
- -The packet buffers that `ip_rx()` looks at contain a 14-byte ethernet header, followed by a 20-byte IP header, followed by an 8-byte UDP header, followed by the UDP payload. You'll find C struct definitions for each of these in `kernel/net.h`. Wikipedia has a description of the IP header [here](https://en.wikipedia.org/wiki/Internet_Protocol_version_4#Header), and UDP [here](https://en.wikipedia.org/wiki/User_Datagram_Protocol). - -Production IP/UDP implementations are complex, handling protocol options and validating invariants. You only need to do enough to pass `make grade`. Your code needs to look at ip_p and ip_src in the IP header, and dport, sport, and ulen in the UDP header. - -You will have to pay attention to byte order. Ethernet, IP, and UDP header fields that contain multi-byte integers place the most significant byte first in the packet. The RISC-V CPU, when it lays out a multi-byte integer in memory, places the least-significant byte first. This means that, when code extracts a multi-byte integer from a packet, it must re-arrange the bytes. This applies to short (2-byte) and int (4-byte) fields. You can use the `ntohs()` and `ntohl()` functions for 2-byte and 4-byte fields, respectively. Look at `net_rx()` for an example of this when looking at the 2-byte ethernet type field. - -If there are errors or omissions in your E1000 code, they may only start to cause problems during the ping tests. For example, the ping tests send and receive enough packets that the descriptor ring indices will wrap around. - -Some hints: - -- Create a struct to keep track of bound ports and the packets in their queues. -- Refer to the `sleep(void *chan, struct spinlock *lk)` and `wakeup(void *chan)` functions in `kernel/proc.c` to implement the waiting logic for `recv()`. -- The destination addresses that `sys_recv()` copies the packets to are virtual addresses; you will have to copy from the kernel to the current user process. -- Make sure to free packets that have been copied over or have been dropped.","Your job is to implement `ip_rx()`, `sys_recv()`, and `sys_bind()` in `kernel/net.c`. You are done when `make grade` says your solution passes all of the tests. - -You can run the same tests that `make grade` runs by running `python3 nettest.py grade` in one window, and (in another window) then running `nettest grade` inside xv6. If all goes well, `nettest.py` should print `txone: OK`, and you should see this in the xv6 window: - -``` -$ nettest grade -txone: sending one packet -arp_rx: received an ARP packet -ip_rx: received an IP packet -ping0: starting -ping0: OK -ping1: starting -ping1: OK -ping2: starting -ping2: OK -ping3: starting -ping3: OK -dns: starting -DNS arecord for pdos.csail.mit.edu. is 128.52.129.126 -dns: OK -``` -","- Create a struct to keep track of bound ports and the packets in their queues. -- Refer to the `sleep(void *chan, struct spinlock *lk)` and `wakeup(void *chan)` functions in `kernel/proc.c` to implement the waiting logic for `recv()`. -- The destination addresses that `sys_recv()` copies the packets to are virtual addresses; you will have to copy from the kernel to the current user process. 
-- Make sure to free packets that have been copied over or have been dropped.",,"$ git fetch -$ git checkout net -$ make clean",,xv6-labs-2024,nettest grade,"txone: sending one packet -arp_rx: received an ARP packet -ip_rx: received an IP packet -ping0: starting -ping0: OK -ping1: starting -ping1: OK -ping2: starting -ping2: OK -ping3: starting -ping3: OK -dns: starting -DNS arecord for pdos.csail.mit.edu. is 128.52.129.126 -dns: OK",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/net.html -46,6.1810: Operating System Engineering,2024,Lab: locks,Memory allocator,"In this lab you'll gain experience in re-designing code to increase parallelism. A common symptom of poor parallelism on multi-core machines is high lock contention. Improving parallelism often involves changing both data structures and locking strategies in order to reduce contention. You'll do this for the xv6 memory allocator and block cache. - -Before writing code, make sure to read the following parts from the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf) : - -- Chapter 6: ""Locking"" and the corresponding code. -- Section 3.5: ""Code: Physical memory allocator"" -- Section 8.1 through 8.3: ""Overview"", ""Buffer cache layer"", and ""Code: Buffer cache"" - -``` - $ git fetch - $ git checkout lock - $ make clean -``` -",,,"The program user/kalloctest stresses xv6's memory allocator: three processes grow and shrink their address spaces, resulting in many calls to `kalloc` and `kfree`. `kalloc` and `kfree` obtain `kmem.lock`. kalloctest prints (as ""#test-and-set"") the number of loop iterations in `acquire` due to attempts to acquire a lock that another core already holds, for the `kmem` lock and a few other locks. The number of loop iterations in `acquire` is a rough measure of lock contention. The output of `kalloctest` looks similar to this before you start the lab: - -``` -$ kalloctest -start test1 -test1 results: ---- lock kmem/bcache stats -lock: kmem: #test-and-set 83375 #acquire() 433015 -lock: bcache: #test-and-set 0 #acquire() 1260 ---- top 5 contended locks: -lock: kmem: #test-and-set 83375 #acquire() 433015 -lock: proc: #test-and-set 23737 #acquire() 130718 -lock: virtio_disk: #test-and-set 11159 #acquire() 114 -lock: proc: #test-and-set 5937 #acquire() 130786 -lock: proc: #test-and-set 4080 #acquire() 130786 -tot= 83375 -test1 FAIL -start test2 -total free number of pages: 32497 (out of 32768) -..... -test2 OK -start test3 -child done 1 -child done 100000 -test3 OK -start test2 -total free number of pages: 32497 (out of 32768) -..... -test2 OK -start test3 -..........child done 100000 ---- lock kmem/bcache stats -lock: kmem: #test-and-set 28002 #acquire() 4228151 -lock: bcache: #test-and-set 0 #acquire() 1374 ---- top 5 contended locks: -lock: virtio_disk: #test-and-set 96998 #acquire() 147 -lock: kmem: #test-and-set 28002 #acquire() 4228151 -lock: proc: #test-and-set 6802 #acquire() 7125 -lock: pr: #test-and-set 3321 #acquire() 5 -lock: log: #test-and-set 1912 #acquire() 68 -tot= 28002 -0 -test3 FAIL m 11720 n 28002 -``` - -You'll likely see different counts than shown here, and a different order for the top 5 contended locks. - -`acquire` maintains, for each lock, the count of calls to `acquire` for that lock, and the number of times the loop in `acquire` tried but failed to set the lock. kalloctest calls a system call that causes the kernel to print those counts for the kmem and bcache locks (which are the focus of this lab) and for the 5 most contended locks. 
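To make those two counters concrete, here is a simplified sketch of the idea only, not xv6's actual `acquire()`; the counter field names `nacquire` and `ntas` are invented for the illustration:

```
// Sketch only: count every call to acquire(), and count every failed
// test-and-set attempt while spinning. A high ntas relative to nacquire
// means the lock is contended.
void
acquire(struct spinlock *lk)
{
  push_off();                                   // disable interrupts to avoid deadlock
  __sync_fetch_and_add(&lk->nacquire, 1);       // "#acquire()" column (field name assumed)
  while(__sync_lock_test_and_set(&lk->locked, 1) != 0)
    __sync_fetch_and_add(&lk->ntas, 1);         // "#test-and-set" column (field name assumed)
  __sync_synchronize();
  lk->cpu = mycpu();
}
```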
If there is lock contention the number of `acquire` loop iterations will be large. The system call returns the sum of the number of loop iterations for the kmem and bcache locks. - -For this lab, you must use a dedicated unloaded machine with multiple cores. If you use a machine that is doing other things, the counts that kalloctest prints will be nonsense. You can use a dedicated Athena workstation, or your own laptop, but don't use a dialup machine. - -The root cause of lock contention in kalloctest is that `kalloc()` has a single free list, protected by a single lock. To remove lock contention, you will have to redesign the memory allocator to avoid a single lock and list. The basic idea is to maintain a free list per CPU, each list with its own lock. Allocations and frees on different CPUs can run in parallel, because each CPU will operate on a different list. The main challenge will be to deal with the case in which one CPU's free list is empty, but another CPU's list has free memory; in that case, the one CPU must ""steal"" part of the other CPU's free list. Stealing may introduce lock contention, but that will hopefully be infrequent. - -Your job is to implement per-CPU freelists, and stealing when a CPU's free list is empty. You must give all of your locks names that start with ""kmem"". That is, you should call `initlock` for each of your locks, and pass a name that starts with ""kmem"". Run kalloctest to see if your implementation has reduced lock contention. To check that it can still allocate all of memory, run `usertests sbrkmuch`. Your output will look similar to that shown below, with much-reduced contention in total on kmem locks, although the specific numbers will differ. Make sure all tests in `usertests -q` pass. `make grade` should say that the kalloctests pass. - -``` -$ kalloctest -start test1 -test1 results: ---- lock kmem/bcache stats -lock: kmem: #test-and-set 0 #acquire() 94703 -lock: kmem: #test-and-set 0 #acquire() 173699 -lock: kmem: #test-and-set 0 #acquire() 164725 -lock: bcache: #test-and-set 0 #acquire() 32 -lock: bcache.bucket: #test-and-set 0 #acquire() 38 -lock: bcache.bucket: #test-and-set 0 #acquire() 13 -lock: bcache.bucket: #test-and-set 0 #acquire() 22 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 30 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 88 -lock: bcache.bucket: #test-and-set 0 #acquire() 80 -lock: bcache.bucket: #test-and-set 0 #acquire() 1045 -lock: bcache.bucket: #test-and-set 0 #acquire() 16 -lock: bcache.bucket: #test-and-set 0 #acquire() 4 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 ---- top 5 contended locks: -lock: virtio_disk: #test-and-set 87542 #acquire() 147 -lock: proc: #test-and-set 37123 #acquire() 497420 -lock: proc: #test-and-set 27415 #acquire() 497425 -lock: wait_lock: #test-and-set 9650 #acquire() 12 -lock: pr: #test-and-set 4451 #acquire() 5 -tot= 0 -test1 OK -start test2 -total free number of pages: 32463 (out of 32768) -..... 
-test2 OK -start test3 -..........child done 100000 ---- lock kmem/bcache stats -lock: kmem: #test-and-set 758 #acquire() 1375324 -lock: kmem: #test-and-set 796 #acquire() 1864634 -lock: kmem: #test-and-set 1395 #acquire() 1779346 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: bcache: #test-and-set 0 #acquire() 32 -lock: bcache.bucket: #test-and-set 0 #acquire() 38 -lock: bcache.bucket: #test-and-set 0 #acquire() 13 -lock: bcache.bucket: #test-and-set 0 #acquire() 22 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 30 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 88 -lock: bcache.bucket: #test-and-set 0 #acquire() 84 -lock: bcache.bucket: #test-and-set 0 #acquire() 1145 -lock: bcache.bucket: #test-and-set 0 #acquire() 16 -lock: bcache.bucket: #test-and-set 0 #acquire() 4 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 ---- top 5 contended locks: -lock: proc: #test-and-set 135932 #acquire() 2617654 -lock: proc: #test-and-set 99612 #acquire() 5132219 -lock: virtio_disk: #test-and-set 87542 #acquire() 147 -lock: proc: #test-and-set 46889 #acquire() 2538791 -lock: proc: #test-and-set 33853 #acquire() 1817240 -tot= 2949 - -test3 OK -$ usertests sbrkmuch -usertests starting -test sbrkmuch: OK -ALL TESTS PASSED -$ usertests -q -... -ALL TESTS PASSED -$ -``` - -Some hints: - -- You can use the constant `NCPU` from kernel/param.h - -- Let `freerange` give all free memory to the CPU running `freerange`. - -- The function `cpuid` returns the current core number, but it's only safe to call it and use its result when interrupts are turned off. You should use `push_off()` and `pop_off()` to turn interrupts off and on. - -- Have a look at the `snprintf` function in kernel/sprintf.c for string formatting ideas. It is OK to just name all locks ""kmem"" though. - -- Optionally run your solution using xv6's race detector: - - ``` - $ make clean - $ make KCSAN=1 qemu - $ kalloctest - .. - - ``` - - The kalloctest may fail but you shouldn't see any races. 
If the xv6's race detector observes a race, it will print two stack traces describing the races along the following lines: - - ``` - == race detected == - backtrace for racing load - 0x000000008000ab8a - 0x000000008000ac8a - 0x000000008000ae7e - 0x0000000080000216 - 0x00000000800002e0 - 0x0000000080000f54 - 0x0000000080001d56 - 0x0000000080003704 - 0x0000000080003522 - 0x0000000080002fdc - backtrace for watchpoint: - 0x000000008000ad28 - 0x000000008000af22 - 0x000000008000023c - 0x0000000080000292 - 0x0000000080000316 - 0x000000008000098c - 0x0000000080000ad2 - 0x000000008000113a - 0x0000000080001df2 - 0x000000008000364c - 0x0000000080003522 - 0x0000000080002fdc - ========== - - ``` - - On your OS, you can turn a backtrace into function names with line numbers by cutting and pasting it into addr2line: - - ``` - $ riscv64-linux-gnu-addr2line -e kernel/kernel - 0x000000008000ab8a - 0x000000008000ac8a - 0x000000008000ae7e - 0x0000000080000216 - 0x00000000800002e0 - 0x0000000080000f54 - 0x0000000080001d56 - 0x0000000080003704 - 0x0000000080003522 - 0x0000000080002fdc - ctrl-d - kernel/kcsan.c:157 - kernel/kcsan.c:241 - kernel/kalloc.c:174 - kernel/kalloc.c:211 - kernel/vm.c:255 - kernel/proc.c:295 - kernel/sysproc.c:54 - kernel/syscall.c:251 - - ``` - - You are not required to run the race detector, but you might find it helpful. Note that the race detector slows xv6 down significantly, so you probably don't want to use it when running usertests . - -## Buffer cache ([hard](https://pdos.csail.mit.edu/6.1810/2024/labs/guidance.html))","Your job is to implement per-CPU freelists, and stealing when a CPU's free list is empty. You must give all of your locks names that start with ""kmem"". That is, you should call `initlock` for each of your locks, and pass a name that starts with ""kmem"". Run kalloctest to see if your implementation has reduced lock contention. To check that it can still allocate all of memory, run `usertests sbrkmuch`. Your output will look similar to that shown below, with much-reduced contention in total on kmem locks, although the specific numbers will differ. Make sure all tests in `usertests -q` pass. `make grade` should say that the kalloctests pass.","Some hints: - -- You can use the constant `NCPU` from kernel/param.h - -- Let `freerange` give all free memory to the CPU running `freerange`. - -- The function `cpuid` returns the current core number, but it's only safe to call it and use its result when interrupts are turned off. You should use `push_off()` and `pop_off()` to turn interrupts off and on. - -- Have a look at the `snprintf` function in kernel/sprintf.c for string formatting ideas. It is OK to just name all locks ""kmem"" though. - -- Optionally run your solution using xv6's race detector: - - ``` - $ make clean - $ make KCSAN=1 qemu - $ kalloctest - .. - - ``` - - The kalloctest may fail but you shouldn't see any races. 
If the xv6's race detector observes a race, it will print two stack traces describing the races along the following lines: - - ``` - == race detected == - backtrace for racing load - 0x000000008000ab8a - 0x000000008000ac8a - 0x000000008000ae7e - 0x0000000080000216 - 0x00000000800002e0 - 0x0000000080000f54 - 0x0000000080001d56 - 0x0000000080003704 - 0x0000000080003522 - 0x0000000080002fdc - backtrace for watchpoint: - 0x000000008000ad28 - 0x000000008000af22 - 0x000000008000023c - 0x0000000080000292 - 0x0000000080000316 - 0x000000008000098c - 0x0000000080000ad2 - 0x000000008000113a - 0x0000000080001df2 - 0x000000008000364c - 0x0000000080003522 - 0x0000000080002fdc - ========== - - ``` - - On your OS, you can turn a backtrace into function names with line numbers by cutting and pasting it into addr2line: - - ``` - $ riscv64-linux-gnu-addr2line -e kernel/kernel - 0x000000008000ab8a - 0x000000008000ac8a - 0x000000008000ae7e - 0x0000000080000216 - 0x00000000800002e0 - 0x0000000080000f54 - 0x0000000080001d56 - 0x0000000080003704 - 0x0000000080003522 - 0x0000000080002fdc - ctrl-d - kernel/kcsan.c:157 - kernel/kcsan.c:241 - kernel/kalloc.c:174 - kernel/kalloc.c:211 - kernel/vm.c:255 - kernel/proc.c:295 - kernel/sysproc.c:54 - kernel/syscall.c:251 - - ``` - - You are not required to run the race detector, but you might find it helpful. Note that the race detector slows xv6 down significantly, so you probably don't want to use it when running usertests .",," $ git fetch - $ git checkout lock - $ make clean",,xv6-labs-2024,kalloctest,"start test1 -test1 results: ---- lock kmem/bcache stats -lock: kmem: #test-and-set 0 #acquire() 94703 -lock: kmem: #test-and-set 0 #acquire() 173699 -lock: kmem: #test-and-set 0 #acquire() 164725 -lock: bcache: #test-and-set 0 #acquire() 32 -lock: bcache.bucket: #test-and-set 0 #acquire() 38 -lock: bcache.bucket: #test-and-set 0 #acquire() 13 -lock: bcache.bucket: #test-and-set 0 #acquire() 22 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 30 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 88 -lock: bcache.bucket: #test-and-set 0 #acquire() 80 -lock: bcache.bucket: #test-and-set 0 #acquire() 1045 -lock: bcache.bucket: #test-and-set 0 #acquire() 16 -lock: bcache.bucket: #test-and-set 0 #acquire() 4 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 ---- top 5 contended locks: -lock: virtio_disk: #test-and-set 87542 #acquire() 147 -lock: proc: #test-and-set 37123 #acquire() 497420 -lock: proc: #test-and-set 27415 #acquire() 497425 -lock: wait_lock: #test-and-set 9650 #acquire() 12 -lock: pr: #test-and-set 4451 #acquire() 5 -tot= 0 -test1 OK -start test2 -total free number of pages: 32463 (out of 32768) -..... 
-test2 OK -start test3 -..........child done 100000 ---- lock kmem/bcache stats -lock: kmem: #test-and-set 758 #acquire() 1375324 -lock: kmem: #test-and-set 796 #acquire() 1864634 -lock: kmem: #test-and-set 1395 #acquire() 1779346 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: bcache: #test-and-set 0 #acquire() 32 -lock: bcache.bucket: #test-and-set 0 #acquire() 38 -lock: bcache.bucket: #test-and-set 0 #acquire() 13 -lock: bcache.bucket: #test-and-set 0 #acquire() 22 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 30 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 88 -lock: bcache.bucket: #test-and-set 0 #acquire() 84 -lock: bcache.bucket: #test-and-set 0 #acquire() 1145 -lock: bcache.bucket: #test-and-set 0 #acquire() 16 -lock: bcache.bucket: #test-and-set 0 #acquire() 4 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 ---- top 5 contended locks: -lock: proc: #test-and-set 135932 #acquire() 2617654 -lock: proc: #test-and-set 99612 #acquire() 5132219 -lock: virtio_disk: #test-and-set 87542 #acquire() 147 -lock: proc: #test-and-set 46889 #acquire() 2538791 -lock: proc: #test-and-set 33853 #acquire() 1817240 -tot= 2949 - -test3 OK -$ usertests sbrkmuch -usertests starting -test sbrkmuch: OK -ALL TESTS PASSED -$ usertests -q -... -ALL TESTS PASSED",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/lock.html -47,6.1810: Operating System Engineering,2024,Lab: locks,Buffer cache,"In this lab you'll gain experience in re-designing code to increase parallelism. A common symptom of poor parallelism on multi-core machines is high lock contention. Improving parallelism often involves changing both data structures and locking strategies in order to reduce contention. You'll do this for the xv6 memory allocator and block cache. - -Before writing code, make sure to read the following parts from the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf) : - -- Chapter 6: ""Locking"" and the corresponding code. -- Section 3.5: ""Code: Physical memory allocator"" -- Section 8.1 through 8.3: ""Overview"", ""Buffer cache layer"", and ""Code: Buffer cache"" - -``` - $ git fetch - $ git checkout lock - $ make clean -``` -",,,"This half of the assignment is independent from the first half; you can work on this half (and pass the tests) whether or not you have completed the first half. - -If multiple processes use the file system intensively, they will likely contend for `bcache.lock`, which protects the disk block cache in kernel/bio.c. 
`bcachetest` creates several processes that repeatedly read different files in order to generate contention on `bcache.lock`; its output looks like this (before you complete this lab): - -``` -$ bcachetest -start test0 -test0 results: ---- lock kmem/bcache stats -lock: kmem: #test-and-set 0 #acquire() 33099 -lock: bcache: #test-and-set 10273 #acquire() 65964 ---- top 5 contended locks: -lock: virtio_disk: #test-and-set 814630 #acquire() 1221 -lock: proc: #test-and-set 57695 #acquire() 67093 -lock: proc: #test-and-set 24368 #acquire() 67103 -lock: bcache: #test-and-set 10273 #acquire() 65964 -lock: pr: #test-and-set 3441 #acquire() 5 -tot= 10273 -test0: FAIL -start test1 - -test1 OK -start test2 - -test2 OK -start test3 - -test3 OK -``` - -You will likely see different output, but the number of test-and-sets for the `bcache` lock will be high. If you look at the code in `kernel/bio.c`, you'll see that `bcache.lock` protects the list of cached block buffers, the reference count (`b->refcnt`) in each block buffer, and the identities of the cached blocks (`b->dev` and `b->blockno`). - - - -Modify the block cache so that the number of `acquire` loop iterations for all locks in the bcache is close to zero when running `bcachetest`. Ideally the sum of the counts for all locks involved in the block cache should be zero, but it's OK if the sum is less than 500. Modify `bget` and `brelse` so that concurrent lookups and releases for different blocks that are in the bcache are unlikely to conflict on locks (e.g., don't all have to wait for `bcache.lock`). You must maintain the invariant that at most one copy of each block is cached. You must not increase the number of buffers; there must be exactly NBUF (30) of them. Your modified cache does not need to use LRU replacement, but it must be able to use any of the NBUF `struct buf`s with zero `refcnt` when it misses in the cache. When you are done, your output should be similar to that shown below (though not identical). Make sure 'usertests -q' still passes. `make grade` should pass all tests when you are done. - -``` -$ bcachetest -start test0 -test0 results: ---- lock kmem/bcache stats -lock: kmem: #test-and-set 0 #acquire() 33030 -lock: kmem: #test-and-set 0 #acquire() 28 -lock: kmem: #test-and-set 0 #acquire() 73 -lock: bcache: #test-and-set 0 #acquire() 96 -lock: bcache.bucket: #test-and-set 0 #acquire() 6229 -lock: bcache.bucket: #test-and-set 0 #acquire() 6204 -lock: bcache.bucket: #test-and-set 0 #acquire() 4298 -lock: bcache.bucket: #test-and-set 0 #acquire() 4286 -lock: bcache.bucket: #test-and-set 0 #acquire() 2302 -lock: bcache.bucket: #test-and-set 0 #acquire() 4272 -lock: bcache.bucket: #test-and-set 0 #acquire() 2695 -lock: bcache.bucket: #test-and-set 0 #acquire() 4709 -lock: bcache.bucket: #test-and-set 0 #acquire() 6512 -lock: bcache.bucket: #test-and-set 0 #acquire() 6197 -lock: bcache.bucket: #test-and-set 0 #acquire() 6196 -lock: bcache.bucket: #test-and-set 0 #acquire() 6201 -lock: bcache.bucket: #test-and-set 0 #acquire() 6201 ---- top 5 contended locks: -lock: virtio_disk: #test-and-set 1483888 #acquire() 1221 -lock: proc: #test-and-set 38718 #acquire() 76050 -lock: proc: #test-and-set 34460 #acquire() 76039 -lock: proc: #test-and-set 31663 #acquire() 75963 -lock: wait_lock: #test-and-set 11794 #acquire() 16 -tot= 0 -test0: OK -start test1 - -test1 OK -start test2 - -test2 OK -start test3 - -test3 OK -$ usertests -q - ... -ALL TESTS PASSED -$ -``` - -Please give all of your locks names that start with ""bcache"". 
That is, you should call `initlock` for each of your locks, and pass a name that starts with ""bcache"". - -Reducing contention in the block cache is more tricky than for kalloc, because bcache buffers are truly shared among processes (and thus CPUs). For kalloc, one could eliminate most contention by giving each CPU its own allocator; that won't work for the block cache. We suggest you look up block numbers in the cache with a hash table that has a lock per hash bucket. - -There are some circumstances in which it's OK if your solution has lock conflicts: - -- When two processes concurrently use the same block number. `bcachetest` `test0` doesn't ever do this. -- When two processes concurrently miss in the cache, and need to find an unused block to replace. `bcachetest` `test0` doesn't ever do this. -- When two processes concurrently use blocks that conflict in whatever scheme you use to partition the blocks and locks; for example, if two processes use blocks whose block numbers hash to the same slot in a hash table. `bcachetest` `test0` might do this, depending on your design, but you should try to adjust your scheme's details to avoid conflicts (e.g., change the size of your hash table). - -`bcachetest`'s `test1` uses more distinct blocks than there are buffers, and exercises lots of file system code paths. - -Here are some hints: - -- Read the description of the block cache in the xv6 book (Section 8.1-8.3). -- It is OK to use a fixed number of buckets and not resize the hash table dynamically. Use a prime number of buckets (e.g., 13) to reduce the likelihood of hashing conflicts. -- Searching in the hash table for a buffer and allocating an entry for that buffer when the buffer is not found must be atomic. -- Remove the list of all buffers (`bcache.head` etc.) and don't implement LRU. With this change `brelse` doesn't need to acquire the bcache lock. In `bget` you can select any block that has `refcnt == 0` instead of the least-recently used one. -- You probably won't be able to atomically check for a cached buf and (if not cached) find an unused buf; you will likely have to drop all locks and start from scratch if the buffer isn't in the cache. It is OK to serialize finding an unused buf in `bget` (i.e., the part of `bget` that selects a buffer to re-use when a lookup misses in the cache). -- Your solution might need to hold two locks in some cases; for example, during eviction you may need to hold the bcache lock and a lock per bucket. Make sure you avoid deadlock. -- When replacing a block, you might move a `struct buf` from one bucket to another bucket, because the new block hashes to a different bucket. You might have a tricky case: the new block might hash to the same bucket as the old block. Make sure you avoid deadlock in that case. -- Some debugging tips: implement bucket locks but leave the global bcache.lock acquire/release at the beginning/end of bget to serialize the code. Once you are sure it is correct without race conditions, remove the global locks and deal with concurrency issues. You can also run `make CPUS=1 qemu` to test with one core. -- Use xv6's race detector to find potential races (see above how to use the race detector).","Modify the block cache so that the number of `acquire` loop iterations for all locks in the bcache is close to zero when running `bcachetest`. Ideally the sum of the counts for all locks involved in the block cache should be zero, but it's OK if the sum is less than 500. 
Modify `bget` and `brelse` so that concurrent lookups and releases for different blocks that are in the bcache are unlikely to conflict on locks (e.g., don't all have to wait for `bcache.lock`). You must maintain the invariant that at most one copy of each block is cached. You must not increase the number of buffers; there must be exactly NBUF (30) of them. Your modified cache does not need to use LRU replacement, but it must be able to use any of the NBUF `struct buf`s with zero `refcnt` when it misses in the cache. When you are done, your output should be similar to that shown below (though not identical). Make sure 'usertests -q' still passes. `make grade` should pass all tests when you are done.","Here are some hints: - -- Read the description of the block cache in the xv6 book (Section 8.1-8.3). -- It is OK to use a fixed number of buckets and not resize the hash table dynamically. Use a prime number of buckets (e.g., 13) to reduce the likelihood of hashing conflicts. -- Searching in the hash table for a buffer and allocating an entry for that buffer when the buffer is not found must be atomic. -- Remove the list of all buffers (`bcache.head` etc.) and don't implement LRU. With this change `brelse` doesn't need to acquire the bcache lock. In `bget` you can select any block that has `refcnt == 0` instead of the least-recently used one. -- You probably won't be able to atomically check for a cached buf and (if not cached) find an unused buf; you will likely have to drop all locks and start from scratch if the buffer isn't in the cache. It is OK to serialize finding an unused buf in `bget` (i.e., the part of `bget` that selects a buffer to re-use when a lookup misses in the cache). -- Your solution might need to hold two locks in some cases; for example, during eviction you may need to hold the bcache lock and a lock per bucket. Make sure you avoid deadlock. -- When replacing a block, you might move a `struct buf` from one bucket to another bucket, because the new block hashes to a different bucket. You might have a tricky case: the new block might hash to the same bucket as the old block. Make sure you avoid deadlock in that case. -- Some debugging tips: implement bucket locks but leave the global bcache.lock acquire/release at the beginning/end of bget to serialize the code. Once you are sure it is correct without race conditions, remove the global locks and deal with concurrency issues. You can also run `make CPUS=1 qemu` to test with one core. -- Use xv6's race detector to find potential races (see above how to use the race detector). 
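The hints above amount to replacing the single `bcache.lock` with a small fixed array of hash buckets, each guarded by its own lock. The lab's real implementation is C in `kernel/bio.c`; the Go sketch below (all type and function names invented) only illustrates the shape of the idea: hash the block number to a bucket, take only that bucket's lock for a lookup, and fall back to a separately serialized eviction path on a miss.

```go
package sketch

import "sync"

const nbucket = 13 // a prime bucket count, as the hint suggests

// buf stands in for xv6's struct buf; only the fields that matter to the
// locking scheme are shown.
type buf struct {
	dev, blockno uint32
	refcnt       int
}

type bucket struct {
	mu   sync.Mutex
	bufs []*buf
}

type bcache struct {
	evict   sync.Mutex // serializes only the miss/eviction path
	buckets [nbucket]bucket
}

// bget looks a block up under the bucket lock alone, so lookups and
// releases for blocks in different buckets no longer contend on one
// global lock.
func (c *bcache) bget(dev, blockno uint32) *buf {
	bkt := &c.buckets[blockno%nbucket]
	bkt.mu.Lock()
	for _, b := range bkt.bufs {
		if b.dev == dev && b.blockno == blockno {
			b.refcnt++ // cache hit: only this bucket's lock was taken
			bkt.mu.Unlock()
			return b
		}
	}
	bkt.mu.Unlock()

	// Cache miss: take the rarely contended eviction lock, re-check the
	// bucket, then steal any buffer with refcnt == 0 from some bucket and
	// move it here. The scan itself, and the care needed when the old and
	// new block hash to the same bucket, are omitted from this sketch.
	c.evict.Lock()
	defer c.evict.Unlock()
	return nil
}
```

A prime bucket count such as 13 keeps collisions, and therefore cross-process conflicts on the same bucket, rare for `bcachetest`'s access pattern.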
-",," $ git fetch - $ git checkout lock - $ make clean",,xv6-labs-2024,bcachetest,"start test0 -test0 results: ---- lock kmem/bcache stats -lock: kmem: #test-and-set 0 #acquire() 33030 -lock: kmem: #test-and-set 0 #acquire() 28 -lock: kmem: #test-and-set 0 #acquire() 73 -lock: bcache: #test-and-set 0 #acquire() 96 -lock: bcache.bucket: #test-and-set 0 #acquire() 6229 -lock: bcache.bucket: #test-and-set 0 #acquire() 6204 -lock: bcache.bucket: #test-and-set 0 #acquire() 4298 -lock: bcache.bucket: #test-and-set 0 #acquire() 4286 -lock: bcache.bucket: #test-and-set 0 #acquire() 2302 -lock: bcache.bucket: #test-and-set 0 #acquire() 4272 -lock: bcache.bucket: #test-and-set 0 #acquire() 2695 -lock: bcache.bucket: #test-and-set 0 #acquire() 4709 -lock: bcache.bucket: #test-and-set 0 #acquire() 6512 -lock: bcache.bucket: #test-and-set 0 #acquire() 6197 -lock: bcache.bucket: #test-and-set 0 #acquire() 6196 -lock: bcache.bucket: #test-and-set 0 #acquire() 6201 -lock: bcache.bucket: #test-and-set 0 #acquire() 6201 ---- top 5 contended locks: -lock: virtio_disk: #test-and-set 1483888 #acquire() 1221 -lock: proc: #test-and-set 38718 #acquire() 76050 -lock: proc: #test-and-set 34460 #acquire() 76039 -lock: proc: #test-and-set 31663 #acquire() 75963 -lock: wait_lock: #test-and-set 11794 #acquire() 16 -tot= 0 -test0: OK -start test1 - -test1 OK -start test2 - -test2 OK -start test3 - -test3 OK -$ usertests -q - ... -ALL TESTS PASSED",hard,https://pdos.csail.mit.edu/6.1810/2024/labs/lock.html -48,6.1810: Operating System Engineering,2024,Lab: file system,Large files,"In this lab you will add large files and symbolic links to the xv6 file system. - -Before writing code, you should read ""Chapter 8: File system"" from the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf) and study the corresponding code. - -Fetch the xv6 source for the lab and check out the `util` branch: - -``` - $ git fetch - $ git checkout fs - $ make clean -``` -",,,"In this assignment you'll increase the maximum size of an xv6 file. Currently xv6 files are limited to 268 blocks, or 268*BSIZE bytes (BSIZE is 1024 in xv6). This limit comes from the fact that an xv6 inode contains 12 ""direct"" block numbers and one ""singly-indirect"" block number, which refers to a block that holds up to 256 more block numbers, for a total of 12+256=268 blocks. - -The `bigfile` command creates the longest file it can, and reports that size: - -``` -$ bigfile -.. -wrote 268 blocks -bigfile: file is too small -$ -``` - -The test fails because `bigfile` expects to be able to create a file with 65803 blocks, but unmodified xv6 limits files to 268 blocks. - -You'll change the xv6 file system code to support a ""doubly-indirect"" block in each inode, containing 256 addresses of singly-indirect blocks, each of which can contain up to 256 addresses of data blocks. The result will be that a file will be able to consist of up to 65803 blocks, or 256*256+256+11 blocks (11 instead of 12, because we will sacrifice one of the direct block numbers for the double-indirect block). - -### Preliminaries - -The `mkfs` program creates the xv6 file system disk image and determines how many total blocks the file system has; this size is controlled by `FSSIZE` in `kernel/param.h`. You'll see that `FSSIZE` in the repository for this lab is set to 200,000 blocks. 
You should see the following output from `mkfs/mkfs` in the make output: - -``` -nmeta 70 (boot, super, log blocks 30 inode blocks 13, bitmap blocks 25) blocks 199930 total 200000 -``` - -This line describes the file system that `mkfs/mkfs` built: it has 70 meta-data blocks (blocks used to describe the file system) and 199,930 data blocks, totaling 200,000 blocks. - -Note that `make qemu` builds a new `fs.img`, and saves the old one in `fs.img.bk`. If you want to run xv6 with the existing `fs.img` instead of building a new one, run `make qemu-fs`. - -### What to Look At - -The format of an on-disk inode is defined by `struct dinode` in `fs.h`. You're particularly interested in `NDIRECT`, `NINDIRECT`, `MAXFILE`, and the `addrs[]` element of `struct dinode`. Look at Figure 8.3 in the xv6 text for a diagram of the standard xv6 inode. - -The code that finds a file's data on disk is in `bmap()` in `fs.c`. Have a look at it and make sure you understand what it's doing. `bmap()` is called both when reading and writing a file. When writing, `bmap()` allocates new blocks as needed to hold file content, as well as allocating an indirect block if needed to hold block addresses. - -`bmap()` deals with two kinds of block numbers. The `bn` argument is a ""logical block number"" -- a block number within the file, relative to the start of the file. The block numbers in `ip->addrs[]`, and the argument to `bread()`, are disk block numbers. You can view `bmap()` as mapping a file's logical block numbers into disk block numbers. - -### Your Job - -Modify `bmap()` so that it implements a doubly-indirect block, in addition to direct blocks and a singly-indirect block. You'll have to have only 11 direct blocks, rather than 12, to make room for your new doubly-indirect block; you're not allowed to change the size of an on-disk inode. The first 11 elements of `ip->addrs[]` should be direct blocks; the 12th should be a singly-indirect block (just like the current one); the 13th should be your new doubly-indirect block. You are done with this exercise when `bigfile` writes 65803 blocks and `usertests -q` runs successfully: - -``` -$ bigfile -.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. -wrote 65803 blocks -done; ok -$ usertests -q -... -ALL TESTS PASSED -$ -``` - -`bigfile` will take at least a minute and a half to run. - -Hints: - -- Make sure you understand `bmap()`. Write out a diagram of the relationships between `ip->addrs[]`, the indirect block, the doubly-indirect block and the singly-indirect blocks it points to, and data blocks. Make sure you understand why adding a doubly-indirect block increases the maximum file size by 256*256 blocks (really -1, since you have to decrease the number of direct blocks by one). -- Think about how you'll index the doubly-indirect block, and the indirect blocks it points to, with the logical block number. 
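To make the indexing hint concrete: once a logical block number is past the 11 direct slots and the 256 singly-indirect slots, the remainder splits into a first-level index (which entry of the doubly-indirect block) and a second-level index (which entry of that singly-indirect block). A minimal sketch of just that arithmetic, written in Go for brevity rather than the C of `kernel/fs.c`, with invented names:

```go
package sketch

const (
	ndirect   = 11                                        // one direct slot is given up to the doubly-indirect block
	nindirect = 1024 / 4                                  // BSIZE / sizeof(uint) = 256 entries per indirect block
	maxfile   = ndirect + nindirect + nindirect*nindirect // 11 + 256 + 65536 = 65803 blocks
)

// splitDoubly maps a logical block number that falls in the doubly-indirect
// range to (entry in the doubly-indirect block, entry in that singly-indirect
// block). It returns ok == false for block numbers outside that range.
func splitDoubly(bn int) (level1, level2 int, ok bool) {
	bn -= ndirect + nindirect
	if bn < 0 || bn >= nindirect*nindirect {
		return 0, 0, false
	}
	return bn / nindirect, bn % nindirect, true
}
```

With these constants the maximum file size works out to 11 + 256 + 256*256 = 65803 blocks, matching the `bigfile` target above.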
-- If you change the definition of `NDIRECT`, you'll probably have to change the declaration of `addrs[]` in `struct inode` in `file.h`. Make sure that `struct inode` and `struct dinode` have the same number of elements in their `addrs[]` arrays. -- If you change the definition of `NDIRECT`, make sure to create a new `fs.img`, since `mkfs` uses `NDIRECT` to build the file system. -- If your file system gets into a bad state, perhaps by crashing, delete `fs.img` (do this from Unix, not xv6). `make` will build a new clean file system image for you. -- Don't forget to `brelse()` each block that you `bread()`. -- You should allocate indirect blocks and doubly-indirect blocks only as needed, like the original `bmap()`. -- Make sure `itrunc` frees all blocks of a file, including double-indirect blocks. -- `usertests` takes longer to run than in previous labs because for this lab `FSSIZE` is larger and big files are larger.","Modify `bmap()` so that it implements a doubly-indirect block, in addition to direct blocks and a singly-indirect block. You'll have to have only 11 direct blocks, rather than 12, to make room for your new doubly-indirect block; you're not allowed to change the size of an on-disk inode. The first 11 elements of `ip->addrs[]` should be direct blocks; the 12th should be a singly-indirect block (just like the current one); the 13th should be your new doubly-indirect block. You are done with this exercise when `bigfile` writes 65803 blocks and `usertests -q` runs successfully: - -``` -$ bigfile -.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. -wrote 65803 blocks -done; ok -$ usertests -q -... -ALL TESTS PASSED -$ -``` - -`bigfile` will take at least a minute and a half to run.","- Make sure you understand `bmap()`. Write out a diagram of the relationships between `ip->addrs[]`, the indirect block, the doubly-indirect block and the singly-indirect blocks it points to, and data blocks. Make sure you understand why adding a doubly-indirect block increases the maximum file size by 256*256 blocks (really -1, since you have to decrease the number of direct blocks by one). -- Think about how you'll index the doubly-indirect block, and the indirect blocks it points to, with the logical block number. -- If you change the definition of `NDIRECT`, you'll probably have to change the declaration of `addrs[]` in `struct inode` in `file.h`. Make sure that `struct inode` and `struct dinode` have the same number of elements in their `addrs[]` arrays. -- If you change the definition of `NDIRECT`, make sure to create a new `fs.img`, since `mkfs` uses `NDIRECT` to build the file system. -- If your file system gets into a bad state, perhaps by crashing, delete `fs.img` (do this from Unix, not xv6). `make` will build a new clean file system image for you. -- Don't forget to `brelse()` each block that you `bread()`. 
-- You should allocate indirect blocks and doubly-indirect blocks only as needed, like the original `bmap()`. -- Make sure `itrunc` frees all blocks of a file, including double-indirect blocks. -- `usertests` takes longer to run than in previous labs because for this lab `FSSIZE` is larger and big files are larger.",," $ git fetch - $ git checkout fs - $ make clean",,xv6-labs-2024,bigfile,".................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. -wrote 65803 blocks -done; ok -$ usertests -q -... -ALL TESTS PASSED",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/fs.html -49,6.1810: Operating System Engineering,2024,Lab: file system,Symbolic links,"In this exercise you will add symbolic links to xv6. Symbolic links (or soft links) refer to a linked file or directory by pathname; when a symbolic link is opened, the kernel looks up the linked-to name. Symbolic links resemble hard links, but hard links are restricted to pointing to files on the same disk, cannot refer to directories, and are tied to a specific target i-node rather than (as with symbolic links) referring to whatever happens at the moment to be at the target name, if anything. Implementing this system call is a good exercise to understand how pathname lookup works. - -You do not have to handle symbolic links to directories for this lab; the only system call that needs to know how to follow symbolic links is `open()`. - -### Your job - -You will implement the `symlink(char *target, char *path)` system call, which creates a new symbolic link at path that refers to file named by target. For further information, see the man page symlink. To test, add symlinktest to the Makefile and run it. Your solution is complete when the tests produce the following output (including usertests succeeding). - -``` -$ symlinktest -Start: test symlinks -test symlinks: ok -Start: test concurrent symlinks -test concurrent symlinks: ok -$ usertests -q -... -ALL TESTS PASSED -$ -``` - -Hints: - -- First, create a new system call number for symlink, add an entry to user/usys.pl, user/user.h, and implement an empty sys_symlink in kernel/sysfile.c. -- Add a new file type (`T_SYMLINK`) to kernel/stat.h to represent a symbolic link. -- Add a new flag to kernel/fcntl.h, (`O_NOFOLLOW`), that can be used with the `open` system call. Note that flags passed to `open` are combined using a bitwise OR operator, so your new flag should not overlap with any existing flags. This will let you compile user/symlinktest.c once you add it to the Makefile. -- Implement the `symlink(target, path)` system call to create a new symbolic link at path that refers to target. Note that target does not need to exist for the system call to succeed. You will need to choose somewhere to store the target path of a symbolic link, for example, in the inode's data blocks. `symlink` should return an integer representing success (0) or failure (-1) similar to `link` and `unlink`. 
-- Modify the `open` system call to handle the case where the path refers to a symbolic link. If the file does not exist, `open` must fail. When a process specifies `O_NOFOLLOW` in the flags to `open`, `open` should open the symlink (and not follow the symbolic link). -- If the linked file is also a symbolic link, you must recursively follow it until a non-link file is reached. If the links form a cycle, you must return an error code. You may approximate this by returning an error code if the depth of links reaches some threshold (e.g., 10). -- Other system calls (e.g., link and unlink) must not follow symbolic links; these system calls operate on the symbolic link itself. -",,,"In this exercise you will add symbolic links to xv6. Symbolic links (or soft links) refer to a linked file or directory by pathname; when a symbolic link is opened, the kernel looks up the linked-to name. Symbolic links resemble hard links, but hard links are restricted to pointing to files on the same disk, cannot refer to directories, and are tied to a specific target i-node rather than (as with symbolic links) referring to whatever happens at the moment to be at the target name, if anything. Implementing this system call is a good exercise to understand how pathname lookup works. - -You do not have to handle symbolic links to directories for this lab; the only system call that needs to know how to follow symbolic links is `open()`. - -### Your job - -You will implement the `symlink(char *target, char *path)` system call, which creates a new symbolic link at path that refers to file named by target. For further information, see the man page symlink. To test, add symlinktest to the Makefile and run it. Your solution is complete when the tests produce the following output (including usertests succeeding). - -``` -$ symlinktest -Start: test symlinks -test symlinks: ok -Start: test concurrent symlinks -test concurrent symlinks: ok -$ usertests -q -... -ALL TESTS PASSED -$ -``` - - - -Hints: - -- First, create a new system call number for symlink, add an entry to user/usys.pl, user/user.h, and implement an empty sys_symlink in kernel/sysfile.c. -- Add a new file type (`T_SYMLINK`) to kernel/stat.h to represent a symbolic link. -- Add a new flag to kernel/fcntl.h, (`O_NOFOLLOW`), that can be used with the `open` system call. Note that flags passed to `open` are combined using a bitwise OR operator, so your new flag should not overlap with any existing flags. This will let you compile user/symlinktest.c once you add it to the Makefile. -- Implement the `symlink(target, path)` system call to create a new symbolic link at path that refers to target. Note that target does not need to exist for the system call to succeed. You will need to choose somewhere to store the target path of a symbolic link, for example, in the inode's data blocks. `symlink` should return an integer representing success (0) or failure (-1) similar to `link` and `unlink`. -- Modify the `open` system call to handle the case where the path refers to a symbolic link. If the file does not exist, `open` must fail. When a process specifies `O_NOFOLLOW` in the flags to `open`, `open` should open the symlink (and not follow the symbolic link). -- If the linked file is also a symbolic link, you must recursively follow it until a non-link file is reached. If the links form a cycle, you must return an error code. You may approximate this by returning an error code if the depth of links reaches some threshold (e.g., 10). 
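The cycle rule above is essentially a bounded loop inside `open`: resolve the path, and while the result is a symlink and `O_NOFOLLOW` was not passed, read the target that `symlink` stored and resolve again, giving up after a fixed depth. The real code is C in `kernel/sysfile.c`; the Go sketch below uses an invented `lookup` stand-in purely to show the control flow.

```go
package sketch

import "errors"

const maxSymlinkDepth = 10 // threshold suggested in the hint above

// inode is a tiny stand-in for xv6's in-memory inode; target holds the
// path that sys_symlink wrote into the symlink's data blocks.
type inode struct {
	isSymlink bool
	target    string
}

// lookup stands in for xv6's namei(); it exists here only so the control
// flow can be shown and is an assumption of this sketch.
var lookup func(path string) (*inode, error)

// openFollow resolves path and, unless nofollow (O_NOFOLLOW) is set,
// keeps following symlinks until a non-link inode is reached or the
// depth limit trips, which also catches cycles.
func openFollow(path string, nofollow bool) (*inode, error) {
	ip, err := lookup(path)
	if err != nil {
		return nil, err
	}
	for depth := 0; ip.isSymlink && !nofollow; depth++ {
		if depth >= maxSymlinkDepth {
			return nil, errors.New("too many levels of symbolic links")
		}
		if ip, err = lookup(ip.target); err != nil {
			return nil, err
		}
	}
	return ip, nil
}
```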
-- Other system calls (e.g., link and unlink) must not follow symbolic links; these system calls operate on the symbolic link itself.","You will implement the `symlink(char *target, char *path)` system call, which creates a new symbolic link at path that refers to file named by target. For further information, see the man page symlink. To test, add symlinktest to the Makefile and run it. Your solution is complete when the tests produce the following output (including usertests succeeding). - -``` -$ symlinktest -Start: test symlinks -test symlinks: ok -Start: test concurrent symlinks -test concurrent symlinks: ok -$ usertests -q -... -ALL TESTS PASSED -$ -``` -","Hints: - -- First, create a new system call number for symlink, add an entry to user/usys.pl, user/user.h, and implement an empty sys_symlink in kernel/sysfile.c. -- Add a new file type (`T_SYMLINK`) to kernel/stat.h to represent a symbolic link. -- Add a new flag to kernel/fcntl.h, (`O_NOFOLLOW`), that can be used with the `open` system call. Note that flags passed to `open` are combined using a bitwise OR operator, so your new flag should not overlap with any existing flags. This will let you compile user/symlinktest.c once you add it to the Makefile. -- Implement the `symlink(target, path)` system call to create a new symbolic link at path that refers to target. Note that target does not need to exist for the system call to succeed. You will need to choose somewhere to store the target path of a symbolic link, for example, in the inode's data blocks. `symlink` should return an integer representing success (0) or failure (-1) similar to `link` and `unlink`. -- Modify the `open` system call to handle the case where the path refers to a symbolic link. If the file does not exist, `open` must fail. When a process specifies `O_NOFOLLOW` in the flags to `open`, `open` should open the symlink (and not follow the symbolic link). -- If the linked file is also a symbolic link, you must recursively follow it until a non-link file is reached. If the links form a cycle, you must return an error code. You may approximate this by returning an error code if the depth of links reaches some threshold (e.g., 10). -- Other system calls (e.g., link and unlink) must not follow symbolic links; these system calls operate on the symbolic link itself.",," $ git fetch - $ git checkout fs - $ make clean",,xv6-labs-2024,symlinktest,"Start: test symlinks -test symlinks: ok -Start: test concurrent symlinks -test concurrent symlinks: ok -$ usertests -q -... -ALL TESTS PASSED",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/fs.html -50,6.1810: Operating System Engineering,2024,Lab: mmap,Lab: mmap,,,,"The `mmap` and `munmap` system calls allow UNIX programs to exert detailed control over their address spaces. They can be used to share memory among processes, to map files into process address spaces, and as part of user-level page fault schemes such as the garbage-collection algorithms discussed in lecture. In this lab you'll add `mmap` and `munmap` to xv6, focusing on memory-mapped files. - -Fetch the xv6 source for the lab and check out the `mmap` branch: - -``` - $ git fetch - $ git checkout mmap - $ make clean -``` - -The manual page (run man 2 mmap) shows this declaration for `mmap`: - -``` -void *mmap(void *addr, size_t len, int prot, int flags, - int fd, off_t offset); -``` - -`mmap` can be called in many ways, but this lab requires only a subset of its features relevant to memory-mapping a file. 
You can assume that `addr` will always be zero, meaning that the kernel should decide the virtual address at which to map the file. `mmap` returns that address, or 0xffffffffffffffff if it fails. `len` is the number of bytes to map; it might not be the same as the file's length. `prot` indicates whether the memory should be mapped readable, writeable, and/or executable; you can assume that `prot` is `PROT_READ` or `PROT_WRITE` or both. `flags` will be either `MAP_SHARED`, meaning that modifications to the mapped memory should be written back to the file, or `MAP_PRIVATE`, meaning that they should not. You don't have to implement any other bits in `flags`. `fd` is the open file descriptor of the file to map. You can assume `offset` is zero (it's the starting point in the file at which to map). - -Your implementation should fill in the page table lazily, in response to page faults. That is, `mmap` itself should not allocate physical memory or read the file. Instead, do that in page fault handling code in (or called by) `usertrap`, as in the copy-on-write lab. The reason to be lazy is to ensure that `mmap` of a large file is fast, and that `mmap` of a file larger than physical memory is possible. - -It's OK if processes that map the same `MAP_SHARED` file do **not** share physical pages. - -The manual page (run man 2 munmap) shows this declaration for `munmap`: - -``` -int munmap(void *addr, size_t len); -``` - -`munmap` should remove mmap mappings in the indicated address range, if any. If the process has modified the memory and has it mapped `MAP_SHARED`, the modifications should first be written to the file. An `munmap` call might cover only a portion of an mmap-ed region, but you can assume that it will either unmap at the start, or at the end, or the whole region (but not punch a hole in the middle of a region). When a process exits, any modifictions it has made to `MAP_SHARED` regions should be written to the relevant files, as if the process had called `munmap`. - -You should implement enough `mmap` and `munmap` functionality to make the `mmaptest` test program work. If `mmaptest` doesn't use a `mmap` feature, you don't need to implement that feature. You must also ensure that `usertests -q` continues to work. - -When you're done, you should see output similar to this: - -``` -$ mmaptest -test basic mmap -test basic mmap: OK -test mmap private -test mmap private: OK -test mmap read-only -test mmap read-only: OK -test mmap read/write -test mmap read/write: OK -test mmap dirty -test mmap dirty: OK -test not-mapped unmap -test not-mapped unmap: OK -test lazy access -test lazy access: OK -test mmap two files -test mmap two files: OK -test fork -test fork: OK -test munmap prevents access -usertrap(): unexpected scause 0xd pid=7 - sepc=0x924 stval=0xc0001000 -usertrap(): unexpected scause 0xd pid=8 - sepc=0x9ac stval=0xc0000000 -test munmap prevents access: OK -test writes to read-only mapped memory -usertrap(): unexpected scause 0xf pid=9 - sepc=0xaf4 stval=0xc0000000 -test writes to read-only mapped memory: OK -mmaptest: all tests succeeded -$ usertests -q -usertests starting -... -ALL TESTS PASSED -$ -``` - -Here are some hints: - -- Start by adding `_mmaptest` to `UPROGS`, and `mmap` and `munmap` system calls, in order to get `user/mmaptest.c` to compile. For now, just return errors from `mmap` and `munmap`. We defined `PROT_READ` etc for you in `kernel/fcntl.h`. Run `mmaptest`, which will fail at the first mmap call. -- Keep track of what `mmap` has mapped for each process. 
Define a structure corresponding to the VMA (virtual memory area) described in the ""virtual memory for applications"" lecture. This should record the address, length, permissions, file, etc. for a virtual memory range created by `mmap`. Since the xv6 kernel doesn't have a variable-size memory allocator in the kernel, it's OK to declare a fixed-size array of VMAs and allocate from that array as needed. A size of 16 should be sufficient. -- Implement `mmap`: find an unused region in the process's address space in which to map the file, and add a VMA to the process's table of mapped regions. The VMA should contain a pointer to a `struct file` for the file being mapped; `mmap` should increase the file's reference count so that the structure doesn't disappear when the file is closed (hint: see `filedup`). Run `mmaptest`: the first `mmap` should succeed, but the first access to the mmap-ed memory will cause a page fault and kill `mmaptest`. -- Add code to cause a page-fault in a mmap-ed region to allocate a page of physical memory, read 4096 bytes of the relevant file into that page, and map it into the user address space. Read the file with `readi`, which takes an offset argument at which to read in the file (but you will have to lock/unlock the inode passed to `readi`). Don't forget to set the permissions correctly on the page. Run `mmaptest`; it should get to the first `munmap`. -- Implement `munmap`: find the VMA for the address range and unmap the specified pages (hint: use `uvmunmap`). If `munmap` removes all pages of a previous `mmap`, it should decrement the reference count of the corresponding `struct file`. If an unmapped page has been modified and the file is mapped `MAP_SHARED`, write the page back to the file. Look at `filewrite` for inspiration. -- Ideally your implementation would only write back `MAP_SHARED` pages that the program actually modified. The dirty bit (`D`) in the RISC-V PTE indicates whether a page has been written. However, `mmaptest` does not check that non-dirty pages are not written back; thus you can get away with writing pages back without looking at `D` bits. -- Modify `exit` to unmap the process's mapped regions as if `munmap` had been called. Run `mmaptest`; all tests through `test mmap two files` should pass, but probably not `test fork`. -- Modify `fork` to ensure that the child has the same mapped regions as the parent. Don't forget to increment the reference count for a VMA's `struct file`. In the page fault handler of the child, it is OK to allocate a new physical page instead of sharing a page with the parent. The latter would be cooler, but it would require more implementation work. Run `mmaptest`; it should pass all the tests. - -Run `usertests -q` to make sure everything still works. -","You should implement enough `mmap` and `munmap` functionality to make the `mmaptest` test program work. If `mmaptest` doesn't use a `mmap` feature, you don't need to implement that feature. You must also ensure that `usertests -q` continues to work.","Here are some hints: - -- Start by adding `_mmaptest` to `UPROGS`, and `mmap` and `munmap` system calls, in order to get `user/mmaptest.c` to compile. For now, just return errors from `mmap` and `munmap`. We defined `PROT_READ` etc for you in `kernel/fcntl.h`. Run `mmaptest`, which will fail at the first mmap call. -- Keep track of what `mmap` has mapped for each process. Define a structure corresponding to the VMA (virtual memory area) described in the ""virtual memory for applications"" lecture. 
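As one hedged illustration of such a per-process record (field names invented; the real version would be a C struct inside the xv6 kernel):

```go
package sketch

// file stands in for xv6's struct file; the mapping keeps a pointer to it
// (and bumps its reference count, see filedup) so dirty MAP_SHARED pages
// can still be written back at munmap or exit time.
type file struct{}

// vma describes one mmap-ed range of a process's address space.
// All field names are invented for illustration.
type vma struct {
	used   bool   // whether this slot of the fixed-size table is in use
	addr   uint64 // start of the mapped virtual address range
	length uint64 // number of bytes mapped
	prot   int    // PROT_READ / PROT_WRITE bits
	flags  int    // MAP_SHARED or MAP_PRIVATE
	f      *file  // the mapped file
	offset uint64 // file offset of the mapping (always 0 in this lab)
}

// Each process carries a small fixed table of VMAs, since the xv6 kernel
// has no variable-size allocator; 16 slots is enough for mmaptest.
type proc struct {
	vmas [16]vma
}
```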
This should record the address, length, permissions, file, etc. for a virtual memory range created by `mmap`. Since the xv6 kernel doesn't have a variable-size memory allocator in the kernel, it's OK to declare a fixed-size array of VMAs and allocate from that array as needed. A size of 16 should be sufficient. -- Implement `mmap`: find an unused region in the process's address space in which to map the file, and add a VMA to the process's table of mapped regions. The VMA should contain a pointer to a `struct file` for the file being mapped; `mmap` should increase the file's reference count so that the structure doesn't disappear when the file is closed (hint: see `filedup`). Run `mmaptest`: the first `mmap` should succeed, but the first access to the mmap-ed memory will cause a page fault and kill `mmaptest`. -- Add code to cause a page-fault in a mmap-ed region to allocate a page of physical memory, read 4096 bytes of the relevant file into that page, and map it into the user address space. Read the file with `readi`, which takes an offset argument at which to read in the file (but you will have to lock/unlock the inode passed to `readi`). Don't forget to set the permissions correctly on the page. Run `mmaptest`; it should get to the first `munmap`. -- Implement `munmap`: find the VMA for the address range and unmap the specified pages (hint: use `uvmunmap`). If `munmap` removes all pages of a previous `mmap`, it should decrement the reference count of the corresponding `struct file`. If an unmapped page has been modified and the file is mapped `MAP_SHARED`, write the page back to the file. Look at `filewrite` for inspiration. -- Ideally your implementation would only write back `MAP_SHARED` pages that the program actually modified. The dirty bit (`D`) in the RISC-V PTE indicates whether a page has been written. However, `mmaptest` does not check that non-dirty pages are not written back; thus you can get away with writing pages back without looking at `D` bits. -- Modify `exit` to unmap the process's mapped regions as if `munmap` had been called. Run `mmaptest`; all tests through `test mmap two files` should pass, but probably not `test fork`. -- Modify `fork` to ensure that the child has the same mapped regions as the parent. Don't forget to increment the reference count for a VMA's `struct file`. In the page fault handler of the child, it is OK to allocate a new physical page instead of sharing a page with the parent. The latter would be cooler, but it would require more implementation work. Run `mmaptest`; it should pass all the tests.",," $ git fetch - $ git checkout mmap - $ make clean",,xv6-labs-2024,mmaptest,"test basic mmap -test basic mmap: OK -test mmap private -test mmap private: OK -test mmap read-only -test mmap read-only: OK -test mmap read/write -test mmap read/write: OK -test mmap dirty -test mmap dirty: OK -test not-mapped unmap -test not-mapped unmap: OK -test lazy access -test lazy access: OK -test mmap two files -test mmap two files: OK -test fork -test fork: OK -test munmap prevents access -usertrap(): unexpected scause 0xd pid=7 - sepc=0x924 stval=0xc0001000 -usertrap(): unexpected scause 0xd pid=8 - sepc=0x9ac stval=0xc0000000 -test munmap prevents access: OK -test writes to read-only mapped memory -usertrap(): unexpected scause 0xf pid=9 - sepc=0xaf4 stval=0xc0000000 -test writes to read-only mapped memory: OK -mmaptest: all tests succeeded -$ usertests -q -usertests starting -... 
-ALL TESTS PASSED",hard,https://pdos.csail.mit.edu/6.1810/2024/labs/mmap.html -51,6.5830/6.5831: Database Systems,Fall 2024,Lab 0: Go tutorial,Start an http server and handle requests,,,,"This task requires you to start up the http server in `main.go` and handle the user's GET requests by filling out the `HomeHandler` method in `handlers/handlers.go`. - -The final web app looks like in the screenshot below, where users can select a T line (e.g. red line) and display its ridership statistics in a bar chart. The `HomeHandler` function first checks which line the user has selected in the drop down menu and then queries ridership numbers for that line from a `RiderhipDB` instance. The returned values are then displayed in a bar chart. You don't need to write code to plot the bar chart yourself, you can simply use the `GenerateBarChart` function in `utils/render_chart.go`. - -After completing this task, you should be able to start the web server by running `go run main.go` and see the web app in your browser by going to [http://localhost:PORT](http://localhost:PORT/) (where PORT is the port number you specified): - -[![Screenshot of web app](https://github.com/MIT-DB-Class/lab0/raw/main/screenshot.png)](https://github.com/MIT-DB-Class/lab0/blob/main/screenshot.png) - -You should also be able to pass the test in `handlers_test.go`: When running `go test` from the `handlers` directory, you should get a similar output to this: - -``` -PASS -ok main/handlers 0.246s -``` -",This task requires you to start up the http server in main.go and handle the user's GET requests by filling out the HomeHandler method in handlers/handlers.go.,,,https://github.com/MIT-DB-Class/lab0.git,,,go test,"PASS -ok main/handlers 0.246s",,http://db.lcs.mit.edu/6.5830/assign.php -52,6.5830/6.5831: Database Systems,Fall 2024,Lab 0: Go tutorial,Run a query over a CSV file,,,,"This task requires you to implement the missing methods in `ridership_db/csv_ridership_db.go` - -Instead of issuing the query against sqlite, `CsvRidershipDB` directly runs it over the `mbta.csv` CSV file. MBTA divides a day into nine different time periods (*time_period_01*, ..., *time_period_09*). The CSV file contains how many passengers boarded trains during a specific time period, at a specific station and for a specific line and direction. For the queried line (passed to `GetRidership`) compute the total number of passengers that boarded a train for each given time period (for each time period, sum over all stations and directions). The sum for each time period should be an entry in the returned `int64` slice. Make sure to use the `idIdxMap` map to map the time period id strings (e.g. *time_period_01*) to the correct index in the `boardings` slice (e.g. 0). 
- -To use your CSV implementation in the web app, instantiate RidershipDB to be a `CsvRidershipDB` instead of a `SqliteRidershipDB` in lines 23-24 in `handlers/handlers.go`: - -``` -// instantiate ridershipDB -// var db rdb.RidershipDB = &rdb.SqliteRidershipDB{} // Sqlite implementation -var db rdb.RidershipDB = &rdb.CsvRidershipDB{} // CSV implementation -``` - -You should also be able to pass the tests in `ridership_db/ridership_db_test.go`: When running `go test` from the `ridership_db` directory, you should get a similar output to this: - -``` -=== RUN TestRidershipDBsMatch -=== RUN TestRidershipDBsMatch/red ---- PASS: TestRidershipDBsMatch/red (0.00s) -=== RUN TestRidershipDBsMatch/green ---- PASS: TestRidershipDBsMatch/green (0.00s) -=== RUN TestRidershipDBsMatch/blue ---- PASS: TestRidershipDBsMatch/blue (0.00s) -=== RUN TestRidershipDBsMatch/orange ---- PASS: TestRidershipDBsMatch/orange (0.00s) ---- PASS: TestRidershipDBsMatch (0.01s) -PASS -ok main/ridership_db 0.226s -``` -",,,,https://github.com/MIT-DB-Class/lab0.git,,,go test,"=== RUN TestRidershipDBsMatch -=== RUN TestRidershipDBsMatch/red ---- PASS: TestRidershipDBsMatch/red (0.00s) -=== RUN TestRidershipDBsMatch/green ---- PASS: TestRidershipDBsMatch/green (0.00s) -=== RUN TestRidershipDBsMatch/blue ---- PASS: TestRidershipDBsMatch/blue (0.00s) -=== RUN TestRidershipDBsMatch/orange ---- PASS: TestRidershipDBsMatch/orange (0.00s) ---- PASS: TestRidershipDBsMatch (0.01s) -PASS -ok main/ridership_db 0.226SEARCH(",,http://db.lcs.mit.edu/6.5830/assign.php diff --git a/benchmarks/course_lab_bench/data/benchmark/lab_exam_data_20250529.csv b/benchmarks/course_lab_bench/data/benchmark/lab_exam_data_20250529.csv deleted file mode 100644 index f19e86a..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/lab_exam_data_20250529.csv +++ /dev/null @@ -1,5621 +0,0 @@ -instance_id,course,year,index,part_name,introduction,getting_started,The code,description,task,hint,rules,repo_location,branch,repo,test_method,test_results,difficluty,link -1,6.5840: Distributed Systems,Spring 2024,Lab 1: MapReduce,Your Job,"In this lab you'll build a MapReduce system. You'll implement a worker process that calls application Map and Reduce functions and handles reading and writing files, and a coordinator process that hands out tasks to workers and copes with failed workers. You'll be building something similar to the [MapReduce paper](http://research.google.com/archive/mapreduce-osdi04.pdf). (Note: this lab uses ""coordinator"" instead of the paper's ""master"".)","You need to [setup Go](http://nil.csail.mit.edu/6.5840/2024/labs/go.html) to do the labs. - -Fetch the initial lab software with [git](https://git-scm.com/) (a version control system). To learn more about git, look at the [Pro Git book](https://git-scm.com/book/en/v2) or the [git user's manual](http://www.kernel.org/pub/software/scm/git/docs/user-manual.html). - -``` -$ git clone git://g.csail.mit.edu/6.5840-golabs-2024 6.5840 -$ cd 6.5840 -$ ls -Makefile src -$ -``` - -We supply you with a simple sequential mapreduce implementation in `src/main/mrsequential.go`. It runs the maps and reduces one at a time, in a single process. We also provide you with a couple of MapReduce applications: word-count in `mrapps/wc.go`, and a text indexer in `mrapps/indexer.go`. You can run word count sequentially as follows: - -``` -$ cd ~/6.5840 -$ cd src/main -$ go build -buildmode=plugin ../mrapps/wc.go -$ rm mr-out* -$ go run mrsequential.go wc.so pg*.txt -$ more mr-out-0 -A 509 -ABOUT 2 -ACT 8 -... 
-``` - -`mrsequential.go` leaves its output in the file `mr-out-0`. The input is from the text files named `pg-xxx.txt`. - -Feel free to borrow code from `mrsequential.go`. You should also have a look at `mrapps/wc.go` to see what MapReduce application code looks like. - -For this lab and all the others, we might issue updates to the code we provide you. To ensure that you can fetch those updates and easily merge them using `git pull`, it's best to leave the code we provide in the original files. You can add to the code we provide as directed in the lab write-ups; just don't move it. It's OK to put your own new functions in new files.",,"Your job is to implement a distributed MapReduce, consisting of two programs, the coordinator and the worker. There will be just one coordinator process, and one or more worker processes executing in parallel. In a real system the workers would run on a bunch of different machines, but for this lab you'll run them all on a single machine. The workers will talk to the coordinator via RPC. Each worker process will, in a loop, ask the coordinator for a task, read the task's input from one or more files, execute the task, write the task's output to one or more files, and again ask the coordinator for a new task. The coordinator should notice if a worker hasn't completed its task in a reasonable amount of time (for this lab, use ten seconds), and give the same task to a different worker. - -We have given you a little code to start you off. The ""main"" routines for the coordinator and worker are in `main/mrcoordinator.go` and `main/mrworker.go`; don't change these files. You should put your implementation in `mr/coordinator.go`, `mr/worker.go`, and `mr/rpc.go`. - -Here's how to run your code on the word-count MapReduce application. First, make sure the word-count plugin is freshly built: - -``` -$ go build -buildmode=plugin ../mrapps/wc.go -``` - -In the `main` directory, run the coordinator. - -``` -$ rm mr-out* -$ go run mrcoordinator.go pg-*.txt -``` - -The `pg-*.txt` arguments to `mrcoordinator.go` are the input files; each file corresponds to one ""split"", and is the input to one Map task. - -In one or more other windows, run some workers: - -``` -$ go run mrworker.go wc.so -``` - -When the workers and coordinator have finished, look at the output in `mr-out-*`. When you've completed the lab, the sorted union of the output files should match the sequential output, like this: - -``` -$ cat mr-out-* | sort | more -A 509 -ABOUT 2 -ACT 8 -... -``` - -We supply you with a test script in `main/test-mr.sh`. The tests check that the `wc` and `indexer` MapReduce applications produce the correct output when given the `pg-xxx.txt` files as input. The tests also check that your implementation runs the Map and Reduce tasks in parallel, and that your implementation recovers from workers that crash while running tasks. - -If you run the test script now, it will hang because the coordinator never finishes: - -``` -$ cd ~/6.5840/src/main -$ bash test-mr.sh -*** Starting wc test. -``` - -You can change `ret := false` to true in the Done function in `mr/coordinator.go` so that the coordinator exits immediately. Then: - -``` -$ bash test-mr.sh -*** Starting wc test. -sort: No such file or directory -cmp: EOF on mr-wc-all ---- wc output is not the same as mr-correct-wc.txt ---- wc test: FAIL -$ -``` - -The test script expects to see output in files named `mr-out-X`, one for each reduce task. 
The empty implementations of `mr/coordinator.go` and `mr/worker.go` don't produce those files (or do much of anything else), so the test fails. - -When you've finished, the test script output should look like this: - -``` -$ bash test-mr.sh -*** Starting wc test. ---- wc test: PASS -*** Starting indexer test. ---- indexer test: PASS -*** Starting map parallelism test. ---- map parallelism test: PASS -*** Starting reduce parallelism test. ---- reduce parallelism test: PASS -*** Starting job count test. ---- job count test: PASS -*** Starting early exit test. ---- early exit test: PASS -*** Starting crash test. ---- crash test: PASS -*** PASSED ALL TESTS -$ -``` - -You may see some errors from the Go RPC package that look like - -``` -2019/12/16 13:27:09 rpc.Register: method ""Done"" has 1 input parameters; needs exactly three -``` - -Ignore these messages; registering the coordinator as an [RPC server](https://golang.org/src/net/rpc/server.go) checks if all its methods are suitable for RPCs (have 3 inputs); we know that `Done` is not called via RPC. - -Additionally, depending on your strategy for terminating worker processes, you may see some errors of the form - -``` -2024/02/11 16:21:32 dialing:dial unix /var/tmp/5840-mr-501: connect: connection refused -``` - -It is fine to see a handful of these messages per test; they arise when the worker is unable to contact the coordinator RPC server after the coordinator has exited. - -### A few rules: - -- The map phase should divide the intermediate keys into buckets for `nReduce` reduce tasks, where `nReduce` is the number of reduce tasks -- the argument that `main/mrcoordinator.go` passes to `MakeCoordinator()`. Each mapper should create `nReduce` intermediate files for consumption by the reduce tasks. -- The worker implementation should put the output of the X'th reduce task in the file `mr-out-X`. -- A `mr-out-X` file should contain one line per Reduce function output. The line should be generated with the Go `""%v %v""` format, called with the key and value. Have a look in `main/mrsequential.go` for the line commented ""this is the correct format"". The test script will fail if your implementation deviates too much from this format. -- You can modify `mr/worker.go`, `mr/coordinator.go`, and `mr/rpc.go`. You can temporarily modify other files for testing, but make sure your code works with the original versions; we'll test with the original versions. -- The worker should put intermediate Map output in files in the current directory, where your worker can later read them as input to Reduce tasks. -- `main/mrcoordinator.go` expects `mr/coordinator.go` to implement a `Done()` method that returns true when the MapReduce job is completely finished; at that point, `mrcoordinator.go` will exit. -- When the job is completely finished, the worker processes should exit. A simple way to implement this is to use the return value from `call()`: if the worker fails to contact the coordinator, it can assume that the coordinator has exited because the job is done, so the worker can terminate too. Depending on your design, you might also find it helpful to have a ""please exit"" pseudo-task that the coordinator can give to workers. - -### Hints - -- The [Guidance page](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) has some tips on developing and debugging. - -- One way to get started is to modify `mr/worker.go`'s `Worker()` to send an RPC to the coordinator asking for a task. 
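A rough sketch of what such a first RPC might look like; the struct names, fields, and the `Coordinator.AssignTask` method name are all invented, and only the `call()` helper is assumed from the provided skeleton. Every field is exported because Go RPC marshals only capitalized fields, and the reply is left at its zero value before the call, two points that later hints spell out.

```go
package sketch

// TaskArgs and TaskReply are invented names for the messages a worker
// might exchange with the coordinator.
type TaskArgs struct {
	WorkerID int
}

type TaskReply struct {
	Kind     string // e.g. "map", "reduce", "wait", or "exit"
	TaskNum  int
	Filename string // input split for a map task
	NReduce  int
}

// call is assumed to match the RPC helper already present in mr/worker.go.
var call func(rpcname string, args interface{}, reply interface{}) bool

// requestTask asks the coordinator for work, leaving the reply struct at
// its zero value before the call.
func requestTask(workerID int) (TaskReply, bool) {
	args := TaskArgs{WorkerID: workerID}
	reply := TaskReply{}
	ok := call("Coordinator.AssignTask", &args, &reply) // method name is invented
	return reply, ok
}
```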
Then modify the coordinator to respond with the file name of an as-yet-unstarted map task. Then modify the worker to read that file and call the application Map function, as in `mrsequential.go`. - -- The application Map and Reduce functions are loaded at run-time using the Go plugin package, from files whose names end in `.so`. - -- If you change anything in the `mr/` directory, you will probably have to re-build any MapReduce plugins you use, with something like `go build -buildmode=plugin ../mrapps/wc.go` - -- This lab relies on the workers sharing a file system. That's straightforward when all workers run on the same machine, but would require a global filesystem like GFS if the workers ran on different machines. - -- A reasonable naming convention for intermediate files is `mr-X-Y`, where X is the Map task number, and Y is the reduce task number. - -- The worker's map task code will need a way to store intermediate key/value pairs in files in a way that can be correctly read back during reduce tasks. One possibility is to use Go's encoding/json package. To write key/value pairs in JSON format to an open file: - - ``` - enc := json.NewEncoder(file) - for _, kv := ... { - err := enc.Encode(&kv) - ``` - - and to read such a file back: - - ``` - dec := json.NewDecoder(file) - for { - var kv KeyValue - if err := dec.Decode(&kv); err != nil { - break - } - kva = append(kva, kv) - } - ``` - -- The map part of your worker can use the `ihash(key)` function (in `worker.go`) to pick the reduce task for a given key. - -- You can steal some code from `mrsequential.go` for reading Map input files, for sorting intermediate key/value pairs between the Map and Reduce, and for storing Reduce output in files. - -- The coordinator, as an RPC server, will be concurrent; don't forget to lock shared data. - -- Use Go's race detector, with `go run -race`. `test-mr.sh` has a comment at the start that tells you how to run it with `-race`. When we grade your labs, we will **not** use the race detector. Nevertheless, if your code has races, there's a good chance it will fail when we test it even without the race detector. - -- Workers will sometimes need to wait, e.g. reduces can't start until the last map has finished. One possibility is for workers to periodically ask the coordinator for work, sleeping with `time.Sleep()` between each request. Another possibility is for the relevant RPC handler in the coordinator to have a loop that waits, either with `time.Sleep()` or `sync.Cond`. Go runs the handler for each RPC in its own thread, so the fact that one handler is waiting needn't prevent the coordinator from processing other RPCs. - -- The coordinator can't reliably distinguish between crashed workers, workers that are alive but have stalled for some reason, and workers that are executing but too slowly to be useful. The best you can do is have the coordinator wait for some amount of time, and then give up and re-issue the task to a different worker. For this lab, have the coordinator wait for ten seconds; after that the coordinator should assume the worker has died (of course, it might not have). - -- If you choose to implement Backup Tasks (Section 3.6), note that we test that your code doesn't schedule extraneous tasks when workers execute tasks without crashing. Backup tasks should only be scheduled after some relatively long period of time (e.g., 10s). - -- To test crash recovery, you can use the `mrapps/crash.go` application plugin. It randomly exits in the Map and Reduce functions. 
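Pulling a few of the earlier hints together (the `mr-X-Y` naming convention, JSON-encoded intermediate pairs, and partitioning with `ihash`), a map worker's output step might look roughly like the sketch below. It is one possible arrangement, not the required one, and it omits the temporary-file trick covered in the next hint; the `ihash` body shown is a stand-in for the helper already provided in `mr/worker.go`.

```go
package sketch

import (
	"encoding/json"
	"fmt"
	"hash/fnv"
	"os"
)

// KeyValue matches the pair type used throughout the lab skeleton.
type KeyValue struct {
	Key   string
	Value string
}

// ihash is a stand-in for the helper already provided in mr/worker.go.
func ihash(key string) int {
	h := fnv.New32a()
	h.Write([]byte(key))
	return int(h.Sum32() & 0x7fffffff)
}

// writeIntermediate buckets one map task's output into mr-X-Y files,
// encoding each pair as JSON so the reduce side can decode it back.
func writeIntermediate(mapTask, nReduce int, kva []KeyValue) error {
	files := make([]*os.File, nReduce)
	encs := make([]*json.Encoder, nReduce)
	for y := 0; y < nReduce; y++ {
		f, err := os.Create(fmt.Sprintf("mr-%d-%d", mapTask, y))
		if err != nil {
			return err
		}
		files[y] = f
		encs[y] = json.NewEncoder(f)
	}
	for _, kv := range kva {
		if err := encs[ihash(kv.Key)%nReduce].Encode(&kv); err != nil {
			return err
		}
	}
	for _, f := range files {
		if err := f.Close(); err != nil {
			return err
		}
	}
	return nil
}
```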
- -- To ensure that nobody observes partially written files in the presence of crashes, the MapReduce paper mentions the trick of using a temporary file and atomically renaming it once it is completely written. You can use `ioutil.TempFile` (or `os.CreateTemp` if you are running Go 1.17 or later) to create a temporary file and `os.Rename` to atomically rename it. - -- `test-mr.sh` runs all its processes in the sub-directory `mr-tmp`, so if something goes wrong and you want to look at intermediate or output files, look there. Feel free to temporarily modify `test-mr.sh` to `exit` after the failing test, so the script does not continue testing (and overwrite the output files). - -- `test-mr-many.sh` runs `test-mr.sh` many times in a row, which you may want to do in order to spot low-probability bugs. It takes as an argument the number of times to run the tests. You should not run several `test-mr.sh` instances in parallel because the coordinator will reuse the same socket, causing conflicts. - -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names. - -- When calling the RPC call() function, the reply struct should contain all default values. RPC calls should look like this: - - ``` - reply := SomeType{} - call(..., &reply) - ``` - - without setting any fields of reply before the call. If you pass reply structures that have non-default fields, the RPC system may silently return incorrect values. -",,"- The [Guidance page](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) has some tips on developing and debugging. - -- One way to get started is to modify `mr/worker.go`'s `Worker()` to send an RPC to the coordinator asking for a task. Then modify the coordinator to respond with the file name of an as-yet-unstarted map task. Then modify the worker to read that file and call the application Map function, as in `mrsequential.go`. - -- The application Map and Reduce functions are loaded at run-time using the Go plugin package, from files whose names end in `.so`. - -- If you change anything in the `mr/` directory, you will probably have to re-build any MapReduce plugins you use, with something like `go build -buildmode=plugin ../mrapps/wc.go` - -- This lab relies on the workers sharing a file system. That's straightforward when all workers run on the same machine, but would require a global filesystem like GFS if the workers ran on different machines. - -- A reasonable naming convention for intermediate files is `mr-X-Y`, where X is the Map task number, and Y is the reduce task number. - -- The worker's map task code will need a way to store intermediate key/value pairs in files in a way that can be correctly read back during reduce tasks. One possibility is to use Go's `encoding/json` package. To write key/value pairs in JSON format to an open file: - - ``` - enc := json.NewEncoder(file) - for _, kv := ... { - err := enc.Encode(&kv) - ``` - - and to read such a file back: - - ``` - dec := json.NewDecoder(file) - for { - var kv KeyValue - if err := dec.Decode(&kv); err != nil { - break - } - kva = append(kva, kv) - } - ``` - -- The map part of your worker can use the `ihash(key)` function (in `worker.go`) to pick the reduce task for a given key. - -- You can steal some code from `mrsequential.go` for reading Map input files, for sorting intermediate key/value pairs between the Map and Reduce, and for storing Reduce output in files. 
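As a concrete illustration of the temp-file-and-rename hint above, here is a small sketch (assuming Go 1.17+ and an `os` import; the helper name and callback shape are made up for this example):

```
// Write a file so that readers never observe a partial result: write to a
// temporary file in the same directory, then atomically rename it into place.
func atomicWrite(finalName string, write func(*os.File) error) error {
	tmp, err := os.CreateTemp(".", finalName+"-*")
	if err != nil {
		return err
	}
	defer os.Remove(tmp.Name()) // harmless no-op once the rename has succeeded
	if err := write(tmp); err != nil {
		tmp.Close()
		return err
	}
	if err := tmp.Close(); err != nil {
		return err
	}
	return os.Rename(tmp.Name(), finalName) // atomic on the same file system
}
```

A worker could wrap the JSON-encoding loop from the earlier hint in `write` when producing `mr-X-Y` or `mr-out-X` files.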
- -- The coordinator, as an RPC server, will be concurrent; don't forget to lock shared data. - -- Use Go's race detector, with `go run -race`. `test-mr.sh` has a comment at the start that tells you how to run it with `-race`. When we grade your labs, we will **not** use the race detector. Nevertheless, if your code has races, there's a good chance it will fail when we test it even without the race detector. - -- Workers will sometimes need to wait, e.g. reduces can't start until the last map has finished. One possibility is for workers to periodically ask the coordinator for work, sleeping with `time.Sleep()` between each request. Another possibility is for the relevant RPC handler in the coordinator to have a loop that waits, either with `time.Sleep()` or `sync.Cond`. Go runs the handler for each RPC in its own thread, so the fact that one handler is waiting needn't prevent the coordinator from processing other RPCs. - -- The coordinator can't reliably distinguish between crashed workers, workers that are alive but have stalled for some reason, and workers that are executing but too slowly to be useful. The best you can do is have the coordinator wait for some amount of time, and then give up and re-issue the task to a different worker. For this lab, have the coordinator wait for ten seconds; after that the coordinator should assume the worker has died (of course, it might not have). - -- If you choose to implement Backup Tasks (Section 3.6), note that we test that your code doesn't schedule extraneous tasks when workers execute tasks without crashing. Backup tasks should only be scheduled after some relatively long period of time (e.g., 10s). - -- To test crash recovery, you can use the `mrapps/crash.go` application plugin. It randomly exits in the Map and Reduce functions. - -- To ensure that nobody observes partially written files in the presence of crashes, the MapReduce paper mentions the trick of using a temporary file and atomically renaming it once it is completely written. You can use `ioutil.TempFile` (or `os.CreateTemp` if you are running Go 1.17 or later) to create a temporary file and `os.Rename` to atomically rename it. - -- `test-mr.sh` runs all its processes in the sub-directory `mr-tmp`, so if something goes wrong and you want to look at intermediate or output files, look there. Feel free to temporarily modify `test-mr.sh` to `exit` after the failing test, so the script does not continue testing (and overwrite the output files). - -- `test-mr-many.sh` runs `test-mr.sh` many times in a row, which you may want to do in order to spot low-probability bugs. It takes as an argument the number of times to run the tests. You should not run several `test-mr.sh` instances in parallel because the coordinator will reuse the same socket, causing conflicts. - -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names. - -- When calling the RPC `call()` function, the reply struct should contain all default values. RPC calls should look like this: - - ``` - reply := SomeType{} - call(..., &reply) - ``` - - without setting any fields of reply before the call. If you pass reply structures that have non-default fields, the RPC system may silently return incorrect values.","- The map phase should divide the intermediate keys into buckets for `nReduce` reduce tasks, where `nReduce` is the number of reduce tasks -- the argument that `main/mrcoordinator.go` passes to `MakeCoordinator()`. 
Each mapper should create `nReduce` intermediate files for consumption by the reduce tasks. -- The worker implementation should put the output of the X'th reduce task in the file `mr-out-X`. -- A `mr-out-X` file should contain one line per Reduce function output. The line should be generated with the Go `""%v %v""` format, called with the key and value. Have a look in `main/mrsequential.go` for the line commented ""this is the correct format"". The test script will fail if your implementation deviates too much from this format. -- You can modify `mr/worker.go`, `mr/coordinator.go`, and `mr/rpc.go`. You can temporarily modify other files for testing, but make sure your code works with the original versions; we'll test with the original versions. -- The worker should put intermediate Map output in files in the current directory, where your worker can later read them as input to Reduce tasks. -- `main/mrcoordinator.go` expects `mr/coordinator.go` to implement a `Done()` method that returns true when the MapReduce job is completely finished; at that point, `mrcoordinator.go` will exit. -- When the job is completely finished, the worker processes should exit. A simple way to implement this is to use the return value from `call()`: if the worker fails to contact the coordinator, it can assume that the coordinator has exited because the job is done, so the worker can terminate too. Depending on your design, you might also find it helpful to have a ""please exit"" pseudo-task that the coordinator can give to workers.",git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/main && bash test-mr.sh,"*** Starting wc test. ---- wc test: PASS -*** Starting indexer test. ---- indexer test: PASS -*** Starting map parallelism test. ---- map parallelism test: PASS -*** Starting reduce parallelism test. ---- reduce parallelism test: PASS -*** Starting job count test. ---- job count test: PASS -*** Starting early exit test. ---- early exit test: PASS -*** Starting crash test. ---- crash test: PASS -*** PASSED ALL TESTS",moderate/hard,http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html -2,6.5840: Distributed Systems,Spring 2024,Lab 2: Key/Value Server,Key/value server with no network failures,"In this lab you will build a key/value server for a single machine that ensures that each operation is executed exactly once despite network failures and that the operations are [linearizable](http://nil.csail.mit.edu/6.5840/2024/papers/linearizability-faq.txt). Later labs will replicate a server like this one to handle server crashes. - -Clients can send three different RPCs to the key/value server: `Put(key, value)`, `Append(key, arg)`, and `Get(key)`. The server maintains an in-memory map of key/value pairs. Keys and values are strings. `Put(key, value)` installs or replaces the value for a particular key in the map, `Append(key, arg)` appends arg to key's value *and* returns the old value, and `Get(key)` fetches the current value for the key. A `Get` for a non-existent key should return an empty string. An `Append` to a non-existent key should act as if the existing value were a zero-length string. Each client talks to the server through a `Clerk` with Put/Append/Get methods. A `Clerk` manages RPC interactions with the server. - -Your server must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If client requests aren't concurrent, each client Get/Put/Append call should observe the modifications to the state implied by the preceding sequence of calls. 
For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.","We supply you with skeleton code and tests in `src/kvsrv`. You will need to modify `kvsrv/client.go`, `kvsrv/server.go`, and `kvsrv/common.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv -$ go test -... -$ -``` -",,"Your first task is to implement a solution that works when there are no dropped messages. - -You'll need to add RPC-sending code to the Clerk Put/Append/Get methods in `client.go`, and implement `Put`, `Append()` and `Get()` RPC handlers in `server.go`. - -You have completed this task when you pass the first two tests in the test suite: ""one client"" and ""many clients"". - -- Check that your code is race-free using `go test -race`.","Your first task is to implement a solution that works when there are no dropped messages. - -You'll need to add RPC-sending code to the Clerk Put/Append/Get methods in `client.go`, and implement `Put`, `Append()` and `Get()` RPC handlers in `server.go`. - -You have completed this task when you pass the first two tests in the test suite: ""one client"" and ""many clients"".",- Check that your code is race-free using `go test -race`.[@rules],,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/kvsrv && go test,,easy,http://nil.csail.mit.edu/6.5840/2024/labs/lab-kvsrv.html -3,6.5840: Distributed Systems,Spring 2024,Lab 2: Key/Value Server,Key/value server with dropped messages,"In this lab you will build a key/value server for a single machine that ensures that each operation is executed exactly once despite network failures and that the operations are [linearizable](http://nil.csail.mit.edu/6.5840/2024/papers/linearizability-faq.txt). Later labs will replicate a server like this one to handle server crashes. - -Clients can send three different RPCs to the key/value server: `Put(key, value)`, `Append(key, arg)`, and `Get(key)`. The server maintains an in-memory map of key/value pairs. Keys and values are strings. `Put(key, value)` installs or replaces the value for a particular key in the map, `Append(key, arg)` appends arg to key's value *and* returns the old value, and `Get(key)` fetches the current value for the key. A `Get` for a non-existent key should return an empty string. An `Append` to a non-existent key should act as if the existing value were a zero-length string. Each client talks to the server through a `Clerk` with Put/Append/Get methods. A `Clerk` manages RPC interactions with the server. - -Your server must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable.
If client requests aren't concurrent, each client Get/Put/Append call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.","We supply you with skeleton code and tests in `src/kvsrv`. You will need to modify `kvsrv/client.go`, `kvsrv/server.go`, and `kvsrv/common.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv -$ go test -... -$ -``` -",,"Now you should modify your solution to continue in the face of dropped messages (e.g., RPC requests and RPC replies). If a message was lost, then the client's `ck.server.Call()` will return `false` (more precisely, `Call()` waits for a reply message for a timeout interval, and returns false if no reply arrives within that time). One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it succeeds. Each call to `Clerk.Put()` or `Clerk.Append()`, however, should result in just a *single* execution, so you will have to ensure that the re-send doesn't result in the server executing the request twice. - -Add code to `Clerk` to retry if doesn't receive a reply, and to `server.go` to filter duplicates if the operation requires it. These notes include guidance on [duplicate detection](http://nil.csail.mit.edu/6.5840/2024/notes/l-raft-QA.txt). - -- You will need to uniquely identify client operations to ensure that the key/value server executes each one just once. -- You will have to think carefully about what state the server must maintain for handling duplicate `Get()`, `Put()`, and `Append()` requests, if any at all. -- Your scheme for duplicate detection should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. It's OK to assume that a client will make only one call into a Clerk at a time. - -Your code should now pass all tests, like this: - -``` -$ go test -Test: one client ... - ... Passed -- t 3.8 nrpc 31135 ops 31135 -Test: many clients ... - ... Passed -- t 4.7 nrpc 102853 ops 102853 -Test: unreliable net, many clients ... - ... Passed -- t 4.1 nrpc 580 ops 496 -Test: concurrent append to same key, unreliable ... - ... Passed -- t 0.6 nrpc 61 ops 52 -Test: memory use get ... - ... Passed -- t 0.4 nrpc 4 ops 0 -Test: memory use put ... - ... Passed -- t 0.2 nrpc 2 ops 0 -Test: memory use append ... - ... Passed -- t 0.4 nrpc 2 ops 0 -Test: memory use many puts ... - ... Passed -- t 11.5 nrpc 100000 ops 0 -Test: memory use many gets ... - ... 
Passed -- t 12.2 nrpc 100001 ops 0 -PASS -ok 6.5840/kvsrv 39.000s -``` - -The numbers after each `Passed` are real time in seconds, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put/Append calls). -","Add code to `Clerk` to retry if it doesn't receive a reply, and to `server.go` to filter duplicates if the operation requires it. These notes include guidance on [duplicate detection](http://nil.csail.mit.edu/6.5840/2024/notes/l-raft-QA.txt).","- You will need to uniquely identify client operations to ensure that the key/value server executes each one just once. -- You will have to think carefully about what state the server must maintain for handling duplicate `Get()`, `Put()`, and `Append()` requests, if any at all. -- Your scheme for duplicate detection should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. It's OK to assume that a client will make only one call into a Clerk at a time.[@[repo/location]]",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/kvsrv && go test,"Test: one client ... - ... Passed -- t 3.8 nrpc 31135 ops 31135 -Test: many clients ... - ... Passed -- t 4.7 nrpc 102853 ops 102853 -Test: unreliable net, many clients ... - ... Passed -- t 4.1 nrpc 580 ops 496 -Test: concurrent append to same key, unreliable ... - ... Passed -- t 0.6 nrpc 61 ops 52 -Test: memory use get ... - ... Passed -- t 0.4 nrpc 4 ops 0 -Test: memory use put ... - ... Passed -- t 0.2 nrpc 2 ops 0 -Test: memory use append ... - ... Passed -- t 0.4 nrpc 2 ops 0 -Test: memory use many puts ... - ... Passed -- t 11.5 nrpc 100000 ops 0 -Test: memory use many gets ... - ... Passed -- t 12.2 nrpc 100001 ops 0 -PASS -ok 6.5840/kvsrv 39.000s",easy,http://nil.csail.mit.edu/6.5840/2024/labs/lab-kvsrv.html -4,6.5840: Distributed Systems,Spring 2024,Lab 3: Raft,Part 3A: leader election,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service.
A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date.","If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft -$ go test -Test (3A): initial election ... ---- FAIL: TestInitialElection3A (5.04s) - config.go:326: expected one leader, got none -Test (3A): election after network failure ... ---- FAIL: TestReElection3A (5.03s) - config.go:326: expected one leader, got none -... -$ -```","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). 
The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.","Implement Raft leader election and heartbeats (`AppendEntries` RPCs with no log entries). The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run `go test -run 3A `to test your 3A code. - -- You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. `go test -run 3A `. -- Follow the paper's Figure 2. At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election, -- Add the Figure 2 state for leader election to the `Raft` struct in `raft.go`. You'll also need to define a struct to hold information about each log entry. -- Fill in the `RequestVoteArgs` and `RequestVoteReply` structs. Modify `Make()` to create a background goroutine that will kick off leader election periodically by sending out `RequestVote` RPCs when it hasn't heard from another peer for a while. Implement the `RequestVote()` RPC handler so that servers will vote for one another. -- To implement heartbeats, define an `AppendEntries` RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an `AppendEntries` RPC handler method. -- The tester requires that the leader send heartbeat RPCs no more than ten times per second. -- The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate). -- The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds. -- You may find Go's [rand](https://golang.org/pkg/math/rand/) useful. -- You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls [time.Sleep()](https://golang.org/pkg/time/#Sleep); see the `ticker()` goroutine that `Make()` creates for this purpose. Don't use Go's `time.Timer` or `time.Ticker`, which are difficult to use correctly. -- If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure. -- Don't forget to implement `GetState()`. -- The tester calls your Raft's `rf.Kill()` when it is permanently shutting down an instance. You can check whether `Kill()` has been called using `rf.killed()`. You may want to do this in all loops, to avoid having dead Raft instances print confusing messages. 
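Tying several of the hints above together (randomized election timeouts, a `time.Sleep()` loop, and checking `rf.killed()`), a ticker goroutine might look roughly like the sketch below. The `electionReset`, `state`, `Leader`, and `startElection` names are illustrative, not part of the skeleton; it assumes `time` and `math/rand` imports.

```
// Illustrative ticker loop: wake up periodically, and start an election if we
// are not the leader and have not heard from one within a randomized timeout.
func (rf *Raft) ticker() {
	for !rf.killed() {
		// Larger than the paper's 150-300ms, since the tester allows only
		// about ten heartbeats per second.
		timeout := time.Duration(400+rand.Intn(300)) * time.Millisecond

		rf.mu.Lock()
		last := rf.electionReset // last heartbeat or granted vote (illustrative field)
		isLeader := rf.state == Leader
		rf.mu.Unlock()

		if !isLeader && time.Since(last) >= timeout {
			rf.startElection() // bump term, vote for self, send RequestVote RPCs
		}
		time.Sleep(50 * time.Millisecond)
	}
}
```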
-- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). The `labgob` package will warn you about this; don't ignore the warnings. -- The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug. Refer to the [Guidance](https://pdos.csail.mit.edu/6.824/labs/guidance.html) page for debugging tips. -- If you fail a test, the tester produces a file that visualizes a timeline with events marked along it, including network partitions, crashed servers, and checks performed. Here's an [example of the visualization](https://pdos.csail.mit.edu/6.824/labs/vis.html). Further, you can add your own annotations by writing, for example, `tester.Annotate(""Server 0"", ""short description"", ""details"")`. This is a new feature we added this year, so if you have any feedback regarding the visualizer (e.g., bug reports, what annotation APIs that you think might be helpful, what information you want the visualizer to show, etc.), please let us know! - -Be sure you pass the 3A tests before submitting Part 3A, so that you see something like this: - -``` -$ go test -run 3A -Test (3A): initial election (reliable network)... - ... Passed -- 3.6 3 106 0 -Test (3A): election after network failure (reliable network)... - ... Passed -- 7.6 3 304 0 -Test (3A): multiple elections (reliable network)... - ... Passed -- 8.4 7 954 0 -PASS -ok 6.5840/raft1 19.834sak -$ -``` - -Each ""Passed"" line contains five numbers; these are the time that the test took in seconds, the number of Raft peers, the number of RPCs sent during the test, the total number of bytes in the RPC messages, and the number of log entries that Raft reports were committed. Your numbers will differ from those shown here. You can ignore the numbers if you like, but they may help you sanity-check the number of RPCs that your implementation sends. For all of labs 3, 4, and 5, the grading script will fail your solution if it takes more than 600 seconds for all of the tests (`go test`), or if any individual test takes more than 120 seconds. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should make sure that your code consistently passes the tests with the `-race` flag. -","Implement Raft leader election and heartbeats (`AppendEntries` RPCs with no log entries). The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run `go test -run 3A `to test your 3A code. - -- You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. `go test -run 3A `. -- Follow the paper's Figure 2. At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election, -- Add the Figure 2 state for leader election to the `Raft` struct in `raft.go`. You'll also need to define a struct to hold information about each log entry. -- Fill in the `RequestVoteArgs` and `RequestVoteReply` structs. Modify `Make()` to create a background goroutine that will kick off leader election periodically by sending out `RequestVote` RPCs when it hasn't heard from another peer for a while. 
Implement the `RequestVote()` RPC handler so that servers will vote for one another. -- To implement heartbeats, define an `AppendEntries` RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an `AppendEntries` RPC handler method. -- The tester requires that the leader send heartbeat RPCs no more than ten times per second. -- The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate). -- The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds. -- You may find Go's [rand](https://golang.org/pkg/math/rand/) useful. -- You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls [time.Sleep()](https://golang.org/pkg/time/#Sleep); see the `ticker()` goroutine that `Make()` creates for this purpose. Don't use Go's `time.Timer` or `time.Ticker`, which are difficult to use correctly. -- If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure. -- Don't forget to implement `GetState()`. -- The tester calls your Raft's `rf.Kill()` when it is permanently shutting down an instance. You can check whether `Kill()` has been called using `rf.killed()`. You may want to do this in all loops, to avoid having dead Raft instances print confusing messages. -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). The `labgob` package will warn you about this; don't ignore the warnings. -- The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug. Refer to the [Guidance](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) page for debugging tips. - -Be sure you pass the 3A tests before submitting Part 3A, so that you see something like this: - -``` -$ go test -run 3A -Test (3A): initial election ... - ... Passed -- 3.5 3 58 16840 0 -Test (3A): election after network failure ... - ... Passed -- 5.4 3 118 25269 0 -Test (3A): multiple elections ... - ... Passed -- 7.3 7 624 138014 0 -PASS -ok 6.5840/raft 16.265s -$ -``` - -Each ""Passed"" line contains five numbers; these are the time that the test took in seconds, the number of Raft peers, the number of RPCs sent during the test, the total number of bytes in the RPC messages, and the number of log entries that Raft reports were committed. Your numbers will differ from those shown here. You can ignore the numbers if you like, but they may help you sanity-check the number of RPCs that your implementation sends. For all of labs 3, 4, and 5, the grading script will fail your solution if it takes more than 600 seconds for all of the tests (`go test`), or if any individual test takes more than 120 seconds. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). 
However, you should make sure that your code consistently passes the tests with the `-race` flag.",,,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/raft && go test -run 3A,"Test (3A): initial election ... - ... Passed -- 3.5 3 58 16840 0 -Test (3A): election after network failure ... - ... Passed -- 5.4 3 118 25269 0 -Test (3A): multiple elections ... - ... Passed -- 7.3 7 624 138014 0 -PASS -ok 6.5840/raft 16.265s",moderate,http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html -5,6.5840: Distributed Systems,Spring 2024,Lab 3: Raft,Part 3B: log,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date.","If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. 
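For reference, the `RequestVoteArgs` and `RequestVoteReply` structs that the Part 3A hints ask you to fill in follow directly from Figure 2 of the extended Raft paper; a sketch is below (field names must be exported, i.e. capitalized, so `labrpc`/`labgob` can serialize them).

```
type RequestVoteArgs struct {
	Term         int // candidate's term
	CandidateId  int // candidate requesting the vote
	LastLogIndex int // index of candidate's last log entry
	LastLogTerm  int // term of candidate's last log entry
}

type RequestVoteReply struct {
	Term        int  // currentTerm, for the candidate to update itself
	VoteGranted bool // true means the candidate received the vote
}
```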
- -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft -$ go test -Test (3A): initial election ... ---- FAIL: TestInitialElection3A (5.04s) - config.go:326: expected one leader, got none -Test (3A): election after network failure ... ---- FAIL: TestReElection3A (5.03s) - config.go:326: expected one leader, got none -... -$ -```","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.","Implement the leader and follower code to append new log entries, so that the `go test -run 3B `tests pass. - -- Run `git pull` to get the latest lab software. -- Your first goal should be to pass `TestBasicAgree3B()`. Start by implementing `Start()`, then write the code to send and receive new log entries via `AppendEntries` RPCs, following Figure 2. Send each newly committed entry on `applyCh` on each peer. -- You will need to implement the election restriction (section 5.4.1 in the paper). -- Your code may have loops that repeatedly check for certain events. Don't have these loops execute continuously without pausing, since that will slow your implementation enough that it fails tests. 
Use Go's [condition variables](https://golang.org/pkg/sync/#Cond), or insert a `time.Sleep(10 * time.Millisecond)` in each loop iteration. -- Do yourself a favor for future labs and write (or re-write) code that's clean and clear. For ideas, re-visit our the [Guidance page](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) with tips on how to develop and debug your code. -- If you fail a test, look at `test_test.go` and `config.go` to understand what's being tested. `config.go` also illustrates how the tester uses the Raft API. - -The tests for upcoming labs may fail your code if it runs too slowly. You can check how much real time and CPU time your solution uses with the time command. Here's typical output: - -``` -$ time go test -run 3B -Test (3B): basic agreement ... - ... Passed -- 0.9 3 16 4572 3 -Test (3B): RPC byte count ... - ... Passed -- 1.7 3 48 114536 11 -Test (3B): agreement after follower reconnects ... - ... Passed -- 3.6 3 78 22131 7 -Test (3B): no agreement if too many followers disconnect ... - ... Passed -- 3.8 5 172 40935 3 -Test (3B): concurrent Start()s ... - ... Passed -- 1.1 3 24 7379 6 -Test (3B): rejoin of partitioned leader ... - ... Passed -- 5.1 3 152 37021 4 -Test (3B): leader backs up quickly over incorrect follower logs ... - ... Passed -- 17.2 5 2080 1587388 102 -Test (3B): RPC counts aren't too high ... - ... Passed -- 2.2 3 60 20119 12 -PASS -ok 6.5840/raft 35.557s - -real 0m35.899s -user 0m2.556s -sys 0m1.458s -$ -``` - -The ""ok 6.5840/raft 35.557s"" means that Go measured the time taken for the 3B tests to be 35.557 seconds of real (wall-clock) time. The ""user 0m2.556s"" means that the code consumed 2.556 seconds of CPU time, or time spent actually executing instructions (rather than waiting or sleeping). If your solution uses much more than a minute of real time for the 3B tests, or much more than 5 seconds of CPU time, you may run into trouble later on. Look for time spent sleeping or waiting for RPC timeouts, loops that run without sleeping or waiting for conditions or channel messages, or large numbers of RPCs sent. - -### ","Implement the leader and follower code to append new log entries, so that the `go test -run 3B `tests pass.","- Run `git pull` to get the latest lab software. -- Your first goal should be to pass `TestBasicAgree3B()`. Start by implementing `Start()`, then write the code to send and receive new log entries via `AppendEntries` RPCs, following Figure 2. Send each newly committed entry on `applyCh` on each peer. -- You will need to implement the election restriction (section 5.4.1 in the paper). -- Your code may have loops that repeatedly check for certain events. Don't have these loops execute continuously without pausing, since that will slow your implementation enough that it fails tests. Use Go's [condition variables](https://golang.org/pkg/sync/#Cond), or insert a `time.Sleep(10 * time.Millisecond)` in each loop iteration. -- Do yourself a favor for future labs and write (or re-write) code that's clean and clear. For ideas, re-visit our the [Guidance page](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) with tips on how to develop and debug your code. -- If you fail a test, look at `test_test.go` and `config.go` to understand what's being tested. `config.go` also illustrates how the tester uses the Raft API.[@task]",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/raft && time go test -run 3B,"Test (3B): basic agreement ... - ... Passed -- 0.9 3 16 4572 3 -Test (3B): RPC byte count ... 
- ... Passed -- 1.7 3 48 114536 11 -Test (3B): agreement after follower reconnects ... - ... Passed -- 3.6 3 78 22131 7 -Test (3B): no agreement if too many followers disconnect ... - ... Passed -- 3.8 5 172 40935 3 -Test (3B): concurrent Start()s ... - ... Passed -- 1.1 3 24 7379 6 -Test (3B): rejoin of partitioned leader ... - ... Passed -- 5.1 3 152 37021 4 -Test (3B): leader backs up quickly over incorrect follower logs ... - ... Passed -- 17.2 5 2080 1587388 102 -Test (3B): RPC counts aren't too high ... - ... Passed -- 2.2 3 60 20119 12 -PASS -ok 6.5840/raft 35.557s - -real 0m35.899s -user 0m2.556s -sys 0m1.458s",hard,http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html -6,6.5840: Distributed Systems,Spring 2024,Lab 3: Raft,Part 3C: persistence,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date.","If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. 
We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft -$ go test -Test (3A): initial election ... ---- FAIL: TestInitialElection3A (5.04s) - config.go:326: expected one leader, got none -Test (3A): election after network failure ... ---- FAIL: TestReElection3A (5.03s) - config.go:326: expected one leader, got none -... -$ -```","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.","If a Raft-based server reboots it should resume service where it left off. This requires that Raft keep persistent state that survives a reboot. The paper's Figure 2 mentions which state should be persistent. - -A real implementation would write Raft's persistent state to disk each time it changed, and would read the state from disk when restarting after a reboot. Your implementation won't use the disk; instead, it will save and restore persistent state from a `Persister` object (see `persister.go`). Whoever calls `Raft.Make()` supplies a `Persister` that initially holds Raft's most recently persisted state (if any). 
Raft should initialize its state from that `Persister`, and should use it to save its persistent state each time the state changes. Use the `Persister`'s `ReadRaftState()` and `Save()` methods. - -Complete the functions `persist()` and `readPersist()` in `raft.go` by adding code to save and restore persistent state. You will need to encode (or ""serialize"") the state as an array of bytes in order to pass it to the `Persister`. Use the `labgob` encoder; see the comments in `persist()` and `readPersist()`. `labgob` is like Go's `gob` encoder but prints error messages if you try to encode structures with lower-case field names. For now, pass `nil` as the second argument to `persister.Save()`. Insert calls to `persist()` at the points where your implementation changes persistent state. Once you've done this, and if the rest of your implementation is correct, you should pass all of the 3C tests. - -You will probably need the optimization that backs up nextIndex by more than one entry at a time. Look at the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf) starting at the bottom of page 7 and top of page 8 (marked by a gray line). The paper is vague about the details; you will need to fill in the gaps. One possibility is to have a rejection message include: - -``` - XTerm: term in the conflicting entry (if any) - XIndex: index of first entry with that term (if any) - XLen: log length -``` - -Then the leader's logic can be something like: - -``` - Case 1: leader doesn't have XTerm: - nextIndex = XIndex - Case 2: leader has XTerm: - nextIndex = leader's last entry for XTerm - Case 3: follower's log is too short: - nextIndex = XLen -``` - -A few other hints: - -- Run `git pull` to get the latest lab software. -- The 3C tests are more demanding than those for 3A or 3B, and failures may be caused by problems in your code for 3A or 3B. - -Your code should pass all the 3C tests (as shown below), as well as the 3A and 3B tests. - -``` -$ go test -run 3C -Test (3C): basic persistence ... - ... Passed -- 5.0 3 86 22849 6 -Test (3C): more persistence ... - ... Passed -- 17.6 5 952 218854 16 -Test (3C): partitioned leader and one follower crash, leader restarts ... - ... Passed -- 2.0 3 34 8937 4 -Test (3C): Figure 8 ... - ... Passed -- 31.2 5 580 130675 32 -Test (3C): unreliable agreement ... - ... Passed -- 1.7 5 1044 366392 246 -Test (3C): Figure 8 (unreliable) ... - ... Passed -- 33.6 5 10700 33695245 308 -Test (3C): churn ... - ... Passed -- 16.1 5 8864 44771259 1544 -Test (3C): unreliable churn ... - ... Passed -- 16.5 5 4220 6414632 906 -PASS -ok 6.5840/raft 123.564s -$ -``` - -It is a good idea to run the tests multiple times before submitting and check that each run prints `PASS`. - -``` -$ for i in {0..10}; do go test; done -```","Complete the functions `persist()` and `readPersist()` in `raft.go` by adding code to save and restore persistent state. You will need to encode (or ""serialize"") the state as an array of bytes in order to pass it to the `Persister`. Use the `labgob` encoder; see the comments in `persist()` and `readPersist()`. `labgob` is like Go's `gob` encoder but prints error messages if you try to encode structures with lower-case field names. For now, pass `nil` as the second argument to `persister.Save()`. Insert calls to `persist()` at the points where your implementation changes persistent state. 
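A minimal sketch of `persist()`/`readPersist()` along the lines described above, assuming the Figure 2 persistent state lives in fields named `currentTerm`, `votedFor`, and `log` (a slice of an illustrative `LogEntry` type); it follows the `labgob` pattern shown in the skeleton's comments and passes `nil` as the snapshot for 3C:

```
func (rf *Raft) persist() {
	w := new(bytes.Buffer)
	e := labgob.NewEncoder(w)
	e.Encode(rf.currentTerm)
	e.Encode(rf.votedFor)
	e.Encode(rf.log)
	rf.persister.Save(w.Bytes(), nil) // no snapshot yet in 3C
}

func (rf *Raft) readPersist(data []byte) {
	if len(data) < 1 {
		return // no persisted state: start from defaults
	}
	d := labgob.NewDecoder(bytes.NewBuffer(data))
	var currentTerm, votedFor int
	var log []LogEntry
	if d.Decode(&currentTerm) != nil || d.Decode(&votedFor) != nil || d.Decode(&log) != nil {
		return // decode failed: leave state at defaults
	}
	rf.currentTerm, rf.votedFor, rf.log = currentTerm, votedFor, log
}
```

Call `persist()` while holding the lock, immediately after any change to these fields.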
Once you've done this, and if the rest of your implementation is correct, you should pass all of the 3C tests.","- Run `git pull` to get the latest lab software. -- The 3C tests are more demanding than those for 3A or 3B, and failures may be caused by problems in your code for 3A or 3B.[@task]",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/raft && go test -run 3C,"Test (3C): basic persistence ... - ... Passed -- 5.0 3 86 22849 6 -Test (3C): more persistence ... - ... Passed -- 17.6 5 952 218854 16 -Test (3C): partitioned leader and one follower crash, leader restarts ... - ... Passed -- 2.0 3 34 8937 4 -Test (3C): Figure 8 ... - ... Passed -- 31.2 5 580 130675 32 -Test (3C): unreliable agreement ... - ... Passed -- 1.7 5 1044 366392 246 -Test (3C): Figure 8 (unreliable) ... - ... Passed -- 33.6 5 10700 33695245 308 -Test (3C): churn ... - ... Passed -- 16.1 5 8864 44771259 1544 -Test (3C): unreliable churn ... - ... Passed -- 16.5 5 4220 6414632 906 -PASS -ok 6.5840/raft 123.564s",hard,http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html -7,6.5840: Distributed Systems,Spring 2024,Lab 3: Raft,Part 3D: log compaction,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. 
You must submit each part on the corresponding due date.","If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft -$ go test -Test (3A): initial election ... ---- FAIL: TestInitialElection3A (5.04s) - config.go:326: expected one leader, got none -Test (3A): election after network failure ... ---- FAIL: TestReElection3A (5.03s) - config.go:326: expected one leader, got none -... -$ -```","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.","As things stand now, a rebooting server replays the complete Raft log in order to restore its state. However, it's not practical for a long-running service to remember the complete Raft log forever. 
Instead, you'll modify Raft to cooperate with services that persistently store a ""snapshot"" of their state from time to time, at which point Raft discards log entries that precede the snapshot. The result is a smaller amount of persistent data and faster restart. However, it's now possible for a follower to fall so far behind that the leader has discarded the log entries it needs to catch up; the leader must then send a snapshot plus the log starting at the time of the snapshot. Section 7 of the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf) outlines the scheme; you will have to design the details. - -Your Raft must provide the following function that the service can call with a serialized snapshot of its state: - -``` -Snapshot(index int, snapshot []byte) -``` - -In Lab 3D, the tester calls `Snapshot()` periodically. In Lab 4, you will write a key/value server that calls `Snapshot()`; the snapshot will contain the complete table of key/value pairs. The service layer calls `Snapshot()` on every peer (not just on the leader). - -The `index` argument indicates the highest log entry that's reflected in the snapshot. Raft should discard its log entries before that point. You'll need to revise your Raft code to operate while storing only the tail of the log. - -You'll need to implement the `InstallSnapshot` RPC discussed in the paper that allows a Raft leader to tell a lagging Raft peer to replace its state with a snapshot. You will likely need to think through how InstallSnapshot should interact with the state and rules in Figure 2. - -When a follower's Raft code receives an InstallSnapshot RPC, it can use the `applyCh` to send the snapshot to the service in an `ApplyMsg`. The `ApplyMsg` struct definition already contains the fields you will need (and which the tester expects). Take care that these snapshots only advance the service's state, and don't cause it to move backwards. - -If a server crashes, it must restart from persisted data. Your Raft should persist both Raft state and the corresponding snapshot. Use the second argument to `persister.Save()` to save the snapshot. If there's no snapshot, pass `nil` as the second argument. - -When a server restarts, the application layer reads the persisted snapshot and restores its saved state. - -Implement `Snapshot()` and the InstallSnapshot RPC, as well as the changes to Raft to support these (e.g, operation with a trimmed log). Your solution is complete when it passes the 3D tests (and all the previous Lab 3 tests). - -- `git pull` to make sure you have the latest software. -- A good place to start is to modify your code to so that it is able to store just the part of the log starting at some index X. Initially you can set X to zero and run the 3B/3C tests. Then make `Snapshot(index)` discard the log before `index`, and set X equal to `index`. If all goes well you should now pass the first 3D test. -- Next: have the leader send an InstallSnapshot RPC if it doesn't have the log entries required to bring a follower up to date. -- Send the entire snapshot in a single InstallSnapshot RPC. Don't implement Figure 13's `offset` mechanism for splitting up the snapshot. -- Raft must discard old log entries in a way that allows the Go garbage collector to free and re-use the memory; this requires that there be no reachable references (pointers) to the discarded log entries. 
-- A reasonable amount of time to consume for the full set of Lab 3 tests (3A+3B+3C+3D) without `-race` is 6 minutes of real time and one minute of CPU time. When running with `-race`, it is about 10 minutes of real time and two minutes of CPU time. - -Your code should pass all the 3D tests (as shown below), as well as the 3A, 3B, and 3C tests. - -``` -$ go test -run 3D -Test (3D): snapshots basic ... - ... Passed -- 11.6 3 176 61716 192 -Test (3D): install snapshots (disconnect) ... - ... Passed -- 64.2 3 878 320610 336 -Test (3D): install snapshots (disconnect+unreliable) ... - ... Passed -- 81.1 3 1059 375850 341 -Test (3D): install snapshots (crash) ... - ... Passed -- 53.5 3 601 256638 339 -Test (3D): install snapshots (unreliable+crash) ... - ... Passed -- 63.5 3 687 288294 336 -Test (3D): crash and restart all servers ... - ... Passed -- 19.5 3 268 81352 58 -PASS -ok 6.5840/raft 293.456s -```","Implement `Snapshot()` and the InstallSnapshot RPC, as well as the changes to Raft to support these (e.g, operation with a trimmed log). Your solution is complete when it passes the 3D tests (and all the previous Lab 3 tests).","- `git pull` to make sure you have the latest software. -- A good place to start is to modify your code to so that it is able to store just the part of the log starting at some index X. Initially you can set X to zero and run the 3B/3C tests. Then make `Snapshot(index)` discard the log before `index`, and set X equal to `index`. If all goes well you should now pass the first 3D test. -- Next: have the leader send an InstallSnapshot RPC if it doesn't have the log entries required to bring a follower up to date. -- Send the entire snapshot in a single InstallSnapshot RPC. Don't implement Figure 13's `offset` mechanism for splitting up the snapshot. -- Raft must discard old log entries in a way that allows the Go garbage collector to free and re-use the memory; this requires that there be no reachable references (pointers) to the discarded log entries. -- A reasonable amount of time to consume for the full set of Lab 3 tests (3A+3B+3C+3D) without `-race` is 6 minutes of real time and one minute of CPU time. When running with `-race`, it is about 10 minutes of real time and two minutes of CPU time.[@rules]",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/raft && go test -run 3D,"Test (3D): snapshots basic ... - ... Passed -- 11.6 3 176 61716 192 -Test (3D): install snapshots (disconnect) ... - ... Passed -- 64.2 3 878 320610 336 -Test (3D): install snapshots (disconnect+unreliable) ... - ... Passed -- 81.1 3 1059 375850 341 -Test (3D): install snapshots (crash) ... - ... Passed -- 53.5 3 601 256638 339 -Test (3D): install snapshots (unreliable+crash) ... - ... Passed -- 63.5 3 687 288294 336 -Test (3D): crash and restart all servers ... - ... Passed -- 19.5 3 268 81352 58 -PASS -ok 6.5840/raft 293.456s",hard,http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html -8,6.5840: Distributed Systems,Spring 2024,Lab 4: Fault-tolerant Key/Value Service,Part A: Key/value service without snapshots,"In this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html). Your key/value service will be a replicated state machine, consisting of several key/value servers that each maintain a database of key/value pairs, as in [Lab 2](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html), but additionally use Raft for replication. 
Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2024/notes/raft_diagram.pdf). - -Clients will interact with your key/value service in much the same way as Lab 2. In particular, clients can send three different RPCs to the key/value service: - -- `Put(key, value)`: replaces the value for a particular key in the database -- `Append(key, arg)`: appends arg to key's value (treating the existing value as an empty string if the key is non-existent) -- `Get(key)`: fetches the current value of the key (returning the empty string for non-existent keys) - -Keys and values are strings. Note that unlike in Lab 2, neither `Put` nor `Append` should return a value to the client. Each client talks to the service through a `Clerk` with Put/Append/Get methods. The `Clerk` manages RPC interactions with the servers. - -Your service must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If called one at a time, the Get/Put/Append methods should act as if the system had only one copy of its state, and each call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. - -Providing linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. - -This lab has two parts. In part A, you will implement a replicated key/value service using your Raft implementation, but without using snapshots. In part B, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. - -You should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), in particular Sections 7 and 8. For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) - -Start early.","We supply you with skeleton code and tests in `src/kvraft`. You will need to modify `kvraft/client.go`, `kvraft/server.go`, and perhaps `kvraft/common.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvraft -$ go test -... -$ -``` -",,"Each of your key/value servers (""kvservers"") will have an associated Raft peer. Clerks send `Put()`, `Append()`, and `Get()` RPCs to the kvserver whose associated Raft is the leader. The kvserver code submits the Put/Append/Get operation to Raft, so that the Raft log holds a sequence of Put/Append/Get operations. 
All of the kvservers execute operations from the Raft log in order, applying the operations to their key/value databases; the intent is for the servers to maintain identical replicas of the key/value database. - -A `Clerk` sometimes doesn't know which kvserver is the Raft leader. If the `Clerk` sends an RPC to the wrong kvserver, or if it cannot reach the kvserver, the `Clerk` should re-try by sending to a different kvserver. If the key/value service commits the operation to its Raft log (and hence applies the operation to the key/value state machine), the leader reports the result to the `Clerk` by responding to its RPC. If the operation failed to commit (for example, if the leader was replaced), the server reports an error, and the `Clerk` retries with a different server. - -Your kvservers should not directly communicate; they should only interact with each other through Raft. - -Your first task is to implement a solution that works when there are no dropped messages, and no failed servers. - -Feel free to copy over your client code from Lab 2 (`kvsrv/client.go`) into `kvraft/client.go`. You will need to add logic for deciding which kvserver to send each RPC to. Recall that `Append()` no longer returns a value to the Clerk. - -You'll also need to implement `Put()`, `Append()`, and `Get()` RPC handlers in `server.go`. These handlers should enter an `Op` in the Raft log using `Start()`; you should fill in the `Op` struct definition in `server.go` so that it describes a Put/Append/Get operation. Each server should execute `Op` commands as Raft commits them, i.e. as they appear on the `applyCh`. An RPC handler should notice when Raft commits its `Op`, and then reply to the RPC. - -You have completed this task when you **reliably** pass the first test in the test suite: ""One client"". - -- After calling `Start()`, your kvservers will need to wait for Raft to complete agreement. Commands that have been agreed upon arrive on the `applyCh`. Your code will need to keep reading `applyCh` while `Put()`, `Append()`, and `Get()` handlers submit commands to the Raft log using `Start()`. Beware of deadlock between the kvserver and its Raft library. -- A kvserver should not complete a `Get()` RPC if it is not part of a majority (so that it does not serve stale data). A simple solution is to enter every `Get()` (as well as each `Put()` and `Append()`) in the Raft log. You don't have to implement the optimization for read-only operations that is described in Section 8. -- You should not need to add any fields to to the Raft `ApplyMsg`, or to Raft RPCs such as `AppendEntries`, but you are allowed to do so. -- It's best to add locking from the start because the need to avoid deadlocks sometimes affects overall code design. Check that your code is race-free using `go test -race`. - -Now you should modify your solution to continue in the face of network and server failures. One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it finds a kvserver that replies positively. If a leader fails just after committing an entry to the Raft log, the `Clerk` may not receive a reply, and thus may re-send the request to another leader. Each call to `Clerk.Put()` or `Clerk.Append()` should result in just a single execution, so you will have to ensure that the re-send doesn't result in the servers executing the request twice. 
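One common way to get exactly-once behavior for re-sent requests is to tag each operation with a client ID and a per-client sequence number, and have the apply path drop anything it has already executed for that client. The sketch below is illustrative only: `Op`, `ClientId`, `Seq`, and the duplicate table are assumptions about how you might structure your own code, not definitions from the provided skeleton.

```
// Illustrative Op layout: the Clerk would pick a random ClientId once and
// increment Seq for every new Get/Put/Append; a re-send reuses the same Seq.
type Op struct {
	Kind     string // "Get", "Put", or "Append"
	Key      string
	Value    string
	ClientId int64
	Seq      int64
}

type lastReply struct {
	Seq   int64  // highest sequence number applied for this client
	Value string // result of that request, so a duplicate can be answered
}

// applyOp executes a committed Op at most once per (ClientId, Seq).
func applyOp(store map[string]string, seen map[int64]lastReply, op Op) string {
	if prev, ok := seen[op.ClientId]; ok && op.Seq <= prev.Seq {
		return prev.Value // duplicate of an already-applied request
	}
	var result string
	switch op.Kind {
	case "Put":
		store[op.Key] = op.Value
	case "Append":
		store[op.Key] += op.Value
	case "Get":
		result = store[op.Key]
	}
	seen[op.ClientId] = lastReply{Seq: op.Seq, Value: result}
	return result
}
```

The kvserver's loop over `applyCh` would call something like `applyOp` for each committed entry, and the RPC handler waiting on the index returned by `Start()` would check that the entry at that index is still its own operation before replying. Because a client issues one request at a time, keeping just the latest `Seq` and reply per client is enough, which also keeps the table small.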
- -Add code to handle failures, and to cope with duplicate `Clerk` requests, including situations where the `Clerk` sends a request to a kvserver leader in one term, times out waiting for a reply, and re-sends the request to a new leader in another term. The request should execute just once. These notes include guidance on [duplicate detection](http://nil.csail.mit.edu/6.5840/2024/notes/l-raft-QA.txt). Your code should pass the `go test -run 4A` tests. - -- Your solution needs to handle a leader that has called Start() for a Clerk's RPC, but loses its leadership before the request is committed to the log. In this case you should arrange for the Clerk to re-send the request to other servers until it finds the new leader. One way to do this is for the server to detect that it has lost leadership, by noticing that Raft's term has changed or a different request has appeared at the index returned by Start(). If the ex-leader is partitioned by itself, it won't know about new leaders; but any client in the same partition won't be able to talk to a new leader either, so it's OK in this case for the server and client to wait indefinitely until the partition heals. -- You will probably have to modify your Clerk to remember which server turned out to be the leader for the last RPC, and send the next RPC to that server first. This will avoid wasting time searching for the leader on every RPC, which may help you pass some of the tests quickly enough. -- You should use a duplicate detection scheme similar to Lab 2. It should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. It's OK to assume that a client will make only one call into a Clerk at a time. You may find that you need to make changes to what information you store in your duplicate detection table from Lab 2. - -Your code should now pass the Lab 4A tests, like this: - -``` -$ go test -run 4A -Test: one client (4A) ... - ... Passed -- 15.5 5 4576 903 -Test: ops complete fast enough (4A) ... - ... Passed -- 15.7 3 3022 0 -Test: many clients (4A) ... - ... Passed -- 15.9 5 5884 1160 -Test: unreliable net, many clients (4A) ... - ... Passed -- 19.2 5 3083 441 -Test: concurrent append to same key, unreliable (4A) ... - ... Passed -- 2.5 3 218 52 -Test: progress in majority (4A) ... - ... Passed -- 1.7 5 103 2 -Test: no progress in minority (4A) ... - ... Passed -- 1.0 5 102 3 -Test: completion after heal (4A) ... - ... Passed -- 1.2 5 70 3 -Test: partitions, one client (4A) ... - ... Passed -- 23.8 5 4501 765 -Test: partitions, many clients (4A) ... - ... Passed -- 23.5 5 5692 974 -Test: restarts, one client (4A) ... - ... Passed -- 22.2 5 4721 908 -Test: restarts, many clients (4A) ... - ... Passed -- 22.5 5 5490 1033 -Test: unreliable net, restarts, many clients (4A) ... - ... Passed -- 26.5 5 3532 474 -Test: restarts, partitions, many clients (4A) ... - ... Passed -- 29.7 5 6122 1060 -Test: unreliable net, restarts, partitions, many clients (4A) ... - ... Passed -- 32.9 5 2967 317 -Test: unreliable net, restarts, partitions, random keys, many clients (4A) ... - ... Passed -- 35.0 7 8249 746 -PASS -ok 6.5840/kvraft 290.184s -``` - -The numbers after each `Passed` are real time in seconds, number of peers, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put/Append calls).","1. task1 - - Your first task is to implement a solution that works when there are no dropped messages, and no failed servers. 
- - Feel free to copy over your client code from Lab 2 (`kvsrv/client.go`) into `kvraft/client.go`. You will need to add logic for deciding which kvserver to send each RPC to. Recall that `Append()` no longer returns a value to the Clerk. - - You'll also need to implement `Put()`, `Append()`, and `Get()` RPC handlers in `server.go`. These handlers should enter an `Op` in the Raft log using `Start()`; you should fill in the `Op` struct definition in `server.go` so that it describes a Put/Append/Get operation. Each server should execute `Op` commands as Raft commits them, i.e. as they appear on the `applyCh`. An RPC handler should notice when Raft commits its `Op`, and then reply to the RPC. - - You have completed this task when you **reliably** pass the first test in the test suite: ""One client"". - -2. task2 - - Add code to handle failures, and to cope with duplicate `Clerk` requests, including situations where the `Clerk` sends a request to a kvserver leader in one term, times out waiting for a reply, and re-sends the request to a new leader in another term. The request should execute just once. These notes include guidance on [duplicate detection](http://nil.csail.mit.edu/6.5840/2024/notes/l-raft-QA.txt). Your code should pass the `go test -run 4A` tests.","1. hint1 - - After calling `Start()`, your kvservers will need to wait for Raft to complete agreement. Commands that have been agreed upon arrive on the `applyCh`. Your code will need to keep reading `applyCh` while `Put()`, `Append()`, and `Get()` handlers submit commands to the Raft log using `Start()`. Beware of deadlock between the kvserver and its Raft library. - - A kvserver should not complete a `Get()` RPC if it is not part of a majority (so that it does not serve stale data). A simple solution is to enter every `Get()` (as well as each `Put()` and `Append()`) in the Raft log. You don't have to implement the optimization for read-only operations that is described in Section 8. - - You should not need to add any fields to to the Raft `ApplyMsg`, or to Raft RPCs such as `AppendEntries`, but you are allowed to do so. - - It's best to add locking from the start because the need to avoid deadlocks sometimes affects overall code design. Check that your code is race-free using `go test -race`. -2. hint2 - - Your solution needs to handle a leader that has called Start() for a Clerk's RPC, but loses its leadership before the request is committed to the log. In this case you should arrange for the Clerk to re-send the request to other servers until it finds the new leader. One way to do this is for the server to detect that it has lost leadership, by noticing that Raft's term has changed or a different request has appeared at the index returned by Start(). If the ex-leader is partitioned by itself, it won't know about new leaders; but any client in the same partition won't be able to talk to a new leader either, so it's OK in this case for the server and client to wait indefinitely until the partition heals. - - You will probably have to modify your Clerk to remember which server turned out to be the leader for the last RPC, and send the next RPC to that server first. This will avoid wasting time searching for the leader on every RPC, which may help you pass some of the tests quickly enough. - - You should use a duplicate detection scheme similar to Lab 2. It should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. 
It's OK to assume that a client will make only one call into a Clerk at a time. You may find that you need to make changes to what information you store in your duplicate detection table from Lab 2.",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/kvraft && go test -run 4A,"Test: one client (4A) ... - ... Passed -- 15.5 5 4576 903 -Test: ops complete fast enough (4A) ... - ... Passed -- 15.7 3 3022 0 -Test: many clients (4A) ... - ... Passed -- 15.9 5 5884 1160 -Test: unreliable net, many clients (4A) ... - ... Passed -- 19.2 5 3083 441 -Test: concurrent append to same key, unreliable (4A) ... - ... Passed -- 2.5 3 218 52 -Test: progress in majority (4A) ... - ... Passed -- 1.7 5 103 2 -Test: no progress in minority (4A) ... - ... Passed -- 1.0 5 102 3 -Test: completion after heal (4A) ... - ... Passed -- 1.2 5 70 3 -Test: partitions, one client (4A) ... - ... Passed -- 23.8 5 4501 765 -Test: partitions, many clients (4A) ... - ... Passed -- 23.5 5 5692 974 -Test: restarts, one client (4A) ... - ... Passed -- 22.2 5 4721 908 -Test: restarts, many clients (4A) ... - ... Passed -- 22.5 5 5490 1033 -Test: unreliable net, restarts, many clients (4A) ... - ... Passed -- 26.5 5 3532 474 -Test: restarts, partitions, many clients (4A) ... - ... Passed -- 29.7 5 6122 1060 -Test: unreliable net, restarts, partitions, many clients (4A) ... - ... Passed -- 32.9 5 2967 317 -Test: unreliable net, restarts, partitions, random keys, many clients (4A) ... - ... Passed -- 35.0 7 8249 746 -PASS -ok 6.5840/kvraft 290.184s",moderate/hard,http://nil.csail.mit.edu/6.5840/2024/labs/lab-kvraft.html -9,6.5840: Distributed Systems,Spring 2024,Lab 4: Fault-tolerant Key/Value Service,Part B: Key/value service with snapshots,"In this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html). Your key/value service will be a replicated state machine, consisting of several key/value servers that each maintain a database of key/value pairs, as in [Lab 2](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html), but additionally use Raft for replication. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2024/notes/raft_diagram.pdf). - -Clients will interact with your key/value service in much the same way as Lab 2. In particular, clients can send three different RPCs to the key/value service: - -- `Put(key, value)`: replaces the value for a particular key in the database -- `Append(key, arg)`: appends arg to key's value (treating the existing value as an empty string if the key is non-existent) -- `Get(key)`: fetches the current value of the key (returning the empty string for non-existent keys) - -Keys and values are strings. Note that unlike in Lab 2, neither `Put` nor `Append` should return a value to the client. Each client talks to the service through a `Clerk` with Put/Append/Get methods. The `Clerk` manages RPC interactions with the servers. - -Your service must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. 
If called one at a time, the Get/Put/Append methods should act as if the system had only one copy of its state, and each call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. - -Providing linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. - -This lab has two parts. In part A, you will implement a replicated key/value service using your Raft implementation, but without using snapshots. In part B, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. - -You should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), in particular Sections 7 and 8. For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) - -Start early.","We supply you with skeleton code and tests in `src/kvraft`. You will need to modify `kvraft/client.go`, `kvraft/server.go`, and perhaps `kvraft/common.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvraft -$ go test -... -$ -``` -",,"As things stand now, your key/value server doesn't call your Raft library's `Snapshot()` method, so a rebooting server has to replay the complete persisted Raft log in order to restore its state. Now you'll modify kvserver to cooperate with Raft to save log space, and reduce restart time, using Raft's `Snapshot()` from Lab 3D. - -The tester passes `maxraftstate` to your `StartKVServer()`. `maxraftstate` indicates the maximum allowed size of your persistent Raft state in bytes (including the log, but not including snapshots). You should compare `maxraftstate` to `persister.RaftStateSize()`. Whenever your key/value server detects that the Raft state size is approaching this threshold, it should save a snapshot by calling Raft's `Snapshot`. If `maxraftstate` is -1, you do not have to snapshot. `maxraftstate` applies to the GOB-encoded bytes your Raft passes as the first argument to to `persister.Save()`. - -Modify your kvserver so that it detects when the persisted Raft state grows too large, and then hands a snapshot to Raft. When a kvserver server restarts, it should read the snapshot from `persister` and restore its state from the snapshot. - -- Think about when a kvserver should snapshot its state and what should be included in the snapshot. Raft stores each snapshot in the persister object using `Save()`, along with corresponding Raft state. You can read the latest stored snapshot using `ReadSnapshot()`. 
-- Your kvserver must be able to detect duplicated operations in the log across checkpoints, so any state you are using to detect them must be included in the snapshots. -- Capitalize all fields of structures stored in the snapshot. -- You may have bugs in your Raft library that this lab exposes. If you make changes to your Raft implementation make sure it continues to pass all of the Lab 3 tests. -- A reasonable amount of time to take for the Lab 4 tests is 400 seconds of real time and 700 seconds of CPU time. Further, `go test -run TestSnapshotSize` should take less than 20 seconds of real time. - -Your code should pass the 4B tests (as in the example here) as well as the 4A tests (and your Raft must continue to pass the Lab 3 tests). - -``` -$ go test -run 4B -Test: InstallSnapshot RPC (4B) ... - ... Passed -- 4.0 3 289 63 -Test: snapshot size is reasonable (4B) ... - ... Passed -- 2.6 3 2418 800 -Test: ops complete fast enough (4B) ... - ... Passed -- 3.2 3 3025 0 -Test: restarts, snapshots, one client (4B) ... - ... Passed -- 21.9 5 29266 5820 -Test: restarts, snapshots, many clients (4B) ... - ... Passed -- 21.5 5 33115 6420 -Test: unreliable net, snapshots, many clients (4B) ... - ... Passed -- 17.4 5 3233 482 -Test: unreliable net, restarts, snapshots, many clients (4B) ... - ... Passed -- 22.7 5 3337 471 -Test: unreliable net, restarts, partitions, snapshots, many clients (4B) ... - ... Passed -- 30.4 5 2725 274 -Test: unreliable net, restarts, partitions, snapshots, random keys, many clients (4B) ... - ... Passed -- 37.7 7 8378 681 -PASS -ok 6.5840/kvraft 161.538s -```","Modify your kvserver so that it detects when the persisted Raft state grows too large, and then hands a snapshot to Raft. When a kvserver server restarts, it should read the snapshot from `persister` and restore its state from the snapshot.","- Think about when a kvserver should snapshot its state and what should be included in the snapshot. Raft stores each snapshot in the persister object using `Save()`, along with corresponding Raft state. You can read the latest stored snapshot using `ReadSnapshot()`. -- Your kvserver must be able to detect duplicated operations in the log across checkpoints, so any state you are using to detect them must be included in the snapshots. -- Capitalize all fields of structures stored in the snapshot. -- You may have bugs in your Raft library that this lab exposes. If you make changes to your Raft implementation make sure it continues to pass all of the Lab 3 tests. -- A reasonable amount of time to take for the Lab 4 tests is 400 seconds of real time and 700 seconds of CPU time. Further, `go test -run TestSnapshotSize` should take less than 20 seconds of real time.",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/kvraft && go test -run 4B,"Test: InstallSnapshot RPC (4B) ... - ... Passed -- 4.0 3 289 63 -Test: snapshot size is reasonable (4B) ... - ... Passed -- 2.6 3 2418 800 -Test: ops complete fast enough (4B) ... - ... Passed -- 3.2 3 3025 0 -Test: restarts, snapshots, one client (4B) ... - ... Passed -- 21.9 5 29266 5820 -Test: restarts, snapshots, many clients (4B) ... - ... Passed -- 21.5 5 33115 6420 -Test: unreliable net, snapshots, many clients (4B) ... - ... Passed -- 17.4 5 3233 482 -Test: unreliable net, restarts, snapshots, many clients (4B) ... - ... Passed -- 22.7 5 3337 471 -Test: unreliable net, restarts, partitions, snapshots, many clients (4B) ... - ... 
Passed -- 30.4 5 2725 274 -Test: unreliable net, restarts, partitions, snapshots, random keys, many clients (4B) ... - ... Passed -- 37.7 7 8378 681 -PASS -ok 6.5840/kvraft 161.538s",hard,http://nil.csail.mit.edu/6.5840/2024/labs/lab-kvraft.html -10,6.5840: Distributed Systems,Spring 2024,Lab 5: Sharded Key/Value Service,Part A: The Controller and Static Sharding,"You can either do a [final project](http://nil.csail.mit.edu/6.5840/2024/project.html) based on your own ideas, or this lab. - -In this lab you'll build a key/value storage system that ""shards,"" or partitions, the keys over a set of replica groups. A shard is a subset of the key/value pairs; for example, all the keys starting with ""a"" might be one shard, all the keys starting with ""b"" another, etc. The reason for sharding is performance. Each replica group handles puts and gets for just a few of the shards, and the groups operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of groups. - -Your sharded key/value store will have two main components. First, a set of replica groups. Each replica group is responsible for a subset of the shards, using Raft replication. The second component is the ""shard controller"". The shard controller decides which replica group should serve each shard; this information is called the configuration. The configuration changes over time. Clients consult the shard controller in order to find the replica group for a key, and replica groups consult the controller in order to find out what shards to serve. There is a single shard controller for the whole system, implemented as a fault-tolerant service using Raft. - -A sharded storage system must be able to shift shards among replica groups. One reason is that some groups may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that replica groups may join and leave the system: new replica groups may be added to increase capacity, or existing replica groups may be taken offline for repair or retirement. - -The main challenge in this lab will be handling reconfiguration -- changes in the assignment of shards to groups. Within a single replica group, all group members must agree on when a reconfiguration occurs relative to client Put/Append/Get requests. For example, a Put may arrive at about the same time as a reconfiguration that causes the replica group to stop being responsible for the shard holding the Put's key. All replicas in the group must agree on whether the Put occurred before or after the reconfiguration. If before, the Put should take effect and the new owner of the shard will see its effect; if after, the Put won't take effect and client must re-try at the new owner. The recommended approach is to have each replica group use Raft to log not just the sequence of Puts, Appends, and Gets but also the sequence of reconfigurations. You will need to ensure that at most one replica group is serving requests for each shard at any one time. - -Reconfiguration also requires interaction among the replica groups. For example, in configuration 10 group G1 may be responsible for shard S1. In configuration 11, group G2 may be responsible for shard S1. During the reconfiguration from 10 to 11, G1 and G2 must use RPC to move the contents of shard S1 (the key/value pairs) from G1 to G2. - -Only RPC may be used for interaction among clients and servers. 
For example, different instances of your server are not allowed to share Go variables or files. - -This lab uses ""configuration"" to refer to the assignment of shards to replica groups. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. - -This lab's general architecture (a configuration service and a set of replica groups) follows the same general pattern as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are very simple; and handoff of shards is slow and doesn't allow concurrent client access. - -Your Lab 5 sharded server, Lab 5 shard controller, and Lab 4 kvraft must all use the same Raft implementation.","Do a `git pull` to get the latest lab software. - -We supply you with skeleton code and tests in `src/shardctrler` and `src/shardkv`. - -To get up and running, execute the following commands: - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/shardctrler -$ go test ---- FAIL: TestBasic (0.00s) - test_test.go:11: wanted 1 groups, got 0 -FAIL -exit status 1 -FAIL shardctrler 0.008s -$ -``` - -When you're done, your implementation should pass all the tests in the `src/shardctrler` directory, and all the ones in `src/shardkv`.",,"First you'll implement the shard controller, in `shardctrler/server.go` and `client.go`, and a sharded key/value server that can handle an unchanging (static) configuration. When you're done, your code should pass all the tests in the `shardctrler/` directory, and the `5A` tests in `shardkv/`. - -``` -$ cd ~/6.5840/src/shardctrler -$ go test -Test: Basic leave/join ... - ... Passed -Test: Historical queries ... - ... Passed -Test: Move ... - ... Passed -Test: Concurrent leave/join ... - ... Passed -Test: Minimal transfers after joins ... - ... Passed -Test: Minimal transfers after leaves ... - ... Passed -Test: Multi-group join/leave ... - ... Passed -Test: Concurrent multi leave/join ... - ... Passed -Test: Minimal transfers after multijoins ... - ... Passed -Test: Minimal transfers after multileaves ... - ... Passed -Test: Check Same config on servers ... - ... Passed -PASS -ok 6.5840/shardctrler 5.863s -$ -$ cd ../shardkv -$ go test -run 5A -Test (5A): static shards ... - ... Passed -Test (5A): rejection ... - ... Passed -PASS -ok 6.5840/shardkv 9.262s -$ -``` - -The shardctrler manages a sequence of numbered configurations. Each configuration describes a set of replica groups and an assignment of shards to replica groups. Whenever this assignment needs to change, the shard controller creates a new configuration with the new assignment. Key/value clients and servers contact the shardctrler when they want to know the current (or a past) configuration. - -Your implementation must support the RPC interface described in `shardctrler/common.go`, which consists of `Join`, `Leave`, `Move`, and `Query` RPCs. These RPCs are intended to allow an administrator (and the tests) to control the shardctrler: to add new replica groups, to eliminate replica groups, and to move shards between replica groups. - -The `Join` RPC is used by an administrator to add new replica groups. Its argument is a set of mappings from unique, non-zero replica group identifiers (GIDs) to lists of server names. 
The shardctrler should react by creating a new configuration that includes the new replica groups. The new configuration should divide the shards as evenly as possible among the full set of groups, and should move as few shards as possible to achieve that goal. The shardctrler should allow re-use of a GID if it's not part of the current configuration (i.e. a GID should be allowed to Join, then Leave, then Join again). - -The `Leave` RPC's argument is a list of GIDs of previously joined groups. The shardctrler should create a new configuration that does not include those groups, and that assigns those groups' shards to the remaining groups. The new configuration should divide the shards as evenly as possible among the groups, and should move as few shards as possible to achieve that goal. - -The `Move` RPC's arguments are a shard number and a GID. The shardctrler should create a new configuration in which the shard is assigned to the group. The purpose of `Move` is to allow us to test your software. A `Join` or `Leave` following a `Move` will likely un-do the `Move`, since `Join` and `Leave` re-balance. - -The `Query` RPC's argument is a configuration number. The shardctrler replies with the configuration that has that number. If the number is -1 or bigger than the biggest known configuration number, the shardctrler should reply with the latest configuration. The result of `Query(-1)` should reflect every `Join`, `Leave`, or `Move` RPC that the shardctrler finished handling before it received the `Query(-1)` RPC. - -The very first configuration should be numbered zero. It should contain no groups, and all shards should be assigned to GID zero (an invalid GID). The next configuration (created in response to a `Join` RPC) should be numbered 1, &c. There will usually be significantly more shards than groups (i.e., each group will serve more than one shard), in order that load can be shifted at a fairly fine granularity. - -You must implement the interface specified above in `client.go` and `server.go` in the `shardctrler/` directory. Your shardctrler must be fault-tolerant, using your Raft library from Lab 3/4. You have completed this task when you pass all the tests in `shardctrler/`. - -- Start with a stripped-down copy of your kvraft server. -- You should implement duplicate client request detection for RPCs to the shard controller. The shardctrler tests don't test this, but the shardkv tests will later use your shardctrler on an unreliable network; you may have trouble passing the shardkv tests if your shardctrler doesn't filter out duplicate RPCs. -- The code in your state machine that performs the shard rebalancing needs to be deterministic. In Go, map iteration order is [not deterministic](https://blog.golang.org/maps#TOC_7.). -- Go maps are references. If you assign one variable of type map to another, both variables refer to the same map. Thus if you want to create a new `Config` based on a previous one, you need to create a new map object (with `make()`) and copy the keys and values individually. -- The Go race detector (go test -race) may help you find bugs. - -Next, in the `shardkv/` directory, implement enough of a sharded key/value server to pass the first two tests in `shardkv/`. Again, start by copying code from your existing `kvraft` server. You should be able to get the first test to pass without doing anything special regarding sharding, since the `shardkv/client.go` we give you takes care of sending RPCs to the group that the controller assigns to the key in question. 
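Two of the hints above (deterministic rebalancing and Go maps being references) tend to cause subtle bugs, so here is a minimal sketch of the corresponding helpers. It assumes the `Config` struct in `shardctrler/common.go` exposes `Num int`, a fixed-size `Shards` array mapping shard number to GID, and `Groups map[int][]string` (check `common.go` for the exact definition), and that `"sort"` is imported; the helper names are made up for illustration.

```
// copyConfig builds the next configuration without aliasing the previous
// Groups map, so later edits cannot corrupt older configurations.
func copyConfig(old Config) Config {
	next := Config{
		Num:    old.Num + 1,
		Shards: old.Shards, // Go arrays are copied by value
		Groups: make(map[int][]string, len(old.Groups)),
	}
	for gid, servers := range old.Groups {
		next.Groups[gid] = append([]string(nil), servers...)
	}
	return next
}

// sortedGIDs returns group IDs in a fixed order, so that every replica makes
// the same rebalancing decisions despite Go's randomized map iteration.
func sortedGIDs(groups map[int][]string) []int {
	gids := make([]int, 0, len(groups))
	for gid := range groups {
		gids = append(gids, gid)
	}
	sort.Ints(gids)
	return gids
}
```

A `Join` or `Leave` handler can then start from `copyConfig(latest)` and move shards between the most- and least-loaded groups while iterating over `sortedGIDs(next.Groups)`.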
- -For the second `shardkv` test, each k/v replica group must reject requests for keys for shards for which the group is not the assigned group. At this point, it's enough for the k/v servers to periodically ask the controller for the latest configuration, and to check that configuration each time a client Get/Put/Append RPC arrives. Use `key2shard()` (in `client.go`) to find the shard number for a key. - -Your server should respond with an `ErrWrongGroup` error to a client RPC with a key that the server isn't responsible for (i.e. for a key whose shard is not assigned to the server's group). - -Your server should not call the shard controller's `Join()` handler. The tester will call `Join()` when appropriate.","You must implement the interface specified above in `client.go` and `server.go` in the `shardctrler/` directory. Your shardctrler must be fault-tolerant, using your Raft library from Lab 3/4. You have completed this task when you pass all the tests in `shardctrler/`.","- Start with a stripped-down copy of your kvraft server. -- You should implement duplicate client request detection for RPCs to the shard controller. The shardctrler tests don't test this, but the shardkv tests will later use your shardctrler on an unreliable network; you may have trouble passing the shardkv tests if your shardctrler doesn't filter out duplicate RPCs. -- The code in your state machine that performs the shard rebalancing needs to be deterministic. In Go, map iteration order is [not deterministic](https://blog.golang.org/maps#TOC_7.). -- Go maps are references. If you assign one variable of type map to another, both variables refer to the same map. Thus if you want to create a new `Config` based on a previous one, you need to create a new map object (with `make()`) and copy the keys and values individually. -- The Go race detector (go test -race) may help you find bugs.",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/shardctrler && go test,"Test: Basic leave/join ... - ... Passed -Test: Historical queries ... - ... Passed -Test: Move ... - ... Passed -Test: Concurrent leave/join ... - ... Passed -Test: Minimal transfers after joins ... - ... Passed -Test: Minimal transfers after leaves ... - ... Passed -Test: Multi-group join/leave ... - ... Passed -Test: Concurrent multi leave/join ... - ... Passed -Test: Minimal transfers after multijoins ... - ... Passed -Test: Minimal transfers after multileaves ... - ... Passed -Test: Check Same config on servers ... - ... Passed -PASS -ok 6.5840/shardctrler 5.863s -$ -$ cd ../shardkv -$ go test -run 5A -Test (5A): static shards ... - ... Passed -Test (5A): rejection ... - ... Passed -PASS -ok 6.5840/shardkv 9.262s",easy,http://nil.csail.mit.edu/6.5840/2024/labs/lab-shard.html -11,6.5840: Distributed Systems,Spring 2024,Lab 5: Sharded Key/Value Service,Part B: Shard Movement,"You can either do a [final project](http://nil.csail.mit.edu/6.5840/2024/project.html) based on your own ideas, or this lab. - -In this lab you'll build a key/value storage system that ""shards,"" or partitions, the keys over a set of replica groups. A shard is a subset of the key/value pairs; for example, all the keys starting with ""a"" might be one shard, all the keys starting with ""b"" another, etc. The reason for sharding is performance. Each replica group handles puts and gets for just a few of the shards, and the groups operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of groups. 
- -Your sharded key/value store will have two main components. First, a set of replica groups. Each replica group is responsible for a subset of the shards, using Raft replication. The second component is the ""shard controller"". The shard controller decides which replica group should serve each shard; this information is called the configuration. The configuration changes over time. Clients consult the shard controller in order to find the replica group for a key, and replica groups consult the controller in order to find out what shards to serve. There is a single shard controller for the whole system, implemented as a fault-tolerant service using Raft. - -A sharded storage system must be able to shift shards among replica groups. One reason is that some groups may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that replica groups may join and leave the system: new replica groups may be added to increase capacity, or existing replica groups may be taken offline for repair or retirement. - -The main challenge in this lab will be handling reconfiguration -- changes in the assignment of shards to groups. Within a single replica group, all group members must agree on when a reconfiguration occurs relative to client Put/Append/Get requests. For example, a Put may arrive at about the same time as a reconfiguration that causes the replica group to stop being responsible for the shard holding the Put's key. All replicas in the group must agree on whether the Put occurred before or after the reconfiguration. If before, the Put should take effect and the new owner of the shard will see its effect; if after, the Put won't take effect and client must re-try at the new owner. The recommended approach is to have each replica group use Raft to log not just the sequence of Puts, Appends, and Gets but also the sequence of reconfigurations. You will need to ensure that at most one replica group is serving requests for each shard at any one time. - -Reconfiguration also requires interaction among the replica groups. For example, in configuration 10 group G1 may be responsible for shard S1. In configuration 11, group G2 may be responsible for shard S1. During the reconfiguration from 10 to 11, G1 and G2 must use RPC to move the contents of shard S1 (the key/value pairs) from G1 to G2. - -Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. - -This lab uses ""configuration"" to refer to the assignment of shards to replica groups. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. - -This lab's general architecture (a configuration service and a set of replica groups) follows the same general pattern as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are very simple; and handoff of shards is slow and doesn't allow concurrent client access. - -Your Lab 5 sharded server, Lab 5 shard controller, and Lab 4 kvraft must all use the same Raft implementation.","Do a `git pull` to get the latest lab software. - -We supply you with skeleton code and tests in `src/shardctrler` and `src/shardkv`. 
- -To get up and running, execute the following commands: - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/shardctrler -$ go test ---- FAIL: TestBasic (0.00s) - test_test.go:11: wanted 1 groups, got 0 -FAIL -exit status 1 -FAIL shardctrler 0.008s -$ -``` - -When you're done, your implementation should pass all the tests in the `src/shardctrler` directory, and all the ones in `src/shardkv`.",,"Do a `git pull` to get the latest lab software. - -The main task in this part of the lab is to move shards among replica groups when the controller changes the sharding, and do it in a way that provides linearizable k/v client operations. - -Each of your shards is only required to make progress when a majority of servers in the shard's Raft replica group is alive and can talk to each other, and can talk to a majority of the `shardctrler` servers. Your implementation must operate (serve requests and be able to re-configure as needed) even if a minority of servers in some replica group(s) are dead, temporarily unavailable, or slow. - -A shardkv server is a member of only a single replica group. The set of servers in a given replica group will never change. - -We supply you with `client.go` code that sends each RPC to the replica group responsible for the RPC's key. It re-tries if the replica group says it is not responsible for the key; in that case, the client code asks the shard controller for the latest configuration and tries again. You'll have to modify client.go as part of your support for dealing with duplicate client RPCs, much as in the kvraft lab. - -When you're done your code should pass all the shardkv tests other than the challenge tests: - -``` -$ cd ~/6.5840/src/shardkv -$ go test -Test (5A): static shards ... - ... Passed -Test (5A): rejection ... - ... Passed -Test (5B): join then leave ... - ... Passed -Test (5B): snapshots, join, and leave ... -labgob warning: Decoding into a non-default variable/field Num may not work - ... Passed -Test (5B): servers miss configuration changes... - ... Passed -Test (5B): concurrent puts and configuration changes... - ... Passed -Test (5B): more concurrent puts and configuration changes... - ... Passed -Test (5B): concurrent configuration change and restart... - ... Passed -Test (5B): unreliable 1... - ... Passed -Test (5B): unreliable 2... - ... Passed -Test (5B): unreliable 3... - ... Passed -Test: shard deletion (challenge 1) ... - ... Passed -Test: unaffected shard access (challenge 2) ... - ... Passed -Test: partial migration shard access (challenge 2) ... - ... Passed -PASS -ok 6.5840/shardkv 173.974s -$ -``` - -You will need to make your servers watch for configuration changes, and when one is detected, to start the shard migration process. If a replica group loses a shard, it must stop serving requests to keys in that shard immediately, and start migrating the data for that shard to the replica group that is taking over ownership. If a replica group gains a shard, it needs to wait for the previous owner to send over the old shard data before accepting requests for that shard. - -Implement shard migration during configuration changes. Make sure that all servers in a replica group do the migration at the same point in the sequence of operations they execute, so that they all either accept or reject concurrent client requests. You should focus on passing the second test (""join then leave"") before working on the later tests. You are done with this task when you pass all tests up to, but not including, `TestDelete`. 
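One way to satisfy the "same point in the sequence of operations" requirement is to treat a configuration change as just another command in the Raft log, as the introduction to this lab recommends. The sketch below is an assumption-laden illustration, not skeleton code: `kv.mck` is assumed to be a shardctrler clerk whose `Query(num)` returns the configuration with that number, `kv.config` is the group's current configuration, `ReconfigOp` is a made-up command type (it would need to be registered with `labgob`), `time` is assumed to be imported, and the polling interval simply follows the guidance in the next paragraph.

```
// configPoller runs in its own goroutine on every shardkv server; only the
// Raft leader submits reconfigurations, and only one step at a time.
func (kv *ShardKV) configPoller() {
	for {
		if _, isLeader := kv.rf.GetState(); isLeader {
			kv.mu.Lock()
			cur := kv.config.Num
			kv.mu.Unlock()
			// Ask for exactly the next configuration so changes are
			// processed one at a time, in order.
			next := kv.mck.Query(cur + 1)
			if next.Num == cur+1 {
				// Logging the reconfiguration makes every replica in the
				// group switch at the same position in the applied sequence.
				kv.rf.Start(ReconfigOp{Config: next})
			}
		}
		time.Sleep(100 * time.Millisecond)
	}
}
```

When the apply loop later sees the `ReconfigOp`, each replica can stop serving the shards it is losing and begin pulling the shards it is gaining from their previous owners, together with the duplicate-detection state for those shards.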
- -Your server will need to periodically poll the shardctrler to learn about new configurations. The tests expect that your code polls roughly every 100 milliseconds; more often is OK, but much less often may cause problems. - -Servers will need to send RPCs to each other in order to transfer shards during configuration changes. The shardctrler's `Config` struct contains server names, but you need a `labrpc.ClientEnd` in order to send an RPC. You should use the `make_end()` function passed to `StartServer()` to turn a server name into a `ClientEnd`. `shardkv/client.go` contains code that does this. - -- Process re-configurations one at a time, in order. -- If a test fails, check for gob errors (e.g. ""gob: type not registered for interface ...""). Go doesn't consider gob errors to be fatal, although they are fatal for the lab. -- You'll need to provide at-most-once semantics (duplicate detection) for client requests across shard movement. -- Think about how the shardkv client and server should deal with `ErrWrongGroup`. Should the client change the sequence number if it receives `ErrWrongGroup`? Should the server update the client state if it returns `ErrWrongGroup` when executing a `Get`/`Put` request? -- After a server has moved to a new configuration, it is acceptable for it to continue to store shards that it no longer owns (though this would be regrettable in a real system). This may help simplify your server implementation. -- When group G1 needs a shard from G2 during a configuration change, does it matter at what point during its processing of log entries G2 sends the shard to G1? -- You can send an entire map in an RPC request or reply, which may help keep the code for shard transfer simple. -- If one of your RPC handlers includes in its reply a map (e.g. a key/value map) that's part of your server's state, you may get bugs due to races. The RPC system has to read the map in order to send it to the caller, but it isn't holding a lock that covers the map. Your server, however, may proceed to modify the same map while the RPC system is reading it. The solution is for the RPC handler to include a copy of the map in the reply. -- If you put a map or a slice in a Raft log entry, and your key/value server subsequently sees the entry on the `applyCh` and saves a reference to the map/slice in your key/value server's state, you may have a race. Make a copy of the map/slice, and store the copy in your key/value server's state. The race is between your key/value server modifying the map/slice and Raft reading it while persisting its log. -- During a configuration change, a pair of groups may need to move shards in both directions between them. If you see deadlock, this is a possible source.","Implement shard migration during configuration changes. Make sure that all servers in a replica group do the migration at the same point in the sequence of operations they execute, so that they all either accept or reject concurrent client requests. You should focus on passing the second test (""join then leave"") before working on the later tests. You are done with this task when you pass all tests up to, but not including, `TestDelete`.","Your server will need to periodically poll the shardctrler to learn about new configurations. The tests expect that your code polls roughly every 100 milliseconds; more often is OK, but much less often may cause problems. - -Servers will need to send RPCs to each other in order to transfer shards during configuration changes. 
The shardctrler's `Config` struct contains server names, but you need a `labrpc.ClientEnd` in order to send an RPC. You should use the `make_end()` function passed to `StartServer()` to turn a server name into a `ClientEnd`. `shardkv/client.go` contains code that does this. - -- Process re-configurations one at a time, in order. -- If a test fails, check for gob errors (e.g. ""gob: type not registered for interface ...""). Go doesn't consider gob errors to be fatal, although they are fatal for the lab. -- You'll need to provide at-most-once semantics (duplicate detection) for client requests across shard movement. -- Think about how the shardkv client and server should deal with `ErrWrongGroup`. Should the client change the sequence number if it receives `ErrWrongGroup`? Should the server update the client state if it returns `ErrWrongGroup` when executing a `Get`/`Put` request? -- After a server has moved to a new configuration, it is acceptable for it to continue to store shards that it no longer owns (though this would be regrettable in a real system). This may help simplify your server implementation. -- When group G1 needs a shard from G2 during a configuration change, does it matter at what point during its processing of log entries G2 sends the shard to G1? -- You can send an entire map in an RPC request or reply, which may help keep the code for shard transfer simple. -- If one of your RPC handlers includes in its reply a map (e.g. a key/value map) that's part of your server's state, you may get bugs due to races. The RPC system has to read the map in order to send it to the caller, but it isn't holding a lock that covers the map. Your server, however, may proceed to modify the same map while the RPC system is reading it. The solution is for the RPC handler to include a copy of the map in the reply. -- If you put a map or a slice in a Raft log entry, and your key/value server subsequently sees the entry on the `applyCh` and saves a reference to the map/slice in your key/value server's state, you may have a race. Make a copy of the map/slice, and store the copy in your key/value server's state. The race is between your key/value server modifying the map/slice and Raft reading it while persisting its log. -- During a configuration change, a pair of groups may need to move shards in both directions between them. If you see deadlock, this is a possible source.",,git://g.csail.mit.edu/6.5840-golabs-2024,,6.5840-golabs-2024,cd src/shardkv && go test,"Test (5A): static shards ... - ... Passed -Test (5A): rejection ... - ... Passed -Test (5B): join then leave ... - ... Passed -Test (5B): snapshots, join, and leave ... -labgob warning: Decoding into a non-default variable/field Num may not work - ... Passed -Test (5B): servers miss configuration changes... - ... Passed -Test (5B): concurrent puts and configuration changes... - ... Passed -Test (5B): more concurrent puts and configuration changes... - ... Passed -Test (5B): concurrent configuration change and restart... - ... Passed -Test (5B): unreliable 1... - ... Passed -Test (5B): unreliable 2... - ... Passed -Test (5B): unreliable 3... - ... Passed -Test: shard deletion (challenge 1) ... - ... Passed -Test: unaffected shard access (challenge 2) ... - ... Passed -Test: partial migration shard access (challenge 2) ... - ...
Passed -PASS -ok 6.5840/shardkv 173.974s",hard,http://nil.csail.mit.edu/6.5840/2024/labs/lab-shard.html -12,6.5840: Distributed Systems,Spring 2025,Lab 1: MapReduce,Your Job,"In this lab you'll build a MapReduce system. You'll implement a worker process that calls application Map and Reduce functions and handles reading and writing files, and a coordinator process that hands out tasks to workers and copes with failed workers. You'll be building something similar to the [MapReduce paper](http://research.google.com/archive/mapreduce-osdi04.pdf). (Note: this lab uses ""coordinator"" instead of the paper's ""master"".)","You need to [setup Go](http://nil.csail.mit.edu/6.5840/2024/labs/go.html) to do the labs. - -Fetch the initial lab software with [git](https://git-scm.com/) (a version control system). To learn more about git, look at the [Pro Git book](https://git-scm.com/book/en/v2) or the [git user's manual](http://www.kernel.org/pub/software/scm/git/docs/user-manual.html). - -``` -$ git clone git://g.csail.mit.edu/6.5840-golabs-2024 6.5840 -$ cd 6.5840 -$ ls -Makefile src -$ -``` - -We supply you with a simple sequential mapreduce implementation in `src/main/mrsequential.go`. It runs the maps and reduces one at a time, in a single process. We also provide you with a couple of MapReduce applications: word-count in `mrapps/wc.go`, and a text indexer in `mrapps/indexer.go`. You can run word count sequentially as follows: - -``` -$ cd ~/6.5840 -$ cd src/main -$ go build -buildmode=plugin ../mrapps/wc.go -$ rm mr-out* -$ go run mrsequential.go wc.so pg*.txt -$ more mr-out-0 -A 509 -ABOUT 2 -ACT 8 -... -``` - -`mrsequential.go` leaves its output in the file `mr-out-0`. The input is from the text files named `pg-xxx.txt`. - -Feel free to borrow code from `mrsequential.go`. You should also have a look at `mrapps/wc.go` to see what MapReduce application code looks like. - -For this lab and all the others, we might issue updates to the code we provide you. To ensure that you can fetch those updates and easily merge them using `git pull`, it's best to leave the code we provide in the original files. You can add to the code we provide as directed in the lab write-ups; just don't move it. It's OK to put your own new functions in new files.",,"Your job is to implement a distributed MapReduce, consisting of two programs, the coordinator and the worker. There will be just one coordinator process, and one or more worker processes executing in parallel. In a real system the workers would run on a bunch of different machines, but for this lab you'll run them all on a single machine. The workers will talk to the coordinator via RPC. Each worker process will, in a loop, ask the coordinator for a task, read the task's input from one or more files, execute the task, write the task's output to one or more files, and again ask the coordinator for a new task. The coordinator should notice if a worker hasn't completed its task in a reasonable amount of time (for this lab, use ten seconds), and give the same task to a different worker. - -We have given you a little code to start you off. The ""main"" routines for the coordinator and worker are in `main/mrcoordinator.go` and `main/mrworker.go`; don't change these files. You should put your implementation in `mr/coordinator.go`, `mr/worker.go`, and `mr/rpc.go`. - -Here's how to run your code on the word-count MapReduce application. 
First, make sure the word-count plugin is freshly built: - -``` -$ go build -buildmode=plugin ../mrapps/wc.go -``` - -In the `main` directory, run the coordinator. - -``` -$ rm mr-out* -$ go run mrcoordinator.go pg-*.txt -``` - -The `pg-*.txt` arguments to `mrcoordinator.go` are the input files; each file corresponds to one ""split"", and is the input to one Map task. - -In one or more other windows, run some workers: - -``` -$ go run mrworker.go wc.so -``` - -When the workers and coordinator have finished, look at the output in `mr-out-*`. When you've completed the lab, the sorted union of the output files should match the sequential output, like this: - -``` -$ cat mr-out-* | sort | more -A 509 -ABOUT 2 -ACT 8 -... -``` - -We supply you with a test script in `main/test-mr.sh`. The tests check that the `wc` and `indexer` MapReduce applications produce the correct output when given the `pg-xxx.txt` files as input. The tests also check that your implementation runs the Map and Reduce tasks in parallel, and that your implementation recovers from workers that crash while running tasks. - -If you run the test script now, it will hang because the coordinator never finishes: - -``` -$ cd ~/6.5840/src/main -$ bash test-mr.sh -*** Starting wc test. -``` - -You can change `ret := false` to true in the Done function in `mr/coordinator.go` so that the coordinator exits immediately. Then: - -``` -$ bash test-mr.sh -*** Starting wc test. -sort: No such file or directory -cmp: EOF on mr-wc-all ---- wc output is not the same as mr-correct-wc.txt ---- wc test: FAIL -$ -``` - -The test script expects to see output in files named `mr-out-X`, one for each reduce task. The empty implementations of `mr/coordinator.go` and `mr/worker.go` don't produce those files (or do much of anything else), so the test fails. - -When you've finished, the test script output should look like this: - -``` -$ bash test-mr.sh -*** Starting wc test. ---- wc test: PASS -*** Starting indexer test. ---- indexer test: PASS -*** Starting map parallelism test. ---- map parallelism test: PASS -*** Starting reduce parallelism test. ---- reduce parallelism test: PASS -*** Starting job count test. ---- job count test: PASS -*** Starting early exit test. ---- early exit test: PASS -*** Starting crash test. ---- crash test: PASS -*** PASSED ALL TESTS -$ -``` - -You may see some errors from the Go RPC package that look like - -``` -2019/12/16 13:27:09 rpc.Register: method ""Done"" has 1 input parameters; needs exactly three -``` - -Ignore these messages; registering the coordinator as an [RPC server](https://golang.org/src/net/rpc/server.go) checks if all its methods are suitable for RPCs (have 3 inputs); we know that `Done` is not called via RPC. - -Additionally, depending on your strategy for terminating worker processes, you may see some errors of the form - -``` -2025/02/11 16:21:32 dialing:dial unix /var/tmp/5840-mr-501: connect: connection refused -``` - -It is fine to see a handful of these messages per test; they arise when the worker is unable to contact the coordinator RPC server after the coordinator has exited. - - - -### A few rules: - -- The map phase should divide the intermediate keys into buckets for `nReduce` reduce tasks, where `nReduce` is the number of reduce tasks -- the argument that `main/mrcoordinator.go` passes to `MakeCoordinator()`. Each mapper should create `nReduce` intermediate files for consumption by the reduce tasks. 
-- The worker implementation should put the output of the X'th reduce task in the file `mr-out-X`. -- A `mr-out-X` file should contain one line per Reduce function output. The line should be generated with the Go `""%v %v""` format, called with the key and value. Have a look in `main/mrsequential.go` for the line commented ""this is the correct format"". The test script will fail if your implementation deviates too much from this format. -- You can modify `mr/worker.go`, `mr/coordinator.go`, and `mr/rpc.go`. You can temporarily modify other files for testing, but make sure your code works with the original versions; we'll test with the original versions. -- The worker should put intermediate Map output in files in the current directory, where your worker can later read them as input to Reduce tasks. -- `main/mrcoordinator.go` expects `mr/coordinator.go` to implement a `Done()` method that returns true when the MapReduce job is completely finished; at that point, `mrcoordinator.go` will exit. -- When the job is completely finished, the worker processes should exit. A simple way to implement this is to use the return value from `call()`: if the worker fails to contact the coordinator, it can assume that the coordinator has exited because the job is done, so the worker can terminate too. Depending on your design, you might also find it helpful to have a ""please exit"" pseudo-task that the coordinator can give to workers. - -### Hints - -- The [Guidance page](http://nil.csail.mit.edu/6.5840/2025/labs/guidance.html) has some tips on developing and debugging. - -- One way to get started is to modify `mr/worker.go`'s `Worker()` to send an RPC to the coordinator asking for a task. Then modify the coordinator to respond with the file name of an as-yet-unstarted map task. Then modify the worker to read that file and call the application Map function, as in `mrsequential.go`. - -- The application Map and Reduce functions are loaded at run-time using the Go plugin package, from files whose names end in `.so`. - -- If you change anything in the `mr/` directory, you will probably have to re-build any MapReduce plugins you use, with something like `go build -buildmode=plugin ../mrapps/wc.go` - -- This lab relies on the workers sharing a file system. That's straightforward when all workers run on the same machine, but would require a global filesystem like GFS if the workers ran on different machines. - -- A reasonable naming convention for intermediate files is `mr-X-Y`, where X is the Map task number, and Y is the reduce task number. - -- The worker's map task code will need a way to store intermediate key/value pairs in files in a way that can be correctly read back during reduce tasks. One possibility is to use Go's encoding/json package. To write key/value pairs in JSON format to an open file: - - ``` - enc := json.NewEncoder(file) - for _, kv := ... { - err := enc.Encode(&kv) - ``` - - and to read such a file back: - - ``` - dec := json.NewDecoder(file) - for { - var kv KeyValue - if err := dec.Decode(&kv); err != nil { - break - } - kva = append(kva, kv) - } - ``` - -- The map part of your worker can use the `ihash(key)` function (in `worker.go`) to pick the reduce task for a given key. - -- You can steal some code from `mrsequential.go` for reading Map input files, for sorting intermediate key/value pairs between the Map and Reduce, and for storing Reduce output in files. - -- The coordinator, as an RPC server, will be concurrent; don't forget to lock shared data. 
- -- Use Go's race detector, with `go run -race`. `test-mr.sh` has a comment at the start that tells you how to run it with `-race`. When we grade your labs, we will **not** use the race detector. Nevertheless, if your code has races, there's a good chance it will fail when we test it even without the race detector. - -- Workers will sometimes need to wait, e.g. reduces can't start until the last map has finished. One possibility is for workers to periodically ask the coordinator for work, sleeping with `time.Sleep()` between each request. Another possibility is for the relevant RPC handler in the coordinator to have a loop that waits, either with `time.Sleep()` or `sync.Cond`. Go runs the handler for each RPC in its own thread, so the fact that one handler is waiting needn't prevent the coordinator from processing other RPCs. - -- The coordinator can't reliably distinguish between crashed workers, workers that are alive but have stalled for some reason, and workers that are executing but too slowly to be useful. The best you can do is have the coordinator wait for some amount of time, and then give up and re-issue the task to a different worker. For this lab, have the coordinator wait for ten seconds; after that the coordinator should assume the worker has died (of course, it might not have). - -- If you choose to implement Backup Tasks (Section 3.6), note that we test that your code doesn't schedule extraneous tasks when workers execute tasks without crashing. Backup tasks should only be scheduled after some relatively long period of time (e.g., 10s). - -- To test crash recovery, you can use the `mrapps/crash.go` application plugin. It randomly exits in the Map and Reduce functions. - -- To ensure that nobody observes partially written files in the presence of crashes, the MapReduce paper mentions the trick of using a temporary file and atomically renaming it once it is completely written. You can use `ioutil.TempFile` (or `os.CreateTemp` if you are running Go 1.17 or later) to create a temporary file and `os.Rename` to atomically rename it. - -- `test-mr.sh` runs all its processes in the sub-directory `mr-tmp`, so if something goes wrong and you want to look at intermediate or output files, look there. Feel free to temporarily modify `test-mr.sh` to `exit` after the failing test, so the script does not continue testing (and overwrite the output files). - -- `test-mr-many.sh` runs `test-mr.sh` many times in a row, which you may want to do in order to spot low-probability bugs. It takes as an argument the number of times to run the tests. You should not run several `test-mr.sh` instances in parallel because the coordinator will reuse the same socket, causing conflicts. - -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names. - -- When calling the RPC call() function, the reply struct should contain all default values. RPC calls should look like this: - - ``` - reply := SomeType{} - call(..., &reply) - ``` - - without setting any fields of reply before the call. If you pass reply structures that have non-default fields, the RPC system may silently return incorrect values. - - - - - -### -",,"- The [Guidance page](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) has some tips on developing and debugging. - -- One way to get started is to modify `mr/worker.go`'s `Worker()` to send an RPC to the coordinator asking for a task. 
Then modify the coordinator to respond with the file name of an as-yet-unstarted map task. Then modify the worker to read that file and call the application Map function, as in `mrsequential.go`. - -- The application Map and Reduce functions are loaded at run-time using the Go plugin package, from files whose names end in `.so`. - -- If you change anything in the `mr/` directory, you will probably have to re-build any MapReduce plugins you use, with something like `go build -buildmode=plugin ../mrapps/wc.go` - -- This lab relies on the workers sharing a file system. That's straightforward when all workers run on the same machine, but would require a global filesystem like GFS if the workers ran on different machines. - -- A reasonable naming convention for intermediate files is `mr-X-Y`, where X is the Map task number, and Y is the reduce task number. - -- The worker's map task code will need a way to store intermediate key/value pairs in files in a way that can be correctly read back during reduce tasks. One possibility is to use Go's `encoding/json` package. To write key/value pairs in JSON format to an open file: - - ``` - enc := json.NewEncoder(file) - for _, kv := ... { - err := enc.Encode(&kv) - ``` - - and to read such a file back: - - ``` - dec := json.NewDecoder(file) - for { - var kv KeyValue - if err := dec.Decode(&kv); err != nil { - break - } - kva = append(kva, kv) - } - ``` - -- The map part of your worker can use the `ihash(key)` function (in `worker.go`) to pick the reduce task for a given key. - -- You can steal some code from `mrsequential.go` for reading Map input files, for sorting intermediate key/value pairs between the Map and Reduce, and for storing Reduce output in files. - -- The coordinator, as an RPC server, will be concurrent; don't forget to lock shared data. - -- Use Go's race detector, with `go run -race`. `test-mr.sh` has a comment at the start that tells you how to run it with `-race`. When we grade your labs, we will **not** use the race detector. Nevertheless, if your code has races, there's a good chance it will fail when we test it even without the race detector. - -- Workers will sometimes need to wait, e.g. reduces can't start until the last map has finished. One possibility is for workers to periodically ask the coordinator for work, sleeping with `time.Sleep()` between each request. Another possibility is for the relevant RPC handler in the coordinator to have a loop that waits, either with `time.Sleep()` or `sync.Cond`. Go runs the handler for each RPC in its own thread, so the fact that one handler is waiting needn't prevent the coordinator from processing other RPCs. - -- The coordinator can't reliably distinguish between crashed workers, workers that are alive but have stalled for some reason, and workers that are executing but too slowly to be useful. The best you can do is have the coordinator wait for some amount of time, and then give up and re-issue the task to a different worker. For this lab, have the coordinator wait for ten seconds; after that the coordinator should assume the worker has died (of course, it might not have). - -- If you choose to implement Backup Tasks (Section 3.6), note that we test that your code doesn't schedule extraneous tasks when workers execute tasks without crashing. Backup tasks should only be scheduled after some relatively long period of time (e.g., 10s). - -- To test crash recovery, you can use the `mrapps/crash.go` application plugin. It randomly exits in the Map and Reduce functions. 
- -- To ensure that nobody observes partially written files in the presence of crashes, the MapReduce paper mentions the trick of using a temporary file and atomically renaming it once it is completely written. You can use `ioutil.TempFile` (or `os.CreateTemp` if you are running Go 1.17 or later) to create a temporary file and `os.Rename` to atomically rename it. - -- `test-mr.sh` runs all its processes in the sub-directory `mr-tmp`, so if something goes wrong and you want to look at intermediate or output files, look there. Feel free to temporarily modify `test-mr.sh` to `exit` after the failing test, so the script does not continue testing (and overwrite the output files). - -- `test-mr-many.sh` runs `test-mr.sh` many times in a row, which you may want to do in order to spot low-probability bugs. It takes as an argument the number of times to run the tests. You should not run several `test-mr.sh` instances in parallel because the coordinator will reuse the same socket, causing conflicts. - -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names. - -- When calling the RPC `call()` function, the reply struct should contain all default values. RPC calls should look like this: - - ``` - reply := SomeType{} - call(..., &reply) - ``` - - without setting any fields of reply before the call. If you pass reply structures that have non-default fields, the RPC system may silently return incorrect values.","- The map phase should divide the intermediate keys into buckets for `nReduce` reduce tasks, where `nReduce` is the number of reduce tasks -- the argument that `main/mrcoordinator.go` passes to `MakeCoordinator()`. Each mapper should create `nReduce` intermediate files for consumption by the reduce tasks. -- The worker implementation should put the output of the X'th reduce task in the file `mr-out-X`. -- A `mr-out-X` file should contain one line per Reduce function output. The line should be generated with the Go `""%v %v""` format, called with the key and value. Have a look in `main/mrsequential.go` for the line commented ""this is the correct format"". The test script will fail if your implementation deviates too much from this format. -- You can modify `mr/worker.go`, `mr/coordinator.go`, and `mr/rpc.go`. You can temporarily modify other files for testing, but make sure your code works with the original versions; we'll test with the original versions. -- The worker should put intermediate Map output in files in the current directory, where your worker can later read them as input to Reduce tasks. -- `main/mrcoordinator.go` expects `mr/coordinator.go` to implement a `Done()` method that returns true when the MapReduce job is completely finished; at that point, `mrcoordinator.go` will exit. -- When the job is completely finished, the worker processes should exit. A simple way to implement this is to use the return value from `call()`: if the worker fails to contact the coordinator, it can assume that the coordinator has exited because the job is done, so the worker can terminate too. Depending on your design, you might also find it helpful to have a ""please exit"" pseudo-task that the coordinator can give to workers.",git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/main && bash test-mr.sh,"*** Starting wc test. ---- wc test: PASS -*** Starting indexer test. ---- indexer test: PASS -*** Starting map parallelism test. ---- map parallelism test: PASS -*** Starting reduce parallelism test. 
---- reduce parallelism test: PASS -*** Starting job count test. ---- job count test: PASS -*** Starting early exit test. ---- early exit test: PASS -*** Starting crash test. ---- crash test: PASS -*** PASSED ALL TESTS",moderate/hard,http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html -13,6.5840: Distributed Systems,Spring 2025,Lab 2: Key/Value Server,Key/value server with reliable network,"In this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes. - -### KV server - -Each client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`. - -`Get(key)` fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`. - -Maintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits. - -When you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.","We supply you with skeleton code and tests in `src/kvsrv1`. 
`kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. `kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv1 -$ go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - kvsrv_test.go:25: Put err ErrNoKey -... -$ -``` -",,"Your first task is to implement a solution that works when there are no dropped messages. You'll need to add RPC-sending code to the Clerk Put/Get methods in `client.go`, and implement `Put` and `Get` RPC handlers in `server.go`. - -You have completed this task when you pass the Reliable tests in the test suite: - -``` -$ go test -v -run Reliable -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - ... Passed -- 0.0 1 5 0 ---- PASS: TestReliablePut (0.00s) -=== RUN TestPutConcurrentReliable -Test: many clients racing to put values to the same key (reliable network)... -info: linearizability check timed out, assuming history is ok - ... Passed -- 3.1 1 90171 90171 ---- PASS: TestPutConcurrentReliable (3.07s) -=== RUN TestMemPutManyClientsReliable -Test: memory use many put clients (reliable network)... - ... Passed -- 9.2 1 100000 0 ---- PASS: TestMemPutManyClientsReliable (16.59s) -PASS -ok 6.5840/kvsrv1 19.681s -``` - -The numbers after each `Passed` are real time in seconds, the constant 1, the number of RPCs sent (including client RPCs), and the number of key/value operations executed (`Clerk` `Get` and `Put` calls). - -- Check that your code is race-free using `go test -race`.","Your first task is to implement a solution that works when there are no dropped messages. You'll need to add RPC-sending code to the Clerk Put/Get methods in `client.go`, and implement `Put` and `Get` RPC handlers in `server.go`. - -You have completed this task when you pass the Reliable tests in the test suite:",- Check that your code is race-free using `go test -race`.,,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/kvsrv1 && go test -v -run Reliable,"=== RUN TestReliablePut -One client and reliable Put (reliable network)... - ... Passed -- 0.0 1 5 0 ---- PASS: TestReliablePut (0.00s) -=== RUN TestPutConcurrentReliable -Test: many clients racing to put values to the same key (reliable network)... -info: linearizability check timed out, assuming history is ok - ... Passed -- 3.1 1 90171 90171 ---- PASS: TestPutConcurrentReliable (3.07s) -=== RUN TestMemPutManyClientsReliable -Test: memory use many put clients (reliable network)... - ... Passed -- 9.2 1 100000 0 ---- PASS: TestMemPutManyClientsReliable (16.59s) -PASS -ok 6.5840/kvsrv1 19.681s",easy,http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html -14,6.5840: Distributed Systems,Spring 2025,Lab 2: Key/Value Server,Implementing a lock using key/value clerk,"In this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. 
You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes. - -### KV server - -Each client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`. - -`Get(key)` fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`. - -Maintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits. - -When you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.","We supply you with skeleton code and tests in `src/kvsrv1`. `kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. `kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... 
-$ cd src/kvsrv1 -$ go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - kvsrv_test.go:25: Put err ErrNoKey -... -$ -``` -",,"In many distributed applications, clients running on different machines use a key/value server to coordinate their activities. For example, ZooKeeper and Etcd allow clients to coordinate using a distributed lock, in analogy with how threads in a Go program can coordinate with locks (i.e., `sync.Mutex`). Zookeeper and Etcd implement such a lock with conditional put. - -In this exercise your task is to implement a lock layered on client `Clerk.Put` and `Clerk.Get` calls. The lock supports two methods: `Acquire` and `Release`. The lock's specification is that only one client can successfully acquire the lock at a time; other clients must wait until the first client has released the lock using `Release`. - -We supply you with skeleton code and tests in `src/kvsrv1/lock/`. You will need to modify `src/kvsrv1/lock/lock.go`. Your `Acquire` and `Release` code can talk to your key/value server by calling `lk.ck.Put()` and `lk.ck.Get()`. - -If a client crashes while holding a lock, the lock will never be released. In a design more sophisticated than this lab, the client would attach a [lease](https://en.wikipedia.org/wiki/Lease_(computer_science)#:~:text=Leases are commonly used in,to rely on the resource.) to a lock. When the lease expires, the lock server would release the lock on behalf of the client. In this lab clients don't crash and you can ignore this problem. - -Implement `Acquire` and `Release`. You have completed this exercise when your code passes the Reliable tests in the test suite in the lock sub-directory: - -``` -$ cd lock -$ go test -v -run Reliable -=== RUN TestOneClientReliable -Test: 1 lock clients (reliable network)... - ... Passed -- 2.0 1 974 0 ---- PASS: TestOneClientReliable (2.01s) -=== RUN TestManyClientsReliable -Test: 10 lock clients (reliable network)... - ... Passed -- 2.1 1 83194 0 ---- PASS: TestManyClientsReliable (2.11s) -PASS -ok 6.5840/kvsrv1/lock 4.120s -``` - -If you haven't implemented the lock yet, the first test will succeed. - -This exercise requires little code but will require a bit more independent thought than the previous exercise. - -- You will need a unique identifier for each lock client; call `kvtest.RandValue(8)` to generate a random string. -- The lock service should use a specific key to store the ""lock state"" (you would have to decide precisely what the lock state is). The key to be used is passed through the parameter `l` of `MakeLock` in `src/kvsrv1/lock/lock.go`.",Implement `Acquire` and `Release`. You have completed this exercise when your code passes the Reliable tests in the test suite in the lock sub-directory:,"- You will need a unique identifier for each lock client; call `kvtest.RandValue(8)` to generate a random string. -- The lock service should use a specific key to store the ""lock state"" (you would have to decide precisely what the lock state is). The key to be used is passed through the parameter `l` of `MakeLock` in `src/kvsrv1/lock/lock.go`.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/kvsrv1/lock && go test -v -run Reliable,"=== RUN TestOneClientReliable -Test: 1 lock clients (reliable network)... - ... Passed -- 2.0 1 974 0 ---- PASS: TestOneClientReliable (2.01s) -=== RUN TestManyClientsReliable -Test: 10 lock clients (reliable network)... - ...
Passed -- 2.1 1 83194 0 ---- PASS: TestManyClientsReliable (2.11s) -PASS -ok 6.5840/kvsrv1/lock 4.120s",moderate,http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html -15,6.5840: Distributed Systems,Spring 2025,Lab 2: Key/Value Server,Key/value server with dropped messages,"In this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes. - -### KV server - -Each client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`. - -`Get(key)` fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`. - -Maintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits. - -When you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.","We supply you with skeleton code and tests in `src/kvsrv1`. `kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. 
`kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv1 -$ go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - kvsrv_test.go:25: Put err ErrNoKey -... -$ -``` -",,"The main challenge in this exercise is that the network may re-order, delay, or discard RPC requests and/or replies. To recover from discarded requests/replies, the Clerk must keep re-trying each RPC until it receives a reply from the server. - -If the network discards an RPC request message, then the client re-sending the request will solve the problem: the server will receive and execute just the re-sent request. - -However, the network might instead discard an RPC reply message. The client does not know which message was discarded; the client only observes that it received no reply. If it was the reply that was discarded, and the client re-sends the RPC request, then the server will receive two copies of the request. That's OK for a `Get`, since `Get` doesn't modify the server state. It is safe to resend a `Put` RPC with the same version number, since the server executes `Put` conditionally on the version number; if the server received and executed a `Put` RPC, it will respond to a re-transmitted copy of that RPC with `rpc.ErrVersion` rather than executing the Put a second time. - -A tricky case is if the server replies with an `rpc.ErrVersion` in a response to an RPC that the Clerk retried. In this case, the Clerk cannot know if the Clerk's `Put` was executed by the server or not: the first RPC might have been executed by the server but the network may have discarded the successful response from the server, so that the server sent `rpc.ErrVersion` only for the retransmitted RPC. Or, it might be that another Clerk updated the key before the Clerk's first RPC arrived at the server, so that the server executed neither of the Clerk's RPCs and replied `rpc.ErrVersion` to both. Therefore, if a Clerk receives `rpc.ErrVersion` for a retransmitted Put RPC, `Clerk.Put` must return `rpc.ErrMaybe` to the application instead of `rpc.ErrVersion` since the request may have been executed. It is then up to the application to handle this case. If the server responds to an initial (not retransmitted) Put RPC with `rpc.ErrVersion`, then the Clerk should return `rpc.ErrVersion` to the application, since the RPC was definitely not executed by the server. - -It would be more convenient for application developers if `Put`'s were exactly-once (i.e., no `rpc.ErrMaybe` errors) but that is difficult to guarantee without maintaining state at the server for each Clerk. In the last exercise of this lab, you will implement a lock using your Clerk to explore how to program with at-most-once `Clerk.Put`. - -Now you should modify your `kvsrv1/client.go` to continue in the face of dropped RPC requests and replies. 
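As a rough sketch of the retry logic described above (not the lab's required code), the loop below returns `ErrMaybe` only when a retransmitted `Put` comes back with a version mismatch. The `Err` constants, `PutArgs`/`PutReply`, and the `Call`-style endpoint are simplified stand-ins for the `kvsrv1/rpc` and `labrpc` types, whose real signatures differ.

```go
package kvsrv

import "time"

// Simplified stand-ins for the lab's kvsrv1/rpc and labrpc types; shapes and
// names here are assumptions made only for this sketch.
type Err string

const (
	OK         Err = "OK"
	ErrVersion Err = "ErrVersion"
	ErrMaybe   Err = "ErrMaybe"
)

type PutArgs struct {
	Key     string
	Value   string
	Version uint64
}

type PutReply struct {
	Err Err
}

type endpoint interface {
	// Call returns false when no reply arrives before its timeout.
	Call(svcMeth string, args interface{}, reply interface{}) bool
}

type Clerk struct {
	clnt endpoint
}

// Put keeps re-sending the RPC until some reply arrives. If a retransmitted
// Put comes back ErrVersion, the first attempt may or may not have executed,
// so the Clerk reports ErrMaybe rather than ErrVersion.
func (ck *Clerk) Put(key, value string, version uint64) Err {
	args := PutArgs{Key: key, Value: value, Version: version}
	retried := false
	for {
		reply := PutReply{} // the reply struct must start out with default values
		if ck.clnt.Call("KVServer.Put", &args, &reply) {
			if reply.Err == ErrVersion && retried {
				return ErrMaybe
			}
			return reply.Err
		}
		retried = true
		time.Sleep(100 * time.Millisecond) // wait a bit before retrying
	}
}
```

The exact semantics of `ck.clnt.Call()` that this sketch relies on are described next.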
A return value of `true` from the client's `ck.clnt.Call()` indicates that the client received an RPC reply from the server; a return value of `false` indicates that it did not receive a reply (more precisely, `Call()` waits for a reply message for a timeout interval, and returns false if no reply arrives within that time). Your `Clerk` should keep re-sending an RPC until it receives a reply. Keep in mind the discussion of `rpc.ErrMaybe` above. Your solution shouldn't require any changes to the server. - -Add code to `Clerk` to retry if it doesn't receive a reply. You have completed this task if your code passes all tests in `kvsrv1/`, like this: - -``` -$ go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - ... Passed -- 0.0 1 5 0 ---- PASS: TestReliablePut (0.00s) -=== RUN TestPutConcurrentReliable -Test: many clients racing to put values to the same key (reliable network)... -info: linearizability check timed out, assuming history is ok - ... Passed -- 3.1 1 106647 106647 ---- PASS: TestPutConcurrentReliable (3.09s) -=== RUN TestMemPutManyClientsReliable -Test: memory use many put clients (reliable network)... - ... Passed -- 8.0 1 100000 0 ---- PASS: TestMemPutManyClientsReliable (14.61s) -=== RUN TestUnreliableNet -One client (unreliable network)... - ... Passed -- 7.6 1 251 208 ---- PASS: TestUnreliableNet (7.60s) -PASS -ok 6.5840/kvsrv1 25.319s -``` - -- Before the client retries, it should wait a little bit; you can use Go's `time` package and call `time.Sleep(100 * time.Millisecond)`","Add code to `Clerk` to retry if it doesn't receive a reply. You have completed this task if your code passes all tests in `kvsrv1/`, like this:","- Before the client retries, it should wait a little bit; you can use Go's `time` package and call `time.Sleep(100 * time.Millisecond)`",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/kvsrv1 && go test -v,"=== RUN TestReliablePut -One client and reliable Put (reliable network)... - ... Passed -- 0.0 1 5 0 ---- PASS: TestReliablePut (0.00s) -=== RUN TestPutConcurrentReliable -Test: many clients racing to put values to the same key (reliable network)... -info: linearizability check timed out, assuming history is ok - ... Passed -- 3.1 1 106647 106647 ---- PASS: TestPutConcurrentReliable (3.09s) -=== RUN TestMemPutManyClientsReliable -Test: memory use many put clients (reliable network)... - ... Passed -- 8.0 1 100000 0 ---- PASS: TestMemPutManyClientsReliable (14.61s) -=== RUN TestUnreliableNet -One client (unreliable network)... - ... Passed -- 7.6 1 251 208 ---- PASS: TestUnreliableNet (7.60s) -PASS -ok 6.5840/kvsrv1 25.319s",moderate,http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html -16,6.5840: Distributed Systems,Spring 2025,Lab 2: Key/Value Server,Implementing a lock using key/value clerk and unreliable network,"In this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes. - -### KV server - -Each client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. 
The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`. - -`Get(key)` fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`. - -Maintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits. - -When you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server.","We supply you with skeleton code and tests in `src/kvsrv1`. `kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. `kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv1 -$ go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - kvsrv_test.go:25: Put err ErrNoKey -... -$ -``` -",,"Modify your lock implementation to work correctly with your modified key/value client when the network is not reliable. 
You have completed this exercise when your code passes all the `kvsrv1/lock/` tests, including the unreliable ones: - -``` -$ cd lock -$ go test -v -=== RUN TestOneClientReliable -Test: 1 lock clients (reliable network)... - ... Passed -- 2.0 1 968 0 ---- PASS: TestOneClientReliable (2.01s) -=== RUN TestManyClientsReliable -Test: 10 lock clients (reliable network)... - ... Passed -- 2.1 1 10789 0 ---- PASS: TestManyClientsReliable (2.12s) -=== RUN TestOneClientUnreliable -Test: 1 lock clients (unreliable network)... - ... Passed -- 2.3 1 70 0 ---- PASS: TestOneClientUnreliable (2.27s) -=== RUN TestManyClientsUnreliable -Test: 10 lock clients (unreliable network)... - ... Passed -- 3.6 1 908 0 ---- PASS: TestManyClientsUnreliable (3.62s) -PASS -ok 6.5840/kvsrv1/lock 10.033s -``` -","Modify your lock implementation to work correctly with your modified key/value client when the network is not reliable. You have completed this exercise when your code passes all the `kvsrv1/lock/` tests, including the unreliable ones:",,,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/kvsrv1/lock && go test -v,"=== RUN TestOneClientReliable -Test: 1 lock clients (reliable network)... - ... Passed -- 2.0 1 968 0 ---- PASS: TestOneClientReliable (2.01s) -=== RUN TestManyClientsReliable -Test: 10 lock clients (reliable network)... - ... Passed -- 2.1 1 10789 0 ---- PASS: TestManyClientsReliable (2.12s) -=== RUN TestOneClientUnreliable -Test: 1 lock clients (unreliable network)... - ... Passed -- 2.3 1 70 0 ---- PASS: TestOneClientUnreliable (2.27s) -=== RUN TestManyClientsUnreliable -Test: 10 lock clients (unreliable network)... - ... Passed -- 3.6 1 908 0 ---- PASS: TestManyClientsUnreliable (3.62s) -PASS -ok 6.5840/kvsrv1/lock 10.033s",easy,http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html -17,6.5840: Distributed Systems,Spring 2025,Lab 3: Raft,Part 3A: leader election,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. 
A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date.","Do a `git pull` to get the latest lab software. - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft1 -$ go test -Test (3A): initial election (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestInitialElection3A (4.90s) -Test (3A): election after network failure (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestReElection3A (5.05s) -... -$ -``` -","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). 
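For orientation, the RequestVote argument and reply shapes listed in Figure 2 of the paper look roughly like the sketch below; the skeleton's own definitions may differ, and the capitalized field names matter because Go RPC and `labgob` only encode exported fields.

```go
package raft

// RequestVote RPC shapes as listed in Figure 2 of the extended Raft paper.
// This is a sketch for reference, not the skeleton's exact code.

type RequestVoteArgs struct {
	Term         int // candidate's term
	CandidateId  int // candidate requesting the vote
	LastLogIndex int // index of candidate's last log entry
	LastLogTerm  int // term of candidate's last log entry
}

type RequestVoteReply struct {
	Term        int  // currentTerm at the receiver, for the candidate to update itself
	VoteGranted bool // true means the candidate received this vote
}
```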
Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.","Implement Raft leader election and heartbeats (`AppendEntries` RPCs with no log entries). The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run `go test -run 3A` to test your 3A code. - -- You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. `go test -run 3A`. -- Follow the paper's Figure 2. At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election. -- Add the Figure 2 state for leader election to the `Raft` struct in `raft.go`. You'll also need to define a struct to hold information about each log entry. -- Fill in the `RequestVoteArgs` and `RequestVoteReply` structs. Modify `Make()` to create a background goroutine that will kick off leader election periodically by sending out `RequestVote` RPCs when it hasn't heard from another peer for a while. Implement the `RequestVote()` RPC handler so that servers will vote for one another. -- To implement heartbeats, define an `AppendEntries` RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an `AppendEntries` RPC handler method. -- The tester requires that the leader send heartbeat RPCs no more than ten times per second. -- The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate). -- The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you to tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds. -- You may find Go's [rand](https://golang.org/pkg/math/rand/) useful. -- You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls [time.Sleep()](https://golang.org/pkg/time/#Sleep); see the `ticker()` goroutine that `Make()` creates for this purpose. Don't use Go's `time.Timer` or `time.Ticker`, which are difficult to use correctly. -- If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure. -- Don't forget to implement `GetState()`. -- The tester calls your Raft's `rf.Kill()` when it is permanently shutting down an instance. You can check whether `Kill()` has been called using `rf.killed()`.
You may want to do this in all loops, to avoid having dead Raft instances print confusing messages. -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). The `labgob` package will warn you about this; don't ignore the warnings. -- The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug. Refer to the [Guidance](http://nil.csail.mit.edu/6.5840/2025/labs/guidance.html) page for debugging tips. -- If you fail a test, the tester produces a file that visualizes a timeline with events marked along it, including network partitions, crashed servers, and checks performed. Here's an [example of the visualization](http://nil.csail.mit.edu/6.5840/2025/labs/vis.html). Further, you can add your own annotations by writing, for example, `tester.Annotate(""Server 0"", ""short description"", ""details"")`. This is a new feature we added this year, so if you have any feedback regarding the visualizer (e.g., bug reports, what annotation APIs you think might be helpful, what information you want the visualizer to show, etc.), please let us know! - -Be sure you pass the 3A tests before submitting Part 3A, so that you see something like this: - -``` -$ go test -run 3A -Test (3A): initial election (reliable network)... - ... Passed -- 3.6 3 106 0 -Test (3A): election after network failure (reliable network)... - ... Passed -- 7.6 3 304 0 -Test (3A): multiple elections (reliable network)... - ... Passed -- 8.4 7 954 0 -PASS -ok 6.5840/raft1 19.834s -$ -``` - -Each ""Passed"" line contains five numbers; these are the time that the test took in seconds, the number of Raft peers, the number of RPCs sent during the test, the total number of bytes in the RPC messages, and the number of log entries that Raft reports were committed. Your numbers will differ from those shown here. You can ignore the numbers if you like, but they may help you sanity-check the number of RPCs that your implementation sends. For all of labs 3, 4, and 5, the grading script will fail your solution if it takes more than 600 seconds for all of the tests (`go test`), or if any individual test takes more than 120 seconds. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should make sure that your code consistently passes the tests with the `-race` flag.","Implement Raft leader election and heartbeats (`AppendEntries` RPCs with no log entries). The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run `go test -run 3A` to test your 3A code.","- You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. `go test -run 3A`. -- Follow the paper's Figure 2. At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election. -- Add the Figure 2 state for leader election to the `Raft` struct in `raft.go`. You'll also need to define a struct to hold information about each log entry. -- Fill in the `RequestVoteArgs` and `RequestVoteReply` structs.
Modify `Make()` to create a background goroutine that will kick off leader election periodically by sending out `RequestVote` RPCs when it hasn't heard from another peer for a while. Implement the `RequestVote()` RPC handler so that servers will vote for one another. -- To implement heartbeats, define an `AppendEntries` RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an `AppendEntries` RPC handler method. -- The tester requires that the leader send heartbeat RPCs no more than ten times per second. -- The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate). -- The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds. -- You may find Go's [rand](https://golang.org/pkg/math/rand/) useful. -- You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls [time.Sleep()](https://golang.org/pkg/time/#Sleep); see the `ticker()` goroutine that `Make()` creates for this purpose. Don't use Go's `time.Timer` or `time.Ticker`, which are difficult to use correctly. -- If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure. -- Don't forget to implement `GetState()`. -- The tester calls your Raft's `rf.Kill()` when it is permanently shutting down an instance. You can check whether `Kill()` has been called using `rf.killed()`. You may want to do this in all loops, to avoid having dead Raft instances print confusing messages. -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). The `labgob` package will warn you about this; don't ignore the warnings. -- The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug. Refer to the [Guidance](http://nil.csail.mit.edu/6.5840/2025/labs/guidance.html) page for debugging tips. -- If you fail a test, the tester produces a file that visualizes a timeline with events marked along it, including network partitions, crashed servers, and checks performed. Here's an [example of the visualization](http://nil.csail.mit.edu/6.5840/2025/labs/vis.html). Further, you can add your own annotations by writing, for example, `tester.Annotate(""Server 0"", ""short description"", ""details"")`. This is a new feature we added this year, so if you have any feedback regarding the visualizer (e.g., bug reports, what annotation APIs that you think might be helpful, what information you want the visualizer to show, etc.), please let us know!",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/raft1 && go test -run 3A,"Test (3A): initial election (reliable network)... - ... Passed -- 3.6 3 106 0 -Test (3A): election after network failure (reliable network)... - ... Passed -- 7.6 3 304 0 -Test (3A): multiple elections (reliable network)... 
- ... Passed -- 8.4 7 954 0 -PASS -ok 6.5840/raft1 19.834sak",moderate,http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html -18,6.5840: Distributed Systems,Spring 2025,Lab 3: Raft,Part 3B: log,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date.","Do a `git pull` to get the latest lab software. - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... 
-$ cd src/raft1 -$ go test -Test (3A): initial election (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestInitialElection3A (4.90s) -Test (3A): election after network failure (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestReElection3A (5.05s) -... -$ -``` -","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.","Implement the leader and follower code to append new log entries, so that the `go test -run 3B `tests pass. - -- Run `git pull` to get the latest lab software. -- Raft log is 1-indexed, but we suggest that you view it as 0-indexed, and starting out with an entry (at index=0) that has term 0. That allows the very first AppendEntries RPC to contain 0 as PrevLogIndex, and be a valid index into the log. -- Your first goal should be to pass `TestBasicAgree3B()`. Start by implementing `Start()`, then write the code to send and receive new log entries via `AppendEntries` RPCs, following Figure 2. Send each newly committed entry on `applyCh` on each peer. -- You will need to implement the election restriction (section 5.4.1 in the paper). -- Your code may have loops that repeatedly check for certain events. Don't have these loops execute continuously without pausing, since that will slow your implementation enough that it fails tests. Use Go's [condition variables](https://golang.org/pkg/sync/#Cond), or insert a `time.Sleep(10 * time.Millisecond)` in each loop iteration. -- Do yourself a favor for future labs and write (or re-write) code that's clean and clear. 
For ideas, re-visit the [Guidance page](http://nil.csail.mit.edu/6.5840/2025/labs/guidance.html) with tips on how to develop and debug your code. -- If you fail a test, look at `raft_test.go` and trace the test code from there to understand what's being tested. - -The tests for upcoming labs may fail your code if it runs too slowly. You can check how much real time and CPU time your solution uses with the time command. Here's typical output: - -``` -$ time go test -run 3B -Test (3B): basic agreement (reliable network)... - ... Passed -- 1.3 3 18 0 -Test (3B): RPC byte count (reliable network)... - ... Passed -- 2.8 3 56 0 -Test (3B): test progressive failure of followers (reliable network)... - ... Passed -- 5.3 3 188 0 -Test (3B): test failure of leaders (reliable network)... - ... Passed -- 6.4 3 378 0 -Test (3B): agreement after follower reconnects (reliable network)... - ... Passed -- 5.9 3 176 0 -Test (3B): no agreement if too many followers disconnect (reliable network)... - ... Passed -- 4.3 5 288 0 -Test (3B): concurrent Start()s (reliable network)... - ... Passed -- 1.5 3 32 0 -Test (3B): rejoin of partitioned leader (reliable network)... - ... Passed -- 5.3 3 216 0 -Test (3B): leader backs up quickly over incorrect follower logs (reliable network)... - ... Passed -- 12.1 5 1528 0 -Test (3B): RPC counts aren't too high (reliable network)... - ... Passed -- 3.1 3 106 0 -PASS -ok 6.5840/raft1 48.353s -go test -run 3B 1.37s user 0.74s system 4% cpu 48.865 total -$ -``` - -The ""ok 6.5840/raft1 48.353s"" means that Go measured the time taken for the 3B tests to be 48.353 seconds of real (wall-clock) time. The ""1.37s user"" means that the code consumed 1.37 seconds of CPU time, or time spent actually executing instructions (rather than waiting or sleeping). If your solution uses much more than a minute of real time for the 3B tests, or much more than 5 seconds of CPU time, you may run into trouble later on. Look for time spent sleeping or waiting for RPC timeouts, loops that run without sleeping or waiting for conditions or channel messages, or large numbers of RPCs sent.","Implement the leader and follower code to append new log entries, so that the `go test -run 3B` tests pass.","- Run `git pull` to get the latest lab software. -- Raft log is 1-indexed, but we suggest that you view it as 0-indexed, and starting out with an entry (at index=0) that has term 0. That allows the very first AppendEntries RPC to contain 0 as PrevLogIndex, and be a valid index into the log. -- Your first goal should be to pass `TestBasicAgree3B()`. Start by implementing `Start()`, then write the code to send and receive new log entries via `AppendEntries` RPCs, following Figure 2. Send each newly committed entry on `applyCh` on each peer. -- You will need to implement the election restriction (section 5.4.1 in the paper). -- Your code may have loops that repeatedly check for certain events. Don't have these loops execute continuously without pausing, since that will slow your implementation enough that it fails tests. Use Go's [condition variables](https://golang.org/pkg/sync/#Cond), or insert a `time.Sleep(10 * time.Millisecond)` in each loop iteration. -- Do yourself a favor for future labs and write (or re-write) code that's clean and clear. For ideas, re-visit the [Guidance page](http://nil.csail.mit.edu/6.5840/2025/labs/guidance.html) with tips on how to develop and debug your code.
-- If you fail a test, look at `raft_test.go` and trace the test code from there to understand what's being tested.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/raft1 && time go test -run 3B,"Test (3B): basic agreement (reliable network)... - ... Passed -- 1.3 3 18 0 -Test (3B): RPC byte count (reliable network)... - ... Passed -- 2.8 3 56 0 -Test (3B): test progressive failure of followers (reliable network)... - ... Passed -- 5.3 3 188 0 -Test (3B): test failure of leaders (reliable network)... - ... Passed -- 6.4 3 378 0 -Test (3B): agreement after follower reconnects (reliable network)... - ... Passed -- 5.9 3 176 0 -Test (3B): no agreement if too many followers disconnect (reliable network)... - ... Passed -- 4.3 5 288 0 -Test (3B): concurrent Start()s (reliable network)... - ... Passed -- 1.5 3 32 0 -Test (3B): rejoin of partitioned leader (reliable network)... - ... Passed -- 5.3 3 216 0 -Test (3B): leader backs up quickly over incorrect follower logs (reliable network)... - ... Passed -- 12.1 5 1528 0 -Test (3B): RPC counts aren't too high (reliable network)... - ... Passed -- 3.1 3 106 0 -PASS -ok 6.5840/raft1 48.353s -go test -run 3B 1.37s user 0.74s system 4% cpu 48.865 total",hard,http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html -19,6.5840: Distributed Systems,Spring 2025,Lab 3: Raft,Part 3C: persistence,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. 
You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date.","Do a `git pull` to get the latest lab software. - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft1 -$ go test -Test (3A): initial election (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestInitialElection3A (4.90s) -Test (3A): election after network failure (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestReElection3A (5.05s) -... -$ -``` -","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. 
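The Part 3C task described below centers on `persist()` and `readPersist()`. As a rough sketch of the encode/decode pattern only, assuming Figure 2's persistent fields (`currentTerm`, `votedFor`, `log`), a `LogEntry` type of your own, and the `labgob`/`Persister` APIs the task text mentions (`labgob` mirrors Go's `encoding/gob`; the import path shown follows the lab's module layout and may differ):

```
import (
	"bytes"

	"6.5840/labgob"
)

// persist: serialize the Figure 2 persistent state and hand it to the
// Persister. The second argument to Save() is the snapshot; pass nil
// until Part 3D.
func (rf *Raft) persist() {
	w := new(bytes.Buffer)
	e := labgob.NewEncoder(w)
	e.Encode(rf.currentTerm)
	e.Encode(rf.votedFor)
	e.Encode(rf.log)
	rf.persister.Save(w.Bytes(), nil)
}

// readPersist: the inverse, called from Make() with any previously saved state.
func (rf *Raft) readPersist(data []byte) {
	if len(data) == 0 {
		return // no persisted state yet; start from scratch
	}
	r := bytes.NewBuffer(data)
	d := labgob.NewDecoder(r)
	var currentTerm, votedFor int
	var log []LogEntry
	if d.Decode(&currentTerm) == nil && d.Decode(&votedFor) == nil && d.Decode(&log) == nil {
		rf.currentTerm, rf.votedFor, rf.log = currentTerm, votedFor, log
	}
}
```

Call `persist()` at every point where any of these fields changes, while still holding the lock that protects them.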
- -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.","If a Raft-based server reboots it should resume service where it left off. This requires that Raft keep persistent state that survives a reboot. The paper's Figure 2 mentions which state should be persistent. - -A real implementation would write Raft's persistent state to disk each time it changed, and would read the state from disk when restarting after a reboot. Your implementation won't use the disk; instead, it will save and restore persistent state from a `Persister` object (see `persister.go`). Whoever calls `Raft.Make()` supplies a `Persister` that initially holds Raft's most recently persisted state (if any). Raft should initialize its state from that `Persister`, and should use it to save its persistent state each time the state changes. Use the `Persister`'s `ReadRaftState()` and `Save()` methods. - -Complete the functions `persist()` and `readPersist()` in `raft.go` by adding code to save and restore persistent state. You will need to encode (or ""serialize"") the state as an array of bytes in order to pass it to the `Persister`. Use the `labgob` encoder; see the comments in `persist()` and `readPersist()`. `labgob` is like Go's `gob` encoder but prints error messages if you try to encode structures with lower-case field names. For now, pass `nil` as the second argument to `persister.Save()`. Insert calls to `persist()` at the points where your implementation changes persistent state. Once you've done this, and if the rest of your implementation is correct, you should pass all of the 3C tests. - -You will probably need the optimization that backs up nextIndex by more than one entry at a time. Look at the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf) starting at the bottom of page 7 and top of page 8 (marked by a gray line). The paper is vague about the details; you will need to fill in the gaps. One possibility is to have a rejection message include: - -``` - XTerm: term in the conflicting entry (if any) - XIndex: index of first entry with that term (if any) - XLen: log length -``` - -Then the leader's logic can be something like: - -``` - Case 1: leader doesn't have XTerm: - nextIndex = XIndex - Case 2: leader has XTerm: - nextIndex = (index of leader's last entry for XTerm) + 1 - Case 3: follower's log is too short: - nextIndex = XLen -``` - -A few other hints: - -- Run `git pull` to get the latest lab software. -- The 3C tests are more demanding than those for 3A or 3B, and failures may be caused by problems in your code for 3A or 3B. - -Your code should pass all the 3C tests (as shown below), as well as the 3A and 3B tests. - -``` -$ go test -run 3C -Test (3C): basic persistence (reliable network)... - ... Passed -- 6.6 3 110 0 -Test (3C): more persistence (reliable network)... - ... Passed -- 15.6 5 428 0 -Test (3C): partitioned leader and one follower crash, leader restarts (reliable network)... - ... Passed -- 3.1 3 50 0 -Test (3C): Figure 8 (reliable network)... - ... Passed -- 33.7 5 654 0 -Test (3C): unreliable agreement (unreliable network)... - ... Passed -- 2.1 5 1076 0 -Test (3C): Figure 8 (unreliable) (unreliable network)... - ... Passed -- 31.9 5 4400 0 -Test (3C): churn (reliable network)... - ... Passed -- 16.8 5 4896 0 -Test (3C): unreliable churn (unreliable network)... - ... 
Passed -- 16.1 5 7204 0 -PASS -ok 6.5840/raft1 126.054s -$ -``` - -It is a good idea to run the tests multiple times before submitting and check that each run prints `PASS`. - -``` -$ for i in {0..10}; do go test; done -``` -","Complete the functions `persist()` and `readPersist()` in `raft.go` by adding code to save and restore persistent state. You will need to encode (or ""serialize"") the state as an array of bytes in order to pass it to the `Persister`. Use the `labgob` encoder; see the comments in `persist()` and `readPersist()`. `labgob` is like Go's `gob` encoder but prints error messages if you try to encode structures with lower-case field names. For now, pass `nil` as the second argument to `persister.Save()`. Insert calls to `persist()` at the points where your implementation changes persistent state. Once you've done this, and if the rest of your implementation is correct, you should pass all of the 3C tests.","- Run `git pull` to get the latest lab software. -- The 3C tests are more demanding than those for 3A or 3B, and failures may be caused by problems in your code for 3A or 3B.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/raft1 && go test -run 3C,"Test (3C): basic persistence (reliable network)... - ... Passed -- 6.6 3 110 0 -Test (3C): more persistence (reliable network)... - ... Passed -- 15.6 5 428 0 -Test (3C): partitioned leader and one follower crash, leader restarts (reliable network)... - ... Passed -- 3.1 3 50 0 -Test (3C): Figure 8 (reliable network)... - ... Passed -- 33.7 5 654 0 -Test (3C): unreliable agreement (unreliable network)... - ... Passed -- 2.1 5 1076 0 -Test (3C): Figure 8 (unreliable) (unreliable network)... - ... Passed -- 31.9 5 4400 0 -Test (3C): churn (reliable network)... - ... Passed -- 16.8 5 4896 0 -Test (3C): unreliable churn (unreliable network)... - ... Passed -- 16.1 5 7204 0 -PASS -ok 6.5840/raft1 126.054s",hard,http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html -20,6.5840: Distributed Systems,Spring 2025,Lab 3: Raft,Part 3D: log compaction,"This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. 
- -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date.","Do a `git pull` to get the latest lab software. - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft1 -$ go test -Test (3A): initial election (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestInitialElection3A (4.90s) -Test (3A): election after network failure (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestReElection3A (5.05s) -... -$ -``` -","Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. 
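One common shape for the `applyCh` side of this is a dedicated applier goroutine. The sketch below assumes the skeleton's usual `ApplyMsg` field names (`CommandValid`, `Command`, `CommandIndex`), a `Raft` struct with the skeleton's `mu` mutex and `killed()` helper plus Figure 2's `commitIndex`/`lastApplied`, a dummy entry at log index 0 so entry `i` lives at `rf.log[i]` before log compaction, and the `time` package imported:

```
// applier: deliver each newly committed entry, in log order, on the
// applyCh that was passed to Make().
func (rf *Raft) applier(applyCh chan ApplyMsg) {
	for !rf.killed() {
		rf.mu.Lock()
		for rf.lastApplied < rf.commitIndex {
			rf.lastApplied++
			msg := ApplyMsg{
				CommandValid: true,
				Command:      rf.log[rf.lastApplied].Command,
				CommandIndex: rf.lastApplied,
			}
			rf.mu.Unlock()
			applyCh <- msg // never send on the channel while holding the lock
			rf.mu.Lock()
		}
		rf.mu.Unlock()
		time.Sleep(10 * time.Millisecond) // or block on a sync.Cond instead of polling
	}
}
```

Once snapshots exist (Part 3D), the same goroutine is a natural place to deliver snapshot `ApplyMsg`s, and the `rf.log[...]` indexing has to account for the trimmed prefix.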
- -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code.","As things stand now, a rebooting server replays the complete Raft log in order to restore its state. However, it's not practical for a long-running service to remember the complete Raft log forever. Instead, you'll modify Raft to cooperate with services that persistently store a ""snapshot"" of their state from time to time, at which point Raft discards log entries that precede the snapshot. The result is a smaller amount of persistent data and faster restart. However, it's now possible for a follower to fall so far behind that the leader has discarded the log entries it needs to catch up; the leader must then send a snapshot plus the log starting at the time of the snapshot. Section 7 of the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf) outlines the scheme; you will have to design the details. - -Your Raft must provide the following function that the service can call with a serialized snapshot of its state: - -``` -Snapshot(index int, snapshot []byte) -``` - -In Lab 3D, the tester calls `Snapshot()` periodically. In Lab 4, you will write a key/value server that calls `Snapshot()`; the snapshot will contain the complete table of key/value pairs. The service layer calls `Snapshot()` on every peer (not just on the leader). - -The `index` argument indicates the highest log entry that's reflected in the snapshot. Raft should discard its log entries before that point. You'll need to revise your Raft code to operate while storing only the tail of the log. - -You'll need to implement the `InstallSnapshot` RPC discussed in the paper that allows a Raft leader to tell a lagging Raft peer to replace its state with a snapshot. You will likely need to think through how InstallSnapshot should interact with the state and rules in Figure 2. - -When a follower's Raft code receives an InstallSnapshot RPC, it can use the `applyCh` to send the snapshot to the service in an `ApplyMsg`. The `ApplyMsg` struct definition already contains the fields you will need (and which the tester expects). Take care that these snapshots only advance the service's state, and don't cause it to move backwards. - -If a server crashes, it must restart from persisted data. Your Raft should persist both Raft state and the corresponding snapshot. Use the second argument to `persister.Save()` to save the snapshot. If there's no snapshot, pass `nil` as the second argument. - -When a server restarts, the application layer reads the persisted snapshot and restores its saved state. - -Implement `Snapshot()` and the InstallSnapshot RPC, as well as the changes to Raft to support these (e.g, operation with a trimmed log). Your solution is complete when it passes the 3D tests (and all the previous Lab 3 tests). - -- `git pull` to make sure you have the latest software. 
-- A good place to start is to modify your code so that it is able to store just the part of the log starting at some index X. Initially you can set X to zero and run the 3B/3C tests. Then make `Snapshot(index)` discard the log before `index`, and set X equal to `index`. If all goes well you should now pass the first 3D test. -- A common reason for failing the first 3D test is that followers take too long to catch up to the leader. -- Next: have the leader send an InstallSnapshot RPC if it doesn't have the log entries required to bring a follower up to date. -- Send the entire snapshot in a single InstallSnapshot RPC. Don't implement Figure 13's `offset` mechanism for splitting up the snapshot. -- Raft must discard old log entries in a way that allows the Go garbage collector to free and re-use the memory; this requires that there be no reachable references (pointers) to the discarded log entries. -- A reasonable amount of time to consume for the full set of Lab 3 tests (3A+3B+3C+3D) without `-race` is 6 minutes of real time and one minute of CPU time. When running with `-race`, it is about 10 minutes of real time and two minutes of CPU time. - -Your code should pass all the 3D tests (as shown below), as well as the 3A, 3B, and 3C tests. - -``` -$ go test -run 3D -Test (3D): snapshots basic (reliable network)... - ... Passed -- 3.3 3 522 0 -Test (3D): install snapshots (disconnect) (reliable network)... - ... Passed -- 48.4 3 2710 0 -Test (3D): install snapshots (disconnect) (unreliable network)... - ... Passed -- 56.1 3 3025 0 -Test (3D): install snapshots (crash) (reliable network)... - ... Passed -- 33.3 3 1559 0 -Test (3D): install snapshots (crash) (unreliable network)... - ... Passed -- 38.1 3 1723 0 -Test (3D): crash and restart all servers (unreliable network)... - ... Passed -- 11.2 3 296 0 -Test (3D): snapshot initialization after crash (unreliable network)... - ... Passed -- 4.3 3 84 0 -PASS -ok 6.5840/raft1 195.006s -``` -","Implement `Snapshot()` and the InstallSnapshot RPC, as well as the changes to Raft to support these (e.g., operation with a trimmed log). Your solution is complete when it passes the 3D tests (and all the previous Lab 3 tests).","- `git pull` to make sure you have the latest software. -- A good place to start is to modify your code so that it is able to store just the part of the log starting at some index X. Initially you can set X to zero and run the 3B/3C tests. Then make `Snapshot(index)` discard the log before `index`, and set X equal to `index`. If all goes well you should now pass the first 3D test. -- A common reason for failing the first 3D test is that followers take too long to catch up to the leader. -- Next: have the leader send an InstallSnapshot RPC if it doesn't have the log entries required to bring a follower up to date. -- Send the entire snapshot in a single InstallSnapshot RPC. Don't implement Figure 13's `offset` mechanism for splitting up the snapshot. -- Raft must discard old log entries in a way that allows the Go garbage collector to free and re-use the memory; this requires that there be no reachable references (pointers) to the discarded log entries. -- A reasonable amount of time to consume for the full set of Lab 3 tests (3A+3B+3C+3D) without `-race` is 6 minutes of real time and one minute of CPU time.
When running with `-race`, it is about 10 minutes of real time and two minutes of CPU time.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/raft1 && go test -run 3D,"Test (3D): snapshots basic (reliable network)... - ... Passed -- 3.3 3 522 0 -Test (3D): install snapshots (disconnect) (reliable network)... - ... Passed -- 48.4 3 2710 0 -Test (3D): install snapshots (disconnect) (unreliable network)... - ... Passed -- 56.1 3 3025 0 -Test (3D): install snapshots (crash) (reliable network)... - ... Passed -- 33.3 3 1559 0 -Test (3D): install snapshots (crash) (unreliable network)... - ... Passed -- 38.1 3 1723 0 -Test (3D): crash and restart all servers (unreliable network)... - ... Passed -- 11.2 3 296 0 -Test (3D): snapshot initialization after crash (unreliable network)... - ... Passed -- 4.3 3 84 0 -PASS -ok 6.5840/raft1 195.006s",hard,http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html -21,6.5840: Distributed Systems,Spring 2025,Lab 4: Fault-tolerant Key/Value Service,Part A: replicated state machine (RSM),"In this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html). To clients, the service looks similar to the server of [Lab 2](http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html). However, instead of a single server, the service consists of a set of servers that use Raft to help them maintain identical databases. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2025/figs/kvraft.pdf). - -Clients will interact with your key/value service through a Clerk, as in Lab 2. A Clerk implements the `Put` and `Get` methods with the same semantics as Lab 2: Puts are at-most-once and the Puts/Gets must form a linearizable history. - -Providing linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. - -This lab has three parts. In part A, you will implement a replicated-state machine package, `rsm`, using your raft implementation; `rsm` is agnostic of the requests that it replicates. In part B, you will implement a replicated key/value service using `rsm`, but without using snapshots. In part C, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. - -You should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), in particular Section 7 (but not 8). For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) Start early.","We supply you with skeleton code and tests in `src/kvraft1`. The skeleton code uses the skeleton package `src/kvraft1/rsm` to replicate a server. A server must implement the `StateMachine` interface defined in `rsm` to replicate itself using `rsm`. 
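The authoritative definitions live in the `rsm` skeleton itself; purely as a sketch consistent with the description that follows (a `DoOp` that takes and returns `any`, and a `Submit()` that wraps each request in an `Op` carrying a unique identifier), the shapes might look like this:

```
// Sketch only; check src/kvraft1/rsm for the real interface.
type StateMachine interface {
	DoOp(req any) any // apply one committed operation and return its result
}

// Op is what rsm hands to raft.Start(): the client's request plus an id that
// lets the reader goroutine match the committed entry back to the waiting
// Submit() call. Fields are exported so labgob can encode them.
type Op struct {
	Id  int64
	Req any
}
```

One workable design keeps a table from `Id` to a channel (or condition variable) on which the matching `Submit()` call waits for the `DoOp()` result.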
Most of your work will be implementing `rsm` to provide server-agnostic replication. You will also need to modify `kvraft1/client.go` and `kvraft1/server.go` to implement the server-specific parts. This split allows you to re-use `rsm` in the next lab. You may be able to re-use some of your Lab 2 code (e.g., re-using the server code by copying or importing the `""src/kvsrv1""` package) but it is not a requirement. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -.. -``` -",," - -``` -$ cd src/kvraft1/rsm -$ go test -v -=== RUN TestBasic -Test RSM basic (reliable network)... -.. - config.go:147: one: took too long -``` - -In the common situation of a client/server service using Raft for replication, the service interacts with Raft in two ways: the service leader submits client operations by calling `raft.Start()`, and all service replicas receive committed operations via Raft's `applyCh`, which they execute. On the leader, these two activities interact. At any given time, some server goroutines are handling client requests, have called `raft.Start()`, and each is waiting for its operation to commit and to find out what the result of executing the operation is. And as committed operations appear on the `applyCh`, each needs to be executed by the service, and the results need to be handed to the goroutine that called `raft.Start()` so that it can return the result to the client. - -The `rsm` package encapsulates the above interaction. It sits as a layer between the service (e.g. a key/value database) and Raft. In `rsm/rsm.go` you will need to implement a ""reader"" goroutine that reads the `applyCh`, and a `rsm.Submit()` function that calls `raft.Start()` for a client operation and then waits for the reader goroutine to hand it the result of executing that operation. - -The service that is using `rsm` appears to the `rsm` reader goroutine as a `StateMachine` object providing a `DoOp()` method. The reader goroutine should hand each committed operation to `DoOp()`; `DoOp()`'s return value should be given to the corresponding `rsm.Submit()` call for it to return. `DoOp()`'s argument and return value have type `any`; the actual values should have the same types as the argument and return values that the service passes to `rsm.Submit()`, respectively. - -The service should pass each client operation to `rsm.Submit()`. To help the reader goroutine match `applyCh` messages with waiting calls to `rsm.Submit()`, `Submit()` should wrap each client operation in an `Op` structure along with a unique identifier. `Submit()` should then wait until the operation has committed and been executed, and return the result of execution (the value returned by `DoOp()`). If `raft.Start()` indicates that the current peer is not the Raft leader, `Submit()` should return an `rpc.ErrWrongLeader` error. `Submit()` should detect and handle the situation in which leadership changed just after it called `raft.Start()`, causing the operation to be lost (never committed). - -For Part A, the `rsm` tester acts as the service, submitting operations that it interprets as increments on a state consisting of a single integer. In Part B you'll use `rsm` as part of a key/value service that implements `StateMachine` (and `DoOp()`), and calls `rsm.Submit()`. - -If all goes well, the sequence of events for a client request is: - -- The client sends a request to the service leader. 
-- The service leader calls `rsm.Submit()` with the request. -- `rsm.Submit()` calls `raft.Start()` with the request, and then waits. -- Raft commits the request and sends it on all peers' `applyCh`s. -- The `rsm` reader goroutine on each peer reads the request from the `applyCh` and passes it to the service's `DoOp()`. -- On the leader, the `rsm` reader goroutine hands the `DoOp()` return value to the `Submit()` goroutine that originally submitted the request, and `Submit()` returns that value. - -Your servers should not directly communicate; they should only interact with each other through Raft. - -Implement `rsm.go`: the `Submit()` method and a reader goroutine. You have completed this task if you pass the `rsm` 4A tests: - -``` - $ cd src/kvraft1/rsm - $ go test -v -run 4A -=== RUN TestBasic4A -Test RSM basic (reliable network)... - ... Passed -- 1.2 3 48 0 ---- PASS: TestBasic4A (1.21s) -=== RUN TestLeaderFailure4A - ... Passed -- 9223372036.9 3 31 0 ---- PASS: TestLeaderFailure4A (1.50s) -PASS -ok 6.5840/kvraft1/rsm 2.887s -``` - -- You should not need to add any fields to the Raft `ApplyMsg`, or to Raft RPCs such as `AppendEntries`, but you are allowed to do so. -- Your solution needs to handle an `rsm` leader that has called `Start()` for a request submitted with `Submit()` but loses its leadership before the request is committed to the log. One way to do this is for the `rsm` to detect that it has lost leadership, by noticing that Raft's term has changed or a different request has appeared at the index returned by `Start()`, and return `rpc.ErrWrongLeader` from `Submit()`. If the ex-leader is partitioned by itself, it won't know about new leaders; but any client in the same partition won't be able to talk to a new leader either, so it's OK in this case for the server to wait indefinitely until the partition heals. -- The tester calls your Raft's `rf.Kill()` when it is shutting down a peer. Raft should close the `applyCh` so that your rsm learns about the shutdown, and can exit out of all loops.",Implement `rsm.go`: the `Submit()` method and a reader goroutine. You have completed this task if you pass the `rsm` 4A tests:,"- You should not need to add any fields to the Raft `ApplyMsg`, or to Raft RPCs such as `AppendEntries`, but you are allowed to do so. -- Your solution needs to handle an `rsm` leader that has called `Start()` for a request submitted with `Submit()` but loses its leadership before the request is committed to the log. One way to do this is for the `rsm` to detect that it has lost leadership, by noticing that Raft's term has changed or a different request has appeared at the index returned by `Start()`, and return `rpc.ErrWrongLeader` from `Submit()`. If the ex-leader is partitioned by itself, it won't know about new leaders; but any client in the same partition won't be able to talk to a new leader either, so it's OK in this case for the server to wait indefinitely until the partition heals. -- The tester calls your Raft's `rf.Kill()` when it is shutting down a peer. Raft should close the `applyCh` so that your rsm learns about the shutdown, and can exit out of all loops.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/kvraft1/rsm && go test -v -run 4A,"=== RUN TestBasic4A -Test RSM basic (reliable network)... - ... Passed -- 1.2 3 48 0 ---- PASS: TestBasic4A (1.21s) -=== RUN TestLeaderFailure4A - ...
Passed -- 9223372036.9 3 31 0 ---- PASS: TestLeaderFailure4A (1.50s) -PASS -ok 6.5840/kvraft1/rsm 2.887s",moderate/hard,http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvraft1.html -22,6.5840: Distributed Systems,Spring 2025,Lab 4: Fault-tolerant Key/Value Service,Part B: Key/value service without snapshots,"In this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html). To clients, the service looks similar to the server of [Lab 2](http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html). However, instead of a single server, the service consists of a set of servers that use Raft to help them maintain identical databases. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2025/figs/kvraft.pdf). - -Clients will interact with your key/value service through a Clerk, as in Lab 2. A Clerk implements the `Put` and `Get` methods with the same semantics as Lab 2: Puts are at-most-once and the Puts/Gets must form a linearizable history. - -Providing linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. - -This lab has three parts. In part A, you will implement a replicated-state machine package, `rsm`, using your raft implementation; `rsm` is agnostic of the requests that it replicates. In part B, you will implement a replicated key/value service using `rsm`, but without using snapshots. In part C, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. - -You should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), in particular Section 7 (but not 8). For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) Start early.","We supply you with skeleton code and tests in `src/kvraft1`. The skeleton code uses the skeleton package `src/kvraft1/rsm` to replicate a server. A server must implement the `StateMachine` interface defined in `rsm` to replicate itself using `rsm`. Most of your work will be implementing `rsm` to provide server-agnostic replication. You will also need to modify `kvraft1/client.go` and `kvraft1/server.go` to implement the server-specific parts. This split allows you to re-use `rsm` in the next lab. You may be able to re-use some of your Lab 2 code (e.g., re-using the server code by copying or importing the `""src/kvsrv1""` package) but it is not a requirement. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -.. -``` -",," - -``` -$ cd src/kvraft1 -$ go test -v -run TestBasic4B -=== RUN TestBasic4B -Test: one client (4B basic) (reliable network)... 
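Part 4B below asks the `Clerk` to retry on `rpc.ErrWrongLeader` and to remember which server was the leader last time. A rough sketch of just that retry loop follows; `lastLeader`, `servers`, `callServer`, and `clerkReply` are hypothetical names standing in for however your Clerk tracks state and issues its RPCs, `rpc.Err` is assumed to be the type of `rpc.ErrWrongLeader`, the `time` package is assumed imported, and the `ErrMaybe` handling for retried Puts is omitted:

```
// clerkReply is a placeholder for whatever reply struct your RPCs use.
type clerkReply struct {
	Err   rpc.Err
	Value string
}

// sendWithRetry: start at the last known leader, rotate through the servers
// on ErrWrongLeader or a lost reply, and remember whoever finally answers.
func (ck *Clerk) sendWithRetry(args any) clerkReply {
	for i := 0; ; i++ {
		server := (ck.lastLeader + i) % len(ck.servers)
		reply, ok := ck.callServer(server, args) // one RPC attempt (hypothetical helper)
		if ok && reply.Err != rpc.ErrWrongLeader {
			ck.lastLeader = server // try this server first next time
			return reply
		}
		time.Sleep(100 * time.Millisecond) // wrong leader or no reply: try the next server
	}
}
```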
- kvtest.go:62: Wrong error -$ -``` - -Now you will use the `rsm` package to replicate a key/value server. Each of the servers (""kvservers"") will have an associated rsm/Raft peer. Clerks send `Put()` and `Get()` RPCs to the kvserver whose associated Raft is the leader. The kvserver code submits the Put/Get operation to `rsm`, which replicates it using Raft and invokes your server's `DoOp` at each peer, which should apply the operations to the peer's key/value database; the intent is for the servers to maintain identical replicas of the key/value database. - -A `Clerk` sometimes doesn't know which kvserver is the Raft leader. If the `Clerk` sends an RPC to the wrong kvserver, or if it cannot reach the kvserver, the `Clerk` should re-try by sending to a different kvserver. If the key/value service commits the operation to its Raft log (and hence applies the operation to the key/value state machine), the leader reports the result to the `Clerk` by responding to its RPC. If the operation failed to commit (for example, if the leader was replaced), the server reports an error, and the `Clerk` retries with a different server. - -Your kvservers should not directly communicate; they should only interact with each other through Raft. - -Your first task is to implement a solution that works when there are no dropped messages, and no failed servers. - -Feel free to copy your client code from Lab 2 (`kvsrv1/client.go`) into `kvraft1/client.go`. You will need to add logic for deciding which kvserver to send each RPC to. - -You'll also need to implement `Put()` and `Get()` RPC handlers in `server.go`. These handlers should submit the request to Raft using `rsm.Submit()`. As the `rsm` package reads commands from `applyCh`, it should invoke the `DoOp` method, which you will have to implement in `server.go`. - -You have completed this task when you **reliably** pass the first test in the test suite, with `go test -v -run TestBasic4B`. - -- A kvserver should not complete a `Get()` RPC if it is not part of a majority (so that it does not serve stale data). A simple solution is to enter every `Get()` (as well as each `Put()`) in the Raft log using `Submit()`. You don't have to implement the optimization for read-only operations that is described in Section 8. -- It's best to add locking from the start because the need to avoid deadlocks sometimes affects overall code design. Check that your code is race-free using `go test -race`. - -Now you should modify your solution to continue in the face of network and server failures. One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it finds a kvserver that replies positively. If a leader fails just after committing an entry to the Raft log, the `Clerk` may not receive a reply, and thus may re-send the request to another leader. Each call to `Clerk.Put()` should result in just a single execution for a particular version number. - -Add code to handle failures. Your `Clerk` can use a similar retry plan as in lab 2, including returning `ErrMaybe` if a response to a retried `Put` RPC is lost. You are done when your code reliably passes all the 4B tests, with `go test -v -run 4B`. - -- Recall that the rsm leader may lose its leadership and return `rpc.ErrWrongLeader` from `Submit()`. In this case you should arrange for the Clerk to re-send the request to other servers until it finds the new leader. 
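To make the `Submit()`/reader split concrete, here is a minimal sketch of one way `rsm` might be structured. The struct fields (`mu`, `rf`, `sm`, `waiters`), the `Op` wrapper, `newId()`, and the exact `Submit` signature are assumptions, not the skeleton's definitions; only `Start()`, `GetState()`, `DoOp()`, `applyCh`, and `rpc.ErrWrongLeader` come from the lab text. The leadership-loss check follows the hint above: give up if the term changes or a different entry commits at the index `Start()` returned.

```
// Sketch only: anything marked "hypothetical" is not from the skeleton.
type Op struct { // hypothetical wrapper for each submitted request
	Id  int64 // unique per Submit call
	Req any
}

type result struct {
	id  int64
	val any
}

// Submit replicates req via Raft and waits for DoOp's return value.
func (r *RSM) Submit(req any) (rpc.Err, any) {
	o := Op{Id: r.newId(), Req: req} // newId(): hypothetical helper
	r.mu.Lock()
	index, startTerm, isLeader := r.rf.Start(o)
	if !isLeader {
		r.mu.Unlock()
		return rpc.ErrWrongLeader, nil
	}
	ch := make(chan result, 1)
	r.waiters[index] = ch // hypothetical map[int]chan result
	r.mu.Unlock()

	for {
		select {
		case res := <-ch:
			if res.id != o.Id { // a different entry committed at our index
				return rpc.ErrWrongLeader, nil
			}
			return rpc.OK, res.val
		case <-time.After(100 * time.Millisecond):
			if term, _ := r.rf.GetState(); term != startTerm {
				return rpc.ErrWrongLeader, nil // leadership lost
			}
			// same term and still waiting: the entry may yet commit
		}
	}
}

// reader runs on every peer and applies committed entries in log order.
func (r *RSM) reader() {
	for msg := range r.applyCh { // Raft closes applyCh on Kill()
		o := msg.Command.(Op)
		val := r.sm.DoOp(o.Req) // apply to the service (e.g. the kvserver)
		r.mu.Lock()
		if ch, ok := r.waiters[msg.CommandIndex]; ok {
			ch <- result{id: o.Id, val: val}
			delete(r.waiters, msg.CommandIndex)
		}
		r.mu.Unlock()
	}
}
```

On the clerk side, the usual complement is to remember which server last answered as the leader and try it first on the next RPC (see the sketch a little further on).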
-- You will probably have to modify your Clerk to remember which server turned out to be the leader for the last RPC, and send the next RPC to that server first. This will avoid wasting time searching for the leader on every RPC, which may help you pass some of the tests quickly enough. - -Your code should now pass the Lab 4B tests, like this: - -``` -$ cd kvraft1 -$ go test -run 4B -Test: one client (4B basic) ... - ... Passed -- 3.2 5 1041 183 -Test: one client (4B speed) ... - ... Passed -- 15.9 3 3169 0 -Test: many clients (4B many clients) ... - ... Passed -- 3.9 5 3247 871 -Test: unreliable net, many clients (4B unreliable net, many clients) ... - ... Passed -- 5.3 5 1035 167 -Test: unreliable net, one client (4B progress in majority) ... - ... Passed -- 2.9 5 155 3 -Test: no progress in minority (4B) ... - ... Passed -- 1.6 5 102 3 -Test: completion after heal (4B) ... - ... Passed -- 1.3 5 67 4 -Test: partitions, one client (4B partitions, one client) ... - ... Passed -- 6.2 5 958 155 -Test: partitions, many clients (4B partitions, many clients (4B)) ... - ... Passed -- 6.8 5 3096 855 -Test: restarts, one client (4B restarts, one client 4B ) ... - ... Passed -- 6.7 5 311 13 -Test: restarts, many clients (4B restarts, many clients) ... - ... Passed -- 7.5 5 1223 95 -Test: unreliable net, restarts, many clients (4B unreliable net, restarts, many clients ) ... - ... Passed -- 8.4 5 804 33 -Test: restarts, partitions, many clients (4B restarts, partitions, many clients) ... - ... Passed -- 10.1 5 1308 105 -Test: unreliable net, restarts, partitions, many clients (4B unreliable net, restarts, partitions, many clients) ... - ... Passed -- 11.9 5 1040 33 -Test: unreliable net, restarts, partitions, random keys, many clients (4B unreliable net, restarts, partitions, random keys, many clients) ... - ... Passed -- 12.1 7 2801 93 -PASS -ok 6.5840/kvraft1 103.797s -``` - -The numbers after each `Passed` are real time in seconds, number of peers, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put calls).","1. task1 - - Your first task is to implement a solution that works when there are no dropped messages, and no failed servers. - - Feel free to copy your client code from Lab 2 (`kvsrv1/client.go`) into `kvraft1/client.go`. You will need to add logic for deciding which kvserver to send each RPC to. - - You'll also need to implement `Put()` and `Get()` RPC handlers in `server.go`. These handlers should submit the request to Raft using `rsm.Submit()`. As the `rsm` package reads commands from `applyCh`, it should invoke the `DoOp` method, which you will have to implement in `server.go`. - - You have completed this task when you **reliably** pass the first test in the test suite, with `go test -v -run TestBasic4B`. - -2. task2 - - Add code to handle failures. Your `Clerk` can use a similar retry plan as in lab 2, including returning `ErrMaybe` if a response to a retried `Put` RPC is lost. You are done when your code reliably passes all the 4B tests, with `go test -v -run 4B`. - - - - -","- A kvserver should not complete a `Get()` RPC if it is not part of a majority (so that it does not serve stale data). A simple solution is to enter every `Get()` (as well as each `Put()`) in the Raft log using `Submit()`. You don't have to implement the optimization for read-only operations that is described in Section 8. -- It's best to add locking from the start because the need to avoid deadlocks sometimes affects overall code design. 
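One way the leader-caching retry loop might look, as a sketch. The clerk fields (`servers`, `leader`, `clnt`), the `Call` shape, and the versioned `PutArgs`/`ErrVersion`/`ErrMaybe` names are assumptions modeled on the Lab 2 clerk; check the skeleton's `rpc` package for the real definitions.

```
// Sketch only: clerk fields, Call shape, and the versioned PutArgs,
// ErrVersion and ErrMaybe names are assumptions modeled on Lab 2.
func (ck *Clerk) Put(key, value string, version rpc.Tversion) rpc.Err {
	args := rpc.PutArgs{Key: key, Value: value, Version: version}
	maybeLost := false // did an earlier attempt end with an unknown outcome?
	for i := 0; ; i++ {
		srv := (ck.leader + i) % len(ck.servers) // try the cached leader first
		var reply rpc.PutReply
		ok := ck.clnt.Call(ck.servers[srv], "KVServer.Put", &args, &reply)
		if !ok {
			maybeLost = true // reply lost; the Put may still have applied
			continue
		}
		if reply.Err == rpc.ErrWrongLeader {
			continue // definitely not applied here; try the next server
		}
		ck.leader = srv // remember who answered for the next RPC
		if maybeLost && reply.Err == rpc.ErrVersion {
			return rpc.ErrMaybe // an earlier, lost attempt may have succeeded
		}
		return reply.Err
	}
}
```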
Check that your code is race-free using `go test -race`. - -- Recall that the rsm leader may lose its leadership and return `rpc.ErrWrongLeader` from `Submit()`. In this case you should arrange for the Clerk to re-send the request to other servers until it finds the new leader. -- You will probably have to modify your Clerk to remember which server turned out to be the leader for the last RPC, and send the next RPC to that server first. This will avoid wasting time searching for the leader on every RPC, which may help you pass some of the tests quickly enough.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/kvraft1 && go test -run 4B,"Test: one client (4B basic) ... - ... Passed -- 3.2 5 1041 183 -Test: one client (4B speed) ... - ... Passed -- 15.9 3 3169 0 -Test: many clients (4B many clients) ... - ... Passed -- 3.9 5 3247 871 -Test: unreliable net, many clients (4B unreliable net, many clients) ... - ... Passed -- 5.3 5 1035 167 -Test: unreliable net, one client (4B progress in majority) ... - ... Passed -- 2.9 5 155 3 -Test: no progress in minority (4B) ... - ... Passed -- 1.6 5 102 3 -Test: completion after heal (4B) ... - ... Passed -- 1.3 5 67 4 -Test: partitions, one client (4B partitions, one client) ... - ... Passed -- 6.2 5 958 155 -Test: partitions, many clients (4B partitions, many clients (4B)) ... - ... Passed -- 6.8 5 3096 855 -Test: restarts, one client (4B restarts, one client 4B ) ... - ... Passed -- 6.7 5 311 13 -Test: restarts, many clients (4B restarts, many clients) ... - ... Passed -- 7.5 5 1223 95 -Test: unreliable net, restarts, many clients (4B unreliable net, restarts, many clients ) ... - ... Passed -- 8.4 5 804 33 -Test: restarts, partitions, many clients (4B restarts, partitions, many clients) ... - ... Passed -- 10.1 5 1308 105 -Test: unreliable net, restarts, partitions, many clients (4B unreliable net, restarts, partitions, many clients) ... - ... Passed -- 11.9 5 1040 33 -Test: unreliable net, restarts, partitions, random keys, many clients (4B unreliable net, restarts, partitions, random keys, many clients) ... - ... Passed -- 12.1 7 2801 93 -PASS -ok 6.5840/kvraft1 103.797s",moderate,http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvraft1.html -23,6.5840: Distributed Systems,Spring 2025,Lab 4: Fault-tolerant Key/Value Service,Part C: Key/value service with snapshots,"In this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html). To clients, the service looks similar to the server of [Lab 2](http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html). However, instead of a single server, the service consists of a set of servers that use Raft to help them maintain identical databases. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2025/figs/kvraft.pdf). - -Clients will interact with your key/value service through a Clerk, as in Lab 2. A Clerk implements the `Put` and `Get` methods with the same semantics as Lab 2: Puts are at-most-once and the Puts/Gets must form a linearizable history. - -Providing linearizability is relatively easy for a single server. 
It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. - -This lab has three parts. In part A, you will implement a replicated-state machine package, `rsm`, using your raft implementation; `rsm` is agnostic of the requests that it replicates. In part B, you will implement a replicated key/value service using `rsm`, but without using snapshots. In part C, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. - -You should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), in particular Section 7 (but not 8). For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) Start early.","We supply you with skeleton code and tests in `src/kvraft1`. The skeleton code uses the skeleton package `src/kvraft1/rsm` to replicate a server. A server must implement the `StateMachine` interface defined in `rsm` to replicate itself using `rsm`. Most of your work will be implementing `rsm` to provide server-agnostic replication. You will also need to modify `kvraft1/client.go` and `kvraft1/server.go` to implement the server-specific parts. This split allows you to re-use `rsm` in the next lab. You may be able to re-use some of your Lab 2 code (e.g., re-using the server code by copying or importing the `""src/kvsrv1""` package) but it is not a requirement. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -.. -``` -",,"As things stand now, your key/value server doesn't call your Raft library's `Snapshot()` method, so a rebooting server has to replay the complete persisted Raft log in order to restore its state. Now you'll modify kvserver and `rsm` to cooperate with Raft to save log space and reduce restart time, using Raft's `Snapshot()` from Lab 3D. - -The tester passes `maxraftstate` to your `StartKVServer()`, which passes it to `rsm`. `maxraftstate` indicates the maximum allowed size of your persistent Raft state in bytes (including the log, but not including snapshots). You should compare `maxraftstate` to `rf.PersistBytes()`. Whenever your `rsm` detects that the Raft state size is approaching this threshold, it should save a snapshot by calling Raft's `Snapshot`. `rsm` can create this snapshot by calling the `Snapshot` method of the `StateMachine` interface to obtain a snapshot of the kvserver. If `maxraftstate` is -1, you do not have to snapshot. The `maxraftstate` limit applies to the GOB-encoded bytes your Raft passes as the first argument to `persister.Save()`. - -You can find the source for the `persister` object in `tester1/persister.go`. - -Modify your rsm so that it detects when the persisted Raft state grows too large, and then hands a snapshot to Raft. When a `rsm` server restarts, it should read the snapshot with `persister.ReadSnapshot()` and, if the snapshot's length is greater than zero, pass the snapshot to the `StateMachine`'s `Restore()` method. You complete this task if you pass TestSnapshot4C in `rsm`. 
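A minimal sketch of where the size check and the restart path might live in `rsm`; the struct fields (`maxraftstate`, `rf`, `sm`, `persister`) are hypothetical, and only the calls named above (`rf.PersistBytes()`, Raft's `Snapshot()`, the `StateMachine`'s `Snapshot()`/`Restore()`, and `persister.ReadSnapshot()`) are taken from the lab description.

```
// Sketch only: the struct fields are hypothetical; the calls are the
// ones named in the lab text.
// Called after applying each committed entry, with that entry's index.
func (r *RSM) maybeSnapshot(appliedIndex int) {
	if r.maxraftstate == -1 {
		return // snapshotting disabled by the tester
	}
	if r.rf.PersistBytes() >= r.maxraftstate {
		snap := r.sm.Snapshot()           // ask the service for its state
		r.rf.Snapshot(appliedIndex, snap) // let Raft discard old log entries
	}
}

// Called once at start-up, before serving any requests.
func (r *RSM) restore() {
	if snap := r.persister.ReadSnapshot(); len(snap) > 0 {
		r.sm.Restore(snap)
	}
}
```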
- -``` -$ cd kvraft1/rsm -$ go test -run TestSnapshot4C -=== RUN TestSnapshot4C - ... Passed -- 9223372036.9 3 230 0 ---- PASS: TestSnapshot4C (3.88s) -PASS -ok 6.5840/kvraft1/rsm 3.882s -``` - -- Think about when `rsm` should snapshot its state and what should be included in the snapshot beyond just the server state. Raft stores each snapshot in the persister object using `Save()`, along with corresponding Raft state. You can read the latest stored snapshot using `ReadSnapshot()`. -- Capitalize all fields of structures stored in the snapshot. - -Implement the `kvraft1/server.go` `Snapshot()` and `Restore()` methods, which `rsm` calls. Modify `rsm` to handle applyCh messages that contain snapshots. - -- You may have bugs in your Raft and rsm library that this task exposes. If you make changes to your Raft implementation make sure it continues to pass all of the Lab 3 tests. -- A reasonable amount of time to take for the Lab 4 tests is 400 seconds of real time and 700 seconds of CPU time. - -Your code should pass the 4C tests (as in the example here) as well as the 4A+B tests (and your Raft must continue to pass the Lab 3 tests). - -``` -$ go test -run 4C -Test: snapshots, one client (4C SnapshotsRPC) ... -Test: InstallSnapshot RPC (4C) ... - ... Passed -- 4.5 3 241 64 -Test: snapshots, one client (4C snapshot size is reasonable) ... - ... Passed -- 11.4 3 2526 800 -Test: snapshots, one client (4C speed) ... - ... Passed -- 14.2 3 3149 0 -Test: restarts, snapshots, one client (4C restarts, snapshots, one client) ... - ... Passed -- 6.8 5 305 13 -Test: restarts, snapshots, many clients (4C restarts, snapshots, many clients ) ... - ... Passed -- 9.0 5 5583 795 -Test: unreliable net, snapshots, many clients (4C unreliable net, snapshots, many clients) ... - ... Passed -- 4.7 5 977 155 -Test: unreliable net, restarts, snapshots, many clients (4C unreliable net, restarts, snapshots, many clients) ... - ... Passed -- 8.6 5 847 33 -Test: unreliable net, restarts, partitions, snapshots, many clients (4C unreliable net, restarts, partitions, snapshots, many clients) ... - ... Passed -- 11.5 5 841 33 -Test: unreliable net, restarts, partitions, snapshots, random keys, many clients (4C unreliable net, restarts, partitions, snapshots, random keys, many clients) ... - ... Passed -- 12.8 7 2903 93 -PASS -ok 6.5840/kvraft1 83.543s -``` -","1. task1 - - Modify your rsm so that it detects when the persisted Raft state grows too large, and then hands a snapshot to Raft. When a `rsm` server restarts, it should read the snapshot with `persister.ReadSnapshot()` and, if the snapshot's length is greater than zero, pass the snapshot to the `StateMachine`'s `Restore()` method. You complete this task if you pass TestSnapshot4C in `rsm`. - -2. task2 - - Implement the `kvraft1/server.go` `Snapshot()` and `Restore()` methods, which `rsm` calls. Modify `rsm` to handle applyCh messages that contain snapshots.","1. hint - - Think about when `rsm` should snapshot its state and what should be included in the snapshot beyond just the server state. Raft stores each snapshot in the persister object using `Save()`, along with corresponding Raft state. You can read the latest stored snapshot using `ReadSnapshot()`. - - Capitalize all fields of structures stored in the snapshot. -2. hint2 - - You may have bugs in your Raft and rsm library that this task exposes. If you make changes to your Raft implementation make sure it continues to pass all of the Lab 3 tests. 
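For the applyCh part of that task, the reader loop from Part A can be extended roughly as follows. This is a sketch: the `ApplyMsg` field names (`CommandValid`, `SnapshotValid`, `Snapshot`, `SnapshotIndex`) follow the usual lab layout and should be checked against your Raft package, and `apply()`/`lastApplied` are hypothetical.

```
// Sketch only: ApplyMsg field names follow the usual lab layout and
// should be checked against your Raft package; apply() and lastApplied
// are hypothetical.
func (r *RSM) reader() {
	for msg := range r.applyCh {
		switch {
		case msg.SnapshotValid:
			// A lagging peer received an InstallSnapshot: replace the
			// whole service state instead of replaying commands.
			r.sm.Restore(msg.Snapshot)
			r.lastApplied = msg.SnapshotIndex
		case msg.CommandValid:
			r.apply(msg) // apply one committed command, as in Part A/B
			r.lastApplied = msg.CommandIndex
			r.maybeSnapshot(r.lastApplied) // as sketched earlier
		}
	}
}
```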
- - A reasonable amount of time to take for the Lab 4 tests is 400 seconds of real time and 700 seconds of CPU time.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd kvraft1/rsm && go test -run 4C,"Test: snapshots, one client (4C SnapshotsRPC) ... -Test: InstallSnapshot RPC (4C) ... - ... Passed -- 4.5 3 241 64 -Test: snapshots, one client (4C snapshot size is reasonable) ... - ... Passed -- 11.4 3 2526 800 -Test: snapshots, one client (4C speed) ... - ... Passed -- 14.2 3 3149 0 -Test: restarts, snapshots, one client (4C restarts, snapshots, one client) ... - ... Passed -- 6.8 5 305 13 -Test: restarts, snapshots, many clients (4C restarts, snapshots, many clients ) ... - ... Passed -- 9.0 5 5583 795 -Test: unreliable net, snapshots, many clients (4C unreliable net, snapshots, many clients) ... - ... Passed -- 4.7 5 977 155 -Test: unreliable net, restarts, snapshots, many clients (4C unreliable net, restarts, snapshots, many clients) ... - ... Passed -- 8.6 5 847 33 -Test: unreliable net, restarts, partitions, snapshots, many clients (4C unreliable net, restarts, partitions, snapshots, many clients) ... - ... Passed -- 11.5 5 841 33 -Test: unreliable net, restarts, partitions, snapshots, random keys, many clients (4C unreliable net, restarts, partitions, snapshots, random keys, many clients) ... - ... Passed -- 12.8 7 2903 93 -PASS -ok 6.5840/kvraft1 83.543s",moderate,http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvraft1.html -24,6.5840: Distributed Systems,Spring 2025,Lab 5: Sharded Key/Value Service,Part A: Moving shards,"You can either do a [final project](http://nil.csail.mit.edu/6.5840/2025/project.html) based on your own ideas, or this lab. - -In this lab you'll build a key/value storage system that ""shards,"" or partitions, the keys over a set of Raft-replicated key/value server groups (shardgrps). A shard is a subset of the key/value pairs; for example, all the keys starting with ""a"" might be one shard, all the keys starting with ""b"" another, etc. The reason for sharding is performance. Each shardgrp handles puts and gets for just a few of the shards, and the shardgrps operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of shardgrps. - -![shardkv design](http://nil.csail.mit.edu/6.5840/2025/labs/shardkv.png) - -The sharded key/value service has the components shown above. Shardgrps (shown with blue squares) store shards with keys: shardgrp 1 holds a shard storing key ""a"", and shardgrp 2 holds a shard storing key ""b"". Clients of the sharded key/value service interact with the service through a clerk (shown with a green circle), which implements `Get` and `Put` methods. To find the shardgrp for a key passed to `Put`/`Get`, the clerk gets the configuration from the kvsrv (shown with a black square), which you implemented in Lab 2. The configuration (not shown) describes the mapping from shards to shardgrps (e.g., shard 1 is served by shardgrp 3). - -An administrator (i.e., the tester) uses another client, the controller (shown with a purple circle), to add/remove shardgrps from the cluster and update which shardgrp should serve a shard. The controller has one main method: `ChangeConfigTo`, which takes as argument a new configuration and changes the system from the current configuration to the new configuration; this involves moving shards to new shardgrps that are joining the system and moving shards away from shardgrps that are leaving the system. 
To do so the controller 1) makes RPCs (`FreezeShard`, `InstallShard`, and `DeleteShard`) to shardgrps, and 2) updates the configuration stored in kvsrv. - -The reason for the controller is that a sharded storage system must be able to shift shards among shardgrps. One reason is that some shardgrps may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that shardgrps may join and leave the system: new shardgrps may be added to increase capacity, or existing shardgrps may be taken offline for repair or retirement. - -The main challenges in this lab will be ensuring linearizability of `Get`/`Put` operations while handling 1) changes in the assignment of shards to shardgrps, and 2) recovering from a controller that fails or is partitioned during `ChangeConfigTo`. - -1. `ChangeConfigTo` moves shards from one shardgrp to another. A risk is that some clients might use the old shardgrp while other clients use the new shardgrp, which could break linearizability. You will need to ensure that at most one shardgrp is serving requests for each shard at any one time. -2. If `ChangeConfigTo` fails while reconfiguring, some shards may be inaccessible if they have started but not completed moving from one shardgrp to another. To make forward progress, the tester starts a new controller, and your job is to ensure that the new one completes the reconfiguration that the previous controller started. - -This lab uses ""configuration"" to refer to the assignment of shards to shardgrps. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. - -A shardgrp server is a member of only a single shardgrp. The set of servers in a given shardgrp will never change. - -Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. - -In Part A, you will implement a working `shardctrler`, which will store and retrieve configurations in a `kvsrv`. You will also implement the `shardgrp`, replicated with your Raft `rsm` package, and a corresponding `shardgrp` clerk. The `shardctrler` talks to the `shardgrp` clerks to move shards between different groups. - -In Part B, you will modify your `shardctrler` to handle failures and partitions during config changes. In Part C, you will extend your `shardctrler` to allow for concurrent controllers without interfering with each other. Finally, in Part D, you will have the opportunity to extend your solution in any way you like. - -This lab's sharded key/value service follows the same general design as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are simple; and so on. - -Lab 5 will use your `kvsrv` from Lab 2, and your `rsm` and `Raft` from Lab 4. Your Lab 5 and Lab 4 must use the same `rsm` and `Raft` implementations. - -You may use late hours for Part A, but you may not use late hours for Parts B-D.","Do a `git pull` to get the latest lab software. - -We supply you with tests and skeleton code in `src/shardkv1`: - -- `client.go` for the shardkv clerk -- `shardcfg` package for computing shard configurations -- `shardgrp` package: for the shardgrp clerk and server. 
-- `shardctrler` package, which contains `shardctrler.go` with methods for the controller to change a configuration (`ChangeConfigTo`) and to get a configuration (`Query`) - -To get up and running, execute the following commands: - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/shardkv1 -$ go test -v -=== RUN TestInitQuery5A -Test (5A): Init and Query ... (reliable network)... - shardkv_test.go:46: Static wrong null 0 -... -``` -",,"Your first job is to implement shardgrps and the `InitConfig`, `Query`, and `ChangeConfigTo` methods when there are no failures. We have given you the code for describing a configuration, in `shardkv1/shardcfg`. Each `shardcfg.ShardConfig` has a unique identifying number, a mapping from shard number to group number, and a mapping from group number to the list of servers replicating that group. There will usually be more shards than groups (so that each group serves more than one shard), in order that load can be shifted at a fairly fine granularity. - -Implement these two methods in `shardctrler/shardctrler.go`: - -- The `InitConfig` method receives the first configuration, passed to it by the tester as a `shardcfg.ShardConfig`. `InitConfig` should store the configuration in an instance of Lab 2's `kvsrv`. -- The `Query` method returns the current configuration; it should read the configuration from `kvsrv`, previously stored there by `InitConfig`. - -Implement `InitConfig` and `Query`, and store the configuration in `kvsrv`. You're done when your code passes the first test. Note this task doesn't require any shardgrps. - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run TestInitQuery5A -Test (5A): Init and Query ... (reliable network)... - ... Passed -- time 0.0s #peers 1 #RPCs 3 #Ops 0 -PASS -ok 6.5840/shardkv1 0.197s -$ -``` - -- Implement `InitConfig` and `Query` by storing and reading the initial configuration from `kvsrv`: use the `Get`/`Put` methods of `ShardCtrler.IKVClerk` to talk to `kvsrv`, use the `String` method of `ShardConfig` to turn a `ShardConfig` into a string that you can pass to `Put`, and use the `shardcfg.FromString()` function to turn a string into a `ShardConfig`. - -Implement an initial version of `shardgrp` in `shardkv1/shardgrp/server.go` and a corresponding clerk in `shardkv1/shardgrp/client.go` by copying code from your Lab 4 `kvraft` solution. - -Implement a clerk in `shardkv1/client.go` that uses the `Query` method to find the shardgrp for a key, and then talks to that shardgrp. You're done when your code passes the `Static` test. - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run Static -Test (5A): one shard group ... (reliable network)... - ... Passed -- time 5.4s #peers 1 #RPCs 793 #Ops 180 -PASS -ok 6.5840/shardkv1 5.632s -$ -``` - -- Copy code from your `kvraft` client.go and server.go for `Put` and `Get`, and any other code you need from `kvraft`. -- The code in `shardkv1/client.go` provides the `Put`/`Get` clerk for the overall system: it finds out which shardgrp holds the desired key's shard by invoking the `Query` method, and then talks to the shardgrp that holds that shard. -- Implement `shardkv1/client.go`, including its `Put`/`Get` methods. Use `shardcfg.Key2Shard()` to find the shard number for a key. The tester passes a `ShardCtrler` object to `MakeClerk` in `shardkv1/client.go`. Retrieve the current configuration using the `Query` method. 
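Putting that hint into code, a sketch of `InitConfig` and `Query`; the key name is made up, the `IKVClerk` `Get`/`Put` signatures are assumed to match the Lab 2 clerk, and `FromString` is assumed to return the parsed configuration directly.

```
// Sketch only: the key name is made up and the clerk signatures are
// assumed to match Lab 2's kvsrv clerk.
const configKey = "config" // hypothetical key holding the current config

func (sck *ShardCtrler) InitConfig(cfg *shardcfg.ShardConfig) {
	// Store the initial configuration as a string, at version 0.
	sck.IKVClerk.Put(configKey, cfg.String(), 0)
}

func (sck *ShardCtrler) Query() *shardcfg.ShardConfig {
	val, _, err := sck.IKVClerk.Get(configKey)
	if err != rpc.OK {
		return nil // nothing stored yet
	}
	return shardcfg.FromString(val)
}
```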
-- To put/get a key from a shardgrp, the shardkv clerk should create a shardgrp clerk for the shardgrp by calling `shardgrp.MakeClerk`, passing in the servers found in the configuration and the shardkv clerk's `ck.clnt`. Use the `GidServers()` method from `ShardConfig` to get the group for a shard. -- `shardkv1/client.go`'s Put must return `ErrMaybe` when the reply was maybe lost, but this Put invokes `shardgrp`'s Put to talk a particular shardgrp. The inner Put can signal this with an error. -- Upon creation, the first shardgrp (`shardcfg.Gid1`) should initialize itself to own all shards. - -Now you should support movement of shards among groups by implementing the `ChangeConfigTo` method, which changes from an old configuration to a new configuration. The new configuration may include new shardgrps that are not present in the old configuration, and may exclude shardgrps that were present in the old configuration. The controller should move shards (the key/value data) so that the set of shards stored by each shardgrp matches the new configuration. - -The approach we suggest for moving a shard is for `ChangeConfigTo` to first ""freeze"" the shard at the source shardgrp, causing that shardgrp to reject `Put`'s for keys in the moving shard. Then, copy (install) the shard to the destination shardgrp; then delete the frozen shard. Finally, post a new configuration so that clients can find the moved shard. A nice property of this approach is that it avoids any direct interactions among the shardgrps. It also supports serving shards that are not affected by an ongoing configuration change. - -To be able to order changes to the configuration, each configuration has a unique number `Num` (see `shardcfg/shardcfg.go`). The tester in Part A invokes `ChangeConfigTo` sequentially, and the configuration passed to `ChangeConfigTo` will have a `Num` one larger than the previous one; thus, a configuration with a higher `Num` is newer than one with a lower `Num`. - -The network may delay RPCs, and RPCs may arrive out of order at the shardgrps. To reject old `FreezeShard`, `InstallShard`, and `DeleteShard` RPCs, they should include `Num` (see `shardgrp/shardrpc/shardrpc.go`), and shardgrps must remember the largest `Num` they have seen for each shard. - -Implement `ChangeConfigTo` (in `shardctrler/shardctrler.go`) and extend `shardgrp` to support freeze, install, and delete. `ChangeConfigTo` should always succeed in Part A because the tester doesn't induce failures in this part. You will need to implement `FreezeShard`, `InstallShard`, and `DeleteShard` in `shardgrp/client.go` and `shardgrp/server.go` using the RPCs in the `shardgrp/shardrpc` package, and reject old RPCs based on `Num`. You will also need modify the shardkv clerk in `shardkv1/client.go` to handle `ErrWrongGroup`, which a shardgrp should return if it isn't responsible for the shard. - -You have completed this task when you pass the `JoinBasic` and `DeleteBasic` tests. These tests focus on adding shardgrps; you don't have to worry about shardgrps leaving just yet. - -- A shardgrp should respond with an `ErrWrongGroup` error to a client `Put`/`Get` with a key that the shardgrp isn't responsible for (i.e., for a key whose shard is not assigned to the shardgrp). You will have to modify `shardkv1/client.go` to reread the configuration and retry the `Put`/`Get`. -- Note that you will have to run `FreezeShard`, `InstallShard`, and `DeleteShard` through your `rsm` package, just like `Put` and `Get`. 
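To make the `Num` fencing concrete, a sketch of how a shardgrp's `DoOp` might handle a freeze; the argument fields, the `maxNum`/`frozen`/`shards` maps, and the choice of error value are assumptions, not the `shardrpc` package's definitions. The deep copy addresses the RPC-races hint below.

```
// Sketch only: the argument fields, the maxNum/frozen/shards maps, and
// the error value are assumptions.
func (kv *KVServer) doFreeze(a shardrpc.FreezeShardArgs) shardrpc.FreezeShardReply {
	var reply shardrpc.FreezeShardReply
	if a.Num < kv.maxNum[a.Shard] {
		// A delayed RPC from an older configuration change: ignore it.
		reply.Err = rpc.ErrWrongGroup // or a dedicated "stale" error
		return reply
	}
	kv.maxNum[a.Shard] = a.Num
	kv.frozen[a.Shard] = true // reject client Puts for this shard from now on
	// Reply with a deep copy so the RPC layer never reads a map that the
	// server might concurrently modify (see the hint below).
	reply.State = copyShard(kv.shards[a.Shard])
	return reply
}

func copyShard(m map[string]string) map[string]string {
	c := make(map[string]string, len(m))
	for k, v := range m {
		c[k] = v
	}
	return c
}
```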
-- You can send an entire map as your state in an RPC request or reply, which may help keep the code for shard transfer simple. -- If one of your RPC handlers includes in its reply a map (e.g. a key/value map) that's part of your server's state, you may get bugs due to races. The RPC system has to read the map in order to send it to the caller, but it isn't holding a lock that covers the map. Your server, however, may proceed to modify the same map while the RPC system is reading it. The solution is for the RPC handler to include a copy of the map in the reply. - -Extend `ChangeConfigTo` to handle shard groups that leave; i.e., shardgrps that are present in the current configuration but not in the new one. Your solution should pass `TestJoinLeaveBasic5A` now. (You may have handled this scenario already in the previous task, but the previous tests didn't test for shardgrps leaving.) - -Make your solution pass all Part A tests, which check that your sharded key/value service supports many groups joining and leaving, shardgrps restarting from snapshots, processing `Get`s while some shards are offline or involved in a configuration change, and linearizability when many clients interact with the service while the tester concurrently invokes the controller's `ChangeConfigTo` to rebalance shards. - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run 5A -Test (5A): Init and Query ... (reliable network)... - ... Passed -- time 0.0s #peers 1 #RPCs 3 #Ops 0 -Test (5A): one shard group ... (reliable network)... - ... Passed -- time 5.1s #peers 1 #RPCs 792 #Ops 180 -Test (5A): a group joins... (reliable network)... - ... Passed -- time 12.9s #peers 1 #RPCs 6300 #Ops 180 -Test (5A): delete ... (reliable network)... - ... Passed -- time 8.4s #peers 1 #RPCs 1533 #Ops 360 -Test (5A): basic groups join/leave ... (reliable network)... - ... Passed -- time 13.7s #peers 1 #RPCs 5676 #Ops 240 -Test (5A): many groups join/leave ... (reliable network)... - ... Passed -- time 22.1s #peers 1 #RPCs 3529 #Ops 180 -Test (5A): many groups join/leave ... (unreliable network)... - ... Passed -- time 54.8s #peers 1 #RPCs 5055 #Ops 180 -Test (5A): shutdown ... (reliable network)... - ... Passed -- time 11.7s #peers 1 #RPCs 2807 #Ops 180 -Test (5A): progress ... (reliable network)... - ... Passed -- time 8.8s #peers 1 #RPCs 974 #Ops 82 -Test (5A): progress ... (reliable network)... - ... Passed -- time 13.9s #peers 1 #RPCs 2443 #Ops 390 -Test (5A): one concurrent clerk reliable... (reliable network)... - ... Passed -- time 20.0s #peers 1 #RPCs 5326 #Ops 1248 -Test (5A): many concurrent clerks reliable... (reliable network)... - ... Passed -- time 20.4s #peers 1 #RPCs 21688 #Ops 10500 -Test (5A): one concurrent clerk unreliable ... (unreliable network)... - ... Passed -- time 25.8s #peers 1 #RPCs 2654 #Ops 176 -Test (5A): many concurrent clerks unreliable... (unreliable network)... - ... Passed -- time 25.3s #peers 1 #RPCs 7553 #Ops 1896 -PASS -ok 6.5840/shardkv1 243.115s -$ -``` - -Your solution must continue serving shards that are not affected by an ongoing configuration change.","1. task1 - - Implement `InitConfig` and `Query`, and store the configuration in `kvsrv`. You're done when your code passes the first test. Note this task doesn't require any shardgrps. - -2. task2 - - Implement an initial version of `shardgrp` in `shardkv1/shardgrp/server.go` and a corresponding clerk in `shardkv1/shardgrp/client.go` by copying code from your Lab 4 `kvraft` solution. 
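A sketch of how the shardkv clerk's routing and `ErrWrongGroup` retry might fit together; only `Key2Shard`, `Query`, `GidServers`, and `shardgrp.MakeClerk` are names from the lab text, while the clerk fields (`sck`, `clnt`) and the return shapes of `GidServers` and the shardgrp `Get` are assumptions.

```
// Sketch only: ck.sck, ck.clnt, and the return shapes of GidServers and
// the shardgrp clerk's Get are assumptions.
func (ck *Clerk) Get(key string) (string, rpc.Tversion, rpc.Err) {
	shard := shardcfg.Key2Shard(key)
	for {
		cfg := ck.sck.Query() // current shard -> shardgrp assignment
		_, servers, ok := cfg.GidServers(shard)
		if !ok {
			continue // no group assigned yet; reread the config
		}
		grp := shardgrp.MakeClerk(ck.clnt, servers)
		val, ver, err := grp.Get(key)
		if err == rpc.ErrWrongGroup {
			// The shard moved since we read the config: reread and retry.
			continue
		}
		return val, ver, err
	}
}
```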
- - Implement a clerk in `shardkv1/client.go` that uses the `Query` method to find the shardgrp for a key, and then talks to that shardgrp. You're done when your code passes the `Static` test. - -3. task3 - - Implement `ChangeConfigTo` (in `shardctrler/shardctrler.go`) and extend `shardgrp` to support freeze, install, and delete. `ChangeConfigTo` should always succeed in Part A because the tester doesn't induce failures in this part. You will need to implement `FreezeShard`, `InstallShard`, and `DeleteShard` in `shardgrp/client.go` and `shardgrp/server.go` using the RPCs in the `shardgrp/shardrpc` package, and reject old RPCs based on `Num`. You will also need modify the shardkv clerk in `shardkv1/client.go` to handle `ErrWrongGroup`, which a shardgrp should return if it isn't responsible for the shard. - - You have completed this task when you pass the `JoinBasic` and `DeleteBasic` tests. These tests focus on adding shardgrps; you don't have to worry about shardgrps leaving just yet. - -4. task4 - - Extend `ChangeConfigTo` to handle shard groups that leave; i.e., shardgrps that are present in the current configuration but not in the new one. Your solution should pass `TestJoinLeaveBasic5A` now. (You may have handled this scenario already in the previous task, but the previous tests didn't test for shardgrps leaving.) - -5. task5 - - Make your solution pass all Part A tests, which check that your sharded key/value service supports many groups joining and leaving, shardgrps restarting from snapshots, processing `Get`s while some shards are offline or involved in a configuration change, and linearizability when many clients interact with the service while the tester concurrently invokes the controller's `ChangeConfigTo` to rebalance shards.","1. hint1 - - Implement `InitConfig` and `Query` by storing and reading the initial configuration from `kvsrv`: use the `Get`/`Put` methods of `ShardCtrler.IKVClerk` to talk to `kvsrv`, use the `String` method of `ShardConfig` to turn a `ShardConfig` into a string that you can pass to `Put`, and use the `shardcfg.FromString()` function to turn a string into a `ShardConfig`. -2. hint2 - - Copy code from your `kvraft` client.go and server.go for `Put` and `Get`, and any other code you need from `kvraft`. - - The code in `shardkv1/client.go` provides the `Put`/`Get` clerk for the overall system: it finds out which shardgrp holds the desired key's shard by invoking the `Query` method, and then talks to the shardgrp that holds that shard. - - Implement `shardkv1/client.go`, including its `Put`/`Get` methods. Use `shardcfg.Key2Shard()` to find the shard number for a key. The tester passes a `ShardCtrler` object to `MakeClerk` in `shardkv1/client.go`. Retrieve the current configuration using the `Query` method. - - To put/get a key from a shardgrp, the shardkv clerk should create a shardgrp clerk for the shardgrp by calling `shardgrp.MakeClerk`, passing in the servers found in the configuration and the shardkv clerk's `ck.clnt`. Use the `GidServers()` method from `ShardConfig` to get the group for a shard. - - `shardkv1/client.go`'s Put must return `ErrMaybe` when the reply was maybe lost, but this Put invokes `shardgrp`'s Put to talk a particular shardgrp. The inner Put can signal this with an error. - - Upon creation, the first shardgrp (`shardcfg.Gid1`) should initialize itself to own all shards. -3. 
hint3 - - A shardgrp should respond with an `ErrWrongGroup` error to a client `Put`/`Get` with a key that the shardgrp isn't responsible for (i.e., for a key whose shard is not assigned to the shardgrp). You will have to modify `shardkv1/client.go` to reread the configuration and retry the `Put`/`Get`. - - Note that you will have to run `FreezeShard`, `InstallShard`, and `DeleteShard` through your `rsm` package, just like `Put` and `Get`. - - You can send an entire map as your state in an RPC request or reply, which may help keep the code for shard transfer simple. - - If one of your RPC handlers includes in its reply a map (e.g. a key/value map) that's part of your server's state, you may get bugs due to races. The RPC system has to read the map in order to send it to the caller, but it isn't holding a lock that covers the map. Your server, however, may proceed to modify the same map while the RPC system is reading it. The solution is for the RPC handler to include a copy of the map in the reply.",,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/shardkv1 && go test -run 5A,"Test (5A): Init and Query ... (reliable network)... - ... Passed -- time 0.0s #peers 1 #RPCs 3 #Ops 0 -Test (5A): one shard group ... (reliable network)... - ... Passed -- time 5.1s #peers 1 #RPCs 792 #Ops 180 -Test (5A): a group joins... (reliable network)... - ... Passed -- time 12.9s #peers 1 #RPCs 6300 #Ops 180 -Test (5A): delete ... (reliable network)... - ... Passed -- time 8.4s #peers 1 #RPCs 1533 #Ops 360 -Test (5A): basic groups join/leave ... (reliable network)... - ... Passed -- time 13.7s #peers 1 #RPCs 5676 #Ops 240 -Test (5A): many groups join/leave ... (reliable network)... - ... Passed -- time 22.1s #peers 1 #RPCs 3529 #Ops 180 -Test (5A): many groups join/leave ... (unreliable network)... - ... Passed -- time 54.8s #peers 1 #RPCs 5055 #Ops 180 -Test (5A): shutdown ... (reliable network)... - ... Passed -- time 11.7s #peers 1 #RPCs 2807 #Ops 180 -Test (5A): progress ... (reliable network)... - ... Passed -- time 8.8s #peers 1 #RPCs 974 #Ops 82 -Test (5A): progress ... (reliable network)... - ... Passed -- time 13.9s #peers 1 #RPCs 2443 #Ops 390 -Test (5A): one concurrent clerk reliable... (reliable network)... - ... Passed -- time 20.0s #peers 1 #RPCs 5326 #Ops 1248 -Test (5A): many concurrent clerks reliable... (reliable network)... - ... Passed -- time 20.4s #peers 1 #RPCs 21688 #Ops 10500 -Test (5A): one concurrent clerk unreliable ... (unreliable network)... - ... Passed -- time 25.8s #peers 1 #RPCs 2654 #Ops 176 -Test (5A): many concurrent clerks unreliable... (unreliable network)... - ... Passed -- time 25.3s #peers 1 #RPCs 7553 #Ops 1896 -PASS -ok 6.5840/shardkv1 243.115s",hard,http://nil.csail.mit.edu/6.5840/2025/labs/lab-shard1.html -25,6.5840: Distributed Systems,Spring 2025,Lab 5: Sharded Key/Value Service,Part B: Handling a failed controller,"You can either do a [final project](http://nil.csail.mit.edu/6.5840/2025/project.html) based on your own ideas, or this lab. - -In this lab you'll build a key/value storage system that ""shards,"" or partitions, the keys over a set of Raft-replicated key/value server groups (shardgrps). A shard is a subset of the key/value pairs; for example, all the keys starting with ""a"" might be one shard, all the keys starting with ""b"" another, etc. The reason for sharding is performance. 
Each shardgrp handles puts and gets for just a few of the shards, and the shardgrps operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of shardgrps. - -![shardkv design](http://nil.csail.mit.edu/6.5840/2025/labs/shardkv.png) - -The sharded key/value service has the components shown above. Shardgrps (shown with blue squares) store shards with keys: shardgrp 1 holds a shard storing key ""a"", and shardgrp 2 holds a shard storing key ""b"". Clients of the sharded key/value service interact with the service through a clerk (shown with a green circle), which implements `Get` and `Put` methods. To find the shardgrp for a key passed to `Put`/`Get`, the clerk gets the configuration from the kvsrv (shown with a black square), which you implemented in Lab 2. The configuration (not shown) describes the mapping from shards to shardgrps (e.g., shard 1 is served by shardgrp 3). - -An administrator (i.e., the tester) uses another client, the controller (shown with a purple circle), to add/remove shardgrps from the cluster and update which shardgrp should serve a shard. The controller has one main method: `ChangeConfigTo`, which takes as argument a new configuration and changes the system from the current configuration to the new configuration; this involves moving shards to new shardgrps that are joining the system and moving shards away from shardgrps that are leaving the system. To do so the controller 1) makes RPCs (`FreezeShard`, `InstallShard`, and `DeleteShard`) to shardgrps, and 2) updates the configuration stored in kvsrv. - -The reason for the controller is that a sharded storage system must be able to shift shards among shardgrps. One reason is that some shardgrps may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that shardgrps may join and leave the system: new shardgrps may be added to increase capacity, or existing shardgrps may be taken offline for repair or retirement. - -The main challenges in this lab will be ensuring linearizability of `Get`/`Put` operations while handling 1) changes in the assignment of shards to shardgrps, and 2) recovering from a controller that fails or is partitioned during `ChangeConfigTo`. - -1. `ChangeConfigTo` moves shards from one shardgrp to another. A risk is that some clients might use the old shardgrp while other clients use the new shardgrp, which could break linearizability. You will need to ensure that at most one shardgrp is serving requests for each shard at any one time. -2. If `ChangeConfigTo` fails while reconfiguring, some shards may be inaccessible if they have started but not completed moving from one shardgrp to another. To make forward progress, the tester starts a new controller, and your job is to ensure that the new one completes the reconfiguration that the previous controller started. - -This lab uses ""configuration"" to refer to the assignment of shards to shardgrps. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. - -A shardgrp server is a member of only a single shardgrp. The set of servers in a given shardgrp will never change. - -Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. - -In Part A, you will implement a working `shardctrler`, which will store and retrieve configurations in a `kvsrv`. 
You will also implement the `shardgrp`, replicated with your Raft `rsm` package, and a corresponding `shardgrp` clerk. The `shardctrler` talks to the `shardgrp` clerks to move shards between different groups. - -In Part B, you will modify your `shardctrler` to handle failures and partitions during config changes. In Part C, you will extend your `shardctrler` to allow for concurrent controllers without interfering with each other. Finally, in Part D, you will have the opportunity to extend your solution in any way you like. - -This lab's sharded key/value service follows the same general design as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are simple; and so on. - -Lab 5 will use your `kvsrv` from Lab 2, and your `rsm` and `Raft` from Lab 4. Your Lab 5 and Lab 4 must use the same `rsm` and `Raft` implementations. - -You may use late hours for Part A, but you may not use late hours for Parts B-D.","Do a `git pull` to get the latest lab software. - -We supply you with tests and skeleton code in `src/shardkv1`: - -- `client.go` for the shardkv clerk -- `shardcfg` package for computing shard configurations -- `shardgrp` package: for the shardgrp clerk and server. -- `shardctrler` package, which contains `shardctrler.go` with methods for the controller to change a configuration (`ChangeConfigTo`) and to get a configuration (`Query`) - -To get up and running, execute the following commands: - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/shardkv1 -$ go test -v -=== RUN TestInitQuery5A -Test (5A): Init and Query ... (reliable network)... - shardkv_test.go:46: Static wrong null 0 -... -``` -",,"The controller is a short-lived command, which an administrator invokes: it moves shards and then exits. But, it may fail or lose network connectivity while moving shards. The main task in this part of the lab is recovering from a controller that fails to complete `ChangeConfigTo`. The tester starts a new controller and invokes its `ChangeConfigTo` after partitioning the first controller; you have to modify the controller so that the new one finishes the reconfiguration. The tester calls `InitController` when starting a controller; you can modify that function to check whether an interrupted configuration change needs to be completed. - -A good approach to allowing a controller to finish a reconfiguration that a previous one started is to keep two configurations: a current one and a next one, both stored in the controller's kvsrv. When a controller starts a reconfiguration, it stores the next configuration. Once a controller completes the reconfiguration, it makes the next configuration the current one. Modify `InitController` to first check if there is a stored next configuration with a higher configuration number than the current one, and if so, complete the shard moves necessary to reconfigure to the next one. - -Modify shardctrler to implement the above approach. A controller that picks up the work from a failed controller may repeat `FreezeShard`, `InstallShard`, and `Delete` RPCs; shardgrps can use `Num` to detect duplicates and reject them. You have completed this task if your solution passes the Part B tests. - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run 5B -Test (5B): Join/leave while a shardgrp is down... 
(reliable network)... - ... Passed -- time 9.2s #peers 1 #RPCs 899 #Ops 120 -Test (5B): recover controller ... (reliable network)... - ... Passed -- time 26.4s #peers 1 #RPCs 3724 #Ops 360 -PASS -ok 6.5840/shardkv1 35.805s -$ -``` - -- The tester calls `InitController` when starting a controller; you can implement recovery in that method in `shardctrler/shardctrler.go`.","Modify shardctrler to implement the above approach. A controller that picks up the work from a failed controller may repeat `FreezeShard`, `InstallShard`, and `Delete` RPCs; shardgrps can use `Num` to detect duplicates and reject them. You have completed this task if your solution passes the Part B tests.",- The tester calls `InitController` when starting a controller; you can implement recovery in that method in `shardctrler/shardctrler.go`.,,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/shardkv1 && go test -run 5B,"Test (5B): Join/leave while a shardgrp is down... (reliable network)... - ... Passed -- time 9.2s #peers 1 #RPCs 899 #Ops 120 -Test (5B): recover controller ... (reliable network)... - ... Passed -- time 26.4s #peers 1 #RPCs 3724 #Ops 360 -PASS -ok 6.5840/shardkv1 35.805s",easy,http://nil.csail.mit.edu/6.5840/2025/labs/lab-shard1.html -26,6.5840: Distributed Systems,Spring 2025,Lab 5: Sharded Key/Value Service,Part C: Concurrent configuration changes,"You can either do a [final project](http://nil.csail.mit.edu/6.5840/2025/project.html) based on your own ideas, or this lab. - -In this lab you'll build a key/value storage system that ""shards,"" or partitions, the keys over a set of Raft-replicated key/value server groups (shardgrps). A shard is a subset of the key/value pairs; for example, all the keys starting with ""a"" might be one shard, all the keys starting with ""b"" another, etc. The reason for sharding is performance. Each shardgrp handles puts and gets for just a few of the shards, and the shardgrps operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of shardgrps. - -![shardkv design](http://nil.csail.mit.edu/6.5840/2025/labs/shardkv.png) - -The sharded key/value service has the components shown above. Shardgrps (shown with blue squares) store shards with keys: shardgrp 1 holds a shard storing key ""a"", and shardgrp 2 holds a shard storing key ""b"". Clients of the sharded key/value service interact with the service through a clerk (shown with a green circle), which implements `Get` and `Put` methods. To find the shardgrp for a key passed to `Put`/`Get`, the clerk gets the configuration from the kvsrv (shown with a black square), which you implemented in Lab 2. The configuration (not shown) describes the mapping from shards to shardgrps (e.g., shard 1 is served by shardgrp 3). - -An administrator (i.e., the tester) uses another client, the controller (shown with a purple circle), to add/remove shardgrps from the cluster and update which shardgrp should serve a shard. The controller has one main method: `ChangeConfigTo`, which takes as argument a new configuration and changes the system from the current configuration to the new configuration; this involves moving shards to new shardgrps that are joining the system and moving shards away from shardgrps that are leaving the system. To do so the controller 1) makes RPCs (`FreezeShard`, `InstallShard`, and `DeleteShard`) to shardgrps, and 2) updates the configuration stored in kvsrv. 
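Written out as code, the freeze/install/delete sequence the controller performs for each moving shard might look roughly like this; the shardgrp clerk's method signatures, the controller's `clnt` field, `putConfig`, and the shard-number types are assumptions. Shards whose owner does not change keep being served throughout the move.

```
// Sketch only: clerk signatures, sck.clnt, putConfig, and the shard
// index type are assumptions.
func (sck *ShardCtrler) ChangeConfigTo(next *shardcfg.ShardConfig) {
	cur := sck.Query()
	for sh := shardcfg.Tshid(0); sh < shardcfg.NShards; sh++ {
		oldGid, oldSrvs, _ := cur.GidServers(sh)
		newGid, newSrvs, _ := next.GidServers(sh)
		if oldGid == newGid {
			continue // unaffected shards keep being served as usual
		}
		src := shardgrp.MakeClerk(sck.clnt, oldSrvs)
		dst := shardgrp.MakeClerk(sck.clnt, newSrvs)
		state := src.FreezeShard(sh, next.Num) // source stops accepting Puts
		dst.InstallShard(sh, state, next.Num)  // destination takes the shard
		src.DeleteShard(sh, next.Num)          // drop the frozen copy
	}
	// Only after all moves complete do clients see the new assignment.
	sck.putConfig(next) // hypothetical helper: store next in the kvsrv
}
```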
- -The reason for the controller is that a sharded storage system must be able to shift shards among shardgrps. One reason is that some shardgrps may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that shardgrps may join and leave the system: new shardgrps may be added to increase capacity, or existing shardgrps may be taken offline for repair or retirement. - -The main challenges in this lab will be ensuring linearizability of `Get`/`Put` operations while handling 1) changes in the assignment of shards to shardgrps, and 2) recovering from a controller that fails or is partitioned during `ChangeConfigTo`. - -1. `ChangeConfigTo` moves shards from one shardgrp to another. A risk is that some clients might use the old shardgrp while other clients use the new shardgrp, which could break linearizability. You will need to ensure that at most one shardgrp is serving requests for each shard at any one time. -2. If `ChangeConfigTo` fails while reconfiguring, some shards may be inaccessible if they have started but not completed moving from one shardgrp to another. To make forward progress, the tester starts a new controller, and your job is to ensure that the new one completes the reconfiguration that the previous controller started. - -This lab uses ""configuration"" to refer to the assignment of shards to shardgrps. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. - -A shardgrp server is a member of only a single shardgrp. The set of servers in a given shardgrp will never change. - -Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. - -In Part A, you will implement a working `shardctrler`, which will store and retrieve configurations in a `kvsrv`. You will also implement the `shardgrp`, replicated with your Raft `rsm` package, and a corresponding `shardgrp` clerk. The `shardctrler` talks to the `shardgrp` clerks to move shards between different groups. - -In Part B, you will modify your `shardctrler` to handle failures and partitions during config changes. In Part C, you will extend your `shardctrler` to allow for concurrent controllers without interfering with each other. Finally, in Part D, you will have the opportunity to extend your solution in any way you like. - -This lab's sharded key/value service follows the same general design as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are simple; and so on. - -Lab 5 will use your `kvsrv` from Lab 2, and your `rsm` and `Raft` from Lab 4. Your Lab 5 and Lab 4 must use the same `rsm` and `Raft` implementations. - -You may use late hours for Part A, but you may not use late hours for Parts B-D.","Do a `git pull` to get the latest lab software. - -We supply you with tests and skeleton code in `src/shardkv1`: - -- `client.go` for the shardkv clerk -- `shardcfg` package for computing shard configurations -- `shardgrp` package: for the shardgrp clerk and server. 
-- `shardctrler` package, which contains `shardctrler.go` with methods for the controller to change a configuration (`ChangeConfigTo`) and to get a configuration (`Query`) - -To get up and running, execute the following commands: - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/shardkv1 -$ go test -v -=== RUN TestInitQuery5A -Test (5A): Init and Query ... (reliable network)... - shardkv_test.go:46: Static wrong null 0 -... -``` -",,"In this part of the lab you will modify the controller to allow for concurrent controllers. When a controller crashes or is partitioned, the tester will start a new controller, which must finish any work that the old controller might have in progress (i.e., finishing moving shards like in Part B). This means that several controllers may run concurrently and send RPCs to the shardgrps and the `kvsrv` that stores configurations. - -The main challenge is to ensure these controllers don't step on each other. In Part A you already fenced all the shardgrp RPCs with `Num` so that old RPCs are rejected. Even if several controllers pick up the work of an old controller concurrently, one of them succeeds and the others repeat all the RPCs, the shardgrps will ignore them. - -Thus the challenging case left is to ensure that only one controller updates the next configuration to avoid that two controllers (e.g., a partitioned one and a new one) put different configurations in the next one. To stress this scenario, the tester runs several controllers concurrently and each one computes the next configuration by reading the current configuration and updating it for a shardgrp that left or joined, and then the tester invokes `ChangeConfigTo`; thus multiple controllers may invoke `ChangeConfigTo` with different configuration with the same `Num`. You can use the version number of a key and versioned `Put`s to ensure that only one controller updates the next configuration and that the other invocations return without doing anything. - -Modify your controller so that only one controller can post a next configuration for a configuration `Num`. The tester will start many controllers but only one should start `ChangeConfigTo` for a new configuation. You have completed this task if you pass the concurrent tests of Part C: - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run TestConcurrentReliable5C -Test (5C): Concurrent ctrlers ... (reliable network)... - ... Passed -- time 8.2s #peers 1 #RPCs 1753 #Ops 120 -PASS -ok 6.5840/shardkv1 8.364s -$ go test -run TestAcquireLockConcurrentUnreliable5C -Test (5C): Concurrent ctrlers ... (unreliable network)... - ... Passed -- time 23.8s #peers 1 #RPCs 1850 #Ops 120 -PASS -ok 6.5840/shardkv1 24.008s -$ -``` - -- See `concurCtrler` in `test.go` to see how the tester runs controllers concurrently. - -In this exercise you will put recovery of an old controller together with a new controller: a new controller should perform recovery from Part B. If the old controller was partitioned during `ChangeConfigTo`, you will have to make sure that the old controller doesn't interfere with the new controller. If all the controller's updates are already properly fenced with `Num` checks from Part B, you don't have to write extra code. You have completed this task if you pass the `Partition` tests. - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run Partition -Test (5C): partition controller in join... (reliable network)... - ... Passed -- time 7.8s #peers 1 #RPCs 876 #Ops 120 -Test (5C): controllers with leased leadership ... (reliable network)... - ... 
Passed -- time 36.8s #peers 1 #RPCs 3981 #Ops 360 -Test (5C): controllers with leased leadership ... (unreliable network)... - ... Passed -- time 52.4s #peers 1 #RPCs 2901 #Ops 240 -Test (5C): controllers with leased leadership ... (reliable network)... - ... Passed -- time 60.2s #peers 1 #RPCs 27415 #Ops 11182 -Test (5C): controllers with leased leadership ... (unreliable network)... - ... Passed -- time 60.5s #peers 1 #RPCs 11422 #Ops 2336 -PASS -ok 6.5840/shardkv1 217.779s -$ -``` - -You have completed implementing a highly-available sharded key/value service with many shard groups for scalability, reconfiguration to handle changes in load, and with a fault-tolerant controller; congrats! - -Rerun all tests to check that your recent changes to the controller haven't broken earlier tests. - -Gradescope will rerun the Lab 3A-D and Lab 4A-C tests on your submission, in addition to the 5C tests. Before submitting, double check that your solution works: - -``` -$ go test ./raft1 -$ go test ./kvraft1 -$ go test ./shardkv1 -``` -","1. task1 - - Modify your controller so that only one controller can post a next configuration for a configuration `Num`. The tester will start many controllers but only one should start `ChangeConfigTo` for a new configuation. You have completed this task if you pass the concurrent tests of Part C: - -2. task2 - - In this exercise you will put recovery of an old controller together with a new controller: a new controller should perform recovery from Part B. If the old controller was partitioned during `ChangeConfigTo`, you will have to make sure that the old controller doesn't interfere with the new controller. If all the controller's updates are already properly fenced with `Num` checks from Part B, you don't have to write extra code. You have completed this task if you pass the `Partition` tests. - -3. task3 - - Rerun all tests to check that your recent changes to the controller haven't broken earlier tests.",- See `concurCtrler` in `test.go` to see how the tester runs controllers concurrently.,,git://g.csail.mit.edu/6.5840-golabs-2025,,6.5840-golabs-2025,cd src/shardkv1 && go test -run Partition,"Test (5C): partition controller in join... (reliable network)... - ... Passed -- time 7.8s #peers 1 #RPCs 876 #Ops 120 -Test (5C): controllers with leased leadership ... (reliable network)... - ... Passed -- time 36.8s #peers 1 #RPCs 3981 #Ops 360 -Test (5C): controllers with leased leadership ... (unreliable network)... - ... Passed -- time 52.4s #peers 1 #RPCs 2901 #Ops 240 -Test (5C): controllers with leased leadership ... (reliable network)... - ... Passed -- time 60.2s #peers 1 #RPCs 27415 #Ops 11182 -Test (5C): controllers with leased leadership ... (unreliable network)... - ... Passed -- time 60.5s #peers 1 #RPCs 11422 #Ops 2336 -PASS -ok 6.5840/shardkv1 217.779s",moderate,http://nil.csail.mit.edu/6.5840/2025/labs/lab-shard1.html -27,6.1810: Operating System Engineering,2024,Lab: Xv6 and Unix utilities,Boot xv6,This lab will familiarize you with xv6 and its system calls.,,,"Have a look at the [lab tools page](https://pdos.csail.mit.edu/6.1810/2024/tools.html) for information about how to set up your computer to run these labs. - -Fetch the git repository for the xv6 source for the lab: - -``` -$ git clone git://g.csail.mit.edu/xv6-labs-2024 -Cloning into 'xv6-labs-2024'... -... -$ cd xv6-labs-2024 -``` - -The files you will need for this and subsequent labs are distributed using the [Git](http://www.git-scm.com/) version control system. 
For each of the labs you will check out a version of xv6 tailored for that lab. To learn more about Git, take a look at the [Git user's manual](http://www.kernel.org/pub/software/scm/git/docs/user-manual.html), or this [CS-oriented overview of Git](http://eagain.net/articles/git-for-computer-scientists/). Git allows you to keep track of the changes you make to the code. For example, if you are finished with one of the exercises, and want to checkpoint your progress, you can *commit* your changes by running: - -``` -$ git commit -am 'my solution for util lab exercise 1' -Created commit 60d2135: my solution for util lab exercise 1 - 1 files changed, 1 insertions(+), 0 deletions(-) -$ -``` - -You can view your changes with git diff, which displays changes since your last commit. git diff origin/util displays changes relative to the initial `util` code. `origin/util` is the name of the git branch for this lab. - -Build and run xv6: - -``` -$ make qemu -riscv64-unknown-elf-gcc -c -o kernel/entry.o kernel/entry.S -riscv64-unknown-elf-gcc -Wall -Werror -O -fno-omit-frame-pointer -ggdb -DSOL_UTIL -MD -mcmodel=medany -ffreestanding -fno-common -nostdlib -mno-relax -I. -fno-stack-protector -fno-pie -no-pie -c -o kernel/start.o kernel/start.c -... -riscv64-unknown-elf-ld -z max-page-size=4096 -N -e main -Ttext 0 -o user/_zombie user/zombie.o user/ulib.o user/usys.o user/printf.o user/umalloc.o -riscv64-unknown-elf-objdump -S user/_zombie > user/zombie.asm -riscv64-unknown-elf-objdump -t user/_zombie | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$/d' > user/zombie.sym -mkfs/mkfs fs.img README user/xargstest.sh user/_cat user/_echo user/_forktest user/_grep user/_init user/_kill user/_ln user/_ls user/_mkdir user/_rm user/_sh user/_stressfs user/_usertests user/_grind user/_wc user/_zombie -nmeta 46 (boot, super, log blocks 30 inode blocks 13, bitmap blocks 1) blocks 954 total 1000 -balloc: first 591 blocks have been allocated -balloc: write bitmap block at sector 45 -qemu-system-riscv64 -machine virt -bios none -kernel kernel/kernel -m 128M -smp 3 -nographic -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 - -xv6 kernel is booting - -hart 2 starting -hart 1 starting -init: starting sh -$ -``` - -If you type `ls` at the prompt, you should see output similar to the following: - -``` -$ ls -. 1 1 1024 -.. 1 1 1024 -README 2 2 2227 -xargstest.sh 2 3 93 -cat 2 4 32864 -echo 2 5 31720 -forktest 2 6 15856 -grep 2 7 36240 -init 2 8 32216 -kill 2 9 31680 -ln 2 10 31504 -ls 2 11 34808 -mkdir 2 12 31736 -rm 2 13 31720 -sh 2 14 54168 -stressfs 2 15 32608 -usertests 2 16 178800 -grind 2 17 47528 -wc 2 18 33816 -zombie 2 19 31080 -console 3 20 0 -``` - -These are the files that `mkfs` includes in the initial file system; most are programs you can run. You just ran one of them: `ls`. - -xv6 has no `ps` command, but, if you type Ctrl-p, the kernel will print information about each process. If you try it now, you'll see two lines: one for `init`, and one for `sh`. - -To quit qemu type: Ctrl-a x (press Ctrl and a at the same time, followed by x).",,,,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,make qemu && ls,"riscv64-unknown-elf-gcc -c -o kernel/entry.o kernel/entry.S -riscv64-unknown-elf-gcc -Wall -Werror -O -fno-omit-frame-pointer -ggdb -DSOL_UTIL -MD -mcmodel=medany -ffreestanding -fno-common -nostdlib -mno-relax -I. -fno-stack-protector -fno-pie -no-pie -c -o kernel/start.o kernel/start.c -... 
-riscv64-unknown-elf-ld -z max-page-size=4096 -N -e main -Ttext 0 -o user/_zombie user/zombie.o user/ulib.o user/usys.o user/printf.o user/umalloc.o -riscv64-unknown-elf-objdump -S user/_zombie > user/zombie.asm -riscv64-unknown-elf-objdump -t user/_zombie | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$/d' > user/zombie.sym -mkfs/mkfs fs.img README user/xargstest.sh user/_cat user/_echo user/_forktest user/_grep user/_init user/_kill user/_ln user/_ls user/_mkdir user/_rm user/_sh user/_stressfs user/_usertests user/_grind user/_wc user/_zombie -nmeta 46 (boot, super, log blocks 30 inode blocks 13, bitmap blocks 1) blocks 954 total 1000 -balloc: first 591 blocks have been allocated -balloc: write bitmap block at sector 45 -qemu-system-riscv64 -machine virt -bios none -kernel kernel/kernel -m 128M -smp 3 -nographic -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 - -xv6 kernel is booting - -hart 2 starting -hart 1 starting -init: starting sh",easy,https://pdos.csail.mit.edu/6.1810/2024/labs/util.html -28,6.1810: Operating System Engineering,2024,Lab: Xv6 and Unix utilities,sleep,This lab will familiarize you with xv6 and its system calls.,,,"Implement a user-level `sleep` program for xv6, along the lines of the UNIX sleep command. Your `sleep` should pause for a user-specified number of ticks. A tick is a notion of time defined by the xv6 kernel, namely the time between two interrupts from the timer chip. Your solution should be in the file `user/sleep.c`. - -Some hints: - -- Before you start coding, read Chapter 1 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf). -- Put your code in `user/sleep.c`. Look at some of the other programs in `user/` (e.g., `user/echo.c`, `user/grep.c`, and `user/rm.c`) to see how command-line arguments are passed to a program. -- Add your `sleep` program to `UPROGS` in Makefile; once you've done that, `make qemu` will compile your program and you'll be able to run it from the xv6 shell. -- If the user forgets to pass an argument, sleep should print an error message. -- The command-line argument is passed as a string; you can convert it to an integer using `atoi` (see user/ulib.c). -- Use the system call `sleep`. -- See `kernel/sysproc.c` for the xv6 kernel code that implements the `sleep` system call (look for `sys_sleep`), `user/user.h` for the C definition of `sleep` callable from a user program, and `user/usys.S` for the assembler code that jumps from user code into the kernel for `sleep`. -- sleep's `main` should call `exit(0)` when it is done. -- Look at Kernighan and Ritchie's book *The C programming language (second edition)* (K&R) to learn about C. - -Run the program from the xv6 shell: - -``` - $ make qemu - ... - init: starting sh - $ sleep 10 - (nothing happens for a little while) - $ - -``` - -Your program should pause when run as shown above. Run make grade in your command line (outside of qemu) to see if you pass the sleep tests. - -Note that make grade runs all tests, including the ones for the tasks below. If you want to run the grade tests for one task, type: - -``` - $ ./grade-lab-util sleep - -``` - -This will run the grade tests that match ""sleep"". Or, you can type: - -``` - $ make GRADEFLAGS=sleep grade - -``` - -which does the same.","Implement a user-level `sleep` program for xv6, along the lines of the UNIX sleep command. Your `sleep` should pause for a user-specified number of ticks. 
A tick is a notion of time defined by the xv6 kernel, namely the time between two interrupts from the timer chip. Your solution should be in the file `user/sleep.c`.","- Before you start coding, read Chapter 1 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf). -- Put your code in `user/sleep.c`. Look at some of the other programs in `user/` (e.g., `user/echo.c`, `user/grep.c`, and `user/rm.c`) to see how command-line arguments are passed to a program. -- Add your `sleep` program to `UPROGS` in Makefile; once you've done that, `make qemu` will compile your program and you'll be able to run it from the xv6 shell. -- If the user forgets to pass an argument, sleep should print an error message. -- The command-line argument is passed as a string; you can convert it to an integer using `atoi` (see user/ulib.c). -- Use the system call `sleep`. -- See `kernel/sysproc.c` for the xv6 kernel code that implements the `sleep` system call (look for `sys_sleep`), `user/user.h` for the C definition of `sleep` callable from a user program, and `user/usys.S` for the assembler code that jumps from user code into the kernel for `sleep`. -- sleep's `main` should call `exit(0)` when it is done. -- Look at Kernighan and Ritchie's book *The C programming language (second edition)* (K&R) to learn about C.",,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,make qemu && sleep 10,(nothing happens for a little while),easy,https://pdos.csail.mit.edu/6.1810/2024/labs/util.html -29,6.1810: Operating System Engineering,2024,Lab: Xv6 and Unix utilities,pingpong,This lab will familiarize you with xv6 and its system calls.,,,"Write a user-level program that uses xv6 system calls to ''ping-pong'' a byte between two processes over a pair of pipes, one for each direction. The parent should send a byte to the child; the child should print ""<pid>: received ping"", where <pid> is its process ID, write the byte on the pipe to the parent, and exit; the parent should read the byte from the child, print ""<pid>: received pong"", and exit. Your solution should be in the file `user/pingpong.c`. - -Some hints: - -- Add the program to `UPROGS` in Makefile. -- You'll need to use the `pipe`, `fork`, `write`, `read`, and `getpid` system calls. -- User programs on xv6 have a limited set of library functions available to them. You can see the list in `user/user.h`; the source (other than for system calls) is in `user/ulib.c`, `user/printf.c`, and `user/umalloc.c`. - -Run the program from the xv6 shell and it should produce the following output: - -``` - $ make qemu - ... - init: starting sh - $ pingpong - 4: received ping - 3: received pong - $ - -``` - -Your program should exchange a byte between two processes and produce output as shown above. Run make grade to check.","Write a user-level program that uses xv6 system calls to ''ping-pong'' a byte between two processes over a pair of pipes, one for each direction. The parent should send a byte to the child; the child should print ""<pid>: received ping"", where <pid> is its process ID, write the byte on the pipe to the parent, and exit; the parent should read the byte from the child, print ""<pid>: received pong"", and exit. Your solution should be in the file `user/pingpong.c`.","- Add the program to `UPROGS` in Makefile. -- You'll need to use the `pipe`, `fork`, `write`, `read`, and `getpid` system calls. -- User programs on xv6 have a limited set of library functions available to them.
You can see the list in `user/user.h`; the source (other than for system calls) is in `user/ulib.c`, `user/printf.c`, and `user/umalloc.c`.",,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,make qemu && pingpong,"4: received ping -3: received pong",easy,https://pdos.csail.mit.edu/6.1810/2024/labs/util.html -30,6.1810: Operating System Engineering,2024,Lab: Xv6 and Unix utilities,primes,This lab will familiarize you with xv6 and its system calls.,,,"Write a concurrent prime sieve program for xv6 using pipes and the design illustrated in the picture halfway down [this page](http://swtch.com/~rsc/thread/) and the surrounding text. This idea is due to Doug McIlroy, inventor of Unix pipes. Your solution should be in the file `user/primes.c`. - -Your goal is to use `pipe` and `fork` to set up the pipeline. The first process feeds the numbers 2 through 280 into the pipeline. For each prime number, you will arrange to create one process that reads from its left neighbor over a pipe and writes to its right neighbor over another pipe. Since xv6 has limited number of file descriptors and processes, the first process can stop at 280. - -Some hints: - -- Be careful to close file descriptors that a process doesn't need, because otherwise your program will run xv6 out of resources before the first process reaches 280. -- Once the first process reaches 280, it should wait until the entire pipeline terminates, including all children, grandchildren, &c. Thus the main primes process should only exit after all the output has been printed, and after all the other primes processes have exited. -- Hint: `read` returns zero when the write-side of a pipe is closed. -- It's simplest to directly write 32-bit (4-byte) `int`s to the pipes, rather than using formatted ASCII I/O. -- You should create the processes in the pipeline only as they are needed. -- Add the program to `UPROGS` in Makefile. -- If you get an infinite recursion error from the compiler for the function `primes`, you may have to declare `void primes(int) __attribute__((noreturn));` to indicate that `primes` doesn't return. - -Your solution should implement a pipe-based sieve and produce the following output: - -``` - $ make qemu - ... - init: starting sh - $ primes - prime 2 - prime 3 - prime 5 - prime 7 - prime 11 - prime 13 - prime 17 - prime 19 - prime 23 - prime 29 - prime 31 - ... - $ - -```","Write a concurrent prime sieve program for xv6 using pipes and the design illustrated in the picture halfway down [this page](http://swtch.com/~rsc/thread/) and the surrounding text. This idea is due to Doug McIlroy, inventor of Unix pipes. Your solution should be in the file `user/primes.c`. - -Your goal is to use `pipe` and `fork` to set up the pipeline. The first process feeds the numbers 2 through 280 into the pipeline. For each prime number, you will arrange to create one process that reads from its left neighbor over a pipe and writes to its right neighbor over another pipe. Since xv6 has limited number of file descriptors and processes, the first process can stop at 280.","- Be careful to close file descriptors that a process doesn't need, because otherwise your program will run xv6 out of resources before the first process reaches 280. -- Once the first process reaches 280, it should wait until the entire pipeline terminates, including all children, grandchildren, &c. Thus the main primes process should only exit after all the output has been printed, and after all the other primes processes have exited. 
-- Hint: `read` returns zero when the write-side of a pipe is closed. -- It's simplest to directly write 32-bit (4-byte) `int`s to the pipes, rather than using formatted ASCII I/O. -- You should create the processes in the pipeline only as they are needed. -- Add the program to `UPROGS` in Makefile. -- If you get an infinite recursion error from the compiler for the function `primes`, you may have to declare `void primes(int) __attribute__((noreturn));` to indicate that `primes` doesn't return.",,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,make qemu && primes,"... -init: starting sh -$ primes -prime 2 -prime 3 -prime 5 -prime 7 -prime 11 -prime 13 -prime 17 -prime 19 -prime 23 -prime 29 -prime 31 -…",moderate/hard,https://pdos.csail.mit.edu/6.1810/2024/labs/util.html -31,6.1810: Operating System Engineering,2024,Lab: Xv6 and Unix utilities,find,This lab will familiarize you with xv6 and its system calls.,,,"Write a simple version of the UNIX find program for xv6: find all the files in a directory tree with a specific name. Your solution should be in the file `user/find.c`. - -Some hints: - -- Look at user/ls.c to see how to read directories. -- Use recursion to allow find to descend into sub-directories. -- Don't recurse into ""."" and "".."". -- Changes to the file system persist across runs of qemu; to get a clean file system run make clean and then make qemu. -- You'll need to use C strings. Have a look at K&R (the C book), for example Section 5.5. -- Note that == does not compare strings like in Python. Use strcmp() instead. -- Add the program to `UPROGS` in Makefile. - -Your solution should produce the following output (when the file system contains the files `b`, `a/b` and `a/aa/b`): - -``` - $ make qemu - ... - init: starting sh - $ echo > b - $ mkdir a - $ echo > a/b - $ mkdir a/aa - $ echo > a/aa/b - $ find . b - ./b - ./a/b - ./a/aa/b - $ - -``` - -Run `make grade` to see what our tests think.",Write a simple version of the UNIX find program for xv6: find all the files in a directory tree with a specific name. Your solution should be in the file `user/find.c`.,"- Look at user/ls.c to see how to read directories. -- Use recursion to allow find to descend into sub-directories. -- Don't recurse into ""."" and "".."". -- Changes to the file system persist across runs of qemu; to get a clean file system run make clean and then make qemu. -- You'll need to use C strings. Have a look at K&R (the C book), for example Section 5.5. -- Note that == does not compare strings like in Python. Use strcmp() instead. -- Add the program to `UPROGS` in Makefile.",,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,make qemu && echo > b && mkdir a && echo > a/b && mkdir a/aa && echo > a/aa/b && find . B,"./b -./a/b -./a/aa/b",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/util.html -32,6.1810: Operating System Engineering,2024,Lab: Xv6 and Unix utilities,xargs,This lab will familiarize you with xv6 and its system calls.,,,"Write a simple version of the UNIX xargs program for xv6: its arguments describe a command to run, it reads lines from the standard input, and it runs the command for each line, appending the line to the command's arguments. Your solution should be in the file `user/xargs.c`. - -The following example illustrates xarg's behavior: - -``` - $ echo hello too | xargs echo bye - bye hello too - $ - -``` - -Note that the command here is ""echo bye"" and the additional arguments are ""hello too"", making the command ""echo bye hello too"", which outputs ""bye hello too"". 
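-
-The example above boils down to the fork/exec/wait pattern that xargs repeats for each input line. The sketch below is only a minimal illustration of that pattern, not the full assignment: it reads a single line from standard input and runs `echo` with that line appended as an extra argument; the buffer size and variable names are illustrative.
-
-```
-#include ""kernel/types.h""
-#include ""kernel/stat.h""
-#include ""user/user.h""
-
-int
-main(void)
-{
-  char line[128];
-  char c;
-  int n = 0;
-
-  // Build one input line: read a character at a time until newline or EOF.
-  while(read(0, &c, 1) == 1 && c != '\n' && n < (int)sizeof(line) - 1)
-    line[n++] = c;
-  line[n] = '\0';
-
-  // Run the command in a child, with the line as its last argument.
-  char *cmd[] = { ""echo"", line, 0 };
-  if(fork() == 0){
-    exec(""echo"", cmd);   // exec only returns on failure
-    exit(1);
-  }
-  wait(0);
-  exit(0);
-}
-```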
- -Please note that xargs on UNIX makes an optimization where it will feed more than one argument to the command at a time. We don't expect you to make this optimization. To make xargs on UNIX behave the way we want it to for this lab, please run it with the -n option set to 1. For instance - -``` - $ (echo 1 ; echo 2) | xargs -n 1 echo - 1 - 2 - $ - -``` - -Some hints: - -- Use `fork` and `exec` to invoke the command on each line of input. Use `wait` in the parent to wait for the child to complete the command. -- To read individual lines of input, read a character at a time until a newline ('\n') appears. -- kernel/param.h declares MAXARG, which may be useful if you need to declare an argv array. -- Add the program to `UPROGS` in Makefile. -- Changes to the file system persist across runs of qemu; to get a clean file system run make clean and then make qemu. - -xargs, find, and grep combine well: - -``` - $ find . b | xargs grep hello - -``` - -will run ""grep hello"" on each file named b in the directories below ""."". - -To test your solution for xargs, run the shell script xargstest.sh. Your solution should produce the following output: - -``` - $ make qemu - ... - init: starting sh - $ sh < xargstest.sh - $ $ $ $ $ $ hello - hello - hello - $ $ - -``` - -You may have to go back and fix bugs in your find program. The output has many `$` because the xv6 shell doesn't realize it is processing commands from a file instead of from the console, and prints a `$` for each command in the file.","Write a simple version of the UNIX xargs program for xv6: its arguments describe a command to run, it reads lines from the standard input, and it runs the command for each line, appending the line to the command's arguments. Your solution should be in the file `user/xargs.c`.","- Use `fork` and `exec` to invoke the command on each line of input. Use `wait` in the parent to wait for the child to complete the command. -- To read individual lines of input, read a character at a time until a newline ('\n') appears. -- kernel/param.h declares MAXARG, which may be useful if you need to declare an argv array. -- Add the program to `UPROGS` in Makefile. -- Changes to the file system persist across runs of qemu; to get a clean file system run make clean and then make qemu.",,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,make qemu && sh < xargstest.sh,"... -init: starting sh -$ sh < xargstest.sh -$ $ $ $ $ $ hello -hello -hello",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/util.html -33,6.1810: Operating System Engineering,2024,Lab: system calls,Using gdb,"In the last lab you used system calls to write a few utilities. In this lab you will add some new system calls to xv6, which will help you understand how they work and will expose you to some of the internals of the xv6 kernel. You will add more system calls in later labs. - -Before you start coding, read Chapter 2 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and Sections 4.3 and 4.4 of Chapter 4, and related source files: - -- The user-space ""stubs"" that route system calls into the kernel are in `user/usys.S`, which is generated by `user/usys.pl` when you run `make`. Declarations are in `user/user.h` -- The kernel-space code that routes a system call to the kernel function that implements it is in `kernel/syscall.c` and `kernel/syscall.h`. -- Process-related code is `kernel/proc.h` and `kernel/proc.c`. 
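-
-For orientation, the kernel-side routing mentioned in the list above is small: the trap path calls `syscall()`, which reads the call number that the user-space stub left in register `a7` and indexes a table of handler functions. The following is an abridged sketch of that dispatch; see `kernel/syscall.c` for the real code.
-
-```
-void
-syscall(void)
-{
-  int num;
-  struct proc *p = myproc();
-
-  num = p->trapframe->a7;               // the user stub put the call number in a7
-  if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
-    p->trapframe->a0 = syscalls[num](); // the return value travels back in a0
-  } else {
-    printf(""%d %s: unknown sys call %d\n"", p->pid, p->name, num);
-    p->trapframe->a0 = -1;
-  }
-}
-```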
- -To start the lab, switch to the syscall branch: - -``` - $ git fetch - $ git checkout syscall - $ make clean - -``` - -If you run `make grade` you will see that the grading script cannot exec `trace`. Your job is to add the necessary system calls and stubs to make `trace` work. Furthermore, you will notice `attacktest` fails.",,,"In many cases, print statements will be sufficient to debug your kernel, but sometimes it is useful to single step through code or get a stack back-trace. The GDB debugger can help. - -To help you become familiar with gdb, run make qemu-gdb and then fire up gdb in another window (see the gdb material on the [guidance page](https://pdos.csail.mit.edu/6.1810/2024/labs/guidance.html)). Once you have two windows open, type in the gdb window: - -``` -(gdb) b syscall -Breakpoint 1 at 0x80002142: file kernel/syscall.c, line 243. -(gdb) c -Continuing. -[Switching to Thread 1.2] - -Thread 2 hit Breakpoint 1, syscall () at kernel/syscall.c:243 -243 { -(gdb) layout src -(gdb) backtrace -``` - -The `layout` command splits the window in two, showing where gdb is in the source code. `backtrace` prints a stack backtrace. - -Answer the following questions in `answers-syscall.txt`. - -Looking at the backtrace output, which function called `syscall`? - -Type n a few times to step past `struct proc *p = myproc();` Once past this statement, type p /x *p, which prints the current process's `proc struct` (see `kernel/proc.h>`) in hex. - -What is the value of `p->trapframe->a7` and what does that value represent? (Hint: look `user/initcode.S`, the first user program xv6 starts.) - -The processor is running in supervisor mode, and we can print privileged registers such as `sstatus` (see [RISC-V privileged instructions](https://github.com/riscv/riscv-isa-manual/releases/download/Priv-v1.12/riscv-privileged-20211203.pdf) for a description): - -``` - (gdb) p /x $sstatus - -``` - -What was the previous mode that the CPU was in? - -The xv6 kernel code contains consistency checks whose failure causes the kernel to panic; you may find that your kernel modifications cause panics. For example, replace the statement `num = p->trapframe->a7;` with `num = * (int *) 0;` at the beginning of `syscall`, run make qemu, and you will see something similar to: - -``` -xv6 kernel is booting - -hart 2 starting -hart 1 starting -scause=0xd sepc=0x80001bfe stval=0x0 -panic: kerneltrap - -``` - -Quit out of `qemu`. - -To track down the source of a kernel page-fault panic, search for the `sepc` value printed for the panic you just saw in the file `kernel/kernel.asm`, which contains the assembly for the compiled kernel. - -Write down the assembly instruction the kernel is panicing at. Which register corresponds to the variable `num`? - -To inspect the state of the processor and the kernel at the faulting instruction, fire up gdb, and set a breakpoint at the faulting `epc`, like this: - -``` -(gdb) b *0x80001bfe -Breakpoint 1 at 0x80001bfe: file kernel/syscall.c, line 138. -(gdb) layout asm -(gdb) c -Continuing. -[Switching to Thread 1.3] - -Thread 3 hit Breakpoint 1, syscall () at kernel/syscall.c:138 -``` - -Confirm that the faulting assembly instruction is the same as the one you found above. - -Why does the kernel crash? Hint: look at figure 3-3 in the text; is address 0 mapped in the kernel address space? Is that confirmed by the value in `scause` above? 
(See description of `scause` in [RISC-V privileged instructions](https://pdos.csail.mit.edu/6.1810/2024/labs/n//github.com/riscv/riscv-isa-manual/releases/download/Priv-v1.12/riscv-privileged-20211203.pdf)) - -Note that `scause` was printed by the kernel panic above, but often you need to look at additional info to track down the problem that caused the panic. For example, to find out which user process was running when the kernel paniced, you can print the process's name: - -``` - (gdb) p p->name - -``` - -What is the name of the process that was running when the kernel paniced? What is its process id (`pid`)? - -You may want to revisit [Using the GNU Debugger](https://pdos.csail.mit.edu/6.828/2019/lec/gdb_slides.pdf) as needed. The [guidance page](https://pdos.csail.mit.edu/6.1810/2024/labs/guidance.html) also has debugging tips.",,,,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,,,easy,https://pdos.csail.mit.edu/6.1810/2024/labs/syscall.html -34,6.1810: Operating System Engineering,2024,Lab: system calls,System call tracing,"In the last lab you used system calls to write a few utilities. In this lab you will add some new system calls to xv6, which will help you understand how they work and will expose you to some of the internals of the xv6 kernel. You will add more system calls in later labs. - -Before you start coding, read Chapter 2 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and Sections 4.3 and 4.4 of Chapter 4, and related source files: - -- The user-space ""stubs"" that route system calls into the kernel are in `user/usys.S`, which is generated by `user/usys.pl` when you run `make`. Declarations are in `user/user.h` -- The kernel-space code that routes a system call to the kernel function that implements it is in `kernel/syscall.c` and `kernel/syscall.h`. -- Process-related code is `kernel/proc.h` and `kernel/proc.c`. - -To start the lab, switch to the syscall branch: - -``` - $ git fetch - $ git checkout syscall - $ make clean - -``` - -If you run `make grade` you will see that the grading script cannot exec `trace`. Your job is to add the necessary system calls and stubs to make `trace` work. Furthermore, you will notice `attacktest` fails.",,,"In this assignment you will add a system call tracing feature that may help you when debugging later labs. You'll create a new `trace` system call that will control tracing. It should take one argument, an integer ""mask"", whose bits specify which system calls to trace. For example, to trace the fork system call, a program calls `trace(1 << SYS_fork)`, where `SYS_fork` is a syscall number from `kernel/syscall.h`. You have to modify the xv6 kernel to print a line when each system call is about to return, if the system call's number is set in the mask. The line should contain the process id, the name of the system call and the return value; you don't need to print the system call arguments. The `trace` system call should enable tracing for the process that calls it and any children that it subsequently forks, but should not affect other processes. - -We provide a `trace` user-level program that runs another program with tracing enabled (see `user/trace.c`). 
When you're done, you should see output like this: - -``` -$ trace 32 grep hello README -3: syscall read -> 1023 -3: syscall read -> 966 -3: syscall read -> 70 -3: syscall read -> 0 -$ -$ trace 2147483647 grep hello README -4: syscall trace -> 0 -4: syscall exec -> 3 -4: syscall open -> 3 -4: syscall read -> 1023 -4: syscall read -> 966 -4: syscall read -> 70 -4: syscall read -> 0 -4: syscall close -> 0 -$ -$ grep hello README -$ -$ trace 2 usertests forkforkfork -usertests starting -test forkforkfork: 407: syscall fork -> 408 -408: syscall fork -> 409 -409: syscall fork -> 410 -410: syscall fork -> 411 -409: syscall fork -> 412 -410: syscall fork -> 413 -409: syscall fork -> 414 -411: syscall fork -> 415 -... -$ -``` - -In the first example above, trace invokes grep tracing just the read system call. The 32 is `1< 408 -408: syscall fork -> 409 -409: syscall fork -> 410 -410: syscall fork -> 411 -409: syscall fork -> 412 -410: syscall fork -> 413 -409: syscall fork -> 414 -411: syscall fork -> 415 -…",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/syscall.html -35,6.1810: Operating System Engineering,2024,Lab: system calls,Attack xv6,"In the last lab you used system calls to write a few utilities. In this lab you will add some new system calls to xv6, which will help you understand how they work and will expose you to some of the internals of the xv6 kernel. You will add more system calls in later labs. - -Before you start coding, read Chapter 2 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and Sections 4.3 and 4.4 of Chapter 4, and related source files: - -- The user-space ""stubs"" that route system calls into the kernel are in `user/usys.S`, which is generated by `user/usys.pl` when you run `make`. Declarations are in `user/user.h` -- The kernel-space code that routes a system call to the kernel function that implements it is in `kernel/syscall.c` and `kernel/syscall.h`. -- Process-related code is `kernel/proc.h` and `kernel/proc.c`. - -To start the lab, switch to the syscall branch: - -``` - $ git fetch - $ git checkout syscall - $ make clean - -``` - -If you run `make grade` you will see that the grading script cannot exec `trace`. Your job is to add the necessary system calls and stubs to make `trace` work. Furthermore, you will notice `attacktest` fails.",,,"The xv6 kernel isolates user programs from each other and isolates the kernel from user programs. As you saw in the above assignments, an application cannot directly call a function in the kernel or in another user program; instead, interactions occur only through system calls. However, if there is a bug in the implementation of a system call, an attacker may be able to exploit that bug to break the isolation boundaries. To get a sense for how bugs can be exploited, we have introduced a bug into xv6 and your goal is to exploit that bug to trick xv6 into revealing a secret from another process. - -The bug is that the call to `memset(mem, 0, sz)` at line 272 in `kernel/vm.c` to clear a newly-allocated page is omitted when compiling this lab. Similarly, when compiling `kernel/kalloc.c` for this lab the two lines that use `memset` to put garbage into free pages are omitted. The net effect of omitting these 3 lines (all marked by `ifndef LAB_SYSCALL`) is that newly allocated memory retains the contents from its previous use. - -`user/secret.c` writes an 8-byte secret in its memory and then exits (which frees its memory). 
Your goal is to add a few lines of code to `user/attack.c` to find the secret that a previous execution of `secret.c` wrote to memory, and write the 8 secret bytes to file descriptor 2. You'll receive full credit if `attacktest` prints: ""OK: secret is ebb.ebb"". (Note: the secret may be different for each run of `attacktest`.) - -You are allowed to modify `user/attack.c`, but you cannot make any other changes: you cannot modify the xv6 kernel sources, secret.c, attacktest.c, etc. - -Some hints: - -- Run `attacktest` in the xv6 shell. It should the following output: - - ``` - $ attacktest - FAIL: no/incorrect secret - ``` - - Note that despite the 3 deleted lines, xv6 appears to work correctly: it started the shell and it ran `attacktest`. In fact, if you run `usertests` most of them pass! - -- Read `user/attacktest.c`. It generates a random 8-byte string, which it passes to the program `secret`, which writes it into its memory. After `secret` exits, `attacktest` spawns `attack` and waits for `attack` to write the secret string to file descriptor 2. - -- Read `user/secret.c` and think about how you could trick xv6 into revealing the secret to `attack.c`. - -- Test your exploit by running `attacktest` in the xv6 shell. - -`user/secret.c` copies the secret bytes to memory whose address is 32 bytes after the start of a page. Change the 32 to 0 and you should see that your attack doesn't work anymore; why not? - -Small bugs that do not directly affect correctness but still can be exploited to break security (like the one above) make kernel programming challenging. xv6 is likely to have such bugs, although we try to not have them. Real kernels, which have many more lines of code than xv6, have a long history of such bugs. For example, see the public [Linux vulnerabilities](https://www.opencve.io/cve?vendor=linux&product=linux_kernel) and [how to report vulnerabilities](https://docs.kernel.org/process/security-bugs.html).","`user/secret.c` writes an 8-byte secret in its memory and then exits (which frees its memory). Your goal is to add a few lines of code to `user/attack.c` to find the secret that a previous execution of `secret.c` wrote to memory, and write the 8 secret bytes to file descriptor 2. You'll receive full credit if `attacktest` prints: ""OK: secret is ebb.ebb"". (Note: the secret may be different for each run of `attacktest`.)","- Run `attacktest` in the xv6 shell. It should the following output: - - ``` - $ attacktest - FAIL: no/incorrect secret - ``` - - Note that despite the 3 deleted lines, xv6 appears to work correctly: it started the shell and it ran `attacktest`. In fact, if you run `usertests` most of them pass! - -- Read `user/attacktest.c`. It generates a random 8-byte string, which it passes to the program `secret`, which writes it into its memory. After `secret` exits, `attacktest` spawns `attack` and waits for `attack` to write the secret string to file descriptor 2. - -- Read `user/secret.c` and think about how you could trick xv6 into revealing the secret to `attack.c`. - -- Test your exploit by running `attacktest` in the xv6 shell.",,git://g.csail.mit.edu/xv6-labs-2024,syscall,xv6-labs-2024,make qemu && attacktest,FAIL: no/incorrect secret,moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/syscall.html -36,6.1810: Operating System Engineering,2024,Lab: page tables,Inspect a user-process page table,"In this lab you will explore page tables and modify them to implement common OS features. 
- -Before you start coding, read Chapter 3 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and related files: - -- `kernel/memlayout.h`, which captures the layout of memory. -- `kernel/vm.c`, which contains most virtual memory (VM) code. -- `kernel/kalloc.c`, which contains code for allocating and freeing physical memory. - -It may also help to consult the [RISC-V privileged architecture manual](https://drive.google.com/file/d/17GeetSnT5wW3xNuAHI95-SI1gPGd5sJ_/view?usp=drive_link). - -To start the lab, switch to the pgtbl branch: - -``` - $ git fetch - $ git checkout pgtbl - $ make clean - -``` -",,,"To help you understand RISC-V page tables, your first task is to explain the page table for a user process. - -Run `make qemu` and run the user program `pgtbltest`. The `print_pgtbl` functions prints out the page-table entries for the first 10 and last 10 pages of the `pgtbltest` process using the `pgpte` system call that we added to xv6 for this lab. The output looks as follows: - -``` -va 0 pte 0x21FCF45B pa 0x87F3D000 perm 0x5B -va 1000 pte 0x21FCE85B pa 0x87F3A000 perm 0x5B -... -va 0xFFFFD000 pte 0x0 pa 0x0 perm 0x0 -va 0xFFFFE000 pte 0x21FD80C7 pa 0x87F60000 perm 0xC7 -va 0xFFFFF000 pte 0x20001C4B pa 0x80007000 perm 0x4B - -``` - -For every page table entry in the `print_pgtbl` output, explain what it logically contains and what its permission bits are. Figure 3.4 in the xv6 book might be helpful, although note that the figure might have a slightly different set of pages than process that's being inspected here. Note that xv6 doesn't place the virtual pages consecutively in physical memory.",,,,git://g.csail.mit.edu/xv6-labs-2024,pgtbl,xv6-labs-2024,make qemu && pgtbltest,"va 0 pte 0x21FCF45B pa 0x87F3D000 perm 0x5B -va 1000 pte 0x21FCE85B pa 0x87F3A000 perm 0x5B -... -va 0xFFFFD000 pte 0x0 pa 0x0 perm 0x0 -va 0xFFFFE000 pte 0x21FD80C7 pa 0x87F60000 perm 0xC7 -va 0xFFFFF000 pte 0x20001C4B pa 0x80007000 perm 0x4B - ",easy,https://pdos.csail.mit.edu/6.1810/2024/labs/pgtbl.html -37,6.1810: Operating System Engineering,2024,Lab: page tables,Speed up system calls,"In this lab you will explore page tables and modify them to implement common OS features. - -Before you start coding, read Chapter 3 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and related files: - -- `kernel/memlayout.h`, which captures the layout of memory. -- `kernel/vm.c`, which contains most virtual memory (VM) code. -- `kernel/kalloc.c`, which contains code for allocating and freeing physical memory. - -It may also help to consult the [RISC-V privileged architecture manual](https://drive.google.com/file/d/17GeetSnT5wW3xNuAHI95-SI1gPGd5sJ_/view?usp=drive_link). - -To start the lab, switch to the pgtbl branch: - -``` - $ git fetch - $ git checkout pgtbl - $ make clean - -``` -",,,"Some operating systems (e.g., Linux) speed up certain system calls by sharing data in a read-only region between userspace and the kernel. This eliminates the need for kernel crossings when performing these system calls. To help you learn how to insert mappings into a page table, your first task is to implement this optimization for the `getpid()` system call in xv6. - -When each process is created, map one read-only page at USYSCALL (a virtual address defined in `memlayout.h`). At the start of this page, store a `struct usyscall` (also defined in `memlayout.h`), and initialize it to store the PID of the current process. 
For this lab, `ugetpid()` has been provided on the userspace side and will automatically use the USYSCALL mapping. You will receive full credit for this part of the lab if the `ugetpid` test case passes when running `pgtbltest`. - -Some hints: - -- Choose permission bits that allow userspace to only read the page. -- There are a few things that need to be done over the lifecycle of a new page. For inspiration, understand the trapframe handling in `kernel/proc.c`. - -Which other xv6 system call(s) could be made faster using this shared page? Explain how.",,,,git://g.csail.mit.edu/xv6-labs-2024,pgtbl,xv6-labs-2024,,,easy,https://pdos.csail.mit.edu/6.1810/2024/labs/pgtbl.html -38,6.1810: Operating System Engineering,2024,Lab: page tables,Print a page table,"In this lab you will explore page tables and modify them to implement common OS features. - -Before you start coding, read Chapter 3 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and related files: - -- `kernel/memlayout.h`, which captures the layout of memory. -- `kernel/vm.c`, which contains most virtual memory (VM) code. -- `kernel/kalloc.c`, which contains code for allocating and freeing physical memory. - -It may also help to consult the [RISC-V privileged architecture manual](https://drive.google.com/file/d/17GeetSnT5wW3xNuAHI95-SI1gPGd5sJ_/view?usp=drive_link). - -To start the lab, switch to the pgtbl branch: - -``` - $ git fetch - $ git checkout pgtbl - $ make clean - -``` -",,,"To help you visualize RISC-V page tables, and perhaps to aid future debugging, your next task is to write a function that prints the contents of a page table. - -We added a system call `kpgtbl()`, which calls `vmprint()` in `vm.c`. It takes a `pagetable_t` argument, and your job is to print that pagetable in the format described below. - -When you run `print_kpgtbl()` test, your implementation should print the following output: - -``` -page table 0x0000000087f22000 - ..0x0000000000000000: pte 0x0000000021fc7801 pa 0x0000000087f1e000 - .. ..0x0000000000000000: pte 0x0000000021fc7401 pa 0x0000000087f1d000 - .. .. ..0x0000000000000000: pte 0x0000000021fc7c5b pa 0x0000000087f1f000 - .. .. ..0x0000000000001000: pte 0x0000000021fc70d7 pa 0x0000000087f1c000 - .. .. ..0x0000000000002000: pte 0x0000000021fc6c07 pa 0x0000000087f1b000 - .. .. ..0x0000000000003000: pte 0x0000000021fc68d7 pa 0x0000000087f1a000 - ..0xffffffffc0000000: pte 0x0000000021fc8401 pa 0x0000000087f21000 - .. ..0xffffffffffe00000: pte 0x0000000021fc8001 pa 0x0000000087f20000 - .. .. ..0xffffffffffffd000: pte 0x0000000021fd4c13 pa 0x0000000087f53000 - .. .. ..0xffffffffffffe000: pte 0x0000000021fd00c7 pa 0x0000000087f40000 - .. .. ..0xfffffffffffff000: pte 0x000000002000184b pa 0x0000000080006000 - -``` - -The first line displays the argument to `vmprint`. After that there is a line for each PTE, including PTEs that refer to page-table pages deeper in the tree. Each PTE line is indented by a number of `"" ..""` that indicates its depth in the tree. Each PTE line shows its virtual addresss, the pte bits, and the physical address extracted from the PTE. Don't print PTEs that are not valid. In the above example, the top-level page-table page has mappings for entries 0 and 255. The next level down for entry 0 has only index 0 mapped, and the bottom-level for that index 0 has a few entries mapped. - -Your code might emit different physical addresses than those shown above. The number of entries and the virtual addresses should be the same. 
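-
-One possible shape for such a printer is a recursive walk modeled on `freewalk()` in `kernel/vm.c`, using the `PTE_V`, `PTE_R`, `PTE_W`, `PTE_X`, and `PTE2PA` macros from `kernel/riscv.h`. The helper name, the depth bookkeeping, and the decision to ignore Sv39 sign extension for top-half addresses are all illustrative choices in this sketch, not requirements.
-
-```
-static void
-vmprint_level(pagetable_t pagetable, uint64 va, int depth)
-{
-  for(int i = 0; i < 512; i++){
-    pte_t pte = pagetable[i];
-    if((pte & PTE_V) == 0)
-      continue;                          // skip invalid entries
-    uint64 child = va + ((uint64)i << (12 + 9*(2 - depth)));
-    for(int d = 0; d <= depth; d++)      // one "" .."" per level of depth
-      printf("" .."");
-    printf(""%p: pte %p pa %p\n"", child, pte, PTE2PA(pte));
-    if((pte & (PTE_R|PTE_W|PTE_X)) == 0) // interior page-table page: recurse
-      vmprint_level((pagetable_t)PTE2PA(pte), child, depth + 1);
-  }
-}
-
-void
-vmprint(pagetable_t pagetable)
-{
-  printf(""page table %p\n"", (uint64)pagetable);
-  vmprint_level(pagetable, 0, 0);        // note: omits Sv39 sign extension
-}
-```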
- -Some hints: - -- Use the macros at the end of the file kernel/riscv.h. -- The function `freewalk` may be inspirational. -- Use `%p` in your printf calls to print out full 64-bit hex PTEs and addresses as shown in the example. - -For every leaf page in the `vmprint` output, explain what it logically contains and what its permission bits are, and how it relates to the output of the earlier `print_pgtbl()` exercise above. Figure 3.4 in the xv6 book might be helpful, although note that the figure might have a slightly different set of pages than the process that's being inspected here.",,"- Use the macros at the end of the file kernel/riscv.h. -- The function `freewalk` may be inspirational. -- Use `%p` in your printf calls to print out full 64-bit hex PTEs and addresses as shown in the example.",,git://g.csail.mit.edu/xv6-labs-2024,pgtbl,xv6-labs-2024,print_kpgtbl(),"page table 0x0000000087f22000 - ..0x0000000000000000: pte 0x0000000021fc7801 pa 0x0000000087f1e000 - .. ..0x0000000000000000: pte 0x0000000021fc7401 pa 0x0000000087f1d000 - .. .. ..0x0000000000000000: pte 0x0000000021fc7c5b pa 0x0000000087f1f000 - .. .. ..0x0000000000001000: pte 0x0000000021fc70d7 pa 0x0000000087f1c000 - .. .. ..0x0000000000002000: pte 0x0000000021fc6c07 pa 0x0000000087f1b000 - .. .. ..0x0000000000003000: pte 0x0000000021fc68d7 pa 0x0000000087f1a000 - ..0xffffffffc0000000: pte 0x0000000021fc8401 pa 0x0000000087f21000 - .. ..0xffffffffffe00000: pte 0x0000000021fc8001 pa 0x0000000087f20000 - .. .. ..0xffffffffffffd000: pte 0x0000000021fd4c13 pa 0x0000000087f53000 - .. .. ..0xffffffffffffe000: pte 0x0000000021fd00c7 pa 0x0000000087f40000 - .. .. ..0xfffffffffffff000: pte 0x000000002000184b pa 0x0000000080006000 - ",easy,https://pdos.csail.mit.edu/6.1810/2024/labs/pgtbl.html -39,6.1810: Operating System Engineering,2024,Lab: page tables,Use superpages,"In this lab you will explore page tables and modify them to implement common OS features. - -Before you start coding, read Chapter 3 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and related files: - -- `kernel/memlayout.h`, which captures the layout of memory. -- `kernel/vm.c`, which contains most virtual memory (VM) code. -- `kernel/kalloc.c`, which contains code for allocating and freeing physical memory. - -It may also help to consult the [RISC-V privileged architecture manual](https://drive.google.com/file/d/17GeetSnT5wW3xNuAHI95-SI1gPGd5sJ_/view?usp=drive_link). - -To start the lab, switch to the pgtbl branch: - -``` - $ git fetch - $ git checkout pgtbl - $ make clean - -``` -",,,"The RISC-V paging hardware supports two-megabyte pages as well as ordinary 4096-byte pages. The general idea of larger pages is called superpages, and (since RISC-V supports more than one size) 2M pages are called megapages. The operating system creates a superpage by setting the PTE_V and PTE_R bits in the level-1 PTE, and setting the physical page number to point to the start of a two-megabyte region of physical memory. This physical address must be two-mega-byte aligned (i.e., a multiple of two megabytes). You can read about this in the RISC-V privileged manual by searching for megapage and superpage; in particular, the top of page 112. Use of superpages decreases the amount of physical memory used by the page table, and can decrease misses in the TLB cache. For some programs this leads to large increases in performance. - -Your job is to modify the xv6 kernel to use superpages. 
In particular, if a user program calls sbrk() with a size of 2 megabytes or more, and the newly created address range includes one or more areas that are two-megabyte-aligned and at least two megabytes in size, the kernel should use a single superpage (instead of hundreds of ordinary pages). You will receive full credit for this part of the lab if the `superpg_test` test case passes when running `pgtbltest`. - -Some hints: - -- Read `superpg_test` in `user/pgtbltest.c`. -- A good place to start is `sys_sbrk` in `kernel/sysproc.c`, which is invoked by the `sbrk` system call. Follow the code path to the function that allocates memory for `sbrk`. -- Your kernel will need to be able to allocate and free two-megabyte regions. Modify kalloc.c to set aside a few two-megabyte areas of physical memory, and create superalloc() and superfree() functions. You'll only need a handful of two-megabyte chunks of memory. -- Superpages must be allocated when a process with superpages forks, and freed when it exits; you'll need to modify `uvmcopy()` and `uvmunmap()`. - -Real operating systems dynamically promote a collection of pages to a superpage. The following reference explains why that is a good idea and what is hard in a more serious design: [Juan Navarro, Sitaram Iyer, Peter Druschel, and Alan Cox. Practical, transparent operating system support for superpages. SIGOPS Oper. Syst. Rev., 36(SI):89-104, December 2002.](https://www.usenix.org/conference/osdi-02/practical-transparent-operating-system-support-superpages) This reference summarizes superpage-implementations for different OSes: [A comprehensive analysis of superpage management mechanism and policies](https://www.usenix.org/conference/atc20/presentation/zhu-weixi). -","Your job is to modify the xv6 kernel to use superpages. In particular, if a user program calls sbrk() with a size of 2 megabytes or more, and the newly created address range includes one or more areas that are two-megabyte-aligned and at least two megabytes in size, the kernel should use a single superpage (instead of hundreds of ordinary pages). You will receive full credit for this part of the lab if the `superpg_test` test case passes when running `pgtbltest`.","- Read `superpg_test` in `user/pgtbltest.c`. -- A good place to start is `sys_sbrk` in `kernel/sysproc.c`, which is invoked by the `sbrk` system call. Follow the code path to the function that allocates memory for `sbrk`. -- Your kernel will need to be able to allocate and free two-megabyte regions. Modify kalloc.c to set aside a few two-megabyte areas of physical memory, and create superalloc() and superfree() functions. You'll only need a handful of two-megabyte chunks of memory. -- Superpages must be allocated when a process with superpages forks, and freed when it exits; you'll need to modify `uvmcopy()` and `uvmunmap()`.",,git://g.csail.mit.edu/xv6-labs-2024,,xv6-labs-2024,,,moderate/hard,https://pdos.csail.mit.edu/6.1810/2024/labs/pgtbl.html -40,6.1810: Operating System Engineering,2024,Lab: traps,RISC-V assembly,"This lab explores how system calls are implemented using traps. You will first do a warm-up exercises with stacks and then you will implement an example of user-level trap handling. 
- -Before you start coding, read Chapter 4 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and related source files: - -- `kernel/trampoline.S`: the assembly involved in changing from user space to kernel space and back -- `kernel/trap.c`: code handling all interrupts - -To start the lab, switch to the trap branch: - -``` - $ git fetch - $ git checkout traps - $ make clean - -``` -",,,"It will be important to understand a bit of RISC-V assembly, which you were exposed to in 6.1910 (6.004). There is a file `user/call.c` in your xv6 repo. make fs.img compiles it and also produces a readable assembly version of the program in `user/call.asm`. - -Read the code in call.asm for the functions `g`, `f`, and `main`. The instruction manual for RISC-V is on the [reference page](https://pdos.csail.mit.edu/6.1810/2024/reference.html). Answer the following questions in `answers-traps.txt`: - -Which registers contain arguments to functions? For example, which register holds 13 in main's call to `printf`? - -Where is the call to function `f` in the assembly code for main? Where is the call to `g`? (Hint: the compiler may inline functions.) - -At what address is the function `printf` located? - -What value is in the register `ra` just after the `jalr` to `printf` in `main`? - -Run the following code. - -``` - unsigned int i = 0x00646c72; - printf(""H%x Wo%s"", 57616, (char *) &i); - -``` - -What is the output? [Here's an ASCII table](https://www.asciitable.com/) that maps bytes to characters. - -The output depends on that fact that the RISC-V is little-endian. If the RISC-V were instead big-endian what would you set `i` to in order to yield the same output? Would you need to change `57616` to a different value? - -[Here's a description of little- and big-endian](http://www.webopedia.com/TERM/b/big_endian.html) and [a more whimsical description](https://www.rfc-editor.org/ien/ien137.txt). - -In the following code, what is going to be printed after `'y='`? (note: the answer is not a specific value.) Why does this happen? - -``` - printf(""x=%d y=%d"", 3); - -``` -",,,," $ git fetch - $ git checkout traps - $ make clean",,xv6-labs-2024,,,easy,https://pdos.csail.mit.edu/6.1810/2024/labs/traps.html -41,6.1810: Operating System Engineering,2024,Lab: traps,Backtrace,"This lab explores how system calls are implemented using traps. You will first do a warm-up exercises with stacks and then you will implement an example of user-level trap handling. - -Before you start coding, read Chapter 4 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and related source files: - -- `kernel/trampoline.S`: the assembly involved in changing from user space to kernel space and back -- `kernel/trap.c`: code handling all interrupts - -To start the lab, switch to the trap branch: - -``` - $ git fetch - $ git checkout traps - $ make clean - -``` -",,,"For debugging it is often useful to have a backtrace: a list of the function calls on the stack above the point at which the error occurred. To help with backtraces, the compiler generates machine code that maintains a stack frame on the stack corresponding to each function in the current call chain. Each stack frame consists of the return address and a ""frame pointer"" to the caller's stack frame. Register `s0` contains a pointer to the current stack frame (it actually points to the the address of the saved return address on the stack plus 8). 
Your `backtrace` should use the frame pointers to walk up the stack and print the saved return address in each stack frame. - -Implement a `backtrace()` function in `kernel/printf.c`. Insert a call to this function in `sys_sleep`, and then run bttest, which calls `sys_sleep`. Your output should be a list of return addresses with this form (but the numbers will likely be different): - -``` - backtrace: - 0x0000000080002cda - 0x0000000080002bb6 - 0x0000000080002898 - -``` - -After `bttest` exit qemu. In a terminal window: run `addr2line -e kernel/kernel` (or `riscv64-unknown-elf-addr2line -e kernel/kernel`) and cut-and-paste the addresses from your backtrace, like this: - -``` - $ addr2line -e kernel/kernel - 0x0000000080002de2 - 0x0000000080002f4a - 0x0000000080002bfc - Ctrl-D - -``` - -You should see something like this: - -``` - kernel/sysproc.c:74 - kernel/syscall.c:224 - kernel/trap.c:85 - -``` - -Some hints: - -- Add the prototype for your `backtrace()` to `kernel/defs.h` so that you can invoke `backtrace` in `sys_sleep`. - -- The GCC compiler stores the frame pointer of the currently executing function in the register s0 . In the section marked by #ifndef __ASSEMBLER__ ... #endif, add the following function to `kernel/riscv.h` : - - ``` - static inline uint64 - r_fp() - { - uint64 x; - asm volatile(""mv %0, s0"" : ""=r"" (x) ); - return x; - } - ``` - - and call this function in `backtrace` to read the current frame pointer. `r_fp()` uses in-line assembly to read s0 . - -- These [lecture notes](https://pdos.csail.mit.edu/6.1810/2023/lec/l-riscv.txt) have a picture of the layout of stack frames. Note that the return address lives at a fixed offset (-8) from the frame pointer of a stackframe, and that the saved frame pointer lives at fixed offset (-16) from the frame pointer. - -- Your `backtrace()` will need a way to recognize that it has seen the last stack frame, and should stop. A useful fact is that the memory allocated for each kernel stack consists of a single page-aligned page, so that all the stack frames for a given stack are on the same page. You can use `PGROUNDDOWN(fp)` (see `kernel/riscv.h`) to identify the page that a frame pointer refers to. - -Once your backtrace is working, call it from `panic` in `kernel/printf.c` so that you see the kernel's backtrace when it panics.","Implement a `backtrace()` function in `kernel/printf.c`. Insert a call to this function in `sys_sleep`, and then run bttest, which calls `sys_sleep`. Your output should be a list of return addresses with this form (but the numbers will likely be different): - -``` -backtrace: -0x0000000080002cda -0x0000000080002bb6 -0x0000000080002898 - -``` - -After `bttest` exit qemu. In a terminal window: run `addr2line -e kernel/kernel` (or `riscv64-unknown-elf-addr2line -e kernel/kernel`) and cut-and-paste the addresses from your backtrace, like this: - -``` -$ addr2line -e kernel/kernel -0x0000000080002de2 -0x0000000080002f4a -0x0000000080002bfc -Ctrl-D - -``` - -You should see something like this: - -``` -kernel/sysproc.c:74 -kernel/syscall.c:224 -kernel/trap.c:85 - -``` -","- Add the prototype for your `backtrace()` to `kernel/defs.h` so that you can invoke `backtrace` in `sys_sleep`. - -- The GCC compiler stores the frame pointer of the currently executing function in the register s0 . In the section marked by #ifndef __ASSEMBLER__ ... 
#endif, add the following function to `kernel/riscv.h` : - - ``` - static inline uint64 - r_fp() - { - uint64 x; - asm volatile(""mv %0, s0"" : ""=r"" (x) ); - return x; - } - ``` - - and call this function in `backtrace` to read the current frame pointer. `r_fp()` uses in-line assembly to read s0 . - -- These [lecture notes](https://pdos.csail.mit.edu/6.1810/2023/lec/l-riscv.txt) have a picture of the layout of stack frames. Note that the return address lives at a fixed offset (-8) from the frame pointer of a stackframe, and that the saved frame pointer lives at fixed offset (-16) from the frame pointer. - -- Your `backtrace()` will need a way to recognize that it has seen the last stack frame, and should stop. A useful fact is that the memory allocated for each kernel stack consists of a single page-aligned page, so that all the stack frames for a given stack are on the same page. You can use `PGROUNDDOWN(fp)` (see `kernel/riscv.h`) to identify the page that a frame pointer refers to.",," $ git fetch - $ git checkout traps - $ make clean",,xv6-labs-2024,addr2line -e kernel/kernel,"0x0000000080002de2 -0x0000000080002f4a -0x0000000080002bfc -Ctrl-D",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/traps.html -42,6.1810: Operating System Engineering,2024,Lab: traps,Alarm,"This lab explores how system calls are implemented using traps. You will first do a warm-up exercises with stacks and then you will implement an example of user-level trap handling. - -Before you start coding, read Chapter 4 of the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf), and related source files: - -- `kernel/trampoline.S`: the assembly involved in changing from user space to kernel space and back -- `kernel/trap.c`: code handling all interrupts - -To start the lab, switch to the trap branch: - -``` - $ git fetch - $ git checkout traps - $ make clean - -``` -",,,"In this exercise you'll add a feature to xv6 that periodically alerts a process as it uses CPU time. This might be useful for compute-bound processes that want to limit how much CPU time they chew up, or for processes that want to compute but also want to take some periodic action. More generally, you'll be implementing a primitive form of user-level interrupt/fault handlers; you could use something similar to handle page faults in the application, for example. Your solution is correct if it passes alarmtest and 'usertests -q' - -You should add a new `sigalarm(interval, handler)` system call. If an application calls `sigalarm(n, fn)`, then after every `n` ""ticks"" of CPU time that the program consumes, the kernel should cause application function `fn` to be called. When `fn` returns, the application should resume where it left off. A tick is a fairly arbitrary unit of time in xv6, determined by how often a hardware timer generates interrupts. If an application calls `sigalarm(0, 0)`, the kernel should stop generating periodic alarm calls. - -You'll find a file `user/alarmtest.c` in your xv6 repository. Add it to the Makefile. It won't compile correctly until you've added `sigalarm` and `sigreturn` system calls (see below). - -`alarmtest` calls `sigalarm(2, periodic)` in `test0` to ask the kernel to force a call to `periodic()` every 2 ticks, and then spins for a while. You can see the assembly code for alarmtest in user/alarmtest.asm, which may be handy for debugging. Your solution is correct when `alarmtest` produces output like this and usertests -q also runs correctly: - -``` -$ alarmtest -test0 start -........alarm! 
-test0 passed -test1 start -...alarm! -..alarm! -...alarm! -..alarm! -...alarm! -..alarm! -...alarm! -..alarm! -...alarm! -..alarm! -test1 passed -test2 start -................alarm! -test2 passed -test3 start -test3 passed -$ usertest -q -... -ALL TESTS PASSED -$ -``` - -When you're done, your solution will be only a few lines of code, but it may be tricky to get it right. We'll test your code with the version of alarmtest.c in the original repository. You can modify alarmtest.c to help you debug, but make sure the original alarmtest says that all the tests pass. - -### test0: invoke handler - -Get started by modifying the kernel to jump to the alarm handler in user space, which will cause test0 to print ""alarm!"". Don't worry yet what happens after the ""alarm!"" output; it's OK for now if your program crashes after printing ""alarm!"". Here are some hints: - -- You'll need to modify the Makefile to cause `alarmtest.c` to be compiled as an xv6 user program. - -- The right declarations to put in user/user.h are: - - ``` - int sigalarm(int ticks, void (*handler)()); - int sigreturn(void); - ``` - -- Update user/usys.pl (which generates user/usys.S), kernel/syscall.h, and kernel/syscall.c to allow `alarmtest` to invoke the sigalarm and sigreturn system calls. - -- For now, your `sys_sigreturn` should just return zero. - -- Your `sys_sigalarm()` should store the alarm interval and the pointer to the handler function in new fields in the `proc` structure (in `kernel/proc.h`). - -- You'll need to keep track of how many ticks have passed since the last call (or are left until the next call) to a process's alarm handler; you'll need a new field in `struct proc` for this too. You can initialize `proc` fields in `allocproc()` in `proc.c`. - -- Every tick, the hardware clock forces an interrupt, which is handled in `usertrap()` in `kernel/trap.c`. - -- You only want to manipulate a process's alarm ticks if there's a timer interrupt; you want something like - - ``` - if(which_dev == 2) ... - ``` - -- Only invoke the alarm function if the process has a timer outstanding. Note that the address of the user's alarm function might be 0 (e.g., in user/alarmtest.asm, `periodic` is at address 0). - -- You'll need to modify `usertrap()` so that when a process's alarm interval expires, the user process executes the handler function. When a trap on the RISC-V returns to user space, what determines the instruction address at which user-space code resumes execution? - -- It will be easier to look at traps with gdb if you tell qemu to use only one CPU, which you can do by running - - ``` - make CPUS=1 qemu-gdb - ``` - -- You've succeeded if alarmtest prints ""alarm!"". - -### test1/test2()/test3(): resume interrupted code - -Chances are that alarmtest crashes in test0 or test1 after it prints ""alarm!"", or that alarmtest (eventually) prints ""test1 failed"", or that alarmtest exits without printing ""test1 passed"". To fix this, you must ensure that, when the alarm handler is done, control returns to the instruction at which the user program was originally interrupted by the timer interrupt. You must ensure that the register contents are restored to the values they held at the time of the interrupt, so that the user program can continue undisturbed after the alarm. Finally, you should ""re-arm"" the alarm counter after each time it goes off, so that the handler is called periodically. 
- -As a starting point, we've made a design decision for you: user alarm handlers are required to call the `sigreturn` system call when they have finished. Have a look at `periodic` in `alarmtest.c` for an example. This means that you can add code to `usertrap` and `sys_sigreturn` that cooperate to cause the user process to resume properly after it has handled the alarm. - -Some hints: - -- Your solution will require you to save and restore registers---what registers do you need to save and restore to resume the interrupted code correctly? (Hint: it will be many). -- Have `usertrap` save enough state in `struct proc` when the timer goes off that `sigreturn` can correctly return to the interrupted user code. -- Prevent re-entrant calls to the handler----if a handler hasn't returned yet, the kernel shouldn't call it again. `test2` tests this. -- Make sure to restore a0. `sigreturn` is a system call, and its return value is stored in a0. - -Once you pass `test0`, `test1`, `test2`, and `test3` run `usertests -q` to make sure you didn't break any other parts of the kernel.","In this exercise you'll add a feature to xv6 that periodically alerts a process as it uses CPU time. This might be useful for compute-bound processes that want to limit how much CPU time they chew up, or for processes that want to compute but also want to take some periodic action. More generally, you'll be implementing a primitive form of user-level interrupt/fault handlers; you could use something similar to handle page faults in the application, for example. Your solution is correct if it passes alarmtest and 'usertests -q'","- You'll need to modify the Makefile to cause `alarmtest.c` to be compiled as an xv6 user program. - -- The right declarations to put in user/user.h are: - - ``` - int sigalarm(int ticks, void (*handler)()); - int sigreturn(void); - ``` - -- Update user/usys.pl (which generates user/usys.S), kernel/syscall.h, and kernel/syscall.c to allow `alarmtest` to invoke the sigalarm and sigreturn system calls. - -- For now, your `sys_sigreturn` should just return zero. - -- Your `sys_sigalarm()` should store the alarm interval and the pointer to the handler function in new fields in the `proc` structure (in `kernel/proc.h`). - -- You'll need to keep track of how many ticks have passed since the last call (or are left until the next call) to a process's alarm handler; you'll need a new field in `struct proc` for this too. You can initialize `proc` fields in `allocproc()` in `proc.c`. - -- Every tick, the hardware clock forces an interrupt, which is handled in `usertrap()` in `kernel/trap.c`. - -- You only want to manipulate a process's alarm ticks if there's a timer interrupt; you want something like - - ``` - if(which_dev == 2) ... - ``` - -- Only invoke the alarm function if the process has a timer outstanding. Note that the address of the user's alarm function might be 0 (e.g., in user/alarmtest.asm, `periodic` is at address 0). - -- You'll need to modify `usertrap()` so that when a process's alarm interval expires, the user process executes the handler function. When a trap on the RISC-V returns to user space, what determines the instruction address at which user-space code resumes execution? - -- It will be easier to look at traps with gdb if you tell qemu to use only one CPU, which you can do by running - - ``` - make CPUS=1 qemu-gdb - ``` - -- You've succeeded if alarmtest prints ""alarm!"".",," $ git fetch - $ git checkout traps - $ make clean",,xv6-labs-2024,alarmtest,"test0 start -........alarm! 
-test0 passed -test1 start -...alarm! -..alarm! -...alarm! -..alarm! -...alarm! -..alarm! -...alarm! -..alarm! -...alarm! -..alarm! -test1 passed -test2 start -................alarm! -test2 passed -test3 start -test3 passed -$ usertest -q -... -ALL TESTS PASSED",hard,https://pdos.csail.mit.edu/6.1810/2024/labs/traps.html -43,6.1810: Operating System Engineering,2024,Lab: Copy-on-Write Fork for xv6,Implement copy-on-write fork,"Virtual memory provides a level of indirection: the kernel can intercept memory references by marking PTEs invalid or read-only, leading to page faults, and can change what addresses mean by modifying PTEs. There is a saying in computer systems that any systems problem can be solved with a level of indirection. This lab explores an example: copy-on-write fork. - -To start the lab, switch to the cow branch: - -``` -$ git fetch -$ git checkout cow -$ make clean -``` - -## The problem - -The fork() system call in xv6 copies all of the parent process's user-space memory into the child. If the parent is large, copying can take a long time. Worse, the work is often largely wasted: fork() is commonly followed by exec() in the child, which discards the copied memory, usually without using most of it. On the other hand, if both parent and child use a copied page, and one or both writes it, the copy is truly needed. - -## The solution - -Your goal in implementing copy-on-write (COW) fork() is to defer allocating and copying physical memory pages until the copies are actually needed, if ever. - -COW fork() creates just a pagetable for the child, with PTEs for user memory pointing to the parent's physical pages. COW fork() marks all the user PTEs in both parent and child as read-only. When either process tries to write one of these COW pages, the CPU will force a page fault. The kernel page-fault handler detects this case, allocates a page of physical memory for the faulting process, copies the original page into the new page, and modifies the relevant PTE in the faulting process to refer to the new page, this time with the PTE marked writeable. When the page fault handler returns, the user process will be able to write its copy of the page. - -COW fork() makes freeing of the physical pages that implement user memory a little trickier. A given physical page may be referred to by multiple processes' page tables, and should be freed only when the last reference disappears. In a simple kernel like xv6 this bookkeeping is reasonably straightforward, but in production kernels this can be difficult to get right; see, for example, [Patching until the COWs come home](https://lwn.net/Articles/849638/).",,,"Your task is to implement copy-on-write fork in the xv6 kernel. You are done if your modified kernel executes both the cowtest and 'usertests -q' programs successfully. - -To help you test your implementation, we've provided an xv6 program called cowtest (source in user/cowtest.c). cowtest runs various tests, but even the first will fail on unmodified xv6. Thus, initially, you will see: - -``` -$ cowtest -simple: fork() failed -$ -``` - -The ""simple"" test allocates more than half of available physical memory, and then fork()s. The fork fails because there is not enough free physical memory to give the child a complete copy of the parent's memory. - -When you are done, your kernel should pass all the tests in both cowtest and usertests -q. 
That is: - -``` -$ cowtest -simple: ok -simple: ok -three: ok -three: ok -three: ok -file: ok -forkfork: ok -ALL COW TESTS PASSED -$ usertests -q -... -ALL TESTS PASSED -$ -``` - -Here's a reasonable plan of attack. - -1. Modify uvmcopy() to map the parent's physical pages into the child, instead of allocating new pages. Clear `PTE_W` in the PTEs of both child and parent for pages that have `PTE_W` set. -2. Modify usertrap() to recognize page faults. When a write page-fault occurs on a COW page that was originally writeable, allocate a new page with kalloc(), copy the old page to the new page, and install the new page in the PTE with `PTE_W` set. Pages that were originally read-only (not mapped `PTE_W`, like pages in the text segment) should remain read-only and shared between parent and child; a process that tries to write such a page should be killed. -3. Ensure that each physical page is freed when the last PTE reference to it goes away -- but not before. A good way to do this is to keep, for each physical page, a ""reference count"" of the number of user page tables that refer to that page. Set a page's reference count to one when `kalloc()` allocates it. Increment a page's reference count when fork causes a child to share the page, and decrement a page's count each time any process drops the page from its page table. `kfree()` should only place a page back on the free list if its reference count is zero. It's OK to to keep these counts in a fixed-size array of integers. You'll have to work out a scheme for how to index the array and how to choose its size. For example, you could index the array with the page's physical address divided by 4096, and give the array a number of elements equal to highest physical address of any page placed on the free list by `kinit()` in kalloc.c. Feel free to modify kalloc.c (e.g., `kalloc()` and `kfree()`) to maintain the reference counts. -4. Modify copyout() to use the same scheme as page faults when it encounters a COW page. - -Some hints: - -- It may be useful to have a way to record, for each PTE, whether it is a COW mapping. You can use the RSW (reserved for software) bits in the RISC-V PTE for this. -- `usertests -q` explores scenarios that `cowtest` does not test, so don't forget to check that all tests pass for both. -- Some helpful macros and definitions for page table flags are at the end of `kernel/riscv.h`. -- If a COW page fault occurs and there's no free memory, the process should be killed.",Your task is to implement copy-on-write fork in the xv6 kernel. You are done if your modified kernel executes both the cowtest and 'usertests -q' programs successfully.,"- It may be useful to have a way to record, for each PTE, whether it is a COW mapping. You can use the RSW (reserved for software) bits in the RISC-V PTE for this. -- `usertests -q` explores scenarios that `cowtest` does not test, so don't forget to check that all tests pass for both. -- Some helpful macros and definitions for page table flags are at the end of `kernel/riscv.h`. -- If a COW page fault occurs and there's no free memory, the process should be killed.",,"$ git fetch -$ git checkout cow -$ make clean",,xv6-labs-2024,usertests -q,"... -ALL TESTS PASSED",hard,https://pdos.csail.mit.edu/6.1810/2024/labs/cow.html -44,6.1810: Operating System Engineering,2024,Lab: networking,Part One: NIC,"In this lab you will write an xv6 device driver for a network interface card (NIC), and then write the receive half of an ethernet/IP/UDP protocol processing stack. 
- -Fetch the xv6 source for the lab and check out the `net` branch: - -``` - $ git fetch - $ git checkout net - $ make clean -``` - -## Background - -Before writing code, you may find it helpful to review ""Chapter 5: Interrupts and device drivers"" in the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf). - -You'll use a network device called the E1000 to handle network communication. To xv6 (and the driver you write), the E1000 looks like a real piece of hardware connected to a real Ethernet local area network (LAN). In fact, the E1000 your driver will talk to is an emulation provided by qemu, connected to a LAN that is also emulated by qemu. On this emulated LAN, xv6 (the ""guest"") has an IP address of 10.0.2.15. Qemu arranges for the computer running qemu (the ""host"") to appear on the LAN with IP address 10.0.2.2. When xv6 uses the E1000 to send a packet to 10.0.2.2, qemu delivers the packet to the appropriate application on the host. - -You will use QEMU's ""user-mode network stack"". QEMU's documentation has more about the user-mode stack [here](https://wiki.qemu.org/Documentation/Networking#User_Networking_.28SLIRP.29). We've updated the Makefile to enable QEMU's user-mode network stack and E1000 network card emulation. - -The Makefile configures QEMU to record all incoming and outgoing packets to the file `packets.pcap` in your lab directory. It may be helpful to review these recordings to confirm that xv6 is transmitting and receiving the packets you expect. To display the recorded packets: - -``` -tcpdump -XXnr packets.pcap -``` - -We've added some files to the xv6 repository for this lab. The file `kernel/e1000.c` contains initialization code for the E1000 as well as empty functions for transmitting and receiving packets, which you'll fill in. `kernel/e1000_dev.h` contains definitions for registers and flag bits defined by the E1000 and described in the Intel E1000 [Software Developer's Manual](https://pdos.csail.mit.edu/6.1810/2024/readings/8254x_GBe_SDM.pdf). `kernel/net.c` and `kernel/net.h` contain simple network stack that implements the [IP](https://en.wikipedia.org/wiki/Internet_Protocol), [UDP](https://en.wikipedia.org/wiki/User_Datagram_Protocol), and [ARP](https://en.wikipedia.org/wiki/Address_Resolution_Protocol) protocols; `net.c` has complete code for user processes to send UDP packets, but lacks most of the code to receive packets and deliver them to user space. Finally, `kernel/pci.c` contains code that searches for an E1000 card on the PCI bus when xv6 boots.",,,"Your job is to complete `e1000_transmit()` and `e1000_recv()`, both in `kernel/e1000.c`, so that the driver can transmit and receive packets. You are done with this part when `make grade` says your solution passes the ""txone"" and ""rxone"" tests. - -While writing your code, you'll find yourself referring to the E1000 [Software Developer's Manual](https://pdos.csail.mit.edu/6.1810/2024/readings/8254x_GBe_SDM.pdf). Of particular help may be the following sections: - -- Section 2 is essential and gives an overview of the entire device. -- Section 3.2 gives an overview of packet receiving. -- Section 3.3 gives an overview of packet transmission, alongside section 3.4. -- Section 13 gives an overview of the registers used by the E1000. -- Section 14 may help you understand the init code that we've provided. - -Browse the E1000 [Software Developer's Manual](https://pdos.csail.mit.edu/6.1810/2024/readings/8254x_GBe_SDM.pdf). 
This manual covers several closely related Ethernet controllers. QEMU emulates the 82540EM. Skim Chapter 2 now to get a feel for the device. To write your driver, you'll need to be familiar with Chapters 3 and 14, as well as 4.1 (though not 4.1's subsections). You'll also need to use Chapter 13 as a reference. The other chapters mostly cover components of the E1000 that your driver won't have to interact with. Don't worry about the details at first; just get a feel for how the document is structured so you can find things later. The E1000 has many advanced features, most of which you can ignore. Only a small set of basic features is needed to complete this lab. - -The `e1000_init()` function we provide you in `e1000.c` configures the E1000 to read packets to be transmitted from RAM, and to write received packets to RAM. This technique is called DMA, for direct memory access, referring to the fact that the E1000 hardware directly writes and reads packets to/from RAM. - -Because bursts of packets might arrive faster than the driver can process them, `e1000_init()` provides the E1000 with multiple buffers into which the E1000 can write packets. The E1000 requires these buffers to be described by an array of ""descriptors"" in RAM; each descriptor contains an address in RAM where the E1000 can write a received packet. `struct rx_desc` describes the descriptor format. The array of descriptors is called the receive ring, or receive queue. It's a circular ring in the sense that when the card or driver reaches the end of the array, it wraps back to the beginning. `e1000_init()` allocates packet buffers with `kalloc()` for the E1000 to DMA into. There is also a transmit ring into which the driver should place packets it wants the E1000 to send. `e1000_init()` configures the two rings to have size `RX_RING_SIZE` and `TX_RING_SIZE`. - -When the network stack in `net.c` needs to send a packet, it calls `e1000_transmit()` with a pointer to a buffer that holds the packet to be sent; `net.c` allocates this buffer with `kalloc()`. Your transmit code must place a pointer to the packet data in a descriptor in the TX (transmit) ring. `struct tx_desc` describes the descriptor format. You will need to ensure that each buffer is eventually passed to `kfree()`, but only after the E1000 has finished transmitting the packet (the E1000 sets the `E1000_TXD_STAT_DD` bit in the descriptor to indicate this). - -When the E1000 receives each packet from the ethernet, it DMAs the packet to the memory pointed to by `addr` in the next RX (receive) ring descriptor. If an E1000 interrupt is not already pending, the E1000 asks the PLIC to deliver one as soon as interrupts are enabled. Your `e1000_recv()` code must scan the RX ring and deliver each new packet to the network stack (in `net.c`) by calling `net_rx()`. You will then need to allocate a new buffer and place it into the descriptor, so that when the E1000 reaches that point in the RX ring again it finds a fresh buffer into which to DMA a new packet. - -In addition to reading and writing the descriptor rings in RAM, your driver will need to interact with the E1000 through its memory-mapped control registers, to detect when received packets are available and to inform the E1000 that the driver has filled in some TX descriptors with packets to send. The global variable `regs` holds a pointer to the E1000's first control register; your driver can get at the other registers by indexing `regs` as an array. You'll need to use indices `E1000_RDT` and `E1000_TDT` in particular. 
- -To test e1000_transmit() sending a single packet, run `python3 nettest.py txone` in one window, and in another window run `make qemu` and then run `nettest txone` in xv6, which sends a single packet. `nettest.py` will print `txone: OK` if all went well (i.e. qemu's e1000 emulator saw the packet on the DMA ring and forwarded it outside of qemu). - -If transmitting worked, tcpdump -XXnr packets.pcap shold produce output like this: - -``` -reading from file packets.pcap, link-type EN10MB (Ethernet) -21:27:31.688123 IP 10.0.2.15.2000 > 10.0.2.2.25603: UDP, length 5 - 0x0000: 5255 0a00 0202 5254 0012 3456 0800 4500 RU....RT..4V..E. - 0x0010: 0021 0000 0000 6411 3ebc 0a00 020f 0a00 .!....d.>....... - 0x0020: 0202 07d0 6403 000d 0000 7478 6f6e 65 ....d.....txone -``` - -To test e1000_recv() receiving two packets (an ARP query, then a IP/UDP packet), run `make qemu` in one window, and `python3 nettest.py rxone` in another window. `nettest.py rxone` sends a single UDP packet via qemu to xv6; qemu actually first sends an ARP request to xv6, and (after xv6 returns an ARP reply) qemu forwards the UDP packet to xv6. If e1000_recv() works correctly and passes those packets to `net_rx()`, `net.c` should print - -``` -arp_rx: received an ARP packet -ip_rx: received an IP packet -``` - -`net.c` already contains the code to detect qemu's ARP request and call `e1000_transmit()` to send its reply. This test requires that both e1000_transmit() and e1000_recv() work. In addition, if all went well, tcpdump -XXnr packets.pcap should produce output like this: - -``` -reading from file packets.pcap, link-type EN10MB (Ethernet) -21:29:16.893600 ARP, Request who-has 10.0.2.15 tell 10.0.2.2, length 28 - 0x0000: ffff ffff ffff 5255 0a00 0202 0806 0001 ......RU........ - 0x0010: 0800 0604 0001 5255 0a00 0202 0a00 0202 ......RU........ - 0x0020: 0000 0000 0000 0a00 020f .......... -21:29:16.894543 ARP, Reply 10.0.2.15 is-at 52:54:00:12:34:56, length 28 - 0x0000: 5255 0a00 0202 5254 0012 3456 0806 0001 RU....RT..4V.... - 0x0010: 0800 0604 0002 5254 0012 3456 0a00 020f ......RT..4V.... - 0x0020: 5255 0a00 0202 0a00 0202 RU........ -21:29:16.902656 IP 10.0.2.2.61350 > 10.0.2.15.2000: UDP, length 3 - 0x0000: 5254 0012 3456 5255 0a00 0202 0800 4500 RT..4VRU......E. - 0x0010: 001f 0000 0000 4011 62be 0a00 0202 0a00 ......@.b....... - 0x0020: 020f efa6 07d0 000b fdd6 7879 7a ..........xyz -``` - -Your output will look somewhat different, but it should contain the strings ""ARP, Request"", ""ARP, Reply"", ""UDP"", and ""....xyz"". - -If both of the above tests work, then `make grade` should show that the first two tests pass. - -## e1000 hints - -Start by adding print statements to `e1000_transmit()` and `e1000_recv()`, and running (in xv6) `nettest txone`. You should see from your print statements that `nettest txone` generates a call to `e1000_transmit`. - -Some hints for implementing `e1000_transmit`: - -- First ask the E1000 for the TX ring index at which it's expecting the next packet, by reading the `E1000_TDT` control register. -- Then check if the the ring is overflowing. If `E1000_TXD_STAT_DD` is not set in the descriptor indexed by `E1000_TDT`, the E1000 hasn't finished the corresponding previous transmission request, so return an error. -- Otherwise, use `kfree()` to free the last buffer that was transmitted from that descriptor (if there was one). -- Then fill in the descriptor. Set the necessary cmd flags (look at Section 3.3 in the E1000 manual) and stash away a pointer to the buffer for later freeing. 
-- Finally, update the ring position by adding one to `E1000_TDT` modulo `TX_RING_SIZE`. -- If `e1000_transmit()` added the packet successfully to the ring, return 0. On failure (e.g., there is no descriptor available), return -1 so that the caller knows to free the buffer. - -Some hints for implementing `e1000_recv`: - -- First ask the E1000 for the ring index at which the next waiting received packet (if any) is located, by fetching the `E1000_RDT` control register and adding one modulo `RX_RING_SIZE`. -- Then check if a new packet is available by checking for the `E1000_RXD_STAT_DD` bit in the `status` portion of the descriptor. If not, stop. -- Deliver the packet buffer to the network stack by calling `net_rx()`. -- Then allocate a new buffer using `kalloc()` to replace the one just given to `net_rx()`. Clear the descriptor's status bits to zero. -- Finally, update the `E1000_RDT` register to be the index of the last ring descriptor processed. -- `e1000_init()` initializes the RX ring with buffers, and you'll want to look at how it does that and perhaps borrow code. -- At some point the total number of packets that have ever arrived will exceed the ring size (16); make sure your code can handle that. -- The e1000 can deliver more than one packet per interrupt; your `e1000_recv` should handle that situation. - -You'll need locks to cope with the possibility that xv6 might use the E1000 from more than one process, or might be using the E1000 in a kernel thread when an interrupt arrives.","Your job is to complete `e1000_transmit()` and `e1000_recv()`, both in `kernel/e1000.c`, so that the driver can transmit and receive packets. You are done with this part when `make grade` says your solution passes the ""txone"" and ""rxone"" tests.","Start by adding print statements to `e1000_transmit()` and `e1000_recv()`, and running (in xv6) `nettest txone`. You should see from your print statements that `nettest txone` generates a call to `e1000_transmit`. - -Some hints for implementing `e1000_transmit`: - -- First ask the E1000 for the TX ring index at which it's expecting the next packet, by reading the `E1000_TDT` control register. -- Then check if the the ring is overflowing. If `E1000_TXD_STAT_DD` is not set in the descriptor indexed by `E1000_TDT`, the E1000 hasn't finished the corresponding previous transmission request, so return an error. -- Otherwise, use `kfree()` to free the last buffer that was transmitted from that descriptor (if there was one). -- Then fill in the descriptor. Set the necessary cmd flags (look at Section 3.3 in the E1000 manual) and stash away a pointer to the buffer for later freeing. -- Finally, update the ring position by adding one to `E1000_TDT` modulo `TX_RING_SIZE`. -- If `e1000_transmit()` added the packet successfully to the ring, return 0. On failure (e.g., there is no descriptor available), return -1 so that the caller knows to free the buffer. - -Some hints for implementing `e1000_recv`: - -- First ask the E1000 for the ring index at which the next waiting received packet (if any) is located, by fetching the `E1000_RDT` control register and adding one modulo `RX_RING_SIZE`. -- Then check if a new packet is available by checking for the `E1000_RXD_STAT_DD` bit in the `status` portion of the descriptor. If not, stop. -- Deliver the packet buffer to the network stack by calling `net_rx()`. -- Then allocate a new buffer using `kalloc()` to replace the one just given to `net_rx()`. Clear the descriptor's status bits to zero. 
-- Finally, update the `E1000_RDT` register to be the index of the last ring descriptor processed. -- `e1000_init()` initializes the RX ring with buffers, and you'll want to look at how it does that and perhaps borrow code. -- At some point the total number of packets that have ever arrived will exceed the ring size (16); make sure your code can handle that. -- The e1000 can deliver more than one packet per interrupt; your `e1000_recv` should handle that situation. - -You'll need locks to cope with the possibility that xv6 might use the E1000 from more than one process, or might be using the E1000 in a kernel thread when an interrupt arrives.",,"$ git fetch -$ git checkout net -$ make clean",,xv6-labs-2024, tcpdump -XXnr packets.pcap,"reading from file packets.pcap, link-type EN10MB (Ethernet) -21:29:16.893600 ARP, Request who-has 10.0.2.15 tell 10.0.2.2, length 28 - 0x0000: ffff ffff ffff 5255 0a00 0202 0806 0001 ......RU........ - 0x0010: 0800 0604 0001 5255 0a00 0202 0a00 0202 ......RU........ - 0x0020: 0000 0000 0000 0a00 020f .......... -21:29:16.894543 ARP, Reply 10.0.2.15 is-at 52:54:00:12:34:56, length 28 - 0x0000: 5255 0a00 0202 5254 0012 3456 0806 0001 RU....RT..4V.... - 0x0010: 0800 0604 0002 5254 0012 3456 0a00 020f ......RT..4V.... - 0x0020: 5255 0a00 0202 0a00 0202 RU........ -21:29:16.902656 IP 10.0.2.2.61350 > 10.0.2.15.2000: UDP, length 3 - 0x0000: 5254 0012 3456 5255 0a00 0202 0800 4500 RT..4VRU......E. - 0x0010: 001f 0000 0000 4011 62be 0a00 0202 0a00 ......@.b....... - 0x0020: 020f efa6 07d0 000b fdd6 7879 7a ..........xyz",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/net.html -45,6.1810: Operating System Engineering,2024,Lab: networking,Part Two: UDP Receive,"In this lab you will write an xv6 device driver for a network interface card (NIC), and then write the receive half of an ethernet/IP/UDP protocol processing stack. - -Fetch the xv6 source for the lab and check out the `net` branch: - -``` - $ git fetch - $ git checkout net - $ make clean -``` - -## Background - -Before writing code, you may find it helpful to review ""Chapter 5: Interrupts and device drivers"" in the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf). - -You'll use a network device called the E1000 to handle network communication. To xv6 (and the driver you write), the E1000 looks like a real piece of hardware connected to a real Ethernet local area network (LAN). In fact, the E1000 your driver will talk to is an emulation provided by qemu, connected to a LAN that is also emulated by qemu. On this emulated LAN, xv6 (the ""guest"") has an IP address of 10.0.2.15. Qemu arranges for the computer running qemu (the ""host"") to appear on the LAN with IP address 10.0.2.2. When xv6 uses the E1000 to send a packet to 10.0.2.2, qemu delivers the packet to the appropriate application on the host. - -You will use QEMU's ""user-mode network stack"". QEMU's documentation has more about the user-mode stack [here](https://wiki.qemu.org/Documentation/Networking#User_Networking_.28SLIRP.29). We've updated the Makefile to enable QEMU's user-mode network stack and E1000 network card emulation. - -The Makefile configures QEMU to record all incoming and outgoing packets to the file `packets.pcap` in your lab directory. It may be helpful to review these recordings to confirm that xv6 is transmitting and receiving the packets you expect. To display the recorded packets: - -``` -tcpdump -XXnr packets.pcap -``` - -We've added some files to the xv6 repository for this lab. 
The file `kernel/e1000.c` contains initialization code for the E1000 as well as empty functions for transmitting and receiving packets, which you'll fill in. `kernel/e1000_dev.h` contains definitions for registers and flag bits defined by the E1000 and described in the Intel E1000 [Software Developer's Manual](https://pdos.csail.mit.edu/6.1810/2024/readings/8254x_GBe_SDM.pdf). `kernel/net.c` and `kernel/net.h` contain simple network stack that implements the [IP](https://en.wikipedia.org/wiki/Internet_Protocol), [UDP](https://en.wikipedia.org/wiki/User_Datagram_Protocol), and [ARP](https://en.wikipedia.org/wiki/Address_Resolution_Protocol) protocols; `net.c` has complete code for user processes to send UDP packets, but lacks most of the code to receive packets and deliver them to user space. Finally, `kernel/pci.c` contains code that searches for an E1000 card on the PCI bus when xv6 boots.",,,"UDP, the User Datagram Protocol, allows user processes on different Internet hosts to exchange individual packets (datagrams). UDP is layered on top of IP. A user process indicates which host it wants to send a packet to by specifying a 32-bit IP address. Each UDP packet contains a source port number and a destination port number; processes can request to receive packets that arrive addressed to particular port numbers, and can specify the destination port number when sending. Thus two processes on different hosts can communicate with UDP if they know each others' IP addresses and the port numbers each is listening for. For example, Google operates a DNS name server on the host with IP address 8.8.8.8, listening on UDP port 53. - -In this task, you'll add code to `kernel/net.c` to receive UDP packets, queue them, and allow user processes to read them. `net.c` already contains the code required for user processes to transmit UDP packets (with the exception of e1000_transmit(), which you provide). - -Your job is to implement `ip_rx()`, `sys_recv()`, and `sys_bind()` in `kernel/net.c`. You are done when `make grade` says your solution passes all of the tests. - -You can run the same tests that `make grade` runs by running `python3 nettest.py grade` in one window, and (in another window) then running `nettest grade` inside xv6. If all goes well, `nettest.py` should print `txone: OK`, and you should see this in the xv6 window: - -``` -$ nettest grade -txone: sending one packet -arp_rx: received an ARP packet -ip_rx: received an IP packet -ping0: starting -ping0: OK -ping1: starting -ping1: OK -ping2: starting -ping2: OK -ping3: starting -ping3: OK -dns: starting -DNS arecord for pdos.csail.mit.edu. is 128.52.129.126 -dns: OK -``` - -The system-call API specification for UDP looks like this: - -- `send(short sport, int dst, short dport, char *buf, int len)`: This system call sends a UDP packet to the host with IP address `dst`, and (on that host) the process listening to port `dport`. The packet's source port number will be `sport` (this port number is reported to the receiving process, so that it can reply to the sender). The content (""payload"") of the UDP packet will the `len` bytes at address `buf`. The return value is 0 on success, and -1 on failure. -- `recv(short dport, int *src, short *sport, char *buf, int maxlen)`: This system call returns the payload of a UDP packet that arrives with destination port `dport`. If one or more packets arrived before the call to `recv()`, it should return right away with the earliest waiting packet. 
If no packets are waiting, `recv()` should wait until a packet for `dport` arrives. `recv()` should see arriving packets for a given port in arrival order. `recv()` copies the packet's 32-bit source IP address to `*src`, copies the packet's 16-bit UDP source port number to `*sport`, copies at most `maxlen` bytes of the packet's UDP payload to `buf`, and removes the packet from the queue. The system call returns the number of bytes of the UDP payload copied, or -1 if there was an error. -- `bind(short port)`: A process should call `bind(port)` before it calls `recv(port, ...)`. If a UDP packet arrives with a destination port that hasn't been passed to `bind()`, `net.c` should discard that packet. The reason for this system call is to initialize any structures `net.c` needs in order to store arriving packets for a subsequent `recv()` call. -- `unbind(short port)`: You do not need to implement this system call, since the test code does not use it. But you can if you like in order to provide symmetry with `bind()`. - -All the addresses and port numbers passed as arguments to these system calls, and returned by them, must be in host byte order (see below). - -You'll need to provide the kernel implementations of the system calls, with the exception of `send()`. The program `user/nettest.c` uses this API. - -To make `recv()` work, you'll need to add code to `ip_rx()`, which `net_rx()` calls for each received IP packet. `ip_rx()` should decide if the arriving packet is UDP, and whether its destination port has been passed to `bind()`; if both are true, it should save the packet where `recv()` can find it. However, for any given port, no more than 16 packets should be saved; if 16 are already waiting for `recv()`, an incoming packet for that port should be dropped. The point of this rule is to prevent a fast or abusive sender from forcing xv6 to run out of memory. Furthermore, if packets are being dropped for one port because it already has 16 packets waiting, that should not affect packets arriving for other ports. - -The packet buffers that `ip_rx()` looks at contain a 14-byte ethernet header, followed by a 20-byte IP header, followed by an 8-byte UDP header, followed by the UDP payload. You'll find C struct definitions for each of these in `kernel/net.h`. Wikipedia has a description of the IP header [here](https://en.wikipedia.org/wiki/Internet_Protocol_version_4#Header), and UDP [here](https://en.wikipedia.org/wiki/User_Datagram_Protocol). - -Production IP/UDP implementations are complex, handling protocol options and validating invariants. You only need to do enough to pass `make grade`. Your code needs to look at ip_p and ip_src in the IP header, and dport, sport, and ulen in the UDP header. - -You will have to pay attention to byte order. Ethernet, IP, and UDP header fields that contain multi-byte integers place the most significant byte first in the packet. The RISC-V CPU, when it lays out a multi-byte integer in memory, places the least-significant byte first. This means that, when code extracts a multi-byte integer from a packet, it must re-arrange the bytes. This applies to short (2-byte) and int (4-byte) fields. You can use the `ntohs()` and `ntohl()` functions for 2-byte and 4-byte fields, respectively. Look at `net_rx()` for an example of this when looking at the 2-byte ethernet type field. - -If there are errors or omissions in your E1000 code, they may only start to cause problems during the ping tests. 
For example, the ping tests send and receive enough packets that the descriptor ring indices will wrap around. - -Some hints: - -- Create a struct to keep track of bound ports and the packets in their queues. -- Refer to the `sleep(void *chan, struct spinlock *lk)` and `wakeup(void *chan)` functions in `kernel/proc.c` to implement the waiting logic for `recv()`. -- The destination addresses that `sys_recv()` copies the packets to are virtual addresses; you will have to copy from the kernel to the current user process. -- Make sure to free packets that have been copied over or have been dropped.","Your job is to implement `ip_rx()`, `sys_recv()`, and `sys_bind()` in `kernel/net.c`. You are done when `make grade` says your solution passes all of the tests. - -You can run the same tests that `make grade` runs by running `python3 nettest.py grade` in one window, and (in another window) then running `nettest grade` inside xv6. If all goes well, `nettest.py` should print `txone: OK`, and you should see this in the xv6 window: - -``` -$ nettest grade -txone: sending one packet -arp_rx: received an ARP packet -ip_rx: received an IP packet -ping0: starting -ping0: OK -ping1: starting -ping1: OK -ping2: starting -ping2: OK -ping3: starting -ping3: OK -dns: starting -DNS arecord for pdos.csail.mit.edu. is 128.52.129.126 -dns: OK -``` -","- Create a struct to keep track of bound ports and the packets in their queues. -- Refer to the `sleep(void *chan, struct spinlock *lk)` and `wakeup(void *chan)` functions in `kernel/proc.c` to implement the waiting logic for `recv()`. -- The destination addresses that `sys_recv()` copies the packets to are virtual addresses; you will have to copy from the kernel to the current user process. -- Make sure to free packets that have been copied over or have been dropped.",,"$ git fetch -$ git checkout net -$ make clean",,xv6-labs-2024,nettest grade,"txone: sending one packet -arp_rx: received an ARP packet -ip_rx: received an IP packet -ping0: starting -ping0: OK -ping1: starting -ping1: OK -ping2: starting -ping2: OK -ping3: starting -ping3: OK -dns: starting -DNS arecord for pdos.csail.mit.edu. is 128.52.129.126 -dns: OK",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/net.html -46,6.1810: Operating System Engineering,2024,Lab: locks,Memory allocator,"In this lab you'll gain experience in re-designing code to increase parallelism. A common symptom of poor parallelism on multi-core machines is high lock contention. Improving parallelism often involves changing both data structures and locking strategies in order to reduce contention. You'll do this for the xv6 memory allocator and block cache. - -Before writing code, make sure to read the following parts from the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf) : - -- Chapter 6: ""Locking"" and the corresponding code. -- Section 3.5: ""Code: Physical memory allocator"" -- Section 8.1 through 8.3: ""Overview"", ""Buffer cache layer"", and ""Code: Buffer cache"" - -``` - $ git fetch - $ git checkout lock - $ make clean -``` -",,,"The program user/kalloctest stresses xv6's memory allocator: three processes grow and shrink their address spaces, resulting in many calls to `kalloc` and `kfree`. `kalloc` and `kfree` obtain `kmem.lock`. kalloctest prints (as ""#test-and-set"") the number of loop iterations in `acquire` due to attempts to acquire a lock that another core already holds, for the `kmem` lock and a few other locks. 
The number of loop iterations in `acquire` is a rough measure of lock contention. The output of `kalloctest` looks similar to this before you start the lab: - -``` -$ kalloctest -start test1 -test1 results: ---- lock kmem/bcache stats -lock: kmem: #test-and-set 83375 #acquire() 433015 -lock: bcache: #test-and-set 0 #acquire() 1260 ---- top 5 contended locks: -lock: kmem: #test-and-set 83375 #acquire() 433015 -lock: proc: #test-and-set 23737 #acquire() 130718 -lock: virtio_disk: #test-and-set 11159 #acquire() 114 -lock: proc: #test-and-set 5937 #acquire() 130786 -lock: proc: #test-and-set 4080 #acquire() 130786 -tot= 83375 -test1 FAIL -start test2 -total free number of pages: 32497 (out of 32768) -..... -test2 OK -start test3 -child done 1 -child done 100000 -test3 OK -start test2 -total free number of pages: 32497 (out of 32768) -..... -test2 OK -start test3 -..........child done 100000 ---- lock kmem/bcache stats -lock: kmem: #test-and-set 28002 #acquire() 4228151 -lock: bcache: #test-and-set 0 #acquire() 1374 ---- top 5 contended locks: -lock: virtio_disk: #test-and-set 96998 #acquire() 147 -lock: kmem: #test-and-set 28002 #acquire() 4228151 -lock: proc: #test-and-set 6802 #acquire() 7125 -lock: pr: #test-and-set 3321 #acquire() 5 -lock: log: #test-and-set 1912 #acquire() 68 -tot= 28002 -0 -test3 FAIL m 11720 n 28002 -``` - -You'll likely see different counts than shown here, and a different order for the top 5 contended locks. - -`acquire` maintains, for each lock, the count of calls to `acquire` for that lock, and the number of times the loop in `acquire` tried but failed to set the lock. kalloctest calls a system call that causes the kernel to print those counts for the kmem and bcache locks (which are the focus of this lab) and for the 5 most contended locks. If there is lock contention the number of `acquire` loop iterations will be large. The system call returns the sum of the number of loop iterations for the kmem and bcache locks. - -For this lab, you must use a dedicated unloaded machine with multiple cores. If you use a machine that is doing other things, the counts that kalloctest prints will be nonsense. You can use a dedicated Athena workstation, or your own laptop, but don't use a dialup machine. - -The root cause of lock contention in kalloctest is that `kalloc()` has a single free list, protected by a single lock. To remove lock contention, you will have to redesign the memory allocator to avoid a single lock and list. The basic idea is to maintain a free list per CPU, each list with its own lock. Allocations and frees on different CPUs can run in parallel, because each CPU will operate on a different list. The main challenge will be to deal with the case in which one CPU's free list is empty, but another CPU's list has free memory; in that case, the one CPU must ""steal"" part of the other CPU's free list. Stealing may introduce lock contention, but that will hopefully be infrequent. - -Your job is to implement per-CPU freelists, and stealing when a CPU's free list is empty. You must give all of your locks names that start with ""kmem"". That is, you should call `initlock` for each of your locks, and pass a name that starts with ""kmem"". Run kalloctest to see if your implementation has reduced lock contention. To check that it can still allocate all of memory, run `usertests sbrkmuch`. Your output will look similar to that shown below, with much-reduced contention in total on kmem locks, although the specific numbers will differ. 
Make sure all tests in `usertests -q` pass. `make grade` should say that the kalloctests pass. - -``` -$ kalloctest -start test1 -test1 results: ---- lock kmem/bcache stats -lock: kmem: #test-and-set 0 #acquire() 94703 -lock: kmem: #test-and-set 0 #acquire() 173699 -lock: kmem: #test-and-set 0 #acquire() 164725 -lock: bcache: #test-and-set 0 #acquire() 32 -lock: bcache.bucket: #test-and-set 0 #acquire() 38 -lock: bcache.bucket: #test-and-set 0 #acquire() 13 -lock: bcache.bucket: #test-and-set 0 #acquire() 22 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 30 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 88 -lock: bcache.bucket: #test-and-set 0 #acquire() 80 -lock: bcache.bucket: #test-and-set 0 #acquire() 1045 -lock: bcache.bucket: #test-and-set 0 #acquire() 16 -lock: bcache.bucket: #test-and-set 0 #acquire() 4 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 ---- top 5 contended locks: -lock: virtio_disk: #test-and-set 87542 #acquire() 147 -lock: proc: #test-and-set 37123 #acquire() 497420 -lock: proc: #test-and-set 27415 #acquire() 497425 -lock: wait_lock: #test-and-set 9650 #acquire() 12 -lock: pr: #test-and-set 4451 #acquire() 5 -tot= 0 -test1 OK -start test2 -total free number of pages: 32463 (out of 32768) -..... -test2 OK -start test3 -..........child done 100000 ---- lock kmem/bcache stats -lock: kmem: #test-and-set 758 #acquire() 1375324 -lock: kmem: #test-and-set 796 #acquire() 1864634 -lock: kmem: #test-and-set 1395 #acquire() 1779346 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: bcache: #test-and-set 0 #acquire() 32 -lock: bcache.bucket: #test-and-set 0 #acquire() 38 -lock: bcache.bucket: #test-and-set 0 #acquire() 13 -lock: bcache.bucket: #test-and-set 0 #acquire() 22 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 30 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 88 -lock: bcache.bucket: #test-and-set 0 #acquire() 84 -lock: bcache.bucket: #test-and-set 0 #acquire() 1145 -lock: bcache.bucket: #test-and-set 0 #acquire() 16 -lock: bcache.bucket: #test-and-set 0 #acquire() 4 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 ---- top 5 contended locks: -lock: proc: #test-and-set 135932 #acquire() 2617654 -lock: proc: #test-and-set 99612 #acquire() 5132219 -lock: virtio_disk: #test-and-set 87542 #acquire() 147 -lock: proc: #test-and-set 46889 #acquire() 2538791 -lock: proc: #test-and-set 33853 #acquire() 1817240 -tot= 2949 - -test3 OK -$ usertests sbrkmuch -usertests starting -test sbrkmuch: OK -ALL TESTS PASSED -$ usertests -q -... -ALL TESTS PASSED -$ -``` - -Some hints: - -- You can use the constant `NCPU` from kernel/param.h - -- Let `freerange` give all free memory to the CPU running `freerange`. - -- The function `cpuid` returns the current core number, but it's only safe to call it and use its result when interrupts are turned off. You should use `push_off()` and `pop_off()` to turn interrupts off and on. - -- Have a look at the `snprintf` function in kernel/sprintf.c for string formatting ideas. It is OK to just name all locks ""kmem"" though. 
- -- Optionally run your solution using xv6's race detector: - - ``` - $ make clean - $ make KCSAN=1 qemu - $ kalloctest - .. - - ``` - - The kalloctest may fail but you shouldn't see any races. If the xv6's race detector observes a race, it will print two stack traces describing the races along the following lines: - - ``` - == race detected == - backtrace for racing load - 0x000000008000ab8a - 0x000000008000ac8a - 0x000000008000ae7e - 0x0000000080000216 - 0x00000000800002e0 - 0x0000000080000f54 - 0x0000000080001d56 - 0x0000000080003704 - 0x0000000080003522 - 0x0000000080002fdc - backtrace for watchpoint: - 0x000000008000ad28 - 0x000000008000af22 - 0x000000008000023c - 0x0000000080000292 - 0x0000000080000316 - 0x000000008000098c - 0x0000000080000ad2 - 0x000000008000113a - 0x0000000080001df2 - 0x000000008000364c - 0x0000000080003522 - 0x0000000080002fdc - ========== - - ``` - - On your OS, you can turn a backtrace into function names with line numbers by cutting and pasting it into addr2line: - - ``` - $ riscv64-linux-gnu-addr2line -e kernel/kernel - 0x000000008000ab8a - 0x000000008000ac8a - 0x000000008000ae7e - 0x0000000080000216 - 0x00000000800002e0 - 0x0000000080000f54 - 0x0000000080001d56 - 0x0000000080003704 - 0x0000000080003522 - 0x0000000080002fdc - ctrl-d - kernel/kcsan.c:157 - kernel/kcsan.c:241 - kernel/kalloc.c:174 - kernel/kalloc.c:211 - kernel/vm.c:255 - kernel/proc.c:295 - kernel/sysproc.c:54 - kernel/syscall.c:251 - - ``` - - You are not required to run the race detector, but you might find it helpful. Note that the race detector slows xv6 down significantly, so you probably don't want to use it when running usertests . - -## Buffer cache ([hard](https://pdos.csail.mit.edu/6.1810/2024/labs/guidance.html))","Your job is to implement per-CPU freelists, and stealing when a CPU's free list is empty. You must give all of your locks names that start with ""kmem"". That is, you should call `initlock` for each of your locks, and pass a name that starts with ""kmem"". Run kalloctest to see if your implementation has reduced lock contention. To check that it can still allocate all of memory, run `usertests sbrkmuch`. Your output will look similar to that shown below, with much-reduced contention in total on kmem locks, although the specific numbers will differ. Make sure all tests in `usertests -q` pass. `make grade` should say that the kalloctests pass.","Some hints: - -- You can use the constant `NCPU` from kernel/param.h - -- Let `freerange` give all free memory to the CPU running `freerange`. - -- The function `cpuid` returns the current core number, but it's only safe to call it and use its result when interrupts are turned off. You should use `push_off()` and `pop_off()` to turn interrupts off and on. - -- Have a look at the `snprintf` function in kernel/sprintf.c for string formatting ideas. It is OK to just name all locks ""kmem"" though. - -- Optionally run your solution using xv6's race detector: - - ``` - $ make clean - $ make KCSAN=1 qemu - $ kalloctest - .. - - ``` - - The kalloctest may fail but you shouldn't see any races. 
If the xv6's race detector observes a race, it will print two stack traces describing the races along the following lines: - - ``` - == race detected == - backtrace for racing load - 0x000000008000ab8a - 0x000000008000ac8a - 0x000000008000ae7e - 0x0000000080000216 - 0x00000000800002e0 - 0x0000000080000f54 - 0x0000000080001d56 - 0x0000000080003704 - 0x0000000080003522 - 0x0000000080002fdc - backtrace for watchpoint: - 0x000000008000ad28 - 0x000000008000af22 - 0x000000008000023c - 0x0000000080000292 - 0x0000000080000316 - 0x000000008000098c - 0x0000000080000ad2 - 0x000000008000113a - 0x0000000080001df2 - 0x000000008000364c - 0x0000000080003522 - 0x0000000080002fdc - ========== - - ``` - - On your OS, you can turn a backtrace into function names with line numbers by cutting and pasting it into addr2line: - - ``` - $ riscv64-linux-gnu-addr2line -e kernel/kernel - 0x000000008000ab8a - 0x000000008000ac8a - 0x000000008000ae7e - 0x0000000080000216 - 0x00000000800002e0 - 0x0000000080000f54 - 0x0000000080001d56 - 0x0000000080003704 - 0x0000000080003522 - 0x0000000080002fdc - ctrl-d - kernel/kcsan.c:157 - kernel/kcsan.c:241 - kernel/kalloc.c:174 - kernel/kalloc.c:211 - kernel/vm.c:255 - kernel/proc.c:295 - kernel/sysproc.c:54 - kernel/syscall.c:251 - - ``` - - You are not required to run the race detector, but you might find it helpful. Note that the race detector slows xv6 down significantly, so you probably don't want to use it when running usertests .",," $ git fetch - $ git checkout lock - $ make clean",,xv6-labs-2024,kalloctest,"start test1 -test1 results: ---- lock kmem/bcache stats -lock: kmem: #test-and-set 0 #acquire() 94703 -lock: kmem: #test-and-set 0 #acquire() 173699 -lock: kmem: #test-and-set 0 #acquire() 164725 -lock: bcache: #test-and-set 0 #acquire() 32 -lock: bcache.bucket: #test-and-set 0 #acquire() 38 -lock: bcache.bucket: #test-and-set 0 #acquire() 13 -lock: bcache.bucket: #test-and-set 0 #acquire() 22 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 30 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 88 -lock: bcache.bucket: #test-and-set 0 #acquire() 80 -lock: bcache.bucket: #test-and-set 0 #acquire() 1045 -lock: bcache.bucket: #test-and-set 0 #acquire() 16 -lock: bcache.bucket: #test-and-set 0 #acquire() 4 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 ---- top 5 contended locks: -lock: virtio_disk: #test-and-set 87542 #acquire() 147 -lock: proc: #test-and-set 37123 #acquire() 497420 -lock: proc: #test-and-set 27415 #acquire() 497425 -lock: wait_lock: #test-and-set 9650 #acquire() 12 -lock: pr: #test-and-set 4451 #acquire() 5 -tot= 0 -test1 OK -start test2 -total free number of pages: 32463 (out of 32768) -..... 
-test2 OK -start test3 -..........child done 100000 ---- lock kmem/bcache stats -lock: kmem: #test-and-set 758 #acquire() 1375324 -lock: kmem: #test-and-set 796 #acquire() 1864634 -lock: kmem: #test-and-set 1395 #acquire() 1779346 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: kmem: #test-and-set 0 #acquire() 58 -lock: bcache: #test-and-set 0 #acquire() 32 -lock: bcache.bucket: #test-and-set 0 #acquire() 38 -lock: bcache.bucket: #test-and-set 0 #acquire() 13 -lock: bcache.bucket: #test-and-set 0 #acquire() 22 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 30 -lock: bcache.bucket: #test-and-set 0 #acquire() 18 -lock: bcache.bucket: #test-and-set 0 #acquire() 88 -lock: bcache.bucket: #test-and-set 0 #acquire() 84 -lock: bcache.bucket: #test-and-set 0 #acquire() 1145 -lock: bcache.bucket: #test-and-set 0 #acquire() 16 -lock: bcache.bucket: #test-and-set 0 #acquire() 4 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 -lock: bcache.bucket: #test-and-set 0 #acquire() 8 ---- top 5 contended locks: -lock: proc: #test-and-set 135932 #acquire() 2617654 -lock: proc: #test-and-set 99612 #acquire() 5132219 -lock: virtio_disk: #test-and-set 87542 #acquire() 147 -lock: proc: #test-and-set 46889 #acquire() 2538791 -lock: proc: #test-and-set 33853 #acquire() 1817240 -tot= 2949 - -test3 OK -$ usertests sbrkmuch -usertests starting -test sbrkmuch: OK -ALL TESTS PASSED -$ usertests -q -... -ALL TESTS PASSED",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/lock.html -47,6.1810: Operating System Engineering,2024,Lab: locks,Buffer cache,"In this lab you'll gain experience in re-designing code to increase parallelism. A common symptom of poor parallelism on multi-core machines is high lock contention. Improving parallelism often involves changing both data structures and locking strategies in order to reduce contention. You'll do this for the xv6 memory allocator and block cache. - -Before writing code, make sure to read the following parts from the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf) : - -- Chapter 6: ""Locking"" and the corresponding code. -- Section 3.5: ""Code: Physical memory allocator"" -- Section 8.1 through 8.3: ""Overview"", ""Buffer cache layer"", and ""Code: Buffer cache"" - -``` - $ git fetch - $ git checkout lock - $ make clean -``` -",,,"This half of the assignment is independent from the first half; you can work on this half (and pass the tests) whether or not you have completed the first half. - -If multiple processes use the file system intensively, they will likely contend for `bcache.lock`, which protects the disk block cache in kernel/bio.c. 
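Before moving on to the buffer cache, here is a minimal sketch of the per-CPU freelist layout that the kalloc half above calls for. It assumes xv6's usual kernel declarations (`struct run`, `struct spinlock`, `NCPU`, `PGSIZE`, `PHYSTOP`, `cpuid()`, `push_off()`/`pop_off()`, the `initlock`/`acquire`/`release` API, and the stock `freerange()`), and it omits the junk-filling and sanity checks of the original `kalloc.c`; it is an illustration of the hints, not a reference solution.

```c
// kernel/kalloc.c (sketch): one freelist and one lock per CPU, all named "kmem".
// Assumes kalloc.c's usual includes and helpers from the stock xv6 kernel.
extern char end[];                     // first address after the kernel (kernel.ld)

struct {
  struct spinlock lock;
  struct run *freelist;
} kmem[NCPU];

void
kinit(void)
{
  for(int i = 0; i < NCPU; i++)
    initlock(&kmem[i].lock, "kmem");   // every lock name starts with "kmem"
  freerange(end, (void*)PHYSTOP);      // all free pages go to the CPU running freerange
}

void
kfree(void *pa)
{
  struct run *r = (struct run*)pa;

  push_off();                          // cpuid() is only valid with interrupts off
  int c = cpuid();
  pop_off();

  acquire(&kmem[c].lock);
  r->next = kmem[c].freelist;
  kmem[c].freelist = r;
  release(&kmem[c].lock);
}

void *
kalloc(void)
{
  struct run *r;

  push_off();
  int c = cpuid();
  pop_off();

  acquire(&kmem[c].lock);
  r = kmem[c].freelist;
  if(r)
    kmem[c].freelist = r->next;
  release(&kmem[c].lock);

  // Local list empty: steal a page from another CPU's freelist.
  for(int i = 0; !r && i < NCPU; i++){
    if(i == c)
      continue;
    acquire(&kmem[i].lock);
    r = kmem[i].freelist;
    if(r)
      kmem[i].freelist = r->next;
    release(&kmem[i].lock);
  }
  return (void*)r;
}
```

Stealing here takes one page at a time, which is enough to satisfy `kalloctest`; grabbing a batch of pages per steal is a possible refinement.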
`bcachetest` creates several processes that repeatedly read different files in order to generate contention on `bcache.lock`; its output looks like this (before you complete this lab): - -``` -$ bcachetest -start test0 -test0 results: ---- lock kmem/bcache stats -lock: kmem: #test-and-set 0 #acquire() 33099 -lock: bcache: #test-and-set 10273 #acquire() 65964 ---- top 5 contended locks: -lock: virtio_disk: #test-and-set 814630 #acquire() 1221 -lock: proc: #test-and-set 57695 #acquire() 67093 -lock: proc: #test-and-set 24368 #acquire() 67103 -lock: bcache: #test-and-set 10273 #acquire() 65964 -lock: pr: #test-and-set 3441 #acquire() 5 -tot= 10273 -test0: FAIL -start test1 - -test1 OK -start test2 - -test2 OK -start test3 - -test3 OK -``` - -You will likely see different output, but the number of test-and-sets for the `bcache` lock will be high. If you look at the code in `kernel/bio.c`, you'll see that `bcache.lock` protects the list of cached block buffers, the reference count (`b->refcnt`) in each block buffer, and the identities of the cached blocks (`b->dev` and `b->blockno`). - - - -Modify the block cache so that the number of `acquire` loop iterations for all locks in the bcache is close to zero when running `bcachetest`. Ideally the sum of the counts for all locks involved in the block cache should be zero, but it's OK if the sum is less than 500. Modify `bget` and `brelse` so that concurrent lookups and releases for different blocks that are in the bcache are unlikely to conflict on locks (e.g., don't all have to wait for `bcache.lock`). You must maintain the invariant that at most one copy of each block is cached. You must not increase the number of buffers; there must be exactly NBUF (30) of them. Your modified cache does not need to use LRU replacement, but it must be able to use any of the NBUF `struct buf`s with zero `refcnt` when it misses in the cache. When you are done, your output should be similar to that shown below (though not identical). Make sure 'usertests -q' still passes. `make grade` should pass all tests when you are done. - -``` -$ bcachetest -start test0 -test0 results: ---- lock kmem/bcache stats -lock: kmem: #test-and-set 0 #acquire() 33030 -lock: kmem: #test-and-set 0 #acquire() 28 -lock: kmem: #test-and-set 0 #acquire() 73 -lock: bcache: #test-and-set 0 #acquire() 96 -lock: bcache.bucket: #test-and-set 0 #acquire() 6229 -lock: bcache.bucket: #test-and-set 0 #acquire() 6204 -lock: bcache.bucket: #test-and-set 0 #acquire() 4298 -lock: bcache.bucket: #test-and-set 0 #acquire() 4286 -lock: bcache.bucket: #test-and-set 0 #acquire() 2302 -lock: bcache.bucket: #test-and-set 0 #acquire() 4272 -lock: bcache.bucket: #test-and-set 0 #acquire() 2695 -lock: bcache.bucket: #test-and-set 0 #acquire() 4709 -lock: bcache.bucket: #test-and-set 0 #acquire() 6512 -lock: bcache.bucket: #test-and-set 0 #acquire() 6197 -lock: bcache.bucket: #test-and-set 0 #acquire() 6196 -lock: bcache.bucket: #test-and-set 0 #acquire() 6201 -lock: bcache.bucket: #test-and-set 0 #acquire() 6201 ---- top 5 contended locks: -lock: virtio_disk: #test-and-set 1483888 #acquire() 1221 -lock: proc: #test-and-set 38718 #acquire() 76050 -lock: proc: #test-and-set 34460 #acquire() 76039 -lock: proc: #test-and-set 31663 #acquire() 75963 -lock: wait_lock: #test-and-set 11794 #acquire() 16 -tot= 0 -test0: OK -start test1 - -test1 OK -start test2 - -test2 OK -start test3 - -test3 OK -$ usertests -q - ... -ALL TESTS PASSED -$ -``` - -Please give all of your locks names that start with ""bcache"". 
That is, you should call `initlock` for each of your locks, and pass a name that starts with ""bcache"". - -Reducing contention in the block cache is more tricky than for kalloc, because bcache buffers are truly shared among processes (and thus CPUs). For kalloc, one could eliminate most contention by giving each CPU its own allocator; that won't work for the block cache. We suggest you look up block numbers in the cache with a hash table that has a lock per hash bucket. - -There are some circumstances in which it's OK if your solution has lock conflicts: - -- When two processes concurrently use the same block number. `bcachetest` `test0` doesn't ever do this. -- When two processes concurrently miss in the cache, and need to find an unused block to replace. `bcachetest` `test0` doesn't ever do this. -- When two processes concurrently use blocks that conflict in whatever scheme you use to partition the blocks and locks; for example, if two processes use blocks whose block numbers hash to the same slot in a hash table. `bcachetest` `test0` might do this, depending on your design, but you should try to adjust your scheme's details to avoid conflicts (e.g., change the size of your hash table). - -`bcachetest`'s `test1` uses more distinct blocks than there are buffers, and exercises lots of file system code paths. - -Here are some hints: - -- Read the description of the block cache in the xv6 book (Section 8.1-8.3). -- It is OK to use a fixed number of buckets and not resize the hash table dynamically. Use a prime number of buckets (e.g., 13) to reduce the likelihood of hashing conflicts. -- Searching in the hash table for a buffer and allocating an entry for that buffer when the buffer is not found must be atomic. -- Remove the list of all buffers (`bcache.head` etc.) and don't implement LRU. With this change `brelse` doesn't need to acquire the bcache lock. In `bget` you can select any block that has `refcnt == 0` instead of the least-recently used one. -- You probably won't be able to atomically check for a cached buf and (if not cached) find an unused buf; you will likely have to drop all locks and start from scratch if the buffer isn't in the cache. It is OK to serialize finding an unused buf in `bget` (i.e., the part of `bget` that selects a buffer to re-use when a lookup misses in the cache). -- Your solution might need to hold two locks in some cases; for example, during eviction you may need to hold the bcache lock and a lock per bucket. Make sure you avoid deadlock. -- When replacing a block, you might move a `struct buf` from one bucket to another bucket, because the new block hashes to a different bucket. You might have a tricky case: the new block might hash to the same bucket as the old block. Make sure you avoid deadlock in that case. -- Some debugging tips: implement bucket locks but leave the global bcache.lock acquire/release at the beginning/end of bget to serialize the code. Once you are sure it is correct without race conditions, remove the global locks and deal with concurrency issues. You can also run `make CPUS=1 qemu` to test with one core. -- Use xv6's race detector to find potential races (see above how to use the race detector).","Modify the block cache so that the number of `acquire` loop iterations for all locks in the bcache is close to zero when running `bcachetest`. Ideally the sum of the counts for all locks involved in the block cache should be zero, but it's OK if the sum is less than 500. 
Modify `bget` and `brelse` so that concurrent lookups and releases for different blocks that are in the bcache are unlikely to conflict on locks (e.g., don't all have to wait for `bcache.lock`). You must maintain the invariant that at most one copy of each block is cached. You must not increase the number of buffers; there must be exactly NBUF (30) of them. Your modified cache does not need to use LRU replacement, but it must be able to use any of the NBUF `struct buf`s with zero `refcnt` when it misses in the cache. When you are done, your output should be similar to that shown below (though not identical). Make sure 'usertests -q' still passes. `make grade` should pass all tests when you are done.","Here are some hints: - -- Read the description of the block cache in the xv6 book (Section 8.1-8.3). -- It is OK to use a fixed number of buckets and not resize the hash table dynamically. Use a prime number of buckets (e.g., 13) to reduce the likelihood of hashing conflicts. -- Searching in the hash table for a buffer and allocating an entry for that buffer when the buffer is not found must be atomic. -- Remove the list of all buffers (`bcache.head` etc.) and don't implement LRU. With this change `brelse` doesn't need to acquire the bcache lock. In `bget` you can select any block that has `refcnt == 0` instead of the least-recently used one. -- You probably won't be able to atomically check for a cached buf and (if not cached) find an unused buf; you will likely have to drop all locks and start from scratch if the buffer isn't in the cache. It is OK to serialize finding an unused buf in `bget` (i.e., the part of `bget` that selects a buffer to re-use when a lookup misses in the cache). -- Your solution might need to hold two locks in some cases; for example, during eviction you may need to hold the bcache lock and a lock per bucket. Make sure you avoid deadlock. -- When replacing a block, you might move a `struct buf` from one bucket to another bucket, because the new block hashes to a different bucket. You might have a tricky case: the new block might hash to the same bucket as the old block. Make sure you avoid deadlock in that case. -- Some debugging tips: implement bucket locks but leave the global bcache.lock acquire/release at the beginning/end of bget to serialize the code. Once you are sure it is correct without race conditions, remove the global locks and deal with concurrency issues. You can also run `make CPUS=1 qemu` to test with one core. -- Use xv6's race detector to find potential races (see above how to use the race detector). 
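To make the lock-per-bucket idea in these hints concrete, a rough sketch of the lookup path follows. It assumes xv6's `struct buf`, `uint`, and spinlock/sleeplock APIs, uses 13 buckets as suggested above, and leaves out the serialized eviction path taken on a cache miss (the part of `bget` that claims a `refcnt == 0` buffer, rehashes it into the right bucket, and re-checks for a racing insertion); again, this is a sketch of the hints, not a reference solution.

```c
// kernel/bio.c (sketch): one lock per hash bucket instead of a single bcache.lock.
#define NBUCKET 13                          // prime, to spread block numbers out

struct {
  struct spinlock lock;                     // name passed to initlock() starts with "bcache"
  struct buf head;                          // circular list of bufs whose blockno hashes here
} bucket[NBUCKET];

// binit() must initlock() each bucket lock and initialize each head as an
// empty circular list (not shown here).

static uint
bhash(uint dev, uint blockno)
{
  return (dev + blockno) % NBUCKET;
}

// Fast path of bget(): only the target bucket's lock is held, so lookups for
// blocks in different buckets no longer serialize on one global lock.
static struct buf*
bget(uint dev, uint blockno)
{
  uint h = bhash(dev, blockno);
  struct buf *b;

  acquire(&bucket[h].lock);
  for(b = bucket[h].head.next; b != &bucket[h].head; b = b->next){
    if(b->dev == dev && b->blockno == blockno){
      b->refcnt++;
      release(&bucket[h].lock);
      acquiresleep(&b->lock);
      return b;
    }
  }
  release(&bucket[h].lock);

  // Miss: the eviction path (omitted in this sketch) must re-check this
  // bucket for a racing insertion of the same block before reusing a buffer.
  panic("bget: eviction path not shown in this sketch");
}
```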
-",," $ git fetch - $ git checkout lock - $ make clean",,xv6-labs-2024,bcachetest,"start test0 -test0 results: ---- lock kmem/bcache stats -lock: kmem: #test-and-set 0 #acquire() 33030 -lock: kmem: #test-and-set 0 #acquire() 28 -lock: kmem: #test-and-set 0 #acquire() 73 -lock: bcache: #test-and-set 0 #acquire() 96 -lock: bcache.bucket: #test-and-set 0 #acquire() 6229 -lock: bcache.bucket: #test-and-set 0 #acquire() 6204 -lock: bcache.bucket: #test-and-set 0 #acquire() 4298 -lock: bcache.bucket: #test-and-set 0 #acquire() 4286 -lock: bcache.bucket: #test-and-set 0 #acquire() 2302 -lock: bcache.bucket: #test-and-set 0 #acquire() 4272 -lock: bcache.bucket: #test-and-set 0 #acquire() 2695 -lock: bcache.bucket: #test-and-set 0 #acquire() 4709 -lock: bcache.bucket: #test-and-set 0 #acquire() 6512 -lock: bcache.bucket: #test-and-set 0 #acquire() 6197 -lock: bcache.bucket: #test-and-set 0 #acquire() 6196 -lock: bcache.bucket: #test-and-set 0 #acquire() 6201 -lock: bcache.bucket: #test-and-set 0 #acquire() 6201 ---- top 5 contended locks: -lock: virtio_disk: #test-and-set 1483888 #acquire() 1221 -lock: proc: #test-and-set 38718 #acquire() 76050 -lock: proc: #test-and-set 34460 #acquire() 76039 -lock: proc: #test-and-set 31663 #acquire() 75963 -lock: wait_lock: #test-and-set 11794 #acquire() 16 -tot= 0 -test0: OK -start test1 - -test1 OK -start test2 - -test2 OK -start test3 - -test3 OK -$ usertests -q - ... -ALL TESTS PASSED",hard,https://pdos.csail.mit.edu/6.1810/2024/labs/lock.html -48,6.1810: Operating System Engineering,2024,Lab: file system,Large files,"In this lab you will add large files and symbolic links to the xv6 file system. - -Before writing code, you should read ""Chapter 8: File system"" from the [xv6 book](https://pdos.csail.mit.edu/6.1810/2024/xv6/book-riscv-rev4.pdf) and study the corresponding code. - -Fetch the xv6 source for the lab and check out the `util` branch: - -``` - $ git fetch - $ git checkout fs - $ make clean -``` -",,,"In this assignment you'll increase the maximum size of an xv6 file. Currently xv6 files are limited to 268 blocks, or 268*BSIZE bytes (BSIZE is 1024 in xv6). This limit comes from the fact that an xv6 inode contains 12 ""direct"" block numbers and one ""singly-indirect"" block number, which refers to a block that holds up to 256 more block numbers, for a total of 12+256=268 blocks. - -The `bigfile` command creates the longest file it can, and reports that size: - -``` -$ bigfile -.. -wrote 268 blocks -bigfile: file is too small -$ -``` - -The test fails because `bigfile` expects to be able to create a file with 65803 blocks, but unmodified xv6 limits files to 268 blocks. - -You'll change the xv6 file system code to support a ""doubly-indirect"" block in each inode, containing 256 addresses of singly-indirect blocks, each of which can contain up to 256 addresses of data blocks. The result will be that a file will be able to consist of up to 65803 blocks, or 256*256+256+11 blocks (11 instead of 12, because we will sacrifice one of the direct block numbers for the double-indirect block). - -### Preliminaries - -The `mkfs` program creates the xv6 file system disk image and determines how many total blocks the file system has; this size is controlled by `FSSIZE` in `kernel/param.h`. You'll see that `FSSIZE` in the repository for this lab is set to 200,000 blocks. 
You should see the following output from `mkfs/mkfs` in the make output: - -``` -nmeta 70 (boot, super, log blocks 30 inode blocks 13, bitmap blocks 25) blocks 199930 total 200000 -``` - -This line describes the file system that `mkfs/mkfs` built: it has 70 meta-data blocks (blocks used to describe the file system) and 199,930 data blocks, totaling 200,000 blocks. - -Note that `make qemu` builds a new `fs.img`, and saves the old one in `fs.img.bk`. If you want to run xv6 with the existing `fs.img` instead of building a new one, run `make qemu-fs`. - -### What to Look At - -The format of an on-disk inode is defined by `struct dinode` in `fs.h`. You're particularly interested in `NDIRECT`, `NINDIRECT`, `MAXFILE`, and the `addrs[]` element of `struct dinode`. Look at Figure 8.3 in the xv6 text for a diagram of the standard xv6 inode. - -The code that finds a file's data on disk is in `bmap()` in `fs.c`. Have a look at it and make sure you understand what it's doing. `bmap()` is called both when reading and writing a file. When writing, `bmap()` allocates new blocks as needed to hold file content, as well as allocating an indirect block if needed to hold block addresses. - -`bmap()` deals with two kinds of block numbers. The `bn` argument is a ""logical block number"" -- a block number within the file, relative to the start of the file. The block numbers in `ip->addrs[]`, and the argument to `bread()`, are disk block numbers. You can view `bmap()` as mapping a file's logical block numbers into disk block numbers. - -### Your Job - -Modify `bmap()` so that it implements a doubly-indirect block, in addition to direct blocks and a singly-indirect block. You'll have to have only 11 direct blocks, rather than 12, to make room for your new doubly-indirect block; you're not allowed to change the size of an on-disk inode. The first 11 elements of `ip->addrs[]` should be direct blocks; the 12th should be a singly-indirect block (just like the current one); the 13th should be your new doubly-indirect block. You are done with this exercise when `bigfile` writes 65803 blocks and `usertests -q` runs successfully: - -``` -$ bigfile -.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. -wrote 65803 blocks -done; ok -$ usertests -q -... -ALL TESTS PASSED -$ -``` - -`bigfile` will take at least a minute and a half to run. - -Hints: - -- Make sure you understand `bmap()`. Write out a diagram of the relationships between `ip->addrs[]`, the indirect block, the doubly-indirect block and the singly-indirect blocks it points to, and data blocks. Make sure you understand why adding a doubly-indirect block increases the maximum file size by 256*256 blocks (really -1, since you have to decrease the number of direct blocks by one). -- Think about how you'll index the doubly-indirect block, and the indirect blocks it points to, with the logical block number. 
-- If you change the definition of `NDIRECT`, you'll probably have to change the declaration of `addrs[]` in `struct inode` in `file.h`. Make sure that `struct inode` and `struct dinode` have the same number of elements in their `addrs[]` arrays. -- If you change the definition of `NDIRECT`, make sure to create a new `fs.img`, since `mkfs` uses `NDIRECT` to build the file system. -- If your file system gets into a bad state, perhaps by crashing, delete `fs.img` (do this from Unix, not xv6). `make` will build a new clean file system image for you. -- Don't forget to `brelse()` each block that you `bread()`. -- You should allocate indirect blocks and doubly-indirect blocks only as needed, like the original `bmap()`. -- Make sure `itrunc` frees all blocks of a file, including double-indirect blocks. -- `usertests` takes longer to run than in previous labs because for this lab `FSSIZE` is larger and big files are larger.","Modify `bmap()` so that it implements a doubly-indirect block, in addition to direct blocks and a singly-indirect block. You'll have to have only 11 direct blocks, rather than 12, to make room for your new doubly-indirect block; you're not allowed to change the size of an on-disk inode. The first 11 elements of `ip->addrs[]` should be direct blocks; the 12th should be a singly-indirect block (just like the current one); the 13th should be your new doubly-indirect block. You are done with this exercise when `bigfile` writes 65803 blocks and `usertests -q` runs successfully: - -``` -$ bigfile -.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. -wrote 65803 blocks -done; ok -$ usertests -q -... -ALL TESTS PASSED -$ -``` - -`bigfile` will take at least a minute and a half to run.","- Make sure you understand `bmap()`. Write out a diagram of the relationships between `ip->addrs[]`, the indirect block, the doubly-indirect block and the singly-indirect blocks it points to, and data blocks. Make sure you understand why adding a doubly-indirect block increases the maximum file size by 256*256 blocks (really -1, since you have to decrease the number of direct blocks by one). -- Think about how you'll index the doubly-indirect block, and the indirect blocks it points to, with the logical block number. -- If you change the definition of `NDIRECT`, you'll probably have to change the declaration of `addrs[]` in `struct inode` in `file.h`. Make sure that `struct inode` and `struct dinode` have the same number of elements in their `addrs[]` arrays. -- If you change the definition of `NDIRECT`, make sure to create a new `fs.img`, since `mkfs` uses `NDIRECT` to build the file system. -- If your file system gets into a bad state, perhaps by crashing, delete `fs.img` (do this from Unix, not xv6). `make` will build a new clean file system image for you. -- Don't forget to `brelse()` each block that you `bread()`. 
-- You should allocate indirect blocks and doubly-indirect blocks only as needed, like the original `bmap()`. -- Make sure `itrunc` frees all blocks of a file, including double-indirect blocks. -- `usertests` takes longer to run than in previous labs because for this lab `FSSIZE` is larger and big files are larger.",," $ git fetch - $ git checkout fs - $ make clean",,xv6-labs-2024,bigfile,".................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. -wrote 65803 blocks -done; ok -$ usertests -q -... -ALL TESTS PASSED",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/fs.html -49,6.1810: Operating System Engineering,2024,Lab: file system,Symbolic links,"In this exercise you will add symbolic links to xv6. Symbolic links (or soft links) refer to a linked file or directory by pathname; when a symbolic link is opened, the kernel looks up the linked-to name. Symbolic links resemble hard links, but hard links are restricted to pointing to files on the same disk, cannot refer to directories, and are tied to a specific target i-node rather than (as with symbolic links) referring to whatever happens at the moment to be at the target name, if anything. Implementing this system call is a good exercise to understand how pathname lookup works. - -You do not have to handle symbolic links to directories for this lab; the only system call that needs to know how to follow symbolic links is `open()`. - -### Your job - -You will implement the `symlink(char *target, char *path)` system call, which creates a new symbolic link at path that refers to file named by target. For further information, see the man page symlink. To test, add symlinktest to the Makefile and run it. Your solution is complete when the tests produce the following output (including usertests succeeding). - -``` -$ symlinktest -Start: test symlinks -test symlinks: ok -Start: test concurrent symlinks -test concurrent symlinks: ok -$ usertests -q -... -ALL TESTS PASSED -$ -``` - -Hints: - -- First, create a new system call number for symlink, add an entry to user/usys.pl, user/user.h, and implement an empty sys_symlink in kernel/sysfile.c. -- Add a new file type (`T_SYMLINK`) to kernel/stat.h to represent a symbolic link. -- Add a new flag to kernel/fcntl.h, (`O_NOFOLLOW`), that can be used with the `open` system call. Note that flags passed to `open` are combined using a bitwise OR operator, so your new flag should not overlap with any existing flags. This will let you compile user/symlinktest.c once you add it to the Makefile. -- Implement the `symlink(target, path)` system call to create a new symbolic link at path that refers to target. Note that target does not need to exist for the system call to succeed. You will need to choose somewhere to store the target path of a symbolic link, for example, in the inode's data blocks. `symlink` should return an integer representing success (0) or failure (-1) similar to `link` and `unlink`. 
-- Modify the `open` system call to handle the case where the path refers to a symbolic link. If the file does not exist, `open` must fail. When a process specifies `O_NOFOLLOW` in the flags to `open`, `open` should open the symlink (and not follow the symbolic link). -- If the linked file is also a symbolic link, you must recursively follow it until a non-link file is reached. If the links form a cycle, you must return an error code. You may approximate this by returning an error code if the depth of links reaches some threshold (e.g., 10). -- Other system calls (e.g., link and unlink) must not follow symbolic links; these system calls operate on the symbolic link itself. -",,,"In this exercise you will add symbolic links to xv6. Symbolic links (or soft links) refer to a linked file or directory by pathname; when a symbolic link is opened, the kernel looks up the linked-to name. Symbolic links resemble hard links, but hard links are restricted to pointing to files on the same disk, cannot refer to directories, and are tied to a specific target i-node rather than (as with symbolic links) referring to whatever happens at the moment to be at the target name, if anything. Implementing this system call is a good exercise to understand how pathname lookup works. - -You do not have to handle symbolic links to directories for this lab; the only system call that needs to know how to follow symbolic links is `open()`. - -### Your job - -You will implement the `symlink(char *target, char *path)` system call, which creates a new symbolic link at path that refers to file named by target. For further information, see the man page symlink. To test, add symlinktest to the Makefile and run it. Your solution is complete when the tests produce the following output (including usertests succeeding). - -``` -$ symlinktest -Start: test symlinks -test symlinks: ok -Start: test concurrent symlinks -test concurrent symlinks: ok -$ usertests -q -... -ALL TESTS PASSED -$ -``` - - - -Hints: - -- First, create a new system call number for symlink, add an entry to user/usys.pl, user/user.h, and implement an empty sys_symlink in kernel/sysfile.c. -- Add a new file type (`T_SYMLINK`) to kernel/stat.h to represent a symbolic link. -- Add a new flag to kernel/fcntl.h, (`O_NOFOLLOW`), that can be used with the `open` system call. Note that flags passed to `open` are combined using a bitwise OR operator, so your new flag should not overlap with any existing flags. This will let you compile user/symlinktest.c once you add it to the Makefile. -- Implement the `symlink(target, path)` system call to create a new symbolic link at path that refers to target. Note that target does not need to exist for the system call to succeed. You will need to choose somewhere to store the target path of a symbolic link, for example, in the inode's data blocks. `symlink` should return an integer representing success (0) or failure (-1) similar to `link` and `unlink`. -- Modify the `open` system call to handle the case where the path refers to a symbolic link. If the file does not exist, `open` must fail. When a process specifies `O_NOFOLLOW` in the flags to `open`, `open` should open the symlink (and not follow the symbolic link). -- If the linked file is also a symbolic link, you must recursively follow it until a non-link file is reached. If the links form a cycle, you must return an error code. You may approximate this by returning an error code if the depth of links reaches some threshold (e.g., 10). 
-- Other system calls (e.g., link and unlink) must not follow symbolic links; these system calls operate on the symbolic link itself.","You will implement the `symlink(char *target, char *path)` system call, which creates a new symbolic link at path that refers to file named by target. For further information, see the man page symlink. To test, add symlinktest to the Makefile and run it. Your solution is complete when the tests produce the following output (including usertests succeeding). - -``` -$ symlinktest -Start: test symlinks -test symlinks: ok -Start: test concurrent symlinks -test concurrent symlinks: ok -$ usertests -q -... -ALL TESTS PASSED -$ -``` -","Hints: - -- First, create a new system call number for symlink, add an entry to user/usys.pl, user/user.h, and implement an empty sys_symlink in kernel/sysfile.c. -- Add a new file type (`T_SYMLINK`) to kernel/stat.h to represent a symbolic link. -- Add a new flag to kernel/fcntl.h, (`O_NOFOLLOW`), that can be used with the `open` system call. Note that flags passed to `open` are combined using a bitwise OR operator, so your new flag should not overlap with any existing flags. This will let you compile user/symlinktest.c once you add it to the Makefile. -- Implement the `symlink(target, path)` system call to create a new symbolic link at path that refers to target. Note that target does not need to exist for the system call to succeed. You will need to choose somewhere to store the target path of a symbolic link, for example, in the inode's data blocks. `symlink` should return an integer representing success (0) or failure (-1) similar to `link` and `unlink`. -- Modify the `open` system call to handle the case where the path refers to a symbolic link. If the file does not exist, `open` must fail. When a process specifies `O_NOFOLLOW` in the flags to `open`, `open` should open the symlink (and not follow the symbolic link). -- If the linked file is also a symbolic link, you must recursively follow it until a non-link file is reached. If the links form a cycle, you must return an error code. You may approximate this by returning an error code if the depth of links reaches some threshold (e.g., 10). -- Other system calls (e.g., link and unlink) must not follow symbolic links; these system calls operate on the symbolic link itself.",," $ git fetch - $ git checkout fs - $ make clean",,xv6-labs-2024,symlinktest,"Start: test symlinks -test symlinks: ok -Start: test concurrent symlinks -test concurrent symlinks: ok -$ usertests -q -... -ALL TESTS PASSED",moderate,https://pdos.csail.mit.edu/6.1810/2024/labs/fs.html -50,6.1810: Operating System Engineering,2024,Lab: mmap,Lab: mmap,,,,"The `mmap` and `munmap` system calls allow UNIX programs to exert detailed control over their address spaces. They can be used to share memory among processes, to map files into process address spaces, and as part of user-level page fault schemes such as the garbage-collection algorithms discussed in lecture. In this lab you'll add `mmap` and `munmap` to xv6, focusing on memory-mapped files. - -Fetch the xv6 source for the lab and check out the `mmap` branch: - -``` - $ git fetch - $ git checkout mmap - $ make clean -``` - -The manual page (run man 2 mmap) shows this declaration for `mmap`: - -``` -void *mmap(void *addr, size_t len, int prot, int flags, - int fd, off_t offset); -``` - -`mmap` can be called in many ways, but this lab requires only a subset of its features relevant to memory-mapping a file. 
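The hints further below suggest recording each mapping in a small per-process table of VMAs; a minimal sketch of that bookkeeping is shown here. The names (`struct vma`, `NVMA`, `vmas[]`) are illustrative rather than prescribed by the lab; what matters is that the address, length, `prot`, `flags`, backing `struct file`, and offset of each mapping can be recovered at page-fault and `munmap` time.

```c
// kernel/proc.h (sketch): per-process record of what mmap() has mapped.
// struct vma, NVMA, and vmas[] are illustrative names, not part of xv6.
#define NVMA 16                  // fixed-size table; the xv6 kernel has no malloc

struct vma {
  int used;                      // is this slot in use?
  uint64 addr;                   // start of the mapped range
  uint64 len;                    // length in bytes
  int prot;                      // PROT_READ and/or PROT_WRITE
  int flags;                     // MAP_SHARED or MAP_PRIVATE
  struct file *f;                // backing file, pinned with filedup()
  uint64 off;                    // starting file offset (always 0 in this lab)
};

// Added to struct proc:
//   struct vma vmas[NVMA];
//
// sys_mmap() picks a free slot and an unused virtual range, fills in the
// slot, calls filedup(f), and returns the chosen address; the usertrap()
// page-fault path and sys_munmap() consult the same slot later.
```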
You can assume that `addr` will always be zero, meaning that the kernel should decide the virtual address at which to map the file. `mmap` returns that address, or 0xffffffffffffffff if it fails. `len` is the number of bytes to map; it might not be the same as the file's length. `prot` indicates whether the memory should be mapped readable, writeable, and/or executable; you can assume that `prot` is `PROT_READ` or `PROT_WRITE` or both. `flags` will be either `MAP_SHARED`, meaning that modifications to the mapped memory should be written back to the file, or `MAP_PRIVATE`, meaning that they should not. You don't have to implement any other bits in `flags`. `fd` is the open file descriptor of the file to map. You can assume `offset` is zero (it's the starting point in the file at which to map). - -Your implementation should fill in the page table lazily, in response to page faults. That is, `mmap` itself should not allocate physical memory or read the file. Instead, do that in page fault handling code in (or called by) `usertrap`, as in the copy-on-write lab. The reason to be lazy is to ensure that `mmap` of a large file is fast, and that `mmap` of a file larger than physical memory is possible. - -It's OK if processes that map the same `MAP_SHARED` file do **not** share physical pages. - -The manual page (run man 2 munmap) shows this declaration for `munmap`: - -``` -int munmap(void *addr, size_t len); -``` - -`munmap` should remove mmap mappings in the indicated address range, if any. If the process has modified the memory and has it mapped `MAP_SHARED`, the modifications should first be written to the file. An `munmap` call might cover only a portion of an mmap-ed region, but you can assume that it will either unmap at the start, or at the end, or the whole region (but not punch a hole in the middle of a region). When a process exits, any modifictions it has made to `MAP_SHARED` regions should be written to the relevant files, as if the process had called `munmap`. - -You should implement enough `mmap` and `munmap` functionality to make the `mmaptest` test program work. If `mmaptest` doesn't use a `mmap` feature, you don't need to implement that feature. You must also ensure that `usertests -q` continues to work. - -When you're done, you should see output similar to this: - -``` -$ mmaptest -test basic mmap -test basic mmap: OK -test mmap private -test mmap private: OK -test mmap read-only -test mmap read-only: OK -test mmap read/write -test mmap read/write: OK -test mmap dirty -test mmap dirty: OK -test not-mapped unmap -test not-mapped unmap: OK -test lazy access -test lazy access: OK -test mmap two files -test mmap two files: OK -test fork -test fork: OK -test munmap prevents access -usertrap(): unexpected scause 0xd pid=7 - sepc=0x924 stval=0xc0001000 -usertrap(): unexpected scause 0xd pid=8 - sepc=0x9ac stval=0xc0000000 -test munmap prevents access: OK -test writes to read-only mapped memory -usertrap(): unexpected scause 0xf pid=9 - sepc=0xaf4 stval=0xc0000000 -test writes to read-only mapped memory: OK -mmaptest: all tests succeeded -$ usertests -q -usertests starting -... -ALL TESTS PASSED -$ -``` - -Here are some hints: - -- Start by adding `_mmaptest` to `UPROGS`, and `mmap` and `munmap` system calls, in order to get `user/mmaptest.c` to compile. For now, just return errors from `mmap` and `munmap`. We defined `PROT_READ` etc for you in `kernel/fcntl.h`. Run `mmaptest`, which will fail at the first mmap call. -- Keep track of what `mmap` has mapped for each process. 
Define a structure corresponding to the VMA (virtual memory area) described in the ""virtual memory for applications"" lecture. This should record the address, length, permissions, file, etc. for a virtual memory range created by `mmap`. Since the xv6 kernel doesn't have a variable-size memory allocator in the kernel, it's OK to declare a fixed-size array of VMAs and allocate from that array as needed. A size of 16 should be sufficient. -- Implement `mmap`: find an unused region in the process's address space in which to map the file, and add a VMA to the process's table of mapped regions. The VMA should contain a pointer to a `struct file` for the file being mapped; `mmap` should increase the file's reference count so that the structure doesn't disappear when the file is closed (hint: see `filedup`). Run `mmaptest`: the first `mmap` should succeed, but the first access to the mmap-ed memory will cause a page fault and kill `mmaptest`. -- Add code to cause a page-fault in a mmap-ed region to allocate a page of physical memory, read 4096 bytes of the relevant file into that page, and map it into the user address space. Read the file with `readi`, which takes an offset argument at which to read in the file (but you will have to lock/unlock the inode passed to `readi`). Don't forget to set the permissions correctly on the page. Run `mmaptest`; it should get to the first `munmap`. -- Implement `munmap`: find the VMA for the address range and unmap the specified pages (hint: use `uvmunmap`). If `munmap` removes all pages of a previous `mmap`, it should decrement the reference count of the corresponding `struct file`. If an unmapped page has been modified and the file is mapped `MAP_SHARED`, write the page back to the file. Look at `filewrite` for inspiration. -- Ideally your implementation would only write back `MAP_SHARED` pages that the program actually modified. The dirty bit (`D`) in the RISC-V PTE indicates whether a page has been written. However, `mmaptest` does not check that non-dirty pages are not written back; thus you can get away with writing pages back without looking at `D` bits. -- Modify `exit` to unmap the process's mapped regions as if `munmap` had been called. Run `mmaptest`; all tests through `test mmap two files` should pass, but probably not `test fork`. -- Modify `fork` to ensure that the child has the same mapped regions as the parent. Don't forget to increment the reference count for a VMA's `struct file`. In the page fault handler of the child, it is OK to allocate a new physical page instead of sharing a page with the parent. The latter would be cooler, but it would require more implementation work. Run `mmaptest`; it should pass all the tests. - -Run `usertests -q` to make sure everything still works. -","You should implement enough `mmap` and `munmap` functionality to make the `mmaptest` test program work. If `mmaptest` doesn't use a `mmap` feature, you don't need to implement that feature. You must also ensure that `usertests -q` continues to work.","Here are some hints: - -- Start by adding `_mmaptest` to `UPROGS`, and `mmap` and `munmap` system calls, in order to get `user/mmaptest.c` to compile. For now, just return errors from `mmap` and `munmap`. We defined `PROT_READ` etc for you in `kernel/fcntl.h`. Run `mmaptest`, which will fail at the first mmap call. -- Keep track of what `mmap` has mapped for each process. Define a structure corresponding to the VMA (virtual memory area) described in the ""virtual memory for applications"" lecture. 
This should record the address, length, permissions, file, etc. for a virtual memory range created by `mmap`. Since the xv6 kernel doesn't have a variable-size memory allocator in the kernel, it's OK to declare a fixed-size array of VMAs and allocate from that array as needed. A size of 16 should be sufficient. -- Implement `mmap`: find an unused region in the process's address space in which to map the file, and add a VMA to the process's table of mapped regions. The VMA should contain a pointer to a `struct file` for the file being mapped; `mmap` should increase the file's reference count so that the structure doesn't disappear when the file is closed (hint: see `filedup`). Run `mmaptest`: the first `mmap` should succeed, but the first access to the mmap-ed memory will cause a page fault and kill `mmaptest`. -- Add code to cause a page-fault in a mmap-ed region to allocate a page of physical memory, read 4096 bytes of the relevant file into that page, and map it into the user address space. Read the file with `readi`, which takes an offset argument at which to read in the file (but you will have to lock/unlock the inode passed to `readi`). Don't forget to set the permissions correctly on the page. Run `mmaptest`; it should get to the first `munmap`. -- Implement `munmap`: find the VMA for the address range and unmap the specified pages (hint: use `uvmunmap`). If `munmap` removes all pages of a previous `mmap`, it should decrement the reference count of the corresponding `struct file`. If an unmapped page has been modified and the file is mapped `MAP_SHARED`, write the page back to the file. Look at `filewrite` for inspiration. -- Ideally your implementation would only write back `MAP_SHARED` pages that the program actually modified. The dirty bit (`D`) in the RISC-V PTE indicates whether a page has been written. However, `mmaptest` does not check that non-dirty pages are not written back; thus you can get away with writing pages back without looking at `D` bits. -- Modify `exit` to unmap the process's mapped regions as if `munmap` had been called. Run `mmaptest`; all tests through `test mmap two files` should pass, but probably not `test fork`. -- Modify `fork` to ensure that the child has the same mapped regions as the parent. Don't forget to increment the reference count for a VMA's `struct file`. In the page fault handler of the child, it is OK to allocate a new physical page instead of sharing a page with the parent. The latter would be cooler, but it would require more implementation work. Run `mmaptest`; it should pass all the tests.",," $ git fetch - $ git checkout mmap - $ make clean",,xv6-labs-2024,mmaptest,"test basic mmap -test basic mmap: OK -test mmap private -test mmap private: OK -test mmap read-only -test mmap read-only: OK -test mmap read/write -test mmap read/write: OK -test mmap dirty -test mmap dirty: OK -test not-mapped unmap -test not-mapped unmap: OK -test lazy access -test lazy access: OK -test mmap two files -test mmap two files: OK -test fork -test fork: OK -test munmap prevents access -usertrap(): unexpected scause 0xd pid=7 - sepc=0x924 stval=0xc0001000 -usertrap(): unexpected scause 0xd pid=8 - sepc=0x9ac stval=0xc0000000 -test munmap prevents access: OK -test writes to read-only mapped memory -usertrap(): unexpected scause 0xf pid=9 - sepc=0xaf4 stval=0xc0000000 -test writes to read-only mapped memory: OK -mmaptest: all tests succeeded -$ usertests -q -usertests starting -... 
-ALL TESTS PASSED",hard,https://pdos.csail.mit.edu/6.1810/2024/labs/mmap.html -51,6.5830/6.5831: Database Systems,Fall 2024,Lab 0: Go tutorial,Start an http server and handle requests,,,,"This task requires you to start up the http server in `main.go` and handle the user's GET requests by filling out the `HomeHandler` method in `handlers/handlers.go`. - -The final web app looks like in the screenshot below, where users can select a T line (e.g. red line) and display its ridership statistics in a bar chart. The `HomeHandler` function first checks which line the user has selected in the drop down menu and then queries ridership numbers for that line from a `RiderhipDB` instance. The returned values are then displayed in a bar chart. You don't need to write code to plot the bar chart yourself, you can simply use the `GenerateBarChart` function in `utils/render_chart.go`. - -After completing this task, you should be able to start the web server by running `go run main.go` and see the web app in your browser by going to [http://localhost:PORT](http://localhost:PORT/) (where PORT is the port number you specified): - -[![Screenshot of web app](https://github.com/MIT-DB-Class/lab0/raw/main/screenshot.png)](https://github.com/MIT-DB-Class/lab0/blob/main/screenshot.png) - -You should also be able to pass the test in `handlers_test.go`: When running `go test` from the `handlers` directory, you should get a similar output to this: - -``` -PASS -ok main/handlers 0.246s -``` -",This task requires you to start up the http server in main.go and handle the user's GET requests by filling out the HomeHandler method in handlers/handlers.go.,,,https://github.com/MIT-DB-Class/lab0.git,,,go test,"PASS -ok main/handlers 0.246s",,http://db.lcs.mit.edu/6.5830/assign.php -52,6.5830/6.5831: Database Systems,Fall 2024,Lab 0: Go tutorial,Run a query over a CSV file,,,,"This task requires you to implement the missing methods in `ridership_db/csv_ridership_db.go` - -Instead of issuing the query against sqlite, `CsvRidershipDB` directly runs it over the `mbta.csv` CSV file. MBTA divides a day into nine different time periods (*time_period_01*, ..., *time_period_09*). The CSV file contains how many passengers boarded trains during a specific time period, at a specific station and for a specific line and direction. For the queried line (passed to `GetRidership`) compute the total number of passengers that boarded a train for each given time period (for each time period, sum over all stations and directions). The sum for each time period should be an entry in the returned `int64` slice. Make sure to use the `idIdxMap` map to map the time period id strings (e.g. *time_period_01*) to the correct index in the `boardings` slice (e.g. 0). 
- -To use your CSV implementation in the web app, instantiate RidershipDB to be a `CsvRidershipDB` instead of a `SqliteRidershipDB` in lines 23-24 in `handlers/handlers.go`: - -``` -// instantiate ridershipDB -// var db rdb.RidershipDB = &rdb.SqliteRidershipDB{} // Sqlite implementation -var db rdb.RidershipDB = &rdb.CsvRidershipDB{} // CSV implementation -``` - -You should also be able to pass the tests in `ridership_db/ridership_db_test.go`: When running `go test` from the `ridership_db` directory, you should get a similar output to this: - -``` -=== RUN TestRidershipDBsMatch -=== RUN TestRidershipDBsMatch/red ---- PASS: TestRidershipDBsMatch/red (0.00s) -=== RUN TestRidershipDBsMatch/green ---- PASS: TestRidershipDBsMatch/green (0.00s) -=== RUN TestRidershipDBsMatch/blue ---- PASS: TestRidershipDBsMatch/blue (0.00s) -=== RUN TestRidershipDBsMatch/orange ---- PASS: TestRidershipDBsMatch/orange (0.00s) ---- PASS: TestRidershipDBsMatch (0.01s) -PASS -ok main/ridership_db 0.226s -``` -",,,,https://github.com/MIT-DB-Class/lab0.git,,,go test,"=== RUN TestRidershipDBsMatch -=== RUN TestRidershipDBsMatch/red ---- PASS: TestRidershipDBsMatch/red (0.00s) -=== RUN TestRidershipDBsMatch/green ---- PASS: TestRidershipDBsMatch/green (0.00s) -=== RUN TestRidershipDBsMatch/blue ---- PASS: TestRidershipDBsMatch/blue (0.00s) -=== RUN TestRidershipDBsMatch/orange ---- PASS: TestRidershipDBsMatch/orange (0.00s) ---- PASS: TestRidershipDBsMatch (0.01s) -PASS -ok main/ridership_db 0.226s",,http://db.lcs.mit.edu/6.5830/assign.php diff --git a/benchmarks/course_lab_bench/data/benchmark/lab_exercise_data.xlsx b/benchmarks/course_lab_bench/data/benchmark/lab_exercise_data.xlsx deleted file mode 100644 index 0694c411089286341f9dbf3299d0cfdc6205d648..0000000000000000000000000000000000000000 GIT binary patch
zi@-bb*Wws|oBd~iJueG>lql13%u9pDA8y;3{6*VEcH~>hjj~U;B)-Pq_}jCt48H$< zKT=$c^6hC4tP%k$9w}Y*q+!E%FZc8e#Dxp_DCZ!b%2MGs|7;ul!>HV!0SRcIgyeaA z97m+uoUDvA-pZKQR2ZH)b25r}pIb_;VF26f8IAH;z)kAE{A5GzeVQWXJPFZ0?P3Jw z!tTI&W@4PC_F^kyRxgtIJ#N?zih0FUR4_-WlIZbiq&n5AqZaq#u}|BWu%0aO7Tm`r zX103}%Zn|ew!p?;n$;w?{G>m)hK7q)_c2* zNi6WDg&P2z91DNFTVkt8jz^pMg`(jz?3dJAa1{uprn}2Stq2VSY_r!F`2tq5e#+xT z`nf_<4<%`IleEE3V5?6R{tLG4ZMbA z26*k%dzi>Ml|_!6?kzb=0@}WkCl^u!Oe<@mkMb#FhpCYY_S0N!fHzvpb-^hc)S7Jb z7y?gZp1nuaiJ+Dwj8yQfoQm*4eeXQAQ}VCNYExR5t?No zcd$2#yu6oE#ildi=;n>wxK#yuAzbnZ!5Q7?`4Xl8&rj{raj}_vXV&?*U_&H}`eN*s z^Gf=olZ!LmPTj-aW?6zQww<5Qr`b_u)h5O(4#gO$ud|pAu1Ckj?GuB)@|osHo0bgI z^u4f+!WYhbOme>m3&*76R52C2sf`AGI2OONf&;Bxk;vz^Nmf})(dW0}F3Gwy<>}^H z=V{ef)ZyLSWpGyL&Ynp;Z-O|fu~@&?pWN0)M>R~UUgU;Z41b1iD1Gzpa1>5Q>@zCq zP|^6rRHNjVq5{n;YHd2_ zjo}FPg~1W799pNqz^-cC{sD7_#48Tg|Ec0dH4`VlQ8M+vpn`12T14Oz9-x5JaN zS`6`aifBaon>|S-K)P~7 zTXf03_R5>5Pnk)^#zrvAhVP98Bd-uQD7=h_0rC7&=}QjvYGeQ@tGGRGW(L=IDb`C< z4D9wjUbslYGRf1Euh#8jG`F-TQ4NvMM=_kG#c7CK8!!C@EK`-(2i(MFJtoZvcF6J>#L6FsQA5`Hj%mgS@hD0qKE($Vbq+* ze==;|H)pIkUO^6xHVhgI65qtc!?Ju3+D|JP9hW2WN}&I(Q+LbzqS+fiiL3SzzMkk> zL$>nlOJ<6`^5?j;!4JX|d?3)4<>%yLoF4uK8FQQ9(R)R}<7)!ppFH>?8tl<$iGjYMXa=Vv~hsXqE^Vne;lOQt>o``!naXQg=Qy52x@N&Kr)_N5z@G1#o<3N1o!? zow87*M>NjMjI=(4d4qa!o4AdENkqAEeXY-kA+C49xO0{$Nq za=BBy*SN`+L1_diX)RYJYeaqP`~fuxKCnx&Dkr6pj!RO0G#$GmZ8ZV2FX4UDK`!Zo z&0n=X@d3~h61H8n`EP4QuFcOwV+1Lu3g<{Sck}7XA%7EqZ_y49bOXcaemhEZLDuT3 zre`rIov%s0f@agV99`&IetR85@;r~R*7by>ubhO#UApbXBN;a6da;&k1mvK#tnf6V zAKU$``ow(P)(4B(fS`wiMdBf=$fH}X*_|@(CN>ok!B3aA!xnZreQa=7OEW8Rro5vqxLnc+g!-BiiWWn=E~keX%Ee?VqO1bW;(kTUB5XLjt`zDeL_l(*j0~!%QTfy8mYdgu6N>Bb&nx z6aiF<4*JJTb=6b5At}xIevwS3IrMygmN2#<*?t5ax>U9m9tG>UkS;zd@;L)m# zbl|AWLttxvt1GV6CzL~8Di+b?1?FN2F!-=>JAsc?_0XZZuE}NWM6orP;vC1M1^vUp zt(39HVMcY=-S}G{R{{MY8;#tDG}-K+QL8fsq?7Kc&IS*VSk`{3Egu^2U2p+C;U^!` zg~JE*pZ5w_mfshUOo#yvSoC{xC78 zHkXJX)5YIl&$r?QxVe)O+1mN{O>FhJ!4*;`Jyh+~<28`y7e_m!$A%67Z_`-fV!;F_ zYlIF+9$)kY4=1hP9vi_)C{>?|FG?N^}u+3;@fYc5cjnl4JE#Z41_pMP#+CNlrPuaG@iENC#sc#S($8^o3+vhRg8 zG<2Tq^q$zuNm7!xM`=@veYWt$!JvQ>mt;csyRP_L{X8{xr#@y@#)vDKdfEqDNd@%O zy7LybImRM){jFb>$YqzZD}{`S!4!Je|Ra2@DNICK1#Nr8SlL z{L^iY;Z!|G1?do;Q3wdDFuIY`?hr!>x2Mo_G*|JzKNNqwWB;-wMVG8qpAbk4?DLdV zqRrEd8;{GgVS)4WwiDHGM=j$Vt9hknR*_-NkH~B$xU?RR`-ZhERD7qx?eBn*Jp86o z4ue3}#q@cBB6gg8bLgn!oc0cp*@3#o>~A;5(jm64*rbZM*usbOjZ`Yk1qUr2N*f(z zSr$rkcoHMCAJnJTcOx%8r94X(9Vg#|I2E1$zC6i8t4c`&V(&O3SO5n~UdgZ`?<(=@)}Byo`HZ@i z5dmfPqJ0)qP@qyhh>wFz;4c{II!bJqU_FbS_5UZs3qB-&XW8raymWTcAsv=Wr+d4%D!h-#z=G_Mr*utpUsD2Bl!RAV zgGio2iqooc{V-uh|KaW^<=}GrhhPO%QlqqAmm34mmERm5bylqMcGP*5?&tIrO}~W< zeJ_85KdRjB2aU)aMnz6JejvpiS7YnbY_t14zGyD2I(qA7IsYdd;*yZg#DxN8(|SuQu?mgbwF*v#>LJY=`NcFKiE@!XLs_hY7uO0UHhPxVgyZO ziWK9CZfvPinAk)of5XmE_;>n(^}o+ReX8Yn{tTEJ)^J_CBNNlN^H|c6b;dDCLY&R8 zyteXvbh#I@>d@(DT09mOa;;m^Z@j&w!KbKwsQ^Wog@)y-Y=D zlqW=S7DRm-sTuX`yx(}kpJ&Qb5KADKlr|15-Gr%6y9iiO{9(NI+()wcz?*BG6r~K4 zFWH;D^&DMKeRw1#al@cM^hG3+XlYT{(z8#)mGTPHf?h&fyZp*ARoX!uRgOFepTzj! 
zi??6IF6Rd5gy+bXCL@s)*$3_DZFt)hjl%Fv{e&F3G~dLicMww82s1lfrDdyN1Y0!T z4m|m+`o0QE>|lpQWxB72lZ6~HH2A!+EO31Z-b^VpPTO!k;jA)+j{ZF%+E(wMKejuR zp&V+KK($SHNL>`ape=gjD0wRVw5EqHDQ1>Px6 zfaQQ`Y~oEc7vwfv%$hu9g50%vIYJie$M`SjAnt{0yF#VmRs$pt7i3?E{;4;VMlF}g zD(02NdTZ^WJ`m!=sNP?W>4;SL7`|bi4K+9v1c#b{ePb4n z$RNxTL}p35)MC|g9i6QAJEdToZcK>aq(d#@=ktFWsgM3Oi zmc9xt<)784O!`goxgs;)Lev&Hv+sB8v9iwbgb0hCswuIz#VdFe!Gl+CsZP@nfA1Vl=lb?*rlm4RPJ5~uOZc+Zm`D9Iw|yBB;jM7f=D2pJCe zi9Hs=1BZGi5)aELGjlTDE=|rGWM|v#93{!n0VO_Y`RKe=CK(n!u93Sz`<(t~>q!y_t^nlX5|3 zos4`7$&u4hoq6NnMDlL45h>hHrJV>*mV{_g%u=Kma_O#@)=&|6FyhTqc-Q@vVQUrp zREh3BvibXKjR>C5Cj57;GnD_C(sZ%uhzUF=lbO8&cT>{2KkA~OmbM8H(vNbFU8Q-t zluV-A2whbPDMt|YTPZ<(J9D%d^6mah(jcP15h9$clYHUX$an=JU27rE3gDXl`b zuV>f$}~^glBg5gi6K|Cb~AF> z*w+)ks+e^Y&@N8rC~0M!T=|U69F`OR$m*6xS2muLR(g**8VQBfWP2PL9 zuwdON*fOqnnmjwc;6~9YrfSu-Vh6To z++Tz~k2e1J>3{Fr)o zln*&o5Nyd~cO|dcUR`~X|FOpj4!b|dL99iVvRoZ0#Pd{O5|*P?EJ6+JbzpR&aDLsb zr{|h%->-4Vt~(e#%E3#T)^b7#X_$D@5?%AMJ88VY+m5o;et=3&<^uF07R1v7XC=JR z(7O#`aY}&$vY!947VXYSFmOy1NrA|fNABfsD(lKPFNc3H54wa#9MgvaNnV0c~Vnz499%kSq5SrsJUHfxi_ zT^Cs@#!3VwpVO0!*qan`qb(c#UTxn>*LX!QV-62$ydy#@d)PydDc@9D1qqFA*YyZ#SZW_SJ^1Nm0~0+zK_;XXGhnF;)03-9Sf=xVTiG$l zG$nf`k%9Bp;0JA|LcE! zhAxvaAV1l04(NN*duu>G6-2n_h{ut!?Kx z*x61?&28z7(_Q&V72DMwhWU zE(%6QZrRg`K@yPhtPL}}$u@cM;(c%pgvKEWf^hWmVkcIVUtpU@K!-YP8K^1u4=HLm?|(gq#5D$Mp4$rVPX-aa|ZB{ zfHZypS7xte``zvO3dbbp+n|Ib z%Q&Kj%$1l#v*qlp%J3`f(-N=cPzuX;xC#rih6wTt)RpZa`|gd)vo>tlcss=-Pr`)l z#&ldQ1C1q;4{XiVLpCXG=U?NYp%}?DHh|d866lddOu4n~AUM z5&qzvG5dWTA(uWp>&Dqg(pHZCIPZ8@Z@7bS%NjbYK>b6M0uiiH_cacxYtVo;ob%yW z6PX_=8$p1tIoz(Ow6DJH9rf1oJ7ZUI;m;?0n(vuMjdqEs*p{L&b^D$8oaQ4JElmdX zR0n?;O2d@A!F*v{2J`rQ@YOl9(7xOq5c$w!kCq%$cg+%=4$6QXdmYp6XYQx?Q_+M^ zosOh+W35*!iKpn@kU}Q?UxIH&Vsi#h47- zhk8?N-?OAmXPoKq>iX1okn(x+n0hYVLIfvuuSqIg?rZ1tq-`vNVHZ3%v&3J-u;0nG zV52wY1?+O^=1u2wj>37VH+$u&xO{1M!-e8O|2mS($qB9KfeG8ezz$+}o*(KU#EbP? 
zX>ALeO7DuH=!h0X8F-kY?`tvtAlS%*x+2SI!U)-T+BOZb`< zPLM#7Y;JEdu#piIzu8_oe4X`zr`{0uDSQm8VqN=QvOR)fqd?%MP8^x}rU2&XFC9HX z%D#!2uw;|f)?LkN8~mLROw&V^#gcNT1G}9q6!pG)HO$4ntbE%;g(m%k>Cu z6br9^a!9StZ>DJL;FaTjs=gL5Si?k`)T2oQC}r?p*Lqi){52ylcnLyeQuT%Dyew7I z5E^+COAm&X{7Tkaafl^UJqx^fDaha=7zUY*hT;PszE;Vp3%*8-u@MJMSu0~e%8?SG zi)k4KG4kXH6=Zl^GJUk@zgGyV`SDJ%V9Z*wHmq}Gl=*UdYgsrRA2)+#-?bh5*aU>z z&VL1V$xlTa<`9GMRGKsP!%?Z)j?ECA(o0SsTP-Ve25%59|Eh@Sb?4{0gk?4}r-a2@ zGc1}Fc)f?nHD6J`!ouJ7L?eTG?iT{-Sgj%3y%JmZJWCh`_xf6-|GQ&UD}(>>*N-X{ zLT~dxt(ZrkD+H?7S5G19!No6GFkA>3DUVX4Y{IZ-)V_s}-}SMPCX$c{kNb=l(_R z^m2O7BZzA~t6kG9*Uu!$$Hmd1ggOEnmP8Zg#5d|Yb0ou=^%tn4cINGHrq{1Tbib7e z5B3<;zh#fR`)|j@w#D;`RuheHUr>V8vt{O_K6!75ErACDXgX8QqnHK=5CED9S0Cw9$-+X z7mzHa9u-Kv1XrcNFV^kr##s#&E!mhRs32>~pw)h#z0dVoQi`^K9=W>~Bi;76p!Znt zRiGacze?Qh?})14SAz&NVnS+QCAmyl2E!@*F|2n8?8ktP9t-YD?fl%l@cZX*wcs9?F;@T}CCV(QkXK@>?24j6x!Y|661Yir1nH!E|7kJ?E>1N_qRAo2<} z4;@QgF)KCs>Sv=^TGhI7G#(fltst!U24cVBO0}1}dSZiVKB5;48Pj!ALM^b##>h+@ zj0p>!aZc=U1Vi5hE|u@FPPchxZx=CyMbeg+Wy?3*LpGm%XJd+cS!Qqxwkr4c2e4aD zDEj5fD)_=WOw%g9P`Pna7b~N(#yxIPLiPuzmJ0rSl}LB1;X?g(T>CPjwY{<)WfD1& z12Sxh4)`&!ke6Xp9;0wGCn`-wM%sPVIcuHa}Hbo9wQ@jE9ShQ92qy%XC(=ix5}qf{S%NVf-7xrN1W1|{AN1=@bij*nnCi;$#J~}NAORaJs zZ(!?K|BeGTh^iLo~Vf&7Wkk;W8h>m_aeE6&obzPnLwiGQ@{^}Oh8Trn}a4D+8r zN7x+N^KF;UXg6y%$Pzv-97MI^}16fM>#+8R5>c#Gz zWZLX1gxJD*R!v)GA7CIO$Qj4>mqLjuv>P8g@PIr+)$8UDAt2`0x){=VT&o?lB}88w z?q&APG{;3yIt$Ohzwlk@z}j6!g~j_TW7zCSq5f>tD5biz7U3Y<%CxOphN~FG1IvH0 zlOP$sBuqOgG%l$5uNn_*G&8HH}julxHJ^ty&y?N6-Y8DReYS28NXjhm%)T#w?sDWE5^j%0qdc#(E*kse7!2ti%1Ow(3B8jBh>djF1D%&vl}oW)P35N4>AnOcii z?#(k426d*=MDmL}0~#|;q1D1Y)aZIFz#Html9j1Rt^Qt{udacKL=t7mUF&@X?eL|7 zdACT6;2j}4E4b*|TZK7FXqbQj_$dHG*4WysNCw0^cPf^2L)vio8RE_SftkZ7Kb2xK zpRexiTFXBEc9#}vr|*6c><7Qjw1Ip&v|mQCB>LId!sNE1fPDGZE0u}-4rbwZ?+r=z zE8(U;WT?&>ofQ;ytZ#K&0|dxcR&0Q#Qb#V^NI;Bmd)R9@|2u}ZHaf))tO@n;tOQI= z&TV5_AJjYwHhg$k_6ABt;mf~KdM7@DejSMB;Q}TM3v}q}ka8G3!8}nl2XhJ?x%j}~ zjWxflsyiqP9`ky7jHi

hB}ck_dqX->!b2m5ww28vy$ zY(;}afMiNWS^zxZQA??lS zZk)5};F~DTu3Z*^SeI>fNzISFfs6;84UQ$q=XdpSe})1nY%*xfKobWkbNQ|Y6F=kM zds^BPwT4`o><s~}t;4KGqnq2-v~}-6nkIsOz?e z+F3ax;O21;!DE6XX%f_7BHHa%nMdY~)S@90wr7Z3nD#SKE~O{EYNjFujqnVHe)CCz z8~nKFrK@GpQvN$Yo8oLg6WRtENH&7Hd|he|(7$0&gQ}8pD63(c&R>A{1N@>DisCZ5 zKIUx09PmnoT6TeI-(`!)QwFLKFOEp;?#X)(^G74KSIld7YxB*Q47-gN;Y#tK^!Ov7 z)v_a{@W;Nz80ytv0uOeVAY6lfiJ4K9M;00kUBatxPv=lQ-tu@5<^(?1LSb6U?YQHn zFZat|MypAZKkN>*JGbO-ScP!a^gzoS+$HPumLj!mU9P1qZleMFjC&Z7TewWJHLpC? z5(jEC0#jkuF30X#(!k@F&1o<9!d-6CxGe^*)ZA+Nx5JJE1)4cdnc4W1Zv00$>HS|yh5*AyPW73jl)RQp4(%LGrj5$r7k_?d z`Gw%>uvQ<#Ec&|;&rg9R$Q;)R*4qaLPn;sjLOo(xa}p4QadV!@^||(=jk_U_`t10i zir0HepDL|+dr8ts_;;jh5ci+lbdqA;yFV~01F5M~GfhN`>>Sk?Vj&EXo)TreL{4_h zTfF{#`*L17giFGmvo+PupFz`igaOCYQ;&YsHCQ&7w;t@5c^V(pS^4u+t&h-Z3csKE zzsHU1oOGwJ_7~Ppv&DTC)@{fb;b!f;LEw+y`PrLY@EJhmYT))a1PWf0EyW4cT~qBw zxF?~#0?y*IAN-d|hqqody-S)A+}PchvLmw&?C9l-we_o?!@JM(*W{Yp{7-B3T(~eKZe_QSoik_5(9OT^}fDjAqM1?|- ztm|#QXSnQ9LKvDG+1Yy0QoAm$z*L>)c1JU)5C19nHX^2KnEn2aKwI{f{99^VAQ&99 z1}+|<7rg9L%}T^h4B0ia%HC``;LrDr+z3p19w3lry>|KYTSX5g^%{Nv0ntHfVj+f{ z;)XHxqw2tDp}_Hlp?N_7~7yX4b{%Q_6_$akztU^CeLHF`ND0 zA5&1q7PkCgv?q_^_~1h8(+oF}k(tigUif^s^ZHN8NP7053W4QG=AYp2qoj{0tkL6S zytUb>j|)Np5}I;OPBl9h-Pl-H(K$B{n{B9N{4-|ttjn$T@#SWdZkfC;jC*nD8gCQ4 z4Ux_ij?VvkXjs*;tZi^^Ye_Sq5RBEAo+F3&#U4+!{8W74C8FQZYuQfKM2l-DjOnniR5x5l6gc#PwG)&)$)^%%;PQ3A_%ud+nBnt^ezcf2jnIv2k4YSD~pZO5UskK)FvUJ zxz&QxnXj;7D|8eKYW4x|c3Je;FPb)$7`M{viF46j^dr7uO3$hWN7;$;wSN+d=*EFP z9KW9s+OdQDVEjS*-de!6`9nZ^gf2cn*E7MqfQDj_ZPv>dFccaM7_ElgKA*_w?i>8% zJo;YO(^${MtKh{U3tiB#E6%ImPWZW)IIEaULf)N_j*c{;$baq=oH$nzH3)q@2A1d{ z|Gua-D&k3HL#-=Jxi1%ZKDLC2vt!Qt{B^C|6r7N1w~@;O#?43+uk1uxjDqet4+mQ9rpdb?XDV1j&VW}@fV7Tn?CX9T9_pz3E$zN`K__%c2v20VOyHyIgXbNAi^jREDhRcm`# zq>-49e^v<-qp=vsCgX?BdB0BLyVRpdp2dTbLJ?twI)nwHlY6!LRHf<{o@06--y;3< zT~{|6PX`Qf?nOG{>!mhHG-ilwQJ-(QT9BG-Xc0eBA)>@B3pVxqK`d}czbsYyqQsmh=3zvh(1GEdb|+WI zEH!f!bUhOrvG0Y3`u%peN38O0esL4e3gej4;O+VD#?ZW8jd=Q?I@k7Tdy6{~-J~4} zTLis4A0uISL|cKbgHK^w;ZwKe`4j>TP?4u$ytY?pFgaClJCNoX5tzWIW(LAGQtECH zPc8$ic;zz=R?t;4Tff{E4wg`fw)MYDozBpY&Tw7XuLu6{f+oKKpXtb02r~$arVi)I zMR@Gx)Vk~?NGWbXoemCyMhM-cDwtM+fr<1gz+f&kndPZBS(o?!9*#Xg8?s^}!<%(h zbAFNfOD$Zk<@&EZSc;^DqF9(M`##x7itpl&X*S><3fJp!dFJU|Y-iz)4?c46!~}R< zR((l3fpk-d-rIicwu&yKvO|KT+Xrm^9UdBiG)^D6>&3sN6Ep!w>j#yk3@?SFa#te~ zoEr{+CRg%PO`&VUxpL;CUPnn%W^;_uabv>dU{dGKy0)+?{r!$pCpizBWkUYI8^Mj)rxXo7dhz|$%Knm3XqPr4q?YOgjJEL2>4qgD= z+uBn|&$7b?x%$(8z1tMo)g`dDUTA9+nM{ufb+yDC%_ITA-yu4dlWk+mh1QFE@*+j` zer0Npi##9uTtdR~X0tK<4CQS&K@pk_2Q?h$nZ!PukBx$QVmt!c=hhS9q0!pl8oBgG zM#J_z2b}S_QTISBomO`_rck>PAD*cbop;*e`Mv$lbn#7NF9=la&JU)vOCAA}v-2sy zoJ6c$(OEg*M~3@LSejk>-}}ZHb6ioQ4pYcED`>H)m&*N#+qqOp=&ic~Og;h16{m{W zQN$J7<#ee4ZU3vhf)&TmJrIR)UysTRt{=kbvsmmQL0MRkGCeJs@OmjncFrUr&6c`; z&BvT(LB|4-^W``x9teEDNuCX48iz??t#b~1%;0C^C{ER~I|Kzs(_VRJD$;I$ulH(T zhnF771qTi+k5WUIX+vxGh$r)4MEtiN-|Ca3QM0?9)b9&9{S;^3>wZgz+FROFdvThF za-62(p^s27watjM(U3aIp(`GjHAL z-2tU8o~F8+Tgn0>0)9J})1dbuDWpHvNU5?N25rWam8=}-X!q{BAVft$unLw3+E?H` z9PV(e?Bt=a?|?DRk=`s~tkP%(Xnl2Z!}OZO{Op>^8C%?Kw;P zOJ27_;W6(mTSF%8{f=}JU-;?BRYd{{6J=>Fj3L1dXLBVspeJSn93hB+kH1QyPzB?S zO_Dtn2$pSK`Ml9f$Ii&V7#mCOQLKyQ1w!v4ncc`$ybHreY@d%}%e3~Q@Mds(8i7q{ zZRen3Q-~muY1lG5yZm#YUoBM?k7oDVMmQL{Owp4vDLA6Y26Y{goV}!crkK+soxcW@ zL7eP#;n~@6b=%_V&FTmP@=Fp5!mj0nkkOSDRcmhc>I_yVtN0HEe|h*Bn3AKJ4nOWW zZ^INvyE#IV;E2c;ZjI%ssK~Hd-s~+X8#+!g>plW_6JKJL?XUU`>0zOIY4)tzzh;WZ ziMa?!$fw|osxMG36U5xA=I%v)nfP-~jWJEzoUOZI&3w*YdErK$7eUoxe?s5%2svk* z7uEGGJPC859R^OU+8MsB-$hB1@w~#zN5Wg*-;ubdq7>eex3p0p915vD32wMfNY_2B zbuUWC{r|ZU(}fG~=+#_m>2yJ*pznVDq|zT(cDZ3JN(^s-n~74aI`-E#fh~uU%B8y# 
zc7B$5h90KGPY*0ZFGr#5BEpbZpplmoB(@oOb=^MZWY$NSrr}7VhL}xc(M#R!bu~$S zq!8mpEwf`R38r2CToNs1?feaF_H;i~#r|pjHVv@QPiI@_^q8O4wNE4N=DusbT7$|s z4q7J8*E?xGd| zlK;x^{ukJVa4gL)=>da1>dlZkl`>PjaF@bk`>z^BBnrr8hQ6!WI7cVifuacY!V1bC zBigx+m4h9z6x0TCI1l67UW1+J)A|NXvh-i}88=~VL{$Aq%Q$$ph#|{9h`o5y&5X8l z;{qTJ%M?}e{$ovUCl|INNhH;fwFk1{YE){G!b-dL*rE)lw zB$$Vj>36K%+annk{Z5!D`p5p!B~=N_vPK}SJR6iXjr%}o%)@7`H|WQc?)e;+h8Kko}(Ngyz~ndWWh>oGu-#RJz5H9F*xV03JeJ&5Yj3FUYf zX+MWh(o}#<=;04b`hs+ww?MDtY0J1(2pfgi4Yl0qCn%{(SleU<^6`q2)@{%dDmfm*rJ`pG> zIZzUW_5Gu^f)@?nu>Ce7eA2L*vxl}BhMQKX9ZK1}z44BHN3sK5qf}g6mQCh>)EjM| z`cvKU=2B6=(O;QKWzYgcvj>L7PLj-(-2dAaBL7jL>~5ygE@%CItA6cv^E(=$H$fyNfFU$Pyk%n&YDoi{oU8y!W3m#56WCOw7P`Rci0 z0JL^{an8yTifkuT|6F-qBLx--w_-FmPZYW;Sq*B$i`TFV( z%w7$~+^7p$-QVek>u0MD5JImf)PVJ)drc1A4bx_u#^#m8>lE_YT+=xTtnIx#?USGp zE<4^Frnup!TxO)y0uJqC5Nc)S2_T+Q=?yTaKx!tNZ<0>b`OAm%EB`gTf91tZM}Z_v z1xt6lu~?Gqa4-}BV3-fsOdDK=+!1RYG_3yq)O7Lm@xT7ECl4&zvVLepu=&_D_x3DKRWV96YW|2Z zLNAaML5C9m@l03G_|)Io&D}ABvAb1gyQ>yLNXGolXEt7pT=g#Xxt64j0Zn#=jIKcb zg7VV^8G*q~F>Fbg%!n}hpj!I-AOccNa4Dzsi^jNI?ex#69Fs*}bICg0J*_?0je-kt zK1CqGmpp|_+NLjF%mI;)Mn13)>bRYWa`?4eS<1(9t@?2M0|qanWVH2|8-vZW-Yl9~ zIjkSf&xL3&#WrDk^6tzbc-D~O-cG8*maQn!r`Z6dYt4G3t|=K>RJE~+WyHR&$%g&6 z+->x~&l>$M2_hGRbVJJ&CrpQc3UJnIQ^^_55`Kc9X2IOH$h>K4+b8_>BHgc?IhX!? zn@3P^j-p^<<>QFFq{X-E>{nvK`f1i!mxmPv+_r_{5aqYcJp;gbx5y86si~v9(XdIZ znzX&9&tqFSRAW5`Lm?8@CBF>2-*FP3rH3Qy&-i-ru~DQsR4n^&u_CX z5yve&LG$~0q_#RjLrx%MGFFi}Gl@KK?*f|2GA#A?@XSW-s0%^N@7bo9vo&R&A*uAy zIx5F`v5Wv_Tf4=(2y9FT^d)mT^lN_)o!jI*rJ|mow|kK`dD#BU9aEINQ;v0gmV*6a@^E==3 z*Ig4_r}UuEpIchy@g8Z#|Z*7JT@9&>e1Bf{9#7L#y_-(-mAkH}U37dz`EOaJz&TGC*+K2vMkV{3B zH_j0&JE-v3&TY0P%J)0RccT6iy1ahtm>1Iscqc6;L59&%vfBup=naF&7WsY^(G(2^ zhe!bGmfIkihkC?SLW#sg>(Iq=INP} zcxONh3B&@5OjmJF+L#vasuxs4%z0xTzl8S8fFp1c%LN<*jFT^Bd|)*!0Yh4_ z`NvPe8N+=wwEq<$#}4d(DMNs7W;P=un+>o~q8iiyQ*!RmEF++~IHgr5-9HSNp$$R3 z+5jZat{t-*5#I!lJFt3Fs*!N84~0FPg8&oTuQtGv*^ zVm`Wmy8pk-C+IsH3Ea!BX+sQ#>IGlC5qRZ^HQAQlhQtVl>9c-&)p)U^u%e!RBtCvr zeCiy*6$tKjk1Yh|e0bVIC>YKS=S{zta~HN_E^Y{j!(l+jYSdt!Z(H62ITYk4O(=(J zsBKdQ8F2~fj_w?}NIHj6h1aY@U<(hY)w`bqxTW-R)G@!$V10jj7v^i=_5W2Q<=&NNP#JL4}`aUF$-CHwzCOPFwy2Ft+yhh60kGr=yt&BD1*N7%4z2c#5L6q=B z!~^6Py9%QE0su{ZB#Z>W^i1zRuN_mKbHlACEUI*jm)tJX* zCXupR|40a0d^|%7xHV;+0z^#rCz^?#q&%&F;40;C=;D{_&K-q>pVZ&ovmH;j?E48b zu&knk|0Nh)1PSV1ZmNMr!+s(I5`gcC7w$susvViPH>oZSkQ(r=k;Bc%6qXoYPADYK zoxId@<>!K#EnLlz@7q$*$7UyE?B@hNFZH>?|IK>toQ^ha~OzBWRpC^ZVN!b>|lJ176xTh zilpBr?42Vg*%8ci+a27HOU=Gs$@bO4ae0d$0cK=;hT=#5T=22n_elvp;Zv#e)ujzv zt0Tm&^T>&lK0CiWP)si2x^!tkW|pHsS~lrL1Y{7bBni0YT?LGh3Mwx^@5uNZmgFO| zyF;E82XCn_D_5FA8n|vF#`T(mV)WHyOrk_LHJ|hc1q|#dbIyvqW1jA&(I|vd$+h2# zb>-N*iJ9ilc^H$9G=f$KD;NE|f@~ ztJGGryALK+`>E|Tu;Q%vPWsU6T-NoSex*1NR5#j6W+`Nyro??0t~N7BZp^-WXyKoJrV$a@7!kxd_*%d}O0Tk(W-O4NF1lBI`^=g$=`l9^;u za8SN(occDvPxPW9Eh4#O6DE5tIY2gL??Mt zq`wzaqb$LAqktkLP5Bmo!PXG{on8VzQR?BFFkZ}UXo#{cedH;TIpT&1ToA7YDNtX8 zNQI84DHiAW;NJZ|GG&0Su`NevY1ijWfeDTZGToF#HZ@_9t_B=wa~Wt^WiIIvn{DA} zHPEC%A}GhXy9c1)6jQPp%X+rP|6-quYOVy5O5z+PQIt&VAR#T^6T+Q$DHed67OYb^ zHsiP-UjMpNooILXA4xuKHkev;_h1-E_+I@L$wPfFB{AG2By-KNM;x$_dUSlYn0=g} zmRP<=;4WWC2Zg-0o8G2u4oNc8@R*r<_zj!NGA@^^WY0@mv)@9}+Kg!=1_mUNwX@&- z=q$a6vs$N_g{sCRq2+G2Kz^0jr?xh(yrTpi4Rqudlb2X$%+z{5#^)JOq_k?m&M+5MuWf5}K zNe5QI%)WVHK<;hnvy{$Ju0Rs?+&ewJbOV7*(Nh)@T+8BuS&b>JwcV6TTboo`^@|7< z4xLP4QzYcuu>!f82;PRFFPo5$CDU@Dl!KRS=Yoo=nO55}6i^;Vsz?6%1H2;2(%JIP zx~F^I#M!hDNz3Sfj_YDJQUL%fu$`<{f~ROYeg=X);1J||_~ztf=H)FUnJ;FeIPazu z_&9kkQEkPkBgRJ&o*SjwOMUWOQ>iRLs8RVpm0ss_M-`_JldP+`Mpqj_mdFMA3l%>i za7K4E-(nnv?rCqF|9Y^Vk++(dzoks$xxjpDJ>Yt1Pd6Hq)xTYEJo9{OM#nCvYfxe* 
zuUt%6pJ^N|Vq!lyl|el0BS9#o1c2XdgotE0Ptu8S%VW{Hb3=!5)7|5C9zL#3nW=uBAuLNjjGIL%foDCggZ zpVeQU1XaihFu5KW&@5u|T^RvJ^?tkN6y8>FJCn7BUF$dD{E6EsvQ_6A8|iI}Ehg$p z3}Lq`80HBb*#4j>qfM-$51N)sWUa0d5VB{iHiz`r55X0XT+3ICHMkH0iZ0emQp|f+ z(3sSGx6$7iYxy$1Ev8wG!Y=i>rwVZ+^FnmL$jJo%TV))I;fmgCQ zN3de1FfL|ify4^|fYsXRM9$Ev;4IGp4zuc`kE57~GvGxQ8k(mSe53*nqDuUnLyB$|w#QX0dI;WvQr@D<7Bwn|0kx9XqI#XC0^r4Zs4)C2j zrcTxbxGHKZ2oz_a@a&7x*nhR@5G;V5oG$3{M{c^tR_!vm z4FXUOFf4tdW0*5=tRX+~Fviv<2ONhUP5W@?<6LuJ)G7C0EIJ| z&l!s|+8@r?_8NYqnc|d4&~bVOC3Ug3^h>a1y^5!Eaw-Un{l8}#t5j>B&t-TPIxQwu zsFQ>IOzE$bLNy#`HXPTPdS9{aO#+i0TenNk4#{-!b2iM2k;+5fG>^Xcu9RddO-AYf z(SvZ2ZB- z^!YZylC_Ik1-zk~)kg5uf2b7nshN?;Vf7}okfE4Of4|wiI}-XTy9;wT`x$0Q0o;tR z0m|02DM(|W4^H-O6#g%BnWp>)BRNi<{G2}cG~|{Cz%fz~=Y-N5D(kuikUpF@p+O;S zLmfRXBtXx;9*YFgJ$il+x%_cK!>iVgrOt&u)OF+EgKo#Tybq)dgaW^o_X;ED0`n=^ z0=J%8RwB)mCV7#jI~bHG;JE0U=1-dcuG~B5?{uq`$vqz(3a#&|-jT{Ni$61Q5YlIC zCYb7*wJ4qsduGBsXdsxUcE_IFX%VgGw6Qv2zH4s{u|`DQ+vKneQ_Oag_CG$d^xVpB zeAks7T6NT-z++N7Atp&~GQD1k1TSd<9j$llZWDKrCGIopV^^OOAM*Pv!yzC}yJ*mv zoxm9AQ(gTtS+;#G4{TCH4{hl}jHRRly+4jdR(0-)LBAwC29?TjczTQ5MHO21g~N@d zS8Xv;4}92HP?=`C^`t&qfRWu^I!5=8l@dvxEM(}GL-AH>1tp7ui&l5WRUvAK>8ea+ zM!2L5_240u^9e_V%2=a*al0iKp6Cv@dez4S)uxo186P|0D#E_=q16P^Xjl26@uCQo zadHZh9@7Y<1-LoLx^Fl*k0s~-RJOjD{JV+byvzo^w_{_7GrV}yn&YNr+g(nn1P3qa z-xw(QiE&@e2?X4FVTE^`eH!DgyUDmI7F*1LkNPxKNzHu+G-C?=rpUb#mBpMJ-xrh} zA+xsrZ+t%5k>WkM8P`PWCg)JqM`Wb!X&YneAa-?m$p(<5o#2*GtPU)B^H zKTtY<4ea%59Zrb&>$l(KNZw5~>0Fu+O>_}-ShZc7vnR&r7-g*+Yu7f1U4mvD)O-(` zK~>=oa%k~O4>)LvQ5#R?eK+ld%sw=KkU%5GGVm>^_~sxg4N`Zh*e*}>Pur{&mjASs zce|?!+>F0?OrrH7F#7A9k*Ajyka^-Qrb_PM|3DkepGON^`Rg-CHbg&BXu4#5n92KO zOt@f62kRL+8z5GT3^y&=C*lSdFv@F^mQSIu=6(=eLQ0($%Q?lFzp_%F^^S_Qp)aL3 zYjcsHB!BA41L*1hr7j(t2xue>Uf;D^(VAmUN+N90UTQk}@vs-eW9Xd!-LcnKdxK_UNA$VKp}^t!U!S2_Z(8nIdx zRY$q!Cn+Ev?Ft(gp}!axa~9vd(r4$W9)KvI(QR$v#Vp>~IB(YAjiGtU1t@SnPigeY zH*QoCU)JcoGcWWpb*UsEZLE#goMO&pv#N8aYf}VsH(G1=*S#3-%j@QO?gcHMV~1>S z;VT>muNV*R1mg_^-BLSy^xf7ygC=Zj<|0$HF|CQK00Rkd2P^aa)Qqi7yto@^Un)0&~;(Z*tUJ}uim4U^S>7-Ad z^j^T<@Q0;n0KQc}x}O%ZMk6rx!dOM>|f?DPZSDGgTPasLC| z+VVoKzZfDbtV8#Y3Dt;DgSbtj1NT&<0#NTJb}ga{4DIbK>NGx8ykh?@Xbfo(yvO;^ZngXWF1*#6>t-J#g+ z6y~J@R}!{9geP8{w}iOLz>%O+9o~%Fo^OLxlLwb2q-z`pK@4kVLSR3%AYhe>A|Wld z5+^9@K3*ZP5;cw|+)WBl44l7vB>>F#utE|CWzxDb=L?`5^wEjECxNZ=!as zEKZolVL$BiX!@Jf;yd*d6{1yUG~*kKlIhkOg9TDJt{$JAn`D1gdh<`89kHzt=YpSd z1+2P`ERr1Y>SC9FsY-V=p9L&q;!czc%7n1x6;|zB_ZsFq>WIeEk`-YiCEF*PQ^xrN zX+ftWJM}V^!Ox}n&>JURz-s~52Y}T_d2)m>Ki!}pqznUR^@e$g-~6Z|!wm@#P6@1v zltp-rNJ3Ms_x5rfXl@pK6gUrBzR&OTSTJd&D6qG*0m8ph`H6}wMP2M>ylZlEJO1 z(sIx`;C!#WJT0kgT{tkWDOaF1RD$o<_Oe;n|s7p=+u zX$hchPf+cGD3_wx;5-Ot0O;QPrJ&Xel*6ja1uSB3DCoH~-x=+Wm|m)ItTZ{p`^&C} zqkSU1#M|qSV!yqC@y1Lw4zNZ2cjjxQJ$pOMMbC2sy#0+B-BH7z%kVQuykQLlmg zHnwH#_YU5wA)DYCo2c|ewgByiZ}*Nk|31$bwr{$}VoV@$R(CJVe}VB8n+WLlLB#x` zMAX8yak+!*7}2FL3j$@g-5~SK)1sX*8D>2ZntF2}eV;!IP*rQ=`pR--F1pJ0`fjqb z8d5qYtUzdEy!Nxx3~_at1ep1-?SvOWHz5zM3&jGjtY$2VGqAdp=^ zlYp>8jzu#wX8vWGt!2tM-li-d(cEy;{ybp&J|B< zaeEx`VH9*B*E}Ucxa#SCp(Gm%@!R^?73jE4HIhQPM;Tc}qIC-FguS=HkM}ww zeifueQdZ)`&xH{(z()O$cz)bK|+WM!XZbT+Jgs#&A_%b4jsrz zE7nc{H1J7k#TpjTw3L*$#H)iwT6}DiGfO!7@uaqFaKe0x4?KE=aBR(sNs9w%^qv*! 
zsEIDFtP6~5VWBXid#)y+X7vY6q?N+y+^dEuVb&qPX#){IH_gwk-v~{6oonF}j!W~u z<1pwt*ZPsMadhZDDX%`*Sq*N5&mJu<(fCupH8!-f@P)GxYrLKBSajZOYXgqZdWl!# zN@&tMTBqIK?|)Vz%{zlBLJ4YXO<`867lN*%TF{;j;ksAFK;Vjo6r<+S1a9NDqjtY5k|upJ^iG(a3K!#_DGxG^3NKt7|AQ=Stql}Z{eLDEgX6SV%8&r&2ho|OY+S9X~a7=S#NibPOi?TNG1{@XIbptN$P^x$q+ch^sGCP4^ zm7-dRIL)=<(N%%;bqH5tm-z|vOUu-q6IPbXwHcpP(Z)yR59ff@|I$PA+xl+K^l2d% z2C4>bk+5<_ydJ%X}&|j8yDVY5f|74U%=SC>IT`q9v$aUo(?( zR8mTE8uEqcjWvEl(yyaPhe*r-E5`kIWK)&548|3^R$|~@SLL#p_%FfL9piZ*kSu@M zOH*K5gBua16)cz=06NAAWQ24O8j&x?7JLP_tqe;&ikHFTgqM@7rx;#^9uIY^%&NHl zlv~mSfJBdw+!K&DER^JkaC0BziDPR{&+0>Q&s{>kFv@)8y5gx(3njgAUAl1e%psBB zQ3RW`tw1gmS};26#Al*FD3RYU&PJX)wH@(~gqm1Z_=!#@Nv`pi9@mET=9Eg*EGz zgBG)k_$ZBG@n%M9|Q$ou<^&RX+H7UbhBi z8NAViGsb(@5>?a|wWy^zSN?ywY=Q9~r&!R9IZn)w&Hqkg7Eg<(OJ8=24Bg`9@&7Xd zwVQiI?1L^;Wb*u#00RS`fupW=6wixpeC-qS8*i8HI1(k2>hF%ED`m% z_E?>fkCo;ZXr1^kChCQ9i&3XfsOLsBAZX)T?WhojE@C93bUvA=5Bj8#kO5w|+q_Sib@D=o^&?+pWfr_fT|2a73$v%ZoW_M4pe*9^ zX7(oSUMhD5gDp`%iTHh|DvV?A-|5FwQT5qkH%!R+VtIc}#!QB(&$27bchsI+;7 z0l-TE?WqZ~hq9x8M64P&kO)U|KMy*w2;~d~V zxvjwVQVhIBJzfMx@1dN_a|_F5m{vZEgO-#{_-5cugYCC9;Xf_eptD{oIAIO4{SIIZ z^6}alCF!Nmxz_=*8*6cDEi**eGx!X* zafpigh9-8{a2IL8;<3?*%5lnC;}o$PdJg8kIiF(>M-hrE{SbU=gZ zym%lVt$^o~c+D-^AP z;*kYtM*5ch1NzpT4u=L!uh9B(71gOtBPxiyK@uSOrAPR1~Nf zh5}eYVDU&XzRvt;fMlYdK%HF~k1am9E&0Eh{ZO&_q+9JBaCwK0pXvBOz*ZDc@CbW~ zn&;HzT6HjfY$Ba7THK96aK-9L4&pnLJI8!hcOjSXfV8K0XJ6s-)rgE|XPS8&I?ab~ za@fPlmmFl}k7K=_S@(2LiUv-0s_!ie(6B!!6%M)+%LE|+f65$E#El{XUe@#%o+%R= zI%HZjzqhH2H%+2T1;+lb!znT-{k;f&4y%6n($PPpkN{-Z_-FP5V#5jq>uthuA#MdY z+rRgBZq3DpKFbF~{>avVt_HlMceZGRIRw~;9i{qpIN0v-wB#gRiFPD@$E-6mz6t=h z+yNbY>L^?~)X-IJy*u_a?YhWik|g$ z&QiaET*WWn8#KNE*`V8 zRN3%l&^C53-cJQsEd(knx)Lj(VS-0YbrC*rZU}K?xgz@E>!+f3DXYW81f$GPUAVE4 z!Z@+s!j3OS2D?xx7Oo89?b9ZVQY?|WhB%B}b8V9VY3m|B0(at(%+MsbmYo>K?p!>; z$1#${mg-J*?XK4BIox&Wd^zFl`>Ixo;!3kIsgQT&Wx}88+x;fi6-$Z<)D8Zrr4sse z69F~tou5E5K}oljffIN)?Q1chEockeRm*ziec@`7t4|k|t3otb_6%2i5N<;X!>*iD zJbpihdO!|e{HP(~q39)#pERG2vfQp+SLQ0oh_Kz~qrxmn^3c;v zS6K5Klu~OuCQ^F)dE1oTI8#l!NLPtNLOu^zETl_b{jG;Gv?#`f#idw%$QsEz$InR% zC$qfTnl=;;KsRJ@cOyZcfU8p7DL39#0^5I1-TyI^PjvorX}|J@QE=u4F~8k;k+B_i zj{=@YIs`I98=!H}*r&mEfDZQV>{jOO>tq(@w1gu)3YFR)!*^D>Z_H;rhq5-SqpfiE z`2pEHEq8!HGYSZcK05-PxIsX&Uu@;wk6U86Y!#z+TAB*KSY4yW+a6PEghE5%YGpyN zEe3Q;X~v(d(e>2X(^p+3svLoMRjwW$PF7yfOn%>czqp^(cpxUKOO;8gkpxG!u-|?m zagfiQ%`}t*PI-0NN0uUqEr}7T%*Ydg{-}KI(gH0{E*;0|9;;fi- zh;Jj+Z8HXM=DALMpRO)#A#=zlD!|4*VUd(CqUb;BCJk^@AP(8gb|MtYBue#Cbj!;; ztv>=sE_WKQkgagr2B5{eO%l@DaLCw;U07R&b8lNOeq(hv$kmp zu(c6&`~S^#$e#Qmloo0!L~Drc7};vyGsoB4@qWW|{BVB~1z&X>wCFfJUef!K%OTFQ zFuDHbSsT)(Ja_+(rrfxUSfW3*`?||G)Fg`6!ZrrbmcrvvE!Dx8~78MB4;mIBgROVqYsia`$%B+?m`$3jm>U_=pzcd+Q)x;mDtTw^>2n9dV!7( z`4^Vfs9W7sgR@isH2yk zc<;tF^yDD_ZxIP7YZy^`AxMar!_+`w9At@c%M=dDUU0N31(dkY{MX%h6q=kxO%)tc zq%m!Q6u z-Hxo#Z4)0T@ZE$OhS%qW#B4)*4QG5;lw$K0$cYfwaEa1$X&A{M50P>6K~$IG@CR(7XKZG(KfFB5*MLk@XZL#-$i@Awvek!6<(=ZL; zvvQMg^{IcEGWV}sT91ZjI`F@-#xnQwv=+0N3X8=`R^5#a0)pge-knYDK~HhT#DN z+jt9u&6usq-{?Fwvwdev;fVnYSflrl;DFx{7Gexx>?1N=K5-g-N)1*eJ_B+7UCxP) zFKn^pJ`heKVR976;x3Utpc_1&sOu!A%& zaty^GE*FAXk-yI*^wgW|0|OvG581rGmx2^c<38wTC4;abC>UeXP-Kyg+>L8v^WAkz zVD}&pi5?4VA3}4*{FwVROOR@QQ@YIMxks_Sw%L|lJqFnX>cOhf=b2Q6%mQ+Yl&D;W zO`Gq!SgrQVcZYzs{j}6C>V<=2Faq8t1A~lL-?X(*-o>2#7v4;#WsIf*7ufo}c+4!& zNfA&N(FVA!IlRN=1Idvzj!vdw`D_ED3!aQ2_Hyqtpw58%1?_3!5jelmB!$RihKO$iwCd0z2Lgv70v#`L*4Y4~K1j zg1;N4J-5sHb392<;YB8QMLxiZ8nRK|lWHj20L`k$Kr?cv&Ehz#%~UIbzrr3RysK0v z!Gv0u4mp5z20JURl;s29r6q|o!%QNPAxyL`%+$D%Zo0OmZU>@j8-H=dz9n--90$8& zF+sJ6APw)YfV43SN~_HZEYvm^2*J&z>`(U-{Tu 
zxyxM)Tz*uro!tI?u{~fuMbiG0rJxRJpC~8k3gax(vO}&8h>ikcouCd3rz`oQ23baraCu3)b$Nve0P}U+9YgD=L+Vviis5=6t`MIYd zs(bHvv`@r6rndI*1O^Jq8DJ>eF@p3>^oaZb`dVuQ%cf#&M3O9egkJVno$t>Fx9Knt zY{%~zbS3I$1btiU6(wsQzM7TzXy81r4%n&FC{zsf%U#=+HWT~o%l>;7gl!j9ib2nW z86lr0uB5WD86; zyyZWmCmieIcNCuU19XVTH0al7K!1v}d?yfd^BH_cPP%7|`zCT*(id1s=B?LcUaM^O ztk^py2=ER4xp}pajXp4yboEu@LvC9G^5T96^Nuf(c z>aPkb3D>v8I?GixoVKO-FUn)yi*^^zI*Xzl0nrloQMX4|wAs^t@|8pE(6u^I%leCy zPU1I9j|Wrc1KsBi$Pn5%PxJ1QTK%2;f#i;isO(QOj_om-_M}r=?Y6BAsiWW?B*=fm z;{#)8Xb)nw+6E2Q>7AOw2iuj+`=!*Vmo>3}FM&YaQC%FPQ2C*7K%j8d>89Lut^xS{ zozaE4&>0e+0jrxiSsQVoI|`rh>g!2l;^6Py%^oY_EkNB~y(mp9~OKTh~+5Q{Z4z!sfQa@DrG$>ScytU+Q* zjZovweBxzx0{JTzCM=lLMB0PC?oWpvvWr zgdC*~=A3lUI*##SFVVkk9ilg(58y*5Lz8W|f)^6ssVWUa0;~A;3AVNxP`%#@!C@G? z6PtXn)v9A} z?C{{2iZKYC21y}gxs>mE&mgsWh8jma=)5E9nLWW_B?Ac!H^I}f)l8~!EQ6X(x2&pI z?uo&nNa(t^0M-h%ux~BAlY3UpZp@&BMCrj6`&R(N>2bju(`u>mL=mxfI!s>RSRW2q zXt%}+Kgevl6LR_9=yP6V$9M^e1R9dDPhG@N4-V> zf+kOGD>06}F5}1IzUxUnH5qE8cf~BH6Brfe7We|1lF`Sejus=p-`5<(fnmrRTes)%Sn?sS&hEKC`cnvVrlfm>Z5R zymqQ2ZKA#6q6o&`pRFp`ls?J0jCDQsdINx9#=A|<6nE&LmtvXczP6CyNImmQxcAMU zl!0ZFb%tm@r5AI#8i{KrECI??^j78C`yp+FxDn%52{HJfChvvDl?pg+=@dR z6x1F=H6#Kl!{rBlLtAvh9C~>!L@Ye_J7|G#(Trs0g8Pc9{yj3>urcCO_^Vp1E9jND zctnm(ZLM=l{1K!!-=4-8ObJE6|&$o*!W@*I2m7otBY_;_M;DS z;=VK&MI)mpLV2f|A&XSr@EWjZHUbD%ous2{=!KtU`*^hoC?>9|x{gMjbp}l8J@IO7 zGLykt=!wih^CQ)!_QIU9=4A_uSm9VYM<9^$HLHuG2M* zEDIs7WGUNo0T^o*nb89|80-GZKEv$heA_HDeY=SF$MhbP*tK&&tkX+A2J8b2824Sx zVb}>8d!nEA4t=D7xsg_Hx;+PtVHS#mUoc(YZNUKsF}!Kn9kM&1O}&Xk;9ypi!N6KF zu=GE=dV@bG3*B%r9WHqWk!+o(cX=XZI|F3!RPQAFFE{FSG)oWkb5kBGnQU5Ces8){ zJvzQ#$s+HoBAP>=`au@jXEc#4L0-y+YYWBrbJ7R&dnN>-=r-ftyc zq}MO(m#q`Cn)aL~rPGURh0#ph7Jh^~7RC1$N%#JR>GN%wJKQ`+GMRH2Jc?zY+Kyin zWJM?P`8mH{3dTu2(*cgM$zR_g8D%W$p@GkVosOE*A4&KQSCND4qD%uQ5KguF24;Hv z&Rhaa6kZ=xRLRY>S34*y?++1O#MREl#ZsP(nzLj_F}CiLAuo>ZyfE1P7fsra%N220 zEPZ9_M(Y*pJ)zgIE=D5|>nP zBTjO9^>;0vH>t!`f8||z-EmSq{x`h?mEisJcC+$wEHwrY^^SZNc4le0MXzhet8IK= zMofv;_B~Q{nyB1b-k#Lp$i-(wHnrp0V_m)8Op z*hvXP^lTdU2*(d87I5gDdBz%kl-U$t>4*H=PuRjvZ6>2)Ee4E zoIxE4z>xu|R>SDK)^)*bN2Hpb<0!2k{j-`?)G#W&;@1rK+nNUxWbo%-i-3hjpmb7? 
zhrLTJ7u!!rPR;D0C|e!fEx|7hfSu46EAtL^GPG;w3}J7+>ZsPW&~VxZ&kUtDXb11& z8&{hh7!Lygi=_R2j&vL{`kqYZnYSA~82}-Wbm- zBYdnbBQ&YT9oMY-CGCvdhR%~rYq;CpOQlB8B1W^WdNdqc2_La~o8tNIPQ|)iiKNPG zzCxoTHH1^(Y^ciw#F(Pg@ea*bt5pb`6qUmHzgrb!1@=*^c zIZt}$A`TfvgsW5G@#b(Xr&SD<^f7YO71q(=T(s_hl323Bvw(mb63N}-2zbc9M$Tar zN(VM=-&X+V9T{Y6kEjtn6|hhhT%NDx3na?$ix{}d6(@Jf?Y`2-NRxTBQif{r>$z?j ztF@R8*d<1B`~(I0sEFN zdgb@!d~JYJ^aK{b)~Nc~J(3XBpbnemy#(?$*)pgUza&o4{rKw!IGe{^y&cfC?TK_X zw>;Z1N+Ntdof29C!_gZhk99C>$pDI%?tp~!!Co7A?l4^!RPG1HL3~ej^hodlW0N4~ zYm=J=0(xxX2*Hg|lWT4!#J@hy49!Z^HX-;XCmXQ>%WPi_2(o{EjRzln|Ehs~Lo|C^ zmCa1Fslr`NvK)zhJ@H~zbRx0C+#gvHAF+AYB1|k)p>MbTU<2nVQjl(Tjmrfl7JW71 zn2Yr0H^z;o41LSuwwyZCXPN=AE=zunxx8m^4Sz|hE0wv>=Iz?&vbvGkWlhRZ^m3O( zTqjWx+!B9*l4QN!2TF)iJQ%40#`ZGE?}_I%vYy>k9{w~TWoMNz8KWn{X;Q{>u*Wm} zeGhWQ5W(yevNo1H%Z(yUhXV@KUt6h4yE(WS51UVo0FuN5-7lCXJr_!*IqoC-`@&E) zmUE|={KuUq4S7(_b59ld%vqJkIiGCYo#|PinmLe?Jq&J;QAtpti*{HJzN|g@%l+cb zJs(dLd<4c&(PnOj{VZbY{i8z0F9s=^ya(e`P0T;@&VU;aI5hA!K zN?xEY1?EM?c=$=Z19 zHSri$B3`L|9|S7!ZSuXv!KrY%_o-~yE@Z|5c{yS}Ckza?z7iT-i*{Nv(MBi97K zOETY+t-96l+%I@&l>j$AiqTT7G5|3^&cAIhu?vyJ%o4c#5%UEqnTTxpPxkBXTt$xg zoD^Gy7ch-_voA54-t{@&3dkv@$xQ9FU+AP?3^Br6KQ-i~rQv;kz~F_IABAF7VYrC} zyW<)Q{-zZHj%qLEC36g<2S+EybASxyiKP9pX7Jf)GpL5cHfiDR-xG*TPoFOP$5++4+qo)tGxEku=LF1 zXgg!`?H*dA9!MUf62O_-!Z9dJb7SPM0H7$3tu39SH^z>pqu%wwhdnunf%aTQJ<_Q- zPd0=%xfi@~dCp9Dt7h1`Dc1@tAFZbn#4Hv=Na{DD@wReybydK!awu$$*fi{qX{P-H zt8Iop!O?TfLASq(9aeXy^E{eJPryW#GYz3@zgxzOx(M{b4tfygK6VVsV9hV>%oL!6 z-an&sjn}aEd3;XlOPP#Eb;>eG!;n9B8*h4oQpU8()u?%m{6aeGU zYRDCDvQ?3h(Wu<3-y^MJAZ|&E$i@g3p`Ys=Y7iu4Udw>VjnKaF_;YTH3D94V)d10W z8Dok$?c1%6I^QjyP44u^_DcMlwD!(EN>YOe^hP3&e~6P*7pfvHvef2fqCrys6>))% z09{0;6bLARh?yb*7Q0yWrfyM5y9Ip3 zPR#{Wien5e^{bn1a*vg@79eMxVT7w+;Nf4D8KJpXl9I4uMV5HeQOn?;^gC9!4pn7>@WtxEk4tr9H86>t0-P5H&-{)OXJ|?!OiCl`q#d3%KX)>!Y z)bS;_lm7|{KwP?iuG0ph%ua&JL7-e7-y|r$7(!U}7m}B_wj2MIdX`S>wI1FFkpFl8 z(ATTP@WGYGP%PN;IjKUNHdDdg)xwPHr>-)l(gpAEwM|rci2ZAXfFN>#)kk#b{H2hf z36aA6wmf&KSA;>RSQN-AqVv^KR_CZ~(bV`8+3cH$zTMX7GaA#S(kg$#MFet|HtXo5;Y|8MG^KpvqE>76BEU2@pvnhfB{4bAwFmnT1Cv^vyZaEFzIfWm^pA zKR09q+-)`6IOA~z328bNMFe^$3|6(HdutsM;6lhMuEc8 znHo!a$kUejOqN(*QwKnmt)iL_bY(AQ8}T!ilk7HU7hL~TJ8PrU#}M@iK^dJZx+{dr zQGmLn@`Wxu|3yaQ_W~t0BSs}>1FaB-l}G=3fzF;OCGd#}xijD;VmqV9w)Kck8eMR$ zkp

ogK-XKy6l4UlMN+`To|;YPU?8D#Mu8Ar1g#BCxhZ_(>`LzB9r0M4g)E4sdPc zZ;EDHSC_MdcJ5YdBILn3Pi$u=h*L`phAS4mtC7&#p`&JUE%-hHm_*b1e<(mobx%cW zNsI|HnHo%GEBMi&VkDPcT=Zq?JKg~_NlZ4w+$wMFaJJeXJN^L77W2&87nWe2Hcey+vZVA{@6lL>^aoYG2$;Bb z5YBd_s4%C6er-07j7yW_ z4Us}ugF);-W}S)+O_mJS?^GBTHRIMH$wKIt~}P(3vAY(GPPJ%49-a~Jh0Io=YCmjs$+44pGLxv z=Q6zs=zGYwC^XP{J70*(X3n*W|- z!pHxRT**rXIx-G#JgE(oG7#v>y4|pSH&yV+U@P`j5mWpy&U%;D0-#U?!Kvp^uo0HL8xAPphf%}+QFnA9g$b~C=KzGmt+jBG zwPkgviTw{U+oORJ=$f01Kd?HcHJ8|J41`VBStj|YKcd2J=ef$VEYR_sHxd$MFAM-l%>xf6<^t8(p0Y6+A$?Zxd8xv%xxX=$VnkFF-^_M|Ml~FD*fxL3` zY5hT?qg?fx8o{QH@d$JMw=uW%1gf;w`b+w^nkH~WpjDZeXe^b5GCvKmj@zTCT2uglV1o!(w?=C9FJkrbPuAI!=*ADEl*u(UbfM0gCn; z>dFc&n}jXNV=J1hY3fY;2M8I-hhBp*8utqF#!uC`?Ao5ceX0DWDVIoDb;L-UV1b5R zz|lp~GW=Xc1<>=IzYWhyWz;|amJ;EcV|I~{OgoE6S_1-iSoM6!RM)k9(1st+cd2G9 zx_wJ%zb$k|HV0+({5T8($sDpgai?eiD{ScVd6)`gB?b$I-uUz(nEVJ++A@gfT9>@d zzgWp6ONt#@FIy=xqF>t75a`tZ&PGPjbK6^K>L^`A8hB81&VhA%WV^Kf(cT40hw#_U zveb5j=Bk`fiYt13is=HR@UEQJQ&5?RkG(XEc^^vV-jcvvzYP6 zhZLWR7J!Nt*UVbDo$U^~%p93Q04&w;<1k5Q@czAHFtm)NCE54flg4oOn-$(L+7FN@ z*Olwwpbp6_|Ch0L%s2g|=<%=RC8>G`V-N`pgj6vwu#+PD%T3}lfbPANc?m5xB8A^Z z?8-$i)tq|FB=!7wE*KCmR?F_*|GdFl8BGNrV6cKtLmG4JpCqMwdLfUbn|WpqBnW9j zIEc;gZ1zDzN9YN{(U-}V&5(v}n`0%pXw~pkCOesYGvnj3DVw2=L4#T<^bbrgKrp6K z@rv4P9NR>?IG`=kR&(wK}&(=7#!siU+ zC_N_CZ2$-BhDo+}j>tJR`RUWI@*+JUe~K8#8G=~a7Iryvk=DfUxa?WI4NStWtmhrE z`~!JVuN6ZTCbONzZx_*hYUJx}Ieg-}bTG#q^!%w1bYy>ne(|RHzXQ}kdS%XU zOi9RoA|1DvmbFW*Vo_p2fa)tx$(P8egZ%9?k9q}Lv7RZkLg*Rn-vEESqZ{O&eIJY8F z`Mzr6#vE|nrYl`VhGJmqHsy0v>g`hxy7aYjSt4_ApYWi1Tept3EeZay@)XX(f0+rycGZarLp0s>=6IDn_nc482kf4 zK9FFivWL4h_a&{(3Ho4o2*~+J@>_~fBQu%1RG@CqE<4|}Gm#~|{0T6Y9&Bf4I6u~H zUeylK;xo0?xcu~y$|T2^h|tp>r40heomPg~M({hdx6W5EFn0Gc!j(V}1LZ8Ko4R@4 zPh|a^LhRLBoBcBeEH6@XsvmMg#S{Ro?y+YLkl)`hOy;@NS2cIg@APAxDznE-uxA?) zi3KkA+6H-27dcuL(nQQMMKeaT2L+|6tqG_YZxIs}axFSM;O;6~<+mWvc`#czbqmCH z-EaZY<$FAz;H0F-cR6xL4e5%~-4qCElv2y9Ys z5l^%$JH}>|Ba$53PghgqG{82R}Q(1IArT9b~a@gejW}VQq zlydz@iU53MDjWI4EWHZV@W`(t5*nf^leX_Zdra>wE+cY6gIvuG+B{?>qPWm6(kl1T z@M-<;9D=nefw0WeVCg*H5w#ie3g!+7uNqr8v6v;(osOms|%}zNBDR^9!#Og!Xzx`o8RsRj^^Y5J=<9U@v z%>X5+3wQbB8(>rkWG=Vv49bed%8DwVz@;vszJ~P?RN)BKerd7(^l(gwL&yGBl=kH6 z$}Aj2cLdu#f2*r*Y54-gQ!tOqxy{f0e)BAn`0yglQ)IzGZdvTA&w-o?wVYd=LbJ|gn%<}3XL5%PG=a?3Q05nR!CBChFzsq+;mK*&hQn)KCJ;cmbvZ1l|f6HAsg`w z=cNSNnR-)%gifW@eIBq%?! 
z52QJ6%>|*Z*V^Om13(p-j>YKr?6`h6aPT@P+i*R`3$O(DooIaIh9o8Ra)+s%~iH4n=QAbe5Lb?CcCYQzW<{s#5}237Z91a%xa5c-BsCzf!Dc&M=54LbhN$ifAYt^ z=u4$;c)3%7eZp;lNXAM`NuFoa3Ugc@*>jS>OqPqmb^F6vQK?)CR} ztqGeUqy2FA`gnQ|y06+#a;WRy@<@%{C^;;gp)ct)cJd2z8d8C(51euPuY;mtq}BTM z;i|uVcDdbLIyV}Xin)-)-GvZCg)@d_ed6v3gj}_S{unof-WH*qS`=JJ7cz=cw!LQq7N~;CBdn6;O z_VQ!jYh*Mss2KsBB!k%`#;z`7F`7R!x4RPi7Cr~k4*8i2RTYXrrBNHOQhMabv@l<=$T9uqRK)IuiCk!C)kI@<^tUC^*RL&9j#os@a z;-h)JbF63{4SF}s4!|apXhcb#I2X>@GkftZQ1 zN`ZK*#We;x6xDje5t$EW!;h7Fu1NZ*KJ-upUMe{}SWM*`G&G6Cgzs%-!1O7%h>P0x z$mCKx+B=l#Lw>h$m+(a=x^m_$?xMgB6;9Y`jvg60U}cR9LU5yJi0i*5c_-;G9P)fQKuF5C6r`pTQ`x?9`4X ze8H*WRT}f)5*1Fs5R!k0CcT9oWBnQI7uk5=4zhZ3AUjlAA;t&@zXdK+9A7ZbO|>>sVkMLyLY(*Scd3&L8K3U13+4iiya> zKvevmySIA_dB$nEQHwSpCiO2axi)1I*l$`=4!Dx&7HPo=6Qc|0sfa*+U{U;L2l!{zB?>`BTPa}>8v2PsogM;|j#$e_JqRBz=C4 zfQYmq=OR*Xwq;D>{1h7w#}Dy{dLFPUkznq{xo6S^J086&-~01(LJGeMxRVlK@9;Zr zl0u+OLA0AZc2w^+M)PKa=8IPD>Yb{c=7D(^q3xEt#$H`MX3}tz-7YJqgxcvUXAWbW z-!8F{CD60t4QnZIWZQI@Os3A5mDxZz7={ouZBD!6_}+%&6ys4Xudk+U4&&;3f67x+59A!?U^A!ae57 z>wE$8nUI`SjEe0|l@I!QxlmeQe@09pqJeP`4s^#jz2mn(j|EdK8(0?lNog zy$N{Z6T4I)e~wFM1Fm{ZTd*R_#0G`kx@`%+)BCM(f_~ZY&pR_mss=~r=mgn(b$RK6 z6z83Z^DhYZtNeR1H>&o^_rj&l+F1Agu~Yx~r~BOlFg>zwHp}ns(GZa&f2+2LCVWzR z<5RWuc?pA8M?#CSKwH)s_##M|!iDE{=j35BnL83N_OtHV;DJ+J0E9U-V%Q33HOh~m zvkjAlSBs?^aP{9Sn!>ga=4iTE*LFcN2Q5 z7Z7@ic_o7d6!}8Qt+^U$<5nyH%K}xAK;+q>9E|ANLZrrVv&WbGP*Bc)mXQvm5>Hf2 z)Zwzz1H}j|My&?GfE6TIAF|dh`-^cbS)(GA@3t=;D=fc-sBL+HOQTX0vO|O;A7abj@ zBHjmj!rYi8WnA7l1i;VnUSiJ*O0+dOaA8LNttR8~szZXQ<^AAd4Q#&6*^$fT$?bD> z7?zrr67wL)fcEu1r5vmZ<`fr2S&jFOZ2f?_ftk@CbUb$H#vDbdF|NBHUbN%>-4D@c z6Uwt=&AWVOzn)9@CmPfn7N1l*cTg3uoh*x3#k@*U)4j$p_v@Ysx|vJ*^JEuc?16AP z&h>h;MYCb3@Zd2o--$>HVaclcDM<{t7W|)gbj|EEc~v|{1Z~IpFkAgkT#LGSlFw0L z_oB4TN*jnN}(`~kM~KaBF2iL!4$+eXmJ{#(u0Q4R*y2}AG7(LDtR zfB`P+bkw;2t4##0@f8=MSk1pzOOU~;A%UMJRx&(HDqgPsM3c(nqzqQ(Ac{PBRgy>$ z6-6NhBOD2fXIlpX(j@ER?x6o`%@V&%tw(uQo)MM&1rk^SK^r}Guydi2HZWbO00eO(* zN<1HHy!e*I#wCr935oFV5xL&TUGCvQA;PL`!_`pq7xh!9;9a`wSb4n3mRVpbvPODS z3X4pt8cDzNu}lMcwkg7&q^9=^a;e}Hh0Lv<(;L4swz|k+#pGxeFv&>A@OHOrZHszL zsx}{8j)lKQ;-!w3Sx=qnKy^i|0xrgc+DM3Y%7RJQUXn^8u<=ByD-i%{tjXj!nP%>= zAC{rd(SIgzm1CiChSL;C5y{ro zqeuz~&nBV|MW=yYCV)ETslxIq>NoCe5CGs}s6RVl56y#!{-<=N9Fu-Yl(3fYU;cMl zPIx)%@zOpB*Z-MLhdmDp3;~rT7%`a+SjXb2uI$lOC#P@(!r7eb)I0*Fs=L$w>nJBwXm#FaN z1c|zOBbI$kT=SX7yhuTacBzn5!8R|YcKK7ih3{|%{eef- z_(84>g#Vy?pa|8y(jQPQpQ}zzq?yCOC>?=%d0p99#<7%KY(464DE6s~-2Od^9j^EA}zGIEoia1sS%)}usTW`6X1%GqM} zjc8&OnECa&8qhW6REkZ!!%ZexPASh zA{Kw-{v?5K^*nBs4J`5m5~ z0(ZaBiKAAy>1r{Ui?zxs2XL!OAZi%gFJH;m_Em=gceb!ez8@>}qx*Dko>)wImLT)s z0%8E&XVZeuLMX9Fa1&N$^UK;i$n)o^mV}6IJ}i7}gZPl+%YK9+sqrmulSmHE#n5^T zYd$}k^J2I2YougpfPh&hYep`hJ}2n|xLNM$qeY|_9h>+lcro;D*Rsl^Lq{;#0*wuu zqz5;AU|OoiC83~2D_kKHPS=186WKZfRALWCITZ1EAIp_|xzu~!$bZZl(>3w7)i4}; zNiF1f4GAw{3A`BmN0@0#zs+5de%7GIt&gl{*pTun&JRgpDH2l|I%?Yp+F}il z@GxYrY+Eto&XDt_#|!j%9B$;`O7`D5Jgu^=++19M zRZbzWovv2YMSbdg1l435D>1q2q<-8Bf_$D(|8J~8iW1EX@!+oyzlN$}3IP3Q6K+<@ zHbP2N+z?U1&$PBuReF}WJMgTI^hv8b{_b80hHlgX?n{f607O#yhUvUJ_@k#JcSBk6 z5rKGNzT~nZIUZJ@fY$M6SYh!QsIOCNh7&sx)2qnYB%2|h`0bY7GIaTxOZ*Po(}hB` ztcT7h$kAcESHN!4k%bqEByG+rXnOq^C`o|BGgdi4K43y*Dd&bs^;lXDtOWtgG#iyP zKnui1Q5F!jm5zRbf%1+oIW91kDmLc=y2r7?jzydIQNscbt^M;D#$zB*l&}o7=TqZ(2N`=xTa_H&j+cYL>_c%PFh-A98EC?#E_2(&@K#RK|X{uIwSSJc%dIU z0b5Y~H^or#x=qIacbB7?-iE_n)K|1|`TvvTN4KUrDxrN8T;@3TL!iQfFH0}dXCM8! 
z!NuqMdHlaWU$z3yqJJ2xFCa_K+AD@U7wHYn3phpPX^KG^P*`bLmbM|QXpSoabsO&@ zM3z0GNlm>KX72Gi6#GOwYVo;dInIFrx~!zva!5^RH^E7>w0M}DK$*puZASP*<99o| zt$0hL`7&qHbW8bb@#?9Meb|_;47kz?cMw^|jk-+BeX~b!F=&{GTZ)r<3gp()myY=1 zHT(52;&>4&r&|02D)_g_;;2IIWbRCk(Q%qS^#a`hA1<}e{MOYTbI2S?mYxtr_na1eQeAjPsB+%9F&n}xJ%G<1cI+dGk0K`@XpBZ4c zOC-ahTqy9rKW_kSam?O4ce$1~nDc$6T>gHJ`h1^o92z9BXvM_mvHB{17dD2KP2EuC z9FwgOEnukF5w(zeR8?@^|Ga^YFW!wWWvklAY?ZldkPYZ~If05d=1?fUXlD z@3Uz_q{3$M|NG~SgJza_l;1fjoT`>+WobazNCNr6May}-b1tq(!8%=&8pY0jp`|6(P!Z)adR>#2U<{=ZR;iE&~rWv|PL zg^@Dy7z-QRNC*M?1rN}*v5$^|3F|JCgPjO4dsgr@sr%AMt_j)?T5j~%M`>qu*Afzy z6`>_%mN|(vA&Y~M+wjkxZf-u3GiVA99S-F=g2khQH`|Z95~|_?ZMj-oPUH`;R*4Ig z!WhHGQ}}6D9LqUjXoC{gQuS1HCN`Y#RHuyGkNdn$t)v}6?1-G}5&5MxQ28uGNgFJZ zY%!vFCTZAAKqIt?h#w=LW?1v=7D$$CB@ukxKF;L-{I8I$siy>#B@Bk)#}yiCD5VJt zCHxg361ynI>klvw&aUG8rq$x-qwT1|SuLv#VPi)Ru;Y{rdw z!&O}}PB$9z05e7bAO>tglLg(=>Hn2h7t!^#1)FAZgYzy}1tFu>zKJ#=;S|t5v zKSOOO-q~z&N%g>8mIulHQ^nW@uC9EpRBxX&*eULacgq-sVJUTYuWShTv1zHr!c3Cr zXj<IWEXPCo`U$yj;0IMMi9)gxnnHmA)*v|$_O06O(bE$4 z65v3_c9{h@UIo;YF-Z$gt8@<^;QLxKaA~@rr03Z>i9Iml?TsoRWbE!phXcs4BD=p`$h$F z1V5ITW+qvm)h(4(?k~8|^~p>BC>VL$idjmWa`B+>Vp^$_<=Em_6LPdTqXXM*&Jheh zClx&yR0lDjuc9=b&7m|1+cB}!-kD<0+0@v_V)~2YOj(k`i(|&4DJFUr=1^eNfqDnW zFy$|k_rOh32@I_F7I_|(F&dn1Gj;G1Qq&g$-@-a($#n9+kJ$NePeA zA!dGSJ^yVA;U7r~2}D2;%Uy}BIAOO>3MbT`=0M$tMF=Ov$m{wJ$H40RSNCs&5{3T1 z_rmd7^W0!;gPS^`ZtBqGHVTZ(;jg#wYfHAt!Few?JW^f}w9j_`&%d+!?e}sP1w%-h|fva+StVT3otDU|dC1D_MY$Xby7mmXi!!%`GI|)Eo7H zKh0sxuGU{;3|#(<@i}3QGT=JZ9mkAFF|Pd^%+S5CrW;;t zST1d+%nZ5fzlilN(xibuUP!wmXfG9 z`TJxlyA;>Jd@zF*bEs6crD(mrm1rPPXE4*4f%lPFF7nDo1(;vO%lPO$>|IoHOTSTU zANrol-g@r`ba8hw6c_Jpk66LN#yUz$-ogt18vj9ubca4_W*>aF*U6yiT1V!rv|9i1 z{$EzEb85cWz{kL!-v)g+<^1RaW#XudCpBV{IAK&#T?|{K5a1Q$JmSPh+hHDb?;Tbn zNAMQHJBZi8SP^>QN5i~r#2WV2aV7#xW9e^_NcvNe7m7p+Lh z;Z!$87P90FZC0jf1($iLX@9cU_iv1>I87GXj@L@sO`);XhWXr zElY2LA`Obe3bKC96Q)8rbNS&I(IcNS$*DW7iokvtv}eDtl;A+(b?2^XLd?@h1$ht{ zgvUarBX&d6<0`E+c;1&0+S~O0^+!+(TM94sv>i&l9U_W#^XLPQTqPp3J%{%?bfX&D zWi>zSvH4Ujs9wB!;X44%VYUA((&Cs;oVtY4b3{Yv-!d}>gG*QyJwhopI9Q&@!B8DctUUkSqPsQTe3Ru>jQA;fS9$!Obc6eJ)k$g>6 zQ#o0?Ctn~w7qd4{fLuduwrD*btasp;p!=)jpdiy4XYYql^}kZjXAm{Xt4a3LKE`K3 z@qzu2Ef~0T+OvH@qIT(%mx%&4)H>}xCc%TOl{i^#WK$?qZFurWWE5m(5?!?66JJ zi~sFHibp^J1}0lA4DJ-IJgs}=4r@G^UcTTtcdz=jdUxI!YJG{&gK!bgPMFsh&aovl zkf+AH&!x9vd#kak<}PPlRxwvGE$@4Bi<(n}qfth&N_>L|sd10Vv-PD?Tf8G!xySXV zDYomV+7q^dw~~C8W6LKI;QX8U&XDro!L7vA-{i;9B*3zV1xAg@%$Mp&eYb)#Be%3J zp$Lv-n)+SMEOC!smIrKxN13Dyk2?_T>oc^`7sB1kFe&!^GTNn99Wug27xRaQy)HIU znFp}(VEzx|^|K#|p@rbS@xX=&(Oa5f$35=_65t7@j7d=jeY1%RbeuPy#frE$pwm?{ zt~zU35NeKG|I@sleX`(BX|4#le|-qEwMttXs3$ggbRoYmlxWX)h;LvHZl#}n#s5`E z$#{Be79T!KRWvI{Qg)a@PKXlk0^*_wCm7NgANlKrW%0XO4V5=ZEJr^^rygNGYFlk- z2tSRC3vyQn?Y6XF)$sfN@w54Dn82EkjntxV}({H4uk7b*^}cso>=*` zrr)z?>l|PQfDLMlBQ!rTi2>)ci$7iVzT@jLzq^V?-j18{ACGPB1nYW8v%AM`yEQk% za(*e~HA89NN!xcJRR#vb3go+E+0hKOv}GE4LWL#;jm~xosQ!0HHoMK0LU0gW32uMW zQR*(v2NS?t;4!ArBBXxQW!zmL{B1(_B_aTL0EVo_H^2PmO&=x-=6OvM30J14HbeKe z=LzE%OX3Jf6*Fmx+`A3@;_I4K!m+iNSe*f-rp5jWsNRY_SAS^+WU6$bW2Bp;@UOQC z=Nk-$y6X;hjrX&vi0u#l^Kl8TyWlMANRdx(0;P7u3^1mPd)!t|z0p`TtCL`{^c6y4 z=o$2ah_+=TT6x(;Bo-ZkY}>GyjEXasZ$A$upIRF|)y~V|i7g~fPMK@VY((-QRg2Yg zHW+Sz{Bg$0g5LjB;g>o(yE|{2Yf5m{NUI!okm?I*1LX&y{IMS#)8>b%%@mPQBExqA zDnH(v_tbMyPaH=F>ZWA%B-tT$rP-tyvN*nqvxp8Cg$h5t2EU z0jB#jz0B3&-u1UFd8V#GyGc)W2L=@qph>--B{>hxT#oh0W~S(~4B?o46}XQ9QBY?Q zjv_`B10Kc`8wTmI(^_;+D*qysMkeqe$Dq&8 zTN@!pMjToqlN90F^WrAOJp(DxXf_B|tbOv$5UI^RN0o4!LUC_Ry$Fu%_o(jHw8yF^ z>#ljon+hTLHZ#lvpdBs$ElP;I275uI$!IqbQr2ru6|{q8au=pfO9*#}PMuETIntuA 
z4;D=gD1X*mmi1^2@u>Ft`4lR7DWHX?&@+4~^$(xuiKJy{TO`K;JG=2kMP|g6m3#+w~-QvZ`< z_E8akA#g}DZd#Ipp8Sx#mX&z!KXh+>E95D1V!v+P*8D4xt%aoptNU3czIVIh4~4$+I$nM3*Uc)Oh6Xa-%>Tc1A3MQB!P<|h9@=rK zm6SQG8_|$0f9O3L^)|sdIjO&L?tQ0*3&SLo-`_%AD~P8M5DlS{UyQOOU2b~9BgBxs}v zi2b0>2YoUK8;RY0G;ClqwfhT3CoQ;^SzLs{!J;Bhs9gjXf7!(_L3;+U{;(Eq3YbPK z!qbs~7djmkNIk^$ZBD+l?rWquC-)(Bx_lbTMHKE@qf)PJActS-i4Qs9p4jFzlnXsF z@4e=|7w5Eh1_Xpc{{}mg6Xq^eKI?yaGOdfVHBS2ewEv{F7vQfiqH#Ks%Y-~T@wqL1zNwipcrT6zgNfFr!_^9{?qm8Ah^w+$ z)Dt-x_lW|Y+46|867aTqGrkPWW{P*W|Kzzh1*Gs%((1ihci1D3K7OQmtGN#-RyI+? zza=R!gbu)Z73>D)h4*5sc@oo;@bt66_?O3bg6FVbxEV2+f*xvix*1+#FY?BAB8j z7}b1mQx#B8<5h|K$e#4qG~9~`+$uekof6(jc%ksnA;Oy->z)TT?W*V%Qil^gbz$l7 zdlbK=og-BWK5jz`l7kk`moA4N_W-cMsxs6>Ea&@+6iV-kfa%17#gc6>uJWgQPmsz_ zGq4WB8pA=73UQ*1II4-g(vKZ2?twL$i?rE)bf$&6TUMxE`P7Jc54n_=oGa|(dyTqI zpD6apr^+I2anTxS!ekU*Gl!@c4~zto3W2pjN6eFI9RJqdT{=6cB0();T!IhN0<9`t z`$Oo}?rnX8;F?!mV6JY+`RTAjH5;Kcd>YWBl@6|lnp{J;DfW2;GcU|w&s{H{K!U8L zbS(@K&LHGv4u?sj0ynZIHQEd4j>ECFw`iX(HgeY`Dx}yG`j%8BbfE z_V`XUg~wz1wCuub^RDj0m67Kc06>?xu-)3&Mw z3}W>=44g5y7YpE3%fwV68j8`q>Fl6!(L|$rRni^Wvmzlnb%Oxa-po|_-!hwZrQd*X z=5HWZkQNsmZyG({LZ9DwuvgOzglHVK`3kc#)>lO()j~`SEcUO9sP}DeD*E9_3wU?+ zbKyT~_7I3o!34KVBBfG3%H~$(Pp2*n#MyzN%asgj;@D!ie{dqms;>L1qDV#u@A6+Z za(kpn#ING$!3&SqO|ZWf6@YO1Rq07Ysls5dE}&m)&l62b&bGGHIXjEqMMq`Zuk)g3 z_JcKCDn3==G+C-2P#F7D($DiIb-T-m$G_zUooaK^MHfrj9}O0Au&1ZRMAga4wi^gpQu2%e9PX% zJ0VTK`q$z5%$jfq+^j2gMHD2OiAVKE1OVEA$wPVlpYvWsh5okS$Z}tx5cW zLch}aPt*OJ2w0hh&QTQ1sd_cM-}d?}rv&X*nh!~-U(2y|QX?l>1crbb=KlugWus@7 z--gA?E(UW*QY@}=IiZe4znrs#!UbL?`(7>jNn~2ZsXfYJYJBjs!7GqU{q&rI*4+eB&(>X2@<+H4XD|FoYLeTEn zGZ$DZ&4RUV2aVTBuRi)PG|x*ygA3UzZ|GAS-=r&VbmtfW9qbe3b@uHl3&s|WZ^5dg zL62&S2RN#cRlXlq%1W%-N7q*jZ5o|^3~ajC)eC9`D>=7+44zRN*GUtJUcXNm^QIMp zik4nW=E$QHhG;?QE3{T5Gkd;0Su&!(Al(-!hIVyZzEBQMBww{@wbYb|+d`Doc10eA z-{z1}VPvANJ_dBX0-*8`2J=z(qWm;X+yImk1Bj$~!sKcu*H6vZMFaF;+L8wYa&gyB zBasV~^V1QbfgiHRkFzD?4)DIrhl~$|0DWO=w(EclU|upzF3GMCxnK@ZbJ;eY-s_n= zef@f$+^oX+zVEZ0pQF#?@_y|moej`MIDy^{-#7=>gMf*gn-X$pU+Y(*PQ;#v^7H9e z`+NQ=3awi@E5Fhx_&aqXaeWPDiHQVE{x;%&U z-dG`+%-^5o%NRTWS5=YZB>Lbs7gJ9}KArT3ZmCTbP8Jiq|NFrpQXGk~{C6M+kGvF* zqkZ>PTOTlo^Ud5m-%-*VRi^^uLV&u&i}XjeCf)WW)=Uv<6n*1eo&U>Jr-9~BKNQK8 zZE9!p&#rrFSrQ=W{e1VEI2lh4q<;zm7?Br4B$NV*<9bI>W#@H$K8-lzvaF;3kb$FJ zT5aDBL-`wj%Zll+@#(AHC&OaV@cp#aSBoEE5 z%o4t99PThXcAhqltw=d7z}oYSU?Xm9)`wV-o$8+6TrX@*{Ga#z1b(-P{JprhazwtA z#Xny-(so#;(#M{pFJfYnTQS9kImsIyBJ%GI6d9xxmIJ3D%11qN${WSX$~pE)yCO*F z1tm|dP@@V_+p-@X7rMXNre`GLXXX9;ZQ=R#de~}eWZt^1BoZ*1({q~%cD}DB za+tr&SMGA#TDP|RC;_=4u$ED9cxi>3qKELA^s_dG!a~8Ftp#(sbV1|+#z$HCDDNcg ze@v@_LnO&Paoa4QvA-^!eQLOWzs1w-F6Z`~P+*zg$`^TxYrIIHMV%iDx_p1QT8V5T zfNXb|uabFnc|)8;(7Q*nBCUPr2sm*M+@5GBxsf>9OsHz2N7Z3l zKewok?U#!gFluK*_vx}dsgFBF)RlHxMr+(8Aa{PN3CU3ZlQY7MhkzM9`3o%$kJ-7k znAnSS(*o1~$~N((&5mu`9ox2T+qP}n zwv8LxxZlGZ^$%*CUA50z9;$&)(Kt)1z$RAuKiV7yEIw-Ykvbmj*UmsS7CM!ZCp=dY zLz^wqnS~a!h1FT8=o}!L>ZQlk@LKH2BAqaATGdKm!+J)U+f-qtA4c!4%e6SJn2Q2k zPp3x|?d|ABE;+qn_4>~6KXEPARy9+GKnX?A!h7zksf~0|I&uvx(fAmKeQ>wfWB%*y=RMY^t>CAw#i0vL!9WEBs&#Ay)`loWX=9Zc(cVmMj#eNoIsdH*e3 zRB}tF85rR`O2@!9nMsy^{;j$&)lI(-VE6}y|8}(R#vz$#=jw>PE+un|)xFdEkL$^Y z?S!!Uk4qZ?toy#XqqCO zZf9&dw;34u7qZzNY<;8UB?5)CpiTwLotb9Z{zD-sw-V=rSOI*kaDztD z4TFR{hR4D6GNvtbx=oW7q!x8DMdbM@ZIP+!LWih>JO!2?j3*nJaAXxb1kiId%|wpg zAJMbZm@m3Fg{(!xZ4+z*X}94yI??`r)j318>(vH8yaMu-j*goGu+eJ$3Y^a8X-aUB zGRFmCWi}g+PMu{Nr*A|3D2luUkGy_bex#9)2Lb)GfYbp+IV|?#I=w;26daB>cxLnH zW*(LP&?ZUV+wp_mt$ESOxEl@}&6f!gCH0iory@bHmjfU3)cg#f$|Id4W~<~LU+WD1 zloOJNxvqm0@%NJ6bx_Go&%K6zD+JMGFb@s(Y#}8PpO?fqZG}3oiyK>FRhKQre&ZvV 
zc!Hpi+JN9T9j4Nnw)c>;s9f&S*WhB!18ba|TerWt^gxgOuzAABmXN(pP#@PwWQgCI zdJCj`hYz-*yM~!$V{>_#+!QTmM>)C>*ipWQ=6WP;Ew}Zmi04qHTgaam*r5#t0WPLp zk`0$X_@fA}1f9XjCM!r%pxS;2c#Z;L+(5dK5XRJ{cbS3+56s#jZcOWlbS9qW2J04o zOOn}`y^*q=(~TY{D^*b}&3f=aSI3UDv;}vqArC~QHpkDtqWwtf3e~_3=@Lk1 zEp|-MLK^M0B#MkImv5X_{W^!lEPGkc5G=<8lr4=5%H zyqsmT`POztX8wZ_(?8QBjBq~oeCkRIu6uGqPF1-@{MNGDoYmhie26IH|Gv}X!ez#f zt!zb$3jsu12pl4!ACTk;zu-l};df+2STS@yFAXTNj@Cqvn@VqqCWALp$u{|Hw)9P! zkpv2AP->KPzHtH<##cc%0I>~^Xr9{Hk}T%g6~i|m&v<>o;nhN8hnZ z<+BO!!NueSY$`A0@GP9+McDnVw0eJFY4dpg%8}qPXn4YufY&z==F3`YunMQ-+@1WB zH{q@Q(i?F{LmkqPRqnvm(K0TTlRB zZjFi_>%b@-F;?PCpCHR|&Oyz;{O0uk+Tytn6Lwy7u`}X10OuGPXl)j6)A5z24m%PZ z?2XSqT|(fI;NF0xkLZ|jbPts-ap&U8Ly^v{us+K>GGn8YKZj2Pzrr>KPTTe}P_dWC};ha7LW8Pc;@n##{< ziWHk>S{*f`9V~*=JPdD09Hw<5k!Vofr%leU?#W`@-DlvgE#~kP;FfT5-BcGM*T?h( zWAusjL~fl9HXc|=P<&l|#`Z**!Q1zOhxAl0^6`e1-ZAn|RL#H3ecMT77kB%W8RWr% znRc_Spk=tlB4bGGKv#qNVC$0K`b7>EB3Q+y#)QOWXdT9sf--ku^@3Ssl;J`vPZtF$ z(Sm|^V|#3dsyvjBJkZs4lK=x&yL`qF{FZ!{F00*d@`LTv z_JVIDFGrTo2e@o5`PY_r2P!zj7Tn1s4yCn^F!2mQn)8C6PT*ams%kv|i)AlMq$o4; zLrv(238Dt7FAKL&w;`K+p>x<#z|Vrd`0>~nfj?;>o+hp)5a;PWL$7uEmJr+ZFK-XN z&UKzXlb^wy!kIfQk2*!LC4_Q(Zwo`xm1N{G17#-LzEt%Gq@xXMwtkE=JVm|=sj&L& z2%lF257?`d)p%Ty#uI8*gvUn?)X8C9Wp54`ACrsjs)wR@3!6eB|JURpC8h-vgEXa-ce#9bIAy? zidhFtmHT5>bI+9oH!|{6jeP79A0_`pX1@fpzxjGwzp{3!FVu9!m8%%C#L!zhO{#;; zGVEQzBb}x-x`A!8HV-)l^))j`_xC)w6;THqYFXQW3W&fPu9zp;OoF^ISS^PpFs9CH zX5iA2T@C|6DbRTS+D=${DY1-vqyrs&2G@HTe1Vwpnrgo|1k5L%?a!8O7LY_AYmPtv zd?2&kd*YJi632NJC)=|&wLaKo;_DiO`@l*}s>F5}$7SHIEOjKo&o<9R$p9NK@IeR8 zwXJU|eP4QHmPD<?>M z#?D~50NabCg)$U76c(rLeAf6Sna-a~Aq|f;Md>y8<0lL)8;hGXRS+L|w!vCYnrnVf93N>BXBP1U}=u-nl z$Q7=CI%#~;0e)`_iDJWmX9i33s?6(aNn^TE0e(YlCR=kuoOl&PXXdxMMJzRG$mRG`yl zI(@f*Eo(?kgNvXA4-P8DKdFXE9m+l{eZ%&m%|iHUdgtmYCJIbv)8Hy%tRS{-kxhYw!wXqJ--@p7cg+m=Rc}U zhj3kD7_>!7i%&{K6VD(1M?t_^orlOdlqzVa9^X9^%(am~%1q?Rw+V=lY77eOO>9G^ zKuYcq+h02|O8GH2mbC$|BqIG(XE)dz9yyEuje19{Az7C#q&=uC(U$VNMbPn;O&&%t z0MUf=KmlU?|EimlJr{XtXrA~~(x3A8=dh=GG3(v0Xfb*^>{bu_N!RbY_+H%_ z;*Yy!MVzoKx02evbreR4%`p4980i%Y8^#WS7E`-G8HkwCcKl~QmZ8mF-ldVKzJK~F zi2bb0X;f>-E&igP)LlaGVz)1*Woq_R+83Pi5=zHnCvYBI7@h z4Wo>C(2Eijka;5)Mglp{EnGERtQ0-2l5OLdt^)Rc=E-7V;;j*7NTg&#qAQf(qEdLT zO;;A%>sO+YoIp3Sq2-OHZUs5K^uukCAljnO;Uh@ev|2JEuDX-+cpXVPFiw<6FcSDn zz;O6?zefOfyEIkc@6M<%b-E>)4EEKp>OUx}n3`O|KXFdX*Gu#kT! 
zQS|2*`&7wjXvG6j}Gw_FD7WJxKZDV z_f|WyusBMb2cGnCD%-@Z`W3`bUV7P!^O_8gU+1G>oSbWU3TVcs0z{zl%xouzKM8gL}`1ITq z$@@EiQZv(xB@w90hB{Xwg&+Jb$PKx#23ZRQvEsAvAJx7OJ?u!p+bO3W5c(i#m1;hEAzocLxD1Po3puJ#ENgt$Qz`brhphI4Wq z>1l}|k0JN_1)X(euD=Nxifki6M!dS^!ips6{Ndok{__jO0JKKAT8P5Nv&J7gZJsN5 zK=mZ=cs*?C{Syt-%A*~YBwx&ZYT>gFp;bYd@Kwl&r~3r=aJMTwBC^iMF?Y?^)I4mf ziz?2@mMNDq5q|f0BcV>DZxqQpxv9Bx=wgkVgdR2+Nnhe92G31zY)!t(P<-aWrJjT6 zV}_x}VKo5Xz1-%JzKa|xL_fP<_4x_$uCxw%w>xEgFMV-g_y-#syNGQQn7ge5V~sB; zA9@W!7y2)}ua?U&PLI5jLgCteZx;&XB5&*$b*dLloEih%u^gWyHe7&wu6zat1jD3h z!C^~Oh!aF5MKRl>jk8OA+yn*FEh9oM6Vg8Ii6?!6OH7%FPI5)m1Hmp%^R&!&f+o_0 zD3mBJvDN>1&3A|-$H^oVh}GiBT!0`>gE7}_2YRuc$S>ERs+mP0U+Q;y3yrLit*OoP zT0{ zH@fUB>7+fkZ{Y&PZCi(VFJr}wWGLi)B3*h!qO5FfsUw2%o|`$31vi=ZFrxcEp`(@7 z8X8=Y3D$uv7vhfASaHQNo6L4IUvc?_cAJx!V%|v}k6Bboyw!>=P(^%!Ng)Pp6H3#w z!nFCVFgnN#V+zatL}f*A7!>b95zjI997>&ct7&AAP<71xu^S?*pAOGNX#K%h8ni+J zq*Q3)m-Fqlbb%Z*ckckw0_1t({d(3)@n({f#)EdZ602-CHKL|jJ7%(S#^F> zz2|@Hq+Fqz+#uRYWqcXFpv2+NjU4B1>smiqx z&7rjr?lLu-Q5|Pr(*C4+waz!uq!M{%6tYI{h3Kr-CB<$k;aI|Lzi5W`MO}tC&hA^T zMT?2Sy!VHivd<3^+40Vj7xPd`hCN{*(u*us?@C31mPJySMnpstvsxcXG~;}f6xZOA z5M^Pwav}lt1xw)IoSWt0(yOYgBI&?GYUoE}?vp|Vwo;M@$cl&k&b}f@Y~$6Nv@1W> z3<*&hv|3go&gz{)3JN+sqm{^RRumZJs< zUE>AU7d96oTBnh(UR6ja=ICR_c?;KBcCPxb>6zZ zd;W}BzDPO*GBooHG*0$cz&OP(Wmj>s^2U;l$N!JmP$~%>(DS>!T4)Qe%mm|XGDsxn zZKhzFx(q)jRHw>WbZQ*e$aI2xmjl1(G(u* zQBmBC9X{ttDhC7J-4>VjB}&-|t0TWxHq})#mHD~@zjQlOH4mHE1Wjdfut5|oj^N5y zniGS&%#R-G1t~70A@C{qQ^6e=QsrOkv2-!*2EC1oF-+saf&rN*NJZJVS@Sf%OUJdV z$LMGzqtp^XX}ZlFD`9SwnEc)V1$}@6v7`4Ua8G#^pX&NY#8*AvAShGbEJj`Ny*=Ir z6k0Rl$JM>QqEe3g>7~nQW}sp6jG3UJV~J03oQ5Xtz+EZyy<=YA6nL)u>hHPo7S_z ztW5>x499+D>!A8p(i#{L?MJFzzG~*BQDQSuL1wGeMzD%(`Wbmt62@6W-##no z)NFf;@MphIa5{?%HtB-dmQA$8-dWrCatcST9q+LTWrnOoJN%&%a#E zj4q){e*gdQ#X^8`p`nBNl>C5g=NMwbB+qDAyHjPbX~O;3nR$JUTK086{5qVX+I9<+ zlLPwdQ#zn@qNH8Z!SdbG$NX@o(1HWfsdjio{D$6cMp6y~HP1`CF?dY$NI9cD!5}l$ z^Lqtr^5hSh*O<-%Bf4BFr)zr3hhU6t;hZoPa2+9_pJYJQjC%9z-0MA3i0pBBtvcrM zO5o~FfnWbHheNpgR&q+B@+u3*dvRooOcnM-C!$rm#_tylnFx=?|8;y@ruQA6Po6^gMuT32JsHB(NH6Y-)82MRO#4UD~M}(3OJ!5QsdVf6z2i6dCwOvB0 z5?QSt1{Se?gh-j}R~!856Ui!16lUMIIHOj`ybjljIL;nujcuIqX2N&#$O6@aSWDKH zRf(K_hMqRJkNey8m1cad(Y#xRO9j5-aCDO==D35(?&P77(&_I=a#&2D$gi&Bf5`*v z$+q(98&PNgCZhbBd65p&%`toKG6^poygenA$M!)2BApZNSXVKcs^M|@-oev{>e>mQz`%9~aSojgKzRJ9|__py3^z9bdb(@TrMUTa%jfO?nGUby$ z;)AYwuxknMse#p%!298fnV?x`vICjSBm2#B?Iv!o2KH-xVr3${^mFg*kqjr@oeeQ>t*f^lq&cWO zt*E3RTno&`bf+)u^CVQZnH?V2gbfIg2ZL@aAm5$LB_*z>>Kj<>}ODfWb>|&zcYa6n&@FxhNInw;LSF-s@%b` z0BJPBL^~PRZLy~aJzmNhH&1dR)J+K)#--d{7Pl~dazai1nE0|$QS;B3bkR1jsX~7g znsX%>+!iHJ*>>zVeublkGzbLqpX)UdS-Fs_&pQnnMxlHs-h2->9Xv=zi|rOxZ4bvX z!MSw-{m($1fg{SGS&w#7Pdh?s-h7+qwmV~EG=xyR+=s1PjXc@T5)2nb=0p?Z z(Byw6&CF;i-=!;Fzk8!`dVjzO5NPQ;B~_^Z77Ap!SqJ(^C_H3F1kehdBGlC)&9KFIhE8GrP&CJ4bCL!jO;?<7LrCRp zbO>%PgMKw)rP(Ou&*}}p!YR;8@Z8IK@rBJe!Zo%S!UlJ$j7?CcD$C^XEfq+QYp8P5 z-;7=`G}G0|PBlfce@X8=Vl(Ze8}U4{Wyy2y=q?w<68Z4B2u#{mA8ADSM?cPR>!+C( z^%oWnFW33+b3{tQ1`zIHf06#n3*B07O5T{8bvJSE4hPZf66ECmEYmp%1-r?!=$Z1y zC!5|w(M>BZ-GO^gy;wO;EG6A%hRzJ(dZCb=E`$`%7oo4A13E1I&rgu~27U z{)Yls_-}RlAa877ZR}D>)t2HCjqqVhy%vKygvT}H+uIgWra&e6@^R=)SaFREw!gJle2TCs^Td@kRZlCS&AC9Q zs(4bf>uqX!%rI&n1lt~S6O=jd&|E|#ImQ?f8DWX;(}AB} zKymLDvX*V^XQ!XsAeT^Gmr>xBS_*brZ&7X!>2);&!oIe#EJd*s>wN}cN);7JHBAhI z9vorH(WPvy4t6MZngZ(2{z|d^a3XM9>nk`sXV7O93RFAjM_pUoe^aU%3pw^dq<� zSCs5QfBa<1Y47Qrot!$vgei(~`3Qeb6rd~9S3Zlzynd0}@C@!Buk|@OL83<5j zwNhOKu9}ag^Bl(FBg@&e{72H_QWoZIaO#!+66h*X%)p7_Ws%Bs(s6qd7O)ka-R2)N z?&nBS9jm1ZH1)Ey;S^-uSI*o=?eCVPpO*FdWW2su3UgaikN;zIlrlVZAe19>pUdE@ z&-kl%PJ>i0+UUJR!)E%Y?gCd)c|^0PrX6+@MnG=$nb9I-at3xSZbFNRAN*c1kY}Ox 
z=`+Lkms6j3qFya}-~fTN2L@q13PCcT+#;}YBRb^60QsuZc=;KXz*Q0Ecu&gct3eET zeeqjauzf2+kJYcP5ujNno8k}Z;~bq{k)~rDTlt?4gKE{&yWf3rC>T-RlNCC- zfkx--_yKd3OLTo|(nf1|fer`Dp^;;^Q{5AW2!`Z@XzIh zF}XADQL((|>otr9i+`hATLY(Z@rJi%1H-Hpqi8Q!rRp$P3zeGxJ6j)tQSva4Ns6X$ zK1S^rXh?7oAq)@42r&l7ZUTCuXAB}%Y|upW3CHzC6_%Y%F1FPwl273}cp=^{*rbW{;*5JIn%on4}Bhg>`%tPcn z7uqG{fBhfV)ifp;j&WUJpGqwe4##N6kB-TSlQ}+W8n9RX^5qGZ)g5F=cDWC*N1<8; z7VFqsH$_a?YHw9X_atb*xQb|M{)cz}I{6Ybu3QsQh5mXkWb|V6#onXA7vcY>Pafw1 zk?Wt98hQ2Ju$)4B1V$)4cJ|Ou>{%#cu=$xRR;Jqa+_3F~a%B=P5;ZZ^%mh#Qxilap zNkrGf<*T^=`~*dnj>;5v)?qQ!HpvS1UL?1M=Wr&g=(BfF9&ve$ms+EF9rxPJ+Ps1-=T>e01oycXSJWVC{U2_I$WUB|B;z9n57^ z|3sS%TzAG?)yaO0S#|0ZD5ewN)NO%EHNC4jcp& zj?O>hbet7E6dxTI^68Ydln~tcmGU`wqLR6<`;O7_fVn}12lNvW!|p1WRUCB_=0e53 zpsjunuOdc=#J9U@MnUowtytybJ#ez`vNC_5$DAsCFl*!-Qqw9LbQ2NPYq4uMsMQg@ zU$IGxH>&E-E{;`!0Zz)P>LBvMvxF}`ZH&tWjMUa#Dd(4@`q()(bRHtKFy9aY%YLOH zf5<;eF@w*TTLqCs_FQI1gPhVV{gQPR^Ryz3^kPkO zTJjBsZsA+iRN@s)-Q9b!jpxC__~1leYZ33Wtg`&|C@5#RCpxunC#TP-br-wpsO9_U zbS{q$i%IxT{M)=0(Sw`wZggg&@rK8fpm1y0Hpgy@ zP3SX3O6ULM$XJ+8r$xG9rhV671D&kEC4ssrj=mY2n7yuXq7y=ju7y%ds7y}pwm;jgr zm;#svm;smtm;;yxSO8cASOQoESOHiCSOZuG*Z|lB*aFxF*a6rD*aO%HH~=^VI085Z zH~}~XI0HBb_z&O$;1b{p;2Pit;1=Kx;2z)s;1S>%;QROY_XY5L1$YB^2lxQ^1o#5@ z2KWK^1popD0+MK_);QCCsnDJIC;|FiE;%E>V)}v-IgeC?97}_DeW);MoBdi^qE4_b z&JXn!&_)Y%Xj{&ig{}0a{l$x@~6N3s~AZ$p8uu4GK`@A-Yt#$a}X^M}lL8_7&R`oD93tq$)-LO7&b+(s5Vj42tBZzPwrHUhqE_ zJS(%?>)l4}BA)VU1-adWrltEx@c0f-Ash7;`C-crUOZCA+N?rWLX}w&%Y@Nwk01PI z4&~MU8||KzTz}>9=EEXZ){cH)2u1Le+t&5TL1Nqu0Jv>pSwSvss_kPMftD z2Ls@x-%|9C-FiO$e`M&qsLrr()YZ0FFDIcMEEsHC3XzaUemaX^5r3pLBruU4#3=LDSlY`1%u0vz_L_C58EvWe}-x+-s{8MD1{ z;*nlzy{E!Q9AFsz2)#_a0`n-j#oYkb{b zi>oNgjs2E8eRh(Sc0l!+HVM1cLF%y}Bn_e%sIo^omKwD4ch^pO-`2D(wbEG%AaR{I z+e)pwM|83FxJh4X1wSFptBUr~wa0MDYV~zf@4D<<2UftO(H0%G}GH2p^z52cI zI3_DNv~S^vt0Z(?KtdZB_zNaCXE+aD-NJQ@rLj-JURzlA1p8>col8BjyG-D-jGiZo z*$tyRQGuh)$V^Ols@UC@qF8YM5^!pVT-oUWI#ad5?qi)K#TyB06E0e@=m7H~Ry2rV z)VUU^dLQ5Z0Gj*c|C0fu-Hi7S0v7V~L9* zRMXne-|bxWAMOsQsNa0AyLO4@^+V$EneTa*ZIS*$v;?B>RH7}UcCxARtX8AHS%x9B ze`JT>kr$FP`VNP$0wSm94G)`(K1m%mum90Yi`Y}BrO$LeoWhr?M{FX)Ad0<_&~sth z@AlhzbZ@5|!J}O4Vk5<&l2_&rxVc;R9$Pm#a0o>LpwW%di*y#&`n!~%!>>ELZRj;X zJ3oLQlOy{j8{|Sh9F(#`5$~0`0=fK0a;|7i(q&}mzXMM;-L8XUw$Hm~uflV(JnUjo z87W@`A!-ljz@@MFwv5grQ^n%ldrcQjsc#uIE+5Y-G}N|5r~UaSDeQ^XUFGl@?^pDc zWkrXCzXv>G`Xm(jEgFa?uY(LN!+263KU_EHN16nCxaNjhvH>y9V{~-Rux6+ z$~Hp#a-8VPPm<8^@mgX|2sYqudBX6J2sL0o+-$hO&3p@6Y2&85j-oxu<*$NE7pBK> z=0r+bGim6!&At>YP7;$N$Y^h_W5iJ@?q+aFUFoGK_bi0_u1l$z7T8t`&D>H56BDP# z(BTf{dD6{!%PaVM$PdNvBDTTroivo_xxzc<2Ij?QYe9vnufNp5ESEw zJ||k_YlNm%c$^oo)QycQ^^qK27`KJqx!wBaL{&x!r*fA}1u}4INWo+Hfmkwv-XycZ z3n-PPj0_O}_K!A;=eXg&RSduzdBYGyH&qHqoR2jfU6<_!^a6unAIQji>k6gd1n_J0 zp_?$%Mkn7ZkE=+^Dr8jM3GgfVi0bK1i1J3)wm=vlZxAGGb|sF#DR(+>(bbhA(K-E6AM~EftrbPsWk7WTkI9G z{*TaPRpxAQA+L#b*tcaUsD!5 zsG@o)ag2L*jg{%IF_NuN&Q3Fa&}iDj(VSJTPtVXQ@+&NN#ufhv>?MjU+A;HwY8IVT zLoxPyT3UjGrWpi(y8MUZ8dtcyzoqs!xLu`nj&U(rA5h(IJXgng1SACe`zm%KMjn2Z zBN{ZSAXA6bW>|A?2*(9^ zdNwt5rYGF)`hHvGPy)CKcJz;nXnZvio?bo{KfCr~7IiQvoaC-!8fYDFAFs@2OUE7X z?lB+#1)X!Z*>W2wx9B_QJMLE~@ANL!d&!jB_-Zn2LTrX%NRh)tBMqH=3xudSMy1;x zqnQ>rD8S$ress*(yA8e#7+i z`M;}3K8nPnuyN(}3qh4{mPG8iRA1C^53+?IIv+@-nUp0!hr>jSF(zICTrdaRjooX0 zLH`W<=yb1ST{W?e1=jiKO@xW{k9fG?G~hs*+%-9FGg8|W!QE~!P*N^>P6rJg16gRA zA!nf6E?V5@7Tm#}pvk&vDLpd@6zCl)QRNrdzM#3~kt<)n$or9hq1m2zWuppCE@5tZ zjh~nYAx?OG*K@KjDfbSd{RH(i9w=wPc_W#jnj)!_aa5PD~HpFB#(S#^^AEO8k{Wr|q^u&+d2_FlSbI*@stP{?r zYZJ#COP4%K`%@x!=%S;^0DK=VhKiydAzmWWVG)e2bhz7^8PO?V!a@@E=S8?|_-4#J zuFU6%mx*cP|?KMA$e$BV&gWWl|7CpGltv=n_4IKjxNcOETqwiVa|T!D3o^-?NgX-1ML 
ztA}=RqzFhW`2HrR->^A>u%gd9;$~i%AKR4xD$^G-_j~X~k89Ch1=v4?3Fq5_LN8de z8~Mry@gXMCG9GL{!dC*{!Oha(Gs*K^R$mf6{rh}bU?i&_kFOE?R+YHdA>!pv;;3cI zgL4@~h&k`3AjF-@Ngu1NfMG@HO{raJRQ@34v)8l_eR!^Kx2t_)6Q8Vv%4f8fhSzcH zlzVr><~CV#W9Lw`RYjj3aVg!lq1Z!T!l?l>d4MZ!%$IzwJ_D0J4p~>G&xphKSB7iQ zQq_M!)xMaZ^dn*;m)WhnkjUSrFx<5pz~t~^PjR34cI{XQQg?IrQY@!+Sc!^xA?|70 zRl9P8A^hghe4i)Y5NMz%P~euOi16%9n!>&rtD{HPIeNm{)l6$ec{!cAIejC4Jzn-7 zDdG{v3v3wP^BeG$&V%VWU#U6seOtw835(hUikTsfR`JP~B{!Diu=N+$LCqS075}U* z))f01x3BuLteS;2e*^18NXC#KqyqTWY>23BdJ0@I5t}- z0=j5a{l_67XDhY5y6s9jsxy&L^mTRI(@?)NU^ITE!< z0<*t!RgiQY60Yn(pVvvXOMF+{ghA#V2S&jqZ48(`X&PIZy%wQmPc~SYPSP&rGuoYr zccOlDBXx;X$aAF>xPCgj_@83kAw1N^e=zPlFqa_QyPvL+a1NE+#6509%b^t-$kRm_Nb)ck>@zuPF?=lG#@&xh4J z$yv4S_ipYyK!mitdE?;)P^ERMI6OzHlMs8chds43R?7H&Khh?&9yKFOsTLR1UT<~t za$pO;QcHUs$*&AQkh~w&;PlIJTSWF3QEdRF1llaNw3{T)y3uzD?nl(P66)&89YQ&0 zHAU}O9MaAU_F+q zRWHh#UjEWrqO_wF&0X@GgH^Tdfwf|v5~pclCxhb4HtR1kGA6PL)cz(;^`M{SSXvyQ zB?n1tk^oF>)p&6c+#+n1Bwpj~nd0FXGWymXjzt30tsoWHj|1H{Rexa+Gz8eo>aoqK zSsjy5Nx{8!`O;kfyai@w1hs!Vbf`bd=4tuoyP|pwUta~({8U*lcZITlN}p};5Ja2uPPnD|$ipp|I??41IFGTI1#b5Z_fydO6uy}bbK)aKB@h9<1b!K=|V%ZfC zd=WCL$n-N#v)=zj?ynp__7L6mPl>PtZ7pjiMfrPx9+3t>dg<)tsdYWPGsJI?{()k* zvW%JPNMq?lvUUHLjvm!G)c;#L);j*SlVKlIx|6bkX?t1up#C0lfm5!pTz3yjG0ZI^ z%!r!uj7PlKh4(p0rF~C92q^}goD zT-14z&Lb?=Emq$cEY_MiH4trVaI%UcPKuj(4MwQGJ8L?K{#OEvfRtrsC^`pdJgEzk z8ksMK)<#C+i~8E9;7*Yur}eM^u_p^CrVX4R{6N6oKPam{*)`o^rL?W=0w(wNaQW7y z#rsa^g#WT`;u+wZ-5%8_V(fX1y%a{e;(?2lO8 ze}9gLXL#imbVSGyJ#F%No{8`Dwl21FhQqWh9~CWGn`e6h!wi?D2m(t^4`m_#UMzKh zOPfA--ZMRN0=YD+hKm zj1OUIN+!74-Vh}3a*9G2rz1dE6KBHEt^|z*=N9PrO3mQSSV^cX6o+dzhuGxZ849?fEIV! z3QX21<*-3+|5*`h&zYXQ@o%Fb%P2R8!VW}wwmNh$*p)j-hU^cGOd#npz?G!O%Jw$& z$~oe0*Px9d0rAxN5B)__-wFnEhwJNLF5!>vm0BhvxXz*mlM4Jwp#E*Uq?0)_ZZ}#g zGT8SEo$(0G88w9o=DI}Qef$P@7TH`7ug-Zf*pZZsbV9xE(VOl9cc5@2h*BD1qF(;+#EoM@_@^nb87b^vjm3} zE>Jp742C0?;f$*F@Nh0q#eyzXU^%I_?mFwI(oOO}e|@;q6CNlH!}qKjNe&MCr3Zy` zh%|ApVW!9XB9vbYd!Tn<{X?CSFnAxFx%8rb7T1!7LG6zWozi~_qWB6K%gekMabw%Q zM1!KwmEAxaIlLjNyj^#sbk;ulJqrYf>co!51dO-0`y9^YHC*MTj;h3vGDe98T%7ry zUGje)JhCP^3~E`yGiC3UBYGM$r|1(4zDw(A=K}m$=_jTK;LhY#8Rpq6{5`BM_xsf; z2uze$K2^w#haH1Yp28)PBVP5~AFxQ9NXss;-agmzK*j^WjJRU@Lax_tYfD~zE{a2i z@brZhY|i>7`G=SNST|k2%(~(8faiXvn=NwwX_PAWKyQesKM&xI!mjvd(P2P#d}0|>A2=<%m0}(e;?Q0 zg*^jPP;%TF*!$Ag$KKcGP-fsHH4b~WB9m#S|2IZd0hZHdKpQ+kQhZKvZ z;bWir#Fa3$772n?fCa?1_>#_f1KY0xx-_?#sp_F-rNmd&sKr1MHw4t6fKES1-<$Mv zsKHddBOQ}l{C##xh&f$KkvJh1NFYSg+vZUbMM(B8x(-8J;0DJbwG`FU)`9z;JDxJT z*RyFkV24N#TU~?VO6HM0Jo1nhtAmf66nI*!#=QtLx-)}#y>?&#d6G~q*G1I}y*Bu# zm5LH8po(;gOY@fr$!9m!&Jo{dJnpUE{V4=w_1}<<;m(>>eZ|lYKS~oMbrh#jo&+X% zCzgOm$$>ESroSp~G8;bVwMXab64q7ISM9974K!T46<_IXD1!rYmQkD@ET@jdEOoRu|48vgi@|!Cnu`{dxO!5bdR^?t}=bMZa@0DyO6A> zK1b-0QM}4I%Xboj@aQ!uOwFD!!~1==`^{RK>~|_v+VfO@3l|oj;{V=YuF>07*07-_ zw9Sd@bp2igC1u74OTw0kr@(T+zM8JHVpO(lTt36$Yu`A^8qOZz+i(UgvS|bw(iS9* z7~A^#@nap`p{*W=>r`qcXQ4<MHtIuTp^B>8=+ zWZ#8@1L1=S*i_Rf8|Yv$v)|Rl>S2bSZc~fyqbxFoGu$}bGn~SJdM!WCC9U`S-HlO@ z-#UR{+;5^!`E(_i@kBeFHW4h0O|P(`RO5N(VH-yfmvB(Y8XyiOf*4)!JWns!b?4w~?62_XC$j{7E6xsE^+OQ{EzRU~Ut5LR|aVbb62 z<-$iFOW4YIhFvBTz^js72^zK4advIo8=MkvhBjtmX-z=nY+!(hoV=cH(VmdE&wohFxgDVNUSAyesMR%#53EUl^sKBNnnVvA2)(j z0GEnL@o%y9Ow<}MD}1~Xa3ya*XV|CqM%QHO5?x6qTw(B8JrvF;#((P_e=&8ZuH4W_ zWIkW6M~j<-cg?%|Y1et}>+**tgvJzZHYf5eJ`YYTi+j)JK5WvX?4<6X;9|PnMQ1VU zb+tl(YzgeOjlNj%4xBORF4N7ge6@htjQ6n%m`a4zTAQ!^uFWsJTSj6!=#rAZFtIpz zKuFh>3f#wxe};JHNsLP1qzf(2(ST}xCx4%rs$dFk0+ENHqFxoGUZL^hKls;iY;Cp0 zD2$<4^TMQDaenW_*x^k{0=RdXEfO`Qik~oqsPpYIl6Fmx?vfxPqthK!I|I0K=L=@J z7VUKH!r1FZ%AtC#b4(A72+8iESz7e!2a72`r{Mo%Y*6Uj$1^R>hbBayj1o=Nvt;5D 
zqKt@dFGEmA|IgD2|F+$`M?m#wIcpi73qFo4{fzFdiqR~M4S7Dkedd1vJwU?0BI`m% z5{y7Kxv^JmdhQFhZ12e?Z*(z>CYXjkxK3!nF+7WG{ZAKZV~r1exYoejErf4-5}<@v zo0Qm=SV3lNA(vFqxirxUhYi*)*yG+le2~IJ=NsMbOl5Izv6u4F`M(dyNTewYjy_#9 z%Udr=E;ic&bGzU3@8)0r$jx3u+sp?mSWpmHFra)Q=$Sq|M09X%jD`LVlg_DRhE%lA zHPP^6PRp8^U8K-W!$M5yD{3ItFrS<$ZSX}R-eU`3c0p0+0?@gt1R>j*T0%V|CLFu0 z-TW0y+%Frm6`r}QpuUe5Q-F}%96AkoWN|Da*dnFUYH2ZCmv`J-VviC?Pt&#mLX!nW zy}58|V(b~_*}F@#dK(vYeWYeOtqqirgTD;NYJ86eYMEg>8fvlZhfs$L1|)2% zKbz(oD;Fe>@9Iw`&6(Y)KMp7<>a0r5l$lMiagzV!GmLx zA+s8elO*&=AbaHS9~YByDdEr11yj3FqVkC4)9U@+tVfe3i&R{hA!Nln zYC<%b$D*!J?~9i>e*@g zOLo^*8OkX5OAA^RF%cElWgw!LO7tbY0u7%$d|QyRNRx`c05-gegN!Gf^fjDmN3T1R zt>VuEn`@fpMT%4(YO%}h#0>A?2){TWz;uXLId&3wx>Jtcm{#Rc`zXuj#J|-GrkK&*pj{tuvH-G?8E*~bu`y#07fy^yT$gU-wkCGI*%nn z$0-h}hX&9$07F=&7uf6Lf1K@2Q$Trz2K!KG%!I~hw+uFsh@`*kX)4??&8>i=PV#bX zFqO(@1ICbbmn)7Whas8W?OA`Y$SCK9*SxR;W z*eE^LhMv40-lWg?-Njbzp=A~A8JlD!!n1{lhQ&~-tKr<=v2WHn^QO{BLuK7LW)<)0 zh1*dGdZ!c6=)+4Vm%2)A-Vqme@bns&|3`6Y9t)A@>E1yp);_xC0}r&?tT^$(n4D5n zu`Sus4O8jt9T5Nbs)@U!{oGsfy+N z;vcLfS!h#jU|ZmG*x}Ui1OOGwY*qkc2|}d%N(cN~M*8C$jghNg|Nzwx-rs3Za3HI~`-st@-B zq$js(isxuhe42{#2$Jfn}IGXlYL{EB^@iRQajIgl|JQaIl$WlQJ3h z=r2p}@Cxt_sZut9%->r<@iwN6HdC8EW7}6Y@U1}^`r||%O3B#v9TV<8gM%N@3Y`$w zI1dp=_%&=>r8%6cvBZ63c)6B@wHc6^7ugjc4n~3xC6@)6ze7;(1ahcdaO3~Si}K@_~oc7hQwXaW_%ubKkLkdxuklFYY5K8szSW2+yOQShfkFA>+p<^zY<(1oPs0E;|w262bAk&l=d&xplt z7mHC7vFngPAYNkqimu7*!>{&?|CaK!p33dVOZ!8=Oz*1MREAX~J!3ItZbf2LV=(JL z)-Qt+C&#ts@I#m(S{Cydg5IIQ)=37(Y@?mqasn~Ism3zrr-UT18$^4ZnaQ>0CHvR| z{gsR%2(;xtOHR>NZW~#jfO24(jHz;x#cEzLWh@ogwLZb{q#vRVHIzBe%>KHic+%Q+l$Y-?fx2vusp&jEdp)l2)8>AtDy07$DHmHbMNG=2 zNTcBwJ|_cuMmn=;kT1*$@vxBZ`J?BH4P7b!m_N8GY{YGc?Kz2!NY^uK$C{}XJTXT9 zt-+x?{w4}tgKY{LJ*qZ?%M6T!`-;p8+T=$$4|YN?vAZXOm-^RMA1CWP(Coz&1O{7n z*xO4~$Kz*q6`BKDz$!s=ufxkkTm7p55RzcTg`OHDoWr{DEy^5@1zA$A^IjH z2I^G%IQm@Rfpw(p?iR+vD*|U4iP%-izu4OMb=zI$>u7X=?m*oqJY(u4W`@J(gsYPo zJ?77spVaVDK}KLJJ3czN&)c9(!LIloIz(@b80GXg<+?*fj$S*Umk{0yJV6$AU##?* zUo3w{BtwQcP{Z|X|6iNhk)r>)g|v@wV|=B8O@bCwfsxyromvRG@WDgWA1TLQCkYXdHlh?(gHMn zmd;#QHt#Fvwvk-&MTp$q-AJo9PgbcCC*1aw>52##Uil7=65nJiENZx z#f9W$uO}8f&Xfrg4_4qlZSZpYS6*#usMb(+^yB%hj(JROEIZ(2*mO*vAHJx;((y5D zcIa~}&^!*G;*^hg!@2SBlQ1u~`1o6wM;LWl%O4P9QGGxv9l5GETfo7TggJ2gaJvO- z1UhSAc3Z;lhA&h$pZPhKc|DvNH}WE{*wTyr;Dhl3U*k>|j;PdEF9I0-D=02CX z-^F~z4x}HpmkG6KU(6|5cY<1}GkY9fQU1e>Ow!gKRgM!QV3 zz1Zpd{Z*aJz~kMGR$T|fT)3p8;0*EWo={EzP=R|iCg&%E-lA&m*Bu#}4m>Fm?A2q9 zQ;k>(#byNa;3qRkyz-Q3GDxjH_U8Mb6T&vjbaQvicYdj?C(6uzzNzjSRclktSBY)q znS@!#N!+_mZF0eS8Q=eob`9_Jt_xVy+O= ztdQN%c=A{kEbv%8_}oyWTl0U*B^`u2VC+OJ4@ zX?PTk54;mdNQd>RJ7@`dl}>_Jb^rygJAh=k*@;_ZWfybQt_nJh*%zB;RsAyWs9&PH z%1Scht$9ZF0Wln~>WDlk$7suF0zQvd?rqUc*D9GD2sk7hcep?BWDR_0+zvB@e}sZW zY4rOOnqi;8GJ4aak}s2wu!Dc!@AUXR`GP7&MWeiK!Z!T7-(1#wefLQ=%wmkNIzO#8 z_guoe?{^3C#9+ElI=6+?yiQ%9qsGPZNq1zV21wr95cZD;@`VI;4`8UNCyBtfg{w|T zOEb?gdqLBhYmNap_??hJAoaZ{tlzalpM9NY-yz@OouBdf@_x2ZXAz^sAwdKT9iaxs zzj4QrHX@-@4z*^+gd-6^fL8a96?RS2X(E0vqHG+Zh%!&XvYihPm0{yrnJw_>!J1meG6#Pr*E zberb_y<5&!5<0_^${L$WkAY)3A>*Le57I&C`o<3mkbXss2~UbM>ok$6tw%iAI76Dv z_=&!AKE^*#W)eS+GhzX3Q9{?Plzvnd{H;9r3`jIMK@08>eyM;Q7r-O#S*@$XClr); z+)xbTMC|MC(zW)9Z5;JlTWTLK!?ix@BP(pzLBO$Le~mOQiVvFO#0d5s_iu;YwQ+>@ zj&T076mtfgicASu#ep&KOi@e(G!u8eVZPZ(1DK&VTC<)oiVMa$-kchEDH1c_VT^B+ z@p8`y%JWH)DZG~Z2jf=apsENnah6_ry4GRcZneuCA#MeJW6A)v(5M}>&QBTzRDh5c zHuP4aS!=p3ixlS!*8vi+wnY~r)&r@h0}u$B-fCz}L9WiE647zif%_9wM=8-VVGKcW zs~{Nww*+l68+w>r_It3eg}Tm2D1+o;jtomrYk;@yToKjlnF9uzXsYCG?lzv&a%#AY zO1CV{RpaqY`1I1|?C_QO3mvSNTi!U)@G)XlKv-tHH5taf_mU1-|AMt9?<5u|CBewL zakJcEV@?~Ot{uNcDd$pe*m0z;DMqHc(q{LQhPU{U} 
zCn!qjw{y~~46UEJ)QRiu44TpTLx5*si*U`nCeK&9!k(~FDt^}Nmf__lC=*D@xe;io z-FqwLJ~znru}WE9a_d>nE)@X>Z8pN%h!j&ceY2Ht@pBd;zE2vjAJYn%Pxi_^D%5HE zhPxcC+~BIGC*4U}evEbBgE?c;ZNhlV6H{aXkiCLI-U}GkuLa~0l-pl=@C7T;Ns^xD zTTBjPZCZJ(14A%{k*w>s9&KyJ! z&(}O###dHuW6}cZlCKIjJ5k5Cf1og5!<}@V4cn&K=h<-ie!UO7SlCMMlVy3~vW41Z zJZ(#2!+0(iY$v_Vat74uP8jd|gaC5otJrX{?9&=JIm&^t6uv<4_ps&&P;aCgt@xE3 z9?jenx};HJpEV-{Nttq&J>BZR=E6Xy7O#9>61`tOP0lfH=lL^_y10;l3unU6f;vwj z*u2VL9!a)#rRYaYF0QpmaAo7zAhUDbk$&F%nN|I~POgST;2hy(_~Si{bkvuYhf0F{ zs<)ajQf&h@s|+b5sdA@=o=Obqkc}((4s}1_z$=LUMSGXvN#jmGoH17TlWdmw6Max5 zViBB4G8bHETE~w`o58~?Mt~iX!HVb+$-cKsp1_IVmm6Nl&&veH^rM{UgEd_c_Jq4N zX~jZ;$9Ee^DE8_wimI8g(j~cp>TfO=h3Ph!bm2q8DDdwI7V~#nSA(a@6zE^k^#R?w zADol!t%;3w-`Mp-*t!4YnEF(fHq-ENx=Uv|h%&pFQa(D~8NwYlu>clf)R=a~ko`=a zxiU{$RsW}}dqPDwALr)u^;DZ&pfsl6v3|e@1gec@4Ty7 zqXaKc?Sz9LoZ@m$oT-+#rZR$;VG3)53sm*(5zLQ5ASo@GQ_LuYJ9lVgheCK=P`Z|| z+1}5rnX?<)6;zV_*Z+|g4!4ML>yxBv6JfPiv#A*1mx%`?yFO(pNw`ZzV^U0o?gmGb z;8O-Xx@7+JcG@3k#+eN;z)q_zmIf})#PR-YuHNBX^&>vQl;Il7Wzh_-MUMjU;M}Ka zVs@Zqa(;aI-th#BebJb;SKhf)fgD%@EP;AEn=z>4SGUE)@LxfD@sQY(H`5)e&cr@` z2<~lAZqAvvNVu|GiMK9*^8et|WL=5<7Wk76LjOC>ccJhXLT)s^pfU{wmBI*WBj0nJ1f*WTV`G7Ee8|il%cFclt>}EJ_|+i~_1V#elU0W3k*q^1 za?QZT`rDMm%rP6<26aR)B!o~4F%#P2*Bcn1Gwwx^S0rg$W$xpn)6dssXSR)E+Yy)T=Ll``fI5v`;%WG7 z`%u)*J26`Ng*n|CZpqQdUH0c&Ws@4@GiEqt48un=Iw~dq@znf5=(<(Lo?9D`qH@-d zEp?r$wt(w2`*{9|RR+(-LOgIR2?ZVx^yC`O$Mg4wMQUegANfsPoX$>vWMl2z8-{S+ z>PuX9*S1TML7{OBF|n(_z>srvH*w?|5m#?(op{$?D1nd44rQ@3L`a6_GbR2oMA^(} z(uvE*P=I2G1e|IY?kVra3byapJV;9Qi=;5orvju;%j49Y75^e4KZDrFV4p$9Wf92z zBN$?3_Qe(#Q6In1zTng|sKj`$pB^TnMm?a^@bpXc+OwN&idY26jTjy(;IMS6Pl@Xe%6h!PW}szkKLV0cNYZr_db#`%pR4lNZ9eZ zWhZZ#d*kDV8`qijsCa{(x4T;_o=zfxs$Nk#9&wq5#jj%g`b-ovSI!nDXb}xby6a+9 zAtSFTwe;Gbafy8h8M&A3(9xF$4y9XwlO3BV4u`e!6lgEipZ)9!O<+-=RPZiB)Cg*1 zCtzO}A zB3*6sFo;_&n}UGwIICR4MnRaekvCsi$L_hr;rsraN-CxHbAs++p?ze1`nVp8lFYe1 z!W_F+Fl-}ABs(h4k;W2%mz%W1Y-d~dyO9PWA#REJko)JX=VEds#JL)H|M#8~L<2&x zBBA@zfRaSazDR7W(xQKh(rex1qPu;30ysW){1&GtSzTif=|&IEShd~ijQR>or8hw- zR>hGNb*iq|U5#OIIe-KED8gB6No}i`qTqeR-=+eCudtM?=FjW@owx=12;OCGX2r&E z$bsCbA>()~V8TM5l7&<3Q0OMV*8h*{)hO2`XV`AEHU2SR$H5N~n$$ zku9lBHQPDle5Q=ih?(f?rwxuk2~=3`Ev$ElR$7POH1I3O@~toDHiT79QJ+^2cC0mW-ejR`~2~lPH^YfBh2Wd|K8@HV*Yb zd=JF?&*qi_6mLpR9OT_y@+TPSJdsK&I{9|uGHZvD7JnBE<~JHS;ciBb8g*BO&NjGV z`g({op3iGe&M2wuLUHH~`otmQaRMzVhLw%P3IwbEsqwZYJD@epKPxMx)`d(H3Stju z0@8||y4dzRiv(76bn|ElO61sdV{f{>WV?~=`F(aV0Gv!@r(`ruLn8~kx4S}%KCiXl zQ}^m;uLc|qbuA8DGmzFO`BL48qHi%xlv>3RiX};Z(uRl_H?VTTR`cokoB_5@pE^2$ zQM^GpG=fA12?G=r72d3-$yw0#9188tETlsnktuW--GO?7AiM@C~mWa_@F4j{sbO9Yf< z`?(fVw|>wuK;E9AGa9vv{F8MV-828Hf`9G=h=S8~q$#S8%J{q-dDrZMA}ob9C@)?X zzvDKxoI-G|LBYr&p5743%ENr8L=c99S{K0NS47jXT1J(Mo^dBFF}AuQy?wI zvPX3V9VE~JzW$@oRA6|ht6kR%NT;z*%lzI$+dtjN%}BH0_)}?;7KthQ>SZW?4={Vc zQ)|-e%&ifD#MS$2@Kb_zyRzJm z?xJ9cY^r|iss|dY%Vuq9)g)tLF4)0q0Gf#G8#8=6os=G7k5aym)QmU0zFIAE%n*qZ z9^r@MzG04QA#m2tyzh(q6XV-t$NN*2e%q)F=F%)N z+OB4Y3ZWE8Q05o|((8T7^9E{#jM5;0oORjWwOfUH!2cB>ozX-dtThEmotP}H+tvyZ z2UAjMo(L3YI^q#?Z>i5DJzniV)bk(U^4OB=lx+E;W=iXq6>^9{FHUYQmZz%k+CeQ& zWy-=q+m&y<4tkEI@My%r>G#OSOKPcayY`;zg^vf7a3PXI-bFYX7A6DBxj{t4so{+` z__h;%uq80G@^35-TqUN)(NUR>sR}o9!vl(j5F5mt8MA)c2yc`cG5dJQ0bxkY`&7+G ze;uRkgs33C38h1BIs2tcU8kHKX*_MXKV20|w8>Hm=~?y;hU2!cS+?rO5RZ}wonKv# z+sNS*_v4K~9DHFm?#mvdr47AwPt*Dt!+3Kd?1@8;2-(;evY7%I-8v!)cee0bew?J9 z99=@dq+`NiajeNCB8o5A^yQ}hcX81#T~&PbfLH)Lmx+ghCRil{(KtcB^3pj+qOY;Y zm#N=ARGR%tOF2RxABcig<|adqv#m1yW7NCKN0}6{n&P~$nmLjO?lc|nncMkb*Yyyr zqm|7|Y|P4D7Q4Ix!(j!{$n*R>^tcK;j>J2|tk;c%XVlb;fy_d;l;+)w(I7Huo%qj7 z7nK0v8%&HgR8FV<-_1>ZrRT*s-7#)VvoAVSp<0XLjxYvlPb@FeMr&c%+-R%dOJ9 
z&z5cW6m<^r=<&w!*bf7<{fr01JKH$mog{MHJIjjSw4K+%>;;Ofy~HOTV@wF(7zYue zwrp^T2`{rQzLP$vEEv7ME_*p9hboc;8RJf|LZS=28Pth6C+(I`hi45;L*#Hj;nvE;bx%$UVb52=q+fYc)S ztzhwqFu-%b&n`gTd4HSlc=HvX{ z_D~{x)rFmcHXgc7eKqdX;f_@1Dw=&LD|^CByP_(`tPc5s7eEk4`q0M0`yuTD>J)(! zKXq`Q?>+a!v%R+oXNN!n8D@3E$R9?hDl+nIS^{d?Y?m<%d=9A-RCMNYDijbkO!e$< z7-jSEIm9Jx$DO;P z^QGy4k;iO$kp-xGl=*;8aDIE|Qt*#u5uI=+;r;nW66nI1;i~AVHpaUhm^r802-HLl zw6WnS#8NJKRZAU2-ythDag2~kW6%)(P^+I zq>6zMR6(y?<>nYT-~7XqtuVJ3ATsVn%yxm=h&pz_TLB+zM5D*Jh zRfs#_r)D-Cd<&oMP#244-}4EYo(g+e7hNWX2{jX;RF*D~Nd^TH;>qU_{KW*Lzr6rs zPD$j|BJ%xEHK_=}9Ay|i@|@o}5KpS@Y*nk*-H~-dGTk|&Q>)|nXY)R2{*;l=d6fwG zjxbhs7TBW(wdZQ1D3njNpGzpDsBFtRGwHC`Zixbzpk$<~L9?{{7I{o~n}Z7@O()e))uYs!b50 zS))3yXcsqy2W>HYRy=NcfJF11S+pGj*Cq~E`h-QJ@eePQ9y?zKijvO_S?N8YO8vkugK~jK=)`psrQUQ`5hXvR3aXs>xnlDQZPa#He6WIzwGvzO=#XpJTr_>=iL6&1iOYJ!~ zaK{BP5__Dclfw``Zn?vjhrK{pR4!&|pKK!46bt^CQ)&3&rnowfi4?DPZ&x%_KOA!y zU9&9+?(LP7BOPb2=`xQd?sy9mFC1!Ju0E(!CN0S*!kg4(a3Uo=X^*qvo}X(bu0_Lu z7m{JwrYW%|eK+`BsHS1Fgq$zn*GwNQ!du2m)N1q0oVue zP@fW&V67SxL_xc#3Jg>>pD{a--a^aPDO@N+Zc!Qp<^RghKVZkn-Uf>gw<}!WW-(_9 z32loQvL!qLf4tIN?5d!nn?>D=EIr393{v^TWqX0nSUeQWdOJMWCZkdlGA$K@A)icd-KgI$bgdXV)GQuM4UmSE2WjYiEO`td$Zq?x?K-i>ov`@z0Y;9}SQom&OMo58SZ#cCVQ~P8&enJ5(n|$PRwag7Z+Jpj~jVA>$TBp z>^Q7K+UQHi1LC97p2=)ZryZ#|n5tPANpbfd2D!0e(?u1sn~t!1YU`bym?A#NZNE4^ zT=(SmXmqq@0?0?PE_kGfROkQbK*}lC8Nq!zj;>n*7$oWWsCTb>vqApbJ%`5c*4k`P zuX@11;m!BaBgJG$(>l7Xk^9f_PZD@t;|5TUicVD*or1MQk=BG3$R{E}{W_YDbM>hn zMbth+9s^fYo@kAWEz{0*E@cso>jc48JIJx5o}Z zE$P6d2>{W!dFT6J@l=tu?a5CuzPxYAmJ&cL?moQMW$)bes4Q_u=@oOJMx|MF-OWi$nl^$RGJUtk6uuSiMXtpr{J}n_VVp1B!cIaZoA~S+rp2N1r8K(*qT! zQy&u0$Iqw04ZswfHADg9^W8wz&luoxuF_!q8-bT>(<))s!mprftVJTl(1G|h!=bIm3OaE=k!i*{@Gcm3 zG9(}6-RV}R21I0M%){ai6u`uUvhQU8W%RE>ZV;$oMg?BSafrdee zha;aW6G1QE=EV{qKh%#cH)WbUli!64R{Iq@3upsdwHNJ4INo*>%{TGs~UACug$iO;% z&{08f5_PklbwlqbPOgt7t%zkPne}c30bOBHs^m4U^SWM0A+{uBA>be>FtmBA=d@MW+%AEf+9evHLGlh zO&Sd)MDYSG&m+vL)(I$DmyZJCPb@!LZKv){MNO;QXY%$ey8= z4jA3?U;z~XB|aPo<*xB(4ixW3N*Y8iWB>SsA3^WQ1?HEJh^(BTF~R>^Zw@sx4pS>r zSlwSYQKX3w8yx;!_);rK3rsg2nwjn+JPj#OHEIZ5k>T6dx2fCD^} zd4Wzgv(GX(7_hO>^q${wgN9fd0i)y7Mzc-flbirUaD@Mv6u3bD!_1vYYkwu( zkFD!K3u){%=pCy^kx7mTm$c}!3B_=zGtxmac`)euzT6 z#3M)r*0U_(hhPiSW2_a9DiMJW3`y(FIv9cGSGk2a^RJ*c7*KV0P*a=Ex2wD#nJs=4 zF>z*OZ3-{=Q1c3|{il199Ve2RD~kh!Wdj)Q_9@XFr%)D?2~wmzGM{b z5j<*)-BIESn)iTg2;vNu%j4qNZUP`oo&;R@Y!v5K7XvM+lH1x61V}d81?ycA&w1P{ z1(QU(1f=%Q?S;@5mL8bV)e^9y&=M((d)qLFt=IjLMFa-QgJ}$yy;-CtY1@ zi+gAg%0B!ZT^+(lpukn9eH5V2F-qU>&9&%$~{^1|TW@BE4qXPSy$m3=R}xQMn$C#)HMdDZwIC(ep?bUfq~4 zKn;ThLAa}0>>Rl5y!+#vXoD7?npcj;H^OCKVTMz!yLsm;<8ozV}ZHEPL& za^i12q_sTyoJFl`Fwmf}&aCo8hKe@I>Q}8DDpSiFHq7ISlHSj5*0AA37|somJE;+j z+C|v^-K#=7&&wt0%l=tisrc~j^Wl}|s7x?2^Dn>@WcDPZ zhr>``u3*5=@+Hj>;O!&mESbBEW3C{2l<_Bb5AF#M+AZ}V>pAo$elgp$qN7`Nt8Z~k zs4%sG+~7C*Tqai$Xw;zDYXAb6rdFpk%N{O@l^Wrpkg#EUe@vcG{4AlU-@h{M`z57} z8|d<`w&2NVW2!yRqz}x8&?&5`ds~^?UU!KQwB1WlFsqP6wCV38w;IQ5YyCYMem}T_ zF-<&H9*Ni=fy-$g9>2cFF;%|mV#r=v?Bi{I+%hpMvqqabu{3sSdWy;1uk0_jQ{n(` z)S)Ibqy@eAvh(r5^cQR}(UsUG;>&uxtCz#hj9#!lg#WmBe4ao=vPyI4l#;C#V3u4( z|47HL)8zRgeD!gqvJjxMljns(fQdJ7hp?(RXzBwtOe$R zw@n?Nk;?fj2v40~MHh*(k6j38KNWj)ZRVlTn*;W4Yuz6FKBc|yn0AY_Y8J#}Dj-wx8UyVNvXa7*~4E+|`_pcuu4TyRs{sduzDTZJfl zC1r|2a>bEEe=t_F2^L?6% z%2v%pc52!Wlu;ZG_#TfdZ|<>ddu)$}&kvP>o1HzT;=lw9T2^qrstNx-V)xI=&?}*g zlo^;VcWkP=bQI5l_S4P#dpzuSn_h5t6QPnCr}avUwBQo7V2Nk6?n)3riWSV{O~9Ce zBu}_wBKiXVxD|)t5B%5!yq|tY5<`4}H}bx72QBH^`WH~9OGo!bg{OBx5x)R7lr02z2&OL&6`72HF88eOBKfuze$8y&T z^%nzST~*jT+Zxb?l9TN_OPGJy?0I5G7%0u+7i(80G20jk3{~U%T~w@{cbNb4IdVSF ziKt5M&qGP4&qbZk4{Gn!gdyfHVC`W;dYN^_>GAl`8fK!fF@*%3RPp& 
zq`)3;^js39wsYIUAEJCiEK8?bhf5LfZXLq7L!3vDU_b3Uudjb!_JY4&JRHUxEw8&r z^^RuCk?$`x3ppo|U3;nPI?H6nWY(}VEyO*4pd_xH?AlLoOVS1>4*1UgasY^xhcRCB z{!lEIe4vIM4qz51G0^#zwu2L6HsV@{E-ioG;p9dfH8Xs=`gChL+#3^ zSgeZh1<-e@GJCY%`P>D1b{@Ujx#{6hAMIJG86-dt<$J2W%Cyv}M(dpu-Q^zRfgflk z7D$#65icnb$Enmr5xKETR*&8WUG2m*(47Gk^!%ce_4}24tUpNC5zSP zXRR_g8C`$lrKx&fh66xJy@3`%SVf8$v}u+d${f&txHa+-75PjG87_1JUObSF=ZM$klz`EwhGNX-KdH}}^xcwVD6 z+q>H>Q{$V>viYr_SWASyeZmOq52ePDq3Out^s_+EZhX%z` zfkr4iZDN9;kZ$i#U|}a<#r{C1wrY@}q>EVJ!WiiV>0(G~r}u1rXPsPtz%^Eq7QO-kLNVvz-`La3uj( zo|1GFv+=glax_BTirvkeSg?HvrG$&DGxr6Ah}Xnbagq}h?P|L@rT5Xvxl@X()p|2e zJFz3pNhySt?s}i>2zw_{@lR0TBZ9qg`A@DD5Bl@^sbXwd z8;Uc!vx94eQ*oqgMkv)VJLfv$gp>(-rb7!6bx4xih-J3^Z@n zQO2n5YqSmEThLlK`cRjwTIe9psv7Q;3TDt!el{=Xq=YNaHA-zf6p1$s5++=RlateN zF9~-kK4t*|*fn3-w3@Bqn8NY$U4{o8gCsWENYnN$Y$kyj05fUPQD_?F*OT+nM<6SsfCYLnn@%PFXEvD? z>i~15Ig%Z(x}EU(f6HdL4RPe=6h4?`hk?Pg^lf3|)Or^VSKT}q zlFp43-AaP}svPGVC*v=$vR$hoNou~4rQlYqvbkvI=x>tkgPyAxsGS@_Epo2cQ0N4K z)0k%I|3ZFmV>YnnhmQg_WYX;^b=Pbh0BZymJ7Am^ zYX;wjj~552{stM*q~LLQo>3wZd!Bh`9P=Gv*qa9M5f$jQM1SpAStd+upRh7yQku0K zZ7x3QC~eY35I-bLeGyHihXx0%?{ZYC9@Oop;{!mXS`a}7-UYf7k@vP11*|b+&b%h# zIlM}>ff6sKX1z);s@O}b+`Pme;PXe)0B+`#i*jw-g2O$Xd=v4rB)$BU9NJNie z2+)1utiUA&A&W?^5X~mC5t|Qld6``T8aPG;?xpm$ilQ4`FU*{+Ohh5QzPQPk`lK(h z>RFdKWUn?*owyhUwIh3%j}@J(kAxat*(6CMJ9)#=GVIuJ?C~6^3e3-fy4?3he&rdj z)JX#i3V#~U8a^JulP}Jo-^5xgQ>e!WPPtbyJO+SHC$*6MWO^u_&rrdw0fnQ^K3xAZ zMg4&^p?n{gCid|TS6PGVh%%p$x;+lF9pPIcFytP+lZ_gYMi- z+>f)HX41i02RN+P@WL2q+Qn(|`i$pXMP7K1RWxPW22yzvNb@fy(i?fH%%DwTAf+JZ z8!t}BJ5=Qr!iA6;#P$=G-Ei(9PzbL-g^Pp<^$w!n+aG$1C_6*6@o&hSa}ESeC7tpg ztj4!ya8JJNoszl6J~nW9o#6y~Sxt;>%Jc18=AV|M$BpSYq{Zx|NlMDSQClHUMZiV; zHU31Xxboch5Kudd~$fbCYDq~T$8?N*C_G8hKncxtyq}S2XVP!W1I9xQr z_apzMG?%P*)B-*5QHF*L$$M98C-)GQ1L|>z+WL=dCMS(+{_ z4m)_-iIYd~S$pH>N*R$sFQq9rsnpfe|);(0WL!36X-E;gCB_wTd+TOiuG>= zasQKX+XCVH+)BazU?87I=VR(On~O#UGD|T1IK^I0OpS9}7XA1JFaQz}s;JxYh;55A z7=x!9p8dem|1*bRo7Z|A^J5eYt&p+N7ovw}dd$(RhgB6pxvEwTv55Kfk>Pc~KAEJs zaI+z0^1~gYNK0UOUU1nj*4S?c@rxXQE2>$u9I%@ItG5Z66K?I;*7Q4m@!tov)V71B z+jJB*rAbB(zVeUgGlr@_S)yB^^s&+sLp|M}oPsv)I&|ZF`H%C&Dl+U<{L0=ilCDdM zytd7?025gtETTh2Oig?Ghwpwt502axjzG0}HZ`z3BSFW6A6q`bL&aL|k(MR^t=t9) zt(ZWlZ^1@B#R=8!=u?N#7k&mPQ~BunKj_mOwZB1I4^RGaK;@Aw;AS5BIjUrE zy}Gr|q-CiRCwWsGxInVnz;v=bmB*j4p^YL#EYIt9ml zS(0AX)~+L{xN&`G;+;}5Ya>#g?0H@S&60qe%2N6Oa;BTfHM)R5)oa>0oPKN$!cg=4@FRD z7g{*a=};U!IaDzC%13zbCKiG3a6pY&fibp&N68;L*G+q9Fc&+Ifc}hfi({O10GkR* z!#R0>P*S^iGyek{y;vg>v1tDD7$-03RRYcVYau%^;nIX= za$9fS`Dr9X^+1o&+0 zN4!emrW+_;J$$1mXkVq2L-oh~Z8erJA~C`V7Bi;=1XO~gUV4lIV+f+e>q4bye~1I+ zYW6LjFRRi3vKC;!ky_7`M`-RH>fj~8g_j`pri8+_>Ol04%_?WKCVCf+2SL{ghxJXNzaJL4{e~Yd6Rt{sf)C&zV zc|fQS2yeZ8JTl`_TDM=m4=6|VE|!mCVAv|b?xc0+Pk;>E@h-6F7Ss!B=&9xt+BH~k z=`?0*nWWm3UlZNTH3p0WeKTQ|*>4Z94yc+>C`j`)snpaY;xTsGORoU4mQM>Jz>DiIHoj@ySWW;(ugm<5{y+b0~ilex20fd-BwH!br_j&As-*WpwhV*)U*t<1`yh zuOaAaS)8B)Rz+1ZL;yHSi`k$SZDv2>-eic6z@vxRN#-z|bo00tp6O& zjT3AlaJuiRvZ3N*b29_?!8CaoOPVzNNHD5fsO@Wle^ImMQmPU>VeGZb9!NFO%@bt)3_ zpDe)^Klw;2$lWKl$Fk=17Bm*ODi4(dK5XqmnIz2&Q~WgI#%8+b+XTj3%?kp2n zCC&cSPYENo@?|8+3x0Xk>ffWF%G};PZA0)PkHVAq>dGA23X{Uw?)L294N=Le5slft zpp92%TmqA+<$G)q3vV7xfOFmzzLTmejYbzFFdzK_%>B+hXS_(c{XsbL#->;Qc^R)h zqfkKRaO1wL?EqvQ7q?YQvvvt6QlHQmw)f#B#N^C9LeUD}p*qB`;+%WQc(v~3_Mrz% zy?&G&z9t@oX^L~!T(=UOE%cNbvRAuGkOeAj!=@^TSQxme8oO5HvoRr`nrM@^pr;Jkdyh^px(i~Gr z(bEK)`3t5|A$vB5s>)jG5kvgN%yA0;8asaXrq99yAcm%Nq|a`6MPmdL;h*`E%KqWR z#cCT>uTRUetd`G|-!-+x97MvYF=EQI9JVB?j!FcYou=gX@S2KDq1&5>whXM>@teLb$yB+882tl1C6ZOsD3TRYbd zR6hB}#Wt^lmV)Tx;uk~P8iOM)K^?7a1-Oyv3E7Y5CKMzlYO}^VZ?^oA9ByGKLp56S 
[GIT binary patch payload (base85-encoded) omitted — not human-readable; no prose, code, or data recoverable from this span.]
z(ey{;%cMK%^wY{#8F`Xw?SNT45YjhzX6oI*TNC*TbLr~0WsO*sy({{zZ^)?YAEWAq{+?AFxc@am%T-a-Rv{O9t>>&$JQlXR( zonN9p^uF$={UATvW@*2@Y?9TO;`o$R=J~^_O)Y~7Z8@59=gS+*;RisJ;OD*tfDA@m zB4$#WpxdAD$MKK+O-ZTIp)7CV29)epM8gxpANkjb)rGyYHS?;-O_5GF5O1r@sa>R|NJv_M=+rEuB3`(N=F;jfU}-VGB! zjVa3?<~ZL)u(JhFmOhLBEq*ExCy^AOkM{OdoQ93wwNTExk!G~1+o%P`;g7)P(<=A? zOO<6Qb+3>5zkdH6?=Mhts1mq>gF~dGO4eS0FMoqk@9XG`(u8ayzLE*G(IQlW7n1uO zI@}=b300qW0wnN_7+w<%V9n(6RQfo&g=L*Q$xEm};*2`_LR(jgi&NC0`&+)ZK6Bq6 zX0jQ^JV6cQ{}1z*(LMzKGPB#p$__R(H4w-~=J!kJtZ+uIp4j-;E6ok7=KsXMS-f;< zd|)YsaB|Q2?#pML)stu9;b+Jdan>3GaOWHHKBpxS?{mW*Gx<~rhPa5oF&{l%AWDsaX1B8Y2~GRgQFFl(Br9IBZ$V$OOzAO}= zRV2ng@<0DC{0qwacQOsj0(wCeO@D|8xrD)FA-hutV?TfdaKYn+5WFvAcI_k}^yzor z1Mpz_GDV<}OQ2l5wbi%OKVp{#Zt-2i8@`seVjUp^7NfrfzbEB-F{J74 z9A=xA-XfLdIg`KV{u!V)U7*+6)U~$*+f%Yq9v`zegBivKRyY6sKr^Rj+|d0!^V^^K zb5G~7y6`Ohbpr*Zb8JW+<$uxs_(%Tyw<69nIoIz8yAe#in*AO7=ikGhOT{Hv`O$Jy z|9N$)TqV-yz?13~B6aX{Y%;g$aqGHV@7_)WF=2#Fy(8u{(ZODr);)kcPZT_y5!Qsh7(s;sQjQgpQpD88(~lBj;-b#(20NJ{p*G zjojf8!vb!xzed0G$Njmyq@$^#>xv^b4+*uJgGCrI%HLZNHJ3wk{j{zCMk+*q5r6yx z*!t~RG97TmG!HGM0}2S(vfE2){H`9Ah;jD}*%Og#IT$qkIWs z&{yWg?Pw=d=*$FByR zd=N(WuebOza?Os}T~fuM+H~Mm{d{z&j1h=eWo+w5k;C3>{>l@3Rg|U##xwNi)yU%G z=oJcE4|&7!d?p||)Bm6QU*$jYSHQna!_`Cw?*29n0uQ#<_@1vXVPdH>v9K^Ol|#eK zakEFi)vQHH= zLT1O~ar$H{4>Ik*Qjp330B;#>1D+6rMqfrjRcr_ne7OT}#}+#=p*(wN8`*N#Ppvu^ ziHo{7zn~dw`*EvEu9CgDtp!BQT)w#n1`b_Lg8h$CC*ZL=976UfJ1&zF*bM)-`FCL3 zDTYYou@8mrS3fgLg*IEbFCYaXkOhoTkenBn1?X>nq&p2i^TdUu5p>&Yhu7$=oK$R` zaZ;@-0Rvs;{(iKV6%+6S1x$wK#!Ut}tKTg8Q-o5f2u2$U^##_E!sifqpP=n2*_vLQ zT3|p_-K~1n4+4}iZ+PM?frPsW$lSH$3An*^rQ~Bc>dp|>V_;}=DR0fEf;d)aDGq8> z;^~jx0#FP|q=lob>0Q`gQT^)1WtjBEoHbIQVd|l#i?WI!P@;IO>7@g)652f>WWQ%X zQm^pb((nV^4OUAX9|HQFZ}IUFC0)+gKF04;$Ww`lMHDdVu!6rp$XD99frP=e+LLYN z8c|K#*o^{|+CsoS!%1Df#!0AnMM_KW>ln)kRUP6nU5hGs5anVGNn)#&mb-wcWmurW ztnQOn#Ue$wt2qmE^OLWHDqk1p$0IbZTpdN~HjquGTW#x#SDv%PD;sgM@?qr$9s{aE478r~!;`8(2Po z`G;PIH|K&FmG)hSR5-Z>W$pW$IJXe!WFb>-@=t^f74!NjjU$7E#NOSd;rw+Gvm!5O5UQ2lly9ll}l}9R-JIe(=>e9-l98Q5HD}nt7zt&KJ5Lyx*#V z+>@E3*MB1Zy7h-Dtr3trYb0m9sqY37fPmQ6_yiCU7iiQ?^p_;Nn|NjFZi?A3TEKB1 z6!f{IARFhOdH(!aKb@S11TSNYOUVP$4P2HuN8xi4um@u9Tzie>61;iZRGLYfVA25sc8FosZ z;L=L|M#!|?_dAl|AzniU;p2~{=a|yfBzr*NB{Nz{pcX z0Z`IZ-N>XQnd8=;S5NAFBM0dcKxk4+IB)!MxQ=gs&qh37IPIyaHM<3)gD#~h+gwl} zgBKuFNd@Y|sfkptH`I2c$?l(+9|q!DKZ5MCC328L!iwoj^ED7y=ZOe0jH}!ax>i~q zaIv@5-;@7Dy*ABdCHxBDlk9|g~=8$UorLJlw3N1i}w|ML8_8m(j_ zG!l7V-|G#c$wB7~fl<;%VGiMJRL@1}@JgNSgZ)MRBUbl2Q$!0;27+dlTQm@sizm|r^kTd&x@(0r(%k#jUp0Of81a1s0`%mrtJ^IC9a%vZ8rLR4bkpYd+ z=7YF;5SX0zT|crC*Pft#N%wr}+Jhs=d z{Ys;ySWrebt;_hw{0a?`p{FpEqaac3cb^~wSYD!PQR=tJfneMYSdkHf%M4m=Uw%dv zj(z~2qGs4%pdk(DKj#0ip9-x`3si4Spfvz7LDvq2F^=Q>NS(c}Ff$+H_}cvoM;`)` zlDm>WU7m)uTKs&stFY)8=^yx!ofedaEz-PbG&Q;tH-bI_Q+%wiwJjlt^}$RRR>Uaq zfoPiHPP6SSGW&K~F6k8VW7AGsM%;vIiD-b_ZqYy>xUS)t7=RYA@+yi+?gTB&UCvhMehk9Q*#Pm==La$#GYql(qM78`q=Wl$W zgd%~YdFdy8k_px=9i@cf>51l>=KM}G4C**4c7t|WkX*koL9AEKJsHeWPPqZE28RES=jY5U>CZ0}w480+KD2TR zifB66anM&J8nYGex@(yUEtL_%opPYr`v z&4@cOpRsqo?`>&bl`@zyF}C?B!XPvqJwGJKK%^BF{&r|^aiRP+T6wF{Mg%f@_aD4uZ^0>L1Hjv&4{DSvILkB{`_9uUBh6$6_)2B$ z_wT`9-&=(28bcms%3qf!A4Q-4%D!s>hCtaaF|+8)G_0TW3;y=}nynID$O}b2rU+2V z!V^XTx&;4g`IG<0^J^|_;eLj)&Fj_=Xt)))!gP|YaRQF&()xVXjhana4I2cw((hZV zQ-~mHp>7}2Za^QK*^TI%2PHD!Nd)EBl@QX-f6qS)nrHI!*`@h)_^~8|X3xgrKs7BQ zcNsz5?|v$E>j7Pp{@DD9f6Hohe6fw(bI*lF48HB0KbzR_MK2Y!dB+T22Aud-%~!6% znsTo)hu2VmpUs(q#%$_air2Ippt`ZOWmm_M(2T=7)G?kFWexvTQqzN%t-(N>1d90Y z2?Y0_s7KM?A1v~qEBL>cUp^eZ)5^>-{i4wTndWL=TV0P9f}PD{&1frs4DpZucKFzM zG5P8!=oNDA0mq?fI$|6XSbQOHT*V!I3|Pa1kGkakk@?xwSeqIpMiet|CklmhLtcvY 
zm-qja6`9^I&N%Yaxo0kd2t?$kU7et1qEs4~?0+Qxu&`rjW-Ryk)NGZk*Eb(^038~eXiFSSOyuObGQZ~I%>6z1u>(@1 zdU`qzjd&t19BTkr!C_42+Oez)>V0Sj!m>lWS79$97IaO}*4_QS?ezPBqADEJnfwFy z-}w`je1`wD^*f$>r5YgKd}-WDjwJznJ^&P~LA8c=TRP0nTd>xjcaUyz12gbPvI_nm znZIGT7rUtlR%GbZx9<(hKmnq-_jSq+-_LHf`EEO`L#Bmj={xjc#(`gV=rtvj5Yt`nrC`0Fy3E@VAr6)UTGh!6FJpT1f@zr(r&ZL%isrdDe#6Mt+r@{&Z3zO=`7-xGu;&U1DI)K3CE@zua z=`sen`tq1??aDX z9CfSMPZ4mo9jTk-%}bEYk!OacHp+~c|1|C&dH(zne+{pc95lVpr-d8PlZJnAaPNOl zf7YM%KR`Y}CsP@&SWBH2{PI&)pg7c+X)`(75A#70shxzc0EFYO(avgpU#aB&fU2+)K7Vt<7O)q$O&tonw~thk$T1^S$q56%TN3qeM?#e$=B zetpwe`!oNx=w0@ztLBwxK)XG(>yND{_q$M$$$uG_%F2t5_enAP7E02LvlTm z^IngB!kHOaEISCgZ3plJk|2FR(ek%xIIgQ`#Y@$n^;WHEsH{qy7Nq@cO!$V4(z#gB zK>y$CPtS~@e|cxI&{Ec+DAcL=lg>>1d&XRwU_WoRMcUad{Svbn9fx|rhs1`)iGBn@ zxIhC?_1P;2ouc_P7Hb9w6R@$B)~~`RWc2pFU83cyDsFz)B#_Q?@C}NL9n6XqwFOlH z1gNZ5f$E|ON9eBQ7yYR%Q}#H}i5c#6_uvC=o<%H()$8-$^DmN$$nSlSeW&EYj(^oDbtV(O2h+PnAs<@{Bmyhrn3eQF#9dS#>fs5o{(NRU0+ifU$&%`-T+ zvH)&e{CoPJ-q7Ug9RH#NmG=5&j~PUN{TI)_XvQ5(bk<&;(Mwxf$nP!N$4-P=Co6-& z3;lo^@g(oRfw4iyMJO)r*r9BVYmsIfe35A7N$ij{DL<+f*aPF`OWVgwHqG{D?EUX| zlNj4GQ|c)u-hQ+;_}??X|JnZ>>f|M~ZVhG7 zf`nYNjteL;|9kXz=p?;hjoP|*iGq;)i3uJUl}@=U)_ko(pm>G9W6_#xL;XGdovRil z>{!u!zGWMw_&n9DYbq+vr>3XN)(GI{0%<;j~M*BFOHmXsa@t6C@!#&U(uF3=(`cLJA zo&uoK-el%I2y`MP+^sPspGpyU>wj;1}xztf^(M^&03i744|uEQmLkb3z@Ibo3r@ z)lRj`?OZ5D`Sjk3um1^GU)b$54&U`FUW$hz0a4F+YzH{$xhF$xZf)li_a&UV&}!y= zB95y<=q5rKIUF48eC#Kv1>dP6i1pwCWifs_ML z>7xFzLVXqlXNALykN+P2)8%qmBbLomsK>)yx+(W3Gr~5}hYvG*7pVr8{G@!c%qF7Z zy!=dc09DR6&(j=fa&OW;qaGnAQb&01ln^K-D^dt}3VkXwbfbIa~IJWK<( z_wFPOFmBAS{Y`S_>)sqHFS_J4jb7>wHhTOc@ehY-4pi|s#_n|3r(R`wnY4eci>c}A z*nld`7d=07>Pei9gCsHuiVe`h_!+w?2Mx9JT+H<&7V&h7!3Xx$E?l5f`+NK|fwT8Z zk!MJPFr)p{Gw27D-g1bGN2UUr*sSvL2JM%fJr{qE{uyzh$EAii&OC3}#}o}raQorn z1Im0U+)|r}r!BS4Fj)M1{ENR`uDNtdajk&w`_Aqstoic}(d<>U&#mLG79+o)|M>b9 z=6Zth6Q@D+~`0+Wgio{>?6;o?~fW0!@4m0c=vv)?b>( z;8op`b5?#fyzDu<{~lXLcz|31I{*I1bPD4s^NvAF!_vwQmeVrh^ssL8z25x#uqYJK}7{V-@Dio>w?j4oD} z*s0765;gvT{ojA|&j+u!ydxs@(g$`nFptet?au!m{r4aF3$X=N>&;L+rNY17KlvdR za?So8{Or&C#oz9q*jo2q@n$BL01B)(k4;<6`^51EVuuY`E#CPgf=PX=h>I>IW>@bx zvp5=fO%qB(u>X&+43+ub_~EZJ`Fr%Q(wz04(5(1Il0U6U)_pO2D2-lA-_*13+P#mCo#pRs z@ZalyzHYpJ7_7c5{(Ak+sj?68!2Wyst-On(pElL(xMDeUA+56*l;Z3g>2a{cn^xzKkkPNj@zcA z!2Lb@@6Y*9*m!{>&*RG3CsgWC)OP9D`bGJBt^2;_{mJo-*7sR{lT6g}_934<$T_0g z9)=ck*53|({u`cugVeg&TBN?`WaWPPt=jMXY*$cvrN%p~&WtlrI}I=p>%J&>UH~!1 zy6=wvBvX$mDa@2>^zz(?5-Z%;i1DHXQ&x%d#(zXD7if(!&24>aaDv~&zt%q}+fx1t z?&-_Y<#O1#!@GUi`QE?~)Z2X*vR=$x&r3n=f?9rUn0%3^O8|G(UhkiOd8Mup6l=pduW)Ag;=R3Xa ziHv-iu%XY!y|AYEc{`wi@!Z}*71Cp*hOgI;^UHPobr@!OpEB3Q-Klt?%%;%VAKc5; zw$zz4sIw;R`5~Jn58pTaC5!U*uZ;+JDia6$lCzqM-c0-OQVdN@FtX+?TCL{A(5L+? z3cv)R_P2C&ia*u(6NCBQ;MzX_Z~p0@`K5g$af>})%TXkF-hT_?da<*w&JXIw~I`8F-2`6w<+*oH?)R1O&gae?kklNeg1@*^wzVvNJy~qX5oXGW%_V2k_Kc+ ziv3di&M({VsedG&ucAv!J1^Wyn>-eQV2GE&zBrmEvDp)Nnb zNj+pqA*45e4*J?s{i699fr*EFac5F1;8TJ7x{*W-n+l!7JEk4MzNX7plf4z>A98+? z$Mg*S%z)2|og;453i<+t=g{q>)Q>JK2VB9IGU08VV5jKgQ`VTQi18cOZ4~5ekpFw? 
zuU~?N*z~n`PodKNk-muc>k>8gqZ)_;gFK3vL~u?vdQJVF{pbH-esM&e&D)R)D$B}t zGR$kSK)1B1>m8&+(~FWAGW*kdMch?SvRy>m4CX?RtuR)9~Gj_7J^1hnix1m8V(S&gRMr}K7(Ln-D z)K=Fp7W&>grw^)LwZ|<1JS^rKf0y}TQ%Sx%X;!El-cKc4Q)13&Kri~2{j)(Z1*gA^ zXXzmK!MtQn%$?)M{@bg}gJ5Ew%jgU~jZ9r2HU7pJX!_)q1n*g9z^UwMkV@z%=Xu@8 zlT4nafSewmofyNlRnji$u(t6XbOgTYUa}9?nMWcLcm&{yoCri*ew4na$x`x&2l23l~<{etZB%cH{te`^N5$&Xgg?q%3%{T1O;gNd>xo%yTOupE(1BM#zFCUlCJDf&D#PM=V7g*m@pWk_Y<~wgqpf8r$$fqeq7IqLJM99jEm-1E-;QEXAl|ab?S3?B{TNm- z-$G2a)-DuAg{X7>c>O|v2N;79KDd*1MfW~>de0DsfyM7vG-R*6&5c4R=Tk+_Km^qT zNyEojdrZCa58;6)A(h?Yo?-W4mK?v<vd?j8=M1Pm{{~z-F0YUop za!Qc2c;5Et8g0L!G&cp4%6sxElE<5EiFtU7-tc?)dp2Xkl-|kizuV=;r;O+#fOa$+=L_$T4O588T;B+3cyZ zk=LLCdd~U)s`O=5*os|jzubT13hnp9TOkQ)Rv5h$!dd&;J`XmBXTawNVlCR%W5-$z z?Dy~&LJ14|7I`q_P^PkkE}wP5k>Ae0HK) z=STm#FOQqj7J!s1OuH1^S9^fcttQsjc6Wzy6v3Iw8(IIOlP|ZUEE7dnrSI|K;-+Y}Su` zROs{Ou%7Ii!VIT-Uo-4RZtz?yt>|!>o)G==o7NBB5meIY%<~Nk;t2tfVFns8^WaBH z3r+4m5(Aj-JsNHz+wAR%##rH9&^@kDuJ|*`%K^3G5rOr5$GWTd?$CG7DqcYQaqCo6 zM-KiR_|6+&Hk*8|S9MU1^f>9&;`~`124x9`RrBP=`5^qm4v0W*Ac2lfNusk6sS5XT zE;;@Y`#6i7VsbA+>2}DPXG315_)@mdb+}1ot|76o5ib(cB4PEVe7|55&<|Jl>WaDp z1UU*?fH95}`kZ;Yr|v~{#-XhORdV#(v(P=GYeH@+E*b6()m; zI)v5zvy=!Ueb^zl$Qtm-PQ~XLzmNk~55h6X;!{ma22@Rho1UmsJnzd|=FS0~yWf*v zyKlCXJ%JK~S!l(l4D*r6UbY@`#~?Fj3wyOc2)1rT%`B==kb${$`}xqGh8%i1V;cA8 zKEl?+s`!-OqC3-7nMMDQ{!l0=R(*WaeJOKyI^xo4*oAL^NP~~*5ZewymAF2lXB)FV zx1d*2TlGj$-jZ)PpAH6ntc)y|Jf_$;!R9t-}-)vBt2f!g~O+3eZE@&WvAa_gse55*rPf=QbRQg*ufK6LJajI zKm|e(KArs=`jbEaXM~99T~*$a&sQ|jp73%I7_2`FC?12Myk;N|1ETKVGynQC|JpK4 zhYF+jjq?pCZ+p@{0(bhqUH{=fWc?MN(83U^avl_J$zR^w6WF2x3+s!#p}VzH&MHOW z&iVWo`H#&X{nj^1Hm=LE zKJa{yDL`3^d?RZhsj{0(6JgF)Mqr>khdy5$5}(?*MsG00M<=H?;Hrx{B0@7~cK1`I zky5TV&WWd7rT<;>lZV>R49$ZuU)$AQ-_x4tmNCon-@YuF;Y;GeCudb#8oxjG-%8z2 z&s;BhPi-vc3UwU1QSFK?txI-EUFwsRt6Q}J-sepQG?Mq-4F_y#+I**De&`?mKAclw{fQrNjK+B*MY+>IC$~X7 zo%b{P!DOE+V2~q`B;X|S@5^w?XMVA3YVID|&SkK)b{x>poOFmP#yg{e- zo;|_v8^*sKv&wK*VFN`fo|5JA#{r$ZG4-b3BP$y(sPh>BIxeAhN~;_O%AR6e(x4@= z7&w67ui`g%^~1B_d|a44}agk+Mn=;zYmO>zd3Fk98EI)HXxN8 z-la`(hX%$D9LxCU?Y$s)`PpCehrjO_s=WHPh8Qa%7w8CUmN0qTUk!qyO$rPB^H5FT z`sj!FdKVsd)f~_c@i<{_-UD+YOnY}=Fckzy+FV&q7>~y{yG4N{qE2Q9@Y6YB*>CMyZ48`&pA)|g|xx7yV8-*q=J|)e?}z>84}agk`t$F%`}@jf`XGj~KFyjQvf(^4o5wou zTI;`&ap~>OVyFFHs?VtV6hWry6WMENJ{SWWm@zFZar#rYwD?R|qgq%oM-L>+yb^iQci5HeGO6+Su>m zzbE|<=U0qIpdkSrUNWUt*6BNXl0iWuEzxXrk~uV z4|DhYJ@fBB-oKORvVA`gveD#sS<6O0znUTbc7MTVRajtmu>7xDD_Gp)(dl9G@A0qk zS)@eigZ2Cn8Mn6o2mARmz9B0X` zI<|JIEBfvEPY(uq#}SZB*7s+Dki%$3ihTW^{ntyC7;y=GrJ!vQVwswCAK67h*MgP3 z#5bRH@t0eNLd^3V;=rsOV_nU$*28fCP$YgA+sovq@@whUH;&_=7L`|{lv8PK3s$%M z=?3IycXO9DyB!f7>6x>DD{I{8*+`Y6{OB*pSDb;1fqA-1X&NN{oNHsxFfx0sE-eq&hwH&~+N?iNP0fXos04-B++&)w2wJ%r%%ox!(9 z(d#lUL&5&5=Lc4j%KUTmmL8B86Iyqkt@U~T9)HY_^^>SqUGYQBpW!M%3#V2f~s~x+Zb?J?n>m_ci;Ni3>9rlrS>7;9h8i15aJePBl5BT4X|8R|M16 z7La?RdCZ={)L3eoqfX zf$q#uw~Rs`$F+a>uQCP&$bgh4Pl#s&j((dp}VK%K~DowZssxOTk3Ocz$A;OTpj&honn+>KPS4pr}#=QU{J ztwiiTeFHNS$co&~H-nmC913uLK0nu(i?DJKGU zy4D*i%2TCO4Vs6ejFXm{45QJuDdhRi&-)vZ5*U60mGTp#m#|mvEgo^dikCwtux(ERf0AZYm5pfk zX`x_GsBstT9h>}Xej+Cg52F44&}2z?2{M^&ey(+&79X8QB~h37DPq*>eUZ`-vp?0p zt=wJTbKQIn;>hg+b0*?$16t=H5%7g46!JB$53Z!+kD1Y|_NAeYTYu?0m#7xE8q9|< zy7T6(7cNHhn!TEjv#0Fa( zYgROmVdBRF@*sjkStBm6=j2g-*!+9*g7PrQ~MKJV+(Wb`CG3*n<$}`&e);;(t3o zLRb+`BxToqWc!d(htaN^{fqn^s+zVcx0G8mZT2(p*FCQ=?M1YbQ@udE+773EdmnM_ z15>?m8(PPH=%HgQNOiuH2tLm)Jqw5jY};7wKiCtoo(F5gX2_7!aM_+Jd_PQqz18dQ z!Vflia)Rw6{50ev29@&d;t;iWDmQ%$!~US`;`ri@ch`E#m)AaJ!YBSJ@p;rR=H9 z^d%zU=E`%=BKUsQj74q&fFYI(JLDlniPntj1AMONe634eaSyJwpLlVzGj{~mt!&;G*S?*I9b 
z{~HL~;u0?v?yISgxmX{k^sW9L{x6%HrE`@OF$~sA_7gCu2D4svetW8)hCqo9m`-}) zsekR?Gd~amfuV}@`^Sj)J9@ACitx{%D(kycu}SUsSDoL45TM~ic{nLL!TG`Ea59oF zMuQkN#K{2ta#Tzswh2hNNx1ss#DezlZ-CRVrrWJnwwa>@%-pzCN8tcuggY zTH*X2l5rlh=#Vhbi}|3_)c-_W%E5(z`ndc^0+bj=DctZ()r@SS8QPiuMUMa+4(B_G>C$*PYr zhtVqfh+`jW-BC=pg66QxQX-8ySjEDyD^$pwsHZBkOLl>>Ww_a>>gy@XWd$BeOBLXq zFxE=nT^A62r_%du2z_2*({e~yJhys$OR~3{?6=jhyqq2$QUD4X3o`ukC#Bdi0Cn~K zmA}@KHI@gCtJFg{F#df2A%OE#cPKY0!Pz^ymIwcPdAEWo$%vU>_2rshR~f6LfJCG9 zix2v*&QBGy4tkAU2RbMr0tuRe&7a<3QO8TykJ}}O^Cq^Al{EwXJ^n9h&6Qvz8}swP z8r(TKXZ*aL&`F=liRMHxj##}L9p1y)-(~&(d-EgvWKiyzYQ>YB>|%jR99_+7>dZ>g zuoc!q$Mhey#c$a7+vi8V3DhoOrE4&@mA#37%fV)UQUCu52H*#}nRu2}&TD^wqP+Bo zMFWqQZTBehK|9o28e0X_$cS&8F_zsgTkY5(!4MHR{_VAn${Gos)gs=d!%{m|L-Z0- zmfU(Q0s>3fj?^YLDq>!!gbp1&ORp#Crg6Sf2hV8pJ^iz(7*Tm)sN?2 zOZplK3+w~)=OKj)MD5-fjKAmp3$XKk;MWMV3xoazQ0%aL1n-S-l8Mg#9)4V~OC~x| zl2Y1HqrDR}4@0^r@H&_P37mNX$#fmxxOPg)Gz~IklC|koh`oFs5ysr%FXE>^W?4P5 zZAa5?NVXD@4Z{BFU0NS6FLR+>a}rx82@XlvU(QeG*JQiCV+em=&mE?h`#e$4_&xLE zKlp&0Rg4wzVVH#KW+NE} zW+_qqT}oL3>PeNX#=qTPZ+f$m8l(4~^9DI8lD*v)&nMjGwmt=J!|!$V>H(H|;~Liyw_IJZj9{E~|DY+~pu1>2@=@FsIFV@Eqt z6i>O^if$1>QM+~U@YS-6gIS@f)xa-L zwp6RFlK+17&{Zh}(^ZzKcsv)BuZ3xDyv5Bu3q`3QI%Bnx7|?tZ3x2(f>ov*O_3`qs zRTr!v7YYG*+V3tc_pM?q?CpV9tFKdhak=R%u1}30X500Wxt`yjSs-66#w zX*HI@$22B|TgdC@2xx^$uIlxM2lM3cb)&OyI`tSgYyY1Y}t#?Ym^rB*Ty19z(5K~&{Wq<9O1pW5@?-Aqxa=UzF3-7k) zCs46MBYu%T89&(u=iDd3h6F{Yk@32(V3~%UeFhWpz|s`BYZ=~JzsbMd{Mo;E{dKd6 zcR0S*IxjTs#Z?yq1X=L?ruucnT8N#ovtQ0CxZQl^f&(a`Ik6B@wg{t%im+( zXsZ!du_pTnZ?lBsUI~%Zx!t__y|IRr38X+sju*+KrboEUO-9d0_l3MIGY9hvnQT!A zYsT&WcKim>Rxo_6V80o)qL0{V(UEncgh(TwZZ@H3&gdJ7B~3T~*njKvvj`Cx9RXlK z0z-R35nbEI;Uw5s$|%z145*sg3dn(1Uu8#IsAMotP~*(RL%?VKGFg2rO$rBLh=Xn5 zQGbp-(?os{A8(jbLI&458|V>cx}Y&S*;k5PJ}f*6SXiUJh6(G7pHQCE9?Q3BK&W&T z+jkGG{lUFlZA+a=gF0){o*%MV@(@HNzGP9}{UW%cKi8`dW zXtkObL!b7qD0nHB)-Y}+fv5A;2+!G>tNO31Hpi%QW+d? zV7tuif@~4GH-+fLDYmip7vI(SE=63PM~}qgl8<9Z9TcpXhPEksUthZ^8!Y@!F(nqW? 
z6nI8FOh35Db`v6n(MgZ(bH1T76^`mQNR;XqkB1^mV&O*-{2u zNj`;Yd<)U3TAd6O<;N1tt9&KJ_77SA`LTY8d_5C+DywYM)u`!=PSFmdW3<225^9dy z!}a5;IKOW(_&xIjg}}r^zPK}~74WIRecebR_Iu_Rf6o7DZX;sM2YF`?CQi<$`#Nze zuAFSkc8c&u?mm>OFCVk^{6+JxWC`G5{w@*D@n*o*j`su`PbAWvqcgPzRX!MzF}1h8 zB?=*N7ZtH&kob95M@Jsu>c~Dry>PC`+GwfkGCY=+lDb)G5C;0X8d(J-rxNTP2j#&qt(mQ?vbephPTx6ws;^VmN8^p;$ zBl$z`?Juf7*-o{Gok6~i;cBOn*)-XFUru~pAUThd&szy#+>VRS15t-JoBARUA3C?%rACaO`MHt(C`Mn zUiU!{12H;JjsTbgU6trBH=r98oMi46MhAmWhNUs zH|xGj0$5zkgVX8*2FY{y@d&Le*A5NJwACWurz4*3i?a?`EKtap>h22n;xZ))>?G^Z zdE^-Y->rSMP+{!;6ukV0jy@c5l%z3Ri9WraPv7rWb?=EG9p(Y*=dpJ<^-&bDAC3g8r@_y$k*T8ljyZ>=yqjR6g>{W3S7li?-1n?~dCEHv z<^0K?|aSo{K8Z zr1_0(s$_>{HFPIRw;t~Iz%)5(n&zihZ=^5K$%7Ft(7|zzSR(e@Jxnn{*IA*(dSuW} z>MRzH$E&njw!4+G2u&Ji10NB{_Bj-sXyJ^^%Q8KmgN0iLEP(RPs~&vkTsq>&d}z<( z?DM{R8V#Hwcwy;dDUAn)gv$TN>*rpn*&^&W8gcSai3Mf+ZwVS1&%&Nsf9_IN>XrCq zWG_-ak)Hh&e50QHfI~2;=`MmU22tV}uL{%gJtK1@<7*_wH<92jB7%>Bh1`gj;VD@S zjf!e=n1F+}LqU-9V-qWyH0EvTHPTqN=@C5=;7X*P-<`880v>A9qn1ekMKB1v$(IywTP+V$Xj+~BWJRc{Z?v`HfoB) zZg{DU9Po4ZBBJ3tp*gR}I91C2Gw3}IRA?`UW2jvOn1v1i(6wrjUd`}CUbU74)PCUl zyg%@cwws6dsoI*u>Mt_ZHvbt~j5enAV^o!vM3v_y3}{NdK}QLbg`xz7ykwY5lU7ij z23h8jCy%Q?K2doFL*+k~EC!QPpO_jx%_p;P$TZf;6wI6cT zysZS8dx$=lb6+if1F z&|ABiR3`7WjWhf(CKJNJjfu=_9t+m~h#r?TO;i++ALydX2GRJT?u?1;;{zq=o0{3tmC1u(_^;`@1V$=i35+4 zxU=DvH*b?u{6q+nf8AU6uJ!1VrDQ;GV8DV)ij)BKU=Sk2Jk^NC^%!x^W8scq!jx)& zPCR_^as`QuaC>H#H5f^czVMn+Vm)5Q%CiRtS#gj_4HWB0>A-qEmZ4gzR|x)c>>Fu{ zhePk~3_~0L%tF;f*xKAoh+umAYjH;a%~}*Z-vq2FX)+O%)Ec~DAS`Vn-J554sHGws?i!)_T}^{meHwY(d0tZ7GlT8B*bvQyPXQl%8KyQwPD0H>$Ku7%C@0uFgJbJaAMErxK)h@OXYd;5DRjx{5C; zP^-9wqB(OTxDPPHFiM-U+7XNoPY#>1tLHkY0^26uu z(~8-1=(Y5fpgF=(S4kc@&;G_5tPDqC4Jy*-5?=wxqY?Cjze|zyFBI*&so}p<7yCgn zP@Xl$F)?A~LinFxMkg^``U^2Y|NRmGy926duq}GgV|t; zlb7J_eX%+8)^~B!x0CS(moX0$3pWozipR6~`1Lg%O!`s6KA}&f{T9zwa@`_UO2?p= zgezQTe@~=guteMvN}~d1myN!qH&R+*+M>A8&X*B<05_n4oUSRP+Cg!_vpbT{YgL8) zVLFOa9oi0jh;wx!=Z893gm-fvsO_fn5@HuZ>o7X1p`{b={^|izi2%{KZ{GaTOScm7 zI0GpjP8Gmz?fdX|c1@7sqPEOk4n^Sy$JZz}0(0bS&VwuV1($ol9;UyCzT9<~#i9pPJ&I z|HL_t??{3sYr>!Yy3SkqKlN;y_+K9dl=UU!Pk&{rtSE7o`ZJnh#Gi?rX8+8TMfGPE zBZ(BLa1tTO|Eb4+mf+rV)r$Yyv(O+J#V}d*XNIcKKNDyQo&Zg=KVt=*7YK?L8A1Q} zEHH12@n8SqYl_7^U!L^WzlfS5`A_g_8VcA^o~rp*U*#~80zdxWzVjD{^Egv_O5XqR z66fEd8Q$wPmVf%|Ujoi}N&i$1|Mi>y{HJA|!9V`#ugf-9X7S0&?P~>Q}Uq@8`UcpdQR~NqDQslC0Sv? z-_}s95?Chx=X3natMYvApBzxh{$J5Q2k3wK+k5bTUEcqA1^>gZfe_4}{@V0gT3j5? 
zj{|$RJIUV57lvL0Drz8S;XvgW3}TGJx+(UFA{lmvoHvQ+hlCoWhtwnIw{LZ{ct>Sk z!kBz%A+WqLU4u!=Dm2UG;KJNQ0@bKzv;hbhWPP9zqt(E>Ha@@xD!ILkD#B;6=DcRP zqAdp8Mr`=!{lKDneSlL%7zN^2OLSESI+!g+njZ~`2L{eG~L7!Z1IB$K&WG&r9e|NP&* WXaC)zgrnIs{`$ZC@%49q1^zdgWgM0O diff --git a/benchmarks/course_lab_bench/data/benchmark/lab_exercise_data_old.xlsx b/benchmarks/course_lab_bench/data/benchmark/lab_exercise_data_old.xlsx deleted file mode 100644 index a3681d9af35e3127e3131ae24e982b1f219c1573..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 227840 zcmeF%V~}LQ+bH<9d)l^b+qT_3ZQHhuY1^E(ZQI7QZDZ^I-hCr(Y{cHZ`)xmLM*QL^ z^E@Z>$;yhVICHZ|dj<8_`#}Gf!Vd%r^!)=31o1ze2LtQ_|MxK>5YXfQdHntJ^Yh=Q zodG*<0Pg^R)Bk_@|M&=e0iN#}kSkDt|Kt}O;6FKs1b_m727m#81%Ly92lxem005W* zKu7?{04M;c0B8W{02lz609XLn05|}+0C)iS00aPp07L-903-mU0Av8<02Ba}08{|f z05kx!0CWKK01N<(089YP04xBk0Biv402~0E09*jv06YM^0DJ)a00IDl073x503raQ z0Ac{*01^O_08#+b05Slw0CE8G015z#07?ML04e~g0BQj002%mr#SHcLz_Nw zd_4KxP2eX$y#SE^QTabnjsGjE10cF7UGuOVOz{xcf?>p~0!0{ov_{r5Bc zuWSF0tN*DFwf8@s76OoULqKFhKn5NDE3zrjf7Y=7-GT{N5&t_j*#AwVzGj^N>i2*5 z|9`Hg4(Ov3AfhE83;)UM|J#BH=>LDm|IhY+xBY+S@_&lr-2WJdV(*3=0bw3D_xKe0 zNL~Go^K31HdEYg`)983Xn}?x~N>iekxz5f}EwwOWq9IMg(rg!$zGZY@#(A63gsN)M z;qIPm?bTHJ<+DmIdFYVy79la+8WhQ($rnzu5`r03`B;oLpSHjU?-t5OHvQHxc^1o& zpOZXUQX@}El46SP9M=H#z8twOl7n$6bW438o~=oNJM`GpC!~A7V!u-QR+9xYboHA# zbUWRq53A1VdhVwwDq`YJx7Ym%lG^xclcZ0qFZ=HY(@;Vbio=O1muQ|lww!Oor{DDm zw4rGPJ|+Bbb8fMPo;J0ugR|UCQR{Dup*lrehF=q40n0K9p9Do7sVA!M zW|P$94|iIQKJ>7+^k`?eI7KZgQH|f1!}wE^wer2<%TpXyOBP>DfqB4#9Q9Yy80y9BBw-EsFG~W^W1fKO8eZV&>`tLA-2a+>}t&y zCUQ#GfM0RR{uZnYDMTJ`515 z+1Q?eb)1)x(PXFKVnBfvXppo7FSMY5>cLtg-8;s`S^VS(rX;s$zv2#?#kD9=0v}ma z%4KRv(25dRm3DPqCbv?FX9TLfL?h%MK{2|9;` ztVzJ@czhr=wpCjGY27};P_e6;epp4Yv;G(3O7cUZF_g;=*(UEjmAb!~KVSC3!n#!7 z8+hCV-j{u;B*2443vYO-08eExeKXrjp=n;|_u#H)x5R%KwoV5g@hvov`D^98oPH^1 z-+p=?G*g5|zXOSzBJQHf1%+s2xQ8by-YY!3u}Mst!sjPmvcRw$Ag z@@9J{1UNZvy79P23ovG!lpihv&di%c-lh?%KV^nhzfHa0Zt}c;+;^6+@-QW>D~=iY zKw&y~giR+ySPPUA3}}0`UpAy zcEt?)xD==-&}xvwqF{~viz?THKf}zh@1+9NbzxA38GL;|>0)qxajo*7lzDN$_OFXK zO8rW8d4+YVT6hXxxy=O*S+E!l6?({v1vRXceFwj~-@Hf|exZR2qsJxz^;_X~Ecu|A zYNrly&|NT`!M(rIGMrkR$)W2PV=cGgai!5)PC|L%ZM<2p;r%pSGcI2XYLOK|H&Kxj z`dJ!91y;48dcTaa8K7^>QmdBTDjQ%wx&%j0H(0s0`*vW$pIat%@vh%#++GaWoh zDy`zwC>{=&)}$ZX`wee+4k5;XMQVM2(i6tkySj0}x`w0Wx$uVV9PnA_UGv@yAKfJ> zeHW!7!t?wt*P}y%96F^*+e%-@`}GN*&0f_R#OmU$B9HQL>iIzycjJbl&I)N_ckp4M zNiI?;-Ujzp=oz=*E-{aU zT>>KwQ^Fo}@ZIE+2g@1e@-QZ>THQ-9gp-AB{sp!{b7X#+<44FrCQ-$UAd5YAwIdB- z3=$At4jD8wDwj+S3cYgffr#ejGt{x3IV$PxT7iaT@)^;HBJqYM@vo>_9(3fASO^Gp z2-s*4PB}L0BVPV7Ra`9C)+{I4fZtYMNu%$d9pAv6v~d2QZ517XLVTQqGhuinv6opq zkCoD&#qe$Q78nQ=^Ths(jUMh@HGxC0)f~?Hzd1@d>i*&nml$}GhdN%n>&URbPo;r3 z=kVrK%bG<4vzqVnoy8C44%XafTEh&7Jol>NOgxqwdNb3N#VYe~gc_iJ>R>@9R^ z$eJ|^S_(y=yTrKpxJ)E(%Ko71bZ)F*$2_3AuMy}uf2;bA(fT*pM@VB9qoXTI$wv>Jj!kJn+EAm|R7v(USxtu?Ry!t)h|8JP#5mBe3p_gumxBFCDRT z_LaUM&k^W``nZCAYyg*|YUx{N*5N*f^v(KJV5$j+s#(1fxGFPKm)X{gl1s;xf(@;C zjskr~f6%5=Q}g22EMfY;?oG$?0*>H4R)Zf<8Z4V5v#Nn9+)Z`qCvUQ_C<*Wmqx!x% zmwrYszY~?sfeCDzbhm`1TIPb_xOIsTTANDyzpA5x!8*MJ4ptZ2|Ng%7qIFF+Gxl)4 zy!e|pD;AT##!Y-nT6>ie6*r9X)Qp=7UU^Q{Q*lfso|7y-{TVeHTY@fkpL&kpUQ*yN zD)E{Bsn5)ByBMu*b0y8OeA-5dvgrBjq~{@UBoUl3;p}AT&Dqb^)IYny{!Otq>Mwg7yh5 zZ(ZDs{tLtk6%dLQnmN(056!@w@MnjdEZ=|jrfC8$x+~b=iY6a$$hMB zS@&MIBv~(mJE4^#p{0=%ygL7NQ>ffV;w9BHLW5FdJ!Dc>7jIKXn)6{F5@xD#UmKIV zu*Dxwl@{+qomP56Y-l%57>f!AQ>mtMIiWV(YzDq6@Kue8{)P+LHaTT9RQusy@Nrw5 zR#b;&aQyx3e#=(vaJk(_;`LJ5=A(SAII*Q1`D;1wpV`t`cZsbY!ik-O{Se^OTtLbJ zzk6{L#$>=zPiX?j4dsE%;_?Mvr*CO8m?zCQN}KnqSMYCs9%^V~OUIkNl_gACO`YlX 
zznNxpa0@4#6NO&lIaU%7Dj4(WK*TcV!}i5oFdfGEZu&zi@W$d_W0##tm^75m{Knp+b;Mc}qvKgYRW945+dVjr)!vVX2n z#Wn+Ow`SLk#YUt&g)oOu&i3<}bBZ|{P`Ttj-@=TGo#_`!Bm`Nxk>DwtrLrCc5BknQv-+z^Z%o>O2uG{M-MpXRKM7NYzJH{H|7 z^BQdi25z(xdx)vcIM`dpy>yF6`h)Xj1hDZ(c!Tuq;N}1Q29BqB4UTUQ1H)cH0PEgX{kaU3z?$iQXy9m^Nw0|v(JMv!W!c~+u!YzD{HaHYQ9 zHMjA&uglq#(=EiTwHS3pJlnbbCiE&qAKzTC_i!}h!_w=S)rc<83fuu%RD!&%3~k@g zB2UwzLukh#R)aTp3kKo?>0^<}HL}4=-XED7n|}le%A^Z1_oC9a6xdsRI%{Vx((CM0 zuOL&{$H@?fErlVOpw!Y7Pilt{&D1#?V1|c`;9H12G3j@~oy30VMD;i1LsUkg4bPxZqRW7;Cb)?^i56NvSdRpg(Ee1Gdn^QsE zUYj;a40@Do>nq*?#G4Ex){^sl^v+3u{aA9-u1E`;3E^2)Wc-01VUU89^RAwNF zz82-mv6e4>e&6&uVV+vnz4~D6DL^r!MeO_m!D)KAMdZlipERqISfI_qfbC&4qfG_L#il7K7VPpKNKii;hl8t#cR`my1Xg?P6*EpRdI$GMmn-4Z9AE^U z8vXM82J2(gYtywG`<`|V+vo|x538^#D3!`CAS`9VadH;3hAZXX?I!*h;D}d zQ`xC{W>GxZ1I}f+q@{&jryG&A`4RM)8HqlvhHm9WR99rh`#0BIMyK#|mR9s)G18t$ zEg;Af1Iub}0~CM+;wg)OQ6ZnpJgc_wm=TPQ5-)5K{L&kbJXfK_4%E#DRu@`_tU8~` zU}*IaH{QOhka>JYj1FVVJWG_I82ipr3Le3+Y22k0e>--`%!CkrK~j;xiQ`w4qb|qK zIre#DtEATKer{UJrU7k98Vfww?YVF*=)+{ozUbZ%qS7$%8~>~rsARKcc@`BeQd_); zroTn_UeOl(kA@H;QUu6&d_z}6YrZU7$z;c$KTC2Er9kg*&LqMk7`kXdkClAzAK%2U zE|t>Cb-@}Fuy4OzR5eW$rf=!ZeN`9t8c)l+a2=fM$Bp}CW2hm!c)lA( zHT>ym*KhJ~*FlbK{*@{&wA~TovW!-jWinIhX4=YnloSCre`#-T#%iBsBc)5yAzkzgz64fqeGQ!ZtN1d(Hoi~r>nxLmkK0om96f~ z!9*c@2m8H(+?6owgmrzcLEVU3GbwY1m-KOb1fkZK?H^>x%xoq6hd`P!_PTON>^%4#T(_q{v ztM}KYX1g-FwQZd$d~%0(8JD+)ieT4B94w$~G|yhaWV6=b9wU#VLK)6V`z5wObg0J* z`oVpCc`C+{TC}D8@T9t^MN0$gDE^e~u&|J|e^`PRCB8u~P0I$+bBUswDS4;bj*Zv| zkukAHpBE7kJ>@;lK8*_g$q=%Bah>oigA4&=S^N~#1b8W?CrBlouOUNdK2_mkKV2R9 zZf2ftoU*Ng>wU_4iGROnbDE^jj5~f}7^)>c)b`Ga&`$mc25B%ll=+z!bI4(;;f6y6 zpB9yx63M=(SA96PJ$J$E6aS*%*QrX}wxw^wKH)RvoO>hAVNxT5-bx%R>%4m6Dbz|K zQ;Yg$kZsFzo8$G$jCTOyXrNfKY4MK;E9kgL$-(cDhllLibZndq5yiyqe+VtuLbKu+ z`jmJ-3?Yh7OX*R6n$nlgM%h7eHjWv*5dK{U^w69oj3@eB5L5erQQ-4WHTge>!uHiA zJ|joV1{7IxV2)T80@tcZAd2LS0I6ZagWV0V4H(-K>Oc^oC1P&#D0dz5-}IVcl;bv? 
zpuAo+p~r}91A9lZuZL34%);5pDBqNFzA+(GsgK4L~_&7UvM>{ve5%|=tBByG~ur7 zHZB0tceth9pfwHBU%gCWZXnRu_>Yxj43XavwOUETcFvwqQR4{Z=^C8&=?&9&-uFn6 z&BV&_lORiB`Kk<1S#cX`KqeY2bM?a1_VeMNFgmDDq*;Y!ypOaM6U-|4!8(t)pnh7tHg3x$>|Pg8cpC3 zJgxA{wU@V4^Ic(2US0Jpw?A_WTi2A$lpIO7wUNke%O>{RuIYalsuEX zaAjqJ$6lJgn~&w=b<{lkmXhsHG#^ub<)D{LtXd~-E8ru|mSZ58xbbNn&s57p(qi}W zyxPz@g~t4#z`+-gD6JLzoxzT*c~VbS5C`W&qL=b?dIE#|;hv)7<>~u$0ZsI|SxL(*I7*U3H4s1&CXX|fi zIY>37rpipQ>iXZ^E*llh=V@q?SU1M&m-g+uz@fdR3y?PF?2Lp*SyBW%6+U%Zo{BOf zxKpWCzZ%D}K!TToMk6X9D+Ni~hae&U49a)B#S|QTjpC#{sJ_JSV>bk&;z(O9* zx_u$t8v*YX1FJLe%=h8LfPMWXO)640KA>?CQYkd3#o5i#i1Y6W3u|6fMqi%54Qi?t z(co{!d05DV3mZ2WbdzFL-ZD0i?%}`Qk@5Im24`l7u<%Y~e|d?FILg^ct~h54((7U6 z71c`3xB_mSVMYgkcL@8uzjK>(kba7oSm2CJhIj1>byYiY$h<)NHv4$`;2(gFc=uZN>(XVkT(0i*pH1ad zlZ$L}os4bi6ben!v!97jbTh{^kO?w-iV)2FF@MNg{>Mgi-Byk2l> z!|TW>aiU=VuqtrVIy91gz74JiLlzXJMW${Zg?5GLU?s>4YKq-kIq`7P96ht-6*ZJo z1wC8-@%SY4Z`fkXOXi*E2kG`m)B95%mx3G7bV)(j>K?k-Jc{w!1alzCXVy5SGw_v> z$5OZ}OU{1ikM|nOx|4TrsQFRue;gl(`e-H!-#?E|n>0$WGDLf4<_|$pNys~#N>?0r zNfe`zaR!Hk1P2WU#%_R|EL{&xF z$UFHC?2SP5oDW>VUdhQp26V95<9cNGMj~gh$|-RY{Hx45HfLrnUi%S9juCs?RpkZf zLhkqGd9e5)L)PJEeJX>pSDyEZg00SQF$9R`aZ{?OiC}U%vtZ1!h#;v6A!q0QW;@3Q z{7YxJjo*e1pXP!{ZmfiPyo`=iVNq9(fQ+N1w2(rW0M(X!r?4qZiE)Q_DH(3PXfF>K zdPdISE`jMna9wmAE%+%uWX{&ke$~z_e?0TpGB8e*#r+)ETncYt2kB{GHIP>WXRcN- zy5w7CU8klhY3+MnR{S5BA?eO;{-$$iBvo3$S@GlatlGxP0ik5-?c{ly2ooo%{L)%8?;F;@Q5 zPAe?^xvIh)TDE7IP`eWe`8ucyYs8^Ez6YM7Z8mfAy|ZX)wigcZ`{99Q#Z-M;?MMk< zJBy^zpSD^o#+=`9mOOUjRM|7QBE(!v%P+L45AY}b2TII8B5UvZUU-CUZgkSdZ$+ac znkWZ@f8g(V|1z?yCRgwl4gJgc$mUB3pEaHL(L+1vdV*U#&aRELUT?A)ww4l!L+Tw_ z_%qf@+M0Rr3N&EYAX}ftK0PEu*Q+PE{kM1)gfRc*vIld1T$R^JtPnA8tS63;-gezHD`Li zlR3@v#9@W)aQej#m$sWT)|3#^d?b<+ShslXDvjK?PqmGc1X7O^xE}0PnaYq@XLP(gzSP{*~rrw>7n4XG?y?6$YUvF6&1v{`jX&w|556K zI~+JMlVix^aWMB$Fvk&;5`T^^!g_qkk<~WbFxWoX*GdUrlO$rZ0R2+cB}^IgBw@pd zl0;ie61DKc3xX1*nGn`7bKrtz>|l9;W7Um~QDZnGQ7<3sP8(cz)8)+8r4oU`<8Qy7;HQ#M7UDe{wkwEs19x(s=W#bOVA|d09lO8e) zOt?n^61x`Fp|jy*H|Ktc5bj{XdP1+*ieqa3cPzs0h>Jn<%zAqodXKh~0F{p7+PT&_ z77`WSZIj9NcljU^8h%X!vTc^$S#ssIfQ72+(?D z(M(&2;(DQrDdHm!W>w%H4BIw$R#gnZN`Ht$jzZrCtwsx9G%y>0?v~9?rkrItW#oD} zevx3fC6homj6i+*aCoF^3okN;pt|+yR=%B45z1YCNdWBd{XC@eB=ePUE4LMc59!H1 zOX8;n?C?w`tH{esBG{Np;9PEtUk9>S8!MGU{3&%OkFuX{?^+g16?VZIPT4t;^e5Id z!U7+4RRA6I2y+P$K66@T(sYRnUpTd{ST_vvRbwT$8TYD{5wkSt!O7>FS|ul7XcV$r z2%1L^X-Yi^!JCjtFf7=s9k-sAP&wqzcJh9>Rl(1nXz#d7d*@_!CTtpo`9Ls=Sn1Jk zrk90?`H*o^6OA`JU=}}^W=zt;I3-TxV#bMZP}lcwVKg3crM(#f+fV~3kZF|5zna?3 z_CFw8y_k$&ulWP9G36&&2bq=6d@sipHCVXxB-v&$nmFJ{3N2Rf@lE&0zJ93{wIvjy#*Ym2xws}u? 
z483D~5z2iD%~m;Flf~V&jtjl?ZZuvD9EE6AmOETvb{_TB!xM?$@0k`?ZLAhpeNcUQ zg>`Keg?P zvQpIpr1DTQcarmhgJI9HOOE7?eUnwPC3Z{D4C>$#CY(BKcBC3hEEi0S6~)X#ZIXvn z9az8r_#;OVQh*+0gr7mLxMfjmAmN=Ki+>GmOL(3Di4`%}#J8ET{**?hS`NF*T+M%Z z+;f9y$Vl13+dp$$pkH9GR|MLzQq#-qE&ZAn~9^?B5K8=ICS+)S6ipkyY!5s*ZZ?d>BC7)k%rA8Qw2`4m+_K>C5+^hH!XQ*qzWNbw% za12eBEDwL+vjV@YSX)_p^VFm@+Mi@!yaNB{ZrKp03&QoA#<=oblu$|EdTdDd2Gk|j zq|9dCoslRdDfyhs+2BaK&*&vRXSODwgtS9^u15-32m<#5au4o{XTlpL9w$Dp-eoQ9mV> ztW1YZeEtEXx4DkHZ+ER_5q_Vph5ZhY07so~$h$f#43;jC+SXr8dca1ZDrmS?uj=p;*NQJ=CfFU$Yh zyaIBXKa{;$`Kz*cZh%apYM# z+c~R3sbZh#15CjO$4-ldIji-ldx%7ELM>TdsaO-Sy_iUBELmPooNs>1&lRNqtJp>d z8SapZt&CIcXaHOO>;i7HC=%G}FqASQp~K~V<2AIu0#?Z7o?~G%f%vHo0w4RD>k)E` zE@pZAK*{Z*ZD`3Kg3lj&7_M}her2X7MG#d1o{Z-!ZqGx^Nk*|Rw$t#eJwaE;k3$vT zM@%ICxL5l9p{J21QwiAVCm5;u!cIb-twK3ok;khJ+lq?KYR@vTFw3cuZI`rkZH^$| zv8!A24|hc?OY?S$K?ufbg}tHA9(OW5x5>3e%KW%>DNeh)E-y#J)Za4;13ucl;`6GDgK+JTV1!`S?i_hJ( z*d|A0{JxHkl3zi&?)F3k3f#RipcRV4R;kcT@=YD&=Wsn#E~+Zg{X&xVzNvMhw^6|L zwXNBXCy5h8^1Zbz>)ffKlX)e7rFNA}+$g|lelKVZIHVoS(uyLtw5ouZZK4Ki4~y%* z6IZ0WV}!ZQ2L>1RY-h|d|083jzv8<{UFx+PQJ7#R+B zMZg55OiI$dUzBxNz>=*t;lrq;Xf@z)emi6~kg*jlO!u%j-zrg$LkIVyzzD7Nosxa> z>d{j+XDXyi?r@jO`48sC^5j%q;I1mAJ}zWt4=qaF^~cRRZlX6vxzQ z^64c24Sf0;f+pHf2Xm0yTexJ}=MLJr#i@gj9)f7$$Qw@y7jTK!R2Vp^y<(rzz8N~? z$|dhFhR_F2lv6B|nk6_fGe+$rlnnPe>eLH7OGVl#;_|vqtqkAU7xdpm}}MtE%MmJ z<-TU&me|cZ8aJyj7n!yR(`7Jt=J-+6opfn>*_Rkp*Kt|XD^t$+pxtQn8Hu}F1ViK3 zt=m5TXMKANL2n=?XMu{gbmGML+s+Y4H;M6CZ%@2vO~cSyay3@8tT?(3c(e ziQALlv7)NS{F?}!LSZ5;RcTe=C!h-M!#wZOpS!pL8dxxw>Sjg=-|IZCY>td@ix6V6hAO&zG{drJ#go)L zpb3?t_{)Ikyo=s4ri=AA&$#h-PpfqsIaA=*+#FMwtd#S=k@EZgL&1+1i(zPiX zRaKz=Kc?fQ326;$c(@Jo@ay=YqrJo;@4To7weSehNK)4U%N1;kwynL@COjsA%A*(I zcXcTgjHB7vMob9Xyo**7B;kkpk6_U;eaW)SJ&{ENhy`AsTg*Fg=5>NzP zLO?6<{of^%lvSjPbBN~-LDh7(l6c;p6ds#ZG3AAHPlY9>hQJQ-4xl4Y$nkS~`v!p5 zupGOPi!6T`|Esb_0I}lSus9_=8~qKMl&4(3Vl}hXEil}$D+uCl$J-mJ#D`1gOT$K1 zB?rCrM*9^g#LGcfXLv(S3x`z!e+jv1e`rBJ#~|uaLEaVmnLSOczV61Tp_AG*rDrQg z;CcXI2_02WNy**x(P?&tvcD4CrXccM4sW`0FaoQzq#}pD^U>8oswvumIgH8@_Jwlr z7)6;dXj^X8&|wy_ zMFJan*5sE1hD+4xE0&kQD(VaFW!Pa!NR20jm9~?G8r~ z3!#>yJ)3~*GSCNfaLBscm4>&Dw%4BJnBgP~nlu^fH!Y9$8!n?uf53fsBPWs`Hq*8f zgTrpSV?HxD^x>pP{2tjq4czlEzx8v>Am7Na{+L3Nft@yqY1p6w2{c(vy_k0Q4^>qa zD~;Fjc^K#06k#Ny;>ine*5pw3udT7+uP0W2B(gXWht7Q{mXsQl17=(eg3$7hl*Vei zpM0rMCACBl*L)BV&bZ@|K_d#w_lj~Qua)YkTAS)?qni>t43cnUm7Vl5_M~dn>7#2C zPeVmN46Wd|@^wV3u1vZcn{ByG)y6dhc%Yp}{pLKIoWoRDyYXde&sFR9sN(6hW@kp= zfQ*0<1`AWW`ma{5D>n+Ou+=Hv>0aB6bIoZ-nHcQ5X#u))MXEpy_1yRd->x2gk| zYK(Yz%GZLS5Q0kb@j~>x2RlC#)5en7AwBf%MzK55i_zLrCSh33L$;R6!j;T|vS1Pv2Bfv1mWV8h7``LD2cH%d&6e-#EbX;dKtsjqM> z1!S%3hcaKqLY8r09%LmcUrA>(YL^pL|M(b5FjY8bs$`k<XgJGz;APCFi2ooCd`;dZyfq7bZ`v75ToT}S zyU~j1a1m<3#<@nW$v@~c`uf)LieeG>(WgWg!i>9C9NGNXxr%FZK0rVn!)o@=>~koS z3|j^J^Zmw;4z1ypXMLD7=4O13EPUf7y0!DJ+VRUFE;RPtfh|JRO3`eB@~eXJv+{yZIlfY9yH7q{a0$N$Bl_C(4B-tl$WwBINQuUr zXS-j!Zfk#kj~Hf(BOaUA;7lVK(y`Ca*~CNvhq+K53~yqqcpiVPM26tjkk(bf&vR*H z2k3!LnU^1!#iilOMrENwmyC~Lt`jVN%9U8VlPylt*+AUN;=(0xw8>0+DN0n;``U^u zrs%N280!X>xU1#6-^(K@2H&jc&iP+k4xVw)@Vg0)I+=tWiye1ke~4+Pcf20VKPHYm zFIwbexl;?nY@RQ&{;esl6Tc@?(mZGITj6X;cQ0igAfC*xTi+`Z0qwZN-_cPrP;L~u zZ(iN>VLWKc7;{A~qhn#JEN4`-ea-LztVjbJ%_5X6Z_%R@)~XndFZr_IF3^)V++KMY z3av~+^ARcN4b`D3iIMlQRZY~~6b{pkNIcHC4XAODwjpPiAltn7zaXocR_9-0!W@Nn zf$4uVDlNnZTku2&bo3MsKr9DiB~faa6t=yiHGyAo5XDCA{%fqJBGFcpfTTCt681IVL_1x@wdgb66 zq%t?@CQgVIbB6eJz(3)}r5rdn**9sUs74I!LxK|slRqU7Q!&>EPq;8y& znikr=XHnO_x68|i!V&(q!?_E{VyJZDC|hwZ&_l&c>`}S9DW>JU&MbF1%g-ZhY8`dR zf?y#-x8yDejOMOjU-*N1zs$70n2}t5#4Ip40G8K$B*l#)^ zW@sN;?xgn_!H* zmJz*GX!(|USeEk&+e4!xKiOr}?F*&)nLlK_k75m(GY&VeEAC8UDz;0&0~dOg|88HW 
zMb(cji1LKXwel=ysv)wz6($vvS6D;Y0ZT%K;jdf6jJ4UJ5k57m;6+0kJYda69N`Ma zY%U`kE+}E3)_p$(mDqgTY99MWN~k$>mdMCSH5n5H0}S7}$QvcBU<9^db=q-$*%#WO z$#dKCVwR0f4|&7lAP-eS6JTbf(KoChJ_sfY^~M&(ChT_z2Q*U7B{$6#qLuJMN8J}l zs|I~n-0_m$qQ}pJUB9 z+{b8yUL}*dj(z+0CyyX8AXI2t;ol7`Kb>qx%n@@G5Eg?aAz+ict}IA?cFM9?cUN)m z?a|4+i?6)}C)Tw#=rv*{9|XQEAKCIw@qK89UIXbB-+pe~FNfvCK3I}?WM3emSf}yV z?U4GA*Qx~ew8SEFeYN34>`!Ps;EPPZ<<1wZ@|DT^Hcz;UyPFl#hL&Oj)eF({RwWPW z{g;ko_HstEHV;0M3-Xi2Z6aSePU4rAKU<1BZe4zXn^xp^AZ&-%f+fmy3fhE9Npi$aczvTl`QcH|HeVmtO? zi`4vZo}!1#_etoNZ^@_Z%6@rttp_FP7OTlNB#>O0ndX&dgwE1=G97%dfDD}!n38@5 za>g>4=U>hl6pI}}S@F9`@gyqg*MN4d_Fst2CVD*>CfF(?m^u37j!5q~bmq+TOcRn} zMD-By>hk@$bK)$4?__=Rjf&eHv{sERr{A_nyLJKBQ^05{9yj~LJi&g)$QRXm&kZTS zqJdfziFR6aAU|1*F3{sz#q-YGPDf(M+YqHT)m{6~3{3AZ5?cp&S-7@agLC8U&-5JT zxB#O5&PADXK(FjI2a^0u*Z>%iq=s2wWsoJeQ>e<%vk%PU9jY<|`F;Wt)9~k4S;A5# zNz64GSeDQOzPf(LOK5((6a+@Mhl;_z)4`!Ab|nj^Rp*0bz5PcW&whA`nJ!H9?lDgK zC1On(8P)Q^8XqD_IT!5Y8S?e1$jgVE-jlwcC{+?(8jedc)TXm3{FNRN-krkJiS`tr2^~U+h=u?c39IJw&?HheQNu zS-CU_ybCJkgcPn5s|DCE<#16=%nEw2-zDKamH7HeqKhkHpN4!Kx5>UAZr1i!qwTpZ z5(+?XZ5)7kOKf|g+N17$rG#l`CH({!23Pw6(OeU%iC#kV)zY0T8r_m<6RP-Y0R$G- zAE;)%y;W_d)>)tr_>z-a+qtsTUx`0UK$Ghd9KNxJ82f7_{O+jD{|lQ z?{d56u$Az*oEE4Rdf=bn@Di zKZh4B-C*O{pkhoF`AZGEH1*8c1mO}KXC`u)Aj1Ea=ist>%%Qkovo^` zA@RfQ_%Mdmvlohd`W!d35}Vi^{DbZdti6O$(IDX>Pqmh2^X&_?h(72ECdzhxDG0ty z#a`5)0cr6j&tp45j@NJz<6N3@>3D}uD-}j!_Bgd*1&B}8_}{{z>08&6K)!<)m_v^F zY8vuVmSlnxvUB!|TOS>cEe6(T>YMiBKhR9(sXr?G+pa>6c~$S;%KKLdKHqsy4|FV- zXmT)Jbjr#5I39~_t8-mQBQsJ|hb)OV(ty9Gx<6!E7u9_2=j=CbI6?~Z@@xyp+V=mV z6RsM}jvW~#xa^I(wL!463A>G|Zc6=?-wi4`kX&c@tnTc55clkVUj8TUGDiA4@t6Ns zniN}^wbSp{phLZ`pZE#vs>R3>Sd***LKQ)uv1`Fr{TXD2$U{k)poT8CUZjzj-gEFC znBgE)$)yn{Y}CbvkcbzNv)sB{s2zRaI$F=fFXNWmI^FUzO@Y8v2}wApdrwQrTI!Nb z@JSOqIFtkmm$Tj!@JW7f#LP+K4Bb?e=Gs2s4s_fhLQh`pYm0j_-DvS2*bVR{DP{OH zXDXsFm-8|nIe7MRO(yg0U1PR$oQP&Y#(wRIp#GIV*Ea*Pc)Aep$7x|AYD&h7MjMvN zn#C}v?G|{JDDLU`H11QviW8MTo*HZgQi1HkY+KIXBe0>;SSLqER3AnQ% zi<{k0X{iR}JcO+@GR$$0HSu26at#kq;4kEzimL|jkwFcCJyoXgn=CaYi1aGZ(lHVp z0u<0MfT{VF*Z#386kSL-saE&syEz9zc?-}+j4Y9yw>9-oq*eMG#8V=Yo)7f2%?|w| zHW>BxPbDz=%4X6#u*6lK3KOZSALc+3Kqz_PnvTk5KfvBu+0WSXrZtoAcXSsXw3Ae% zK2UIyP_2tug71w3sw*cEc!($0D*uZ+M6E*7g>X270!sdEnh$aJC*{kCBoro8dI-g} zG}}3AD`Sq;*58h^p0=+px($|vRyCkVT1I97vE9;$+%ld6ouHwSL<5RCKbr2m1WQ9> z);YtD;C09Ix1?ZDN%LPeZrhD^EI+#6B%YP+^0x>Owr^W$tn&Mi4&HsQSYsDFM9yV8 zi@Q5$F*-hB%%B%&3T3Kdf-Yj7dHWK>BA|MM-%KF#GB7BZAv^G^^;_9*C_X;?Ln z3o|dC2vK7MM!(AyNagMsQc*-lm`i+3j-7WTmvt7(?Z(U&^V?DlarY~Joa6Yo8BB9^ zX4szu4X*4jmJH@`AIQnHzuke76r&yPupvf@1dVyo9NAw(QyvgvIy6(FprBa&ua*8K zfVF<_d*(ac(BbK|T}xZ~hgOkNoWy`o zuLCdN)fxx!#j!ztF%(v^{p)SA!xFGA5bIXB)y#|D@X!-+K~@(!$JmddU*}*rlCzJr$LXl0SyjFlG4zC{Pr}EGgmWT6yCs=jJ%8aEK_zA# zG3k)_=VO1;)RX(%o2!^2-Mmw<)6^vvzlUe@YN*WYp*^u+&A&Y_@tv8nh+}OotMSg` zC11PkL>RskTgC38nN}*t%qY<~hQY*Y>mXt2%bn|}>&xvx)wC`6FLvV8RBHq!?!cc5 z{gmGrmbTO%=#uBgB13t#P)0R0(DBTQ65HOtK8<6VkUxN-=?~{Q2$m9H(35@$L|IGs z7YiOZojHI*`d2M@c{G79)fS41oOlsFcg#bFHMF(c`(l6uY-{Af6$xR zAq1E3>NLw>I}}r|3#vjbzy_~(83YTe?#J{F(Yw+eZm3%$uTrF6QSl8$I1LOv8aXG*pmOLvy8{sZen9YwlU}>#9+`}Z1^<;sV7T;B>W`+ zQ380WNMcep$|&{=96HzyJ@nzNHQax5D})sZGD|03A=!EhdGNS4WIZ|bzT{YtmX^B# z*~XeAESE>cnkx`dJPhp8Klmdl)_N1xuT}uXeh)tg2rca(aOGb->u2<1hdjojF#ITe zJKbheDvfIi7%lv0xajRpyDND27oP@GqPjCYc0`9T-ooRPDVal#w&JRGvPQ~Lhq7j& z`#t;lzk>Y%LlQ$i#-`s0hJ06sH1$w@%}-M6CPbLQs6=Zf*^DpWd_E_&bi7KHwKfL~ z3Pv<+FxX9@c^gjZsFX2C`}|R9T+kEK$$Ilw_r^RQc<|z&2cPZq<$#Upei;`a4H-}6 zCd@|$jw`ctk(z$l*anEN8Zoj=OlOh$urJR$YsG`NN9R~Z>X~ql<-L1yHY|%p2PegE z%YY!IR1TjR>1tCZ$p|1xOT{(#a=1j1JH*D304F_bP(s4|`Ll2P%xNOH9|@-U7fD6S 
zk|~ROWK6fm{O16N-2vF5z~k8tT!DX?Mck@Wj`f4$SBP`$s#!;hZJqn81ePgge+Ht4 zC!-F}{{bgJ*uT(8u=i)kl1VgJJw|QK!Sh4j32Qzjgx86Mu%WPbEjrk!%DYQF?*_0! z4LFr|H}VYRI&bp8uc}?XGIlfOD&?f^KkIjLKqn8cTOpuE^@dK9ce%)&7iPj5~!OLC-Oq$CUwh0v__gjL!?a2QMuO_zUF9?xpbBIZ@#faA8xg8TwH3PyAOl72y?<;9fk85aF^iyskn)`Il znBZZ>mC#XH*lw9LXqo{6Xxr`0Ny_)!15xfmwkuk3Dq{LRU|>ScJ}QQ;N^%D?K}fm{ z8kSu@dvF-)6!RjWx!oFZSY_u|T{@BT{56bTOx#=bx76CAtSJ2+4^+QFFD6ZRbvDle z!YQ6YO=x)*7WgA`1PNLU_F;p=GxIXjP<(H%on5T>Lp~`o>df>DGTXnUBO)mm#yC)# zD-D$#HtF14xsJSY_aG$XuePUMLQ^|)Jzxo>xUmhb$Pcn$h@bGRVi*50o$LM`+=Nl0$gJ#2PveVyzH^92|do#2aH6hT2HdIZsE~S_sor7c4jNo_hF%8 zZXzf7d-KvJL?}RsX3N5~hFnqw@Ga0hpO8F%0A@W2pZA5awCmy!;rso6nG^b&#v}>Z z#K!X*q4y@4#$M`O|MJi){w$N(v{B|vJ951`lBoSnMQLQiC?!L4&Gk2SFjgKiIbksK zlQ*peH5GxYy2I27KvW5t64k4Wur=^zf;%aBM2S<_pAYpzV_EzL)q&w2KV06YWu~uZ zzlwpP{gSDQn&nWic5v2zq;n7=#eFG;zG2{|O{?}4PIi@I4w_N3Xd6FBxsPY+XXdS) zz^!N-8(s#$pFpTpF*U8c0eM)F zViqK6a}7q+WEU;m`{70vOx9P|<%^<{n^ZVJPLB{%ZmYppuF=L2^}U~=6j62nD)7YiV1tNpkoWx!M{ukAj4$l72EZEC{ja^N1rTnxX&gCULA19e!3ir8?nnQ4^h!Bd zEW54u0c+6DUgfeDz-dBOxVxJ`zbPY!zLyQ=pPDbmhqCFsj~d^WPPCTaFS>yz&%lL$ zD(`ab)u)^b37oVo_k*$73eyppYkF<%?oc%rDL!M8w)#v`-!;1;a5X18B!q5?9K7Io zG}32_$PL~^dpCobT_zw< zh-_!kkmY#%;wBF={!d;1RrxIxZ->-FK3$O|^jO{J(4hG*8`q%_3fhrWn;a6WFjCLXiwP#IzIF1X>l&?({O$a4Vn z3~i2gW>y5SVLG4)rABI_eBOgNu`#SzIjCU?N!TAyoxe1`@qO6vt&awZKPAC8t_yg_ zJ=f_<$&P&1726)GlBejWqWm0D$mA8yfN;Uazt2M8i~<#kG6%cfY^b!FP-O#-4}Vna*Xx0w4ig zh2=SY9K5oFXP(1k1z?*mOf4ImwvR+0ZjmzWWo)fcwk_Zd!O}$Tk(>0~`en*4ViVtV zmq1;wy%a?^gp2Wc6ei_5xT4|%y`$6AKBUP?>aGdv$AdyU*jF8;v@C^HaIb{vS+U2$ zCdr{C3x)y|rNPWy^3F4R&ALf7x%e9BraKKrq$VoA-Vzevc|g<$&DH2By{$$g4z{V| z2W$Av7Tw0kk&?Ed?N>|y7bfvlVf$V`*yM;$*(yB}vGp+jmxx|>sCzbLmO{E5&=C$z$OeUtyN7J)xoJCyrRSsy$gSRjE5Uj z&~87E*Y=hC4+gu!-Bj~M!yHarxsM2;gt(R}I2ctza4vb)Mp@k3$V;Ccat|5GSQ>zQ zt8uSWRv@!4mh8QHVbZ(PD6j;Pt5ts33!Tb4D}&T)^isL{#zpovfN4E;IsqXI3ZigQ zwEu=PwmPzNpwlm#|KUklYk9qfEhfqyf(Uk|x;S|Kmt5w&_B9Iyf>cKG0-cmyO&@IL z2`lkoSySEk+_><_cf(I#mz{+A4xn#_GNHaPyXs6JuN)e-Ttsy4zfnLu-}xr(V3c#C zu%0$`zsLaWl&SQ@b6R|9Kkt3UuiyRAj<=7t4CD6%Llg}XNK5_(MBVw2t@h9^NaCJ15$%W6_-R!R}xXVofMc_!T2Xt1u63Vs3E<2!~^6 zLwqVueQQ(gvmvzsFY@jCy_h*rOs-nKoulDVuJIZRZ^e}BQXMv)-{#Fg4h)<39r`hx zK?Mkb8=xeK836CZy2hj)Tmc8l%#ak=)zm_ES|Uxx$_hlTG$p^WRiqw|laieH?R3&{ zFZW#b*k5bO`BWc>Z$=C8TD~bI55=BDD zSqHPh>~}gx(U0XG+bi7vt~AQAFbkYU$#bD3(u~kHM4aN?m8op4VGRZUrbiVu!|HIm zPP$=}5zp$c5fpIBO-ax*u3qY;=u6NP_rMg-hD6HwYlp>L14psp`EdWy8vJkRV3o#4 zjBDc6r`lAqBX+JFcdMAKpk)14W616%*J+nz+|W<=d#nG^*wU7Girwu@a&T0OdJ=&I zhHTp1+482ezl>g}hw-a*6kLO@RN?_aDkt@qiBAPD1ts?ed;!Ht<*|JSesiV(Vq5#f z)qf<|(^g7=Q?EU~@qko+B9(B`8;ORd47aQmjpE-CRZ|`O>vt8)Ug6y_uKBMCD|=+G z27bBcu+YRCxHZE|_EQVryc9j#U_|eV65CtB`;H#Vnn9x7d?5?~{}(&~c}o{+^`gIz z3>%`RGq4WZQbw!;Vw?O=3+do_E&)Ryw;bQIlI(MIZa!hEFSP;>*tO1kX&%d^SJs=r z9Q&rdalfu#dqKfnWaE-Ljt3R&+k*w+s3{#jUxKcn)j5dJM9*sk;G_#Ge}p{iB@%`l zBha^8#p-014E}~rKohk92HQ`Bf->_;R@tUXFy$M2F5vdTs$qDXx%y`9-1l_Ke1b^! zW~51no1{|keU-CuPkW#aFq{5W`WKHTjWg4xzMRr#*v6U~T zcW**p4tqI9`&+A=(67(rOI7>G`FG#_7IvPcq{Z15 zRgp^Md+$-$9HxTZ!zxsSjP97*$0uHF1v0S(<>)!;sSpU~Jk}?7Dm@H=oP>(|0mF%^ zCI1CwJ7BZ==1sMtlhCQ0w%k5gQ>1=s)qQ@x_C;>$78%B`v8q~%*(`J} zzEb03;cIeMdy?V9Aw=y0?;w16{e_2EBa6btBW0TScXqj+-m|;O7=g-Y#Y~q#48;d( z1`h`|p%3!kp`gb~xcIk5WpS6_Q#t39MLK&8g;fR?$_+0^vl*u6Fx*V*5LXX{c9d+^RQo@J(tov}lg&k*3mTD?-VJ#tPev4Pv zs_TRk78O21k(a9>ajg42S5*18#Vbv?{@QpQcue!V9+<-1&f#l(<2S2p>uWI(LzT4N zxlVqJ#M5I3&b#agw>{C5z+R9|#y9q_u!x`4x*x1ZOttjqb*hlD$#EaQ_Z4+JMSaiO zFAmX??4y!+)O*?JO{nZ`y>;1GCI_>V^#>sazl85bZJc z*T^*8j8Kr6Xa;ScCNnlaiqo@@Zo0lP--d!chv*!hhbuIos3&Ovw%Z~Gy%Nf>cGvx? 
z>SfZ?7Y1s?sle4%mKiYi@kDqvU}F<&;O|AUEzfp5 zEdCuxaoXv2Wi2D=Z3I{ALJS*t3X{F~i@|=g%i4%-9J4o1lDYqQ{Oo~Tl_EkK<7*uS z$~4E3{fgaaq<^*L-I;_(Ff#|7$kR0YxnWMX{24dRn!D?%?Z)P(#S_kUTN~YZkFB_K5VmzOaq8Fs;x?Lbu>1_^@g6{!zhd@RW4*A<$ zCw2$uo!ga4fg0<}^ivMU5uJQ-Sw7!kDmh5}IJu(ioym9wV&0MW$QR=|fm*BZMT5*8 z$3GiXcoHg|mZ6+Mc}1~QNF&>YWZhOM&ZgnWa8G`w9v# zriuR%b7wIzP7D&vh4A(zUg9|i)if$QyFC{~v+_g*2^im{>ZBO*dvXJz{sH&Bpvm3B zik9(bcIY_SP%@5FbMPJiy5jwoSWJVA0`t%Y`N6tZ$#Z7KS)@@UR&USR4c;pjLoj48 zGKTABo3n;bYWS-G*98Imr6TuTkW*ze2LP`p(&wIyd^Uuf%VNDjpWk`f3QUk zH3C;ciQ#x)aRW~>)L~M(j{Wp58nPYJC2k6)R;@3)_B3>{)C_dMy(k=nXh%F%!#;^N z3=x8me;an$8l36T=w2xIIU|r(>-+EZAXf$>avu4^$cu`&p zaw9C&jq}MT0M&sF`MwKG(-q~QCn9B4X|_yy{}hC}*~jTg@!q8CRa{Uy{Y)Ba(Mz(mcZFo8%JAZ91$HkRGWdB1tz=YR1)Pr8 zn!|D4^K8H)P-}FrBoBDn^=jHIL|PfO)#asc(}CrR*ck;hJ)$s-Md591W1!dP|NnLh;* z<--5@kmB9amT)PgD@wcELlRMf0``m%pnu=4BXO*Hyou&lY!CP0I;(!HHaMRdv9f`BKakPSc6&<#5z?))cuRG$RQ%yH@XUtt5){zBDK(5S`jO zFu*DbFtZJ_nzS7ObpW_jrcffiKzA`Z7R;HD5%xl`P9;fUg23DaKS;1W+fKc6b88!$ zgLdOiOxP-8U4DY>_vP&El?8643hryINvyc}PvF*XS15^7HwEME4v*gTXiO+u>=H zKvZ@;GVF6QU0CCl4Y-rnf2(swJR_~;-$z&xlicltOnE@J`sciVSAh0qhdGfyMf(yHSxypY))DF#7!;#@@0irLZI$ z|0NRT6SqpA!NS*XgSr@$>`57@RDPF8h7-mL3de(?&lnZ4^Z?H z20^@@74LIqHBZvTUXx1Y5+s6Kpr^v@pZ4u;Q&LcOvL^oz*0zxU^29o4d(qq4=h{Q^ zS$GINb@>T=r~~Zb_1kZlJL?=Zhmp?!LEsya3C_2%ys0Hc@l6K!?%ZJJY09^?APwU<ENk=P7}q00B^)l6d>isHALfTi!gsLB8`qPO(te^br8ZN4Platzf;ZX70a`?g3|(%c7lKFq+dcR9rgT2 zTwan?X?STYIR$m8#o*P~ezh?F+sTb_nTWq-_R3?5CTv#E$<`vB?NA24Cbjg{HFs=W zp&hzq7y+#+EdB3y_n`T~;uZil%~{_A?#r(USO>;D85O_wNP8ABg0YT;gU&&pP^<$> zI1>V6HMBNaMV?wV=-q6$n~YyHgSG{ejRdWjoNY@;2Ilu7c&xcxB5(7Bum3Hz*H*-O zulJ=nG-&+^kF8#PGatxle=$V$_No6~bVvzo&D_=UaSL*-8W7J)?3580`ZRrN`AqVQ zK2P7Pa+zmsRIQb^5A?nd@>-{7elHS_ki;L$9&}54$Ev zs$K=H0b`*>3{e2p-h7D;FQG7h+!s|_CN*-g2SQy%-K0Sqcgi|4qKwxpL{6oUxho`F z>g=l-5GeZ}s|?D7hR3NNNxe;$6p1fmg`hgE<=NRR9+aRkS~i6Al#JGLPOAy)q|dAX zPvoLRUpY$GRFP5j>Qr7)KJJiVj#3!=ad6rvq;8JsJ2Ee7u`t&Y-eFfN((4a7kkAy* zz%tok#PZ-0U5nEUErrgBe!=<^H68lTXg07k?Q~lp% z1+&yAwUB3UIqs8IoVTYBClYA*?!?O!#v@_BnkKOXHy4e=l5@tXr%L#V14npL_0D*B zr7>4pXMuv;S*2S5MIe@g z3bmZE>XDN-L;SlZaU3gA2W;P9?vQDBjl%WKxy1GhHqM^2G5>!s)@08^u03^Z&51e@ zWaF*~d=H^)|A*ivM3O=P`FY1P3i6ed=E>UZ0?9De+tuEUki5O#k%wRfjZU^;Ies-W9;Ct{JIN;Iex;64HPE(xe}gbVA#pu@PM){j839JSUQu zw@-YXJH8@YHLGwl;`6;e!d?;xk1_WzWk0`v17Ccs#~VdhdPs27j9vwa=$2EF6pnH| z%mI`o;Rc-OTp-HX~ zkAxt8(dk|&9`LfK#KMGcKuyTGiLZ@neGb)eqV%Hq+9Z)>P)S>+YtgHpN&0$V*SFNJ zJekj1?_Ljg&nZmoRyj5hzzjUW9Dlq=N-obm)MjAj(F%QVQjV9?T^YX51UDU2HhX2a znyLI2v|*oXMa$7cSwqb}4jK((`g1W8UVDf9GXCwvGRoOQ+;b7=1YxGX>Zkb3fK$ao z)o88t8=cm*W7dAcudfrsRjbPYtDE2om8Ot0OTF=NbNE*d0qKZ=4WCw4o;{ruD4E_5d$;fexV=_etaan)CHdp zq2<+_CH|WRS%B{-a5KTwbP(-UhFlqPf)Ls9!v7P78m+e(O$Gw*C9`Bf-o5&gi>+obm&R?WvS=jS2ME+|Xa9 z*-=JJDz_$Y%12kWjR+Kj(-}*!>;=L6z}qoYG;`|!th4>WjIM14Wf-i|kOVEY84UD< zwT|Lw=~fDx9WK@2svCS1twk=PC1PG4TL)=2A?$Lp5yK*gEUA{|z@ zo>Hjq$I_Hroi5<&oNE8553A&JBX;Eg6lSH7p1@koNakxESm-l<<+3Jd3W@8XP>Q z_{omlB|s~Fu&R>37rD+zPN=_@QAT(+Hdgx-GeL&imArf}D{GhX$#Rx`ba zro8P{qs67fIl!Nr{ERf1o)GTJFUJ*8XE|WG4$?I;iw+$s&P`9aeDBK3vU`BOPk?yx z7rB)_(1w*yy@$s04^+fz^KK@EXNw%kn6iMJHG0T&w?A>TP6mKsH)@WIzr&X~Sh;(U z($?b5t|P4nzMc6;I&=D!)Ox;fot6xp3W#5RL;PQTs(?PKg7>C1^~IsTNlcq9;94Z0cafP0vsi&t#&qxv+uW&!7RUM5BHs$;j8SECyyM>z0zDFW$}SA`cv zZ*`&skQ>*9{Q^&)_ zQ!Fpbe3iX)1&lmNqF?(V6r-@kNEA(1=q>jd-+j+JcDsw<~ZI zJ(dN29C;K~&(*>{c(KefyPFg&Nai(v{LMayoEe$@tI0LQEdl^`MFpJR_d<$5%U~Vp zD_Z44uQD)E=*#oD*jM%w@h`Fs$*7roP&Oa0hcWFS@cUTtqNJJXtz=BPX}SHK*OgB4 zJ$2#SYWI0m%2u7c1qt*?#cRiD6-%@zkvvt3S7$TBe z`pmPde;V&c;s4zXd-C`XKlbMhn7(i5k@ig?_+2eEG%TO3v`JanuVuA{`Q;~XPe37X zjh00h-?SDu+jZK89(hpLUgHy{M3upPtZ2LjZw2-FV|+avsg+q}YQF$o{A+F(A%Lx2 
z)0N}2FPbNh)PoA7`?d4K`-P5h=O0&jV(ntq_T^$q(fbsDWEa=zA(5tc5izg>vPS$!LRf%F1X(qwAL-#nW1z$tCMrNigNk7{Z zw^NER*7_%L`##!YQc(*^Y8IDgLSeeJ-kucO(yfz0pFRNQF|d30&X)0@Yy>01Q}vhq zKtH&m;07=6&l!0El{Y}xp66}F|f@;NMN$5fwZI<-kS^BIT_1z+c9H3)uI4P zi0JeV5Y4?PK>jg{;s0KGgtTxPfz0HN`5t#p$9xFVTxzz8moyk}s|T2*fYY*ERDgW; z{gJ(}A=XTCjcP@Ug=mKSWy3aEPLH-1s1{cPw(QK7DPz{Q<3VGVhiiWWWSD34>ftd6 z>EGSQosXXxtvq4ot-Ol7hI`r2^Imm1UA=FQUc~5qWXg_>%?l}HN;C0vGZ#=*)U8!7 z#;V5#NULYTVa87)tdPRZztI#85A^jyLZUqxFD4ePuDw&rCq_f)x)K_VE{GrR86!I* zhJhHV?dK@%ca9#>u_(e#zI?w$&q9nTRY*gQWzFJ%tHzdU;QgTutELBb8Fm{rgu;Td z)tV!W>|sP<32i~W<;L|pb`6ycEfakrCo;Z+8H|_3-iwlB@u0nHN1mc+opSz;(`^mD~7Y>*`v~wlGAZT z57OPC(&i6aWFketEhTC63PoHg@hca>oewG3@QP0wx!UWg97e@rU*i4{_r~0yrh@Zms00 z;by!P^7l^?405gNcBWOy4kqJ@@Im-U)Ob~1Ss=I@bsZz@lM(YVYLnYC2ND`2S6^pc z9n#Tn&0v0cf(Pd_(~Z(F7WgLCfI33e76FBLt%Sd4UZw1R{YOcmNC>c{(@GEk3KiSs zbLWHeV5LCvAx#g&jh&qYdG2bM;u?Xf?N04GoGLl3=_*(Nuj`600g`|Ir)#*_udZy9 zl+h_z%pyv$sBcKf!He9j=}A`@UMKoNx-(VqeS@_-B7f&DPkA8kA%Jcu6l~wqXC#8# zJrmGvzAVt6juG?WLZmj_J;GN#ZS|PZVd~c6n8~k|k_|7M=ZqLphmr2!Wv>fk4^EviO1QjdP`E+VgF-#Mg_s72@h z-Okq_&dN|PIOYG{&O+FJM`%R>>$oU5ZhxuBTYYi=9RKjk$w5)az1(vJ9Qtx6aC<6F zTI?d3uLmURYSQB3!s4m)fD8N!ANvMo#tvn>-MfHq$#;shDf{yx*5xd0@aWs-kFqm6 zwSG`}sBbTTZ**e6Yg9`cBhCw7CtNzY$;!c&+JAizV$1w_0jAAwX0=Kgg%^F~8@~M{ zaw13KLP6qwmx3H2q66#JOBXp{g}hX(H4SK)_BY95Q)SQ*9(FLSTy)iJDr_qH8m=sQ zk>I2RgsTZF)awqif3@zvjq`JzlZN2~E29|)C=A0Z9B=3HQ1I@>Ek31VA4a5TAvOPf ze|H#jr8MaYoS3Vx;V_j0q;pqsgk=NH<7c4!%!u(>N2Epw@+uf|uRW+>KQU(W3iibn zHpuT=veB_#3)&H?Jria`{X$)s3?(|PuVAawVYXN_dyCu4^tE8}a!IK;N=~)eu-6UU zBnIvS z5R(-%aUZ7;&0q&W0>ad3$Za}NM^xR!S+po_z2=|bH=#>Ll-#tFV~m??J94N*Qi(Epz%cJWc^`R5S zy857p`#v7I$lT#S2DZiIf}9o}=Z>s8MPT#ulrz2B!xr7GXzLwzbPr|=BQ>Ztu`RG_ zX30BLKUktt`}0^gxKO$gJ_$}uYUsdFD~SJ5t3D|PTo*tUo$AS2;szao(4V2{v6C)+ zrYhWb9$@Ybht)d(L5p%d;?GpX8Ly~`TS6tv`ankjC<>__nL5hv_j8>T{};F7gJq^U z=4rV#&VG;zhMqDSOnIPb^{Z5=mIhUeC=-BAMQNrGrv84Odwhtj zRARGB#y1}cCJMq@VcT#vQm6Za%C1y|=g{4gK%y2&ei2(Aj3MTa<$g)!xlRWZIEYK` ziO6(8h;jdr6HPw9Ia)5aPxb-_7P&>?ke_}_5}Rw{?4=S)aB04m_F*%pU{;igs+?Sato&7S1pjdso}D z?SpKAtHmWs%NzG67QuMDj)T7NIFd(58?f)>aGV1=U`qBK(tltOFQ`YK-1AWvZrj(;X8``uM4tBP z?dix5q6urj;Yq!y1Z-rVm-bWduf2*<0u*@1$)eJ+X4c+J&p^v$=Eep%4zxjrJo)A* z7~3FKR*Ka{&Pq^bK*3cQTLWRg5(>$=>#x%gWOmx(f#`B<6(`{7k327MPFR7O^K&G> zAwY49`cE1BW!Mt!f+NGoo8Sibn1B;*824Kh9wQ@5L8MNh-_<&dY@8VL!P0&!I23qR z10>=JCck&;Q2RVB~P zT-P%rRurfG;+D7w4QL86OLCw+PM4I;@Jao=Np?#g5x8(mC7_-Z3ap(N)ZD;w$6_~1 zYDaCzDqQwI#O4B*_KfXgQ`VDmxIRK8hIuTBI$Mnei33>bH^eAU^8DM+0W9Ay5uGp z<_>*fjhlUKAt_+e1kA5Sn=tN&MU?k#g_7=&j@HiX~4UanXTv`at`&DfFF!HRLx}wz*1J|wK79Xt3LcWA2&XOGatg_$AM_5QE z0+1{B1zmr>($i&RRj9Cp01a_|hQ8&P%sC>K$-|oN3(>t4T3Wzwtpr(!bN$Ly0<_Sd z=`zc!$Pl1T~mrJ^IY?aFzPEMnNWM-NRg@o$CZbJ@Hin{eaZx^ose$W7XCb2#4=^CP#}ox@@jqpuD+oUyRT7` zK6gI!ngTu5dWDc3%Lq7jn9nPk+MYXzb zQc;Ux3MC7UNPjk7=t2%~xFUZB#5-r7kLAenrIB1*W(V&7*6cFprM4pUh( zJ{%2UYSn>xCL|6ea1b(@I%aN8V3hd!A2D>gnQ-aYFF1v83`pW>u7D?cgd_63kQVJ~ zldZ?rY)N0#0401&jmEm?TKl62%y4~3-`pjw97D{NG&Fhby{g+Fneb#3NIPla>*86@oiVgrRbVxaT9> zo6bCU4EDyyox6+?ou8Go$q7V6OsU2oB>gPwPaxMRf;qWXzNa(Y8rF%)$WS7QC-)Yu zn@b7ZUrywxNQXiJo>_Di#2NQ4c5Gp%K&?Pe0`TK&ClRJWt^3aNU9cfJ8Ubx5u4&2V5XoRd*2fP&!#)Va9!rmL-`cwvnLGkgWBEah zXGB*~mq-8QIvGYd`YG&FCkOrDE)bRiGHKQ5r~o3g`_BZV6PZt3+b)Y4+9x7JX+ByPtD^4f3xe zSHc0OPd^CntVS9f5NL$~QJ6dns_ zwJsi~l*g|dN9{jabrCyo4r7jYct{*7_A=D@COs3tnat<4yNw46aYeX#Hm^~LsLEQ~ z5+A80jM-9+a~ZhR#<1!gtB;hO?`aHGcSu zV&~gB8-nS7$eK=;fBIx)omOKu6JuJJRn0>`*<>OB&EQjzirBw(-a7yCqQ2X?BtM~m~C?f|2X8CLL(Rum7OyYfUU1M%KfudtBj$rnpd5%_L^^6>OHo^ zqD+89Ie>tHsL-_bER+u^*3rP5y&-$UL^P_PqNEh4xUXNSFUiHqHt^JJ76xp0h~u_N 
zGkFrqvrId`1t0b=J0btAn+VhrdV~mvno`~^k1JP#Xtsht-x>&u?MJ)yPIt|1WPPB9W`J~jQ z%-Lqr4sZITAFWc8vuHxy1y}0{Af-y5=d_7&%pBjlUFEuJqU=~G!=V=uGA?px=cH9<14+0` zuDV-1&0fTjm+uz|E9MrIhLrG$r}rXvMWFG@osph@D=Gz$>~C!zZ3XU(D}!V^rsu6= z6%$v0HeCf0C-vVjB`qFu_`C>#TXX0JVj0s}s2(9iz4xJEnmGkxc=t+NP&B$;i0Hy! zDn=9wVm}q>WvEx0Om*SUC#2a-c`^?Rtdnwa%nl`=ICHx8>Z-KLS6?@Ak+gNR@T+>+ zm~VbCJ5iCFrgK>U;@y9&de+-r61>H6t_ozZ9@8QAP~~fE^uE;`!*=zRioA>f>C$Uw zmaMWbAK`|@Q_LCW%GqioW`s5OB>yoA;@uR)Xwi@wPiRGCI2paGb7M=D1bEFQ(plz&^Miysun1_R0Y(%~!Gm7;cwROEXd*%3$({|R5SDC>Bxf0? zU*sx@IedK;HAVDP35xV|<`Z9I?X>AtCqGb?+~gRNA4VMxsfW&2$8SgT@EGT+7U@?! zPqBS%%CqNKHztpNBZGY%6LIbR36~0;BNm!xUpK6Hoh3K>P#-}?Mm`keWzcbN6O$e? zdqaAcL1AOT<7%m_Bs+@CY9F{S<{lvy!U8vkE=F5aTc(4H^Sw)f*i}a;yuI6JC4_d-3oC z;R|#pT2(?~(}FjLf|H%{)>ps{pC)gt_1H2I)!O07syKeeq{PJ9KnFl`?MSk0XJ1Yi zG$xGh>41Qy(9k^@(`yrLLa3LZ0mVEIPEBx_Y5&@?OxpbI!oc(_AgvF&m&b`RSKwDt znTg7JGyMK$%Vgt5M|vrO8>~|w4|sx(Usvt9xIg`A(!D#;|7m7q+XB7QDo38_;X**v zUpg2iiG9%l7;zi1X}(25Q2C*D_50>HrU*@_on1@QjlA;_e8OggsHnUl5(jMhGpdi4 z-3trCGrde(9_4v)zX}B!agmlGA)uc&M`7!QN59cq^}l6ys4tOM^{=9))#)_-q*=bd z@pA!|z?S1gnDQK#C|Zg5Nm!GVYeIsN6k^L-7s#IXwR3631rqGnn=EP3rGqG^?kdiH z=|SzHpS27w;cm@E)m`cW6g0o`+SR#>rYQDQ4zFk<0cmI03}+L$i#nlAJL*Y!$)4FY zZEIsoNj+_M&u)D79QjlCplvmjdZm8wV$}K#K6sI?^U2AT_U6U_5U5?dTD>b|=-EaI zx$D@E!Tl$CwhA}F__qbsbYi^3Jyokh5}AOddK(6lQxSP&Lg?nedU>}k5P~@${U`&0 z&4TD8&FNp0qiT-1)4BW~XXPhOD)Ny~nP5jxnS=0I-}!RS9!Fp6wc-bJg>u{N%`{)b z6m;xF^C6)@k~d<_Qm#u2O8G}+Dx6%$v%c+gimuJfebE^F9C!Uma8|gQCuMXr5gM(F zd{RY|;n_hNd#283I4*Uph%4Bb1=!^?b3ZxDB42Mv?-7s%us{oI;6Lpp5xC1n`3#uw zN8Cj8PivP(qJT)TrSvo}g2QRNMPZ?{*Y;&TQm(Lq70Rt~1c>^1hU?QNOv@@k^63U7{5U~iCzq#@$ zJM7`{79;4n{i^Z+kz^wYeL-E>d&h9H1+r~pZd(nB`3x+=Poj~jG0v6?+%;J? z)nJC{zfM`(h@iko{7BL7NjBkCO5jK-v-vJqQx*2gi48LJ{nOz{VluP5I&Q6NNO28N z+Q4c=R6k`Dnws8&XDay2ijCOFl(tk(t+LD-FM`j|soMnjp|<{K+f4Xq_seuV!`1=n z!cbGW%yfSEJ*qCQjY|Op;&yx6nxscIIz)_-o~}e>dGY z1AyBgz!vLOltYw+W}K&uAGBJM2f)gGK}i@hCDX>?dPS@FtH#p!V4Nlkh&=f`6my z`-k5BE1T-;aCADq8PGBKc2gD+9Bg8AZK61d+R7qfvaL`?u_kkT-LRrt&s?x~ zL{}5A1Zd(zEiTf#5nF;R0*wHENunK1>|YFpi^ut=dyVCcWplrwVQrI~*T>85a|Qss z7~L4X$2Q6KpReB$^oFZGm9|IP${?wW`SK}av{$0K3dBD!(U?E|E*+wTULB4B$8;-CV|)p z$@FUsWvvCIBQNAk*_)auFBVcyw)ayH6B0oonoK>gCa?AtOxYN%i^sFT{{#glJ!*`S zREHR&K)%z(jUT557vOt?|GVj0u3#>dpw>-Z+3dUv>;zA*j_B?x%^OlTa2!3Ge2~K! zAOJl;!oL`8vt_A&P5g*LjU>zj$uAm*k>KIo-m8bN36=Ih0UHauP3m6FJt#X-0NPh) zf1$OH{7!{;*sDf-l;h>LogvL+R}n5NH_Q-RNnYF|585NLxSE-fI=pd9%qTF)8F$wU zc+&8Z;>uCMmd33YS1`9Onl`m^5W9|W-rc(f+dkmtf`%p=kRPTZY|GpC|YIuA} zhVgiw6jOsH_n5>>?qah;iIdp}bDW_1?0tgGQFGHY9XZ7f%E4GN&P8PblDF{DrYhJ6 z_JkXp)z^QnkDkAvrqC~QMc1rp6|U}ly)s<<@R2Jk-R5HcsDxy(vd_*8ujLST0d1OG z0t(P!0Fy(wjXPRE0AAR#6$FG6TIj0_7EgIwCN7;i#cqMRj4Rk4UTreb&!@8a>EM!z zF$(qYF5NrW!~AIyFcpI+Aj{jN5?xC~C?TES4+Q{*D*25-VKQ z0j_I(*{DmYt86Jtns@N9t~v(!1$z)it)q^y{qV7`+2g_;mnL-!`dv@{E2 zC4q%>^9WeG4VKI(;f;YYPc|bZ5COtx`e1K>@IdkmQYiA?d+fKl$t6qz>%is3$a;k( zg{bs9@5LMOEJId?xDty0VG0)JweN9_GE3XIWFqEAu`NM@#CRtymarRDTr z{`ZxH6~^~ZeZd^_Ndkj19aomDFa)R9xVE9D0t>Xvt0cBtQ!FmCu)kOL_C#(o)HsIT zkv>RTa>{lGZDZdPT70vne2`HKw34fpP$GlU#pY9+yj2h~0o?ykZflwHm!q90SkmKz zWhPaz_6ChMSklq@QONMsU{W?P%r?4N$gi3(i?MuflfQahO&wO_I@tK+?U*GmLYNg! 
zXhx?o)rtctxGu2seM$-Y#Ol$9UWPH&fd~a`YH?y88)E|LUIZqhd6SD67>2|e-N&uK zh{gCWx9^GuA_c5F86ebrZ_nU>UE+A*sJirqptjDD;Gn7e!sx6KMCCLj0Nay*>a1S#z6S=Fl z-!aw+rTmyrzTOW2@6m6*a;GCr)R~M`%*U&)cD?A2mL@MER2MJE0M{oxE1O~NK@Q986(7?x zb96R{j zpuUCP{(07Nzr3BjCPGqsLg2F*Rzvtd8bpD&#JAYP=LF9JT*|p6d>>`qc5HV&{4JbQ zB%0#D;?5lxX2s?ne!#7od$<#HX1fZ969cML^0&$umTx`68S5PMlJWG$A2}||C&{5` zaj|$4^lw>%H_CrxvJ~C}?3ApXy~xBNyJ#m1U}>ZmQ54`V%?j+8IeLuN8Z>`hE9CLk zGBnhYE3h^1v^I$X4dVGtUKYN?s`lwc;b(8ywp!Nw^f+iTMX|kl3?0C{D1qD$giC`k z*_rEaI+@{6l5kNS6SyGjs;=h#9!v-yc-9R7MBh^PnKg0*rb$SlC43qOF$`lI3*z;c zHX4-@@tL!5V?rG)G+bbDfI;g@7?CZfTyRVr1s{swQ!Bc1RmGN^w?$*P^BnYaU$!0Q z`#(SAP{#>dl3t6{QCc;VsfkmOJUXQ-ckcOJ9%iDl8Qn94*Y1xPmBAB42`~g30B$jz z)IFW23uDH?hJlCYuht* zoGt^@7A&%glqeS8_?G$fI4(@2XYN24YrP^s;bG^biE+X43eE<%vTE~WArXrFpDesZ zWyc*kPPUX}CH1v$;Z|oDBJ5Tq;IFeZ;bY8_B=PJN^QYF-e-6_>p51Q1@sIh`l%vI5 zKG1#H4_-Uxg(Jv0!0c(jBU540l1D5Eo^c~3!i*J{x&CliU66`&_a z%-MS3u0s>A+`}YPyY#ZyXkRr!t$O0d1^;Ws4*~Y6)BY)N-ymPS z9@`S1Aedx95M)gGq;avyW!b?AEtRAylyRk77d{yW5SHBC&+OYUw!wOJa;HaV>aJqc@&_<2)K3~K><(F|9VwuWoV^&D_ zF-&lEjRG38ivQU{HMgf|^INd8?YrY%gk}!!N~cKW#U`#WZF=AFb%fw8umQ8jI94iYm-)S6#Xs` zAo5-Yg>nq$S&lIAeKb}rFmYWu!G#ex8*9YAin>+v zj8-n|cE?OJYbf20hP%DQJwiF-4y2q^creIUJB;%szQ7}+z*Ct_e)|N8FaWX^GR$E- zNT;_~=275s*6_Acqmp9#tEW9qH9#p7S=aYH-g!ca86||r^g?oUzU2=hOtp1`T{Q4- zApz3nAOHi+0fceZKXa>b4%)VjS-wU5D!O|TST61GgUexeyww$_f-*&)8+oWX&rESW z-sjkvk(4!{hub<4iBuJSW^gR%z5={UKml<7H?rXuY9Dn?ASg>$SMx-!2~n^PLG?rmlL(=NAh5mJ(go9yw$ zYdLV6NKxhxq#Vg?8qH=V1)7T6WTBN^N|fsGeWEgVCjhVbnQh?ZDi0&3yslRF8?Khb ze>qWi7sicTe3s5m^+9&5`Ue9%#x6KLH64db-PzXM^qk&W1%Edl8s`!xHmgJK@s^`S zimO%PI<72BeSYF2WcZCx_EfpvZqe^t=cg=5u||BxCVM>4qmrL06K(dZ901iXJu+-7 z9k*H0M=>SFolff?Jlh{w(bwIYz{g?E=grKB5K)(hqzgzGgXpY^dF@~a?yO#)*OxS{fn*)E#=pOdJf9JDEC zesE#+Kv+!Y6az89e0!?YUFcMg;+k+(+CqQ-i}u^7kTFsd+m$L zk@}~CImWc7thb+xV0~_igdofJ&C+XIzLC@%-TV}OYTXx`>s59-<$iSt}x;eVmnHor@{0_g_uvGfoz_^ z?%$G=Hn>I@E1knkQ`50cbUrI4{YaN^p8L&d(poKZ?M(P3)t zitBdN`M2n~4}A>cDSbj9ff4L)kj|f*M1rtPFFy;L$p2C1^qoa-qdy@q4cINxk;)9wgHYf~bDvFcIv;7NL;G*+!6#Y*?d7_UT90 zr{B5?q0e#|UYS5))x9O_2zupWWE_&qhq{TBK{c-Wc7_j|h+lqsYnSu2-@XY_){0E6 zf20G?t%B_VHiiD({xVB*8GV;=l+scmX5}$|x+V=ty^I8C{w?vXdt)Rgp1p}n?vv(p z=Y2CZMmlGqyB~aT_Z$ckRI;O91+lxT3I(;MJUo&SY`P+oPuE4e@v;<-4b~%cQghV> z)QdHB?kavvw09IjY7(ON}A4j*&nxiRNR`Vjt7^P%8 z_jWR8G%AhDGB%`U{7{sXrZw7fqVQzX8yb!}`aqiXjTkoo3-)GA9e$l`(ZG|hSF<=hXDITGF*@>1{g8C4 zFYVxzkVnq@Pzn^dK%$-UmYfaxu#fAMa9^2jBjm_F6-O#GJqV%$f2tQ^J` zXZd;S+ZLFK9p)WlVqrw*eVB@xEDhUeqh~xGT4yyF4X|jl-Bn_iFjn+lY)eS-=czBf zppaP0FpJYzaxlex2XkeZ{s~^7JURJB{oEGf3_1C(G0rCHPuPf$^E~9DGNt8&!V!F%`-Oxr(nwroD`NeYx#x!vJxDUOvWvhVM^Bxv^NSZ+ULc zV(F-Xl8MxS+|+Z-UriZq-E`R#-c3WaKOri(5@QD0;lYKW)jkT8_JcV6gbxJ=Wql%} zLbcd~B{6>pg-DU1S9=EZs|C)iPJt ztywK%I^T5nGx_xvPN+TX<=$tzYX}sqkmbbqG9!A0IxZB=_F@3e_T_}z#msf>`@~iu zfL%%B(3ERJ-N5*s@2H8&l#U|Wd>6dZ!|B=bC3gubw-+{6`ZW`3TxE)$jSQDDE`vz7 z4m$uOr=~r-^jQ)~2Aq1IWI*oTJ-jDmWEz$km1XGlCk!ucCw2|-$dv+xX0B!XmD}8_ zDyS(X#yzajz8r-l%;w|$n48D!=gv4B81v>>C;~JIhrn$OQiq1Yxk9sX{-01)srKS!5ew{PQNRymKwUWls!$9o!d* zZU2XuZBijgO*K z*`pvKYWrgApU0A(k5xD)jXz7khp8aPLSB#_EshB7G?r{CYkQ2_s1J4p1;5emq-z>Ca4hiP7aOn6g^3je* zCl&8jlsH)WX7zx`K9M&mX&glkBV6_o)d zRNkKRnYOFa4xH|gp{Gh7&48+nA-C{OzbALkr8e-z6h{ir`pyWk4^G~kh{n})-tvE$ zMvcv`Yl9eHJn@~Y(xS^m{Afa)oKRdI>w|KQdeR zMc--6rMiigeJMWXD&AjAZTUW5ug7V+l}_j2_c^3YgPYHdllJGAE=ye;FA#pZbVI2;VIs5~;`@x@vMmo;^`;@#mQzb*Mu zH6g2ek%_q2X}Lut{gW)4GA?30?9PQGGh6u8*+dsrv%tcI*2%42eTap6YE>qb;o4^{ zaq*kV&C!8HypiVLN;Qo_b(D6^Fbhxvjp+L~5eAiyezv)$sdrF!lmvPNV(pf~E8;a- z3yaNMixd-u;lcuLSwpV~$%Kk>jdV_@s&j%D{`Xu7P08?ar_#VO7UaKav&ta2UlZEM 
zB;xkrw3~6NKH5!m?G*t7EDd_5@i+G6!dr3tWWtl>BGzKlUM3ulIMoU299VmstJ=lP&uVOjYw zQ0qX|H2m^&GGs;g`_HYQF}PS!_(sQggB1GNMIamltEu*aGkBc$j$2)XMM}8Hf@tOi zzXl)jXRHl-ZzU-9qwXtS(gB)RmPeKXOsw?*g-=ESgB_7`fF~_)!b7P;i2uAN7qRN$ z!XPw~=6k0P+@R#j1%0t%k_c}KWF-2C{e8VTTjT=8qRP>P2$JmcG8TOv&t&0Xjg+rD zduWdeR8UkvmK(V^_^incpjNxs3DAqTyoc5(J8~$Qjfg^9U?46u(yV)MBrieZ@ z^mUPu$yC&h9$aLmx{Yiaz}}wIWj+gN6tJ1uhrcdlE4U|hcr+@ksFhCH4nRMOdVj|9Hq* zRgDIz0-X(>cOQ*UzYg6jicaGz*%OkAwRTRs=GBVtsWGkJT3KF4stxrB?+C&2W4O*P zYv&uqlJpkHnL;T?K|uxOX(USN$44zg(D&^y?P9G{Vvp4VFhkk)Ld0u3`?Fc8!-U52 zsj`Rqyyk2`bZ?yDiTfE!p4xM}O>n{$ncQS4RXDcpw&MCOOjxzf80@BbL2q>*LX4{) zHf#Cdwu<_CwgLoXc=-=TV|R{MX$4tL0}U=M-V{hDjgz>(dQ;-6o*RQ?1g%EM(9^kyDQy^JP#d+r`|OS+N?UIbI$lW z-4%u|NbvbQL+GLMgBnXiG|u4bo`Fg}kSNDfgKL9evnDT3oU4hAXB$?KDT7+Uu!|I` z2BSE8h(~QLKP!m5hK&6Jl*kgr91gcag=is?o)M%TDe*m5<4|Io|l7lqtVpU5(GxykWyNe8r z?G|AEDTRL|xm?L*uZjnhH=e0~tUt=6OQ#Faic5sg<*4&I^`~9)bu^Zak%-_v z5LDPan75@R3M0EAHGaB7ssh^`e_OJIXn{}2zm?{9QVO6w6>>5_wFsm&nLMmgh{J8Ik%8NA6ziw&M0v41tySy1N4TvlM zyF}~Oc}8{bZcRuShb9|Tmv}YP=4S4$VT07RrfwejvkTxnVrhO@ZWLJ1hkif?%FcHP z@MUAmX|5^=!)NVhwW0OinvuBmdvsGZkC;ro1*DuV9l$A`F9TzC=jLR(&BN$-ACQ(y zLYM6(_OM!)a!jhRI-_Z_cJHaik-(Y}b%Sy7qoM$$7K-q6ii=U;L_0)dqH&0n@7_5B zgk6nMA!imR#ZURMQf=u(?2HY2Ja(~}g24}|vv%DTfKA48)D0PutYwj;V<_%zXwAFqLBuq7MeXMg%n%Mmn#Q^0erhGnn(;i$fd-rZ4)`96+8DV%3td zbEmnV!pQXz`d;!Jzp}A7Q0PV`n-}(!^pCq>e{*rab9>y7C&wQ6OeK9Ob*jTu4%JwA z2fM?}IUd~WK4!P{4Jm^DI}wLd%(2xH#k0KHs38l`GJeT@*yLqyvsZh=P&4F(dLiB7 zkX2XDr$OD^+o5X(B97ERC}7NT-QSOzjabo({wSpCgPpmGu#?RmI-`d3(on)H^yR4+ z2WnYKO}Z&^rQEWy&wCW6mL607U2jeHsfmV&VTLU2Aavs;ctKTeW$H4-E38DYJy7d| zO#gv#uYs)h=oH*TtIQBy7<313`$Mqn=NNR*=XMc=D#HN z^?Ue_I?7+oqw*JL!j&G7onOC-_H&M3yTkCwCmlc!WBQ-pIjWFL}v85*>MV8^E>dBKPN z8Q7&HWu&Dp$$trOzb1#}R$$>q0GHJ0Ft^wHnf4;w!(sJkVR`8LaU|kvN!7k@cqM^k z>JE_dRA&W@jH=}c+cfj!$}!|gl!D+{h~eM3NGx`iF)Nm(MQs=b3}X>1;ph75?r;YdcWL znqKeQgp^+`?>?~9*{@uC4u3teO76BF-XUJ6m-n5MF7SPf}iHky(`tOoheD? 
z#jLf=do^8#-kyqjkt?$+?Q2s=6;4QU=+rf5(o(k z9Yi|N$}~AJejlDeg#`FNS706{yj1~jpBBL`YRB3m5U@+y-|`N@*ATRt zuOhS%EBHKRECEH*F~c{O^c*a$PvH`yih7y)4jf=h1dXwgaFEAqKEOR1b9{H5YZTaI zb92SC?r@;+m=@n#sayX8?5=+fi}0n?f6mklK&h2|H=Pca{ssUq;eu#EH=Wg*CKX^R zW}c=Cxi!z2^nG>EfB6r!q|9PhxJi8@aZ&|z8ti@-{0Rb=j<*W?2GT=)6jO+ZLhwBc zS*jqH(TT64rWc=PWyAh{&vfUI`fLWn+~Zr5ij}q|ZIzikWHr$XeMU7J|GBD#aV!d% zco1u!4qJRu2Tv|27o!NXo~9cYpgK)V#GxJzHGu!LDt%Oflg7$m(RAhYrr!vFejDh- zQE*dKPMcF?ofC^>9zFB(%cz?19~8m33cHY99|LeidQXkVRrlcCuZv(d^<86lTBl(4 zr|F9Ae8xMfzG*^KQ@tgG^YrBmBy(Y~& z<-6H7-d~yeYmrpDiis5E|re~R00_^yBOy}0DXRxbI|DQ#@-TU-Sq zxDfQWg-rRMO(DT4UBdlzv1H1@zU2I<+2qUz`d5{V5N9YnhxUYIkVeQZ_0k$CjD@!7imO*#=D)Qw!} zODLXcZz3QDG8+c>+f!thYVzJb7YDkB=L={5{nd!O&`Pv5^mM1w&7fuy0&T8FEOPfV zui;P6E`q8TW=qVI5H5217g3XQS=kL$Cec%ImfqfHCW?!a9}WmiBTKf<>Fialqc{G{ zsty6K&@@eyvk-X2+iD}?N5P{0q$i3QXQo1yp7g2ZqgquF#4T3>>z+U0mvfiJjhCZW^RA6bMQMQ#Z5vAjRiu=ZWSf4yu zQ!{YHJu8zVVLhmBH&vB(>DB(o0aUn1;<|2QMn9)33HMyRBafV#jdqWj(7mv}5lIly z(BxwQ>x+is$+6|(1W;-`q1Xh13Myd(OenC#RhzIIIg}EWOi2;6J&w%EUF3=iB!(Ix z9I92HCq8ms5oS1E7p4TdBu=tA0^Y&%rX#EE<0I^nno_d#n+i!CcgkGU4U0lV6AG8mSxoI z1(ssYQYedi8|M)(-W$>580URF9A#A1G8XZ$cOs}LcRxnX#SrVW`R-xY;G!nm{b?hA zFbg)Uv4;!cvOS%G2pBBjk=c7_C~2@3f!r-+}qDgs}Rn zEV#AxP+8aZs(@)``;?dupOy4?V4J6Sws7w#d4ro1akuwYVv&so3`jN$m-TE$YZEa( z&N8H$D`)G>EJILd#ZfWc04tEhsEVOYW4* z)D-vlF0LOcDm#wp0TWvKA@!C1bx{xmovqI7^h)z{?#>=1*NJF7G1;2kAvljPuP8i^ z90n>Z{Z;@h<+f2Frj}CtC10^5KXRf%4Dt%Hu?>$h$QFhCnT!fy?U{ut=fVqJ zHfygE_AUQY5E#({$GC!=QZIeB^bf_lS?bxj)$0%SxtZT_%2bgZ2IODeoH2LTBVwv* z+h=%lpp-c7NSj08cjO2`sc*qs0D5 zo@<8l4Pc~iy1s5DC)j+K&&?a>1YFo)pIUx(A|I%gnTj;}l*0LXCUH zpyO;qo)CNNjk$oq|!}j9V~>;@T`_@6AwGJ9R}0w)WfC*-U{e4svZi z262M0fRabGjNXPk+Z@(&W{R22egyP^A@AS7Zy6kfNcWUWU;tTyo+5&F!xZ2-$MT*D z5Tlu9zaM=BA0_IZyxpa{eAF;CUlkI4r3+mE8J-!~nm6w@xs$*8u0RYivk+7=#ovi@ z&98%IxKT~Y!Oxce?Lu~Az*Fj4?Rw4AItBEzlMw=Q(>lVNbfWJV-_U58iM{B=k7 zNNLh^GYEsgr98wNWSBbMzq4b}LO6-V{35``x$|Y?1CF8wVGb&jFGrZM%$#?awUrMb zi6v~XD(K`5k{xxI)AI5D8vkrGU_B+%?2qs5#;4dvC9qnNLM}mP+bECAnICv-pmJ=6 zO9&gX#cQOb$FtGwX?fR@e^0T8g`P*+J|YkDN-H;+oNh2Hw*bz6A5b0pj+^tX0Z6eV z3I+xAJ^=oBNm1MIWCZ!hfZTl~azxkH|R0&n!XA~L%? z$9V!a_MyqTqO5e?`G1_G3N|iAw9U;nI`a1PZmXG@7Re&h2fJw{)2Ej&cML={f7H%a zFr6d#a-+P=cxtybBb@JVIns^G0UrmH*L|KTC}%b1rJGzuNj}>8y$2kE`dW<3JeT%% zfDSgn#vGfVgBO)?d_jq;)Ho?G_!Jd1DqdkgJW5bkjC%~`NdI6ZZlpHEll^EHHs;s9 zB0GQy#V3~}*4E+ubi`-1&faaPH>b;lw<%&oJY%#ZgpT}zz+a|dqTK+}pvz$hV6Yj9 zMDQMuk@Nrq1rhbBp>!o?_IaXO*{SXDFR#XG0V@E&&o=S$9k4+Am>zi{;r&ys`gYSa zumRg>7+i`M4#1>-sPBS2o?K@3wBB(PdLP&65%SBi&2j6KpVb@$zLEe^9P-EU#^L`H zgxNL_3CAP^lHYKW7a>MJFlb7{uL>eT8zFZX#CLpjnR)1tNh*pD|HI&PL{55tVsxXpEagCQe`euJ%jk)yMr6$ z`MzMHv#Vj)O`QOn^JI$U)A8LGN7{%Vz;t4TKWXVYG41S#pnWDo`RV;?T+iqQ{KM8% z)0dO{*LsZ`7}!{o7j6-;S-{_=0x6=#B~g%KD5eNrvQ99#t;oHa*eql+tt-IK+C@H! 
z4=+nN=-l#dGdUpPRN9(^STnTKT(u6R=>aqms-0W=x6qsxa|b^)pmFJ7KuPvz-yN7| zbIXJ9&&wkV_eu(a=Zt?FclOA0`{PUtVOBaEr!hjO0!)gpXcdS1u&5vmh%xjALxz$0 z7`cBNAOtJyOboZSCxYV3ok%(~ehNz>V;#&fmK2YR~+)>VUQtzy5)?ntdWKuuWlP>MWWE3t)w z)6OEOE=TBTN%^06vt%xN$qRfRpulxWAhHtGZSz&|meNP#>j=AH-K^NU@GIKr#UP%m za_JNJK@?F#{T|r+>p>&uEUQxk0hyh(G7XNwE-x4JPCLj*3KV;BFvVi&kdr)u^mt-C zIN`2GhkO|XS3pFu-MjU=vG^jj$&JfmPD2BaU!ay_tiKv;SY1c=%E@)(AAS|f4}0U3 z1AXBohV$=sVtufggzj0$cS=KLp{e%UveqEj@f@EE-d)&t4Wc(5ZL^X}PLr}3{@*#s zv$6Ej%a%%18}DL~%-A{FiaNMjNA{(kHy>>qy`~!~3ZoC0CQ;Ox*^O-*;hxv{{|`({ zkE(O_`)(scP7_ZFB-usKe5BsWOpX#-*k};mo0o@P_FlW$U26`e5mZz9IdoT0Q9r-T$4Ly$O|R3$5Ys;AM9@PVW@2`=+pby zh~r(@OG5yo$=DF27UtId&WKzj!sEFLkAkpZ*E&%eN}lDWbADquN7dKm-Sz5S<_t~V zu%bE|73Q`?n{E~V!G)AC+J->eMOug#h24D7ms42J*tQ?b3N9N+@egcGv-*M* z`?V(nWgLx_WVJ`X=KEf8pAdzd2aDh-0VnR)_c*jvRV^CU!0BSD$`2-dSMxH2Ew;7P z5q=E7AWU>YHH+7eI1U2=IRrsa1zfOI7axpM+s>h3-M$Y!pN#Onhu&3q1mqlVF;qHW z$#@B7Rs{>h(~RaEY=F)7z3{y`o3pk^UebhUECazFKaxi09mb&#`wGq&^T?R+V=ras z0T6)B1~XhMEsmxrvtsf*fp#r6hnL#*nTkLM9$FA(evy8MuDP|_#-gXYO_zsgs8TBX za5Zn#2nkJka$_}yCqS_)TY}`fBv9o;E07WdPF2y((u*EwW|d5`Q!(p7wVY~(Sg9A9 z_JDkuV-_k^1{2^pxI0l6eOe^=UR%9J)6g3T!63wH6~e&$4Kl>$=}-gA)E@xjRRWo@ zj&;?Yx}n4etLhJrJ+B*o5qu#7Or^>xf)ZDq9X&WC--*vPOdvo=ilu{M*PDdrZ^AtB(OM z9qT&`aRE_dj_SJ}>oV^4+br_@pa6!%+&sbOo&>Y*vvslql~|G^Ba1N{wv=84EzyjI zc>je)55d~dFmVEO10D~}(oY2ng9v23Px={peJaRSP%F@|^(W;X97$u+zU)SbV?gb} z@IJ=BA|&s24HdErv5r|+@$kC5L`a`>XpS@jC-U-U`QXLOAF7PLYbc>K){7sb0^t8^ znD9_>r=ZW@6+1;IYVJ+L5QLpIiu{ZV;z6_y)?Kx2Vo8?XJKwgPu;!A}xm&GynXb0E zQcTkBoP|7}ubhnn z+*mc8h!MN@looVZz+c4=YLbcUC@A5PCFo%6vb}%5|C!58jPE$y9wlO8_EJVSenHZ4L0kzp`&ULV3@qW`rA2NOYOf+M#{02(LlUR4>J*-J7( zw~+|vT4nE->N;!L%450S+-X9jF?qu#f3?hI!<+`xxiZ}P&wXNS7Zf9BWPFUx_xzrG zp%dUMt$rbRNgJDYxZwHASTEtUXbfLOcgpUad0fZrJ|xG!W{9D?2v`|Qmb?o9-E z`4N=_9*~f4Mr`W3T%&?hh*o99JczWHc@p@6<{$zeWrDBtOea&3uBRXO+SBz+HL|eK zBzF|unjJ1ak=4G!)A3FokNixjsbz8wXurQhZ-Ck`BKGTOeZWTqlT9ZN(g#J8TJ%;|7u-;zP@hbRD=-aHL{fSN9zvR4dNE|g0>rB7l@@N+G;QcX{thRq`enb8 zkR!-1hg$dyui&ut@-Lpt?VSW76f(;5(wL4T?}U=rfhWxjC)rP7w$W_!O!Fwoz0uq` zX-PnSqqu1i*}4~=yPnW_If_XtTez=}HtGNHl+>pGT*g{r9Fz1Jgxq{akX@X|_HLXg zIgkeqAtqAYCy6^Xgv@`biFK$Ju#>5Txw6S^3l4Llv^7~nmTw>sy|Cvi32|EVT(RsY zlQmC>_wH<1tArRhJPSM_UZ}&~de}Sxngeh98eDU}OkjKmX&x)ha(o?C&4v%1Zm+u9FbQ$D< zwp?H*bZa9{8aj-y93^Bb6n;j-HxYRfE)w&$$Q58`PqxyCTsjZBG;AM5om0*}ecRfd z2Z*^dRhZ{S^DYt#EG=HI_vboOw$U3Ugm2lq#9wGsB8Sk;bg*aNvzr8@bvX5}X5{5h z1F`9G5|4Jz=l!L#qXgQ?Ue_;h6kulh%LOIvD6+*%I6L&i=>ZIL1!{Hz$w@cVQIbLV zX^i?r-c&Gl2v7k{;6YZi3Sq1P==bRA)SEH>kbwn@0JIuc^H;l1KrPEz8ttG{nIwxK z;o;B2DB*T4+mYaD0A?{Md3>(`!ILlax{uxp1aMkm#rKDF9#j4QOuO)F!XH|K?05V7j(eAWwe-9Dwd_CUf-v- z2v;tM*k1I)lw=mcU=Q_krU^a*JN(2-?9djE4`<>{-6hVJn& z5v3Gz=C%jWtbB7v9?(uucH)?S9V*|y`KBhIu3m5IpXyHdFhh7&qXz5}l2$V)+RO=K z4`S)4-#m^;dM}Y_OE9tgq`IZgdO~ueSTVLh`h$-k1D-7Pw_#+@YmE!cR!uiu1unQ{ z2K@FVDBAhyM@Fa-JV)$EYg(mbfJ}7rw!`2nVmL-Z*v5iultFUW6|ndzKd{aMI6c-| z-lxqIwYv3RBb;E1Lqj6rIE#6H64$fbo7`Zn$xpl>pbYG6gWc0Q^%A-{OkoTX#Ef~7 zzQrJ9s=HpmQUK@7{)~P>wSnzNXRKN~n~?hDSDDsTOuuhL#AsF?^X4Ve1+?3_TszGf z+~WAz>1R@llr_ak=Oe|QMm$Gl5wAsIZ-K?%6`hMu+TImq6y*kNWJ1P>Q7Pob45r)i zkUb6gz_bZLy^rK5D2`XTomCRy;06_(B#vB3+2l;tmrKI1d6;}R5FI!k6T>(tLmwde zzn+1OrUV8>0SXxTEO_n)8au>te-mgr0DC?T8La+h&toiIqQT~e*q<3ihA}O4yuOYB z4F9@>0UfmXs>-~2`)PN|52c3xb*D9ru7o~*Jm1I$J3d|Tp1__RR)1XR3ehbq@er#Z zdWAxE77jX%OhI2>3PhPCLWrwIs#npPzZVFDm{Nnd;e+O;9mysVmJ#{tCva22?dhR#8~UAuR!wcl|?^XzMJ^JA8*zF|4`U%t);?m9Y64*CP5Uai41D-^f-V?T*U zK{EPul_@4hZx#x{XmtF-G`g4>p|lMk+j0AmaTu2NL}SWJ@QnpMEKF8^^pLPnIKxd7 zSmb!70cTna3Al5syft++TfP@%7)gdDqQs7tSF7#zu4aI?Xl6<1?$2^ZNlaPjU~Hj9^m`Xo@8UK 
zb~*5P=#R0h+UT=7p;8MLh}je{u%NWLHY&a3r!4&XYBn4f{-IU9j8ZgTAV$lV?tQw| zO=n*TCEzh2Xoa@O{XqJ9#x}?H=hl#H{*v`au_zGP|NH%VP`at>U#BxGjCi*$rEQc5 zay2-YN!L@XeG>R_SmjSv3|k<9h;M=;ob(H4`Nsl1m%PDwRWMrHf{JgT5ebBZUm~=` z;i|{R9IZ;I^M2NZ@Zc;%7*fnG=@s+*PC`|>jmM#p{3fPE@D-xJ$dTmG#AhT}DuX zC5pARfny>40_Ao8yR7MHu2}CNhQGzjC>!uTiYdGFQl(38Y|jN&9(om9P45g=$`9Hv zI|kkaf{;ylXh!mx#n4olG$L9476eL%(W-Se9~n93)7BqtVpSbEv%dSR9G=h+nbhPj zQM`fWd4Gl1t9;3L6oJ2RrKaU2b8bT)V8rFBNdbEp2!8!@Goc=F(A!4Kzpq9;$+K_d z4iI1@K)pM5gbqy%q8WC9&5U#@+Q+S3D)ZA!#Pd6_4P8~|RCvS1wjZ6z$h_;6L~Zz~ zVP_alR;4Rd$jLg(k6Wz)KZ18=2^0bYDOq~c_q*{vef>tm0T?vmy4wlOyv@7}nefag zps&v;S6(zB>_zhDcAs1pNlI4d_-Qzs!zqp+C{-8V=5f~R zqG>SN0@SCH8tWWzq>bRo7uV3c*5Ew20|*{(`@GTpPlk;f>qq0Kgj(`_g~&e|dv}Fn zeihl@CGDTwkpj3yHaklPq+dQ|wk#P&aO||z**1yv^Itg%o9ET|%IxmS-+FK_+wFcs!cH`%p(E3@@wE$NnsuQ@^R#-(U2ULEMTg zhUqb%c`OyNCe1s>4kKn{9L$rbvKE?0gi@CF{2hL~v%kR&`iM-2vn7r{By~n%|3l#I znj`J5?|sZ|Noct>N;0$xKj3bf-#5HK|5Iu6WSQl*;j$Rj2__<#Z*_WNd=qUO?OQTI z7#S_rZ}c$qrzI)jvT`8egO=H@^r1!L-)suPoDcej$l5 zDyHO2HdeC;)ozZa4fVY!rPppdCDAzECsX>Vl-YEmtllBsVt>;N;u~|Roi4LMX_O_J z>AEF%>B8M$VWKk){h4y9GV^w)i+@E=`^=)KA^khz$>&(n{P1-GE1;|a&)!dKrp@4E zU#WWuSHL{0iuR_g*8Jmgs;lUMo+Pxkkm(#at!FMR;3T@h=I_y-a#Ku{WURC08d#wj6|-$Luvg|XI}adwDvCC_Rg~n47KtOH!BxL7ud4i`WGwOf0YSY z{DaI(Q7pWbJ-Xwg4N7+nIgVLn)0u&a%iC1EV@-|P)69Yq%mKh||>kAJdqHA?|~l2;7Jvarr{|1kTEL_1EW zkQ*i<1TgR$?X{b;c(Xlf8DEX^3r%#XKI&bv(oxq7h1DtV3kFg<3D1T}YFOXU2r5@+ zcST;LqlfU+)u2@UAVu@`_IzO@7n2fmn(uFRjBPa3&5X5X4Egq3p3lh(lNv*FNotcp z_#bDWzn)r$iQBxoi@E~{<5&-L%|q|c9w7zrSsJ>=rz3@0PY#32M1yJ1CAp5#7X@!i zka0X8O23tR9OzlaXL9o2h2;OG!eI(;z&)xHRhKcalUl`p1dNalcW$&+gO)s-%3s(` zR1bA*)%5)d9!B(JL{!TvKmSXJTgQofo^gMwEg{m|qV4Uv)9j3`TpC$4%*=1#md6UB zGAF9O?q$#S7sw%7R2+r;mpu}~q{D_=OgwqX^l8yzuAqAhw5waPnMSceY8yJ z9c;5A{~WI>v?j!GD0m+S0)1f9DKS7Z1zhJjI2F*wQyQL7Z@?3;k@6kZ(JDDt z#kc-887dP{vHFfWp@@RQ*O$#ZR`QsW3Gk`8+O9T7<*^uKBdJFtiy(e=b>Yj@1^`M48h5*HxSmb%~ugFx0rikvIDkHyiy{VF*&f;`OI*B-~fG zPa&m+f{3Va(q47MA3vi6c&CW_{@2JLh+kJE%{~!SdLHyILthe%)oQ~PVYz1@eG`Lm zmr<%^U12FWbs|sw*{O~Ca+(((9zst1fA^*_*;G=@1q|CzvEJqN(E3&sxuWO-uyeYv@8#`2+9{QE>88k{fRhjF>>- z5>o>`EkZC0>%7MBHR!;U>jhK#oGlHefC*(~!lkvesmg%IHUHvqOd$KTD_BqZd21-U zrkgo;&`wZRxTT&&~D1e^8x}3?PjH1Cl%|=mR8?u*wI`Yl?&!m)H66Ad{XT2HWR+J zYgI9)`1^p5KY)!%!mh>ojdl7LtRY7u@;KO4Cn4TuN{fss0Z6lEJSBk(C_)iKHwIN_ z0e|Rfn6Ebez+**$h0?KsT*c1+d;$`>rFaI+KcSb1XhTNCF`oW>FiWIfDlgnvrY4+@ znez6Ziv?=>^a~Cy?=9oHsqqDQmcD~;J`ZH2o*k*luYsOdv@Jt1(m}?}3BxPl2@HLd zar#ShH<4Ca-b?}0_+Y7*BxHpOygh`78Wv{s(353Q(}&1Iv9bWSU4+CtNE9ZKt{Q8R zWeN=O{G8~DJhZ%%3rf@|DzYDhF(pPC*9gwe1X1QR1DFBH(`-z_zoI($+Tg^qoI1iJ z?GwLwDdWh4yDsA&}Br6bmnehIo7~n-ab$r6rvdS7|LDac=ZPN7-oYKhnBuowbV}2wE?>j@v7;4MA21}Tb4(q{ z6ErW>LZZC&WmUG2;9C4R-VG3iO`tA8FLTdva_T6f7nHprwFEMKGs6#xVTN)k8c(1)FaC=!g+XS!GEm#;Yu8tu%8WJ_gvLV5RG(sG=vSB2Tvh?% zfwo3HPE>1eZ2F(y6n`)KMeR7I#IJ?*7vqr-LATt;zYMs4A>4(|fy+zGCKl*+*?y;} zj9?kRSPT!MRdtM=dQonUo=V?PIG;R(FyZ|`XmSke*s)Sy{=AwClq(~Oe~R0D7;TD+=#*0C6Vd-XH|0mQS#r_D z+q5m_3mHwXk&LXm`;G5o!5hdCnI^tu9N-zx1t$a98*TZuyE98lVLyuP5!K$XtKIfo znEX3@(mneoeUx3wvV7IxKZk;`tDM-l&IRjRr_kCVz2x+}`_@M5B`19%Y}UmlIDBd1 zp}McF>RlAr)rxFqx1b8Is`Sxs7;$=caYXL6L0Ll!x|B#Ix0mCX6z2z~W@`MM+xptZ zW@0ZwfO!GuIs+yS%_G)IQir^LJS`|?I<}w>E43?lwf3ACsP}zoKW6C}xfF5&sjXGn z0i^r4b{#L8!UD-v@~``TLU?=cgkd;FfWmkKZ5WB*FLXCCOrwf-h38;4E?nMU8lwU1 z3Wu~H6*mcc<(tnwg34V3EF(agE&b<{_41-z@zQe|&K(hS1cfY|$dz@rPnGkk$AxtL z+SK3za?A>hvJUtVpt~>yBTVQf+OJq_eJ(6fN@P#4v*xe#b-3akn84t_Be=r7G%=w# zr~C*HDSZuCKGt5#QYy5KoK#<611DW;P#^!fUG2PSzJADqy z#;mEoC@f~sTx=pA(C*_`WmFZf7^tk3gK_uh3+EdfZS7U4;iIAM_s!Yo6&qiTF5jd{ zLwfEkeHV|Axfup!hSchNi_b%!dygGcp-7^WVGuWeplFnAy9EA%YRE;xe>Y99LRn=R 
zwspfFl3_LrfKDK_#vc9$WFhcF6jsmRN)=Hc#GOOIB12(URpg4A>ukT77v{<@) zNyvuNauZE?O-8D2vvzQNwHn}YYg;#O`>k9rk<5X6=1Y?V&`Oi-eZh?RmWK@lkJnbe z%{yvTNT(8FBQrL|Be+AzUg7{*d^Ql`3CqDK2ctY5o|{9)aSD*0s7oSM|AuAiT;QP} z%;MkCi;(l5p2Qd>ai=pZoFpu&-nl)Vo*ni8S}tZvzskWyb6IS~`6s zuRYyyjrokI80>v}`A$(+1-*IXdxotF+AqcV_`(>S8r3UoE)E`!3#;&kcfA~Qld?PApcGddpd&OlFiGV zoBuk7@b{A^>YliMs@4~5B=+wa`Tk{iBzBqg+V!JG=fSrQxHmS#0)pkfz$!hWW_p+C{J2|-UdJ>4zTfaYgTz2mu!F+I_&iHoz# zO^d;~?Nn}>rfPa2($M%1={}U4o7+tGMf>1j@m5Qwk@qb)*k4IgYSZQ(s0P>~;SohX z=`BZP7b&23^q`}R4ac4LKhF&xD)7GjD0~O>uk4_Wi_v*cl`uN{>;+gT103x1&G_m(@sp}} z>{nHM##^mwd*Z9>R$)FSSLPoy(j*5?VUBS}Uxrdsp5&%7Vc6Tx2;Q~AyB^WlB-;!G zxq6Tyr>a&wNe}H&UPfj^to2426R`R9{MYcCO*vkN(&pt&NC;$IUWxNQ%hc9 zZ?c>*%qK)IZET0Wl*p+kt>2kkDJN%#w*^83Gq*+jw&DpHL(I_}$e1|N60%_j&p%7l z1DVc>;!zPnPZoIc1-kb0^u4g#Ws5FBEb+btQo{(a^DCD~qqA<#uI)N}2)?A~KSZ5R zACBY66ZlzvqCSlXw5LIm@?6V0l;#~CI94PaAh^OBZJ0gR%o}!$pH4Ksde$kxoOT&l zs+0?K>1L2kh3<^rmYS_r8+k0rK#Mi6>bmcdyXx_fcUlx|5#geSbeyb4Ze8Xaky{iM z1(sMIqI!}}-sBZd##4o5b3T^H=VqOY;7_PM`Fn^YhKQN%$V?l<=UQ?U7`MX5%kFm0Hr=$%9_=}4F{}) z-n|N3SwSxY22xEYGeQ{8Z0`1CK@Kh8T*q!~C;R)oJ*@N1Wr@(bLf7T)P;=Hpp@o?t zR%VZ`-cP3p9vUVuAO<^eM1PGXFi^1hWW<4mc^Bk!MU}PJ&NepbCQmK7TX04$H(lFM z4`0`+gW!cGMIuEZRlpN@#WmWkR=4~hr0VEXp$!!Oa~$sV{AqUcLjLG5>-zL5ud=jF zkP|8(?Bo>ZUx}hnBFQr$lL{#%wUs((`?u&q*@Jr%p;$Ty!L~2O@~dS2jMN^CjKRwNfF69u?6!F(05J^4d`?4G{|CsO0%wC{(kq6pE2r zrYyBI{f$V`#Vm?Xh!?%o-}UPl!sTO`?Yy!rUJH~_mP~@ zJwcgu)e#G3K7r6g6qX%4Lj5-$ds<0>E3 zf!S$vnV==eI5Be?5yna<2?nB*6|Nl%UfI;9!F zCOt+dcsnG1?N#K@i$$b|GyUrFWf+w_2;&{J*YhN!-o68zlc$^O-!HANNd-W|7oS-$ zN0S8+^#f2XN9BQVQSwzeVH_!bB<_!Jcc@gAhs9RJ3YcpqGv5@H=68q;AR1 zW6#ftpf!OXXssj-%+P6aRye3%XDz(8G=w40ZGR9%I1}k5G)Yy>*jf%$U)D8ucyI05wne%|_0T8f*;Cx@= z602YKVPiY^Avm8roa0OHWAc)fSSRQQwS~UleGAesmYhyJ(scsysE;_Xths%@IY}AA zOv7r<9ORRG+F`0zg=_xxe3JEdZpFE&ER4R6UYA-D?R6ZuN7<3OQTZbuyQLgN;yS+~ z5EQ*Sy?4rUYOD~#gQR3u-wQ&T!B8s;22igkx9ELE&MrnPX9zS#oLe^-*1i)~v>{%s zL9^_`OO6vg@- zpWH?igEH!%-wEYNwqe`&{xl@b+%pWd5Ngt|3lw@M; zem$!ukwxYXte$F;?`9a@@MAJzcQMV61c_Aah2gvih?_s%TrLA?s8VJlOZi-NI(g;H zspxvZ*-W{dkjp!kGX-J%OVx>p-fwdh_gruZQ4`U1UCjCxJs4YccyILfo+`tS$_sFY zlhaqOZ?-m`jx;jKf22w@^h&>G<+zn!eT6nm$Nu5g?(s8)|7L6UJBGJ{Mo3|SbvLYB zbx>q{E>8C$=M@V!ow`Sx)1lSKpp}@$c128ekF=bI_pF;Bd^MU~d+x}|;WkEewrHD0 z%@aAtk5KjX1Jim|=*x38Xkto%@0e!;R}p1Jui|-JvqCL>6;$k#!EA5XsSU#HqG*wc zf_4^S?FRdLC-`aQ{bSHcTPW(Wbivn8{B0bfSrG~S4zkd^kot&bp z{*G9u(qk(`$e#MyJs++>1S7*v;lcD^u+)sbN?z7 z{W03P+}7ECXm8cPNAn8QYo8ka{ot+6`yjmE`tVihSe4C)bGgWGx!V@%cF%Y5gX#VU zX9HMhoTe*8hSG;%tMHt(3iC~3cpJ9e)9S#YTMA5q-fb;@8|M{rW1X7Dy}G~oyJ0kF z@Y0PQfhjZBL1|Y)e2V&UI=iYaD=>Z9_NfcT3WK_yFT@aur4@S#iBKQUkv-Qa4ss1V z_OvRi!?(apm4BxjDDJ;Uwhvzk{uLAx z4dc--w-kdhurK|E0Bp-93AI%Ll~7eK5orwH}<ljToYyh(`mRQHCcHfC3rzQoBFJ{E~ikKQ;^0!YUD;?tTEOO+;a1Z9r!MhK2$+ zug8rPy3)TB4xtlV#^b}zE65mMpvml9lLIo++iPa~VGL7Bnwv$aniRl!r-#eN_o;7T z(8}X?MD^mt2{q(jguW*Qw8AS`PPME^Mc+Q74}_JVx@W_JC^t^)LANNHqZyW#K|gy4 zV(jPlga)-3>*={V3g}#J$8{Z+?&5Zr>U1WWjDDUTb_V*e&z+4N2gzc$th8!@=guNO zr$2Ek-q>;Ch}?_0%iQr}HHB&8U76^L-=}HEj`TT=bb^4(WxqDBf^3DIhLbAG_vY-G z(jOIMjQH4OSb9HcBpF-B=rT)-tD2GqHFAO$BP{)uk~ZH<($A2cK+GIy0fa|=IjcrJ z$ACBr1(+v87SrxvMR8!#K!2ES$J{LLJW!UOhx;6v}`D`3#`@fS}sXeuM+ zQHv8%BM(7&${9FGjlyD#RLN5z=;dqL2pPuRSrH;}nG-A}JaiC3K{8pH0ae?4=rcTL z6&(S4HfJlh$#4<2nmA}aOAFp<+vS1u%}qqSr&9~d*6$${swK2b25v0} z?$Dl;4$%%P>|oV4AEpDiR5ih&HB;5h{YCLoc)9p$6u$MS0X;?>1umQ5veHw%P!<1;JNN9k5n` z0mBt78fJ|})K`8vUf}u>J%Dyngy{_8c}uv^fT`n4)zz<(-xkvpu08 zJ0M~29TNjTXJhG-O@3`s2gjGDdfELKC5R#Y``%w7hH#xQjUKX`N%@a3bq7>NLL99o zdZ!N-IrU7&tc>PX% zX&sb`N~60XH?%X~->&)qYiZv^&kBu8H(E?Xf^nY4>7#M4DhoF!Hk@%t3}pD(Hoc8s zH831qYlGHVUHX@d1Z*PqzL`q$;I+^6J;Hc8IxbF 
z?ZZ;UKR691c}7&x&2GRMQ;jS2H~tO}3&XH7lf?QC1XoXK{Be}=KL^pgXlys9A%Bs8 zf8VxN+@Lx7x601nqj%#*ThGej6>}zaz5d6i2Mb&beG6GlUhqxK#NL293;^m)t05tRZNW%0lCHNy5LTIMLrDsc$<%Egc#; z5=(>qBL_--FNvn811D9t*1)rOy)m}a?pFhYn}Dl!OMCK3pxqk#Nh;9$4$D!W@CY}2 zf;_D`(?S3c5KtgM5I`_M2tX)67(h5c1VAJ}6hJgU3_vVE96&rk0ze`_5mV48SbF9Kbxl0>C1`62LOR3cxDB8o)Zh2EZo37Qi;Z4!|zJ9>6}p0l*=^5x_CP z3BW198NfNf1;8c16~HyX4ZtnH9l$-n1HdD|6TtV+&(D9}0N1bocmD;b{pTN`_Vr&K z;0FK*7zhYt8TGGqKd5ZSLtJtgPm*v-s8T_-o!&9X{ChP9?_K;TB}+78=Cx-yX_?*w$ypsoq4x|_h?|TprMhvXaN+?& z(&4Ua_3Y`tN7k66>-R@6FPgh?R#8tRd(PqGN~LOz#UDpBLwW?=Rw@ibs;g6Pt5^@0 zb3Sm=CH_01vOu>vCg$E44zVfwCVMKA{hYj~c)MUl z**&`vte32cJQYz(8Lf->ut)lzu0M^&7r45VElJi853^Kf)jT$eL-vrGh{3GWa81r0Dx|uv{volZ42 z3NH`tI#h>)5eTx!K+Ez@vk!;dF5Gk=U_{(46J5-RIL+D=)kg+v>awTOjXbgAs94p3 zYLSuK31q_PXNw=s;v2smtK>%PQ2N!*vPX$o_D0eLu65l}zw&1w8bzkIM>^3+bou|j zY{>@=2LpKU-aJ`<#=`z2;-rCU_KacS~;fF-fptlgq7r zpF9qD28{FRwY#y32wr8W36ay#Q3rf^>$qmlyyoNk6 zQt8U^n2it}XQkXIHXhgAuk9FTrl9Hm;oW9;t8oaz9wT$ENi4|&nG^jTyJabvzwb2H z;qyiW?4GPY$+`yw!RGN6#AAw<4x~7ah8+#RxN(4RN;`t|R+Ih-g68ODkr@3g?NmB9 zhOr71D1yZwSi?L(Q_m<@oiq=S7$9T%^MRt1t-Q#%a1ph~W7M<{r;M_@OOWKgPcgpx zaSi(79Ix>GP z+W-~Xv~0a1$sX_Dzng8uz%hvEW;)|7ZT;dTpOW><#@S^6s)-)grjoYiiks`#?_7@k z1(iH(%jJ{f%*5=k%h?ML&Z?Q#6z47x7i8SYt-y!~s8(bp4dPt*`V{W9NGB@? zXEkOnJy7w@sBRKJjd~8axH~Fed|~@`ZlGc`rk~;iVSafZRIkzx3gaa&n&`S$pDp_% zmsH@;6sv|X@5U8DxRHkoBm+jlQE3+BfsI5qRpW2ivGBxW_;gjCVMX&rT74{~y*7rl z!Z_ZW$fQ8*N?%>`-}SQ|yVr!+5wp@HQatSDqIVP1>hkR^8-;?#zxq-@lUjDgP^LK< z!X9Mg;3bxZsXf2-JS?MY-+-$1+1RU65wxmB{}ADPBqyGgCjJ{kOUjEQt2%<6r%>bx zJ2o7Mh64?>A6UobZ3XW^{a4O=J}4+^(?3Ea9LFi%Q(Xj?;p%6ddF>=?mJx}_wEf5Q z=Lz-;6flgQWRXSd{rFKBW46ylaq`tWTI<5zoz>X?aVxMB)dEKaw%B;K>NeT$E?m-n zL)mr9Q}5oY1S$AlghaGvZK0+q@Jndb*%5jzf$A>?PSueRLV<&9M4?Q>V(TjTOreVx z<>O*AUOCUGodW*`tT;vz#joeu+9)t5Q``!W*uDVE{Jwf^skx;Wmco}m*jq*8sgc-j zhQ8;#vg5BFG1DRaCcntkt6J6)KJj5+iQ|vvHPXNR%CL0~->e{RlpB`| z1!qgDL{GU53vND-c;a97Da=m22b#^(I_=fuqtyCqxFA$0j3n{aYeHP13%>_dL5ZM%pF@=a~YeRG^maU&!!8E;&e zR6UHndvZi_d8MTO${zm(Tl;|QAV&kWB5ts(GAB1&gP?C7oMa6Q{z^+?G_Ral*m$-S z*41)$renr{JRN~^k3AVFHI9@kXu0^a=0V=;O3YujZ78p6!)2+_f-k&#)Yxl6+UC^H zp%{>F`?ezV^Q}Vd9w9s|8;xMxz_`Bs=NgG}SWf5G!hCFyRV#3~K$%+fNMWG+C3Ior zw3E~th%}^%j%rqZagHA-G>Camyp;ep^O&Nbpctb;hWPUnuVLjo5X|UMIC{S{S?JGT z7-KgZ2;KNg94Dx$YXBAXFY^;Mjh}3%IqEnnbbVM?D}&^08K;sCQ+Z%iKc$K|w0aYD z8Qsk*%W;9MYxrYn63;l*ghsJ?3PZ~;UiLH>?ua|}gHpIWftlP&RMoM?Nm;8E9n=-- z7oQpgKFdrD?J);%2?6gapz4B%U5$TcQNhwm15~Bo`^<4|IuFlrlDgwBs5WybReBS| zrn6@&)@SPcHWB;>1;l?OuXTZhLsLGxz6T#rquiTgre0qTbJSS|oGL9?!1oqE#)pgG zn*E$z&Cr?Yho1j;f%!iV76rN;l$S zxH6f>!?K*h{}oQrGh3nHHb}io_{N!=5jr@+^GX$4vsVNst_#HPxfCqmEucH4+_dnc z8ADnw?)fwNAuFGKt9;^~XcIr|tS2lzJp-B1R2Dl&G^G5ZrJO;MY4jJ;9UQ1!G?hz$ z&s3{(wHZ0pUn970fxv*E_!|h;ok27E!|N+8Q?xZl#`c|`Ys&4k+Mccy|75tu^AMl0Dhsy^(tmDq2UKABC|o`A6L zYqYWqxKdeufrxvU2Ike9@uUk-8uajE5zdRe6)~B7)5J*{xoR!y%!#a=KWZ~>{{%D>NkfS)S`g!enY z0eOxqz)isEj||s@h)})R`SItRYC*@-6U!JoPMgK+iJs!c^}mAr)$5G49L%G)vakDt zVjG}9)B*!Mr;?1QTGh2sVV*ZYhsVrgOQfdj*Ux?#OFjO4U^1oko~`LDNcQPvI>I{A zIcpfnGITgnS=w$KET;Ji<1vu7*WvpmZ#=_rRB6k7q`|)_6f|&ZArXX#D3SvAR4>4! 
z+!2CzIZqU&FT=8Ex-?cQ5C^O?z2FHPry+542TJt6gU)9a>nnWRx3y(5HeR}YD2Zs% zXpoj=b4p)DV7wRyfL$Hnt8k|6p;zrDTKI}hjSLV$rGlc%J4GJOZ6i2I4#**>XK@%j z^P!UA!z=}jt8|o_Y4}1fNz8CNb&FO;pJfY38>*U$dtRFXfko*3VLNB1=;&%89Ofud z44FCXv~34x3) zNMXyM=N{NzO0=aE2cgd*+{cs8bjQsm&kGuSyn=t8`R8@8iOGhCe*ZqxeL4i?m`N=7 zS!&WpRMBL_$giMilCeU10>p8aF}B=SN>$%uvcy{!CMaLj8TEAK#l4R*2VM=U(*jJ; zD}9kzHbU6q+JgqWT)xKO*yYusezp=r1ohPYR)lZqiz#C!4nA9ox zA}_m{Mw>U!SsUck>A$N3o>pM2jiv5e_?QN^CWZr;zj@I;oJjNrlG3L~bb>#8t}&ctI7njRd`Sm1Uar+8 zPVOtQT=B=qSxi;iTRblQl}1OtGPp4CJb~=8r`=S1wy=G7i{#}p|Cnj`nmC!J=v4FfVbWi%cl>T`bsYkS8lU~KnL51vaD7-qqZJJJSJ>Y;*GpPjv8tR#7UZt*c7p zeP384ydTDROy(GsBxT;No1wW%D4jfQrl`qy_;ZQSx$@cieJOT4&G~Z+=Yr>^_N#rM zhzk&x(lT-CS`A^=FU{}|x#f-cn*>q#>i~E_uZCT-jQ-QF0B^N~14Z@oD--l}ycd%8 zeLo|(^wI2qmNK&4-keygJLhYlV2&>c>C;r1xe~A}l35ER z_?YcI+}?KUO@=02e$i_2ovG-OQ_Y@BtnOW1u2%R5s`MU)MNdrG{CuII7DUmZrehp%`J z4V-akiHCh-WaRt4!-YP_nvxnCL-1#_ z3q!LbSoFjEB~jJn=qgt%5B=R_1s++n11?Ym|Nf_}{6|b64eECp{=|Cv3sHwcrdH(& zY0_wd_G#gqBP|V(=_2q-1t1?_1s8@&bVx zmmK#O&l>KS|L0jg0x1noH6Y?*}r0% z;X}j@zB@s>K=}W`>HXytTrRBo5$H`($TmBe!BhYyCcTQg&w>Io5yx2kC8<4r@7;N| zq5$N-R6f{Rb8n+X>{@}#gVH9&)vIGe&7oo25tX=&1|~2Bj%9U3WaRd=d>~pZSs_h2Vt%Wz*r+q!=bfvFqD(aZ zAoiCIR;qr_v^ghxc_HTC+p0A?p;)K-Z{;L~#ppPH7!Ass&XF$>5W#AidP5Yw^x?8j z^chtwcJYTda8g%X4Ie$_TW$h$ln3|+qAe>K{bRoeLEe} zd-aC1aVO)R_tE8q5qlw*aZFh!FJ()Qzb?J(Ok*Q^oFU~wnQ2ic5E@&E8_5%?Ful-> z&Po5S`Khsv^UpY#uJfiI@FMvfghnBpfRQ9U!`9Qu`{1uc*7dz@xlg`xEuqa+;;7wy z3dIiXu{QqB*iCcm&%_dC7dRDP)4yR?S5znBZm8RYxp8==_=4V^RYuh~7yJr(q9v52|T*=%}9zZXxQTrSr<`^m!*V(M{3< z*yR}gAyP01s`uPX9Xh)E`7L+5r z0)k7q-<%V|B|%aT?V@V~y+0-YuYJ6*)Wz(T-hprK{hnM&CT#l`y$yIX*$xZPhu7+0 z%jx0Nh?}S>`KsDAxZUilvIKqAPw%t&=e%VFxsunQP|QvmR2)iOFD0X-jzs^oheFJu z`otAb}o`s-Y_a;#t{-%_2t++@D})FYHf2dMMS4C$sVw+O2dhZ6EvIMR*G4nakETp zdG$0E@(nS4kGYhm5zrT8-ogu3+F5deJL;j_8yYAjc*@SeaAf5I4-VUG&eywa z&8s$Lkrl%Z(Hz%EIP>^40e$(%6#uBASY%A{NWK-JPfESFbqmm*K82S7H&u%6AX!3C z>#q1~0bFkblryBWTX+VPqKHEmeYxoj+koA%|x2!5D6R#mGZx=^;(oFod$lRWASkW{A=C4K` zAjzT0%<%=LBhTkd4N=Yai?GXBM}r?bt#{nzKofDLK8oZw@mdmK12}0F60SYkOlNGm zIpQCL>I@sB_6)SeA+M)9dLjz+BaWi*AgMr{3Ore1RHzNr+o9OljjP!A$1Vw*I34j~%s+!1LKJ8Dt1jnWI7Y#_bEwqJ*FogYA#r{S-PUA8+3);rPm0 zx0py0x!WlL@oOf~mW*a-cr)ioAz1^*#bB^Q$O~`XfuDrE79=U?^m-C?=fkwELD}pR z|7kqUz~KbjK`rpTgRnr7NH)ZBr|RanR~&-P`&F6-tFV!-o4z!%8xMO%DWN}&b6t-! zRE8--QV#RkEiU{AQ?XA`UAXesXi`%B;>ez?R%#NLv0XVHU(B)zc1}jsN)c<>KUE0a zOLV}{<6hB{Pk>0e&|bu@zY|RteRfQ#Y~rd523)p@3ukXu>>FD26D&RA=ZLq47Mgk- zjWJ3yu(R?;wefe*tVv%pfF&`eNO{I=vYB|aO_tfUfkc@tVikGb?roa-;rBm`i+H3? 
z^sl%vdK`P#`&>AO!}aZeKHInYQt+MZWs@|mOvjJF<9UP9ZpJL0H>?t2_oe~&U8vz&#&Po+x*S+N$Iz2cn08rD9Y5$4LEn?(z*yu{8OSU0sN-atcjQzPfj0M8CU2 z(8Iq9>-n>?FW5vRtzY0ICY$Ni0%u(s(XEA$#a!LRrW%Ku5%YGYiJUExhZ)6|m}na+ zP$tC2PF-P`LsDj_SDhboxGCKZk5aq)(J)&sH7{0c`+6RZO$X88Yqx7;(I{CI@BKNZR7Bzg%eychW$ew!3qm7(^zi)5;xwbxs(e4E782xSfWU zD%&#_&*^*0Y%U6K(263b0^v$8v>T3>Eb+bwh5#LO_?8D-Qkgbn!VJbk3#-~aT!l>C zsXH)LY?wD3r`0sV$QLp6=d40@g3C78jQ_15+YsE9u=QxL8C@eCv_tAUhG$|hk5W(C zNsVwzvG($eq=H18ngu3eZRv2dHN?H>3ZVR>@27@A{# zY3upzh>+&kt%=)>?x@sI6f1~|M@crmiqO4t+0L_U^_+)n^HuQo$olM?|{c&1rt-N)>+W~JX z9J35ha9K!vrh$?%EzpMCcUlE-;X#4-8dIDVa0`c5JcGP zF5=69sg79dA#V?MIVnOLJzU<0kkA@zYlRp8yNI0~HdZ#^9!p1R7*F^!BU?=p&pP56 z7#P_}v+{_J-%vR@`@PdnAo+T`-ycQTN@sq)lE{8)&FTv_#Bm$9z(uZqZWhbL!0zPm zMJ0#3xWn>9Xdv;e*&lU?@88eTzno?(#b{|C%+&Fjd%Iqj@V_osyid)cF#iKLK*+!P zq>hiHMZ^ELxxeXv>je@bl)n;yP>VRTn$(HwItz-stq@J54#o(=bD{L~&9pr-KTa1` z$Lt8_t!ZkT&D(^uH8_94PeRxT+B#3o#{k*n2gv!bDQ?Nfsdo*&bIqzx7o~> zyMDSxv}^19j(0#_)CQTz8&$P^mCk*!;wpmqY?0y5B5r+enW^Jj+4las4RWhsD%y8K z{A27A>*jR3IbekCMN7=0G`uooYU0Mic)FGq^Q2EAAzH4LEe-yoE2!F=-}Xfg)8ayD zG-Ndw>(EV_ttX3SWUr^*M(u|%dwfM{>J-jydnqJ4zm0wh?@*YusG_);I$8@b9}!k) zGHqIpl7cV9Gsu>}aY7%y?k-kGpin#D=_#)_9GU`-X|a4q-l^&w;kaMQyJ{i{)o*Ry z%EU^fE~uJ27N^kf6^Dq&X!Yqroz>ulN;BX$Ien<#Us;)&y;RC+f-ZhcVLK^U*!*r8 z1|VX5Cws+W0=K;~Hxx!YU0Ly*%)mItVs;Jjp8B>HeSJmuY;^d`NtP0V%UC)=5DS*eb=%tuf~IRzk=ZanzP z^&2_P5;dmLX)115FZXh69-{V0A`o4~i5i%js%dg8VxX5$HYz;1W_{hSda-8=z?_Q? zx!01i)|QvFNDP2hyHdmB&l)PD_OaFh{*3%2M z(#Qm0Y2!}AX)joN{RL;1IHzt}B&@We>Zi_k6K2U|5Lv!h_L%m*wq-(gB!wTUg!lG# zCeQ?0@%3;D0So#HF9G!GDmc})6Slvqv79ZW#9>;z~%G?7~e~!#%j4*X@#w|a@$n5ao#M-uS0i*vEtLNex z$dV7Y1)IkG%>}ak4zo6<>~veT%ZxK&67=RPe5Kn)&0|Uklm|vtj{Gs$ubCk=I zBo4pf>Rkw7UVyv!lHPV><;)-%1@>8*sC%~BPynKHhV{4#qfGBR6 z>>zFiVUpP!EH@4YRR}yp4X6!RimnKli6CfRj=Ru>0V431@bAchH|zq{MX-XD8p+L6 zw+oq3H=pkHTUb|^V{osA_9ziqztRw8H2Nw}bDcC7F&Vk^nid(CXw$mJc33>MivEQO z@>)cet+S4oGTR?L=B;L^`N_7F*GSPFFaAlzml@_5g8n~F)CFUVFifFRC*c&$+fm?E z=xfaA0uMl(1Z{U3y#E0Kc+Z~>R;qvgqJH5it+)}TBJzX}Yl58@L%fGoW2eBR;%+^! zzE23ZO6(=T+f^&8T)_~z+tM1aXjR6b@y}H>8>~k!y#!xRk3D?Ljm>HG%Rz5b(k+(a zjRfW!2wlxGAJ#5*3*^z-$K;>aWM4So0ivnET0c-C}CWTtu1mZee)r>nB%j zeIq`Zl;`gyH3dm74fd|6DOfJbFj!*s&3GW`=?9HjgTU0AKTg>Lf77}|NmhvteZ5npfv9E zAa8$f)Vb+BIHdMB9w#z~Zbk*k_8TBk?Vd?VnZdVBA5m4c^+? z5BEB|{;26x0-)6xId0eMoKH2WQ+|@EN?get9q#6i6H(oHp+R+5awcyeyZT~|HK#Oc z3xK2(sWUvgt5#6^)9l#{LM~6}Rb36I_DoB#x*00kS}@+E)Yc|C&CgM0S-bbwAU=R- zLAbH`l+Nj{8lVER3x5arEmoXbz^|5fCI%06h_8;tMNdC=SMV>dFbWF);GFYRpsG5{ zRus=Q+4PwX^=<)}5qH#gDHo#_jb)PP`q<$bn- zv9?a8{U50M(GzjiOxzr{hWu6!LkyWhM%WH0W}|`6=9Jw2(RNW;d`E_r`Y9~cWr;jt zwH?rt9hT%uW{;+x^SfY_O=yF)RM7ifQ)EEmNpoISt@*61#?PQzv0~0z;ywb0y;9Jk zWDa=xV%^?S&XW`9GKNM59MUWhXs)o)WqHIh42Wuy6qN94lQC?_4B&0yZ2D`^RJm;P zq&=US(XuXG^i2>d%?X}sigjsOn|KkxQz0f6Iqi<4cKW`Ia6FU{N%BZ_MbSkwO3n3YqC4j*a8urG~TaZdie>=mjin z!)bp+ivFm4JhS`ge+1KGFkP#*t>7WUiCmq;hs&zBB2 zXn4Sta?bAp4_Ki7Q|aaC=rDy}+WPw#7RJ~u*UmA02BsdmzX#=yudtNyw-|U3Jt5D%_|bR%&Q#b6IR8b!y}jQ%MSsbmJj|=+$r1ZPc?kqdxp>(Y z;gLn|&0N1}-sN5gehELtWJO3WOn1r4}Rx+Yvr-tvc;#iCXe z;%~B>pKo2TGaWS}%|B7Z>RET6^EP9Q(4(O%e3G@GJc%G0h6;bx8TpE1X_Uc4;_u3w z0SVeDza5?gnqWh7NN(LK#1vEfs%I6o(CEB3F(PF0DerNF&Li@-EVPf7NF;>lV^2FU z1@k54BSGu|@Wdc_H0N8;*y0$2fzFYy&n1?wl$A7ord?K}HT{7rFdEM)IWQ1RTXfWh zOFnK2U<5{7XrpKDDar~#U-M$Y@T$Y$wq%!l9JUOMsBH_#EG;n9t48L1!<}){(b07F zv$PL{aQ&Q}Hy=jMsV_~7a6g7Hm75KO?HX85Xa6(6Xqo^pXI*9rEdlU**a4rlO5E`P zf0)|zX{}PdD!YbOjI1>{X$1~XgmB0ynPw@QR)`=FP+;pGgNQa5XEcff*pqexm~0t5 zhkhX;-soBw5^E$7<6r;WwAO?9s0B6GefBK2PP~hH)tA+~blmmLh(nObd9+rxTk5s! 
zdQj#j^*>`n_ngzr>$u^V82UC=;62mf@FDcJ{@Se?6*5LTx-xP4H4I=02_P zS+vU~*o=vHLi7Ui*2`G(o6eTaI=shVgIFZawQx*p;9dNIiK(;%2MRI;e{yiM3pxMh zX>v*%2CO1uJi;<#vJ`LwC$j(<6{V8oG66n;N})vqH%AUMkw*$jR@8l&uGaW=pL(BO zp@pp?mT=jz_dj2~u?M2Sol#mrxbmkx^0uz>siW;J#BGmOuG;Whq;LnP?UK!A)@rOP zE}%wy73WGy0?4MkgUq-gQFH-4GEg(=W4Sc7H(t|x@NWFgBaJU;$C(pg?p{~HoD+8n zI#^HktnyLn3+=2~SwKP7BVDSUs_qw3Q>f2s6(~j@=IFq3(5a^acxeNV zf5XQi)54rqca-HNsFXK?2j1BXDSp&{Lp>UUCfv`D(*ieve33PY>b4 zP}1$PbE`t!)mXCd&ZE8B_>O?pJS|DGdad~$Jr*Wyp zk_pHCYlvXHlgp!z1PH2(wxxAgD&7B?&CwqptfI$ZUFt9#8#_?bnx!pas%3x08U;U+ zT1tvJXG|TA(bE&~t;hRmR!+r}n(L*mX?9&+xlLiNgf5U>po~-0j2qQmT@7qK(cj5^ zJ)HksBmPIH9~!BDitkFT?9w-vpHpOztMD%0`B-M1Ni>LbfzoV*iZJ})I)&ut6Xk;} zi13LRIi5MN%3P1#OJ^SQHAVlj{!4&$`O4~gcoVx=z5 zhB#bEWRQh%d+z>=9iW5Yy#)sq&4i-sHCJ&kV;Ly>UQu-U1VAvXX{IGR6bGAaM$L?_ zQ&t%&N6V~bA@;s}@g!lJCx(4dZ_w$KCBG8s{t6J4oLxI%%9H$zj#uT-OZ}$<__G;K zN(~=NR0j+)JGGGDOxt;zZ-nwzbkdaY*ZM`qVDt5l{yFYnQh}%zKVze~k({ybT41Bl zjFh=*JeUkSQ%p(h%LBSMqQs$E=l+KG6>gFZvvyE=_BS;5*a@fn;Av0Q>vqs|<`<04 zG`4Sv?^EDY(m7c;Vm+Fx->Rv#-326y0W(iLJ9U>EnH#wf-hyShe6ILA4pA(AuBeX~5=S){QvnpNf0u|bT$ zTpkRz?3<4!c<>yUNbalaARXwx{9^@m@xyVzRr)!eW|u&EdB`nZfb>$AS_!cHl7@01 zZXzPC-~DJblsT$t;9IYf=(jSCUM`}d9!kO1!_i->kt*RR9D*PI%?>KEE6kiK1({%A zXD^&rSQZWKc!e^|S#FRP>)ok(MulcieRUIZ1ozYA(qs_*E*;+7l-az)4r)gt}GN~wJ3*9)Pf51j3bEVHx(2u1FiN z1^4uAz6#i!x??`;1~8shJi?E`2gL7=j<6reR<1xv!e3AaBNjvI33PocR+BFl$uTE` zzJue^ERyYkb;IbMO4Q3xP3{VSm;PYa(~OK{=2N0&|YNP?(}Gd&ZV3B!!~Nrrh*m zoTphdFiFA`d`yaa2!^2d0QAP>CajYLATG#az2LP_fknVn^nZ;d{&#Z8R=mz8F6^^J zZR}JhiH9G@I4@OKxb=v2lv`d2$vi_?0XY(TB%(z7Z-I*j3196JlmTC%)y|AbrAdmr zI)W0}W_ER0vfh|Alsn#ojWclz(Tzgl(7XnICn~K6W@&*1*${t^diYRk_wW%FP07Af zu%M}0ew%!p9y#}#3VfWKQW|`YRpD;xVg#V--pjmKl)F{9$2EPM+W{{Lb{5 ziZb_@;}{3c<<_MTXK)&bS^0#L#6%gVGfMGff;7g!{UMdRL02*A_);A{VN>yRmF|;F zfARKY_6?(;2*6)qJO~WA#E_7ORwCaR-@|ONK6-z;VTUHM(K4GMzUl5d^71D%sxG`< zMbJw=#QisO%dyFI=wu?gxqDTqb2zo5HpbwL%vKsLFi9Rs2AW(yeMtHCitznGvbm6u z36&N!ztDwg^t^f+vg#JUE)#K;xLZZGhwrylSQUX`RxbG37X5HU{8Uze%A@8NZytQV z$%j}V5V%QHdt;fKp}U+VTa!wOpD}I0V`mb+HZga`3r8~HMgleMPBy;l#c~zqenMfD z0{|O)r;#>=bIY2p71e$vvg%bYP2|H*W@Rm1=TyjfZ!u~2R6e||K}nxky$#^0Aq>Y6 zFA!^AKk50(D4m_2A?iX)|FY|qB;DI;b^y`J`J9aR7b$zfI zxiQuIV*lT*wUQ3$J2Z?eOkq9l_=a$ja#H`a>EP(>&0o;cHwKsWs#6ku_**spBym#c zC4=XjbwIzGphCF#8Xg%vjZM|vP%&&}3G`hLt<`H6;~3QQtu`z>hik_9?;}A_iuEGn z(BE}|fur5acvK`YwW!xF+%1=HYViXR73%94l%a$N8d(ve0>xSFK%_d-w%CmW5!x4JHe*H{qDW#0e*`ErHkQjY;ih)~aW%wO!0qmiKXtYJZ zpg~Lwp7ZOd|0RZl0)?{aNoXoV)1gbRns5I*kpLa3_A$c~ijDkwF;&oqq~;`dqkL02q1sghw^~XF{(_=(krBm@QxQ1Q*Tv|} zcD}itR^ z_rduM%ASnS1dBqdy?|3i@NOdwhv}+oipoQcDEgbPyBLmQu<6@ZfdsAH$u`-}#(d8S zN3~hzu!zm)m_0el-Dth=Sv*NEYt#kZIV2p$=7`mkPe7^aVA+o#CWDa8>FB5g_)yF2 zU)(R*mv?ami;E=@qfod@+_1Yvnq1vqxD&&GNQ~dCY#@a>jV8E&Ay}g>*j~QD$WGuQ zl#Rh-EjAL1ZX8aPVRQnvW5Z4;u1ovJvHCt~{A;-qPv>02(rh{cJ8>s%$2OR-Y@y1#;#2W5JQC&D;mp%Rq;1txIhK~5B; z#`7-B2hh1>f7nZ09)SlHsXCi>p__KO+f)ZZhFxkFnj%m5A~_Bkyk;W zD)ZII?kR}#xr_+s><}InQ04I zV=whU%Bwjh=5AF{UQ8>>3=3BK$dI+Wl}kK%Da_>ks0Iy5B0fniqYR1k2S8(so?toF zzc7bolIh`^Db9mP1nnJS)n#+5p876lQE5$(KWGsqU3-B$x-jFYC=MbzNB#a>N$m_R zKhK)iEhz3{mYe^dCUZpRP>;}o{M`@vv#Q_Fl+mSYM)u)pq9!T`_2BwTRHS;(KqFP5 zZo_Ja*WeP&GZM7{x@!|We0|}}2_7oBb2mj3E93dktgy%fzKLv@IS1%h^O?>Ri(r_BlR z0rhgQO#iUS6z`?SCa3`J8RkB|uqeg%J(gPb*kzi;StHfBET^s6b!Xy+x^+|pHv;a< z;=9wKp9a!aIoAmM6OSd7aYx+xTH|IwQl!z5!LQbUreT9;s@)1Y;eHbd%#ATIY1K00?NcSS&2kp5A!j!(4k} zUKjb7d?Dj5@x^*541tU0)>Ml|@v!g$R$`6ARj*rxv%luethmpTS|=B^rm27GUh|hX z{T8PwpcnaQ)k)qV&UGCVvNzXZv3(fLn&%!K7H)D~@B6i+F)YTq^X-Q_1V7)@_>=-U zToR*S|C!g1QGwZ^N)o4(4rcC831olLH7ituD+7?-Waq;}gZ5u{6EnliWlFYUq3g5n 
z3}Hw(@q^Ehg{bKFPK-nxof=YXaqJ)k8I!Ejy44wXWjt$m9sq}9Fb3Oq!j2PsF7DS) z+wz9#*ugz_F#&AoTk|6$KsJWX5Az^yyR0zo!I2z{sc0pc)p~{`Z&vrQ!vV(vS>!Nz zBafr~=HJe674)|f)=g|juWY6Dj=}_q}1iP z+w{Q?r{_K7(rO`>1urTdh zsF4JiU_`dMM4#O@sZnb8_=i;b`$JbDa(oVSd4d>)nZRWSAlum+(a(l(_q~yYG!tKf z&7Y@8g}eGGq)S24Hv@EW)(vXVX9!NB2>G!^NiiptABRwg7J9`mjAp>|(E7#AP%SP< z!ZM?!D!TtlC17-E@%L~4yENvLX9G0pVdW?Ri$+p~y7gpZ^S_X)s#L5`gJ~_^k4_k3 z5`Dn0&?Doc7_@u3)x6_Ye*tDH<;=o(oPghBAiPvh*;GNvp|Rc|>9SG{;r|~0U|(ko zOMYhHAR}s}KWluj0Jbn=nsua6V~=Au?f?~$I=oW5$RZ|p8pA$H}i9$N-df-2Kz;F|et$!yy(=dwnLUnJ(M1Ib-B z@~y#BFTB(ULf)!fd_-83VKsq?l{TFEO#D-KNfGX59Q4-tIp38pGa^)R`r zLf5Q;c`F2o1*m~-j$E)4M`Rd7h47G`GL3gd!ZwHmp_8Z=D*!^)U=RGUx z&AL}Ia12hcRT)bq#Na$KzM;6x7=inh%Qi~E2W3$@wML_vI%M%B)bVY2z>H_3{@T<* z_G~#oG6UWz;Nay(zkGO7z~I3A5(7P)108@3D*&JC?a&v9OUpw;r>TLmpl0!^63_Z? zh?e3@vu@ii+|s&dgS?a969E=-d%wF{k%BOHGS+uJa;wogYw89W@zJNs8*$oKQnF`r z=GYd8zC8@Klm8#^$Z5z?auXX^w91FHAShmDXjtzR7Oa=?mC7(6&ZIdF>S0QK*@an_ zkm!0h0(hXA@3&A+2d_^eHtpVolsfFyo)`XF9CFOW`55^nxhYHY|jFgtbWHxJUxm8fr z3Sb(5sFd-c7gg^a$76)B^OB&9`IlM(fTu3_Wh#FW$^F5~GAU93wTOz*zIq%$E^hLY zC#ndOmsR$|H81~1{xG6G$jLJBc+oT2YoK3YyaBM&Q`W4c6BgA!pGDi{ky9+QgT~x4(o^uQl#CBvUN5$tlQOnPW4->L9-=jTz{KwY zeb@dm(jF!YeHPeWt(q_``0ZL4R&Vcmj7Qd0=L7xJC9SLpfc)1XtHXsVF6Q+FQp9uzF7IH( z#hizfj3Oe&dmVCerqRX8?w?P2LMyXFwd_oEeFu_K3(|WXry_tfxcIW$1@s9C&n=A6 z%>m}2PP&>aY1NWsa2bFb5#~&I3e(u z%0q<0RUjrsnv+pbr{=L)9*grMw{??mcttiQQ5O<25sWmO%PD;NGx@Vd8lU?3Wf{W= zqUjR$HJD*JrW_iJbf#doB)93( z+wkn*Ue7Jrd0dA{>k{lXUQMX?`M>_{XE3V3=Q3F^*Su#GUr7QtEUNlc^l%C{5Gt~r z!iM}CCBh9ZWbs*J)R*s$*o;g1Ma@&?8@TH*2IJSY_g*gpRkX^i_cSZV?S69k%|vpA z78^O!)xmTgBfEIq;Q;PFuMG5q75NuT^e<(DEjam__*ml^;!pu4OzT;LUl>^>z|N*S zSzf2KVhS_mu$q4G2q6b`D$?k)NG7fS`+3XpUkuWA#6isnL_UFe8r1x!nIMoh_lEL*gg9kE8PKcYFWs=f_4&pM_l z(;$oHh60vOzSB5?`#p5$2|7Kx>DMp)9EK(@U-u2WR;)13#ys5Vh-Esgb8acJ$nB-W zZWBOo(P--FrmU*}Rlx1TkVqk`E5b++R~6B)=>&&gQeEOL7*ezDt53$J*$EX5Vby3& zKWuD9N5&r%VcaW`cA8cL4JfxoCkVhtOO{)|1VFPns{iV4Ka)@--I)C+`|an{saP6s zwiajmu}A(}6Wy_b2vAUiyk|qEtGTg=lXbti6+yKEmQfNvC1HO(Fw+DsC%db4vSkNj zx~fwpOYqUE{KV+IbW`C*4TmHD*>V9}5W8aM;%=(sbcA>%VbYxW>9*8kapE35F=u3} zZhb+CayX=oFdTLI$|C|Oqqks}<{yiTa+f~%Raq#;GwT9wVHE^o(cE`SP&(2p+U@Ym zh);P_Mt_^r+$`RKWycRF@_)rZFGa9YwLnPXW)@i-TG?oR-usmaJvAz4MR&Q1 zOh>ixI~V{Mq&asKD@~LiEq~)mGm#X`(cs(*mX(gXhzlp!1}slwv%2Euan<10M1Cke zoiL}6iN~WY>Zom#!dst)+*~*xo416#-93ARh|Sf(aqFtp3+N!dD(gpns+|r zm-%nmby;KW`?f6blhF{LarB20TwYSnOV|p~;ywaHF6B3$J$!=DaUic)qj?*h-UT_7 zV7en}Sy^%w(tG zCUuSPTArknd;3h45v{vKR`~}1UbWsp3WaLzC4JeDjx8RPzaKYJ(6+>V_lIHQqo+&= z)VVC<$JN;k8%KC_c|uouw`qrdg^NBa9C3ynBDg%Pd;s4r{jewdCVDT{D^tAve?XJK z*X*BFw+d0}>mXf?E^AVUFHkNZ(IaUq5)Y(y`61<(0c0tzM?F(0G4`Wq3J8hJsotLX z93@Ov&5{o(vU_yDuO)^Q9hrFQ{{v;3xkpIxqHB_qNDJZg&*i{!1qJ_P1JduayT*e|wkazw7kq`vb;l z9w|5QX@o}pX$&tWv#h{;u6Ov_rMO7U5H*q>8^$+sTEleG4OrGr;oX&D&zJK=$MkE{jM@-0z1@=v2rC5`xaZCd5p|FGsp(^H)Lk|#tb_*wxk6Q$xwH)F z%(nbS=1xXBk>n>VjLS{<7);14$|Iat^hMkwy6(|j(*0UxCS=nDg}m~j!EML#5d8G8 zcs}!Jx2eLsi5lRIvfrs}43QVQCh)Z)n|I4Y0)zD1hNdT%dfu(7>#wrCaq5XZ)1SNU|uM=;5{TszcJq) z%ykbeI@cc&y{7d|{%S@y%qA`?)_Ppo(XWWK1G=zKDLNhLYCw{=P=nDs1de7r3=GjC z|F8Y_bJ|M*1s(y*a_R0yf=Y%_DX{jcY~ykGOnbzW}|t)_dgk~ zK)=cKHm%@f{PkzJa=mg{jI-{$nQC(!GxQn{{hI8KDyuI#nklWrgr2|q_+U(!URzWf z5C++*y@F(6TKvc>wf48Nyt!3H$XH6(;+!5NG>v=t1xmlwL-GLU82P_zWRVGAxq=H3 zFY@5i-=%884~;eDX2i_=(tdAXQK=GJ3wcx%|2);~^^83L0w)6Y4DAig8F{<@yUc7@ z$k-M8`_+BHT4HpBl;}0X-V^PIdlN{*@T(LM<Dofg8#LG|WeK_8L;HY!7F z?z)F`4%}hB^MqTp8iZ_AxzdL5G70honb@gBcrMuFc`x99vYs);i|sUV^w%)Yd5Ki( z*0OH1v7lHIw%A2kf6T0;iYdR^+jGgSEa4e@|FXWR=ILcS=7|?jNI7)n^ZF2rJz)9` zy5nOW-nGVsQCiHRb@DM>cyH^oYOx42m_{bCi`?b|3LY+G&0Ml@1t9wW$vX_a19pXJ 
za$7H-f7k|`FF2=bdcueF^c`U`+g-PtCDopCwe%(z3xNOP6`#|_W>tR8CP}jyzE8Q+ zgUQiIf(Gqi(xkO$7$3BAYw%rtNLpHbD^|B|-_Rp({tX{JsY9-dmRZItR5DX0GI0HBA;g9XcrXos^Mha1YyUS7*smZ#>z$2V zHCl={&?-d1v;#C3DS6eNV|?4Sl0O8l>)W*GEe0fwa9I!S!|qAYf*5|e6d8W;qQpa} zjrq}d=j5rw7qRxQF5I7nGUU_ycefflvptDnjx7=KjT`)rtHxu)Si>~NQG-@6Rx7e; zB@p5$tzLNN0%(Z$cu!2nK3u9efX{b*()qhR6Yx4~Ogk&uV8lVMY|bsU?lh%pVMsV- zljD0VnJZ5$=Wg)SWzsum>GeWyk+y)bXEDV@V^}Lg?-((gjTJtxUe_bxQAWif>RDFF z&CV3b5kOrxVXAt_8)(hqGm;7vz?C0uQvg1+voO?j6^lUHL-~oE-eUTv*Jr>h^2Do! z^?{B`Lug153<02CEC3cK-w_Dw_grzdn?yn09UdGG^_35Yh|Imhk?9kanlMW-E|O!5 zzGjiGTP1_(LX7Msi&ua3jcVE=@z{Xa?^z9e8?1zq+n*kKs*wRaa(l%BHTbHb^sK7e z(7bn!MSE!@26zpp-Hd~uj}b{RmLcXpMNb1DF2SlgTW8mVCU)IAD*A7zYsaIJgk2@9 z7gey|G_nzWY740Z8J>0;NlL-|5mz5zTbMDs_I?_AfZ6v7>P#G>1SXUY1r}0~3989b zg{;vDN!Iixko2;}wJpp&TFE~ZVCA09(MZq!=hvRUx4@f*=Uf0NJM+Ck$x-O6P;4Wh zur45$GUevkRW#^_*>;EZ`k@hdT~iqU`!@DmIDrD@0@mr!myq+G3s0zVg|jnRJB4Df z;HDLIHJV{1Y5sXyRvC8d0E3zjU6-a9I~P(c{ehhVt&2>n8mivMlZA1GodoNBh|MOVSs|=mV|F3UzpNWsCs$Cq@F4k`bEw}K zy!lGpmixQzX)2?}`k#>a@ZPkK8?Hd4OndNs0DLjm(noU%K~PJno{=1L-Hqt|9-|h* z;_tbM+519~cAvsD_$IdHBnHbw`0zKP8&36@00LHK2^80sP>IO|1#3uKP17AdapmCC zIRO_%bS8BZYsto8Z2a9@>!f%-NWTBz`405YtZ-`6?Wzfp@n4-lQ-uMMt`k*|)KS*R?ZY^R%=T52&Z*=2zAV#lJ?c*_=G zbVB?8QKo|cjeZP5U8=wzHa3L@D>ksCLg%gGT(6%uVp_fs!=_Ix`m`vl2aCA}L0P$m z8Z0~nqK@hk2B7pc+UY9?`U!!A5yoZ@krCA;W1Q%;LxlQWnPJVIMlSZ(Eq0pt{;3tO zi2IxaSsm_*VjLS8DwfT#f%{R#!x>%M<_oR zTk_e$7k*(v zqDe8mspQdgc(yzs@Q-&AT?rWLz3z)|a`ZJMG(3JibwY zvjT+rz66I_Vm)Zd!%5VFaD<7o>x~b)2DW!{pvTb<+>0IW>JA~EG4eV}Fe<3+jj(Q% z0x8(d>8EISq=n$29I-rm3W=;7pENWog z>Rs>IyXCYH!M&D&tP?bA&|-1??n$}5kYbjI*^+iEspf0WZ4djx#m`w#C$V8$f_5ha zwDahOm`I~QAc?CL^X$5U7cEyC z6mC}iQ-g(CZ{Qd|-0<3$RXG>uU0L7WQVt6*OtCqg#L!o+hZ5)`f*ETbJ}rWa`Fg@6 z99(7VJlb=Ctv;JIH|58%)F4luLT(CS#_eB))^?s2GS_zBJKrhF#)%AgJ!J%yqx1SpL>pQwA zs>rcY(LpFB6FOn6uKsu!vi?$`6Vrk0w1L>{sDl(Hsvy7P03k*nCvLM(x!ZDUUU!$F zQ^|YsOOk|Q2hoDz~P(Y0S!x2tkOL4Bt*0I z$h~WtrBbljJz_p2X%;{jevI9FXt=)u&hoP%P9SrcYC7RiWm5hy| zCf&4RxygFoO^=l4HYedDdV&P|4G^748ISV8%n8CE-6!XlO|2Izu6$|-LHj_aW^{{q z@Z+iV-JQr>&DaEcK~c8pfVxC&+?ojbgq9dl))!2E_c-XV=h{)rv=qFYHWP4I#>I*u zHDp8UEw_S;B33JJpOavc3hctq(LYJs$-WdeD=Iz9A{or2f~f$`hf&cRz zOgAh$md>b+Q!v)|N5)y*sg1Bac|55=UXgzQHS_S{m+jDnFQCwvsvs&F64HxaX-+0x3uUQS6MQ z<2P5zcKl(Ricrg~{!q`^<^lI=45C0unioJ(xGxihrQ@%{Cbnod+|8Ct;Ej4Ea4hVrAOP5yv9)Hlf#hL>nS7WV^h$iC0_8(@n~WmEtsw> z)KHtPC{f4-6tK@Yur5KVX=$lfsnT3Qb=LadhymqA;gVGMh5(GBMZy!`?#AuUeLKFs48(`;i+NI29}f$-0_qQmLgBY!n2 zbA?a1RE8_dcxPj*RtC=ILJMD=q%5F3BzK1>L}%De5l1n9um&IQ^hEe#de`K`hddsl zH#qXO-DrLXBqb1)V7ESkcUWZ(LLH+UH9hO0wkIlW)Z-baQ8@{*2WwV`=0e3PTI`>v z)Cfm+3T)GWRdf&1#U{k9K@xW!^BY8%eO$2t3eRZ~c1)Zg=Ys4)aV9cGbj~2C?Lz7(S)+KLx$3Nl>La4+<6=vQ*webPCZzHY! 
zi)l1OWMVehgABZ}z^{wFsj+IoLt2@moD*Y9f`9MhICd5URjwFkCo>+?w{u5&j5XM> z+6eX0E%LL&yy5HOxFHJJZ1rC_n0}aNu*D0d$>4h{*s)#EZ~YG+T57zBaIOS8Ikh1f zgc`vQl8|G>=wlLkRSD&3y_n|W5csPM=fsd@{`r$&0@G373YrV~51E-|u!VKB@V-Ms zQoX%V;0K4=+b!kphagyN;84w_q&j?>3|qSpagcvbLevr+(2Y>`OFkakoeAzZ*_jcL zT8r@~#;rS!Qm@z1F5TR}{HTG(9us#pglX1dR2{&Jy!b8!S~K%|Dx8sE0t$?UFf96U ze{P416k3~cHZWJ2MhS1JuD=eQDx_KcCM5mt!O1(+O+G>$yB7(0`IC=M?GL33bX~vn zMx;KOYEk!#%F@&!4YG>2aAC1yCdXOLlbT!suyH7=BOSXH{k9bZ65|VgBH$dFWY^eY zNZvJv_8w%-k%cZ`xG4jL=>xsLn)U9;6%LsXYM+x)7R!Rgqt0KW93t&b~E zsA^;eeA@|z`C1_{!mZ0zKvsW;WZ>FYolGVW4+san#@v7&)Ba<7LiViLfV(M72dHpb zEN9y14zzB@Ebta9)KmsG^i-X?+-kq{iz`N+jPdRmeAgAox@Y7L?i=+aIEWL0zALKP z+TLGjsU@@SmwJHTjU!R4l~nQLF=szvc4-%a)1cjgF_us1jGxxdnC76-k6(yeMsCCb z0&3D~en;EWJyqvm$OW@p9hyMdEsTA!NG+DWWIR!}5!kw1$y>Lw5oc15<^--8!E@j& z^nq?tPVcK7dezb-$@=Mfk_v9wm+zM#&FxKN&*mgq+mS;i# zg!9Ipnk@Sr1n;Wdr2VTVp;e>vLSHN3yT83iHWAUK{+ZD{p=|0UWLC|z^ zO7uXl$>irsY*u4nAAcgn25@-!{&Z(lhXlHP*-F`+xc>^a{ts)HMpDb~PKV!2y;D#e zUh^co0M1JFz2#u8V-~gIFDx6HKL@ROo7*zjOtucN2YzBHqj_*0M7C*#sET^zRf_ND zy)}LP;5P|fMCcZS9o(7&he?-+iz=dI@DYkcUjutkRiqmoomINkakiN0%*r%CenG2| zgil!TKzoHE^-y^ zwo?u#Ql;)YKay<=y^1c!w7oeaYT29uvz~X7rBN728G0!ZHZ*r+`H<3D_Hi;s-NYMt zs4PyW)K!xGSRp%LogF`u#3dmWE+M|}-{6`m#k!7)yuE5P9H*{OKO!!+ea(*b!b*uZC;Y<;t}1B*jNZp9VI!WY6KPWuc76K$=P!rAYQO_fJp{PYvE-Cr&-=&<>S+x3{Xt7tLTL7IHg(KdYy{a~?yXl*R49bMXgfSF1gQq8}d*-+**A_sZv8ATT11I;y=BKnJGfq=qVHQLh8C2&i zG}g4BsFTHe@mx7iw1eto|4kIfYYMRBfhkYI`w0r#WpmlVN=ETq;9Pa)B4F0itXLsx z8HkUX(8)2kbS_do)bz%-CbSA|pDuphsK$KOg5ekC_%Ro+ni zp%Xe=T>@^|2jvOEVeA1-W2=#Jpo}}BGAqSr+#1W&x-UZ`5D8Hq#*0MhBFAiuOw{Q# zKV=>peiHTKY8TTLOZyUfUe-9wg#`z^${!WIQW)P4>`%_Qxd?s9w@`_?ZutrjcJW&e zjDm4d$h?P~AiY7FvF(I1_Amryw52#NBxV6!6uYGv(2|-y-4SA`XgsqF;UDi0s*mwM z7IXFg2`#gp36_+}RLSZr#H;kSN=>ErVg3D0lz(6Iu5CJi`t}`%>epyCD>RiAw~37` zFZ)>K+Skw+lh&`XADT4c5(!7>b-<=%8VnNsJ+~=TCSNZ;5Dq^xWU^}C z+jULoJs6wvP@vfD>q*6`PM&{)q-1HUMFmB#w&+@>g#mc*rBUCgH+C}12A(mrM^gW@ zQNx4tcM5Tm#1(Iaw|g6u4F`jg2_yy3l>p@(Yh6!w<>1w{kE7(06XN;*n07&-t$@}& z;A*+NK=~tuZww;|%tV*N1mm{`HT!O2qf3Z5O@Yp)Qa*8`9 z@sGb5QkSO0Uwn?r`#`m|gJXldjofi;q<`uq)sx^4ow?|-VZrj#PKtkEomdSXxcIL; zjqv%9Hz(BG)ud(a;Zc5dGGwGG?rV`G*FKArCd{tXYrdRvR`f3#Hw7` z9l5et)ct;^2f)@CgT^T)H=H5Wb7)f}x00zva{X|n6)nDk%G zq}6Hgy4u+$_3muhIFwFdK)#TP7osTU9tOm1aFx{P*&ZECP4y)}FU-2n>Nt~hQKWeh zu}ty8p?r{eqito74+~V$)0_e;HnYNR*b4T6I2ODA-_sTm~Br~U$8gAK!VNbDko^Gf2pa|pmIeDmfFhlfe0_Og&INY1*RxHVpm$6{c8ZO>cXZ!labN-3WB`Va zFwhO#PeriyzmGVfngfXbnYOcYZ0=Xkb0BaK&;Vh0gw%xA_t9AW^UT1ED? 
z97V=UDd;76#Pw(&6H7%%{S*TA)U-urB|r#lJhDdnd{g1aiV($7_hXD8u*jTTr1V zUHkO}Q5JZpw2r`ZJeg~4lpwZfZuf^pPiVc%G`=6q9bm{vBwIF$`s=QE7lHXdNaLby z7chJW4Sh)-Z20EQR?Xuy9m+=v6%%N+FZ6(kri1|t=-Qv^$acEuQdA@+Qab>zI!z81 zExul0dtt-CbeIRu55_?={j)xfgELt(_b|-qf?(zoF!0>fvBaLxX}roifG#F$twU7W zX4J1G@sM(6K5n;Pzv~qV^g$=Q;@pcGf1?ZvuVJ|S2;Yq+H)quEIfw3IL?30QJyt^|*4)b(o5E#kmA;1iu+;=4`)3KB-OgZ+EE@@A2EYp^xP&L4 z4gxN@idf*V#@i$w3_VT2?+Rv{25&2&IV(Z1ZuuSMM&pGw2l63-4pMoSzth`pWT!2C z_cGANZeSj@{V4IJ+g6H?3`aOS;rjNhph87a(XF+9Z2)%grW!5yKWUqH&Nq%1g-gWV z;sz&#URj+GGGQ!XhusDpft&J}DBKKR(joDzH=up znhFY_S9vLg?fv8y{MP)aM_zX~8dwi^MI5h|@Pw9#WWp|YxQlt2@MI2)L)=}Pv!0KM zYSLCVLJY)F45GZ@?U6yn&k2#NOhPxeXJmUknJU8T2-d$*Mn*~ucmKx5eATZx)j8l{ zqI{(bHdMD0^X-@sO=8%`EGG!uBm_>4usOIF?K>9?FOEU-Kh3CnLJ*5@xpZ+}69*3k zTP1|hmdY+H?Y3k4l{dikXU8aoQ&hI{w6j%)t=B5&%Tu567V=6|s^9J*z>x(*HC#n-4!tIX{4Ik{u?|&-qBg`sJ zjP|hstS<@mCmU0wUl06KFg^4_&vQ;$+b3KsMIYXWp$TN8^^Nw|LE~5E- z6at$p7=S0Tn@xEuT$Vb0dT1RlP|Q4>KvD)ftSGrNcNtr$al_X2q@GVCeP6%aq8c!; z$K>GV_Y0ePjj4aKFT4I&ZSD;B4-wPdz@WT}$BK3oG|+lqomTE%D|hF^k#{d`b37m< z&aHhyqMPNmdqq0?FG*53*R@gsy+yN}-ht&zkFBR{fbD?8P{uX3l_Cdfx~o6fxR#B) zD6&LH6MG^GQ3UwwZ5ieF1~e%sYBvnTX!1R{bI2uXi2c-QHc`5#K-Z>7h!a*dwHU(+ zO)i*SmA!@AJ&7^EUgtDKu*zEU;lt!P4_M;SO=?N;l=A3*$c&cP7m>;hdqJP8A83$< zS~|Rf3L(AL3=^_05S)jkRA5*j3xs-ZqjqkYuI7vGj)!$lDWXB`(9>zM-!cT$pp!cwJH~9Q*(^ zU0G7{PPazy2h5`)cyt8md@x=fP8^tgLR>n}7)!)rj}lBJRURh&mL@asOHVwcc!xbu z%fRRh9x9ud;QikP*w874y-W8=SOnhQo56`=OH}&XVI^_i1AZ}=lk2`dnE*=4VXV90 zO@{~ptCJWSwj(Bq%p##%NDV}t$M2t8(Sxd#3+!ChuVdr4N#N*xiC^0Zwffr4YXRD# zzB)DukfhU&Q=;+=LLStf_j=62suQtT0zgMAGV_ps>Kbax&Eq}d4(QJ8;wdwfw0QFA z8Qe`pPAVWO{Y)sot1V^lzY4-Vn)Zz#I{%~=Pq_lXMY%%_gfv5~Sl?XEsM4R>-Naul zuFWD)xKD%Zq&wL=iZz!+mvZ3ZCiRciCV6M5>Z}ltIkJL$zSw89YTdN7m~t4eK$`E4Q+0L@>zlcv(p&*IEcuQ9!2IT!t|NXB)BKi_JEZFjaDf~P2$<} zcm?i@2e14}Hf2rr)b-p=^~a<#UA&P$#;=b?p%2OSc_x_r10GMP{kE!s<7g3DR2&hzp;*SK|E3G-BYst5@%@gfSnxn}M_fV`d3 z={>LsF0y=!jauiV)QJGvqBKN!5cEiTrHBJPXJi=zHDIIPh26iUgcjSJ#~6}%^L7S8 z5JwTo-SwAhr3H;4IYQTpTY2vIM6hF~KLx`Y34u}scUPY$bPNy@SznuD1Ub})gerj6 zihv;>zD;b4E#LF_ULagc3~mY6nvlS%d8Dq zgv2Q@7e|?4O6@NgEqw3vE~^bMhZP!Zckr3NIp1^z_Y=8S??v06_kV9gCpCWrZ@d>B z)ICdz(y)Od)P~@8APRqgqCIQdmwRm#IzQhhEt97?pH2Krrx_vkv>$Wbpc3kVQq#N? z$UR0n)LY)$3m9C|6mumCj5s^17G5-^#u_#+xQL1jzk@VOD_q;5;y98fc->JgDuXxg zgO{3>Dp&-MXcZRZ7ysxrd*BC>9+eAe)6>ua3*M~-S6~BHNs3fiDuCb%wHePy*99Ty zo2>;i^`+>0_k&&Te5-xMPAkDy+3DWMk#r9GADuP}fWf2yJu;%>ndRQ&ncCBx71LJA zlojX`{|#);aLKQ%F9=h(QiBx~wazcbyIkkPna=K8r>@NdW6*}wa~`B7@c726e8#RY zC%$dRl~OQLb^<7gr%jc3a8q*;b5`0Y*J@$L2Mi0++)!_JF}~KT*alW5F?OTy{|EuB z$sWv}ym5y9h8NjvGcmrgfSL=fdOBcKTE!_zI$gdYH9gK7%Nrws;P;==|pVc0W3;FTVh&lWbt>$JgO&=(Wag8Jy4UEYQUv31Ftv{_(aypYHQEWbu+Z0xaalBBvTgjFO9BJg&iKcZ6r~Mp~--)Ny@JjRel~hH8 zgA+y`;-Ht(4=&V#z15kHTxAt>m=a{2_7KE~A3*&`Q39<(U%4nNfzm$A?fj1y&C?fz zz!1~jE!(9{5sc@bYP(+@Rfk zL}pr9t2+kx#P^6kp=ed*rnXdB3{n)Q)2)dFlzy|BuyN;ro@L&h#E*cD(%lZiuU0f- zlU2vEXc#>a97hI^xLn;Xa_poLhtA41pi(&c+vCU8CF7*8);lI#jRO&(SdeQh68ng& zZ5{zc@HU{I1b@LT5xf@PA4yBXLl2j8dvU*sDMEuI1WAFRDEpus{C!hpH0Ht*j_@ko zrZ@g0N&Pt-r)?{(pp|9fVx~_JgTd0@nD!rF*fP3xz7!T7yyLv4VnpG2uuvTcro6>|`VB!I zEDEIYB3SMcP_qyU>)75sizrLq1mB}pt?!sGgU^It07#pTne(@a!Ys7vZgT`bq2I1l ztT^KVy@zDKM$9Plr9hXlunMhD8eL;ARl7rUFWWVsg3#*mV{vi`3>QVg8$wmYwreg< zJO8P9ORnU#?IEyvSW@slAxNp?13A2zjYl!shzL?2`>RB4_+zh8Zp-ovhETzm-2^F! z;-Q(b50MtZP8-)^I$ECG;46Uz1kfHATEG#6*_6v!B=dc7fc9m(G?-(pYM{hS7T~Q(;knc zBVjpPmICgQ*WL0rsLc1(Kth7_Fv%bZg5*T3;Bqgo8N&JgQG6BtfcrULYXQ{SD=wy! 
za+egp3ak+|bS4nO0lpPax`bfqF3=+zO#T`Df)=b5N8!p|witQ^&Q1?2nyHzKVB=(w zWdJ>p5PI&~Rl%2N0!fOe+`g=((Wahhr^S$q2(Bu&+v@&Gebg1+ojiG^y78DfAPR zGFdf=sF>P(k$dxY_-dEO|Mh;F?OOl^b&(;bzzAwq_hYg8iW-+55ts>jSebGK1>&U) zGRh4PA8QdGP>)g(KfA`qx{{)(b&{@}_(r=@nI<9PhadeQP zJv^YzI?`uVpejMdQn(Wtky?$gjd8}%W+s7q>uMHgv=(sjL~^C{(4Q$te7dAd0p(0I z(c26Xs4@lK+ICz*8CA~ElMMVZN0rV=ZUkNA-1oSh3n3$f<}K1cDut2X}Iypu>9!WgGJFz9cn-HwMqK%V~V7`uoR zrSH`SnO9|?1hcioC)3f;{IE~JX#h{#n5b!B{J?#=0UE`w+kMWLMAGgc`kR1pyLMvH zBW5qT_zdZM6gb}e)TKVm^zqJ|?z-JpD_Z{0?E(>$kM8|cUAvH@17ZP_&o{OGW!nJ#k?4;}P`(Wf4tq60{n>us*^ zRZiCbRiRI0;3NT1mQ{RCCzewGTS9QK?LYF8&x{aSrRFUT{Y%xmpG}^xgj!;Q+zvjY zx}GZhw3%Fh8ap3Brf=y0;{$ss6!TMDYb?68bZ{D9gW^vXu%>&1E89=IdjFmiVu&D& zjw4|Pdmrw?z^OI+*=P?1cC9&(7{j;Id z6d2iXpW~kk1+;mv=+2}vEkOweN8OtlaYd{F<)O{6$gz!0vs+Uhm~{O!y`o5jLTXNI zS|y!a=fpfE?iXm9jR=2TyKdI=P_Q_E_wcv%>Vif;wzUa8zH=jMrhbx>|GW%!rYmKT zM`Y1lS(LVBoM$u)38UxaS?w9B~Ix;(JI>z8Q+_Hvh+Es z%BD_G?lJvhNP&V;|8}{nb3GnSKV$ny+iqcm??G4_|mubDVS$b8ndn^HBBMswOM?`f-+ zZkqu8T=m5<-9tFkNUv^K;dc-7bv}-U!Adj&CJshdn9Xs$`Qo@2(QN3wk=ZtMH8-0f z9jjW?7E2tP12OV==Ag1p^BYBo+Wa$Bk-Y(l4S)|H^8 zaCiC33iQZtz7JVQlt`WcOhba$n(lp;>9P$KI$>Xj+?({p%7V%_kl7sBF4qo5DSrAH zqzU@m(Kq>$ogKfM`)u%(5$L=bH`1oV$kbG`43M9^oh0++4f zMJ+3)q4vQ$1jGb!PUTvijF*VL`~aX#O@1?uog3x8S2)et&G~aQSHvz)NbUVNwhdoq ztQbBvgkba1gX~|JmQ3}9OZhgLAZ)G;XAtTB{bmW562g_=+J;xPy>ciU?pNOAAn$u!m;6Mqr7QzCK)wYcYTWA>kaSIrL z%{Xa+Z|{;d>QJM+W=_Jkz9oN(=!Y(rww+=D6TX_Oz;y7Y%*{x{bBqxCMglzJSL=qI z=aP)lCFDCLk%;Ug;(x-08fFAIg-r-1A`EY}!HP2*Y)L)`>Cw?UxS>3PW|l-Ie8|yw zki$h&E=9q4y;IJ_s0}t{np8avNr{~4^gi`wnis;E_7W}&$bI);Yxo6eeaTSFddWZs z=sSGF<>t%Qgr{3`69#GWBd|Lp4yPDs5P1n_oa31dVc%lgg^%NU_cnj1XcudmPhD{x z`ymZ_sX$ci>bCGI`hAi(nBPaX`Fnu$Y!*|3@5RyzRoviqg$wf*?a0;)4uYrsRgW2m z9af2xn=b${U8?kPz3;dSK@97wws`+ln~l5vS~Do|`fgaUN$4s0xP^=lxY*w=Namj^ zwL6ha1wqI1KA^KhLVw>a-7*w~g(F~^!~pZTj@=ftm{2!6fsoEP!#%rDUL}d5khg3q zRtoL?CO8Z{Q=uHicH2zu4jHrezO{QiS|hY&KX(K>qCZ`=A{c|3qzhpOBtM`<56jU; z_4b?G6y+6rsz$2mh(n`2BeDe2D_a)#sr5bWA312T=hGp=A6iqIVZ>=#cg-eeE;^76 zv<7_kxj1IUu`%(j!e?`eEexXS0+n%2k$H8+9|>4R8ebgejCnA-%bSXT3yer=eZ>v# zOptBgB#&%}rl(KK?e^4um5oNmBl*$VCVLGx3XL#lj$&{47~w5wpeaBceZd_We{Al2 z8rUwRgltZZSKf47w7w2lt+;bQ&m{1u97q%|d&Lt>xb^v+xHvX&T}U6$oGpC!*t7Fb zDOxAXfA7?5P5u?rNV#|JzJ1Y`X_zmn75i5UWaAfyybDc4_xSdo1L z36F$aIH4*39OFXXw7ZqOQ@eXG!ke@DthYnRo#{HaaT+bR-hM-~d+_mrKiJmSp@JX~ z>yz~%7kFa&;+s@G<=O41BP5}$^KIoJ(D1vDV))U4L&7T9_t z=1C1bv2RSqyYn&L+FN;Ik;*3kp&jBh*Q*}cWPl)?jGg9+WtH%2-hwWk*`*xbJ68pEnmhH+_Kob6ZwoblP_cQ4E$F-*GM(t3NX+^-L2G%nQW)$p=a^TL43Ux3;MtY!k+?bsziry zI~ot~sHr7=&zWV}gFMjEYY~_GA?65RqI%VJa~rRsI%%Gr6nji7bqVFFn_(RDAn6^i zapZ~KS65>k^Grz|CBpRoFP_JD@;h$CkLMW!XwX}kcxF%cA|Fi3#wfIv@$GdOlyOw8 znlOx&Us-hxG9H#lta>Tqh9VPgr(yNk0BV)(f0~Eon&cRGE9fA2N<0MZ(7rY^K?FZ& zHs4Qfsn+nw*9b}Be=eAh(7KS$=$Z{V={-UT`aP@n{Pw*Xhxk7Ocm&&MEl7i96y!ox zJfg;mb9l??)^bClhf2&<#HeL7QDutG9I1tf-$9eSU?{#lRa0uxK}kp}ncr7e@cQ9x28yQn1JK&5czdGMhLMtjuzX={p;V|ajKP3h!rt#^ba9@RSz6xSrke=D znZU1iH`gJrC6`+4)_#`G>=z)ybB2b6yCL+#yP%~uS%Mwrq{&2v5=9k;S*=GsY{-7P ztInB^yfCOL>tcSa7SvLNDcY?ufTyhHlc%{;)(`vAl&sVDF_K<}@~fRh{etW|!QoWo z`z)q?OI>Bh`)i*Wk(zGl&AsPwgwVjQ?h`QMgu6-S|5ykB(%C6YGD8}~v>^1M)O(ss zkp#R&a&7DATb&q6Cexmh>FZ_8KVf7xZF&^hQBU!rCC?8VE;y^8Zk5*d*jyn*Qe0QJ8``qn06xVLBWneh&BIoldm9u}^z~&6<&* z-r=#`aSiuwtzrh-XAXP9_cC@vnkTj3SU{@1FkEP%H62a-iB(A1XG%`WfzD*?)~L-7 z=*}u#%)Dltcw+ArK~GaPWHD`2SUaFJ_d0=JjbASl%^dhll*pEH2sBRgN{vvF1G`BrFfj{NyCtm*$sdU`ZG?qxGEqXv{!fAB_gj9iP6drJqe0HDNtr9y|2b6e2xnu97n!)Q4-V@(Y3iXgqh3X9py zdv|h@1DA7Tjl=i|RZTm{u})WOGKiw41@*<$cA(wKA-GHyEk?2n2esut2f-_9+xKT- zu9QqAaBgfuz72?+6S3xZQrYMWa$*A6{>_u3Qh&f2d>R~ha?QV^w%G9TLrM5mt4|#X 
zwP{`?Tagtwqq|DOUwoCo3M|_hWrvoKsbq*r$4aF8V^XXZN&v`*4Yua5zo;q*3eY^v|qSzyqW;5$9rc+daq2Ys4`g> zhM=E@87*`$wi%*|2uj!$*Q&hmo57=?ZG_YOGS03qDnKxHTxiWH)#3TEgpS^NOJl*q zy;*`1H5w`{vK*=bS`za7sr19-@rfO;WLuBSkOS4lVf=f=)0_ZxlEQ zp(i$}mSw!J?zRpuc;hbWfwzX}5GdDzY$~#q@z4y!>Yfyj$R*?%SWiC+kG*pCUU29Z zr(xGDyMw63nX$=1X(YjM#*Upn_zV|62+(EqN!k{?3$X8tYjdGCQZkwr_f5N5BTA>W z40;jP@lq!nlc#eY&c|N-9msW@L+IU@`DU{)WlOeQJoKaO0r(kL<++l%iAzOn4e+@H zljKVnrmL-MmjR@RFt3v5u1J6Apr|rN1DXEMx+s-$C&VJO(i|57StS$Xo-?~j_T69S zn;9pdGs3LbXHXD8h$5sQPy@W_5T9#D$b>mjJ;$N#aZu^*tL6X$a4Wt#fLFS>?y)1gcj?Dby z_^#b*%|Qx0BCqePs=6N{#tFaSI7WHy7Wt^F5^!C2_UCAY*E^tOw{A?vW08rgqAQ4f zJwrs1Mx(LPFM7I5@ggT_?y6Mk;WQA@ zvCi@V4CZ;b8#R@-38S9qiw{`ie z+v~xfev+7r+6qNV!*Vm5sAk# zlCC23Z|!`jvmyjLhh(y*cFf-?<5md5JPR7>eW;7^mNSq5_g@YWS~X!X$FCoNTn8TA zl6XT1$YQLmF9(Yb7W}5apE%7^H?;?3$(@sMOE>|U`WQ7U$g$M+a`*y zV*5UrAeu}xbo`5$x0Qf3lzlr8LR|LIonesGQ-9gVJPg!HNYJxB?`wT)qXG!|;n=~M zlzx}zq3A{?VlUq)aMKfSZ`)}X0b}hqo%rRO=+A;m)3D4d4wOp2l+2jcvmD8{B_`Qk zzz_+IDA4nj4F91=)TJ39pnwJqUT~%3`BNORdQHOCB9{a?jLVnA`eTr6(Mw=*{XhL` zuvwdfW(b!8nv3m zdf$h9bv{j5r~#g#-}cIkt(2_13wq#$Js$=5IZ<6q)UzFVjY&}DcxM%r3DUjm{kxr3 znF?)s9ii>W9Ta5VKb4GFPLht`Ypbh)t;X~!#84)CI|m=SHAgw?^re0J5$%YJ?G#v< ze@FZ6Q>YgWR%Z+}JgB*!lciitxgeAdgSB8ahN} zb9A)t#)HF^c5isYzKs;~>wuB99c*hJZ#0rKc9}24r9n0-0$1ub9}mG2dZB!cNnMPrND_@H@yMpIk7dKe zyafCD&bZQgp?H??fzKq|Ys!?P`{W^RuI5d$pCJZZpCqONn35FjVi)HG#H^T!>87s&9$5$M)&zB(69K>V#k-q#Dt zOR-=M8XIz&Gm7^{zP8I;Dj4#^m(`yUW{3<1c2WB?HFOPjJ^rgBiC^FNM{ZSf5;#m> z64lo!)bG1I)(v#FxFpGlzT2a!1WQ$c9TMqJT!n#&cvsyfg!!+9xY*>E{{X#Hy|FN{ z&fV%iV_hwNUKIy0Uy;=#G{!TIF>c@q|B129@xWHO-Pf)+8xpPgLOrBp@11y5Rv{gm zeVBD5^9l!Nlc4TEC((eP2tDXZ7ae57Z>I%lcPl0Oz=#E~;#jpGXxw)LPe+pz+3xW+l2$ z67Ud6QKVI^F!LLo=E+n*JB=}lWR}Zpi{6)C-+}v2VXCA)Fk_LJva(S6yXF78sQ?1M zBbjg>cw&3Iv}1b4jc`T0Z{J1zwZo+Atyk{R1NkX-1$taK?Hb93rhf_b=?KHit-!xf zANxq6sW>!j`UE#IA7F!AZCyW@JRH((_Vl8g+=vU1+(P5vM0d@Ms_5+p7 zF=TCS)Gmf*+S7k`slI0o8YzquPqB9+P~7ZZXrn(a|E9GujU8RCzn3_Ap<0k88y2#* zPBEX4f?DIyv{P{_hC{1^(k$b}<@{?(KP{wCCql8_k!)PH$^GwE9(@U7fDfj6dJQPg z!>VS`oF6m>ut~#vI5zuBbZXn%>n0S2^_0e+kC{@&iNwv9{W+9Nt5dy!Oq}2p>%mKR z|5gWrj~>w)yIFy%kXHnzgdd8EQtaMR){AR@<$Z)DiCs1^bB{V1sDaOmDHKl7xGoSz zuWQC}+UeXl$0)Unw*ILX0?B`|l6kIA;!jg=R_EC1yKEMMfHxFM1XyG+yVtOr{vP)aIi06@?b0`U4)ZyNsXS{cbp(XG}Uz? 
zoxr_+Kizic{?sA$8vLoY{p@fvx&2+A zdb+7mHP86?h8*VB3gkguFbD`-bp2gx3UYg=X+ai0x#p8pyJ&7y`5w?;z_Y7^bGbox&5bzJ$}CT~ zirI|psA1ZSuJmU%E%9+bJpI*KbjDP``nLD^QqpK?R;KGn7Kx2X(tSr@h*w{r=@VXq zlJgSv7x->7e=KZnvST+EV{<^#X?NJa-CxjmdYDAcAYl07t*BFIn*TpGvd$-+rt5Y| zdWAc%b6>b*WLN$RG)A6sg9x@eT`SmFUvG8oT1WfkUBpnsUt(AY9Euq zeqY!ZJ0yiE+qM+Xy?vOR?(Ku?W z_7UKRss(JD$6xecHgqE6Z+4Vi<^|;T9Ucy!M2}LgLwro>4)(8Q!8qheU~pEUI_m5@ z20(1U1ce&uu(+%Dh?_Mtj9@m@o55g>Tiw%zZ@4@q*rARLxr9gQjO!LrOh2HxtPU;Z|XBYZoKx1!8 z>3Lpw?)B$hevQ&1e%^067Wo^jx4?ax>PL(Hw0)li@N#SRf!LeN0-1O>G(8vpN$45w zoD#B4AYaopL^&gQv56$Hb9tznODjL88a}-L9mHp6GEa(e_T1Bg=9v8enG0u5H!c91 z2v=cDRii@79rr4hW)9NTS-%uzF1~QP#cwGhk@d7ZGe@&TpVhSvFR%4c3mls>-Q1eC zm_Aqq8^h#+h*iQ8!BN@WBBGM230sIP=L1#C*!%8gLr?ZhmPE5n!FfRNNmODo)6?$7 zqAkG2LboTeuY;jmlaIh9?)J~BRqyij8#`l z2YJ`VZJzsCJg$NAWCa>sIRV$nB9w8@uXO&yN-DeAlf$n&T(=XHW1!d z=e(PEBzid7$iqNFu~xF`ZR3=`?ypSd_m#1MNkZF2=J;dm0dnf4qA$w!tZv6gHzI&u zDuKYQCnN-DX_$|e`o#~kmn0qjpmF5KoMce~U}q~}yBp~i(YW(j;3+@exsjf}ctj%7 z_eVn5Fw2JU9dbHj)b^z`QI%AK81-*H2~Fcz7VIMJZh`s+u6XO({hzUE@B^^ITzzba zHJ>*!E{WIbc;2}os$~74W+^J{h1F@dh%(7^Ai?M%?sZ#VyfJ2Yt~< z<)^3w)EufR-6_h5;?$N>FNu3&=QLp+57M>l-ierWE|R9ow8Lim3x_^4l;s{T7QF)2 z*^8zTK)j~6e%%lc1tLSj1b;%kzIl?D@m+>thmltVfuJ9VNqAvo=o=4%=R?V;GOs!u zUjXrXiNb_>@umoC z9lKb5_65{EhDXgt+8Zw?kKy75qWHO)e@F3#;2uE+d zGD4xF_sUeUI~&X%H&!TcrI;K=ILwh)J)2EnfWKGQ?)um2*+V9m14XI+=<3CwWaPL+ zBLxFG3g#XCCwR6RBD^$^R@8kx{bO9`r)Y@?IvpBhJD$AtcmySdFUU`$bLEo5CT4T{ zr}y<)xfg5Y$e_`yxGTG1>JSTQ&efhL!%F05BhGa7f&YbhRFJX(WkQ zNLOdAa|shs_`1>FB$wUbVHQ?1+{$N1E9Q0NR$TA#&9uJ}P%OvZ4hC_LN$dRPR`cfm zRxhLXIb9MYLo|*mGCP0%l1ciPQp9I*Gv!oRwdA(UHP#{)C*y42h1q10j0;;(2d2Q zaoaNUmpBG(&8&4&!l~lU;8|P9K98fgjqGP1*QqpbKwm!(`@0>XH+v?Id(V0=<*xs9 zuT1=bLy6()(%le$NP!OHHKKt3tI-f%ZgZtup|L3^rI8Yos}7K{c?b$1*HUqPX(m^3 zTOOEY-t~b!Szyt>SHhlM?WIi+uJ#3{>1*PvC7Gw^^F^YW^&b>!9$H%n_*X${qAkV5fh4m_m)>x!9f3>xhH5QUAZToq} zH<7PHd^FZK_FY@*-UWH&t*63r^mEoevQ%goCtUsxD{;b8M+(ZkVx4OtLz8ChFe3{4 z{_aYJw2ofJ56E?bK2dj14*-uw0L$^JLrkWB_4ZU;fhyId<%$c6M8@BfVaLZ~2LdT& zDlc@o<_I=Sqdzkb*a4yyL)$V6WM7|u;!U`lxc3lR7eG(_l@3O?Cl&gklzM z+wS2|9sn|*40A}ZZr%;$6NPCH&Z;=L8>o`xhR&zz-@;F+0mF5dz4||r6$Q4NGxv(W zT-vkK{uuFv5Txim)f6<+dzO)y^(<Oue^zXU=WSg%dWR|#_t<-})wyIfiFu*8yl>8>@v^Evz(|I6z$EMw5DjhU+gW#QM zk75LD(PiSMIeZXIYV!PQt#X?bb+%CQ3`Jn(q2RBx+hgwYumDQvMYeY2i#Cub;Lh+n_jWRfGYC#Ah!y?b)+01oinPoYVFtPsNSpRwho zp2uZ9i(6zb-5juab8?akiOZw+*KZT7SHN;7VV6t@0z;r#@&jx5jpEYdgB}6mTj2rP z6aV}*-eKIhJb8w#e2$JQzINNKIq_#p#SPAZ&u;2k%;#Odv6^C|kalMwfwfFn6Q8UX zP}<<-K=c)I1)wWODV0ssX$|!HsZ7+*GC&e1kW$;ER``y@jZQ`EB98WWB!|&Hgp7G* zm@T2>OG7d}f!o5GZnT;^41Ol|%F#G~!M1esdCQtGu&aI+bm;td$tZSAm2ZK`(xZ%w zFhbXRpv1w(wjI}8pjH6>GIucV6c#IZi}tTD!KokPxh0CAR`#y*GDbwTg51~|ZN?@(8>SiudS=>I($Il|}-7#51bJ0R!x z>={~>L2O{|eSIImp19HQe<6ekP+d8}haz{X!XE4Sp<7hurd>8lw-J^G^yBPqdcD_Ynh8H0)YU4^eo%}B zVSPG@DMS527`(VqXDgK=i;IkTnH*cpv8e>#5f|`S6mMYYf`p&Fn+21N-)vh$=?$4U z1nWT=1s}X}9|DzB9WLa*ZC%=zXUxXUQGk@EuvFm|7XKJiy}1O!0{$_Tr8lg_h9cu)8%_& z4pZf>?^drfSD~~C)iAM&8Hg+tgrD{+p4>_*vq6J#F z(ypm3#eBn@{)T~%7Sxk-QoM$86x0b?H=a@B^F<|rCZ=?kf$)oGE_62oOjfjAG0fbK zyXVjxPWH`HCVRaT4Y|F386Fz=$MjNHgOXI@!FqMKF(~CQewYu@iR$Kmcbcj8%_kas zfEfoGuYyTP4yoYbRbd518L*u*rR=q_bfhOK_st_{g0i$1LR-PU*zuFL{pXa@bBtaJ zpJ|-Mnq`V~;`B!1$aTGbUE}ca@OceAW|gA!yQG^bLz+kzdhge!?c#l@r0ldXr~Q)P zo2=WImpGw%hSpsACq)%>Q4nWLP#Sad<38iuI_Mn|%0eEt-@?y?iWufSaizqVj|L7kL2yn$6+VIz7x$pa-SBD{3|g|ckgaJj(LzSYLF zrO6zyW{@Vun2_kfI@anmM<=C-+}ztOqYnnVm9+iBPoELbHF%15Bo(Jsn)zqW#Ro#B zg8C*V~kqu!S%g+3$i#K?-8CiR!tQ-mB1)ZCp7Nm7;G~JwweGy(Y5uE8>u4Rc9 zb#$RGvC`K5rMJHbPkVdq%EB3L8E%v32=hZ_(JPGVFP_M=5@2w?KiZ%kkKajKahIH9 zACG))fU0wsuPOfVJie8^vfjFLe@p?{#F3*Kn8{JPYg(0ELWbbTb9B5J7e0l6L 
zSN=t?G36LO8@riAPd^-NcGA(wXtW;xRzMG*E0)c&$omjI6J&APl>{)U<(RjVnmvmq zK9`ggM`|Hyy?hiy@~NMD^Vig^tfG(H%M^dW>P;-#&1#-aStWPL8ZJ&umoN(}zA5@L zUFLeHL%e6^(9M|rZHLl@ZgG^u$p_}*3nv(>Y+05{nr(oI?GEM0u!Zz2eP_}@dHpW9 z55B~XKS@*_KyPPf{CrSy4t~0WD|muOj6?5Un9Yhum=rurT!C#n=p5io5$hxO$ze;M z{n{lFa?Ia7PeLGo&!vd3h$b~rMxN_WWXTCm69WZSQno7}^A@6$0)YnCL4MCs4Q~$*qb`d+@d71AGihb$y64j zjnXFEmn=-RHg-3rX7a7~I`g_NdqEBFswAEWXEK|@!s-#R?~VA9eEIBqVuz{I_9=!w z6wqk~y9++2qT71sSgmV9>7`V*ae3iN0ZcLDEGYaOaiO9(iDWM2M8rl<1x_b(Tn88g zs$9pla^R2LD-rLuO(R7H-%IC1H6dkw?{|KHY&(C!jTZ#h(!Rl!gD}m@fN^$%q+>kR zmd(w3BqKE{(%cf1u5?)c)4J^Z`~EF?d|XRzZwC~(Qx3bxM)bNx^v86fC14f&bwu7T zCZfDPd1NVNgHNo3y3^bgyx4d$C2|}bFk5J^6Ttv-6{qA}R|SvCyGWBkPeo7DV$Sld93xon-4tal3hTmh_Sq$~2WBduQ>&UeD{m}2q&&dHgWKc=R$_<2(o5>ib z^m|P&4!}kH8}hQnb5ILv-2ro}hD;KVNXN8JM*ujUI%VatWihDAeWFO=;#?A#ZezC2 zr;}rOk(oSNPIIwUP_ue@(rv=$Cx22}MP*4il_O1_{6FAW^L|<6C_3s$QzY6&r~= z;I?w@P)1ru_Mshb_VOXcIqI;+pCyPJ236ztW)1m@8hl_cG+JmzN!&GQ66N?YG7vIE zv!Lv7?ssGrZA9DW7=*_)po;+mDRT1gf$w;={NnDsRE+SkKS0}pq`QY)!rj6^t;`L= zR;jSeK0pG&d!qSz%KuSW^pltEN&yJ5Heb9|pWB>QkgLmo!v{P;lvARkU-O9$d>2uU zEEY2wa*`utFgsN4{bZlb8=423BovLjImLDJZG1rJvz24#RGT=2y*h@=Dhl@xV%3TAS&FI+uvRZZ`u}+ZXhl37|0oW;AhQF* zybzp3Vz0~ZVDK$E%{>5I0#!m|_tV`_2)D7#SzK8Q%(M^a9qluQ;iL%K?mNgM2=+m_D!UYmF`Kw^9aWy8 z+rWxTQFaU06{h^A=~JN$c=Qw;+`lP^YlGcuYnj#Lg(~<8?Fy)MXp!KeH(o~rgOpm3 z?QK+_byTT;E$BUHP>PsAJlXlqOgQ(&{c%L+ij5m4d?ejiH-slHUlqX;9FAU}LHFO! zYxru>2Gbs`t+xdTx|eJh7WlBG3=* zu{*E+bV2)WtNU))OBhJ$ZsRnSDV~g+f6A4AEudO96^6O>c z$2=-UkeJkIYbiqbnw2Wm$_F1Lr0yD=|>s!KIdgUnHTuY)j=}Cv~mK^NX+lTW0>l>8FkBJ zW3P+q-cL8iE=BQ*UI+fbKIf{NHbus}sPCv*USA1

uFLr%p@Rw|1cDPZPLUi#fV)NEcnly>O0rV#!Y8{Q~6>MSgdJ%vX^`7Lz zXf1=tJGkWp*;;gCr1lThl3{w8)ZveX3f@fSox%&ot+7PBTX-!YY2|d&`R+5FH=Ky0zqV!bFs zye3OjpCwZAZRMP+EYLl|7=AqVtq0ZMNvk2v!rro)^0r`m($}j#jiVCrTcr+LI*}uZ z_qxFs$FS@7Q=4-ivZex*)<47q9036E+4i^l3O8njpCDp-jJ06;Ync4J0m&cqmoi)! z_s`{GX~>W%u1-0mnF60}zyTwxCRtdxMsu&4d|^Dew39UNd>2a{W{z0TSE2NE}n+?0zv zGBqos-`yQ4W`~ROmdCfZ-tA}ChHQUm%}*5gAj;SEu*<8p>K(nRN}~+*uZ!csDm?oz zGLiD;SIjgDy-iwB2P(B7lUi}aJ^)K9dCF3h34u!TL45>^RpZP z$-r=jFDOI(Qick$xT|dt{%ru!o}HHqME$|vhvl_>{v%p%aoLWctR4u_n3&+A7~My7NYzL-*7zu=zXGoMbI zk-^Gx2pYaXIC2-r<1xTCi?=9%KPPwnhq3 ztA;BB_D{LnO91&)NE>AgLA4?<_ggH0sp4C{r%Dkbx&bS z;&JfZ*rNsvfCU6^aX)QD#^I@S-t=R}kqJS&+=|r$xKXu6T%>#GO=1ykUZF5Go463) zYzEq5gT#0Vy~`%U#ncr8(pg%76fQfAT@BrdZeo@bQin84mVuy+-PJwyd?_TmEBu)^ z#lBkd`|doJE`e@DOU@C~m8{^hPFlgj`o>PYK85T5#M|X~RaP4!?7#_v=;GJqVC%eH zzLGraW4+8I4QbZ9#*8Xu%XvW@8M&NCXi!*Iz~LQ>=iW-_p56x`sDz&YO@D&On^r<7 zk{i&g+7HiDJCzdwru+7H6c*6_OnlpDU>rA`;t5NDVc}bpq#kV4g7JTb?D-J0$>9v3 z5A5)fL2y)}Sce_vb`&?p93}aKqLwN;r&r`THaPy|D56#dIgk{92X$VVapkt6cfKGy z&$+u>3p;#}(-`69hs(rn-Sdh1ij5M>e*fx5?XHT+0FpqpBfoRL8qkMypEr-@)*=lA zG7Y7_xo2#{O3ZeC)f5-k@V;J}?*QS0<07OPL7C~Igu7)U1dHgA40wvvCI3z`0bTW4 z?8Znr`O@=&Be3?uuP9ab`~_D4c#AWxMEw*A)G{q?X}!U z237g}l@n^vCg*Jl?Es4XhUGZ9Ol%QrM^k&P|L8`8($i9RX__p+1m!L=^>kXN)0{x> zm1O|;{2L2dTCe91KL?XX$KFW%jA(>K2yz*B4g$>D&yWz_2IGUkhm<@G))`#)Xwh=$ zh}gOnbpUM{6aNVIW5loZyd!E8R8}Dstxx;ucGftO#6HX4RndnbnxLTsMZ#vV*MnL!$|8$+ zns@AZ-y?^Dvt+~=T~#uMJcNBvgYIr^0NP$eql2D3Lp8+Oz7p-DYJF+^f`i@L5I)t3vgHf2M@bl%rX+lf9^-URI8Zx z<3hD*;WVgvKMd^_+ z^by_gVR`vNB)cfm4m)n#Rq^$qDRIdi%*EaR?u_VngJf9?06JMYBMQ>x2~fOuIu%5j z9iC`Dm;=Yy4AlvYmAD(&-y}G{`Ks2H3_+8qO<{E z3?AmXDA6(I=$%?%0Btkgs|SJ*NzN+G^xm}mdjwn@=J#PYrmGbe>zkaNU?w4I57VH-cwRfDckb2e(70#t zMAnQ)RES zg*GNSM9Art;d8H_0p%!gnvH+{al%~K@=o>o5PPy`=4TI}*k%nvsX|btUgULqr4YGokkT6gO*aCIe4-f6ATCHWZp)Y9BztVyHPBOLMrwc= z1?Zv9C#mm7AktzucH)p_g{6ato{ijTrJvd`85a-kL^Jt5$MRNr!xnvsU8H>{KuwEc zv?EJAPCm6ZXYLj|pP$W$dC85)o6MA#YB0SoS!B8k8a z#nT~wI83jtt()Zu%cwEVA~JS&nXm5ENU|r*G3Wzut2KEZXpSDuUhU{^GGV0Oi%e+Z zhu}mDp3;e2XvGnEU%G=^D-=_JF@pW>q0r|&rJF1-FAIBY8w#S7OF-mwA?-5k-&kNk zdMQaZZQ*Jt?0lWKxuThQ(uy*~b&)>DTt{`9n%hYCM|6lSuJ6K%dbSo@sK*<*A@@j&$S)lrL>ngSoGE?7I$R540R5X6%!Np!7OhpTQazUu zphr$FI%{(IyMdw!=6;9wQ9{;ZmM;$8)Nu42!np&`9#i?q%}bym5VnH$d9tJrD;r?ZgBxZ6tp{b*AImkhhEVaHut_4wbU?WzO zTN;yI`w@3^nDGQisf2(|i4$m`m(YZFfo8&nqU*!Bul}Y?Mq-_5LOwvxD&hFQ-KqV* z4w5vd8`GWh-yb;MwLBqBYPSer>IiyL*!YmAWF%SjUS3!&mYzrp<8?HQX2mEO90?D!EO zKDU&u5%(0ac??m&w5k3YchdP(##SPM=2@f#*2}aT>-BD%F8M!x1_eid=-slT|4$F2 zxpO`(LS!diOM^qkl5aU=m1`Q<~Gn)S&g8giAeLwDzyYTB-92sJjXPzlaZ zC-bN2wu2O%2ix=f`Eu_~XWoY#8#UkZYGW7|x_&OBX6whtid^gZ2o`I`=QwFy9+-~R z^`cOXEU*3pECvvZNl=NVoiKGpU9)?>6*n)Glqaf37xC<={uO~g;d-ARN%8FtkoHZ# z?a11(!QcXk^G?Iha_xN{b$hRa>+23uEY{!%6pJ&PdDDSZYZEGZ(!0p>c?BX<6DB#O z$?m0IAw@bF`+cs>^4)c!KcecZC`C32yV>FlH1m`l< z7(eK!O#na_v`jsS_=19#JeOF~NjDs=lWf`jj=milyN0dwZf zqOPF<-|W~Q199|g`|gQ$q=tlxJOHgEcKDV|)r2Y{R-LHXF(H^@N$aKUpl9>hqBOa+ z|In$GIcct&6#G&LwQtaO2^1!YCFw^$o#q2U;ULhcqg)OFx7A+RzBy)%4a}Kwwnr^} zQt-os@NL83N?^uO>QP@Ofz$K%JRj2-C@JxT!+_f|v+#xCiju3@E6M{SoHFht37Ms` zOsthG_!}vjDe|Vwtf`{N04~W1`lND^0!F|jIU;S%+3vv(UG(%eD-;P>-*J`Kq;q>} zRd>ucAwyFE&cMUAK=av{Pt|NU5f)JMBY3rNDV^yvL{-FNd2k1D_l=;7(ot~2I@Bcu z2`mDktcI1&f$b{8_(eeM6{gDPH5IhtM>HEHw6EKajCKsyMpAJ=c(pdR;QgBj909ga zFMhH!VnKmtMAmD$;aiNY+S(GmLe!lWIeBUQZ)tw0q{l*%4_nXWrDWPUhT!xnSUfMA zPb%d()lxgm57uA2FHM-f;@WPd@0F}fC&kn7kt(PsXu92IB||LXc903Av>qfUKO0<9 z9`F|NEfa;cD5yR;Vh;H;PQy}NU@DnC(`Ff;mgrkaLDQ1Y zgy!fP&R5lCynqvViBY(75L=X-Su!xHGL;;rwF!z}--|M&1t(D^*W!kfjo~70#%5+4 z^8)!{`W9(t;_m<%v|cblw<=acdvV=cKg*#AEVFFGWn4G1aW`LRdJ9R5a7~bErk*zh 
z=`Bzqp^UFEco;{I>PU3}#qG+XKK;)pWXocnV_ny4T3X^k7i+ zw;#zLSVXQEz*`W*%YdIA-zOFruo^eu>2?79h z?2^Ej6R?gDRuCT`w`s#8%E|*%EzT^Yo3DWbS0(5yhQyt%=KRZEOK``F8+0E*;vLnR z!P9!GU@V`7aY%H7;H~biPwJUGwLgBxEA$li#vNfZ4=?Ts5{5LR(ZpBvT>-qjJ$?;pQ+>@r&S}hN(YZG3 z;*;gr(>Nj2HsJ{E`ZC+8;n$h^F`P8GV-(aRbo44H5~N1&LRyUR8^Wz%z>x6yB&y%3 zZ<=kq>yWK%GlC!l?wNI!e+#ZMv^|gDjJiQRtbpH+^D$h41k{Q5{43%i(8#3qeP&DI zx-axCQ7b28WWoYzZ0O)*f`r*gL1wf(bZ3&xrj$PCo9$los&SW;D`y1;mL+&QJZ4Sk zSQorFx$*~B(;(tC39Pa+CKI())T9=>WPwJ|EhN0SX~7M zYOBvRf^4&2^?D7-x5&*?JCyuCo(7lQ@l_uff8-JIqq5V*&j7RKtmArerQ5h3&}lVY z*5CE(ptxZ$hNN>5t5uDp9eBkBLHfGTFix&OmGhyZuMR9N_$Xq+ZR}a&mc8pYmbPP@ zJ(0uD;;r;~V2^;GcQw707~tbB{Yn)>UpX#~j6K8P_cmwlC8ef20I&3xP3K~{p@}2` zud$YE?5*wO4mhAj^K%N69j+ZomlLozTA9L8b;q3QkCYeJK*${JGCTp6P$>~^S2G6?DQ%mym+F-Fl!d&*fd}c1sc=XfWi3dPwvhg0LDfQUonsMi@IvLc>U4##; z**}-kPk#H?oIri=_T=xI!bv|*Rr_L&Jzh<8U76vHsFxbt(UEd85glf3^;3Llg8-L~ z>}RjtuTwJOmwO&Hj?z^F(Hut8YW0+pQ8X zXN~eJJd4`W&t>F0z$Qw<74S3dJ6Wv#FIB&%>|c^sVMg!K1jJctS^_e@ zUQUC@HhtUR@5(MWxCg=uz6dp2r5#`}g88M0aU}V^mP6^Vh1A*1IvOEIz*CfZ6L->kk3Ug-*dpu)wVI&YX`+WoXw$GA*)~y!DSN+qNCksa(R3=h0MFsVMxGW z`j?Kc!iQd(_Lqs86pXeIA=7+yJ`Z1*C5i}F)-s~3VdI3~a+nA^Co6GDarn8bg%;QtU>fY ztKPL5cX*lhzH-cd&vJ8SU75oEKGM1ng#~V)Tg;OB>r&FSxPm~}(_jmUIw~KpsO`AO zMFtl`q}xVA#1CMXRI{gU-!OE4d8BQ!4CcbM&4NP6!s6reI0MieR!1(fOAcU;_x zcI9J@Maxi{hrs#a_yMfHLZ=Wco6QGj<1Exq>@7e<>}dvN0i`@*Ad_rgtqw5y2F=*N zOPf-bwB&3yp7aVw|M)fzL;0rZ7>8Yt*_X(C5$R=}G`ey{xB3L-aqlAE+y~rpV9VDv z80#^;3m?CBTa78*>eToN>`2Xor;T1Yt1t7X!MTZ4NFk@PQzs;rlf3?6m9-fxOTRVe zfr`l~5nc1J;9>1?7nQ;ct!9`irir+EVbDuTxDW z!pyamT}*n%*v?G7T9^+xQe`XCO2%3|mT*XsyJ}K#LULyF5W`Z6$E>Z}!WiPdf3O(H z2Q0eFzvUf>9IyhYzOH|3&m=3T6cP!uw`fw;JUUnsp7D(+w7>{dF8v0Fw!(YR(QBI#ZVx#e zGMMK3*Qi@#H#sB74Q~CE&B{Dq_}d+WgeU{62Ze*k8^S<}bhI4Y!C5=4k-_OoSl1c! z{0|c!&jrbl#TTMQO^5?( zmzoAPO{l;gj&wH++l@2KXNoa61u`@IB#%u=UPuijj_XHoUtF?PL7`8&6KY=%Jg^>7r23W9A zZIow*x+tgP1Iu0uARfw8L|CYi>g+qfk73e7*+!v|p z;VAV+E$H!FO%8Wg{}F9Fu8S_ntYL^kRC#EZI6=2uu9^wgT+ELn55#c^;L-FFU}5M< zsmi@f90(!?a>*=rRI!#z#HMl*D^5Ms*b#}{^LXL8mBJqYH>toTuY)I`BY^}jW}XzG zHh`g$yjwqWs)@kK+2Wk;r7e6P<+8C6CaLynV48?{oRY5 z?O5(akU4y+?#kTIml?qD8#HO+Q-GzV#ol}Vm&6u#NAiCLF>}zjv!8WL&m(8FT(8KX z44f~+yI8Id^TFqU&8x&H-6qT?eL8Ywb31P1aG7=;3}l(VeyNok`xQT~@cZY>g*k6x zw|W#c**1Wbq3*tX--z7wE!IA67UcIdH&-fnH^G)@`3B2xc=$~k7YaX0_;mld@Ng^u zAtI-*B=liF?EA+5zm6BMl-Qt)$h$C=+=~7LZArzDyfVZDOs04?Z|S@K0L7KVUiVq0 ztpC|a>G%D5Q9MfF24~M|2pk4|4UG{TND^KSd6y?HD&ghzI&0<`pj6JxTfu2})B$~4 zBsFz?(z9KkbZD0<;1Yj?|7=G9V@grbIDyVucn+84?5+$}R*_v7*>fbOQo5|Ewpqg2 zqg$Ro+iuG;^tbH>Mhggmcf4KP0b0aTQ!w`g0g?S6;8=FOv*xUK~;2P zz^?=7xCo%5133nnpf=xbmb-H=9Qjzvg({gDYDr{hH2IBD$_y zxR|s}VI%-jtn;QX@2u1^1L(R+v*8+I9+hIjdtz`^J=9H1S^UqF7p9@K^-50AcSxsm z9U`{&qE?s97$fbl;UIj`vI!|XTn=C7^Q{6$S2$JG=gh(;MyGeFE2JiSK*$p4&;SCq zvZ8CCOj0k?{ib;f>?uf3nQXBI;|miB#ApX&e@pcH!Jcgex%iQ%{!A6L2{06ThB$f7 zK`DMe%2G|E-CsD=4$1cZu=yY!2xL0VeAQx?7l|kAD}?dhU%FupCnKj{YZJyw1>l>d zB?TC6O0hLJl`h1#;BcT5p;_8L>h<-&>qewWC7=!2kOx_$S6 z(rIgQpFPD~b@Y5zO0#5f96u^3aq0WGJx>Z}xFO*t{GSCtO@bJ(RvD%GOx_4_QiP;BH-(e`0&oz#RsPSfRKgE*aCI}pu~9pw0Ir_RNqe$K z!Xh*X?_?l4Z6#(~brIr0mR^I(Pc?`dp(`=+_kV8iY09DjTGv?)=?x#`X&x+r32$~janTukO<$;kM5 zfq4Uk)gVP=J@HK2cSj2C?{(+iO^FT7SKh;>8DlMrAdbZpH1O&ms+`XNTN9<3jy{;e z$ROvYDOraXqeDgXq+KFW->cux6|Nq>KC>aUOhLbu;;V}LAhnhQWtUI>ccv8$1hB@1 zKOv$>tQtNphM#n`zvi3BM5IulKTkOR#~;DER-EP!fcbefnZvYARHi2?6`4TXSPeR* zDn%`3uJZs2Z>v$!O0|IR-Vnyvd|tEn*vSqX=EQMHG6|cT`37Z@2e^9xz29=fy(s`n zcqGTzilT8__ov2WV|tcTH_lrDwJhWo)JNel|9k^J#As&uM~|0_q3);9i+Pq~$U@8+ zU)C>vZ-Cnx9zC+7p}JN>UR!?}u~pE%y!kdjf4b;jeX>2AjYOb}y8l61eGuEanc(n7 
zsgyevu0hTuE&sr`ft6_o=aykBpOKu}vhwYha$;Wh{=^nhPq~6x90*5twRkk0@clk z(Dmm<8cK{WSEMDwx8bEm@8D#2R}l=C%<5l4CO|YkKo7Zozo#cUAnH3Bhlxg=1(Ck& zv6jsXhz-Z;eun(ootp}gL;cc{rRF%~n>UdiRG4z5p>KB(Ds&c}Pvd7LAS(kQ8}77& zk>0oVa<&ib);`BupP`_B=bJAo;dz~fV3Oo0B}JQ`gen#U$rx8lWkHmHjQABgFl-f` zs;KGF`Q{dLEJ;YCSt8veK>Y`E0h$mY7it$PJCG*ouBtMCyW%_VHec|ZwVh*A6G`^| z_(#ZLs#rc{f@uPWTsc(V#VKKz+X8)LKtg+E8m-lmk^iY1qAvZ26_Bgv`u`ZOjC8k@c!n7hL4lZ#xXpP-Woba>tL= zI^Y3F3-d+wWRDJLxY-SCB&1rw!j(ftBE^0KfC3Pm4Tq6es%=hjDnI%;EeGK2&<2g& z`q>w#YQK~X23wmxul#|K*D#_C0#G*xt7GAKpZa7R^9^sb3&Hj$0g1lS zg9@A-q_4*D@R@pLq7}u2kk&qfk^rQu;zdY`p}H+pLe~x&^~|DD_vjGIsaU32QGY3C&p31+t2Qf9YZ-`~A5>WB`tEFYs5H)Ue~_mfQf3RaQ= zLeH>Iri_G9O{KSONbE28PEJnMrY znf2K}{`@aC{pp+asPQpQ5vzsQ>l{Q@x53W60l;48?!PbBg($E{p1S z1|{$et}p^7h=0@P`{g(Lu4?gr?S(=sh6Wwt-)XW!e#emv>;TDLzoP|}7chbpFP!@2 zULejAl0W|Jsj|uXfh+#;XF-)k&j)8ymn~QI72^+2VNrqv|Nf6#{%o@V6fgRs`@c?c zJ{0Y@UaztI?T1*@h`tE>kNMR+aJp|S6N>E{twE@i)`{U2hKYrN1 z{MT2ulqCM!pO*Fp7ZQAn-~P~eq$NFR(s;E;6p)= zxJ~}|KK|uV0YcoLY>oH+JMw#o{N3MQga31Q|NIF4+1G$^`nNwO=@_ji1j2&jTNFa5 zdG0EZ#;6&OgI?a9mt71y=<5vM^<&@xx66*RctQ{?vV73A` zPvRsQ>aa)y-wM-1-oy7ZSe4_CpRS& R&xZcTfBXMGfAur)KLG2*{!st` diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_1.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_1.md deleted file mode 100644 index fc24195..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_1.md +++ /dev/null @@ -1,219 +0,0 @@ -# Problem Context - -## Introduction - -In this lab you'll build a MapReduce system. You'll implement a worker process that calls application Map and Reduce functions and handles reading and writing files, and a coordinator process that hands out tasks to workers and copes with failed workers. You'll be building something similar to the [MapReduce paper](http://research.google.com/archive/mapreduce-osdi04.pdf). (Note: this lab uses "coordinator" instead of the paper's "master".) - -## Getiting Started - -You need to [setup Go](http://nil.csail.mit.edu/6.5840/2024/labs/go.html) to do the labs. - -Fetch the initial lab software with [git](https://git-scm.com/) (a version control system). To learn more about git, look at the [Pro Git book](https://git-scm.com/book/en/v2) or the [git user's manual](http://www.kernel.org/pub/software/scm/git/docs/user-manual.html). - -``` -$ git clone git://g.csail.mit.edu/6.5840-golabs-2024 6.5840 -$ cd 6.5840 -$ ls -Makefile src -$ -``` - -We supply you with a simple sequential mapreduce implementation in `src/main/mrsequential.go`. It runs the maps and reduces one at a time, in a single process. We also provide you with a couple of MapReduce applications: word-count in `mrapps/wc.go`, and a text indexer in `mrapps/indexer.go`. You can run word count sequentially as follows: - -``` -$ cd ~/6.5840 -$ cd src/main -$ go build -buildmode=plugin ../mrapps/wc.go -$ rm mr-out* -$ go run mrsequential.go wc.so pg*.txt -$ more mr-out-0 -A 509 -ABOUT 2 -ACT 8 -... -``` - -`mrsequential.go` leaves its output in the file `mr-out-0`. The input is from the text files named `pg-xxx.txt`. - -Feel free to borrow code from `mrsequential.go`. You should also have a look at `mrapps/wc.go` to see what MapReduce application code looks like. - -For this lab and all the others, we might issue updates to the code we provide you. To ensure that you can fetch those updates and easily merge them using `git pull`, it's best to leave the code we provide in the original files. You can add to the code we provide as directed in the lab write-ups; just don't move it. 
It's OK to put your own new functions in new files. - -## The Code - -# Your Task - -Your job is to implement a distributed MapReduce, consisting of two programs, the coordinator and the worker. There will be just one coordinator process, and one or more worker processes executing in parallel. In a real system the workers would run on a bunch of different machines, but for this lab you'll run them all on a single machine. The workers will talk to the coordinator via RPC. Each worker process will, in a loop, ask the coordinator for a task, read the task's input from one or more files, execute the task, write the task's output to one or more files, and again ask the coordinator for a new task. The coordinator should notice if a worker hasn't completed its task in a reasonable amount of time (for this lab, use ten seconds), and give the same task to a different worker. - -We have given you a little code to start you off. The "main" routines for the coordinator and worker are in `main/mrcoordinator.go` and `main/mrworker.go`; don't change these files. You should put your implementation in `mr/coordinator.go`, `mr/worker.go`, and `mr/rpc.go`. - -Here's how to run your code on the word-count MapReduce application. First, make sure the word-count plugin is freshly built: - -``` -go build -buildmode=plugin ../mrapps/wc.go -``` - -In the `main` directory, run the coordinator. - -``` -rm mr-out* -go run mrcoordinator.go pg-*.txt -``` - -The `pg-*.txt` arguments to `mrcoordinator.go` are the input files; each file corresponds to one "split", and is the input to one Map task. - -In one or more other windows, run some workers: - -``` -go run mrworker.go wc.so -``` - -When the workers and coordinator have finished, look at the output in `mr-out-*`. When you've completed the lab, the sorted union of the output files should match the sequential output, like this: - -``` -$ cat mr-out-* | sort | more -A 509 -ABOUT 2 -ACT 8 -... -``` - -We supply you with a test script in `main/test-mr.sh`. The tests check that the `wc` and `indexer` MapReduce applications produce the correct output when given the `pg-xxx.txt` files as input. The tests also check that your implementation runs the Map and Reduce tasks in parallel, and that your implementation recovers from workers that crash while running tasks. - -If you run the test script now, it will hang because the coordinator never finishes: - -``` -$ cd ~/6.5840/src/main -$ bash test-mr.sh -*** Starting wc test. -``` - -You can change `ret := false` to true in the Done function in `mr/coordinator.go` so that the coordinator exits immediately. Then: - -``` -$ bash test-mr.sh -*** Starting wc test. -sort: No such file or directory -cmp: EOF on mr-wc-all ---- wc output is not the same as mr-correct-wc.txt ---- wc test: FAIL -$ -``` - -The test script expects to see output in files named `mr-out-X`, one for each reduce task. The empty implementations of `mr/coordinator.go` and `mr/worker.go` don't produce those files (or do much of anything else), so the test fails. - -When you've finished, the test script output should look like this: - -``` -$ bash test-mr.sh -*** Starting wc test. ---- wc test: PASS -*** Starting indexer test. ---- indexer test: PASS -*** Starting map parallelism test. ---- map parallelism test: PASS -*** Starting reduce parallelism test. ---- reduce parallelism test: PASS -*** Starting job count test. ---- job count test: PASS -*** Starting early exit test. ---- early exit test: PASS -*** Starting crash test. 
---- crash test: PASS -*** PASSED ALL TESTS -$ -``` - -You may see some errors from the Go RPC package that look like - -``` -2019/12/16 13:27:09 rpc.Register: method "Done" has 1 input parameters; needs exactly three -``` - -Ignore these messages; registering the coordinator as an [RPC server](https://golang.org/src/net/rpc/server.go) checks if all its methods are suitable for RPCs (have 3 inputs); we know that `Done` is not called via RPC. - -Additionally, depending on your strategy for terminating worker processes, you may see some errors of the form - -``` -2024/02/11 16:21:32 dialing:dial unix /var/tmp/5840-mr-501: connect: connection refused -``` - -It is fine to see a handful of these messages per test; they arise when the worker is unable to contact the coordinator RPC server after the coordinator has exited. - -### A few rules - -- The map phase should divide the intermediate keys into buckets for `nReduce` reduce tasks, where `nReduce` is the number of reduce tasks -- the argument that `main/mrcoordinator.go` passes to `MakeCoordinator()`. Each mapper should create `nReduce` intermediate files for consumption by the reduce tasks. -- The worker implementation should put the output of the X'th reduce task in the file `mr-out-X`. -- A `mr-out-X` file should contain one line per Reduce function output. The line should be generated with the Go `"%v %v"` format, called with the key and value. Have a look in `main/mrsequential.go` for the line commented "this is the correct format". The test script will fail if your implementation deviates too much from this format. -- You can modify `mr/worker.go`, `mr/coordinator.go`, and `mr/rpc.go`. You can temporarily modify other files for testing, but make sure your code works with the original versions; we'll test with the original versions. -- The worker should put intermediate Map output in files in the current directory, where your worker can later read them as input to Reduce tasks. -- `main/mrcoordinator.go` expects `mr/coordinator.go` to implement a `Done()` method that returns true when the MapReduce job is completely finished; at that point, `mrcoordinator.go` will exit. -- When the job is completely finished, the worker processes should exit. A simple way to implement this is to use the return value from `call()`: if the worker fails to contact the coordinator, it can assume that the coordinator has exited because the job is done, so the worker can terminate too. Depending on your design, you might also find it helpful to have a "please exit" pseudo-task that the coordinator can give to workers. - -### Hints - -- The [Guidance page](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) has some tips on developing and debugging. - -- One way to get started is to modify `mr/worker.go`'s `Worker()` to send an RPC to the coordinator asking for a task. Then modify the coordinator to respond with the file name of an as-yet-unstarted map task. Then modify the worker to read that file and call the application Map function, as in `mrsequential.go`. - -- The application Map and Reduce functions are loaded at run-time using the Go plugin package, from files whose names end in `.so`. - -- If you change anything in the `mr/` directory, you will probably have to re-build any MapReduce plugins you use, with something like `go build -buildmode=plugin ../mrapps/wc.go` - -- This lab relies on the workers sharing a file system. 
That's straightforward when all workers run on the same machine, but would require a global filesystem like GFS if the workers ran on different machines. - -- A reasonable naming convention for intermediate files is `mr-X-Y`, where X is the Map task number, and Y is the reduce task number. - -- The worker's map task code will need a way to store intermediate key/value pairs in files in a way that can be correctly read back during reduce tasks. One possibility is to use Go's encoding/json package. To write key/value pairs in JSON format to an open file: - - ``` - enc := json.NewEncoder(file) - for _, kv := ... { - err := enc.Encode(&kv) - ``` - - and to read such a file back: - - ``` - dec := json.NewDecoder(file) - for { - var kv KeyValue - if err := dec.Decode(&kv); err != nil { - break - } - kva = append(kva, kv) - } - ``` - -- The map part of your worker can use the `ihash(key)` function (in `worker.go`) to pick the reduce task for a given key. - -- You can steal some code from `mrsequential.go` for reading Map input files, for sorting intermediate key/value pairs between the Map and Reduce, and for storing Reduce output in files. - -- The coordinator, as an RPC server, will be concurrent; don't forget to lock shared data. - -- Use Go's race detector, with `go run -race`. `test-mr.sh` has a comment at the start that tells you how to run it with `-race`. When we grade your labs, we will **not** use the race detector. Nevertheless, if your code has races, there's a good chance it will fail when we test it even without the race detector. - -- Workers will sometimes need to wait, e.g. reduces can't start until the last map has finished. One possibility is for workers to periodically ask the coordinator for work, sleeping with `time.Sleep()` between each request. Another possibility is for the relevant RPC handler in the coordinator to have a loop that waits, either with `time.Sleep()` or `sync.Cond`. Go runs the handler for each RPC in its own thread, so the fact that one handler is waiting needn't prevent the coordinator from processing other RPCs. - -- The coordinator can't reliably distinguish between crashed workers, workers that are alive but have stalled for some reason, and workers that are executing but too slowly to be useful. The best you can do is have the coordinator wait for some amount of time, and then give up and re-issue the task to a different worker. For this lab, have the coordinator wait for ten seconds; after that the coordinator should assume the worker has died (of course, it might not have). - -- If you choose to implement Backup Tasks (Section 3.6), note that we test that your code doesn't schedule extraneous tasks when workers execute tasks without crashing. Backup tasks should only be scheduled after some relatively long period of time (e.g., 10s). - -- To test crash recovery, you can use the `mrapps/crash.go` application plugin. It randomly exits in the Map and Reduce functions. - -- To ensure that nobody observes partially written files in the presence of crashes, the MapReduce paper mentions the trick of using a temporary file and atomically renaming it once it is completely written. You can use `ioutil.TempFile` (or `os.CreateTemp` if you are running Go 1.17 or later) to create a temporary file and `os.Rename` to atomically rename it. - -- `test-mr.sh` runs all its processes in the sub-directory `mr-tmp`, so if something goes wrong and you want to look at intermediate or output files, look there. 
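The JSON-encoding, `ihash` bucketing, and temp-file/rename hints above combine naturally into one small helper. The sketch below is illustrative rather than a required design: it assumes the `KeyValue` type and `ihash` function that `mr/worker.go` already provides, `writeIntermediate` and its arguments are made-up names, and it needs `encoding/json`, `fmt`, and `os` imported.

```
func writeIntermediate(mapTask, nReduce int, kva []KeyValue) error {
	// Partition the map output into nReduce buckets using ihash, as the
	// hints above suggest.
	buckets := make([][]KeyValue, nReduce)
	for _, kv := range kva {
		r := ihash(kv.Key) % nReduce
		buckets[r] = append(buckets[r], kv)
	}
	for r, bucket := range buckets {
		// Write each bucket to a temporary file first, then atomically
		// rename it to mr-X-Y so a crash never exposes a partial file.
		tmp, err := os.CreateTemp(".", "mr-intermediate-*")
		if err != nil {
			return err
		}
		enc := json.NewEncoder(tmp)
		for _, kv := range bucket {
			if err := enc.Encode(&kv); err != nil {
				tmp.Close()
				return err
			}
		}
		if err := tmp.Close(); err != nil {
			return err
		}
		if err := os.Rename(tmp.Name(), fmt.Sprintf("mr-%d-%d", mapTask, r)); err != nil {
			return err
		}
	}
	return nil
}
```

A reduce task can then open every `mr-X-Y` file with its own Y and decode the pairs back with `json.NewDecoder`, exactly as in the read-back snippet above.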
Feel free to temporarily modify `test-mr.sh` to `exit` after the failing test, so the script does not continue testing (and overwrite the output files). - -- `test-mr-many.sh` runs `test-mr.sh` many times in a row, which you may want to do in order to spot low-probability bugs. It takes as an argument the number of times to run the tests. You should not run several `test-mr.sh` instances in parallel because the coordinator will reuse the same socket, causing conflicts. - -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names. - -- When calling the RPC call() function, the reply struct should contain all default values. RPC calls should look like this: - - ``` - reply := SomeType{} - call(..., &reply) - ``` - - without setting any fields of reply before the call. If you pass reply structures that have non-default fields, the RPC system may silently return incorrect values. diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_10.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_10.md deleted file mode 100644 index 31f8df4..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_10.md +++ /dev/null @@ -1,122 +0,0 @@ -# Problem Context - -## Introduction - -You can either do a [final project](http://nil.csail.mit.edu/6.5840/2024/project.html) based on your own ideas, or this lab. - -In this lab you'll build a key/value storage system that "shards," or partitions, the keys over a set of replica groups. A shard is a subset of the key/value pairs; for example, all the keys starting with "a" might be one shard, all the keys starting with "b" another, etc. The reason for sharding is performance. Each replica group handles puts and gets for just a few of the shards, and the groups operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of groups. - -Your sharded key/value store will have two main components. First, a set of replica groups. Each replica group is responsible for a subset of the shards, using Raft replication. The second component is the "shard controller". The shard controller decides which replica group should serve each shard; this information is called the configuration. The configuration changes over time. Clients consult the shard controller in order to find the replica group for a key, and replica groups consult the controller in order to find out what shards to serve. There is a single shard controller for the whole system, implemented as a fault-tolerant service using Raft. - -A sharded storage system must be able to shift shards among replica groups. One reason is that some groups may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that replica groups may join and leave the system: new replica groups may be added to increase capacity, or existing replica groups may be taken offline for repair or retirement. - -The main challenge in this lab will be handling reconfiguration -- changes in the assignment of shards to groups. Within a single replica group, all group members must agree on when a reconfiguration occurs relative to client Put/Append/Get requests. For example, a Put may arrive at about the same time as a reconfiguration that causes the replica group to stop being responsible for the shard holding the Put's key. All replicas in the group must agree on whether the Put occurred before or after the reconfiguration. 
If before, the Put should take effect and the new owner of the shard will see its effect; if after, the Put won't take effect and client must re-try at the new owner. The recommended approach is to have each replica group use Raft to log not just the sequence of Puts, Appends, and Gets but also the sequence of reconfigurations. You will need to ensure that at most one replica group is serving requests for each shard at any one time. - -Reconfiguration also requires interaction among the replica groups. For example, in configuration 10 group G1 may be responsible for shard S1. In configuration 11, group G2 may be responsible for shard S1. During the reconfiguration from 10 to 11, G1 and G2 must use RPC to move the contents of shard S1 (the key/value pairs) from G1 to G2. - -Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. - -This lab uses "configuration" to refer to the assignment of shards to replica groups. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. - -This lab's general architecture (a configuration service and a set of replica groups) follows the same general pattern as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are very simple; and handoff of shards is slow and doesn't allow concurrent client access. - -Your Lab 5 sharded server, Lab 5 shard controller, and Lab 4 kvraft must all use the same Raft implementation. - -## Getiting Started - -Do a `git pull` to get the latest lab software. - -We supply you with skeleton code and tests in `src/shardctrler` and `src/shardkv`. - -To get up and running, execute the following commands: - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/shardctrler -$ go test ---- FAIL: TestBasic (0.00s) - test_test.go:11: wanted 1 groups, got 0 -FAIL -exit status 1 -FAIL shardctrler 0.008s -$ -``` - -When you're done, your implementation should pass all the tests in the `src/shardctrler` directory, and all the ones in `src/shardkv`. - -## The Code - -# Your Task - -First you'll implement the shard controller, in `shardctrler/server.go` and `client.go`, and a sharded key/value server that can handle an unchanging (static) configuration. When you're done, your code should pass all the tests in the `shardctrler/` directory, and the `5A` tests in `shardkv/`. - -``` -$ cd ~/6.5840/src/shardctrler -$ go test -Test: Basic leave/join ... - ... Passed -Test: Historical queries ... - ... Passed -Test: Move ... - ... Passed -Test: Concurrent leave/join ... - ... Passed -Test: Minimal transfers after joins ... - ... Passed -Test: Minimal transfers after leaves ... - ... Passed -Test: Multi-group join/leave ... - ... Passed -Test: Concurrent multi leave/join ... - ... Passed -Test: Minimal transfers after multijoins ... - ... Passed -Test: Minimal transfers after multileaves ... - ... Passed -Test: Check Same config on servers ... - ... Passed -PASS -ok 6.5840/shardctrler 5.863s -$ -$ cd ../shardkv -$ go test -run 5A -Test (5A): static shards ... - ... Passed -Test (5A): rejection ... - ... Passed -PASS -ok 6.5840/shardkv 9.262s -$ -``` - -The shardctrler manages a sequence of numbered configurations. 
Each configuration describes a set of replica groups and an assignment of shards to replica groups. Whenever this assignment needs to change, the shard controller creates a new configuration with the new assignment. Key/value clients and servers contact the shardctrler when they want to know the current (or a past) configuration. - -Your implementation must support the RPC interface described in `shardctrler/common.go`, which consists of `Join`, `Leave`, `Move`, and `Query` RPCs. These RPCs are intended to allow an administrator (and the tests) to control the shardctrler: to add new replica groups, to eliminate replica groups, and to move shards between replica groups. - -The `Join` RPC is used by an administrator to add new replica groups. Its argument is a set of mappings from unique, non-zero replica group identifiers (GIDs) to lists of server names. The shardctrler should react by creating a new configuration that includes the new replica groups. The new configuration should divide the shards as evenly as possible among the full set of groups, and should move as few shards as possible to achieve that goal. The shardctrler should allow re-use of a GID if it's not part of the current configuration (i.e. a GID should be allowed to Join, then Leave, then Join again). - -The `Leave` RPC's argument is a list of GIDs of previously joined groups. The shardctrler should create a new configuration that does not include those groups, and that assigns those groups' shards to the remaining groups. The new configuration should divide the shards as evenly as possible among the groups, and should move as few shards as possible to achieve that goal. - -The `Move` RPC's arguments are a shard number and a GID. The shardctrler should create a new configuration in which the shard is assigned to the group. The purpose of `Move` is to allow us to test your software. A `Join` or `Leave` following a `Move` will likely un-do the `Move`, since `Join` and `Leave` re-balance. - -The `Query` RPC's argument is a configuration number. The shardctrler replies with the configuration that has that number. If the number is -1 or bigger than the biggest known configuration number, the shardctrler should reply with the latest configuration. The result of `Query(-1)` should reflect every `Join`, `Leave`, or `Move` RPC that the shardctrler finished handling before it received the `Query(-1)` RPC. - -The very first configuration should be numbered zero. It should contain no groups, and all shards should be assigned to GID zero (an invalid GID). The next configuration (created in response to a `Join` RPC) should be numbered 1, &c. There will usually be significantly more shards than groups (i.e., each group will serve more than one shard), in order that load can be shifted at a fairly fine granularity. - -You must implement the interface specified above in `client.go` and `server.go` in the `shardctrler/` directory. Your shardctrler must be fault-tolerant, using your Raft library from Lab 3/4. You have completed this task when you pass all the tests in `shardctrler/`. - -- Start with a stripped-down copy of your kvraft server. -- You should implement duplicate client request detection for RPCs to the shard controller. The shardctrler tests don't test this, but the shardkv tests will later use your shardctrler on an unreliable network; you may have trouble passing the shardkv tests if your shardctrler doesn't filter out duplicate RPCs. 
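One common shape for that duplicate detection (illustrative only; none of these names come from the lab skeleton) is to have clients tag every RPC with a client ID and a monotonically increasing sequence number, and to have the server remember the highest sequence it has already applied for each client:

```
// lastOp records the most recent request applied for one client.
type lastOp struct {
	Seq   int64  // highest sequence number already applied
	Value string // cached reply, if the operation produced one
}

// dedupTable maps a client ID to its lastOp entry.
type dedupTable map[int64]lastOp

// isDuplicate reports whether (clientId, seq) was already applied and must
// therefore not be executed against the configuration state again.
func (t dedupTable) isDuplicate(clientId, seq int64) bool {
	last, ok := t[clientId]
	return ok && seq <= last.Seq
}

// record remembers that (clientId, seq) has been applied, with its reply.
func (t dedupTable) record(clientId, seq int64, value string) {
	t[clientId] = lastOp{Seq: seq, Value: value}
}
```

Typically the check runs where operations are applied from Raft rather than in the RPC handler, so that every replica filters duplicates at the same point in the log.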
-- The code in your state machine that performs the shard rebalancing needs to be deterministic. In Go, map iteration order is [not deterministic](https://blog.golang.org/maps#TOC_7.). -- Go maps are references. If you assign one variable of type map to another, both variables refer to the same map. Thus if you want to create a new `Config` based on a previous one, you need to create a new map object (with `make()`) and copy the keys and values individually. -- The Go race detector (go test -race) may help you find bugs. - -Next, in the `shardkv/` directory, implement enough of a sharded key/value server to pass the first two tests in `shardkv/`. Again, start by copying code from your existing `kvraft` server. You should be able to get the first test to pass without doing anything special regarding sharding, since the `shardkv/client.go` we give you takes care of sending RPCs to the group that the controller assigns to the key in question. - -For the second `shardkv` test, each k/v replica group must reject requests for keys for shards for which the group is not the assigned group. At this point, it's enough for the k/v servers to periodically ask the controller for the latest configuration, and to check that configuration each time a client Get/Put/Append RPC arrives. Use `key2shard()` (in `client.go`) to find the shard number for a key. - -Your server should respond with an `ErrWrongGroup` error to a client RPC with a key that the server isn't responsible for (i.e. for a key whose shard is not assigned to the server's group). - -Your server should not call the shard controller's `Join()` handler. The tester will call `Join()` when appropriate. diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_11.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_11.md deleted file mode 100644 index 93ad8c9..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_11.md +++ /dev/null @@ -1,119 +0,0 @@ -# Problem Context - -## Introduction - -You can either do a [final project](http://nil.csail.mit.edu/6.5840/2024/project.html) based on your own ideas, or this lab. - -In this lab you'll build a key/value storage system that "shards," or partitions, the keys over a set of replica groups. A shard is a subset of the key/value pairs; for example, all the keys starting with "a" might be one shard, all the keys starting with "b" another, etc. The reason for sharding is performance. Each replica group handles puts and gets for just a few of the shards, and the groups operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of groups. - -Your sharded key/value store will have two main components. First, a set of replica groups. Each replica group is responsible for a subset of the shards, using Raft replication. The second component is the "shard controller". The shard controller decides which replica group should serve each shard; this information is called the configuration. The configuration changes over time. Clients consult the shard controller in order to find the replica group for a key, and replica groups consult the controller in order to find out what shards to serve. There is a single shard controller for the whole system, implemented as a fault-tolerant service using Raft. - -A sharded storage system must be able to shift shards among replica groups. One reason is that some groups may become more loaded than others, so that shards need to be moved to balance the load. 
Another reason is that replica groups may join and leave the system: new replica groups may be added to increase capacity, or existing replica groups may be taken offline for repair or retirement. - -The main challenge in this lab will be handling reconfiguration -- changes in the assignment of shards to groups. Within a single replica group, all group members must agree on when a reconfiguration occurs relative to client Put/Append/Get requests. For example, a Put may arrive at about the same time as a reconfiguration that causes the replica group to stop being responsible for the shard holding the Put's key. All replicas in the group must agree on whether the Put occurred before or after the reconfiguration. If before, the Put should take effect and the new owner of the shard will see its effect; if after, the Put won't take effect and client must re-try at the new owner. The recommended approach is to have each replica group use Raft to log not just the sequence of Puts, Appends, and Gets but also the sequence of reconfigurations. You will need to ensure that at most one replica group is serving requests for each shard at any one time. - -Reconfiguration also requires interaction among the replica groups. For example, in configuration 10 group G1 may be responsible for shard S1. In configuration 11, group G2 may be responsible for shard S1. During the reconfiguration from 10 to 11, G1 and G2 must use RPC to move the contents of shard S1 (the key/value pairs) from G1 to G2. - -Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. - -This lab uses "configuration" to refer to the assignment of shards to replica groups. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. - -This lab's general architecture (a configuration service and a set of replica groups) follows the same general pattern as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are very simple; and handoff of shards is slow and doesn't allow concurrent client access. - -Your Lab 5 sharded server, Lab 5 shard controller, and Lab 4 kvraft must all use the same Raft implementation. - -## Getiting Started - -Do a `git pull` to get the latest lab software. - -We supply you with skeleton code and tests in `src/shardctrler` and `src/shardkv`. - -To get up and running, execute the following commands: - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/shardctrler -$ go test ---- FAIL: TestBasic (0.00s) - test_test.go:11: wanted 1 groups, got 0 -FAIL -exit status 1 -FAIL shardctrler 0.008s -$ -``` - -When you're done, your implementation should pass all the tests in the `src/shardctrler` directory, and all the ones in `src/shardkv`. - -## The Code - -# Your Task - -Do a `git pull` to get the latest lab software. - -The main task in this part of the lab is to move shards among replica groups when the controller changes the sharding, and do it in a way that provides linearizable k/v client operations. - -Each of your shards is only required to make progress when a majority of servers in the shard's Raft replica group is alive and can talk to each other, and can talk to a majority of the `shardctrler` servers. 
Your implementation must operate (serve requests and be able to re-configure as needed) even if a minority of servers in some replica group(s) are dead, temporarily unavailable, or slow. - -A shardkv server is a member of only a single replica group. The set of servers in a given replica group will never change. - -We supply you with `client.go` code that sends each RPC to the replica group responsible for the RPC's key. It re-tries if the replica group says it is not responsible for the key; in that case, the client code asks the shard controller for the latest configuration and tries again. You'll have to modify client.go as part of your support for dealing with duplicate client RPCs, much as in the kvraft lab. - -When you're done your code should pass all the shardkv tests other than the challenge tests: - -``` -$ cd ~/6.5840/src/shardkv -$ go test -Test (5A): static shards ... - ... Passed -Test (5A): rejection ... - ... Passed -Test (5B): join then leave ... - ... Passed -Test (5B): snapshots, join, and leave ... -labgob warning: Decoding into a non-default variable/field Num may not work - ... Passed -Test (5B): servers miss configuration changes... - ... Passed -Test (5B): concurrent puts and configuration changes... - ... Passed -Test (5B): more concurrent puts and configuration changes... - ... Passed -Test (5B): concurrent configuration change and restart... - ... Passed -Test (5B): unreliable 1... - ... Passed -Test (5B): unreliable 2... - ... Passed -Test (5B): unreliable 3... - ... Passed -Test: shard deletion (challenge 1) ... - ... Passed -Test: unaffected shard access (challenge 2) ... - ... Passed -Test: partial migration shard access (challenge 2) ... - ... Passed -PASS -ok 6.5840/shardkv 173.974s -$ -``` - -You will need to make your servers watch for configuration changes, and when one is detected, to start the shard migration process. If a replica group loses a shard, it must stop serving requests to keys in that shard immediately, and start migrating the data for that shard to the replica group that is taking over ownership. If a replica group gains a shard, it needs to wait for the previous owner to send over the old shard data before accepting requests for that shard. - -Implement shard migration during configuration changes. Make sure that all servers in a replica group do the migration at the same point in the sequence of operations they execute, so that they all either accept or reject concurrent client requests. You should focus on passing the second test ("join then leave") before working on the later tests. You are done with this task when you pass all tests up to, but not including, `TestDelete`. - -Your server will need to periodically poll the shardctrler to learn about new configurations. The tests expect that your code polls roughly every 100 milliseconds; more often is OK, but much less often may cause problems. - -Servers will need to send RPCs to each other in order to transfer shards during configuration changes. The shardctrler's `Config` struct contains server names, but you need a `labrpc.ClientEnd` in order to send an RPC. You should use the `make_end()` function passed to `StartServer()` to turn a server name into a `ClientEnd`. `shardkv/client.go` contains code that does this. - -- Process re-configurations one at a time, in order. -- If a test fails, check for gob errors (e.g. "gob: type not registered for interface ..."). Go doesn't consider gob errors to be fatal, although they are fatal for the lab. 
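A minimal sketch of the polling loop described above, under stated assumptions: the field names, struct layout, and the `startReconfigure` helper are placeholders rather than part of the handed-out skeleton, the clerk's `Query` is assumed to behave as specified earlier, and the snippet needs `sync`, `time`, and the shardctrler package imported.

```
// Sketch only: real servers have many more fields than these.
type ShardKV struct {
	mu     sync.Mutex
	config shardctrler.Config // last configuration this group has adopted
	mck    *shardctrler.Clerk // clerk for talking to the shard controller
}

// pollConfig asks the controller for the *next* configuration roughly every
// 100ms, so re-configurations are processed one at a time, in order.
func (kv *ShardKV) pollConfig() {
	for {
		kv.mu.Lock()
		next := kv.config.Num + 1
		kv.mu.Unlock()

		// Query(next) returns configuration number next if it exists;
		// otherwise the controller replies with the latest (smaller) one.
		latest := kv.mck.Query(next)
		if latest.Num == next {
			// Placeholder: hand the new configuration to Raft here, so every
			// replica in the group switches at the same point in its log.
			kv.startReconfigure(latest)
		}
		time.Sleep(100 * time.Millisecond)
	}
}
```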
-- You'll need to provide at-most-once semantics (duplicate detection) for client requests across shard movement. -- Think about how the shardkv client and server should deal with `ErrWrongGroup`. Should the client change the sequence number if it receives `ErrWrongGroup`? Should the server update the client state if it returns `ErrWrongGroup` when executing a `Get`/`Put` request? -- After a server has moved to a new configuration, it is acceptable for it to continue to store shards that it no longer owns (though this would be regrettable in a real system). This may help simplify your server implementation. -- When group G1 needs a shard from G2 during a configuration change, does it matter at what point during its processing of log entries G2 sends the shard to G1? -- You can send an entire map in an RPC request or reply, which may help keep the code for shard transfer simple. -- If one of your RPC handlers includes in its reply a map (e.g. a key/value map) that's part of your server's state, you may get bugs due to races. The RPC system has to read the map in order to send it to the caller, but it isn't holding a lock that covers the map. Your server, however, may proceed to modify the same map while the RPC system is reading it. The solution is for the RPC handler to include a copy of the map in the reply. -- If you put a map or a slice in a Raft log entry, and your key/value server subsequently sees the entry on the `applyCh` and saves a reference to the map/slice in your key/value server's state, you may have a race. Make a copy of the map/slice, and store the copy in your key/value server's state. The race is between your key/value server modifying the map/slice and Raft reading it while persisting its log. -- During a configuration change, a pair of groups may need to move shards in both directions between them. If you see deadlock, this is a possible source. diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_12.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_12.md deleted file mode 100644 index 52d4787..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_12.md +++ /dev/null @@ -1,221 +0,0 @@ -# Problem Context - -## Introduction - -In this lab you'll build a MapReduce system. You'll implement a worker process that calls application Map and Reduce functions and handles reading and writing files, and a coordinator process that hands out tasks to workers and copes with failed workers. You'll be building something similar to the [MapReduce paper](http://research.google.com/archive/mapreduce-osdi04.pdf). (Note: this lab uses "coordinator" instead of the paper's "master".) - -## Getiting Started - -You need to [setup Go](http://nil.csail.mit.edu/6.5840/2024/labs/go.html) to do the labs. - -Fetch the initial lab software with [git](https://git-scm.com/) (a version control system). To learn more about git, look at the [Pro Git book](https://git-scm.com/book/en/v2) or the [git user's manual](http://www.kernel.org/pub/software/scm/git/docs/user-manual.html). - -``` -$ git clone git://g.csail.mit.edu/6.5840-golabs-2024 6.5840 -$ cd 6.5840 -$ ls -Makefile src -$ -``` - -We supply you with a simple sequential mapreduce implementation in `src/main/mrsequential.go`. It runs the maps and reduces one at a time, in a single process. We also provide you with a couple of MapReduce applications: word-count in `mrapps/wc.go`, and a text indexer in `mrapps/indexer.go`. 
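For orientation, a word-count application's Map and Reduce functions look roughly like the sketch below. This is a simplified illustration, not the actual contents of `mrapps/wc.go`; the real plugin uses the lab's `mr.KeyValue` type, so the local `KeyValue` struct and file name here are stand-ins.

```
package main

import (
	"fmt"
	"strconv"
	"strings"
	"unicode"
)

// KeyValue is a stand-in for the lab's mr.KeyValue type.
type KeyValue struct{ Key, Value string }

// Map emits one ("word", "1") pair for every word in an input split.
func Map(filename string, contents string) []KeyValue {
	words := strings.FieldsFunc(contents, func(r rune) bool { return !unicode.IsLetter(r) })
	kva := []KeyValue{}
	for _, w := range words {
		kva = append(kva, KeyValue{Key: w, Value: "1"})
	}
	return kva
}

// Reduce receives all values emitted for one key and returns the count.
func Reduce(key string, values []string) string {
	return strconv.Itoa(len(values))
}

func main() {
	kva := Map("split-0.txt", "ACT I scene I ACT II")
	fmt.Println(len(kva), "pairs emitted; count for ACT =", Reduce("ACT", []string{"1", "1"}))
}
```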
You can run word count sequentially as follows: - -``` -$ cd ~/6.5840 -$ cd src/main -$ go build -buildmode=plugin ../mrapps/wc.go -$ rm mr-out* -$ go run mrsequential.go wc.so pg*.txt -$ more mr-out-0 -A 509 -ABOUT 2 -ACT 8 -... -``` - -`mrsequential.go` leaves its output in the file `mr-out-0`. The input is from the text files named `pg-xxx.txt`. - -Feel free to borrow code from `mrsequential.go`. You should also have a look at `mrapps/wc.go` to see what MapReduce application code looks like. - -For this lab and all the others, we might issue updates to the code we provide you. To ensure that you can fetch those updates and easily merge them using `git pull`, it's best to leave the code we provide in the original files. You can add to the code we provide as directed in the lab write-ups; just don't move it. It's OK to put your own new functions in new files. - -## The Code - -# Your Task - -Your job is to implement a distributed MapReduce, consisting of two programs, the coordinator and the worker. There will be just one coordinator process, and one or more worker processes executing in parallel. In a real system the workers would run on a bunch of different machines, but for this lab you'll run them all on a single machine. The workers will talk to the coordinator via RPC. Each worker process will, in a loop, ask the coordinator for a task, read the task's input from one or more files, execute the task, write the task's output to one or more files, and again ask the coordinator for a new task. The coordinator should notice if a worker hasn't completed its task in a reasonable amount of time (for this lab, use ten seconds), and give the same task to a different worker. - -We have given you a little code to start you off. The "main" routines for the coordinator and worker are in `main/mrcoordinator.go` and `main/mrworker.go`; don't change these files. You should put your implementation in `mr/coordinator.go`, `mr/worker.go`, and `mr/rpc.go`. - -Here's how to run your code on the word-count MapReduce application. First, make sure the word-count plugin is freshly built: - -``` -go build -buildmode=plugin ../mrapps/wc.go -``` - -In the `main` directory, run the coordinator. - -``` -rm mr-out* -go run mrcoordinator.go pg-*.txt -``` - -The `pg-*.txt` arguments to `mrcoordinator.go` are the input files; each file corresponds to one "split", and is the input to one Map task. - -In one or more other windows, run some workers: - -``` -go run mrworker.go wc.so -``` - -When the workers and coordinator have finished, look at the output in `mr-out-*`. When you've completed the lab, the sorted union of the output files should match the sequential output, like this: - -``` -$ cat mr-out-* | sort | more -A 509 -ABOUT 2 -ACT 8 -... -``` - -We supply you with a test script in `main/test-mr.sh`. The tests check that the `wc` and `indexer` MapReduce applications produce the correct output when given the `pg-xxx.txt` files as input. The tests also check that your implementation runs the Map and Reduce tasks in parallel, and that your implementation recovers from workers that crash while running tasks. - -If you run the test script now, it will hang because the coordinator never finishes: - -``` -$ cd ~/6.5840/src/main -$ bash test-mr.sh -*** Starting wc test. -``` - -You can change `ret := false` to true in the Done function in `mr/coordinator.go` so that the coordinator exits immediately. Then: - -``` -$ bash test-mr.sh -*** Starting wc test. 
-sort: No such file or directory -cmp: EOF on mr-wc-all ---- wc output is not the same as mr-correct-wc.txt ---- wc test: FAIL -$ -``` - -The test script expects to see output in files named `mr-out-X`, one for each reduce task. The empty implementations of `mr/coordinator.go` and `mr/worker.go` don't produce those files (or do much of anything else), so the test fails. - -When you've finished, the test script output should look like this: - -``` -$ bash test-mr.sh -*** Starting wc test. ---- wc test: PASS -*** Starting indexer test. ---- indexer test: PASS -*** Starting map parallelism test. ---- map parallelism test: PASS -*** Starting reduce parallelism test. ---- reduce parallelism test: PASS -*** Starting job count test. ---- job count test: PASS -*** Starting early exit test. ---- early exit test: PASS -*** Starting crash test. ---- crash test: PASS -*** PASSED ALL TESTS -$ -``` - -You may see some errors from the Go RPC package that look like - -``` -2019/12/16 13:27:09 rpc.Register: method "Done" has 1 input parameters; needs exactly three -``` - -Ignore these messages; registering the coordinator as an [RPC server](https://golang.org/src/net/rpc/server.go) checks if all its methods are suitable for RPCs (have 3 inputs); we know that `Done` is not called via RPC. - -Additionally, depending on your strategy for terminating worker processes, you may see some errors of the form - -``` -2025/02/11 16:21:32 dialing:dial unix /var/tmp/5840-mr-501: connect: connection refused -``` - -It is fine to see a handful of these messages per test; they arise when the worker is unable to contact the coordinator RPC server after the coordinator has exited. - -### A few rules - -- The map phase should divide the intermediate keys into buckets for `nReduce` reduce tasks, where `nReduce` is the number of reduce tasks -- the argument that `main/mrcoordinator.go` passes to `MakeCoordinator()`. Each mapper should create `nReduce` intermediate files for consumption by the reduce tasks. -- The worker implementation should put the output of the X'th reduce task in the file `mr-out-X`. -- A `mr-out-X` file should contain one line per Reduce function output. The line should be generated with the Go `"%v %v"` format, called with the key and value. Have a look in `main/mrsequential.go` for the line commented "this is the correct format". The test script will fail if your implementation deviates too much from this format. -- You can modify `mr/worker.go`, `mr/coordinator.go`, and `mr/rpc.go`. You can temporarily modify other files for testing, but make sure your code works with the original versions; we'll test with the original versions. -- The worker should put intermediate Map output in files in the current directory, where your worker can later read them as input to Reduce tasks. -- `main/mrcoordinator.go` expects `mr/coordinator.go` to implement a `Done()` method that returns true when the MapReduce job is completely finished; at that point, `mrcoordinator.go` will exit. -- When the job is completely finished, the worker processes should exit. A simple way to implement this is to use the return value from `call()`: if the worker fails to contact the coordinator, it can assume that the coordinator has exited because the job is done, so the worker can terminate too. Depending on your design, you might also find it helpful to have a "please exit" pseudo-task that the coordinator can give to workers. 
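A minimal sketch of the bucketing and output-format rules above (the `ihash` helper mirrors the one provided in `mr/worker.go`; the surrounding code is hypothetical and not part of the lab skeleton):

```
package main

import (
	"fmt"
	"hash/fnv"
	"os"
)

// ihash mirrors the helper in mr/worker.go: map a key to a non-negative int,
// which the worker reduces modulo nReduce to pick a bucket.
func ihash(key string) int {
	h := fnv.New32a()
	h.Write([]byte(key))
	return int(h.Sum32() & 0x7fffffff)
}

func main() {
	nReduce := 10
	key, count := "ABOUT", "2"

	// Intermediate pairs produced by map task X for reduce task Y go into
	// a file named mr-X-Y; Y is chosen by hashing the key.
	y := ihash(key) % nReduce
	fmt.Printf("key %q goes to reduce bucket %d (file mr-X-%d)\n", key, y, y)

	// Final reduce output is one "%v %v" line per key in mr-out-Y.
	fmt.Fprintf(os.Stdout, "%v %v\n", key, count)
}
```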
- -### Hints - -- The [Guidance page](http://nil.csail.mit.edu/6.5840/2025/labs/guidance.html) has some tips on developing and debugging. - -- One way to get started is to modify `mr/worker.go`'s `Worker()` to send an RPC to the coordinator asking for a task. Then modify the coordinator to respond with the file name of an as-yet-unstarted map task. Then modify the worker to read that file and call the application Map function, as in `mrsequential.go`. - -- The application Map and Reduce functions are loaded at run-time using the Go plugin package, from files whose names end in `.so`. - -- If you change anything in the `mr/` directory, you will probably have to re-build any MapReduce plugins you use, with something like `go build -buildmode=plugin ../mrapps/wc.go` - -- This lab relies on the workers sharing a file system. That's straightforward when all workers run on the same machine, but would require a global filesystem like GFS if the workers ran on different machines. - -- A reasonable naming convention for intermediate files is `mr-X-Y`, where X is the Map task number, and Y is the reduce task number. - -- The worker's map task code will need a way to store intermediate key/value pairs in files in a way that can be correctly read back during reduce tasks. One possibility is to use Go's encoding/json package. To write key/value pairs in JSON format to an open file: - - ``` - enc := json.NewEncoder(file) - for _, kv := ... { - err := enc.Encode(&kv) - ``` - - and to read such a file back: - - ``` - dec := json.NewDecoder(file) - for { - var kv KeyValue - if err := dec.Decode(&kv); err != nil { - break - } - kva = append(kva, kv) - } - ``` - -- The map part of your worker can use the `ihash(key)` function (in `worker.go`) to pick the reduce task for a given key. - -- You can steal some code from `mrsequential.go` for reading Map input files, for sorting intermediate key/value pairs between the Map and Reduce, and for storing Reduce output in files. - -- The coordinator, as an RPC server, will be concurrent; don't forget to lock shared data. - -- Use Go's race detector, with `go run -race`. `test-mr.sh` has a comment at the start that tells you how to run it with `-race`. When we grade your labs, we will **not** use the race detector. Nevertheless, if your code has races, there's a good chance it will fail when we test it even without the race detector. - -- Workers will sometimes need to wait, e.g. reduces can't start until the last map has finished. One possibility is for workers to periodically ask the coordinator for work, sleeping with `time.Sleep()` between each request. Another possibility is for the relevant RPC handler in the coordinator to have a loop that waits, either with `time.Sleep()` or `sync.Cond`. Go runs the handler for each RPC in its own thread, so the fact that one handler is waiting needn't prevent the coordinator from processing other RPCs. - -- The coordinator can't reliably distinguish between crashed workers, workers that are alive but have stalled for some reason, and workers that are executing but too slowly to be useful. The best you can do is have the coordinator wait for some amount of time, and then give up and re-issue the task to a different worker. For this lab, have the coordinator wait for ten seconds; after that the coordinator should assume the worker has died (of course, it might not have). 
- -- If you choose to implement Backup Tasks (Section 3.6), note that we test that your code doesn't schedule extraneous tasks when workers execute tasks without crashing. Backup tasks should only be scheduled after some relatively long period of time (e.g., 10s). - -- To test crash recovery, you can use the `mrapps/crash.go` application plugin. It randomly exits in the Map and Reduce functions. - -- To ensure that nobody observes partially written files in the presence of crashes, the MapReduce paper mentions the trick of using a temporary file and atomically renaming it once it is completely written. You can use `ioutil.TempFile` (or `os.CreateTemp` if you are running Go 1.17 or later) to create a temporary file and `os.Rename` to atomically rename it. - -- `test-mr.sh` runs all its processes in the sub-directory `mr-tmp`, so if something goes wrong and you want to look at intermediate or output files, look there. Feel free to temporarily modify `test-mr.sh` to `exit` after the failing test, so the script does not continue testing (and overwrite the output files). - -- `test-mr-many.sh` runs `test-mr.sh` many times in a row, which you may want to do in order to spot low-probability bugs. It takes as an argument the number of times to run the tests. You should not run several `test-mr.sh` instances in parallel because the coordinator will reuse the same socket, causing conflicts. - -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names. - -- When calling the RPC call() function, the reply struct should contain all default values. RPC calls should look like this: - - ``` - reply := SomeType{} - call(..., &reply) - ``` - - without setting any fields of reply before the call. If you pass reply structures that have non-default fields, the RPC system may silently return incorrect values. - -### diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_13.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_13.md deleted file mode 100644 index c6a4f40..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_13.md +++ /dev/null @@ -1,67 +0,0 @@ -# Problem Context - -## Introduction - -In this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes. - -### KV server - -Each client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). 
If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`. - -`Get(key)` fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`. - -Maintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits. - -When you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server. - -## Getiting Started - -We supply you with skeleton code and tests in `src/kvsrv1`. `kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. `kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv1 -$ go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - kvsrv_test.go:25: Put err ErrNoKey -... -$ -``` - -## The Code - -# Your Task - -Your first task is to implement a solution that works when there are no dropped messages. You'll need to add RPC-sending code to the Clerk Put/Get methods in `client.go`, and implement `Put` and `Get` RPC handlers in `server.go`. - -You have completed this task when you pass the Reliable tests in the test suite: - -``` -$ go test -v -run Reliable -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - ... Passed -- 0.0 1 5 0 ---- PASS: TestReliablePut (0.00s) -=== RUN TestPutConcurrentReliable -Test: many clients racing to put values to the same key (reliable network)... -info: linearizability check timed out, assuming history is ok - ... 
Passed -- 3.1 1 90171 90171 ---- PASS: TestPutConcurrentReliable (3.07s) -=== RUN TestMemPutManyClientsReliable -Test: memory use many put clients (reliable network)... - ... Passed -- 9.2 1 100000 0 ---- PASS: TestMemPutManyClientsReliable (16.59s) -PASS -ok 6.5840/kvsrv1 19.681s -``` - -The numbers after each `Passed` are real time in seconds, the constant 1, the number of RPCs sent (including client RPCs), and the number of key/value operations executed (`Clerk` `Get` and `Put` calls). - -- Check that your code is race-free using `go test -race`. diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_14.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_14.md deleted file mode 100644 index f95702b..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_14.md +++ /dev/null @@ -1,72 +0,0 @@ -# Problem Context - -## Introduction - -In this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes. - -### KV server - -Each client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`. - -`Get(key)` fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`. - -Maintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits. - -When you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background. 
- -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server. - -## Getting Started - -We supply you with skeleton code and tests in `src/kvsrv1`. `kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. `kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv1 -$ go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - kvsrv_test.go:25: Put err ErrNoKey -... -$ -``` - -## The Code - -# Your Task - -In many distributed applications, clients running on different machines use a key/value server to coordinate their activities. For example, ZooKeeper and Etcd allow clients to coordinate using a distributed lock, in analogy with how threads in a Go program can coordinate with locks (i.e., `sync.Mutex`). ZooKeeper and Etcd implement such a lock with conditional put. - -In this exercise your task is to implement a lock layered on client `Clerk.Put` and `Clerk.Get` calls. The lock supports two methods: `Acquire` and `Release`. The lock's specification is that only one client can successfully acquire the lock at a time; other clients must wait until the first client has released the lock using `Release`. - -We supply you with skeleton code and tests in `src/kvsrv1/lock/`. You will need to modify `src/kvsrv1/lock/lock.go`. Your `Acquire` and `Release` code can talk to your key/value server by calling `lk.ck.Put()` and `lk.ck.Get()`. - -If a client crashes while holding a lock, the lock will never be released. In a design more sophisticated than this lab, the client would attach a lease to a lock. When the lease expires, the lock server would release the lock on behalf of the client. In this lab clients don't crash and you can ignore this problem. - -Implement `Acquire` and `Release`. You have completed this exercise when your code passes the Reliable tests in the test suite in the lock sub-directory: - -``` -$ cd lock -$ go test -v -run Reliable -=== RUN TestOneClientReliable -Test: 1 lock clients (reliable network)... - ... Passed -- 2.0 1 974 0 ---- PASS: TestOneClientReliable (2.01s) -=== RUN TestManyClientsReliable -Test: 10 lock clients (reliable network)... - ... Passed -- 2.1 1 83194 0 ---- PASS: TestManyClientsReliable (2.11s) -PASS -ok 6.5840/kvsrv1/lock 4.120s -``` - -If you haven't implemented the lock yet, the first test will succeed. - -This exercise requires little code but will require a bit more independent thought than the previous exercise. - -- You will need a unique identifier for each lock client; call `kvtest.RandValue(8)` to generate a random string.
-- The lock service should use a specific key to store the "lock state" (you would have to decide precisely what the lock state is). The key to be used is passed through the parameter `l` of `MakeLock` in `src/kvsrv1/lock/lock.go`. diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_15.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_15.md deleted file mode 100644 index 3d977f5..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_15.md +++ /dev/null @@ -1,79 +0,0 @@ -# Problem Context - -## Introduction - -In this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes. - -### KV server - -Each client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`. - -`Get(key)` fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`. - -Maintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits. - -When you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. 
Providing linearizability is relatively easy for a single server. - -## Getiting Started - -We supply you with skeleton code and tests in `src/kvsrv1`. `kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. `kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv1 -$ go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - kvsrv_test.go:25: Put err ErrNoKey -... -$ -``` - -## The Code - -# Your Task - -The main challenge in this exercise is that the network may re-order, delay, or discard RPC requests and/or replies. To recover from discarded requests/replies, the Clerk must keep re-trying each RPC until it receives a reply from the server. - -If the network discards an RPC request message, then the client re-sending the request will solve the problem: the server will receive and execute just the re-sent request. - -However, the network might instead discard an RPC reply message. The client does not know which message was discarded; the client only observes that it received no reply. If it was the reply that was discarded, and the client re-sends the RPC request, then the server will receive two copies of the request. That's OK for a `Get`, since `Get` doesn't modify the server state. It is safe to resend a `Put` RPC with the same version number, since the server executes `Put` conditionally on the version number; if the server received and executed a `Put` RPC, it will respond to a re-transmitted copy of that RPC with `rpc.ErrVersion` rather than executing the Put a second time. - -A tricky case is if the server replies with an `rpc.ErrVersion` in a response to an RPC that the Clerk retried. In this case, the Clerk cannot know if the Clerk's `Put` was executed by the server or not: the first RPC might have been executed by the server but the network may have discarded the successful response from the server, so that the server sent `rpc.ErrVersion` only for the retransmitted RPC. Or, it might be that another Clerk updated the key before the Clerk's first RPC arrived at the server, so that the server executed neither of the Clerk's RPCs and replied `rpc.ErrVersion` to both. Therefore, if a Clerk receives `rpc.ErrVersion` for a retransmitted Put RPC, `Clerk.Put` must return `rpc.ErrMaybe` to the application instead of `rpc.ErrVersion` since the request may have been executed. It is then up to the application to handle this case. If the server responds to an initial (not retransmitted) Put RPC with `rpc.ErrVersion`, then the Clerk should return `rpc.ErrVersion` to the application, since the RPC was definitely not executed by the server. - -It would be more convenient for application developers if `Put`'s were exactly-once (i.e., no `rpc.ErrMaybe` errors) but that is difficult to guarantee without maintaining state at the server for each Clerk. In the last exercise of this lab, you will implement a lock using your Clerk to explore how to program with at-most-once `Clerk.Put`. 
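A rough sketch of the retry rule this discussion implies, with a fake transport standing in for `ck.clnt.Call()` so it runs on its own (all names here are hypothetical stand-ins, not the lab's actual `Clerk` code):

```
package main

import (
	"fmt"
	"time"
)

// fakeTransport simulates a lossy network: the first `drops` calls return
// no reply at all, after which a reply always arrives.
type fakeTransport struct{ drops int }

func (t *fakeTransport) call(version uint64) (reply string, ok bool) {
	if t.drops > 0 {
		t.drops--
		return "", false // lost request or lost reply
	}
	if version == 0 {
		return "OK", true
	}
	return "ErrVersion", true
}

// put keeps retrying until some reply arrives; an ErrVersion that arrives on
// a retransmitted request is reported as ErrMaybe, because an earlier copy of
// the request may already have been executed.
func put(t *fakeTransport, version uint64) string {
	first := true
	for {
		reply, ok := t.call(version)
		if ok {
			if reply == "ErrVersion" && !first {
				return "ErrMaybe"
			}
			return reply
		}
		first = false
		time.Sleep(10 * time.Millisecond) // brief pause before retrying
	}
}

func main() {
	fmt.Println(put(&fakeTransport{drops: 2}, 0)) // OK after two lost messages
	fmt.Println(put(&fakeTransport{drops: 1}, 5)) // ErrMaybe: retried, then ErrVersion
}
```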
- -Now you should modify your `kvsrv1/client.go` to continue in the face of dropped RPC requests and replies. A return value of `true` from the client's `ck.clnt.Call()` indicates that the client received an RPC reply from the server; a return value of `false` indicates that it did not receive a reply (more precisely, `Call()` waits for a reply message for a timeout interval, and returns false if no reply arrives within that time). Your `Clerk` should keep re-sending an RPC until it receives a reply. Keep in mind the discussion of `rpc.ErrMaybe` above. Your solution shouldn't require any changes to the server. - -Add code to `Clerk` to retry if it doesn't receive a reply. You have completed this task if your code passes all tests in `kvsrv1/`, like this: - -``` -$ go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - ... Passed -- 0.0 1 5 0 ---- PASS: TestReliablePut (0.00s) -=== RUN TestPutConcurrentReliable -Test: many clients racing to put values to the same key (reliable network)... -info: linearizability check timed out, assuming history is ok - ... Passed -- 3.1 1 106647 106647 ---- PASS: TestPutConcurrentReliable (3.09s) -=== RUN TestMemPutManyClientsReliable -Test: memory use many put clients (reliable network)... - ... Passed -- 8.0 1 100000 0 ---- PASS: TestMemPutManyClientsReliable (14.61s) -=== RUN TestUnreliableNet -One client (unreliable network)... - ... Passed -- 7.6 1 251 208 ---- PASS: TestUnreliableNet (7.60s) -PASS -ok 6.5840/kvsrv1 25.319s -``` - -- Before the client retries, it should wait a little bit; you can use Go's `time` package and call `time.Sleep(100 * time.Millisecond)` diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_16.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_16.md deleted file mode 100644 index 30ff972..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_16.md +++ /dev/null @@ -1,65 +0,0 @@ -# Problem Context - -## Introduction - -In this lab you will build a key/value server for a single machine that ensures that each Put operation is executed *at-most-once* despite network failures and that the operations are *linearizable*. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes. - -### KV server - -Each client interacts with the key/value server using a *Clerk*, which sends RPCs to the server. Clients can send two different RPCs to the server: `Put(key, value, version)` and `Get(key)`. The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. `Put(key, value, version)` installs or replaces the value for a particular key in the map *only if* the `Put`'s version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return `rpc.ErrVersion`. A client can create a new key by invoking `Put` with version number 0 (and the resulting version stored by the server will be 1). If the version number of the `Put` is larger than 0 and the key doesn't exist, the server should return `rpc.ErrNoKey`. - -`Get(key)` fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return `rpc.ErrNoKey`.
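A toy, in-memory sketch of these Put/Get rules (the real handlers belong in `kvsrv1/server.go` and use the `rpc` package's argument, reply, and error definitions; the string error values and struct names below are simplified stand-ins):

```
package main

import "fmt"

// entry holds the (value, version) tuple stored for one key.
type entry struct {
	value   string
	version uint64
}

type server struct{ data map[string]entry }

func (s *server) put(key, value string, version uint64) string {
	e, ok := s.data[key]
	switch {
	case !ok && version == 0: // creating a new key
		s.data[key] = entry{value, 1}
		return "OK"
	case !ok: // version > 0 but the key doesn't exist
		return "ErrNoKey"
	case e.version == version: // versions match: install and bump the version
		s.data[key] = entry{value, e.version + 1}
		return "OK"
	default: // version mismatch
		return "ErrVersion"
	}
}

func (s *server) get(key string) (string, uint64, string) {
	e, ok := s.data[key]
	if !ok {
		return "", 0, "ErrNoKey"
	}
	return e.value, e.version, "OK"
}

func main() {
	s := &server{data: map[string]entry{}}
	fmt.Println(s.put("k", "v1", 0)) // OK, version becomes 1
	fmt.Println(s.put("k", "v2", 1)) // OK, version becomes 2
	fmt.Println(s.put("k", "v3", 1)) // ErrVersion
	fmt.Println(s.get("k"))          // v2 2 OK
}
```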
- -Maintaining a version number for each key will be useful for implementing locks using `Put` and ensuring at-most-once semantics for `Put`'s when the network is unreliable and the client retransmits. - -When you've finished this lab and passed all the tests, you'll have a *linearizable* key/value service from the point of view of clients calling `Clerk.Get` and `Clerk.Put`. That is, if client operations aren't concurrent, each client `Clerk.Get` and `Clerk.Put` will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Put()`, and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on [linearizability](http://nil.csail.mit.edu/6.5840/2025/papers/linearizability-faq.txt) for more background. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server. - -## Getiting Started - -We supply you with skeleton code and tests in `src/kvsrv1`. `kvsrv1/client.go` implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides `Put` and `Get` methods. `kvsrv1/server.go` contains the server code, including the `Put` and `Get` handlers that implement the server side of RPC requests. You will need to modify `client.go` and `server.go`. The RPC requests, replies, and error values are defined in the `kvsrv1/rpc` package in the file `kvsrv1/rpc/rpc.go`, which you should look at, though you don't have to modify `rpc.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv1 -$ go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - kvsrv_test.go:25: Put err ErrNoKey -... -$ -``` - -## The Code - -# Your Task - -Modify your lock implementation to work correctly with your modified key/value client when the network is not reliable. You have completed this exercise when your code passes all the `kvsrv1/lock/` tests, including the unreliable ones: - -``` -$ cd lock -$ go test -v -=== RUN TestOneClientReliable -Test: 1 lock clients (reliable network)... - ... Passed -- 2.0 1 968 0 ---- PASS: TestOneClientReliable (2.01s) -=== RUN TestManyClientsReliable -Test: 10 lock clients (reliable network)... - ... Passed -- 2.1 1 10789 0 ---- PASS: TestManyClientsReliable (2.12s) -=== RUN TestOneClientUnreliable -Test: 1 lock clients (unreliable network)... - ... Passed -- 2.3 1 70 0 ---- PASS: TestOneClientUnreliable (2.27s) -=== RUN TestManyClientsUnreliable -Test: 10 lock clients (unreliable network)... - ... 
Passed -- 3.6 1 908 0 ---- PASS: TestManyClientsUnreliable (3.62s) -PASS -ok 6.5840/kvsrv1/lock 10.033s -``` diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_17.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_17.md deleted file mode 100644 index d581e85..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_17.md +++ /dev/null @@ -1,110 +0,0 @@ -# Problem Context - -## Introduction - -This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date. - -## Getiting Started - -Do a `git pull` to get the latest lab software. - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). 
However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft1 -$ go test -Test (3A): initial election (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestInitialElection3A (4.90s) -Test (3A): election after network failure (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestReElection3A (5.05s) -... -$ -``` - -## The Code - -Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code. - -# Your Task - -Implement Raft leader election and heartbeats (`AppendEntries` RPCs with no log entries). The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run `go test -run 3A` to test your 3A code. - -- You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. `go test -run 3A`. -- Follow the paper's Figure 2. At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election. -- Add the Figure 2 state for leader election to the `Raft` struct in `raft.go`. You'll also need to define a struct to hold information about each log entry.
- -- Fill in the `RequestVoteArgs` and `RequestVoteReply` structs. Modify `Make()` to create a background goroutine that will kick off leader election periodically by sending out `RequestVote` RPCs when it hasn't heard from another peer for a while. Implement the `RequestVote()` RPC handler so that servers will vote for one another. -- To implement heartbeats, define an `AppendEntries` RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an `AppendEntries` RPC handler method. -- The tester requires that the leader send heartbeat RPCs no more than ten times per second. -- The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate). -- The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you to tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds. -- You may find Go's [rand](https://golang.org/pkg/math/rand/) useful. -- You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls [time.Sleep()](https://golang.org/pkg/time/#Sleep); see the `ticker()` goroutine that `Make()` creates for this purpose. Don't use Go's `time.Timer` or `time.Ticker`, which are difficult to use correctly. -- If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure. -- Don't forget to implement `GetState()`. -- The tester calls your Raft's `rf.Kill()` when it is permanently shutting down an instance. You can check whether `Kill()` has been called using `rf.killed()`. You may want to do this in all loops, to avoid having dead Raft instances print confusing messages. -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). The `labgob` package will warn you about this; don't ignore the warnings. -- The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug. Refer to the [Guidance](http://nil.csail.mit.edu/6.5840/2025/labs/guidance.html) page for debugging tips. -- If you fail a test, the tester produces a file that visualizes a timeline with events marked along it, including network partitions, crashed servers, and checks performed. Here's an [example of the visualization](http://nil.csail.mit.edu/6.5840/2025/labs/vis.html). Further, you can add your own annotations by writing, for example, `tester.Annotate("Server 0", "short description", "details")`. This is a new feature we added this year, so if you have any feedback regarding the visualizer (e.g., bug reports, what annotation APIs you think might be helpful, what information you want the visualizer to show, etc.), please let us know! - -Be sure you pass the 3A tests before submitting Part 3A, so that you see something like this: - -``` -$ go test -run 3A -Test (3A): initial election (reliable network)... - ...
Passed -- 3.6 3 106 0 -Test (3A): election after network failure (reliable network)... - ... Passed -- 7.6 3 304 0 -Test (3A): multiple elections (reliable network)... - ... Passed -- 8.4 7 954 0 -PASS -ok 6.5840/raft1 19.834s -$ -``` - -Each "Passed" line contains five numbers; these are the time that the test took in seconds, the number of Raft peers, the number of RPCs sent during the test, the total number of bytes in the RPC messages, and the number of log entries that Raft reports were committed. Your numbers will differ from those shown here. You can ignore the numbers if you like, but they may help you sanity-check the number of RPCs that your implementation sends. For all of labs 3, 4, and 5, the grading script will fail your solution if it takes more than 600 seconds for all of the tests (`go test`), or if any individual test takes more than 120 seconds. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should make sure that your code consistently passes the tests with the `-race` flag. diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_18.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_18.md deleted file mode 100644 index 189d932..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_18.md +++ /dev/null @@ -1,114 +0,0 @@ -# Problem Context - -## Introduction - -This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute.
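As a rough picture of that hand-off (a sketch only; the lab's actual `ApplyMsg` is defined in `raft.go`, and the fields shown here are a simplified assumption):

```
package main

import "fmt"

// ApplyMsg is a simplified stand-in for the lab's type: one message per
// newly committed log entry, delivered to the service in log order.
type ApplyMsg struct {
	CommandValid bool
	Command      interface{}
	CommandIndex int
}

func main() {
	applyCh := make(chan ApplyMsg, 3)

	// Once entries 1..3 commit, Raft pushes them onto applyCh in order.
	for i, cmd := range []string{"put a", "put b", "get a"} {
		applyCh <- ApplyMsg{CommandValid: true, Command: cmd, CommandIndex: i + 1}
	}
	close(applyCh)

	// The larger service (or the tester) consumes the messages and applies
	// each command to its local state machine.
	for msg := range applyCh {
		fmt.Println("apply index", msg.CommandIndex, "command", msg.Command)
	}
}
```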
- -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date. - -## Getiting Started - -Do a `git pull` to get the latest lab software. - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft1 -$ go test -Test (3A): initial election (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestInitialElection3A (4.90s) -Test (3A): election after network failure (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestReElection3A (5.05s) -... -$ -``` - -## The Code - -Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. 
Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code. - -# Your Task - -Implement the leader and follower code to append new log entries, so that the `go test -run 3B` tests pass. - -- Run `git pull` to get the latest lab software. -- The Raft log is 1-indexed, but we suggest that you view it as 0-indexed, starting out with an entry (at index=0) that has term 0 (see the log-layout sketch at the end of this task). That allows the very first AppendEntries RPC to contain 0 as PrevLogIndex, and be a valid index into the log. -- Your first goal should be to pass `TestBasicAgree3B()`. Start by implementing `Start()`, then write the code to send and receive new log entries via `AppendEntries` RPCs, following Figure 2. Send each newly committed entry on `applyCh` on each peer. -- You will need to implement the election restriction (section 5.4.1 in the paper). -- Your code may have loops that repeatedly check for certain events. Don't have these loops execute continuously without pausing, since that will slow your implementation enough that it fails tests. Use Go's [condition variables](https://golang.org/pkg/sync/#Cond), or insert a `time.Sleep(10 * time.Millisecond)` in each loop iteration. -- Do yourself a favor for future labs and write (or re-write) code that's clean and clear. For ideas, re-visit the [Guidance page](http://nil.csail.mit.edu/6.5840/2025/labs/guidance.html) with tips on how to develop and debug your code. -- If you fail a test, look at `raft_test.go` and trace the test code from there to understand what's being tested. - -The tests for upcoming labs may fail your code if it runs too slowly. You can check how much real time and CPU time your solution uses with the time command. Here's typical output: - -``` -$ time go test -run 3B -Test (3B): basic agreement (reliable network)... - ... Passed -- 1.3 3 18 0 -Test (3B): RPC byte count (reliable network)... - ... Passed -- 2.8 3 56 0 -Test (3B): test progressive failure of followers (reliable network)... - ... Passed -- 5.3 3 188 0 -Test (3B): test failure of leaders (reliable network)... - ... Passed -- 6.4 3 378 0 -Test (3B): agreement after follower reconnects (reliable network)... - ... Passed -- 5.9 3 176 0 -Test (3B): no agreement if too many followers disconnect (reliable network)... - ... Passed -- 4.3 5 288 0 -Test (3B): concurrent Start()s (reliable network)... - ... Passed -- 1.5 3 32 0 -Test (3B): rejoin of partitioned leader (reliable network)... - ... Passed -- 5.3 3 216 0 -Test (3B): leader backs up quickly over incorrect follower logs (reliable network)... - ... Passed -- 12.1 5 1528 0 -Test (3B): RPC counts aren't too high (reliable network)... - ... Passed -- 3.1 3 106 0 -PASS -ok 6.5840/raft1 48.353s -go test -run 3B 1.37s user 0.74s system 4% cpu 48.865 total -$ -``` - -The "ok 6.5840/raft1 48.353s" means that Go measured the time taken for the 3B tests to be 48.353 seconds of real (wall-clock) time. The "1.37s user" means that the code consumed 1.37 seconds of CPU time, or time spent actually executing instructions (rather than waiting or sleeping). If your solution uses much more than a minute of real time for the 3B tests, or much more than 5 seconds of CPU time, you may run into trouble later on. Look for time spent sleeping or waiting for RPC timeouts, loops that run without sleeping or waiting for conditions or channel messages, or large numbers of RPCs sent.
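To make the 0-indexed log suggestion above concrete, here is a minimal sketch of that layout; the `LogEntry` type and the helper names are illustrative assumptions, not part of the lab's skeleton code.

```
// Illustrative sketch only: a log whose slot 0 is a placeholder entry with
// term 0, so the first real command sits at index 1 and PrevLogIndex = 0 is
// always a valid position for the very first AppendEntries RPC.
package raft

type LogEntry struct {
	Term    int
	Command interface{}
}

func newLog() []LogEntry {
	return []LogEntry{{Term: 0, Command: nil}} // dummy entry at index 0
}

func lastLogIndex(log []LogEntry) int { return len(log) - 1 }
func lastLogTerm(log []LogEntry) int  { return log[len(log)-1].Term }
```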
diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_19.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_19.md deleted file mode 100644 index 1d672e1..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_19.md +++ /dev/null @@ -1,133 +0,0 @@ -# Problem Context - -## Introduction - -This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date. - -## Getiting Started - -Do a `git pull` to get the latest lab software. - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. 
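For example, once you are in `src/raft1` you can run just the 3A tests under the race detector:

```
$ cd src/raft1
$ go test -race -run 3A
```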
- -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft1 -$ go test -Test (3A): initial election (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestInitialElection3A (4.90s) -Test (3A): election after network failure (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestReElection3A (5.05s) -... -$ -``` - -## The Code - -Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code. - -# Your Task - -If a Raft-based server reboots it should resume service where it left off. This requires that Raft keep persistent state that survives a reboot. The paper's Figure 2 mentions which state should be persistent. - -A real implementation would write Raft's persistent state to disk each time it changed, and would read the state from disk when restarting after a reboot. Your implementation won't use the disk; instead, it will save and restore persistent state from a `Persister` object (see `persister.go`). Whoever calls `Raft.Make()` supplies a `Persister` that initially holds Raft's most recently persisted state (if any). Raft should initialize its state from that `Persister`, and should use it to save its persistent state each time the state changes. Use the `Persister`'s `ReadRaftState()` and `Save()` methods. - -Complete the functions `persist()` and `readPersist()` in `raft.go` by adding code to save and restore persistent state. 
You will need to encode (or "serialize") the state as an array of bytes in order to pass it to the `Persister`. Use the `labgob` encoder; see the comments in `persist()` and `readPersist()`. `labgob` is like Go's `gob` encoder but prints error messages if you try to encode structures with lower-case field names. For now, pass `nil` as the second argument to `persister.Save()`. Insert calls to `persist()` at the points where your implementation changes persistent state. Once you've done this, and if the rest of your implementation is correct, you should pass all of the 3C tests. - -You will probably need the optimization that backs up nextIndex by more than one entry at a time. Look at the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf) starting at the bottom of page 7 and top of page 8 (marked by a gray line). The paper is vague about the details; you will need to fill in the gaps. One possibility is to have a rejection message include: - -``` - XTerm: term in the conflicting entry (if any) - XIndex: index of first entry with that term (if any) - XLen: log length -``` - -Then the leader's logic can be something like: - -``` - Case 1: leader doesn't have XTerm: - nextIndex = XIndex - Case 2: leader has XTerm: - nextIndex = (index of leader's last entry for XTerm) + 1 - Case 3: follower's log is too short: - nextIndex = XLen -``` - -A few other hints: - -- Run `git pull` to get the latest lab software. -- The 3C tests are more demanding than those for 3A or 3B, and failures may be caused by problems in your code for 3A or 3B. - -Your code should pass all the 3C tests (as shown below), as well as the 3A and 3B tests. - -``` -$ go test -run 3C -Test (3C): basic persistence (reliable network)... - ... Passed -- 6.6 3 110 0 -Test (3C): more persistence (reliable network)... - ... Passed -- 15.6 5 428 0 -Test (3C): partitioned leader and one follower crash, leader restarts (reliable network)... - ... Passed -- 3.1 3 50 0 -Test (3C): Figure 8 (reliable network)... - ... Passed -- 33.7 5 654 0 -Test (3C): unreliable agreement (unreliable network)... - ... Passed -- 2.1 5 1076 0 -Test (3C): Figure 8 (unreliable) (unreliable network)... - ... Passed -- 31.9 5 4400 0 -Test (3C): churn (reliable network)... - ... Passed -- 16.8 5 4896 0 -Test (3C): unreliable churn (unreliable network)... - ... Passed -- 16.1 5 7204 0 -PASS -ok 6.5840/raft1 126.054s -$ -``` - -It is a good idea to run the tests multiple times before submitting and check that each run prints `PASS`. - -``` -for i in {0..10}; do go test; done -``` diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_2.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_2.md deleted file mode 100644 index f7badfb..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_2.md +++ /dev/null @@ -1,41 +0,0 @@ -# Problem Context - -## Introduction - -In this lab you will build a key/value server for a single machine that ensures that each operation is executed exactly once despite network failures and that the operations are [linearizable](http://nil.csail.mit.edu/6.5840/2024/papers/linearizability-faq.txt). Later labs will replicate a server like this one to handle server crashes. - -Clients can send three different RPCs to the key/value server: `Put(key, value)`, `Append(key, arg)`, and `Get(key)`. The server maintains an in-memory map of key/value pairs. Keys and values are strings. 
`Put(key, value)` installs or replaces the value for a particular key in the map, `Append(key, arg)` appends arg to key's value *and* returns the old value, and `Get(key)` fetches the current value for the key. A `Get` for a non-existent key should return an empty string. An `Append` to a non-existent key should act as if the existing value were a zero-length string. Each client talks to the server through a `Clerk` with Put/Append/Get methods. A `Clerk` manages RPC interactions with the server. - -Your server must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If client requests aren't concurrent, each client Get/Put/Append call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server. - -## Getting Started - -We supply you with skeleton code and tests in `src/kvsrv`. You will need to modify `kvsrv/client.go`, `kvsrv/server.go`, and `kvsrv/common.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv -$ go test -... -$ -``` - -## The Code - -# Your Task - -Your first task is to implement a solution that works when there are no dropped messages. - -You'll need to add RPC-sending code to the Clerk Put/Append/Get methods in `client.go`, and implement `Put()`, `Append()`, and `Get()` RPC handlers in `server.go`. - -You have completed this task when you pass the first two tests in the test suite: "one client" and "many clients". - -### Hints - -- Check that your code is race-free using `go test -race`. diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_20.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_20.md deleted file mode 100644 index 433ac77..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_20.md +++ /dev/null @@ -1,124 +0,0 @@ -# Problem Context - -## Introduction - -This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data.
- -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date. - -## Getiting Started - -Do a `git pull` to get the latest lab software. - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2025/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/raft_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft1 -$ go test -Test (3A): initial election (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestInitialElection3A (4.90s) -Test (3A): election after network failure (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestReElection3A (5.05s) -... -$ -``` - -## The Code - -Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. 
- -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code. - -# Your Task - -As things stand now, a rebooting server replays the complete Raft log in order to restore its state. However, it's not practical for a long-running service to remember the complete Raft log forever. Instead, you'll modify Raft to cooperate with services that persistently store a "snapshot" of their state from time to time, at which point Raft discards log entries that precede the snapshot. The result is a smaller amount of persistent data and faster restart. However, it's now possible for a follower to fall so far behind that the leader has discarded the log entries it needs to catch up; the leader must then send a snapshot plus the log starting at the time of the snapshot. Section 7 of the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf) outlines the scheme; you will have to design the details. - -Your Raft must provide the following function that the service can call with a serialized snapshot of its state: - -``` -Snapshot(index int, snapshot []byte) -``` - -In Lab 3D, the tester calls `Snapshot()` periodically. In Lab 4, you will write a key/value server that calls `Snapshot()`; the snapshot will contain the complete table of key/value pairs. The service layer calls `Snapshot()` on every peer (not just on the leader). - -The `index` argument indicates the highest log entry that's reflected in the snapshot. Raft should discard its log entries before that point. You'll need to revise your Raft code to operate while storing only the tail of the log. - -You'll need to implement the `InstallSnapshot` RPC discussed in the paper that allows a Raft leader to tell a lagging Raft peer to replace its state with a snapshot. You will likely need to think through how InstallSnapshot should interact with the state and rules in Figure 2. 
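As a rough illustration of operating with a trimmed log, one possible bookkeeping scheme is sketched below; the field and helper names are assumptions rather than the skeleton's, and a real implementation must also persist and apply the snapshot itself.

```
// Sketch only: keep just the log tail, with slot 0 standing in for the last
// entry covered by the snapshot. Field and method names are assumptions.
package raft

type LogEntry struct {
	Term    int
	Command interface{}
}

type Raft struct {
	log               []LogEntry // log[0] corresponds to lastIncludedIndex
	lastIncludedIndex int
	lastIncludedTerm  int
}

// at translates an absolute Raft index into the trimmed slice.
func (rf *Raft) at(index int) LogEntry {
	return rf.log[index-rf.lastIncludedIndex]
}

// trimThrough discards entries up to and including index (e.g. from
// Snapshot). Copying into a fresh slice drops references to the old backing
// array so the Go garbage collector can reclaim the discarded entries.
func (rf *Raft) trimThrough(index int) {
	keep := append([]LogEntry(nil), rf.log[index-rf.lastIncludedIndex:]...)
	rf.lastIncludedTerm = keep[0].Term
	rf.lastIncludedIndex = index
	rf.log = keep
}
```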
- -When a follower's Raft code receives an InstallSnapshot RPC, it can use the `applyCh` to send the snapshot to the service in an `ApplyMsg`. The `ApplyMsg` struct definition already contains the fields you will need (and which the tester expects). Take care that these snapshots only advance the service's state, and don't cause it to move backwards. - -If a server crashes, it must restart from persisted data. Your Raft should persist both Raft state and the corresponding snapshot. Use the second argument to `persister.Save()` to save the snapshot. If there's no snapshot, pass `nil` as the second argument. - -When a server restarts, the application layer reads the persisted snapshot and restores its saved state. - -Implement `Snapshot()` and the InstallSnapshot RPC, as well as the changes to Raft to support these (e.g, operation with a trimmed log). Your solution is complete when it passes the 3D tests (and all the previous Lab 3 tests). - -- `git pull` to make sure you have the latest software. -- A good place to start is to modify your code to so that it is able to store just the part of the log starting at some index X. Initially you can set X to zero and run the 3B/3C tests. Then make `Snapshot(index)` discard the log before `index`, and set X equal to `index`. If all goes well you should now pass the first 3D test. -- A common reason for failing the first 3D test is that followers take too long to catch up to the leader. -- Next: have the leader send an InstallSnapshot RPC if it doesn't have the log entries required to bring a follower up to date. -- Send the entire snapshot in a single InstallSnapshot RPC. Don't implement Figure 13's `offset` mechanism for splitting up the snapshot. -- Raft must discard old log entries in a way that allows the Go garbage collector to free and re-use the memory; this requires that there be no reachable references (pointers) to the discarded log entries. -- A reasonable amount of time to consume for the full set of Lab 3 tests (3A+3B+3C+3D) without `-race` is 6 minutes of real time and one minute of CPU time. When running with `-race`, it is about 10 minutes of real time and two minutes of CPU time. - -Your code should pass all the 3D tests (as shown below), as well as the 3A, 3B, and 3C tests. - -``` -$ go test -run 3D -Test (3D): snapshots basic (reliable network)... - ... Passed -- 3.3 3 522 0 -Test (3D): install snapshots (disconnect) (reliable network)... - ... Passed -- 48.4 3 2710 0 -Test (3D): install snapshots (disconnect) (unreliable network)... - ... Passed -- 56.1 3 3025 0 -Test (3D): install snapshots (crash) (reliable network)... - ... Passed -- 33.3 3 1559 0 -Test (3D): install snapshots (crash) (unreliable network)... - ... Passed -- 38.1 3 1723 0 -Test (3D): crash and restart all servers (unreliable network)... - ... Passed -- 11.2 3 296 0 -Test (3D): snapshot initialization after crash (unreliable network)... - ... Passed -- 4.3 3 84 0 -PASS -ok 6.5840/raft1 195.006s -``` diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_21.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_21.md deleted file mode 100644 index 5979d99..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_21.md +++ /dev/null @@ -1,79 +0,0 @@ -# Problem Context - -## Introduction - -In this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html). 
To clients, the service looks similar to the server of [Lab 2](http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html). However, instead of a single server, the service consists of a set of servers that use Raft to help them maintain identical databases. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2025/figs/kvraft.pdf). - -Clients will interact with your key/value service through a Clerk, as in Lab 2. A Clerk implements the `Put` and `Get` methods with the same semantics as Lab 2: Puts are at-most-once and the Puts/Gets must form a linearizable history. - -Providing linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. - -This lab has three parts. In part A, you will implement a replicated-state machine package, `rsm`, using your raft implementation; `rsm` is agnostic of the requests that it replicates. In part B, you will implement a replicated key/value service using `rsm`, but without using snapshots. In part C, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. - -You should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), in particular Section 7 (but not 8). For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) Start early. - -## Getiting Started - -We supply you with skeleton code and tests in `src/kvraft1`. The skeleton code uses the skeleton package `src/kvraft1/rsm` to replicate a server. A server must implement the `StateMachine` interface defined in `rsm` to replicate itself using `rsm`. Most of your work will be implementing `rsm` to provide server-agnostic replication. You will also need to modify `kvraft1/client.go` and `kvraft1/server.go` to implement the server-specific parts. This split allows you to re-use `rsm` in the next lab. You may be able to re-use some of your Lab 2 code (e.g., re-using the server code by copying or importing the `"src/kvsrv1"` package) but it is not a requirement. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -.. -``` - -## The Code - -# Your Task - -``` -$ cd src/kvraft1/rsm -$ go test -v -=== RUN TestBasic -Test RSM basic (reliable network)... -.. - config.go:147: one: took too long -``` - -In the common situation of a client/server service using Raft for replication, the service interacts with Raft in two ways: the service leader submits client operations by calling `raft.Start()`, and all service replicas receive committed operations via Raft's `applyCh`, which they execute. On the leader, these two activities interact. 
At any given time, some server goroutines are handling client requests, have called `raft.Start()`, and each is waiting for its operation to commit and to find out what the result of executing the operation is. And as committed operations appear on the `applyCh`, each needs to be executed by the service, and the results need to be handed to the goroutine that called `raft.Start()` so that it can return the result to the client. - -The `rsm` package encapsulates the above interaction. It sits as a layer between the service (e.g. a key/value database) and Raft. In `rsm/rsm.go` you will need to implement a "reader" goroutine that reads the `applyCh`, and a `rsm.Submit()` function that calls `raft.Start()` for a client operation and then waits for the reader goroutine to hand it the result of executing that operation. - -The service that is using `rsm` appears to the `rsm` reader goroutine as a `StateMachine` object providing a `DoOp()` method. The reader goroutine should hand each committed operation to `DoOp()`; `DoOp()`'s return value should be given to the corresponding `rsm.Submit()` call for it to return. `DoOp()`'s argument and return value have type `any`; the actual values should have the same types as the argument and return values that the service passes to `rsm.Submit()`, respectively. - -The service should pass each client operation to `rsm.Submit()`. To help the reader goroutine match `applyCh` messages with waiting calls to `rsm.Submit()`, `Submit()` should wrap each client operation in an `Op` structure along with a unique identifier. `Submit()` should then wait until the operation has committed and been executed, and return the result of execution (the value returned by `DoOp()`). If `raft.Start()` indicates that the current peer is not the Raft leader, `Submit()` should return an `rpc.ErrWrongLeader` error. `Submit()` should detect and handle the situation in which leadership changed just after it called `raft.Start()`, causing the operation to be lost (never committed). - -For Part A, the `rsm` tester acts as the service, submitting operations that it interprets as increments on a state consisting of a single integer. In Part B you'll use `rsm` as part of a key/value service that implements `StateMachine` (and `DoOp()`), and calls `rsm.Submit()`. - -If all goes well, the sequence of events for a client request is: - -- The client sends a request to the service leader. -- The service leader calls `rsm.Submit()` with the request. -- `rsm.Submit()` calls `raft.Start()` with the request, and then waits. -- Raft commits the request and sends it on all peers' `applyCh`s. -- The `rsm` reader goroutine on each peer reads the request from the `applyCh` and passes it to the service's `DoOp()`. -- On the leader, the `rsm` reader goroutine hands the `DoOp()` return value to the `Submit()` goroutine that originally submitted the request, and `Submit()` returns that value. - -Your servers should not directly communicate; they should only interact with each other through Raft. - -Implement `rsm.go`: the `Submit()` method and a reader goroutine. You have completed this task if you pass the `rsm` 4A tests: - -``` - $ cd src/kvraft1/rsm - $ go test -v -run 4A -=== RUN TestBasic4A -Test RSM basic (reliable network)... - ... Passed -- 1.2 3 48 0 ---- PASS: TestBasic4A (1.21s) -=== RUN TestLeaderFailure4A - ... 
Passed -- 9223372036.9 3 31 0 ---- PASS: TestLeaderFailure4A (1.50s) -PASS -ok 6.5840/kvraft1/rsm 2.887s -``` - -- You should not need to add any fields to the Raft `ApplyMsg`, or to Raft RPCs such as `AppendEntries`, but you are allowed to do so. -- Your solution needs to handle an `rsm` leader that has called `Start()` for a request submitted with `Submit()` but loses its leadership before the request is committed to the log. One way to do this is for the `rsm` to detect that it has lost leadership, by noticing that Raft's term has changed or a different request has appeared at the index returned by `Start()`, and return `rpc.ErrWrongLeader` from `Submit()`. If the ex-leader is partitioned by itself, it won't know about new leaders; but any client in the same partition won't be able to talk to a new leader either, so it's OK in this case for the server to wait indefinitely until the partition heals. -- The tester calls your Raft's `rf.Kill()` when it is shutting down a peer. Raft should close the `applyCh` so that your rsm learns about the shutdown, and can exit out of all loops. diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_22.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_22.md deleted file mode 100644 index af84972..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_22.md +++ /dev/null @@ -1,103 +0,0 @@ -# Problem Context - -## Introduction - -In this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html). To clients, the service looks similar to the server of [Lab 2](http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html). However, instead of a single server, the service consists of a set of servers that use Raft to help them maintain identical databases. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2025/figs/kvraft.pdf). - -Clients will interact with your key/value service through a Clerk, as in Lab 2. A Clerk implements the `Put` and `Get` methods with the same semantics as Lab 2: Puts are at-most-once and the Puts/Gets must form a linearizable history. - -Providing linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. - -This lab has three parts. In part A, you will implement a replicated-state machine package, `rsm`, using your raft implementation; `rsm` is agnostic of the requests that it replicates. In part B, you will implement a replicated key/value service using `rsm`, but without using snapshots. In part C, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. - -You should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), in particular Section 7 (but not 8). 
For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) Start early. - -## Getiting Started - -We supply you with skeleton code and tests in `src/kvraft1`. The skeleton code uses the skeleton package `src/kvraft1/rsm` to replicate a server. A server must implement the `StateMachine` interface defined in `rsm` to replicate itself using `rsm`. Most of your work will be implementing `rsm` to provide server-agnostic replication. You will also need to modify `kvraft1/client.go` and `kvraft1/server.go` to implement the server-specific parts. This split allows you to re-use `rsm` in the next lab. You may be able to re-use some of your Lab 2 code (e.g., re-using the server code by copying or importing the `"src/kvsrv1"` package) but it is not a requirement. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -.. -``` - -## The Code - -# Your Task - -``` -$ cd src/kvraft1 -$ go test -v -run TestBasic4B -=== RUN TestBasic4B -Test: one client (4B basic) (reliable network)... - kvtest.go:62: Wrong error -$ -``` - -Now you will use the `rsm` package to replicate a key/value server. Each of the servers ("kvservers") will have an associated rsm/Raft peer. Clerks send `Put()` and `Get()` RPCs to the kvserver whose associated Raft is the leader. The kvserver code submits the Put/Get operation to `rsm`, which replicates it using Raft and invokes your server's `DoOp` at each peer, which should apply the operations to the peer's key/value database; the intent is for the servers to maintain identical replicas of the key/value database. - -A `Clerk` sometimes doesn't know which kvserver is the Raft leader. If the `Clerk` sends an RPC to the wrong kvserver, or if it cannot reach the kvserver, the `Clerk` should re-try by sending to a different kvserver. If the key/value service commits the operation to its Raft log (and hence applies the operation to the key/value state machine), the leader reports the result to the `Clerk` by responding to its RPC. If the operation failed to commit (for example, if the leader was replaced), the server reports an error, and the `Clerk` retries with a different server. - -Your kvservers should not directly communicate; they should only interact with each other through Raft. - -Your first task is to implement a solution that works when there are no dropped messages, and no failed servers. - -Feel free to copy your client code from Lab 2 (`kvsrv1/client.go`) into `kvraft1/client.go`. You will need to add logic for deciding which kvserver to send each RPC to. - -You'll also need to implement `Put()` and `Get()` RPC handlers in `server.go`. These handlers should submit the request to Raft using `rsm.Submit()`. As the `rsm` package reads commands from `applyCh`, it should invoke the `DoOp` method, which you will have to implement in `server.go`. - -You have completed this task when you **reliably** pass the first test in the test suite, with `go test -v -run TestBasic4B`. - -- A kvserver should not complete a `Get()` RPC if it is not part of a majority (so that it does not serve stale data). A simple solution is to enter every `Get()` (as well as each `Put()`) in the Raft log using `Submit()`. You don't have to implement the optimization for read-only operations that is described in Section 8. 
-- It's best to add locking from the start because the need to avoid deadlocks sometimes affects overall code design. Check that your code is race-free using `go test -race`. - -Now you should modify your solution to continue in the face of network and server failures. One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it finds a kvserver that replies positively. If a leader fails just after committing an entry to the Raft log, the `Clerk` may not receive a reply, and thus may re-send the request to another leader. Each call to `Clerk.Put()` should result in just a single execution for a particular version number. - -Add code to handle failures. Your `Clerk` can use a similar retry plan as in lab 2, including returning `ErrMaybe` if a response to a retried `Put` RPC is lost. You are done when your code reliably passes all the 4B tests, with `go test -v -run 4B`. - -- Recall that the rsm leader may lose its leadership and return `rpc.ErrWrongLeader` from `Submit()`. In this case you should arrange for the Clerk to re-send the request to other servers until it finds the new leader. -- You will probably have to modify your Clerk to remember which server turned out to be the leader for the last RPC, and send the next RPC to that server first. This will avoid wasting time searching for the leader on every RPC, which may help you pass some of the tests quickly enough. - -Your code should now pass the Lab 4B tests, like this: - -``` -$ cd kvraft1 -$ go test -run 4B -Test: one client (4B basic) ... - ... Passed -- 3.2 5 1041 183 -Test: one client (4B speed) ... - ... Passed -- 15.9 3 3169 0 -Test: many clients (4B many clients) ... - ... Passed -- 3.9 5 3247 871 -Test: unreliable net, many clients (4B unreliable net, many clients) ... - ... Passed -- 5.3 5 1035 167 -Test: unreliable net, one client (4B progress in majority) ... - ... Passed -- 2.9 5 155 3 -Test: no progress in minority (4B) ... - ... Passed -- 1.6 5 102 3 -Test: completion after heal (4B) ... - ... Passed -- 1.3 5 67 4 -Test: partitions, one client (4B partitions, one client) ... - ... Passed -- 6.2 5 958 155 -Test: partitions, many clients (4B partitions, many clients (4B)) ... - ... Passed -- 6.8 5 3096 855 -Test: restarts, one client (4B restarts, one client 4B ) ... - ... Passed -- 6.7 5 311 13 -Test: restarts, many clients (4B restarts, many clients) ... - ... Passed -- 7.5 5 1223 95 -Test: unreliable net, restarts, many clients (4B unreliable net, restarts, many clients ) ... - ... Passed -- 8.4 5 804 33 -Test: restarts, partitions, many clients (4B restarts, partitions, many clients) ... - ... Passed -- 10.1 5 1308 105 -Test: unreliable net, restarts, partitions, many clients (4B unreliable net, restarts, partitions, many clients) ... - ... Passed -- 11.9 5 1040 33 -Test: unreliable net, restarts, partitions, random keys, many clients (4B unreliable net, restarts, partitions, random keys, many clients) ... - ... Passed -- 12.1 7 2801 93 -PASS -ok 6.5840/kvraft1 103.797s -``` - -The numbers after each `Passed` are real time in seconds, number of peers, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put calls). 
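As a hedged sketch of the leader-remembering retry strategy suggested in the hints above (the reply type, error value, and `callGet` stub are placeholders for illustration, not the lab's actual RPC plumbing):

```
// Sketch only: a Clerk that remembers which server answered last and tries
// it first, falling back to the other servers until one accepts the request.
package kvraft

import "time"

type Err string

const ErrWrongLeader Err = "ErrWrongLeader"

type GetReply struct {
	Err   Err
	Value string
}

type Clerk struct {
	servers []string // network names of the kvservers
	leader  int      // index of the server that answered the last RPC
}

// callGet stands in for sending one Get RPC; it returns false if the RPC
// was lost or went unanswered. (Placeholder, not the lab's API.)
func callGet(server, key string, reply *GetReply) bool {
	// ... RPC machinery elided in this sketch ...
	return false
}

func (ck *Clerk) Get(key string) string {
	for i := 0; ; i++ {
		srv := (ck.leader + i) % len(ck.servers)
		var reply GetReply
		if callGet(ck.servers[srv], key, &reply) && reply.Err != ErrWrongLeader {
			ck.leader = srv // start with this server next time
			return reply.Value
		}
		time.Sleep(100 * time.Millisecond) // give an election time to finish
	}
}
```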
diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_23.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_23.md deleted file mode 100644 index 7c4d7da..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_23.md +++ /dev/null @@ -1,82 +0,0 @@ -# Problem Context - -## Introduction - -In this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2025/labs/lab-raft1.html). To clients, the service looks similar to the server of [Lab 2](http://nil.csail.mit.edu/6.5840/2025/labs/lab-kvsrv1.html). However, instead of a single server, the service consists of a set of servers that use Raft to help them maintain identical databases. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2025/figs/kvraft.pdf). - -Clients will interact with your key/value service through a Clerk, as in Lab 2. A Clerk implements the `Put` and `Get` methods with the same semantics as Lab 2: Puts are at-most-once and the Puts/Gets must form a linearizable history. - -Providing linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. - -This lab has three parts. In part A, you will implement a replicated-state machine package, `rsm`, using your raft implementation; `rsm` is agnostic of the requests that it replicates. In part B, you will implement a replicated key/value service using `rsm`, but without using snapshots. In part C, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. - -You should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2025/papers/raft-extended.pdf), in particular Section 7 (but not 8). For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) Start early. - -## Getiting Started - -We supply you with skeleton code and tests in `src/kvraft1`. The skeleton code uses the skeleton package `src/kvraft1/rsm` to replicate a server. A server must implement the `StateMachine` interface defined in `rsm` to replicate itself using `rsm`. Most of your work will be implementing `rsm` to provide server-agnostic replication. You will also need to modify `kvraft1/client.go` and `kvraft1/server.go` to implement the server-specific parts. This split allows you to re-use `rsm` in the next lab. You may be able to re-use some of your Lab 2 code (e.g., re-using the server code by copying or importing the `"src/kvsrv1"` package) but it is not a requirement. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -.. 
-``` - -## The Code - -# Your Task - -As things stand now, your key/value server doesn't call your Raft library's `Snapshot()` method, so a rebooting server has to replay the complete persisted Raft log in order to restore its state. Now you'll modify kvserver and `rsm` to cooperate with Raft to save log space and reduce restart time, using Raft's `Snapshot()` from Lab 3D. - -The tester passes `maxraftstate` to your `StartKVServer()`, which passes it to `rsm`. `maxraftstate` indicates the maximum allowed size of your persistent Raft state in bytes (including the log, but not including snapshots). You should compare `maxraftstate` to `rf.PersistBytes()`. Whenever your `rsm` detects that the Raft state size is approaching this threshold, it should save a snapshot by calling Raft's `Snapshot`. `rsm` can create this snapshot by calling the `Snapshot` method of the `StateMachine` interface to obtain a snapshot of the kvserver. If `maxraftstate` is -1, you do not have to snapshot. The `maxraftstate` limit applies to the GOB-encoded bytes your Raft passes as the first argument to `persister.Save()`. - -You can find the source for the `persister` object in `tester1/persister.go`. - -Modify your rsm so that it detects when the persisted Raft state grows too large, and then hands a snapshot to Raft. When a `rsm` server restarts, it should read the snapshot with `persister.ReadSnapshot()` and, if the snapshot's length is greater than zero, pass the snapshot to the `StateMachine`'s `Restore()` method. You complete this task if you pass TestSnapshot4C in `rsm`. - -``` -$ cd kvraft1/rsm -$ go test -run TestSnapshot4C -=== RUN TestSnapshot4C - ... Passed -- 9223372036.9 3 230 0 ---- PASS: TestSnapshot4C (3.88s) -PASS -ok 6.5840/kvraft1/rsm 3.882s -``` - -- Think about when `rsm` should snapshot its state and what should be included in the snapshot beyond just the server state. Raft stores each snapshot in the persister object using `Save()`, along with corresponding Raft state. You can read the latest stored snapshot using `ReadSnapshot()`. -- Capitalize all fields of structures stored in the snapshot. - -Implement the `kvraft1/server.go` `Snapshot()` and `Restore()` methods, which `rsm` calls. Modify `rsm` to handle applyCh messages that contain snapshots. - -- You may have bugs in your Raft and rsm library that this task exposes. If you make changes to your Raft implementation make sure it continues to pass all of the Lab 3 tests. -- A reasonable amount of time to take for the Lab 4 tests is 400 seconds of real time and 700 seconds of CPU time. - -Your code should pass the 4C tests (as in the example here) as well as the 4A+B tests (and your Raft must continue to pass the Lab 3 tests). - -``` -$ go test -run 4C -Test: snapshots, one client (4C SnapshotsRPC) ... -Test: InstallSnapshot RPC (4C) ... - ... Passed -- 4.5 3 241 64 -Test: snapshots, one client (4C snapshot size is reasonable) ... - ... Passed -- 11.4 3 2526 800 -Test: snapshots, one client (4C speed) ... - ... Passed -- 14.2 3 3149 0 -Test: restarts, snapshots, one client (4C restarts, snapshots, one client) ... - ... Passed -- 6.8 5 305 13 -Test: restarts, snapshots, many clients (4C restarts, snapshots, many clients ) ... - ... Passed -- 9.0 5 5583 795 -Test: unreliable net, snapshots, many clients (4C unreliable net, snapshots, many clients) ... - ... Passed -- 4.7 5 977 155 -Test: unreliable net, restarts, snapshots, many clients (4C unreliable net, restarts, snapshots, many clients) ... - ... 
Passed -- 8.6 5 847 33 -Test: unreliable net, restarts, partitions, snapshots, many clients (4C unreliable net, restarts, partitions, snapshots, many clients) ... - ... Passed -- 11.5 5 841 33 -Test: unreliable net, restarts, partitions, snapshots, random keys, many clients (4C unreliable net, restarts, partitions, snapshots, random keys, many clients) ... - ... Passed -- 12.8 7 2903 93 -PASS -ok 6.5840/kvraft1 83.543s -``` diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_24.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_24.md deleted file mode 100644 index c14b05b..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_24.md +++ /dev/null @@ -1,166 +0,0 @@ -# Problem Context - -## Introduction - -You can either do a [final project](http://nil.csail.mit.edu/6.5840/2025/project.html) based on your own ideas, or this lab. - -In this lab you'll build a key/value storage system that "shards," or partitions, the keys over a set of Raft-replicated key/value server groups (shardgrps). A shard is a subset of the key/value pairs; for example, all the keys starting with "a" might be one shard, all the keys starting with "b" another, etc. The reason for sharding is performance. Each shardgrp handles puts and gets for just a few of the shards, and the shardgrps operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of shardgrps. - -![shardkv design](http://nil.csail.mit.edu/6.5840/2025/labs/shardkv.png) - -The sharded key/value service has the components shown above. Shardgrps (shown with blue squares) store shards with keys: shardgrp 1 holds a shard storing key "a", and shardgrp 2 holds a shard storing key "b". Clients of the sharded key/value service interact with the service through a clerk (shown with a green circle), which implements `Get` and `Put` methods. To find the shardgrp for a key passed to `Put`/`Get`, the clerk gets the configuration from the kvsrv (shown with a black square), which you implemented in Lab 2. The configuration (not shown) describes the mapping from shards to shardgrps (e.g., shard 1 is served by shardgrp 3). - -An administrator (i.e., the tester) uses another client, the controller (shown with a purple circle), to add/remove shardgrps from the cluster and update which shardgrp should serve a shard. The controller has one main method: `ChangeConfigTo`, which takes as argument a new configuration and changes the system from the current configuration to the new configuration; this involves moving shards to new shardgrps that are joining the system and moving shards away from shardgrps that are leaving the system. To do so the controller 1) makes RPCs (`FreezeShard`, `InstallShard`, and `DeleteShard`) to shardgrps, and 2) updates the configuration stored in kvsrv. - -The reason for the controller is that a sharded storage system must be able to shift shards among shardgrps. One reason is that some shardgrps may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that shardgrps may join and leave the system: new shardgrps may be added to increase capacity, or existing shardgrps may be taken offline for repair or retirement. - -The main challenges in this lab will be ensuring linearizability of `Get`/`Put` operations while handling 1) changes in the assignment of shards to shardgrps, and 2) recovering from a controller that fails or is partitioned during `ChangeConfigTo`. - -1. 
`ChangeConfigTo` moves shards from one shardgrp to another. A risk is that some clients might use the old shardgrp while other clients use the new shardgrp, which could break linearizability. You will need to ensure that at most one shardgrp is serving requests for each shard at any one time. -2. If `ChangeConfigTo` fails while reconfiguring, some shards may be inaccessible if they have started but not completed moving from one shardgrp to another. To make forward progress, the tester starts a new controller, and your job is to ensure that the new one completes the reconfiguration that the previous controller started. - -This lab uses "configuration" to refer to the assignment of shards to shardgrps. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. - -A shardgrp server is a member of only a single shardgrp. The set of servers in a given shardgrp will never change. - -Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. - -In Part A, you will implement a working `shardctrler`, which will store and retrieve configurations in a `kvsrv`. You will also implement the `shardgrp`, replicated with your Raft `rsm` package, and a corresponding `shardgrp` clerk. The `shardctrler` talks to the `shardgrp` clerks to move shards between different groups. - -In Part B, you will modify your `shardctrler` to handle failures and partitions during config changes. In Part C, you will extend your `shardctrler` to allow for concurrent controllers without interfering with each other. Finally, in Part D, you will have the opportunity to extend your solution in any way you like. - -This lab's sharded key/value service follows the same general design as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are simple; and so on. - -Lab 5 will use your `kvsrv` from Lab 2, and your `rsm` and `Raft` from Lab 4. Your Lab 5 and Lab 4 must use the same `rsm` and `Raft` implementations. - -You may use late hours for Part A, but you may not use late hours for Parts B-D. - -## Getiting Started - -Do a `git pull` to get the latest lab software. - -We supply you with tests and skeleton code in `src/shardkv1`: - -- `client.go` for the shardkv clerk -- `shardcfg` package for computing shard configurations -- `shardgrp` package: for the shardgrp clerk and server. -- `shardctrler` package, which contains `shardctrler.go` with methods for the controller to change a configuration (`ChangeConfigTo`) and to get a configuration (`Query`) - -To get up and running, execute the following commands: - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/shardkv1 -$ go test -v -=== RUN TestInitQuery5A -Test (5A): Init and Query ... (reliable network)... - shardkv_test.go:46: Static wrong null 0 -... -``` - -## The Code - -# Your Task - -Your first job is to implement shardgrps and the `InitConfig`, `Query`, and `ChangeConfigTo` methods when there are no failures. We have given you the code for describing a configuration, in `shardkv1/shardcfg`. 
Each `shardcfg.ShardConfig` has a unique identifying number, a mapping from shard number to group number, and a mapping from group number to the list of servers replicating that group. There will usually be more shards than groups (so that each group serves more than one shard), in order that load can be shifted at a fairly fine granularity. - -Implement these two methods in `shardctrler/shardctrler.go`: - -- The `InitConfig` method receives the first configuration, passed to it by the tester as a `shardcfg.ShardConfig`. `InitConfig` should store the configuration in an instance of Lab 2's `kvsrv`. -- The `Query` method returns the current configuration; it should read the configuration from `kvsrv`, previously stored there by `InitConfig`. - -Implement `InitConfig` and `Query`, and store the configuration in `kvsrv`. You're done when your code passes the first test. Note this task doesn't require any shardgrps. - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run TestInitQuery5A -Test (5A): Init and Query ... (reliable network)... - ... Passed -- time 0.0s #peers 1 #RPCs 3 #Ops 0 -PASS -ok 6.5840/shardkv1 0.197s -$ -``` - -- Implement `InitConfig` and `Query` by storing and reading the initial configuration from `kvsrv`: use the `Get`/`Put` methods of `ShardCtrler.IKVClerk` to talk to `kvsrv`, use the `String` method of `ShardConfig` to turn a `ShardConfig` into a string that you can pass to `Put`, and use the `shardcfg.FromString()` function to turn a string into a `ShardConfig`. - -Implement an initial version of `shardgrp` in `shardkv1/shardgrp/server.go` and a corresponding clerk in `shardkv1/shardgrp/client.go` by copying code from your Lab 4 `kvraft` solution. - -Implement a clerk in `shardkv1/client.go` that uses the `Query` method to find the shardgrp for a key, and then talks to that shardgrp. You're done when your code passes the `Static` test. - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run Static -Test (5A): one shard group ... (reliable network)... - ... Passed -- time 5.4s #peers 1 #RPCs 793 #Ops 180 -PASS -ok 6.5840/shardkv1 5.632s -$ -``` - -- Copy code from your `kvraft` client.go and server.go for `Put` and `Get`, and any other code you need from `kvraft`. -- The code in `shardkv1/client.go` provides the `Put`/`Get` clerk for the overall system: it finds out which shardgrp holds the desired key's shard by invoking the `Query` method, and then talks to the shardgrp that holds that shard. -- Implement `shardkv1/client.go`, including its `Put`/`Get` methods. Use `shardcfg.Key2Shard()` to find the shard number for a key. The tester passes a `ShardCtrler` object to `MakeClerk` in `shardkv1/client.go`. Retrieve the current configuration using the `Query` method. -- To put/get a key from a shardgrp, the shardkv clerk should create a shardgrp clerk for the shardgrp by calling `shardgrp.MakeClerk`, passing in the servers found in the configuration and the shardkv clerk's `ck.clnt`. Use the `GidServers()` method from `ShardConfig` to get the group for a shard. -- `shardkv1/client.go`'s Put must return `ErrMaybe` when the reply was maybe lost, but this Put invokes `shardgrp`'s Put to talk a particular shardgrp. The inner Put can signal this with an error. -- Upon creation, the first shardgrp (`shardcfg.Gid1`) should initialize itself to own all shards. - -Now you should support movement of shards among groups by implementing the `ChangeConfigTo` method, which changes from an old configuration to a new configuration. 
The new configuration may include new shardgrps that are not present in the old configuration, and may exclude shardgrps that were present in the old configuration. The controller should move shards (the key/value data) so that the set of shards stored by each shardgrp matches the new configuration. - -The approach we suggest for moving a shard is for `ChangeConfigTo` to first "freeze" the shard at the source shardgrp, causing that shardgrp to reject `Put`'s for keys in the moving shard. Then, copy (install) the shard to the destination shardgrp; then delete the frozen shard. Finally, post a new configuration so that clients can find the moved shard. A nice property of this approach is that it avoids any direct interactions among the shardgrps. It also supports serving shards that are not affected by an ongoing configuration change. - -To be able to order changes to the configuration, each configuration has a unique number `Num` (see `shardcfg/shardcfg.go`). The tester in Part A invokes `ChangeConfigTo` sequentially, and the configuration passed to `ChangeConfigTo` will have a `Num` one larger than the previous one; thus, a configuration with a higher `Num` is newer than one with a lower `Num`. - -The network may delay RPCs, and RPCs may arrive out of order at the shardgrps. To reject old `FreezeShard`, `InstallShard`, and `DeleteShard` RPCs, they should include `Num` (see `shardgrp/shardrpc/shardrpc.go`), and shardgrps must remember the largest `Num` they have seen for each shard. - -Implement `ChangeConfigTo` (in `shardctrler/shardctrler.go`) and extend `shardgrp` to support freeze, install, and delete. `ChangeConfigTo` should always succeed in Part A because the tester doesn't induce failures in this part. You will need to implement `FreezeShard`, `InstallShard`, and `DeleteShard` in `shardgrp/client.go` and `shardgrp/server.go` using the RPCs in the `shardgrp/shardrpc` package, and reject old RPCs based on `Num`. You will also need modify the shardkv clerk in `shardkv1/client.go` to handle `ErrWrongGroup`, which a shardgrp should return if it isn't responsible for the shard. - -You have completed this task when you pass the `JoinBasic` and `DeleteBasic` tests. These tests focus on adding shardgrps; you don't have to worry about shardgrps leaving just yet. - -- A shardgrp should respond with an `ErrWrongGroup` error to a client `Put`/`Get` with a key that the shardgrp isn't responsible for (i.e., for a key whose shard is not assigned to the shardgrp). You will have to modify `shardkv1/client.go` to reread the configuration and retry the `Put`/`Get`. -- Note that you will have to run `FreezeShard`, `InstallShard`, and `DeleteShard` through your `rsm` package, just like `Put` and `Get`. -- You can send an entire map as your state in an RPC request or reply, which may help keep the code for shard transfer simple. -- If one of your RPC handlers includes in its reply a map (e.g. a key/value map) that's part of your server's state, you may get bugs due to races. The RPC system has to read the map in order to send it to the caller, but it isn't holding a lock that covers the map. Your server, however, may proceed to modify the same map while the RPC system is reading it. The solution is for the RPC handler to include a copy of the map in the reply. - -Extend `ChangeConfigTo` to handle shard groups that leave; i.e., shardgrps that are present in the current configuration but not in the new one. Your solution should pass `TestJoinLeaveBasic5A` now. 
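To make the suggested freeze/install/delete sequence concrete, here is a minimal Go sketch of the per-shard move loop described above. It is only an illustration, not the lab's code: `grpClerk`, `config`, `nShards`, `clerkFor`, and `publish` are placeholders standing in for the shardgrp clerk, `shardcfg.ShardConfig`, the lab's shard count, and the controller's own plumbing, and all error handling, retries, and server-side `Num` fencing are omitted.

```
// Sketch only: placeholder types standing in for shardcfg.ShardConfig and
// the shardgrp clerk; error handling and retries are omitted.
package ctrlsketch

const nShards = 12 // placeholder; the lab defines its own shard count

// grpClerk stands in for a shardgrp clerk whose RPCs are fenced by the
// new configuration's Num so that stale RPCs are rejected.
type grpClerk interface {
	FreezeShard(shard, num int) []byte // source stops serving Puts for the shard, returns its state
	InstallShard(shard int, state []byte, num int)
	DeleteShard(shard, num int)
}

// config stands in for shardcfg.ShardConfig: Num orders configurations and
// Shards[s] is the group id that owns shard s.
type config struct {
	Num    int
	Shards [nShards]int
}

// changeConfigTo moves every shard whose owner differs between the current
// and the next configuration: freeze at the source, install at the
// destination, delete the frozen copy, and only then publish the next
// configuration so clients can find the moved shards.
func changeConfigTo(cur, next config, clerkFor func(gid int) grpClerk, publish func(config)) {
	for s := 0; s < nShards; s++ {
		src, dst := cur.Shards[s], next.Shards[s]
		if src == dst {
			continue // shard stays put; keep serving it untouched
		}
		state := clerkFor(src).FreezeShard(s, next.Num)
		clerkFor(dst).InstallShard(s, state, next.Num)
		clerkFor(src).DeleteShard(s, next.Num)
	}
	publish(next)
}
```

Note how shards whose owner does not change are skipped entirely; that is what lets the service keep serving shards that are unaffected by an ongoing configuration change.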
(You may have handled this scenario already in the previous task, but the previous tests didn't test for shardgrps leaving.) - -Make your solution pass all Part A tests, which check that your sharded key/value service supports many groups joining and leaving, shardgrps restarting from snapshots, processing `Get`s while some shards are offline or involved in a configuration change, and linearizability when many clients interact with the service while the tester concurrently invokes the controller's `ChangeConfigTo` to rebalance shards. - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run 5A -Test (5A): Init and Query ... (reliable network)... - ... Passed -- time 0.0s #peers 1 #RPCs 3 #Ops 0 -Test (5A): one shard group ... (reliable network)... - ... Passed -- time 5.1s #peers 1 #RPCs 792 #Ops 180 -Test (5A): a group joins... (reliable network)... - ... Passed -- time 12.9s #peers 1 #RPCs 6300 #Ops 180 -Test (5A): delete ... (reliable network)... - ... Passed -- time 8.4s #peers 1 #RPCs 1533 #Ops 360 -Test (5A): basic groups join/leave ... (reliable network)... - ... Passed -- time 13.7s #peers 1 #RPCs 5676 #Ops 240 -Test (5A): many groups join/leave ... (reliable network)... - ... Passed -- time 22.1s #peers 1 #RPCs 3529 #Ops 180 -Test (5A): many groups join/leave ... (unreliable network)... - ... Passed -- time 54.8s #peers 1 #RPCs 5055 #Ops 180 -Test (5A): shutdown ... (reliable network)... - ... Passed -- time 11.7s #peers 1 #RPCs 2807 #Ops 180 -Test (5A): progress ... (reliable network)... - ... Passed -- time 8.8s #peers 1 #RPCs 974 #Ops 82 -Test (5A): progress ... (reliable network)... - ... Passed -- time 13.9s #peers 1 #RPCs 2443 #Ops 390 -Test (5A): one concurrent clerk reliable... (reliable network)... - ... Passed -- time 20.0s #peers 1 #RPCs 5326 #Ops 1248 -Test (5A): many concurrent clerks reliable... (reliable network)... - ... Passed -- time 20.4s #peers 1 #RPCs 21688 #Ops 10500 -Test (5A): one concurrent clerk unreliable ... (unreliable network)... - ... Passed -- time 25.8s #peers 1 #RPCs 2654 #Ops 176 -Test (5A): many concurrent clerks unreliable... (unreliable network)... - ... Passed -- time 25.3s #peers 1 #RPCs 7553 #Ops 1896 -PASS -ok 6.5840/shardkv1 243.115s -$ -``` - -Your solution must continue serving shards that are not affected by an ongoing configuration change. diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_25.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_25.md deleted file mode 100644 index 496e583..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_25.md +++ /dev/null @@ -1,85 +0,0 @@ -# Problem Context - -## Introduction - -You can either do a [final project](http://nil.csail.mit.edu/6.5840/2025/project.html) based on your own ideas, or this lab. - -In this lab you'll build a key/value storage system that "shards," or partitions, the keys over a set of Raft-replicated key/value server groups (shardgrps). A shard is a subset of the key/value pairs; for example, all the keys starting with "a" might be one shard, all the keys starting with "b" another, etc. The reason for sharding is performance. Each shardgrp handles puts and gets for just a few of the shards, and the shardgrps operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of shardgrps. - -![shardkv design](http://nil.csail.mit.edu/6.5840/2025/labs/shardkv.png) - -The sharded key/value service has the components shown above. 
Shardgrps (shown with blue squares) store shards with keys: shardgrp 1 holds a shard storing key "a", and shardgrp 2 holds a shard storing key "b". Clients of the sharded key/value service interact with the service through a clerk (shown with a green circle), which implements `Get` and `Put` methods. To find the shardgrp for a key passed to `Put`/`Get`, the clerk gets the configuration from the kvsrv (shown with a black square), which you implemented in Lab 2. The configuration (not shown) describes the mapping from shards to shardgrps (e.g., shard 1 is served by shardgrp 3). - -An administrator (i.e., the tester) uses another client, the controller (shown with a purple circle), to add/remove shardgrps from the cluster and update which shardgrp should serve a shard. The controller has one main method: `ChangeConfigTo`, which takes as argument a new configuration and changes the system from the current configuration to the new configuration; this involves moving shards to new shardgrps that are joining the system and moving shards away from shardgrps that are leaving the system. To do so the controller 1) makes RPCs (`FreezeShard`, `InstallShard`, and `DeleteShard`) to shardgrps, and 2) updates the configuration stored in kvsrv. - -The reason for the controller is that a sharded storage system must be able to shift shards among shardgrps. One reason is that some shardgrps may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that shardgrps may join and leave the system: new shardgrps may be added to increase capacity, or existing shardgrps may be taken offline for repair or retirement. - -The main challenges in this lab will be ensuring linearizability of `Get`/`Put` operations while handling 1) changes in the assignment of shards to shardgrps, and 2) recovering from a controller that fails or is partitioned during `ChangeConfigTo`. - -1. `ChangeConfigTo` moves shards from one shardgrp to another. A risk is that some clients might use the old shardgrp while other clients use the new shardgrp, which could break linearizability. You will need to ensure that at most one shardgrp is serving requests for each shard at any one time. -2. If `ChangeConfigTo` fails while reconfiguring, some shards may be inaccessible if they have started but not completed moving from one shardgrp to another. To make forward progress, the tester starts a new controller, and your job is to ensure that the new one completes the reconfiguration that the previous controller started. - -This lab uses "configuration" to refer to the assignment of shards to shardgrps. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. - -A shardgrp server is a member of only a single shardgrp. The set of servers in a given shardgrp will never change. - -Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. - -In Part A, you will implement a working `shardctrler`, which will store and retrieve configurations in a `kvsrv`. You will also implement the `shardgrp`, replicated with your Raft `rsm` package, and a corresponding `shardgrp` clerk. The `shardctrler` talks to the `shardgrp` clerks to move shards between different groups. - -In Part B, you will modify your `shardctrler` to handle failures and partitions during config changes. 
In Part C, you will extend your `shardctrler` to allow for concurrent controllers without interfering with each other. Finally, in Part D, you will have the opportunity to extend your solution in any way you like. - -This lab's sharded key/value service follows the same general design as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are simple; and so on. - -Lab 5 will use your `kvsrv` from Lab 2, and your `rsm` and `Raft` from Lab 4. Your Lab 5 and Lab 4 must use the same `rsm` and `Raft` implementations. - -You may use late hours for Part A, but you may not use late hours for Parts B-D. - -## Getiting Started - -Do a `git pull` to get the latest lab software. - -We supply you with tests and skeleton code in `src/shardkv1`: - -- `client.go` for the shardkv clerk -- `shardcfg` package for computing shard configurations -- `shardgrp` package: for the shardgrp clerk and server. -- `shardctrler` package, which contains `shardctrler.go` with methods for the controller to change a configuration (`ChangeConfigTo`) and to get a configuration (`Query`) - -To get up and running, execute the following commands: - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/shardkv1 -$ go test -v -=== RUN TestInitQuery5A -Test (5A): Init and Query ... (reliable network)... - shardkv_test.go:46: Static wrong null 0 -... -``` - -## The Code - -# Your Task - -The controller is a short-lived command, which an administrator invokes: it moves shards and then exits. But, it may fail or lose network connectivity while moving shards. The main task in this part of the lab is recovering from a controller that fails to complete `ChangeConfigTo`. The tester starts a new controller and invokes its `ChangeConfigTo` after partitioning the first controller; you have to modify the controller so that the new one finishes the reconfiguration. The tester calls `InitController` when starting a controller; you can modify that function to check whether an interrupted configuration change needs to be completed. - -A good approach to allowing a controller to finish a reconfiguration that a previous one started is to keep two configurations: a current one and a next one, both stored in the controller's kvsrv. When a controller starts a reconfiguration, it stores the next configuration. Once a controller completes the reconfiguration, it makes the next configuration the current one. Modify `InitController` to first check if there is a stored next configuration with a higher configuration number than the current one, and if so, complete the shard moves necessary to reconfigure to the next one. - -Modify shardctrler to implement the above approach. A controller that picks up the work from a failed controller may repeat `FreezeShard`, `InstallShard`, and `Delete` RPCs; shardgrps can use `Num` to detect duplicates and reject them. You have completed this task if your solution passes the Part B tests. - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run 5B -Test (5B): Join/leave while a shardgrp is down... (reliable network)... - ... Passed -- time 9.2s #peers 1 #RPCs 899 #Ops 120 -Test (5B): recover controller ... (reliable network)... - ... 
Passed -- time 26.4s #peers 1 #RPCs 3724 #Ops 360 -PASS -ok 6.5840/shardkv1 35.805s -$ -``` - -- The tester calls `InitController` when starting a controller; you can implement recovery in that method in `shardctrler/shardctrler.go`. diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_26.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_26.md deleted file mode 100644 index 5d8c43b..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_26.md +++ /dev/null @@ -1,122 +0,0 @@ -# Problem Context - -## Introduction - -You can either do a [final project](http://nil.csail.mit.edu/6.5840/2025/project.html) based on your own ideas, or this lab. - -In this lab you'll build a key/value storage system that "shards," or partitions, the keys over a set of Raft-replicated key/value server groups (shardgrps). A shard is a subset of the key/value pairs; for example, all the keys starting with "a" might be one shard, all the keys starting with "b" another, etc. The reason for sharding is performance. Each shardgrp handles puts and gets for just a few of the shards, and the shardgrps operate in parallel; thus total system throughput (puts and gets per unit time) increases in proportion to the number of shardgrps. - -![shardkv design](http://nil.csail.mit.edu/6.5840/2025/labs/shardkv.png) - -The sharded key/value service has the components shown above. Shardgrps (shown with blue squares) store shards with keys: shardgrp 1 holds a shard storing key "a", and shardgrp 2 holds a shard storing key "b". Clients of the sharded key/value service interact with the service through a clerk (shown with a green circle), which implements `Get` and `Put` methods. To find the shardgrp for a key passed to `Put`/`Get`, the clerk gets the configuration from the kvsrv (shown with a black square), which you implemented in Lab 2. The configuration (not shown) describes the mapping from shards to shardgrps (e.g., shard 1 is served by shardgrp 3). - -An administrator (i.e., the tester) uses another client, the controller (shown with a purple circle), to add/remove shardgrps from the cluster and update which shardgrp should serve a shard. The controller has one main method: `ChangeConfigTo`, which takes as argument a new configuration and changes the system from the current configuration to the new configuration; this involves moving shards to new shardgrps that are joining the system and moving shards away from shardgrps that are leaving the system. To do so the controller 1) makes RPCs (`FreezeShard`, `InstallShard`, and `DeleteShard`) to shardgrps, and 2) updates the configuration stored in kvsrv. - -The reason for the controller is that a sharded storage system must be able to shift shards among shardgrps. One reason is that some shardgrps may become more loaded than others, so that shards need to be moved to balance the load. Another reason is that shardgrps may join and leave the system: new shardgrps may be added to increase capacity, or existing shardgrps may be taken offline for repair or retirement. - -The main challenges in this lab will be ensuring linearizability of `Get`/`Put` operations while handling 1) changes in the assignment of shards to shardgrps, and 2) recovering from a controller that fails or is partitioned during `ChangeConfigTo`. - -1. `ChangeConfigTo` moves shards from one shardgrp to another. A risk is that some clients might use the old shardgrp while other clients use the new shardgrp, which could break linearizability. 
You will need to ensure that at most one shardgrp is serving requests for each shard at any one time. -2. If `ChangeConfigTo` fails while reconfiguring, some shards may be inaccessible if they have started but not completed moving from one shardgrp to another. To make forward progress, the tester starts a new controller, and your job is to ensure that the new one completes the reconfiguration that the previous controller started. - -This lab uses "configuration" to refer to the assignment of shards to shardgrps. This is not the same as Raft cluster membership changes. You don't have to implement Raft cluster membership changes. - -A shardgrp server is a member of only a single shardgrp. The set of servers in a given shardgrp will never change. - -Only RPC may be used for interaction among clients and servers. For example, different instances of your server are not allowed to share Go variables or files. - -In Part A, you will implement a working `shardctrler`, which will store and retrieve configurations in a `kvsrv`. You will also implement the `shardgrp`, replicated with your Raft `rsm` package, and a corresponding `shardgrp` clerk. The `shardctrler` talks to the `shardgrp` clerks to move shards between different groups. - -In Part B, you will modify your `shardctrler` to handle failures and partitions during config changes. In Part C, you will extend your `shardctrler` to allow for concurrent controllers without interfering with each other. Finally, in Part D, you will have the opportunity to extend your solution in any way you like. - -This lab's sharded key/value service follows the same general design as Flat Datacenter Storage, BigTable, Spanner, FAWN, Apache HBase, Rosebud, Spinnaker, and many others. These systems differ in many details from this lab, though, and are also typically more sophisticated and capable. For example, the lab doesn't evolve the sets of peers in each Raft group; its data and query models are simple; and so on. - -Lab 5 will use your `kvsrv` from Lab 2, and your `rsm` and `Raft` from Lab 4. Your Lab 5 and Lab 4 must use the same `rsm` and `Raft` implementations. - -You may use late hours for Part A, but you may not use late hours for Parts B-D. - -## Getiting Started - -Do a `git pull` to get the latest lab software. - -We supply you with tests and skeleton code in `src/shardkv1`: - -- `client.go` for the shardkv clerk -- `shardcfg` package for computing shard configurations -- `shardgrp` package: for the shardgrp clerk and server. -- `shardctrler` package, which contains `shardctrler.go` with methods for the controller to change a configuration (`ChangeConfigTo`) and to get a configuration (`Query`) - -To get up and running, execute the following commands: - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/shardkv1 -$ go test -v -=== RUN TestInitQuery5A -Test (5A): Init and Query ... (reliable network)... - shardkv_test.go:46: Static wrong null 0 -... -``` - -## The Code - -# Your Task - -In this part of the lab you will modify the controller to allow for concurrent controllers. When a controller crashes or is partitioned, the tester will start a new controller, which must finish any work that the old controller might have in progress (i.e., finishing moving shards like in Part B). This means that several controllers may run concurrently and send RPCs to the shardgrps and the `kvsrv` that stores configurations. - -The main challenge is to ensure these controllers don't step on each other. 
In Part A you already fenced all the shardgrp RPCs with `Num` so that old RPCs are rejected. Even if several controllers pick up the work of an old controller concurrently, one of them will succeed, and the shardgrps will ignore the RPCs that the others repeat. - -Thus the remaining challenge is to ensure that only one controller updates the next configuration, so that two controllers (e.g., a partitioned one and a new one) cannot post different configurations as the next one. To stress this scenario, the tester runs several controllers concurrently and each one computes the next configuration by reading the current configuration and updating it for a shardgrp that left or joined, and then the tester invokes `ChangeConfigTo`; thus multiple controllers may invoke `ChangeConfigTo` with different configurations that have the same `Num`. You can use the version number of a key and versioned `Put`s to ensure that only one controller updates the next configuration and that the other invocations return without doing anything. - -Modify your controller so that only one controller can post a next configuration for a configuration `Num`. The tester will start many controllers but only one should start `ChangeConfigTo` for a new configuration. You have completed this task if you pass the concurrent tests of Part C: - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run TestConcurrentReliable5C -Test (5C): Concurrent ctrlers ... (reliable network)... - ... Passed -- time 8.2s #peers 1 #RPCs 1753 #Ops 120 -PASS -ok 6.5840/shardkv1 8.364s -$ go test -run TestAcquireLockConcurrentUnreliable5C -Test (5C): Concurrent ctrlers ... (unreliable network)... - ... Passed -- time 23.8s #peers 1 #RPCs 1850 #Ops 120 -PASS -ok 6.5840/shardkv1 24.008s -$ -``` - -- See `concurCtrler` in `test.go` to see how the tester runs controllers concurrently. - -In this exercise you will combine recovery with concurrent controllers: a new controller should perform the recovery from Part B. If the old controller was partitioned during `ChangeConfigTo`, you will have to make sure that the old controller doesn't interfere with the new controller. If all the controller's updates are already properly fenced with `Num` checks from Part B, you don't have to write extra code. You have completed this task if you pass the `Partition` tests. - -``` -$ cd ~/6.5840/src/shardkv1 -$ go test -run Partition -Test (5C): partition controller in join... (reliable network)... - ... Passed -- time 7.8s #peers 1 #RPCs 876 #Ops 120 -Test (5C): controllers with leased leadership ... (reliable network)... - ... Passed -- time 36.8s #peers 1 #RPCs 3981 #Ops 360 -Test (5C): controllers with leased leadership ... (unreliable network)... - ... Passed -- time 52.4s #peers 1 #RPCs 2901 #Ops 240 -Test (5C): controllers with leased leadership ... (reliable network)... - ... Passed -- time 60.2s #peers 1 #RPCs 27415 #Ops 11182 -Test (5C): controllers with leased leadership ... (unreliable network)... - ... Passed -- time 60.5s #peers 1 #RPCs 11422 #Ops 2336 -PASS -ok 6.5840/shardkv1 217.779s -$ -``` - -You have completed implementing a highly-available sharded key/value service with many shard groups for scalability, reconfiguration to handle changes in load, and with a fault-tolerant controller; congrats! - -Rerun all tests to check that your recent changes to the controller haven't broken earlier tests. - -Gradescope will rerun the Lab 3A-D and Lab 4A-C tests on your submission, in addition to the 5C tests. 
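Returning to the versioned-`Put` idea described above, the sketch below shows the test-and-set shape it gives you. The `versionedKV` interface, the `"next-config"` key, and the missing-key behavior are assumptions made for illustration (the lab's `IKVClerk` and its error values differ); the point is only that at most one of several concurrent controllers sees its conditional `Put` succeed for a given stored version.

```
// Sketch only: versionedKV is a stand-in for the lab's kvsrv clerk with
// versioned Puts; the key name and missing-key handling are assumed.
package ctrlsketch

type versionedKV interface {
	// Get returns the current value and its version (ok is false if the key is missing).
	Get(key string) (value string, version uint64, ok bool)
	// Put succeeds only if the stored version still equals version, so at
	// most one concurrent caller can win for any given version.
	Put(key, value string, version uint64) bool
}

// tryPostNextConfig attempts to claim the right to drive the change to
// nextCfg (already rendered to a string). Exactly one of several
// concurrent controllers gets true back; the rest should return without
// doing anything, as the task above requires.
func tryPostNextConfig(kv versionedKV, nextCfg string) bool {
	_, ver, ok := kv.Get("next-config") // placeholder key name
	if !ok {
		ver = 0 // assumption: a missing key behaves like version 0
	}
	return kv.Put("next-config", nextCfg, ver)
}
```

On an unreliable network the real clerk may also report that a `Put` was *maybe* applied, which a real controller has to tolerate; the sketch ignores that case.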
Before submitting, double check that your solution works: - -``` -go test ./raft1 -go test ./kvraft1 -go test ./shardkv1 -``` diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_3.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_3.md deleted file mode 100644 index 4d2666b..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_3.md +++ /dev/null @@ -1,67 +0,0 @@ -# Problem Context - -## Introduction - -In this lab you will build a key/value server for a single machine that ensures that each operation is executed exactly once despite network failures and that the operations are [linearizable](http://nil.csail.mit.edu/6.5840/2024/papers/linearizability-faq.txt). Later labs will replicate a server like this one to handle server crashes. - -Clients can send three different RPCs to the key/value server: `Put(key, value)`, `Append(key, arg)`, and `Get(key)`. The server maintains an in-memory map of key/value pairs. Keys and values are strings. `Put(key, value)` installs or replaces the value for a particular key in the map, `Append(key, arg)` appends arg to key's value *and* returns the old value, and `Get(key)` fetches the current value for the key. A `Get` for a non-existent key should return an empty string. An `Append` to a non-existent key should act as if the existing value were a zero-length string. Each client talks to the server through a `Clerk` with Put/Append/Get methods. A `Clerk` manages RPC interactions with the server. - -Your server must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If client requests aren't concurrent, each client Get/Put/Append call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server. - -## Getiting Started - -We supply you with skeleton code and tests in `src/kvsrv`. You will need to modify `kvsrv/client.go`, `kvsrv/server.go`, and `kvsrv/common.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv -$ go test -... -$ -``` - -## The Code - -# Your Task - -Now you should modify your solution to continue in the face of dropped messages (e.g., RPC requests and RPC replies). If a message was lost, then the client's `ck.server.Call()` will return `false` (more precisely, `Call()` waits for a reply message for a timeout interval, and returns false if no reply arrives within that time). One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it succeeds. 
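One common shape for that retry loop is sketched below with placeholder types: `rpcEnd` stands in for the lab's `*labrpc.ClientEnd` (whose `Call()` returns false when the request or the reply is lost), the `"KVServer.Put"` method name is assumed, and the `ClientId`/`Seq` fields are one possible way, not the only way, to let the server recognize a re-sent request, anticipating the duplicate-detection discussion below.

```
// Sketch only: placeholder types; the real Clerk wraps a *labrpc.ClientEnd
// and the lab defines its own args/reply structs in common.go.
package kvsketch

import "time"

// rpcEnd stands in for *labrpc.ClientEnd: Call returns false when either
// the request or the reply was dropped by the network.
type rpcEnd interface {
	Call(svcMeth string, args interface{}, reply interface{}) bool
}

type PutArgs struct {
	Key, Value string
	ClientId   int64 // stable identity of this clerk (assumed field)
	Seq        int64 // per-clerk sequence number (assumed field)
}

type PutReply struct{}

type Clerk struct {
	server   rpcEnd
	clientId int64
	seq      int64
}

// Put re-sends the *same* request (same ClientId/Seq) until some reply
// arrives, so the server has enough information to execute it only once.
func (ck *Clerk) Put(key, value string) {
	ck.seq++
	args := PutArgs{Key: key, Value: value, ClientId: ck.clientId, Seq: ck.seq}
	for {
		var reply PutReply
		if ck.server.Call("KVServer.Put", &args, &reply) { // method name assumed
			return
		}
		time.Sleep(100 * time.Millisecond) // brief pause; Call already waits for a timeout
	}
}
```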
Each call to `Clerk.Put()` or `Clerk.Append()`, however, should result in just a *single* execution, so you will have to ensure that the re-send doesn't result in the server executing the request twice. - -Add code to `Clerk` to retry if doesn't receive a reply, and to `server.go` to filter duplicates if the operation requires it. These notes include guidance on [duplicate detection](http://nil.csail.mit.edu/6.5840/2024/notes/l-raft-QA.txt). - -- You will need to uniquely identify client operations to ensure that the key/value server executes each one just once. -- You will have to think carefully about what state the server must maintain for handling duplicate `Get()`, `Put()`, and `Append()` requests, if any at all. -- Your scheme for duplicate detection should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. It's OK to assume that a client will make only one call into a Clerk at a time. - -Your code should now pass all tests, like this: - -``` -$ go test -Test: one client ... - ... Passed -- t 3.8 nrpc 31135 ops 31135 -Test: many clients ... - ... Passed -- t 4.7 nrpc 102853 ops 102853 -Test: unreliable net, many clients ... - ... Passed -- t 4.1 nrpc 580 ops 496 -Test: concurrent append to same key, unreliable ... - ... Passed -- t 0.6 nrpc 61 ops 52 -Test: memory use get ... - ... Passed -- t 0.4 nrpc 4 ops 0 -Test: memory use put ... - ... Passed -- t 0.2 nrpc 2 ops 0 -Test: memory use append ... - ... Passed -- t 0.4 nrpc 2 ops 0 -Test: memory use many puts ... - ... Passed -- t 11.5 nrpc 100000 ops 0 -Test: memory use many gets ... - ... Passed -- t 12.2 nrpc 100001 ops 0 -PASS -ok 6.5840/kvsrv 39.000s -``` - -The numbers after each `Passed` are real time in seconds, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put/Append calls). diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_4.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_4.md deleted file mode 100644 index 58d4dfb..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_4.md +++ /dev/null @@ -1,108 +0,0 @@ -# Problem Context - -## Introduction - -This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. 
If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date. - -## Getiting Started - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft -$ go test -Test (3A): initial election ... ---- FAIL: TestInitialElection3A (5.04s) - config.go:326: expected one leader, got none -Test (3A): election after network failure ... ---- FAIL: TestReElection3A (5.03s) - config.go:326: expected one leader, got none -... -$ -``` - -## The Code - -Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. 
The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code. - -# Your Task - -Implement Raft leader election and heartbeats (`AppendEntries` RPCs with no log entries). The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run `go test -run 3A` to test your 3A code. - -- You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. `go test -run 3A`. -- Follow the paper's Figure 2. At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election. -- Add the Figure 2 state for leader election to the `Raft` struct in `raft.go`. You'll also need to define a struct to hold information about each log entry. -- Fill in the `RequestVoteArgs` and `RequestVoteReply` structs. Modify `Make()` to create a background goroutine that will kick off leader election periodically by sending out `RequestVote` RPCs when it hasn't heard from another peer for a while. Implement the `RequestVote()` RPC handler so that servers will vote for one another. -- To implement heartbeats, define an `AppendEntries` RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an `AppendEntries` RPC handler method. -- The tester requires that the leader send heartbeat RPCs no more than ten times per second. -- The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate). -- The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you to tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds. -- You may find Go's [rand](https://golang.org/pkg/math/rand/) useful. -- You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls [time.Sleep()](https://golang.org/pkg/time/#Sleep); see the `ticker()` goroutine that `Make()` creates for this purpose. Don't use Go's `time.Timer` or `time.Ticker`, which are difficult to use correctly. 
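Tying the last few hints together, here is a hedged sketch of one way the `ticker()` goroutine and a randomized election timeout can be structured. Only `ticker()`, `Make()`, and `Kill()`/`killed()` are names from the skeleton; the fields and helpers below are invented for the sketch, and `startElection` is deliberately left empty.

```
// Sketch only: apart from ticker(), Make(), and killed(), the names below
// are invented for illustration.
package raftsketch

import (
	"math/rand"
	"sync"
	"time"
)

type Raft struct {
	mu        sync.Mutex
	lastHeard time.Time     // reset whenever a valid AppendEntries or vote grant arrives
	timeout   time.Duration // current randomized election timeout
	dead      bool          // set by Kill()
}

func (rf *Raft) killed() bool {
	rf.mu.Lock()
	defer rf.mu.Unlock()
	return rf.dead
}

// resetElectionTimer must be called with rf.mu held; Make() would call it
// once before launching ticker(). The range is well above the ~100ms
// heartbeat interval that the tester's ten-per-second limit implies.
func (rf *Raft) resetElectionTimer() {
	rf.lastHeard = time.Now()
	rf.timeout = time.Duration(300+rand.Intn(300)) * time.Millisecond
}

// startElection would increment currentTerm, vote for itself, and send
// RequestVote RPCs to all peers; omitted in this sketch.
func (rf *Raft) startElection() {}

// ticker runs for the life of the peer, polling every few tens of
// milliseconds instead of using time.Timer or time.Ticker.
func (rf *Raft) ticker() {
	for !rf.killed() {
		rf.mu.Lock()
		expired := time.Since(rf.lastHeard) > rf.timeout
		if expired {
			rf.resetElectionTimer()
		}
		rf.mu.Unlock()
		if expired {
			rf.startElection()
		}
		time.Sleep(30 * time.Millisecond)
	}
}
```

The 300-600 ms range here is just one choice that sits well above the heartbeat interval implied by the tester's rate limit while still allowing a leader to be elected well within the five-second requirement.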
-- If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure. -- Don't forget to implement `GetState()`. -- The tester calls your Raft's `rf.Kill()` when it is permanently shutting down an instance. You can check whether `Kill()` has been called using `rf.killed()`. You may want to do this in all loops, to avoid having dead Raft instances print confusing messages. -- Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). The `labgob` package will warn you about this; don't ignore the warnings. -- The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug. Refer to the [Guidance](https://pdos.csail.mit.edu/6.824/labs/guidance.html) page for debugging tips. -- If you fail a test, the tester produces a file that visualizes a timeline with events marked along it, including network partitions, crashed servers, and checks performed. Here's an [example of the visualization](https://pdos.csail.mit.edu/6.824/labs/vis.html). Further, you can add your own annotations by writing, for example, `tester.Annotate("Server 0", "short description", "details")`. This is a new feature we added this year, so if you have any feedback regarding the visualizer (e.g., bug reports, what annotation APIs that you think might be helpful, what information you want the visualizer to show, etc.), please let us know! - -Be sure you pass the 3A tests before submitting Part 3A, so that you see something like this: - -``` -$ go test -run 3A -Test (3A): initial election (reliable network)... - ... Passed -- 3.6 3 106 0 -Test (3A): election after network failure (reliable network)... - ... Passed -- 7.6 3 304 0 -Test (3A): multiple elections (reliable network)... - ... Passed -- 8.4 7 954 0 -PASS -ok 6.5840/raft1 19.834sak -$ -``` - -Each "Passed" line contains five numbers; these are the time that the test took in seconds, the number of Raft peers, the number of RPCs sent during the test, the total number of bytes in the RPC messages, and the number of log entries that Raft reports were committed. Your numbers will differ from those shown here. You can ignore the numbers if you like, but they may help you sanity-check the number of RPCs that your implementation sends. For all of labs 3, 4, and 5, the grading script will fail your solution if it takes more than 600 seconds for all of the tests (`go test`), or if any individual test takes more than 120 seconds. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should make sure that your code consistently passes the tests with the `-race` flag. diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_5.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_5.md deleted file mode 100644 index 2da2679..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_5.md +++ /dev/null @@ -1,112 +0,0 @@ -# Problem Context - -## Introduction - -This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. 
- -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date. - -## Getiting Started - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft -$ go test -Test (3A): initial election ... ---- FAIL: TestInitialElection3A (5.04s) - config.go:326: expected one leader, got none -Test (3A): election after network failure ... ---- FAIL: TestReElection3A (5.03s) - config.go:326: expected one leader, got none -... -$ -``` - -## The Code - -Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. 
You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code. - -# Your Task - -Implement the leader and follower code to append new log entries, so that the `go test -run 3B`tests pass. - -- Run `git pull` to get the latest lab software. -- Your first goal should be to pass `TestBasicAgree3B()`. Start by implementing `Start()`, then write the code to send and receive new log entries via `AppendEntries` RPCs, following Figure 2. Send each newly committed entry on `applyCh` on each peer. -- You will need to implement the election restriction (section 5.4.1 in the paper). -- Your code may have loops that repeatedly check for certain events. Don't have these loops execute continuously without pausing, since that will slow your implementation enough that it fails tests. Use Go's [condition variables](https://golang.org/pkg/sync/#Cond), or insert a `time.Sleep(10 * time.Millisecond)` in each loop iteration. -- Do yourself a favor for future labs and write (or re-write) code that's clean and clear. For ideas, re-visit our the [Guidance page](http://nil.csail.mit.edu/6.5840/2024/labs/guidance.html) with tips on how to develop and debug your code. -- If you fail a test, look at `test_test.go` and `config.go` to understand what's being tested. `config.go` also illustrates how the tester uses the Raft API. - -The tests for upcoming labs may fail your code if it runs too slowly. You can check how much real time and CPU time your solution uses with the time command. Here's typical output: - -``` -$ time go test -run 3B -Test (3B): basic agreement ... - ... Passed -- 0.9 3 16 4572 3 -Test (3B): RPC byte count ... - ... Passed -- 1.7 3 48 114536 11 -Test (3B): agreement after follower reconnects ... - ... Passed -- 3.6 3 78 22131 7 -Test (3B): no agreement if too many followers disconnect ... - ... 
Passed -- 3.8 5 172 40935 3 -Test (3B): concurrent Start()s ... - ... Passed -- 1.1 3 24 7379 6 -Test (3B): rejoin of partitioned leader ... - ... Passed -- 5.1 3 152 37021 4 -Test (3B): leader backs up quickly over incorrect follower logs ... - ... Passed -- 17.2 5 2080 1587388 102 -Test (3B): RPC counts aren't too high ... - ... Passed -- 2.2 3 60 20119 12 -PASS -ok 6.5840/raft 35.557s - -real 0m35.899s -user 0m2.556s -sys 0m1.458s -$ -``` - -The "ok 6.5840/raft 35.557s" means that Go measured the time taken for the 3B tests to be 35.557 seconds of real (wall-clock) time. The "user 0m2.556s" means that the code consumed 2.556 seconds of CPU time, or time spent actually executing instructions (rather than waiting or sleeping). If your solution uses much more than a minute of real time for the 3B tests, or much more than 5 seconds of CPU time, you may run into trouble later on. Look for time spent sleeping or waiting for RPC timeouts, loops that run without sleeping or waiting for conditions or channel messages, or large numbers of RPCs sent. - -### diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_6.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_6.md deleted file mode 100644 index 012d6ef..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_6.md +++ /dev/null @@ -1,131 +0,0 @@ -# Problem Context - -## Introduction - -This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2. 
You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date. - -## Getiting Started - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft -$ go test -Test (3A): initial election ... ---- FAIL: TestInitialElection3A (5.04s) - config.go:326: expected one leader, got none -Test (3A): election after network failure ... ---- FAIL: TestReElection3A (5.03s) - config.go:326: expected one leader, got none -... -$ -``` - -## The Code - -Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code. 
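To make the `ApplyMsg` flow concrete, here is a small sketch of the consumer side: roughly what the tester (and later your key/value service) does with the `applyCh` passed to `Make()`. The `CommandValid`/`Command`/`CommandIndex` field names follow the usual skeleton layout but should be treated as assumptions; the comments in `raft.go` are authoritative, and the real struct also carries snapshot-related fields used in later parts.

```
// Sketch only: ApplyMsg mirrors the usual skeleton layout; check raft.go
// for the authoritative definition.
package main

import "fmt"

type ApplyMsg struct {
	CommandValid bool
	Command      interface{}
	CommandIndex int
}

// applyLoop is roughly what a service built on Raft runs: it receives
// committed entries in log order and applies each one to its local state.
func applyLoop(applyCh <-chan ApplyMsg, apply func(index int, cmd interface{})) {
	for msg := range applyCh {
		if !msg.CommandValid {
			continue // not a command (e.g., a snapshot message in later parts)
		}
		apply(msg.CommandIndex, msg.Command)
	}
}

func main() {
	applyCh := make(chan ApplyMsg, 1)
	applyCh <- ApplyMsg{CommandValid: true, Command: "x=1", CommandIndex: 1}
	close(applyCh)
	applyLoop(applyCh, func(i int, cmd interface{}) { fmt.Println("applied", i, cmd) })
}
```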
- -# Your Task - -If a Raft-based server reboots it should resume service where it left off. This requires that Raft keep persistent state that survives a reboot. The paper's Figure 2 mentions which state should be persistent. - -A real implementation would write Raft's persistent state to disk each time it changed, and would read the state from disk when restarting after a reboot. Your implementation won't use the disk; instead, it will save and restore persistent state from a `Persister` object (see `persister.go`). Whoever calls `Raft.Make()` supplies a `Persister` that initially holds Raft's most recently persisted state (if any). Raft should initialize its state from that `Persister`, and should use it to save its persistent state each time the state changes. Use the `Persister`'s `ReadRaftState()` and `Save()` methods. - -Complete the functions `persist()` and `readPersist()` in `raft.go` by adding code to save and restore persistent state. You will need to encode (or "serialize") the state as an array of bytes in order to pass it to the `Persister`. Use the `labgob` encoder; see the comments in `persist()` and `readPersist()`. `labgob` is like Go's `gob` encoder but prints error messages if you try to encode structures with lower-case field names. For now, pass `nil` as the second argument to `persister.Save()`. Insert calls to `persist()` at the points where your implementation changes persistent state. Once you've done this, and if the rest of your implementation is correct, you should pass all of the 3C tests. - -You will probably need the optimization that backs up nextIndex by more than one entry at a time. Look at the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf) starting at the bottom of page 7 and top of page 8 (marked by a gray line). The paper is vague about the details; you will need to fill in the gaps. One possibility is to have a rejection message include: - -``` - XTerm: term in the conflicting entry (if any) - XIndex: index of first entry with that term (if any) - XLen: log length -``` - -Then the leader's logic can be something like: - -``` - Case 1: leader doesn't have XTerm: - nextIndex = XIndex - Case 2: leader has XTerm: - nextIndex = leader's last entry for XTerm - Case 3: follower's log is too short: - nextIndex = XLen -``` - -A few other hints: - -- Run `git pull` to get the latest lab software. -- The 3C tests are more demanding than those for 3A or 3B, and failures may be caused by problems in your code for 3A or 3B. - -Your code should pass all the 3C tests (as shown below), as well as the 3A and 3B tests. - -``` -$ go test -run 3C -Test (3C): basic persistence ... - ... Passed -- 5.0 3 86 22849 6 -Test (3C): more persistence ... - ... Passed -- 17.6 5 952 218854 16 -Test (3C): partitioned leader and one follower crash, leader restarts ... - ... Passed -- 2.0 3 34 8937 4 -Test (3C): Figure 8 ... - ... Passed -- 31.2 5 580 130675 32 -Test (3C): unreliable agreement ... - ... Passed -- 1.7 5 1044 366392 246 -Test (3C): Figure 8 (unreliable) ... - ... Passed -- 33.6 5 10700 33695245 308 -Test (3C): churn ... - ... Passed -- 16.1 5 8864 44771259 1544 -Test (3C): unreliable churn ... - ... Passed -- 16.5 5 4220 6414632 906 -PASS -ok 6.5840/raft 123.564s -$ -``` - -It is a good idea to run the tests multiple times before submitting and check that each run prints `PASS`. 
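
As a rough illustration of the persistence pattern described above, the sketch below shows one way `persist()` and `readPersist()` might use `labgob`, assuming the persistent fields are named `currentTerm`, `votedFor`, and `log` and that entries have a `LogEntry` type; your names will likely differ, and the only hard requirement is that the decode order matches the encode order.

```go
// Rough sketch of persist()/readPersist() with labgob (imports: "bytes" and
// the lab's labgob package). Call persist() while holding rf.mu, at every
// point where the persistent state changes.
func (rf *Raft) persist() {
	w := new(bytes.Buffer)
	e := labgob.NewEncoder(w)
	e.Encode(rf.currentTerm)
	e.Encode(rf.votedFor)
	e.Encode(rf.log)
	// The second argument is the snapshot; nil until Part 3D.
	rf.persister.Save(w.Bytes(), nil)
}

func (rf *Raft) readPersist(data []byte) {
	if len(data) == 0 {
		return // no persisted state yet; start from scratch
	}
	r := bytes.NewBuffer(data)
	d := labgob.NewDecoder(r)
	var currentTerm, votedFor int
	var log []LogEntry
	if d.Decode(&currentTerm) != nil ||
		d.Decode(&votedFor) != nil ||
		d.Decode(&log) != nil {
		return // decode failure; in the lab it is reasonable to panic here
	}
	rf.currentTerm, rf.votedFor, rf.log = currentTerm, votedFor, log
}
```

The shell loop that follows is simply a convenient way to repeat the suite and confirm that every run prints `PASS`.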
- -``` -for i in {0..10}; do go test; done -``` diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_7.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_7.md deleted file mode 100644 index 6f50772..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_7.md +++ /dev/null @@ -1,119 +0,0 @@ -# Problem Context - -## Introduction - -This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with *index numbers*. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date. - -## Getiting Started - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the [Lab 1 instructions](http://nil.csail.mit.edu/6.5840/2024/labs/lab-mr.html). - -We supply you with skeleton code `src/raft/raft.go`. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in `src/raft/test_test.go`. - -When we grade your submissions, we will run the tests without the [`-race` flag](https://go.dev/blog/race-detector). However, you should check that your code does not have races, by running the tests with the `-race` flag as you develop your solution. 
- -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft -$ go test -Test (3A): initial election ... ---- FAIL: TestInitialElection3A (5.04s) - config.go:326: expected one leader, got none -Test (3A): election after network failure ... ---- FAIL: TestReElection3A (5.03s) - config.go:326: expected one leader, got none -... -$ -``` - -## The Code - -Implement Raft by adding code to `raft/raft.go`. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. - -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in `raft.go`. - -``` -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -``` - -A service calls `Make(peers,me,…)` to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The `me` argument is the index of this peer in the peers array. `Start(command)` asks Raft to start the processing to append the command to the replicated log. `Start()` should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an `ApplyMsg` for each newly committed log entry to the `applyCh` channel argument to `Make()`. - -`raft.go` contains example code that sends an RPC (`sendRequestVote()`) and that handles an incoming RPC (`RequestVote()`). Your Raft peers should exchange RPCs using the labrpc Go package (source in `src/labrpc`). The tester can tell `labrpc` to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify `labrpc`, make sure your Raft works with the original `labrpc`, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code. - -# Your Task - -As things stand now, a rebooting server replays the complete Raft log in order to restore its state. However, it's not practical for a long-running service to remember the complete Raft log forever. Instead, you'll modify Raft to cooperate with services that persistently store a "snapshot" of their state from time to time, at which point Raft discards log entries that precede the snapshot. The result is a smaller amount of persistent data and faster restart. However, it's now possible for a follower to fall so far behind that the leader has discarded the log entries it needs to catch up; the leader must then send a snapshot plus the log starting at the time of the snapshot. Section 7 of the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf) outlines the scheme; you will have to design the details. 
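
One of those details is index bookkeeping: once old entries are discarded, absolute Raft indices no longer line up with positions in the log slice. The helpers below are a hypothetical sketch of that translation; the names (`lastIncludedIndex`, `entryAt`, `truncateBefore`) are illustrative, not required by the lab.

```go
// Hypothetical index-translation helpers for a trimmed log. They assume the
// convention that rf.log holds only the entries after the snapshot, whose
// absolute indices start at rf.lastIncludedIndex+1.

// entryAt returns the entry with absolute index i
// (valid for lastIncludedIndex < i <= lastLogIndex()).
func (rf *Raft) entryAt(i int) LogEntry {
	return rf.log[i-rf.lastIncludedIndex-1]
}

// lastLogIndex is the absolute index of the last entry,
// or of the snapshot itself when the log is empty.
func (rf *Raft) lastLogIndex() int {
	return rf.lastIncludedIndex + len(rf.log)
}

// truncateBefore discards entries up to and including absolute index i
// (lastIncludedIndex <= i <= lastLogIndex()). It copies the tail into a
// fresh slice so the garbage collector can reclaim the discarded entries.
// Real code must also record the term of entry i before discarding it.
func (rf *Raft) truncateBefore(i int) {
	tail := make([]LogEntry, rf.lastLogIndex()-i)
	copy(tail, rf.log[i-rf.lastIncludedIndex:])
	rf.log = tail
	rf.lastIncludedIndex = i
}
```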
- -Your Raft must provide the following function that the service can call with a serialized snapshot of its state: - -``` -Snapshot(index int, snapshot []byte) -``` - -In Lab 3D, the tester calls `Snapshot()` periodically. In Lab 4, you will write a key/value server that calls `Snapshot()`; the snapshot will contain the complete table of key/value pairs. The service layer calls `Snapshot()` on every peer (not just on the leader). - -The `index` argument indicates the highest log entry that's reflected in the snapshot. Raft should discard its log entries before that point. You'll need to revise your Raft code to operate while storing only the tail of the log. - -You'll need to implement the `InstallSnapshot` RPC discussed in the paper that allows a Raft leader to tell a lagging Raft peer to replace its state with a snapshot. You will likely need to think through how InstallSnapshot should interact with the state and rules in Figure 2. - -When a follower's Raft code receives an InstallSnapshot RPC, it can use the `applyCh` to send the snapshot to the service in an `ApplyMsg`. The `ApplyMsg` struct definition already contains the fields you will need (and which the tester expects). Take care that these snapshots only advance the service's state, and don't cause it to move backwards. - -If a server crashes, it must restart from persisted data. Your Raft should persist both Raft state and the corresponding snapshot. Use the second argument to `persister.Save()` to save the snapshot. If there's no snapshot, pass `nil` as the second argument. - -When a server restarts, the application layer reads the persisted snapshot and restores its saved state. - -Implement `Snapshot()` and the InstallSnapshot RPC, as well as the changes to Raft to support these (e.g, operation with a trimmed log). Your solution is complete when it passes the 3D tests (and all the previous Lab 3 tests). - -- `git pull` to make sure you have the latest software. -- A good place to start is to modify your code to so that it is able to store just the part of the log starting at some index X. Initially you can set X to zero and run the 3B/3C tests. Then make `Snapshot(index)` discard the log before `index`, and set X equal to `index`. If all goes well you should now pass the first 3D test. -- Next: have the leader send an InstallSnapshot RPC if it doesn't have the log entries required to bring a follower up to date. -- Send the entire snapshot in a single InstallSnapshot RPC. Don't implement Figure 13's `offset` mechanism for splitting up the snapshot. -- Raft must discard old log entries in a way that allows the Go garbage collector to free and re-use the memory; this requires that there be no reachable references (pointers) to the discarded log entries. -- A reasonable amount of time to consume for the full set of Lab 3 tests (3A+3B+3C+3D) without `-race` is 6 minutes of real time and one minute of CPU time. When running with `-race`, it is about 10 minutes of real time and two minutes of CPU time. - -Your code should pass all the 3D tests (as shown below), as well as the 3A, 3B, and 3C tests. - -``` -$ go test -run 3D -Test (3D): snapshots basic ... - ... Passed -- 11.6 3 176 61716 192 -Test (3D): install snapshots (disconnect) ... - ... Passed -- 64.2 3 878 320610 336 -Test (3D): install snapshots (disconnect+unreliable) ... - ... Passed -- 81.1 3 1059 375850 341 -Test (3D): install snapshots (crash) ... - ... Passed -- 53.5 3 601 256638 339 -Test (3D): install snapshots (unreliable+crash) ... - ... 
Passed -- 63.5 3 687 288294 336 -Test (3D): crash and restart all servers ... - ... Passed -- 19.5 3 268 81352 58 -PASS -ok 6.5840/raft 293.456s -``` diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_8.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_8.md deleted file mode 100644 index 853edfd..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_8.md +++ /dev/null @@ -1,112 +0,0 @@ -# Problem Context - -## Introduction - -In this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html). Your key/value service will be a replicated state machine, consisting of several key/value servers that each maintain a database of key/value pairs, as in [Lab 2](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html), but additionally use Raft for replication. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2024/notes/raft_diagram.pdf). - -Clients will interact with your key/value service in much the same way as Lab 2. In particular, clients can send three different RPCs to the key/value service: - -- `Put(key, value)`: replaces the value for a particular key in the database -- `Append(key, arg)`: appends arg to key's value (treating the existing value as an empty string if the key is non-existent) -- `Get(key)`: fetches the current value of the key (returning the empty string for non-existent keys) - -Keys and values are strings. Note that unlike in Lab 2, neither `Put` nor `Append` should return a value to the client. Each client talks to the service through a `Clerk` with Put/Append/Get methods. The `Clerk` manages RPC interactions with the servers. - -Your service must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If called one at a time, the Get/Put/Append methods should act as if the system had only one copy of its state, and each call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. - -Providing linearizability is relatively easy for a single server. It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. - -This lab has two parts. In part A, you will implement a replicated key/value service using your Raft implementation, but without using snapshots. In part B, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. - -You should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), in particular Sections 7 and 8. 
For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) - -Start early. - -## Getiting Started - -We supply you with skeleton code and tests in `src/kvraft`. You will need to modify `kvraft/client.go`, `kvraft/server.go`, and perhaps `kvraft/common.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvraft -$ go test -... -$ -``` - -## The Code - -# Your Task - -Each of your key/value servers ("kvservers") will have an associated Raft peer. Clerks send `Put()`, `Append()`, and `Get()` RPCs to the kvserver whose associated Raft is the leader. The kvserver code submits the Put/Append/Get operation to Raft, so that the Raft log holds a sequence of Put/Append/Get operations. All of the kvservers execute operations from the Raft log in order, applying the operations to their key/value databases; the intent is for the servers to maintain identical replicas of the key/value database. - -A `Clerk` sometimes doesn't know which kvserver is the Raft leader. If the `Clerk` sends an RPC to the wrong kvserver, or if it cannot reach the kvserver, the `Clerk` should re-try by sending to a different kvserver. If the key/value service commits the operation to its Raft log (and hence applies the operation to the key/value state machine), the leader reports the result to the `Clerk` by responding to its RPC. If the operation failed to commit (for example, if the leader was replaced), the server reports an error, and the `Clerk` retries with a different server. - -Your kvservers should not directly communicate; they should only interact with each other through Raft. - -Your first task is to implement a solution that works when there are no dropped messages, and no failed servers. - -Feel free to copy over your client code from Lab 2 (`kvsrv/client.go`) into `kvraft/client.go`. You will need to add logic for deciding which kvserver to send each RPC to. Recall that `Append()` no longer returns a value to the Clerk. - -You'll also need to implement `Put()`, `Append()`, and `Get()` RPC handlers in `server.go`. These handlers should enter an `Op` in the Raft log using `Start()`; you should fill in the `Op` struct definition in `server.go` so that it describes a Put/Append/Get operation. Each server should execute `Op` commands as Raft commits them, i.e. as they appear on the `applyCh`. An RPC handler should notice when Raft commits its `Op`, and then reply to the RPC. - -You have completed this task when you **reliably** pass the first test in the test suite: "One client". - -- After calling `Start()`, your kvservers will need to wait for Raft to complete agreement. Commands that have been agreed upon arrive on the `applyCh`. Your code will need to keep reading `applyCh` while `Put()`, `Append()`, and `Get()` handlers submit commands to the Raft log using `Start()`. Beware of deadlock between the kvserver and its Raft library. -- A kvserver should not complete a `Get()` RPC if it is not part of a majority (so that it does not serve stale data). A simple solution is to enter every `Get()` (as well as each `Put()` and `Append()`) in the Raft log. You don't have to implement the optimization for read-only operations that is described in Section 8. 
-- You should not need to add any fields to to the Raft `ApplyMsg`, or to Raft RPCs such as `AppendEntries`, but you are allowed to do so. -- It's best to add locking from the start because the need to avoid deadlocks sometimes affects overall code design. Check that your code is race-free using `go test -race`. - -Now you should modify your solution to continue in the face of network and server failures. One problem you'll face is that a `Clerk` may have to send an RPC multiple times until it finds a kvserver that replies positively. If a leader fails just after committing an entry to the Raft log, the `Clerk` may not receive a reply, and thus may re-send the request to another leader. Each call to `Clerk.Put()` or `Clerk.Append()` should result in just a single execution, so you will have to ensure that the re-send doesn't result in the servers executing the request twice. - -Add code to handle failures, and to cope with duplicate `Clerk` requests, including situations where the `Clerk` sends a request to a kvserver leader in one term, times out waiting for a reply, and re-sends the request to a new leader in another term. The request should execute just once. These notes include guidance on [duplicate detection](http://nil.csail.mit.edu/6.5840/2024/notes/l-raft-QA.txt). Your code should pass the `go test -run 4A` tests. - -- Your solution needs to handle a leader that has called Start() for a Clerk's RPC, but loses its leadership before the request is committed to the log. In this case you should arrange for the Clerk to re-send the request to other servers until it finds the new leader. One way to do this is for the server to detect that it has lost leadership, by noticing that Raft's term has changed or a different request has appeared at the index returned by Start(). If the ex-leader is partitioned by itself, it won't know about new leaders; but any client in the same partition won't be able to talk to a new leader either, so it's OK in this case for the server and client to wait indefinitely until the partition heals. -- You will probably have to modify your Clerk to remember which server turned out to be the leader for the last RPC, and send the next RPC to that server first. This will avoid wasting time searching for the leader on every RPC, which may help you pass some of the tests quickly enough. -- You should use a duplicate detection scheme similar to Lab 2. It should free server memory quickly, for example by having each RPC imply that the client has seen the reply for its previous RPC. It's OK to assume that a client will make only one call into a Clerk at a time. You may find that you need to make changes to what information you store in your duplicate detection table from Lab 2. - -Your code should now pass the Lab 4A tests, like this: - -``` -$ go test -run 4A -Test: one client (4A) ... - ... Passed -- 15.5 5 4576 903 -Test: ops complete fast enough (4A) ... - ... Passed -- 15.7 3 3022 0 -Test: many clients (4A) ... - ... Passed -- 15.9 5 5884 1160 -Test: unreliable net, many clients (4A) ... - ... Passed -- 19.2 5 3083 441 -Test: concurrent append to same key, unreliable (4A) ... - ... Passed -- 2.5 3 218 52 -Test: progress in majority (4A) ... - ... Passed -- 1.7 5 103 2 -Test: no progress in minority (4A) ... - ... Passed -- 1.0 5 102 3 -Test: completion after heal (4A) ... - ... Passed -- 1.2 5 70 3 -Test: partitions, one client (4A) ... - ... Passed -- 23.8 5 4501 765 -Test: partitions, many clients (4A) ... - ... 
Passed -- 23.5 5 5692 974 -Test: restarts, one client (4A) ... - ... Passed -- 22.2 5 4721 908 -Test: restarts, many clients (4A) ... - ... Passed -- 22.5 5 5490 1033 -Test: unreliable net, restarts, many clients (4A) ... - ... Passed -- 26.5 5 3532 474 -Test: restarts, partitions, many clients (4A) ... - ... Passed -- 29.7 5 6122 1060 -Test: unreliable net, restarts, partitions, many clients (4A) ... - ... Passed -- 32.9 5 2967 317 -Test: unreliable net, restarts, partitions, random keys, many clients (4A) ... - ... Passed -- 35.0 7 8249 746 -PASS -ok 6.5840/kvraft 290.184s -``` - -The numbers after each `Passed` are real time in seconds, number of peers, number of RPCs sent (including client RPCs), and number of key/value operations executed (`Clerk` Get/Put/Append calls). diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_9.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_9.md deleted file mode 100644 index 5e1d3a2..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_9.md +++ /dev/null @@ -1,81 +0,0 @@ -# Problem Context - -## Introduction - -In this lab you will build a fault-tolerant key/value storage service using your Raft library from [Lab 3](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html). Your key/value service will be a replicated state machine, consisting of several key/value servers that each maintain a database of key/value pairs, as in [Lab 2](http://nil.csail.mit.edu/6.5840/2024/labs/lab-raft.html), but additionally use Raft for replication. Your key/value service should continue to process client requests as long as a majority of the servers are alive and can communicate, in spite of other failures or network partitions. After Lab 4, you will have implemented all parts (Clerk, Service, and Raft) shown in the [diagram of Raft interactions](http://nil.csail.mit.edu/6.5840/2024/notes/raft_diagram.pdf). - -Clients will interact with your key/value service in much the same way as Lab 2. In particular, clients can send three different RPCs to the key/value service: - -- `Put(key, value)`: replaces the value for a particular key in the database -- `Append(key, arg)`: appends arg to key's value (treating the existing value as an empty string if the key is non-existent) -- `Get(key)`: fetches the current value of the key (returning the empty string for non-existent keys) - -Keys and values are strings. Note that unlike in Lab 2, neither `Put` nor `Append` should return a value to the client. Each client talks to the service through a `Clerk` with Put/Append/Get methods. The `Clerk` manages RPC interactions with the servers. - -Your service must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If called one at a time, the Get/Put/Append methods should act as if the system had only one copy of its state, and each call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. - -Providing linearizability is relatively easy for a single server. 
It is harder if the service is replicated, since all servers must choose the same execution order for concurrent requests, must avoid replying to clients using state that isn't up to date, and must recover their state after a failure in a way that preserves all acknowledged client updates. - -This lab has two parts. In part A, you will implement a replicated key/value service using your Raft implementation, but without using snapshots. In part B, you will use your snapshot implementation from Lab 3D, which will allow Raft to discard old log entries. Please submit each part by the respective deadline. - -You should review the [extended Raft paper](http://nil.csail.mit.edu/6.5840/2024/papers/raft-extended.pdf), in particular Sections 7 and 8. For a wider perspective, have a look at Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication, and [Bolosky et al.](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf) - -Start early. - -## Getiting Started - -We supply you with skeleton code and tests in `src/kvraft`. You will need to modify `kvraft/client.go`, `kvraft/server.go`, and perhaps `kvraft/common.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvraft -$ go test -... -$ -``` - -## The Code - -# Your Task - -As things stand now, your key/value server doesn't call your Raft library's `Snapshot()` method, so a rebooting server has to replay the complete persisted Raft log in order to restore its state. Now you'll modify kvserver to cooperate with Raft to save log space, and reduce restart time, using Raft's `Snapshot()` from Lab 3D. - -The tester passes `maxraftstate` to your `StartKVServer()`. `maxraftstate` indicates the maximum allowed size of your persistent Raft state in bytes (including the log, but not including snapshots). You should compare `maxraftstate` to `persister.RaftStateSize()`. Whenever your key/value server detects that the Raft state size is approaching this threshold, it should save a snapshot by calling Raft's `Snapshot`. If `maxraftstate` is -1, you do not have to snapshot. `maxraftstate` applies to the GOB-encoded bytes your Raft passes as the first argument to to `persister.Save()`. - -Modify your kvserver so that it detects when the persisted Raft state grows too large, and then hands a snapshot to Raft. When a kvserver server restarts, it should read the snapshot from `persister` and restore its state from the snapshot. - -- Think about when a kvserver should snapshot its state and what should be included in the snapshot. Raft stores each snapshot in the persister object using `Save()`, along with corresponding Raft state. You can read the latest stored snapshot using `ReadSnapshot()`. -- Your kvserver must be able to detect duplicated operations in the log across checkpoints, so any state you are using to detect them must be included in the snapshots. -- Capitalize all fields of structures stored in the snapshot. -- You may have bugs in your Raft library that this lab exposes. If you make changes to your Raft implementation make sure it continues to pass all of the Lab 3 tests. -- A reasonable amount of time to take for the Lab 4 tests is 400 seconds of real time and 700 seconds of CPU time. Further, `go test -run TestSnapshotSize` should take less than 20 seconds of real time. 
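
As a concrete illustration of the snapshot hints above, here is a rough sketch of a kvserver that checks the Raft state size after applying each command and hands Raft a snapshot when the limit is near. The field names (`kv.persister`, `kv.data`, `kv.dupTable`) and the shape of the duplicate table are assumptions about your server struct, not part of the lab skeleton.

```go
// Sketch of the snapshot hand-off (imports: "bytes" and labgob). The snapshot
// must include whatever state you use for duplicate detection, not just the
// key/value map.
func (kv *KVServer) maybeSnapshot(lastAppliedIndex int) {
	if kv.maxraftstate == -1 {
		return // snapshotting disabled by the tester
	}
	// Real code might trigger slightly before the limit is reached.
	if kv.persister.RaftStateSize() < kv.maxraftstate {
		return
	}
	w := new(bytes.Buffer)
	e := labgob.NewEncoder(w)
	e.Encode(kv.data)     // key/value map (capitalize fields if it's a struct)
	e.Encode(kv.dupTable) // needed to keep rejecting duplicates after a restart
	kv.rf.Snapshot(lastAppliedIndex, w.Bytes())
}

// restoreSnapshot is called on restart (and when a snapshot arrives on
// applyCh) so the server does not have to replay the whole log.
func (kv *KVServer) restoreSnapshot(snapshot []byte) {
	if len(snapshot) == 0 {
		return
	}
	r := bytes.NewBuffer(snapshot)
	d := labgob.NewDecoder(r)
	var data map[string]string
	var dupTable map[int64]int64 // assumed shape: client id -> last request id
	if d.Decode(&data) != nil || d.Decode(&dupTable) != nil {
		return
	}
	kv.data, kv.dupTable = data, dupTable
}
```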
- -Your code should pass the 4B tests (as in the example here) as well as the 4A tests (and your Raft must continue to pass the Lab 3 tests). - -``` -$ go test -run 4B -Test: InstallSnapshot RPC (4B) ... - ... Passed -- 4.0 3 289 63 -Test: snapshot size is reasonable (4B) ... - ... Passed -- 2.6 3 2418 800 -Test: ops complete fast enough (4B) ... - ... Passed -- 3.2 3 3025 0 -Test: restarts, snapshots, one client (4B) ... - ... Passed -- 21.9 5 29266 5820 -Test: restarts, snapshots, many clients (4B) ... - ... Passed -- 21.5 5 33115 6420 -Test: unreliable net, snapshots, many clients (4B) ... - ... Passed -- 17.4 5 3233 482 -Test: unreliable net, restarts, snapshots, many clients (4B) ... - ... Passed -- 22.7 5 3337 471 -Test: unreliable net, restarts, partitions, snapshots, many clients (4B) ... - ... Passed -- 30.4 5 2725 274 -Test: unreliable net, restarts, partitions, snapshots, random keys, many clients (4B) ... - ... Passed -- 37.7 7 8378 681 -PASS -ok 6.5840/kvraft 161.538s -``` diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_test.md b/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_test.md deleted file mode 100644 index a43287a..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/system_lab_test.md +++ /dev/null @@ -1,30 +0,0 @@ -# Proble Context - -## Introduction - -In this lab you will build a key/value server for a single machine that ensures that each operation is executed exactly once despite network failures and that the operations are [linearizable](http://nil.csail.mit.edu/6.5840/2024/papers/linearizability-faq.txt). Later labs will replicate a server like this one to handle server crashes. - -Clients can send three different RPCs to the key/value server: `Put(key, value)`, `Append(key, arg)`, and `Get(key)`. The server maintains an in-memory map of key/value pairs. Keys and values are strings. `Put(key, value)` installs or replaces the value for a particular key in the map, `Append(key, arg)` appends arg to key's value *and* returns the old value, and `Get(key)` fetches the current value for the key. A `Get` for a non-existent key should return an empty string. An `Append` to a non-existent key should act as if the existing value were a zero-length string. Each client talks to the server through a `Clerk` with Put/Append/Get methods. A `Clerk` manages RPC interactions with the server. - -Your server must arrange that application calls to `Clerk` Get/Put/Append methods be linearizable. If client requests aren't concurrent, each client Get/Put/Append call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls `Clerk.Put()`, and client Y calls `Clerk.Append()`, and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server. - -## Getiting Started - -We supply you with skeleton code and tests in `src/kvsrv`. 
You will need to modify `kvsrv/client.go`, `kvsrv/server.go`, and `kvsrv/common.go`. - -To get up and running, execute the following commands. Don't forget the `git pull` to get the latest software. - -``` -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv -$ go test -... -$ - - -# Your Task -cd src/kvsrv and run go test diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/test-repo-problems/1.md b/benchmarks/course_lab_bench/data/benchmark/problems/test-repo-problems/1.md deleted file mode 100644 index 8907884..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/test-repo-problems/1.md +++ /dev/null @@ -1,14 +0,0 @@ -I'm running `missing_colon.py` as follows: - -```python -division(23, 0) -``` - -but I get the following error: - -``` - File "/Users/fuchur/Documents/24/git_sync/swe-agent-test-repo/tests/./missing_colon.py", line 4 - def division(a: float, b: float) -> float - ^ -SyntaxError: invalid syntax -``` diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/test-repo-problems/1a.md b/benchmarks/course_lab_bench/data/benchmark/problems/test-repo-problems/1a.md deleted file mode 100644 index 4da0256..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/test-repo-problems/1a.md +++ /dev/null @@ -1,14 +0,0 @@ -I'm running `existing_lint_error.py` as follows: - -```python -./existing_lint_error.py -``` - -but I get the following error: - -``` - File "/Users/fuchur/Documents/24/git_sync/swe-agent-test-repo/tests/./existing_lint_error.py", line 4 - def division(a: float, b: float) -> float - ^ -SyntaxError: invalid syntax -``` diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/test-repo-problems/22.md b/benchmarks/course_lab_bench/data/benchmark/problems/test-repo-problems/22.md deleted file mode 100644 index e92ed84..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/test-repo-problems/22.md +++ /dev/null @@ -1,10 +0,0 @@ -# tribonacci(0) returns None - -How to reproduce: - -```python -from testpkg.tribonacci import tribonacci - - -assert tribonacci(0) == 0 -``` diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/1.md b/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/1.md deleted file mode 100644 index 70c19bc..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/1.md +++ /dev/null @@ -1,67 +0,0 @@ -# Background - -Introduction -In this lab you will build a key/value server for a single machine that ensures that each Put operation is executed at-most-once despite network failures and that the operations are linearizable. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes. - -KV server -Each client interacts with the key/value server using a Clerk, which sends RPCs to the server. Clients can send two different RPCs to the server: Put(key, value, version) and Get(key). The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. Put(key, value, version) installs or replaces the value for a particular key in the map only if the Put's version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return rpc.ErrVersion. 
A client can create a new key by invoking Put with version number 0 (and the resulting version stored by the server will be 1). If the version number of the Put is larger than 0 and the key doesn't exist, the server should return rpc.ErrNoKey. - -Get(key) fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return rpc.ErrNoKey. - -Maintaining a version number for each key will be useful for implementing locks using Put and ensuring at-most-once semantics for Put's when the network is unreliable and the client retransmits. - -When you've finished this lab and passed all the tests, you'll have a linearizable key/value service from the point of view of clients calling Clerk.Get and Clerk.Put. That is, if client operations aren't concurrent, each client Clerk.Get and Clerk.Put will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls Clerk.Put(), and client Y calls Clerk.Put(), and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on linearizability for more background. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server. - -Getting Started -We supply you with skeleton code and tests in src/kvsrv1. kvsrv1/client.go implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides Put and Get methods. kvsrv1/server.go contains the server code, including the Put and Get handlers that implement the server side of RPC requests. You will need to modify client.go and server.go. The RPC requests, replies, and error values are defined in the kvsrv1/rpc package in the file kvsrv1/rpc/rpc.go, which you should look at, though you don't have to modify rpc.go. - -To get up and running, execute the following commands. Don't forget the git pull to get the latest software. - -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv1 -$ timeout --signal=SIGKILL 60s go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - kvsrv_test.go:25: Put err ErrNoKey -... - -# Task - -first, you can run the following command to install go: - -```bash -apt-get update && apt-get install -y wget tar git build-essential && wget https://go.dev/dl/go1.22.3.linux-amd64.tar.gz && tar -C /usr/local -xzf go1.22.3.linux-amd64.tar.gz && rm go1.22.3.linux-amd64.tar.gz && apt-get clean && rm -rf /var/lib/apt/lists/* -export PATH="/usr/local/go/bin:${PATH}" -``` - -And run `go version` to check your go version - -Your first task (Key/value server with reliable network) is to implement a solution that works when there are no dropped messages. You'll need to add RPC-sending code to the Clerk Put/Get methods in client.go, and implement Put and Get RPC handlers in server.go. 
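
Here is a hedged sketch of what those two handlers could look like, following the version rules described above. The argument and reply structs (`rpc.GetArgs`, `rpc.PutArgs`, and their `Key`/`Value`/`Version`/`Err` fields), the `rpc.OK` constant, and the use of `uint64` for versions are assumptions; check `kvsrv1/rpc/rpc.go` for the real definitions.

```go
// Sketch of the Get and Put handlers (import "sync"; rpc types assumed).
type entry struct {
	value   string
	version uint64
}

type KVServer struct {
	mu   sync.Mutex
	data map[string]entry
}

func (kv *KVServer) Get(args *rpc.GetArgs, reply *rpc.GetReply) {
	kv.mu.Lock()
	defer kv.mu.Unlock()
	e, ok := kv.data[args.Key]
	if !ok {
		reply.Err = rpc.ErrNoKey
		return
	}
	reply.Value, reply.Version, reply.Err = e.value, e.version, rpc.OK
}

func (kv *KVServer) Put(args *rpc.PutArgs, reply *rpc.PutReply) {
	kv.mu.Lock()
	defer kv.mu.Unlock()
	e, ok := kv.data[args.Key]
	switch {
	case !ok && args.Version == 0:
		// Creating a new key: the stored version becomes 1.
		kv.data[args.Key] = entry{value: args.Value, version: 1}
		reply.Err = rpc.OK
	case !ok:
		reply.Err = rpc.ErrNoKey // version > 0 but the key doesn't exist
	case args.Version != e.version:
		reply.Err = rpc.ErrVersion // conditional Put failed
	default:
		kv.data[args.Key] = entry{value: args.Value, version: e.version + 1}
		reply.Err = rpc.OK
	}
}
```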
- -# Tests - -You have completed this task when you pass the Reliable tests in the test suite: -$ go test -v -run Reliable -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - ... Passed -- 0.0 1 5 0 ---- PASS: TestReliablePut (0.00s) -=== RUN TestPutConcurrentReliable -Test: many clients racing to put values to the same key (reliable network)... -info: linearizability check timed out, assuming history is ok - ... Passed -- 3.1 1 90171 90171 ---- PASS: TestPutConcurrentReliable (3.07s) -=== RUN TestMemPutManyClientsReliable -Test: memory use many put clients (reliable network)... - ... Passed -- 9.2 1 100000 0 ---- PASS: TestMemPutManyClientsReliable (16.59s) -PASS -ok 6.5840/kvsrv1 19.681s - -The numbers after each Passed are real time in seconds, the constant 1, the number of RPCs sent (including client RPCs), and the number of key/value operations executed (Clerk Get and Put calls). - -Check that your code is race-free using go test -race. diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/2.md b/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/2.md deleted file mode 100644 index 42a8162..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/2.md +++ /dev/null @@ -1,102 +0,0 @@ -# Background - -Introduction -This is the first in a series of labs in which you'll build a fault-tolerant key/value storage system. In this lab you'll implement Raft, a replicated state machine protocol. In the next lab you'll build a key/value service on top of Raft. Then you will “shard” your service over multiple replicated state machines for higher performance. - -A replicated service achieves fault tolerance by storing complete copies of its state (i.e., data) on multiple replica servers. Replication allows the service to continue operating even if some of its servers experience failures (crashes or a broken or flaky network). The challenge is that failures may cause the replicas to hold differing copies of the data. - -Raft organizes client requests into a sequence, called the log, and ensures that all the replica servers see the same log. Each replica executes client requests in log order, applying them to its local copy of the service's state. Since all the live replicas see the same log contents, they all execute the same requests in the same order, and thus continue to have identical service state. If a server fails but later recovers, Raft takes care of bringing its log up to date. Raft will continue to operate as long as at least a majority of the servers are alive and can talk to each other. If there is no such majority, Raft will make no progress, but will pick up where it left off as soon as a majority can communicate again. - -In this lab you'll implement Raft as a Go object type with associated methods, meant to be used as a module in a larger service. A set of Raft instances talk to each other with RPC to maintain replicated logs. Your Raft interface will support an indefinite sequence of numbered commands, also called log entries. The entries are numbered with index numbers. The log entry with a given index will eventually be committed. At that point, your Raft should send the log entry to the larger service for it to execute. - -You should follow the design in the extended Raft paper, with particular attention to Figure 2. You'll implement most of what's in the paper, including saving persistent state and reading it after a node fails and then restarts. 
You will not implement cluster membership changes (Section 6). - -This lab is due in four parts. You must submit each part on the corresponding due date. - -Getting Started -Do a git pull to get the latest lab software. - -If you have done Lab 1, you already have a copy of the lab source code. If not, you can find directions for obtaining the source via git in the Lab 1 instructions. - -We supply you with skeleton code src/raft/raft.go. We also supply a set of tests, which you should use to drive your implementation efforts, and which we'll use to grade your submitted lab. The tests are in src/raft/raft_test.go. - -When we grade your submissions, we will run the tests without the -race flag. However, you should check that your code does not have races, by running the tests with the -race flag as you develop your solution. - -To get up and running, execute the following commands. Don't forget the git pull to get the latest software. - -$ cd ~/6.5840 -$ git pull -... -$ cd src/raft1 -$ go test -Test (3A): initial election (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestInitialElection3A (4.90s) -Test (3A): election after network failure (reliable network)... -Fatal: expected one leader, got none ---- FAIL: TestReElection3A (5.05s) -... - -The code -Implement Raft by adding code to raft/raft.go. In that file you'll find skeleton code, plus examples of how to send and receive RPCs. -Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in raft.go. - -// create a new Raft server instance: -rf := Make(peers, me, persister, applyCh) - -// start agreement on a new log entry: -rf.Start(command interface{}) (index, term, isleader) - -// ask a Raft for its current term, and whether it thinks it is leader -rf.GetState() (term, isLeader) - -// each time a new entry is committed to the log, each Raft peer -// should send an ApplyMsg to the service (or tester). -type ApplyMsg -A service calls Make(peers,me,…) to create a Raft peer. The peers argument is an array of network identifiers of the Raft peers (including this one), for use with RPC. The me argument is the index of this peer in the peers array. Start(command) asks Raft to start the processing to append the command to the replicated log. Start() should return immediately, without waiting for the log appends to complete. The service expects your implementation to send an ApplyMsg for each newly committed log entry to the applyCh channel argument to Make(). - -raft.go contains example code that sends an RPC (sendRequestVote()) and that handles an incoming RPC (RequestVote()). Your Raft peers should exchange RPCs using the labrpc Go package (source in src/labrpc). The tester can tell labrpc to delay RPCs, re-order them, and discard them to simulate various network failures. While you can temporarily modify labrpc, make sure your Raft works with the original labrpc, since that's what we'll use to test and grade your lab. Your Raft instances must interact only with RPC; for example, they are not allowed to communicate using shared Go variables or files. - -Subsequent labs build on this lab, so it is important to give yourself enough time to write solid code. - -# Task - -Part 3A: leader election (moderate) -Implement Raft leader election and heartbeats (AppendEntries RPCs with no log entries). 
The goal for Part 3A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run go test -run 3A to test your 3A code. - -## Hints - -You can't easily run your Raft implementation directly; instead you should run it by way of the tester, i.e. go test -run 3A . -Follow the paper's Figure 2. At this point you care about sending and receiving RequestVote RPCs, the Rules for Servers that relate to elections, and the State related to leader election, -Add the Figure 2 state for leader election to the Raft struct in raft.go. You'll also need to define a struct to hold information about each log entry. -Fill in the RequestVoteArgs and RequestVoteReply structs. Modify Make() to create a background goroutine that will kick off leader election periodically by sending out RequestVote RPCs when it hasn't heard from another peer for a while. Implement the RequestVote() RPC handler so that servers will vote for one another. -To implement heartbeats, define an AppendEntries RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an AppendEntries RPC handler method. -The tester requires that the leader send heartbeat RPCs no more than ten times per second. -The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate). -The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds (e.g., once per 10 milliseconds). Because the tester limits you tens of heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds. -You may find Go's rand useful. -You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls time.Sleep(); see the ticker() goroutine that Make() creates for this purpose. Don't use Go's time.Timer or time.Ticker, which are difficult to use correctly. -If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure. -Don't forget to implement GetState(). -The tester calls your Raft's rf.Kill() when it is permanently shutting down an instance. You can check whether Kill() has been called using rf.killed(). You may want to do this in all loops, to avoid having dead Raft instances print confusing messages. -Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). The labgob package will warn you about this; don't ignore the warnings. -The most challenging part of this lab may be the debugging. Spend some time making your implementation easy to debug. Refer to the Guidance page for debugging tips. -If you fail a test, the tester produces a file that visualizes a timeline with events marked along it, including network partitions, crashed servers, and checks performed. Here's an example of the visualization. Further, you can add your own annotations by writing, for example, tester.Annotate("Server 0", "short description", "details"). 
This is a new feature we added this year, so if you have any feedback regarding the visualizer (e.g., bug reports, what annotation APIs that you think might be helpful, what information you want the visualizer to show, etc.), please let us know! - -# Tests - -Be sure you pass the 3A tests before submitting Part 3A, so that you see something like this: - -$ go test -run 3A -Test (3A): initial election (reliable network)... - ... Passed -- 3.6 3 106 0 -Test (3A): election after network failure (reliable network)... - ... Passed -- 7.6 3 304 0 -Test (3A): multiple elections (reliable network)... - ... Passed -- 8.4 7 954 0 -PASS -ok 6.5840/raft1 19.834sak -$ -Each "Passed" line contains five numbers; these are the time that the test took in seconds, the number of Raft peers, the number of RPCs sent during the test, the total number of bytes in the RPC messages, and the number of log entries that Raft reports were committed. Your numbers will differ from those shown here. You can ignore the numbers if you like, but they may help you sanity-check the number of RPCs that your implementation sends. For all of labs 3, 4, and 5, the grading script will fail your solution if it takes more than 600 seconds for all of the tests (go test), or if any individual test takes more than 120 seconds. - -When we grade your submissions, we will run the tests without the -race flag. However, you should make sure that your code consistently passes the tests with the -race flag. diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/3.md b/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/3.md deleted file mode 100644 index 84622b8..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/3.md +++ /dev/null @@ -1,75 +0,0 @@ -# Background - -Introduction -In this lab you will build a key/value server for a single machine that ensures that each Put operation is executed at-most-once despite network failures and that the operations are linearizable. You will use this KV server to implement a lock. Later labs will replicate a server like this one to handle server crashes. - -KV server -Each client interacts with the key/value server using a Clerk, which sends RPCs to the server. Clients can send two different RPCs to the server: Put(key, value, version) and Get(key). The server maintains an in-memory map that records for each key a (value, version) tuple. Keys and values are strings. The version number records the number of times the key has been written. Put(key, value, version) installs or replaces the value for a particular key in the map only if the Put's version number matches the server's version number for the key. If the version numbers match, the server also increments the version number of the key. If the version numbers don't match, the server should return rpc.ErrVersion. A client can create a new key by invoking Put with version number 0 (and the resulting version stored by the server will be 1). If the version number of the Put is larger than 0 and the key doesn't exist, the server should return rpc.ErrNoKey. - -Get(key) fetches the current value for the key and its associated version. If the key doesn't exist at the server, the server should return rpc.ErrNoKey. - -Maintaining a version number for each key will be useful for implementing locks using Put and ensuring at-most-once semantics for Put's when the network is unreliable and the client retransmits. 
- -When you've finished this lab and passed all the tests, you'll have a linearizable key/value service from the point of view of clients calling Clerk.Get and Clerk.Put. That is, if client operations aren't concurrent, each client Clerk.Get and Clerk.Put will observe the modifications to the state implied by the preceding sequence of operations. For concurrent operations, the return values and final state will be the same as if the operations had executed one at a time in some order. Operations are concurrent if they overlap in time: for example, if client X calls Clerk.Put(), and client Y calls Clerk.Put(), and then client X's call returns. An operation must observe the effects of all operations that have completed before the operation starts. See the FAQ on linearizability for more background. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server. - -Getting Started -We supply you with skeleton code and tests in src/kvsrv1. kvsrv1/client.go implements a Clerk that clients use to manage RPC interactions with the server; the Clerk provides Put and Get methods. kvsrv1/server.go contains the server code, including the Put and Get handlers that implement the server side of RPC requests. You will need to modify client.go and server.go. The RPC requests, replies, and error values are defined in the kvsrv1/rpc package in the file kvsrv1/rpc/rpc.go, which you should look at, though you don't have to modify rpc.go. - -To get up and running, execute the following commands. Don't forget the git pull to get the latest software. - -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv1 -$ timeout --signal=SIGKILL 60s go test -v -=== RUN TestReliablePut -One client and reliable Put (reliable network)... - kvsrv_test.go:25: Put err ErrNoKey -... - -# Task - -first, you can run the following command to install go: - -```bash -apt-get update && apt-get install -y wget tar git build-essential && wget https://go.dev/dl/go1.22.3.linux-amd64.tar.gz && tar -C /usr/local -xzf go1.22.3.linux-amd64.tar.gz && rm go1.22.3.linux-amd64.tar.gz && apt-get clean && rm -rf /var/lib/apt/lists/* -export PATH="/usr/local/go/bin:${PATH}" -``` - -And run `go version` to check your go version - -Implementing a lock using key/value clerk (moderate) -In many distributed applications, clients running on different machines use a key/value server to coordinate their activities. For example, ZooKeeper and Etcd allow clients to coordinate using a distributed lock, in analogy with how threads in a Go program can coordinate with locks (i.e., sync.Mutex). Zookeeper and Etcd implement such a lock with conditional put. - -In this exercise your task is to implement a lock layered on client Clerk.Put and Clerk.Get calls. The lock supports two methods: Acquire and Release. The lock's specification is that only one client can successfully acquire the lock at a time; other clients must wait until the first client has released the lock using Release. - -We supply you with skeleton code and tests in src/kvsrv1/lock/. You will need to modify src/kvsrv1/lock/lock.go. Your Acquire and Release code can talk to your key/value server by calling lk.ck.Put() and lk.ck.Get(). 
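
For the reliable-network case only, the sketch below shows one way `Acquire` and `Release` could be layered on the clerk's conditional `Put`. It assumes clerk signatures of the form `Get(key) (value, version, err)` and `Put(key, value, version) err`, an `IKVClerk` interface name from the skeleton, and uses `kvtest.RandValue(8)` as the client identity, as the hint suggests; an unreliable network needs extra care that this sketch does not attempt.

```go
// Reliable-network sketch only (import "time"). An empty string in the lock
// key means "free"; otherwise the key holds the owner's id.
type Lock struct {
	ck IKVClerk // clerk interface supplied by the skeleton (assumed name)
	l  string   // key under which the lock state is stored
	id string   // unique identity of this lock client
}

func MakeLock(ck IKVClerk, l string) *Lock {
	return &Lock{ck: ck, l: l, id: kvtest.RandValue(8)}
}

func (lk *Lock) Acquire() {
	for {
		val, ver, err := lk.ck.Get(lk.l)
		if err == rpc.ErrNoKey {
			// No lock key yet: try to create it as held by us.
			if lk.ck.Put(lk.l, lk.id, 0) == rpc.OK {
				return
			}
		} else if val == "" {
			// Lock exists but is free: claim it with a conditional Put.
			// If another client raced us, the Put returns rpc.ErrVersion.
			if lk.ck.Put(lk.l, lk.id, ver) == rpc.OK {
				return
			}
		}
		time.Sleep(10 * time.Millisecond) // held by someone else; retry later
	}
}

func (lk *Lock) Release() {
	// A fuller version would first check that the current holder is lk.id.
	_, ver, _ := lk.ck.Get(lk.l)
	lk.ck.Put(lk.l, "", ver)
}
```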
- -If a client crashes while holding a lock, the lock will never be released. In a design more sophisticated than this lab, the client would attach a lease to a lock. When the lease expires, the lock server would release the lock on behalf of the client. In this lab clients don't crash and you can ignore this problem. - -Implement Acquire and Release. - -# Tests - -You have completed this exercise when your code passes the Reliable tests in the test suite in the lock sub-directory: - -$ cd lock -$ go test -v -run Reliable -=== RUN TestOneClientReliable -Test: 1 lock clients (reliable network)... - ... Passed -- 2.0 1 974 0 ---- PASS: TestOneClientReliable (2.01s) -=== RUN TestManyClientsReliable -Test: 10 lock clients (reliable network)... - ... Passed -- 2.1 1 83194 0 ---- PASS: TestManyClientsReliable (2.11s) -PASS -ok 6.5840/kvsrv1/lock 4.120s -If you haven't implemented the lock yet, the first test will succeed. - -This exercise requires little code but will require a bit more independent thought than the previous exercise. - -You will need a unique identifier for each lock client; call kvtest.RandValue(8) to generate a random string. -The lock service should use a specific key to store the "lock state" (you would have to decide precisely what the lock state is). The key to be used is passed through the parameter l of MakeLock in src/kvsrv1/lock/lock.g diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/simple.md b/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/simple.md deleted file mode 100644 index cbbb1e4..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/simple.md +++ /dev/null @@ -1,34 +0,0 @@ -## Introduction - -In this lab you will build a key/value server for a single machine that ensures that each operation is executed exactly once despite network failures and that the operations are linearizable. Later labs will replicate a server like this one to handle server crashes. - -Clients can send three different RPCs to the key/value server: Put(key, value), Append(key, arg), and Get(key). The server maintains an in-memory map of key/value pairs. Keys and values are strings. Put(key, value) installs or replaces the value for a particular key in the map, Append(key, arg) appends arg to key's value and returns the old value, and Get(key) fetches the current value for the key. A Get for a non-existent key should return an empty string. An Append to a non-existent key should act as if the existing value were a zero-length string. Each client talks to the server through a Clerk with Put/Append/Get methods. A Clerk manages RPC interactions with the server. - -Your server must arrange that application calls to Clerk Get/Put/Append methods be linearizable. If client requests aren't concurrent, each client Get/Put/Append call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls Clerk.Put(), and client Y calls Clerk.Append(), and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. 
For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server. - -## Getting Started - -We supply you with skeleton code and tests in src/kvsrv. You will need to modify kvsrv/client.go, kvsrv/server.go, and kvsrv/common.go. - -To get up and running, execute the following commands. Don't forget the git pull to get the latest software. - -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv -$ go test -... -$ - -## Task - -first, you can run the following command to install go: - -```bash -apt-get update && apt-get install -y wget tar git build-essential && wget https://go.dev/dl/go1.22.3.linux-amd64.tar.gz && tar -C /usr/local -xzf go1.22.3.linux-amd64.tar.gz && rm go1.22.3.linux-amd64.tar.gz && apt-get clean && rm -rf /var/lib/apt/lists/* -export PATH="/usr/local/go/bin:${PATH}" -``` - -And run `go version` to check your go version diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/simple1.md b/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/simple1.md deleted file mode 100644 index 1ff1c4c..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/simple1.md +++ /dev/null @@ -1,49 +0,0 @@ -## Introduction - -In this lab you will build a key/value server for a single machine that ensures that each operation is executed exactly once despite network failures and that the operations are linearizable. Later labs will replicate a server like this one to handle server crashes. - -Clients can send three different RPCs to the key/value server: Put(key, value), Append(key, arg), and Get(key). The server maintains an in-memory map of key/value pairs. Keys and values are strings. Put(key, value) installs or replaces the value for a particular key in the map, Append(key, arg) appends arg to key's value and returns the old value, and Get(key) fetches the current value for the key. A Get for a non-existent key should return an empty string. An Append to a non-existent key should act as if the existing value were a zero-length string. Each client talks to the server through a Clerk with Put/Append/Get methods. A Clerk manages RPC interactions with the server. - -Your server must arrange that application calls to Clerk Get/Put/Append methods be linearizable. If client requests aren't concurrent, each client Get/Put/Append call should observe the modifications to the state implied by the preceding sequence of calls. For concurrent calls, the return values and final state must be the same as if the operations had executed one at a time in some order. Calls are concurrent if they overlap in time: for example, if client X calls Clerk.Put(), and client Y calls Clerk.Append(), and then client X's call returns. A call must observe the effects of all calls that have completed before the call starts. - -Linearizability is convenient for applications because it's the behavior you'd see from a single server that processes requests one at a time. For example, if one client gets a successful response from the server for an update request, subsequently launched reads from other clients are guaranteed to see the effects of that update. Providing linearizability is relatively easy for a single server. - -## Getting Started - -We supply you with skeleton code and tests in src/kvsrv. You will need to modify kvsrv/client.go, kvsrv/server.go, and kvsrv/common.go. 
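Before adding the RPC plumbing, it may help to see the map semantics above in isolation. The sketch below is only an illustration of the Put/Append/Get rules (the `KVStore` type is made up for this example; the real lab server also has to handle the Clerk RPCs and the duplicate-handling concerns the introduction mentions):

```go
package simplekv

import "sync"

// KVStore illustrates the Put/Append/Get semantics described above,
// without the Clerk/RPC plumbing the lab asks you to write.
type KVStore struct {
	mu   sync.Mutex
	data map[string]string
}

func NewKVStore() *KVStore {
	return &KVStore{data: make(map[string]string)}
}

// Put installs or replaces the value for key.
func (kv *KVStore) Put(key, value string) {
	kv.mu.Lock()
	defer kv.mu.Unlock()
	kv.data[key] = value
}

// Append appends arg to key's value and returns the old value.
// A missing key is treated as the empty string.
func (kv *KVStore) Append(key, arg string) string {
	kv.mu.Lock()
	defer kv.mu.Unlock()
	old := kv.data[key] // "" if the key does not exist
	kv.data[key] = old + arg
	return old
}

// Get returns the current value, or "" for a non-existent key.
func (kv *KVStore) Get(key string) string {
	kv.mu.Lock()
	defer kv.mu.Unlock()
	return kv.data[key]
}
```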
- -To get up and running, execute the following commands. Don't forget the git pull to get the latest software. - -$ cd ~/6.5840 -$ git pull -... -$ cd src/kvsrv -$ go test -... -$ - -## Task - -Key/value server with no network failures (easy) - -Run go test first (go test) to verify go environment is set up correctly. - -Your first task is to implement a solution that works when there are no dropped messages. - -You'll need to add RPC-sending code to the Clerk Put/Append/Get methods in client.go, and implement Put, Append() and Get() RPC handlers in server.go. - -You have completed this task when you pass the first two tests in the test suite: "one client" and "many clients". - -Check that your code is race-free using go test -race. - -## Notes - -If you encounter "build cache is required, but could not be located: GOCACHE is not defined and neither $XDG_CACHE_HOME nor $HOME are defined" -you can set `HOME` to `/tmp` and then try running the tests again. -If go is not installed, you can run the following command to install go: - -```bash -apt-get update && apt-get install -y wget tar git build-essential && wget https://go.dev/dl/go1.22.3.linux-amd64.tar.gz && tar -C /usr/local -xzf go1.22.3.linux-amd64.tar.gz && rm go1.22.3.linux-amd64.tar.gz && apt-get clean && rm -rf /var/lib/apt/lists/* && export PATH="/usr/local/go/bin:${PATH}" -``` - -And run `go version` to check your go version diff --git a/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/tesr.md b/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/tesr.md deleted file mode 100644 index 5e3fcf2..0000000 --- a/benchmarks/course_lab_bench/data/benchmark/problems/test_problems/tesr.md +++ /dev/null @@ -1,28 +0,0 @@ - -# PR Review Flow - -```mermaid -sequenceDiagram - participant Leia - participant Issue - participant Copilot - participant PR - participant Luke - - Leia->>Issue: 1. Leia assigns Copilot to an Issue - Issue->>Copilot: 2. Copilot is notified of issue assignment and acks with :eyes: - Copilot->>PR: 3. Copilot creates a draft PR - Copilot->>PR: 4. Copilot periodically pushes new commits and updates PR description - Copilot->>PR: 5. Copilot requests review from Leia - Leia->>PR: 6. Leia requests changes in PR - Copilot->>PR: 7. Copilot acks review with :eyes: - Copilot->>PR: 8. Copilot pushes changes to branch - Copilot->>Leia: 9. Copilot requests review from Leia - Leia->>PR: 10. Leia temporarily steers PR - Leia->>PR: 11. Leia marks PR "ready for reviews" - Leia->>Luke: 12. Leia requests review from Luke - Luke->>PR: 13. Luke requests changes to PR - Copilot->>PR: 14. Copilot pushes changes to PR - Copilot->>Luke: 15. Copilot requests review from Luke - Luke->>PR: 16.
Luke has contributed to the PR, so PR requires another approver -``` diff --git a/benchmarks/course_lab_bench/data/pretrain/example_bench_pretrain_timestamp.jsonl b/benchmarks/course_lab_bench/data/pretrain/example_bench_pretrain_timestamp.jsonl deleted file mode 100644 index c44eafe..0000000 --- a/benchmarks/course_lab_bench/data/pretrain/example_bench_pretrain_timestamp.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"text": "text of one doc", "metadata": {"scenarios": "XXX", "subtask": "XXXX", "description": "xx", "link": "XXX", "XXX": "XXX"}} \ No newline at end of file diff --git a/benchmarks/course_lab_bench/data/sft/example_bench_sft_timestamp.jsonl b/benchmarks/course_lab_bench/data/sft/example_bench_sft_timestamp.jsonl deleted file mode 100644 index 26cadb9..0000000 --- a/benchmarks/course_lab_bench/data/sft/example_bench_sft_timestamp.jsonl +++ /dev/null @@ -1,2 +0,0 @@ -{"sys_prompt": "You are XXX", "user_prompt": "what", "thinking": "chain of thought", "response": "XXX", "metadata": {"scenario": "XX", "subtask": "XXX", "data_quality":"high", "XXX": "XXX"}} - \ No newline at end of file diff --git a/benchmarks/course_lab_bench/env.toml b/benchmarks/course_lab_bench/env.toml deleted file mode 100644 index 5c28a58..0000000 --- a/benchmarks/course_lab_bench/env.toml +++ /dev/null @@ -1,13 +0,0 @@ -[llm] -AZURE_API_KEY = "XXX" -AZURE_API_BASE = "XXXX" -AZURE_API_VERSION = "XXX" -ANTHROPIC_API_KEY = "sk-XXXX" - - -[hardware] -use_gpu = false - -[env-docker] -image = "xuafeng/swe-dind:latest" -entrypoint = "./run.sh" diff --git a/benchmarks/course_lab_bench/install.sh b/benchmarks/course_lab_bench/install.sh deleted file mode 100644 index ba92a8d..0000000 --- a/benchmarks/course_lab_bench/install.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash - -set -e # Exit immediately on error. - -docker --version -python3.12 -m venv .venv -# python3 -m venv .venvdoc -source .venv/bin/activate - -if [ ! -d "SWE-agent" ]; then - echo "==> Install SWE-agent and its dependencies..." - git clone https://github.com/SWE-agent/SWE-agent.git - cd SWE-agent - git checkout 0c27f286303a939aa868ad2003bc4b6776771791 - pip install --editable . - sweagent --help - cd .. -else - echo "==> SWE-agent repository already exists, skipping clone." -fi - -pip install -r requirements.txt -pip install pytest -pip install pytest-cov -deactivate - -echo "==> Setting up CourseLabBench environment..." -cd data/benchmark/projects -if [ -d "test-repo" ]; then - echo "==> test-repo already exists, skipping clone." -else - echo "==> Cloning test-repo... " - git clone https://github.com/SWE-agent/test-repo.git -fi - -if [ -d "6.5840-golabs-2024" ]; then - echo "==> 6.5840-golabs-2024 already exists, skipping clone." -else - echo "==> Cloning 6.5840-golabs-2024..." - git clone git://g.csail.mit.edu/6.5840-golabs-2024 -fi - -if [ -d "xv6-labs-2024" ]; then - echo "==> xv6-labs-2024 already exists, skipping clone." -else - echo "==> Cloning xv6-labs-2024..." - git clone git://g.csail.mit.edu/xv6-labs-2024 -fi - -if [ -d "6.5840-golabs-2025" ]; then - echo "==> 6.5840-golabs-2025 already exists, skipping clone." -else - echo "==> Cloning 6.5840-golabs-2025..." - git clone git://g.csail.mit.edu/6.5840-golabs-2025 -fi - -echo "==> CourseLabBench environment is set up successfully." 
diff --git a/benchmarks/course_lab_bench/requirements.txt b/benchmarks/course_lab_bench/requirements.txt deleted file mode 100644 index a68331e..0000000 --- a/benchmarks/course_lab_bench/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -sentence-transformers==4.0.1 -scikit-learn==1.6.1 -requests -azure-identity -tomli -litellm \ No newline at end of file diff --git a/benchmarks/course_lab_bench/run.sh b/benchmarks/course_lab_bench/run.sh deleted file mode 100644 index a67f7f7..0000000 --- a/benchmarks/course_lab_bench/run.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -set -e # Exit immediately on error. - -if [ $# -lt 1 ] || [ $# -gt 2 ]; then - echo "Usage: $0 " - echo "Example: $0 claude-sonnet-4-5-20250929" - echo "Example: $0 gpt-4o claudecode" - exit 1 -fi - -MODEL_NAME="$1" -AGENT="${2:-claudecode}" -NEW_MODEL_NAME="${MODEL_NAME//\//_}" - -# Note: set it to "openai" if you are using your own model server (vllm) -# Otherwise, set it to "azure" if you are using azure gpt endpoint -# Run self-serving model -# export OPENAI_API_TYPE="openai" -# export OPENAI_BASE_URL="http://localhost:2327/v1" -# export OPENAI_API_KEY="EMPTY" - -source .venv/bin/activate -echo "==> Start to run CourseLabBench" -# Note that if you benchmark has multiple tasks, you need to add --task -# in your code to enable task selection. - -python src/main.py \ - --agent "$AGENT" \ - --model "$MODEL_NAME" \ - # --task "test" - # --save_path "./outputs/course_lab_bench__${NEW_MODEL_NAME}__$(date +"%Y-%m-%d_%H-%M-%S")" \ - # --input_json "./data/benchmark/course_lab_task_examples.jsonl" - -# python src/main_patch.py - # --model "$MODEL_NAME" \ - # --save_path "./outputs/course_lab_bench__${NEW_MODEL_NAME}__$(date +"%Y-%m-%d_%H-%M-%S")" \ - -deactivate diff --git a/benchmarks/course_lab_bench/src/__init__.py b/benchmarks/course_lab_bench/src/__init__.py deleted file mode 100644 index 284e62c..0000000 --- a/benchmarks/course_lab_bench/src/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Init file for the example_bench package.""" diff --git a/benchmarks/course_lab_bench/src/agents/claudecode/install.sh b/benchmarks/course_lab_bench/src/agents/claudecode/install.sh deleted file mode 100644 index 46a1158..0000000 --- a/benchmarks/course_lab_bench/src/agents/claudecode/install.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -set -e # Exit immediately on error. - -apt-get update -y -apt-get install -y nodejs npm - -npm install -g @anthropic-ai/claude-code diff --git a/benchmarks/course_lab_bench/src/agents/claudecode/runner.sh b/benchmarks/course_lab_bench/src/agents/claudecode/runner.sh deleted file mode 100644 index 2b15d24..0000000 --- a/benchmarks/course_lab_bench/src/agents/claudecode/runner.sh +++ /dev/null @@ -1,14 +0,0 @@ - -#!/bin/bash - -set -e # Exit immediately on error. - -# set the model and task as parameters -if [ $# -ne 2 ]; then - echo "Usage: $0 " - echo "Example: $0 claude-sonnet-4-5-20250929 \"set java env\"" - exit 1 -fi - -export ANTHROPIC_API_KEY="sk-XXXX" -claude -p "$2" --model "$1" --output-format json \ No newline at end of file diff --git a/benchmarks/course_lab_bench/src/agents/minisweagent/runner.sh b/benchmarks/course_lab_bench/src/agents/minisweagent/runner.sh deleted file mode 100644 index 0e8e468..0000000 --- a/benchmarks/course_lab_bench/src/agents/minisweagent/runner.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -set -e # Exit immediately on error. 
- -# set the model and task as parameters -if [ $# -ne 2 ]; then - echo "Usage: $0 " - echo "Example: $0 azure/gpt-4.1 \"set java env\"" - exit 1 -fi - -pip install mini-swe-agent - -export AZURE_API_KEY="XXXX" -export AZURE_API_BASE="XXXX" -export ANTHROPIC_API_KEY="sk-XXXX" - - -mini -t "$2" -m "$1" -y -o agent_trajectory.json -# mini -t "set java env" -m "anthropic/claude-sonnet-4-5-20250929" -y \ No newline at end of file diff --git a/benchmarks/course_lab_bench/src/agents/openhand/config.toml b/benchmarks/course_lab_bench/src/agents/openhand/config.toml deleted file mode 100644 index 977d8e9..0000000 --- a/benchmarks/course_lab_bench/src/agents/openhand/config.toml +++ /dev/null @@ -1,6 +0,0 @@ -[core] -runtime = "local" - -[llm] -model = "claude-3-5-sonnet-20241022" -# model = "claude-3-7-sonnet-20250219" \ No newline at end of file diff --git a/benchmarks/course_lab_bench/src/agents/openhand/install.sh b/benchmarks/course_lab_bench/src/agents/openhand/install.sh deleted file mode 100644 index 5b3fdd9..0000000 --- a/benchmarks/course_lab_bench/src/agents/openhand/install.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -set -e # Exit immediately on error. -curl -sSL https://install.python-poetry.org | python3 - -# Make sure ~/.local/bin is on PATH for your shell session: -export PATH="$HOME/.local/bin:$PATH" - -python -V # should show 3.12.7 -apt-get update -y -apt-get install -y tmux - -pip install --no-cache-dir playwright && python -m playwright install --with-deps chromium - -git clone https://github.com/All-Hands-AI/OpenHands.git -cd OpenHands/ -poetry env use $(command -v python3.12) -poetry run python -V -poetry install \ No newline at end of file diff --git a/benchmarks/course_lab_bench/src/agents/openhand/runner.sh b/benchmarks/course_lab_bench/src/agents/openhand/runner.sh deleted file mode 100644 index 2fd4892..0000000 --- a/benchmarks/course_lab_bench/src/agents/openhand/runner.sh +++ /dev/null @@ -1,17 +0,0 @@ - -#!/bin/bash - -set -e # Exit immediately on error. - -# set the model and task as parameters -if [ $# -ne 2 ]; then - echo "Usage: $0 " - echo "Example: $0 azure/gpt-4.1 \"set java env\"" - exit 1 -fi - -export ANTHROPIC_API_KEY="sk-XXXX" - -echo "==> Start to run OpenHand Agent" -cd OpenHands/ -poetry run python -m openhands.core.main --config-file /agent/config.toml --agent-cls CodeActAgent --selected-repo /repo -t "$2" --directory . \ No newline at end of file diff --git a/benchmarks/course_lab_bench/src/config_aoi.yaml b/benchmarks/course_lab_bench/src/config_aoi.yaml deleted file mode 100644 index 3d4cd57..0000000 --- a/benchmarks/course_lab_bench/src/config_aoi.yaml +++ /dev/null @@ -1,122 +0,0 @@ -agent: - model: - name: azure/gpt-4.1 - api_version: "2023-05-15" - temperature: 0.7 - top_p: 1.0 - per_instance_cost_limit: 0.0 - templates: - system_template: |- - SETTING: - - You are an autonomous programmer, and you are working directly in the command line with a special terminal interface. - The terminal interface is formatted as follows: - - (Open file: ) - (Current directory: ) - bash-$ - - You can use any bash commands over the special terminal interface, then it will execute the command and return the output. - In addition to typical bash commands, the interface also consists of a file editor that shows you {{WINDOW}} lines of a file at a time. - You can use specific commands to use the file editor that helps you navigate and edit files. - To call a command, you need to invoke it with a function call/tool call. 
- - Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. For example, if you are looking at this file: - - def fct(): - print("Hello world") - - and you want to edit the file to read: - - def fct(): - print("Hello") - print("world") - - you search string should be `Hello world` and your replace string should be `"Hello"\n print("world")` - (note the extra spaces before the print statement!). - You could also get the same result by search for ` print("Hello world")` and replace with ` print("Hello")\n print("world")`. - - The special terminal interface does NOT support interactive session commands (e.g., python, vim), so please do not invoke them. - Instead, you can write scripts and run them. E.g., you can write a python script and then run it with the python command. - - The special terminal interface also does NOT support container commands (e.g., docker, podman), so please do not invoke them. - Instead, you can directly install any dependencies you need and directly run the programs you need. - - A few important tips for using the special terminal interface: - - 1. Locate relevant code using the `find_file`, `search_file`, and `search_dir` commands. `open` the file you want to edit. Use the `edit` command to perform edits. - - 2. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it! - - 3. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker. - - 4. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current open file. - - 5. When editing files, it is easy to accidentally to write code with incorrect indentation or make other mistakes. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it. - - 6. When editing files, first explain the code you want to edit and why it is causing the problem. Then explain the edit you want to make and how it fixes the problem. Explain how the edit does not break existing functionality. - instance_template: |- - INSTRUCTIONS: - - Now, you are going to conduct a task on your own using the special terminal interface. - When you are satisfied with all of the works you have done, you can simply run the `submit` command to submit your works so far, including the files you have created or edited. - The task may not require you to edit or write any code. If that is the case, you can simply run the `submit` command after you have completed the task. - MIND MISLEADING INFORMATIONS. The task may provide irrelevant or wrong information. The task may be impossible or may require out-of-box solutions. - The terminal outputs may be misleading or suggest invalid solutions. Therefore, ALWAYS RELY ON YOUR OWN KNOWLEDGE AND VERIFY BY YOURSELF. - - Your task is described as follows. - - - TASK: - - {{problem_statement}} - - - RESPONSE FORMAT: - - First, you should ALWAYS include a general thought about what you are going to do next. - Then, for every response, you must include exactly _ONE_ tool call/function call. 
- Remember, you should always include a SINGLE tool call/function call and then wait for a response from the shell before continuing with more discussion and commands. - If you would like to issue two commands at once, PLEASE DO NOT DO THAT! - Please instead first submit just the first tool call, and then after receiving a response you will be able to issue the second. - - - Now your terminal session has started. - - (Open file: {{open_file}}) - (Current directory: {{working_dir}}) - bash-$ - next_step_template: |- - {{observation}} - (Open file: {{open_file}}) - (Current directory: {{working_dir}}) - bash-$ - next_step_no_output_template: |- - Your command ran successfully and did not produce any output. - (Open file: {{open_file}}) - (Current directory: {{working_dir}}) - bash-$ - demonstration_template: | - Here is a demonstration of how to correctly accomplish this task. - It is included to show you how to correctly use the interface. - You do not need to follow exactly what is done in the demonstration. - --- DEMONSTRATION --- - {{demonstration}} - --- END OF DEMONSTRATION --- - demonstrations: - - trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj - put_demos_in_history: true - tools: - env_variables: - WINDOW: 100 - OVERLAP: 2 - bundles: - - path: tools/registry - - path: tools/defaults - - path: tools/search - # - path: tools/edit_linting - - path: tools/edit_replace - - path: tools/submit - enable_bash_tool: true - parse_function: - type: function_calling \ No newline at end of file diff --git a/benchmarks/course_lab_bench/src/config_aoi_anthropic_tools.yaml b/benchmarks/course_lab_bench/src/config_aoi_anthropic_tools.yaml deleted file mode 100644 index 92119ea..0000000 --- a/benchmarks/course_lab_bench/src/config_aoi_anthropic_tools.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# This template is heavily inspired by anthropic and openhands. It is almost -# identical to anthropic_filemap.yaml, but it removes python-specific language -# and adds the multilingual_setup tool to support evaluation on the Multilingual dataset. -agent: - type: default - model: - name: azure/gpt-4o - api_version: "2023-05-15" - temperature: 0.7 - top_p: 1.0 - per_instance_cost_limit: 0.0 - templates: - system_template: |- - You are a helpful assistant that can interact with a computer to solve tasks. - instance_template: |- - - {{working_dir}} - - I've uploaded a code repository in the directory {{working_dir}}. Consider the following PR description: - - - {{problem_statement}} - - - Can you help me implement the necessary changes to the repository so that the requirements specified in the are met? - I've already taken care of all changes to any of the test files described in the . This means you DON'T have to modify the testing logic or any of the tests in any way! - Your task is to make the minimal changes to non-tests files in the {{working_dir}} directory to ensure the is satisfied. - Follow these steps to resolve the issue: - 1. As a first step, it might be a good idea to find and read code relevant to the - 2. Create a script to reproduce the error and execute it using the bash tool, to confirm the error - 3. Edit the sourcecode of the repo to resolve the issue - 4. Rerun your reproduce script and confirm that the error is fixed! - 5. Think about edgecases and make sure your fix handles them as well - Your thinking should be thorough and so it's fine if it's very long. 
- next_step_template: |- - OBSERVATION: - {{observation}} - next_step_no_output_template: |- - Your command ran successfully and did not produce any output. - tools: - execution_timeout: 300 - bundles: - - path: tools/multilingual_setup - - path: tools/registry - - path: tools/edit_anthropic - - path: tools/review_on_submit_m - - path: tools/diff_state - enable_bash_tool: true - parse_function: - type: function_calling - registry_variables: - USE_FILEMAP: 'true' - SUBMIT_REVIEW_MESSAGES: - - | - Thank you for your work on this issue. Please carefully follow the steps below to help review your changes. - - 1. If you made any changes to your code after running the reproduction script, please run the reproduction script again. - If the reproduction script is failing, please revisit your changes and make sure they are correct. - If you have already removed your reproduction script, please ignore this step. - 2. Remove your reproduction script (if you haven't done so already). - 3. If you have modified any TEST files, please revert them to the state they had before you started fixing the issue. - You can do this with `git checkout -- /path/to/test/file`. Use below to find the files you need to revert. - 4. Run the submit command again to confirm. - - Here is a list of all of your changes: - - - {{diff}} - diff --git a/benchmarks/course_lab_bench/src/main.py b/benchmarks/course_lab_bench/src/main.py deleted file mode 100644 index 48ea408..0000000 --- a/benchmarks/course_lab_bench/src/main.py +++ /dev/null @@ -1,110 +0,0 @@ -"""This script runs a benchmark for evaluating patches in a software project.""" - -import argparse -import json -import os -import sys -from datetime import datetime - -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../'))) - -from sdk.logger import logger -from sdk.utils import set_llm_endpoint_from_config - -set_llm_endpoint_from_config('env.toml') - -from run_eval_in_env import run_eval - - -def main(file_path, model, agent, save_path): - """Main function for running the benchmark.""" - logger.info(f'Using model: {model}, agent: {agent}') - with open(file_path) as f: - for line in f: - if not line.strip(): - continue # Skip empty lines - - try: - item = json.loads(line) - except json.JSONDecodeError: - logger.info(f'Skipping invalid JSON line: {line}') - continue - - deployment = item.get('docker_env', None) - project_path = f"./data/benchmark/{item.get('repo_name')}" - task = item.get('task') - task_id = item.get('task_id') - test_method = item.get('test_method') - - result = run_eval( - deployment=deployment, - project_path=project_path, - task_id=task_id, - task=task, - model=model, - agent_path=agent, - test_method=test_method, - save_path=save_path, - ) - with open(f'{save_path}/result.jsonl', 'a+', encoding='utf-8') as fw: - fw.write(json.dumps(result) + '\n') - - success_count = 0 - total_count = 0 - with open(f'{save_path}/result.jsonl', encoding='utf-8') as f: - for line in f: - result = json.loads(line.strip()) - if result.get('status') == 'success': - success_count += 1 - total_count += 1 - logger.info(f'Test run completed: {success_count}/{total_count} tasks succeeded.') - summary_data = {'final_score': success_count / total_count, 'total_tasks': total_count} - - with open(os.path.join(save_path, 'avg_score.json'), 'w', encoding='utf-8') as summary_file: - json.dump(summary_data, summary_file, indent=4) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='example benchmark') - parser.add_argument( - 
'-i', - '--input_file', - help='Benchmark input file', - default='./data/benchmark/env_setup_examples.jsonl', - ) - parser.add_argument('-o', '--save_path', help='Result save path', default=None) - parser.add_argument( - '-a', - '--agent', - help='Agent Name', - default='claudecode', - ) - parser.add_argument( - '-m', - '--model_name', - help='Model Name', - default='claude-sonnet-4-5-20250929', - ) - # Note that if your benchmark has multiple tasks, you need to add --task - # in your code to enable task selection. - parser.add_argument('-t', '--task', help='specify task in scenarios', default=None) - - args = parser.parse_args() - - model_name = args.model_name - agent = args.agent - input_file = args.input_file - save_path = args.save_path - task = args.task - - if save_path is None: - str_model_name = model_name.replace('/', '_') - timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') - save_path = os.path.join('./outputs', f'env_setup_project__{str_model_name}__{args.agent}__{timestamp}') - - if agent == 'claudecode': - agent = './src/agents/claudecode' - save_path = os.path.abspath(os.path.expanduser(save_path)) - os.makedirs(save_path, exist_ok=True) - - main(input_file, model_name, agent, save_path) diff --git a/benchmarks/course_lab_bench/src/main_patch.py b/benchmarks/course_lab_bench/src/main_patch.py deleted file mode 100644 index cc554b5..0000000 --- a/benchmarks/course_lab_bench/src/main_patch.py +++ /dev/null @@ -1,122 +0,0 @@ -"""This script runs a benchmark for evaluating patches in a software project.""" - -import argparse -import json -import os -import sys -from datetime import datetime - -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../'))) - -from sdk.logger import logger -from sdk.utils import set_llm_endpoint_from_config - -set_llm_endpoint_from_config('env.toml') - -from run_eval_sweagent import run # noqa: E402 - - -def main(file_path, save_path): - """Main function for running the benchmark.""" - # file_path = "system_lab_tasks.jsonl" - image = 'xuafeng/swe-go-python:latest' - - with open(file_path) as f: - for line in f: - if not line.strip(): - continue # Skip empty lines - - try: - task = json.loads(line) - except json.JSONDecodeError: - logger.info(f'Skipping invalid JSON line: {line}') - continue - - task_id = task.get('task_id') - repo_path = task.get('repo_name') - problem_path = f'./data/benchmark/problems/{task_id}.md' - test_method = task.get('test_method') - - run(task_id, repo_path, problem_path, test_method, image, save_path) - - success_count = 0 - total_count = 0 - with open(f'{save_path}/result.jsonl', encoding='utf-8') as f: - for line in f: - result = json.loads(line.strip()) - if result.get('status') == 'success': - success_count += 1 - total_count += 1 - logger.info(f'Test run completed: {success_count}/{total_count} tasks succeeded.') - summary_data = {'final_score': success_count / total_count, 'total_tasks': total_count} - - with open(os.path.join(save_path, 'avg_score.json'), 'w', encoding='utf-8') as summary_file: - json.dump(summary_data, summary_file, indent=4) - - -def test_run(): - """Test function to run the benchmark with a sample task.""" - run( - task_id='test_1', - repo_path='projects/test-repo', - problem_path='./data/benchmark/problems/test-repo-problems/1.md', - test_method='pip install -e . 
&& pytest tests/test_tribonaccy.py', - image='xuafeng/swe-go-python:latest', - save_path='./outputs/test_run', - ) - - success_count = 0 - total_count = 0 - with open('./outputs/test_run/result.jsonl', encoding='utf-8') as f: - for line in f: - result = json.loads(line.strip()) - if result.get('status') == 'success': - success_count += 1 - total_count += 1 - logger.info(f'Test run completed: {success_count}/{total_count} tasks succeeded.') - summary_data = {'score': success_count / total_count, 'total_tasks': total_count} - - with open('./outputs/test_run/avg_score.json', 'w', encoding='utf-8') as summary_file: - json.dump(summary_data, summary_file, indent=4) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='example benchmark') - parser.add_argument( - '-i', - '--input_file', - help='Benchmark input file', - # default='./data/benchmark/system_lab_tasks.jsonl', - default='./data/benchmark/system_lab_tasks.jsonl', - ) - parser.add_argument('-o', '--save_path', help='Result save path', default=None) - parser.add_argument('-a', '--agent', help='Agent Name', default='sweagent') - parser.add_argument( - '-m', - '--model_name', - help='Model Name', - default='gpt-4o', - ) - # Note that if your benchmark has multiple tasks, you need to add --task - # in your code to enable task selection. - parser.add_argument('-t', '--task', help='specify task in scenarios', default=None) - - args = parser.parse_args() - - model_name = args.model_name - input_file = args.input_file - save_path = args.save_path - task = args.task - if task == 'test': - logger.info('Running test benchmark...') - test_run() - else: - if save_path is None: - str_model_name = model_name.replace('/', '_') - timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') - save_path = os.path.join('./outputs', f'systemcourseproject__{str_model_name}__{args.agent}__{timestamp}') - - save_path = os.path.abspath(os.path.expanduser(save_path)) - os.makedirs(save_path, exist_ok=True) - - main(input_file, save_path) diff --git a/benchmarks/course_lab_bench/src/patch_evaluator.py b/benchmarks/course_lab_bench/src/patch_evaluator.py deleted file mode 100644 index 734a56d..0000000 --- a/benchmarks/course_lab_bench/src/patch_evaluator.py +++ /dev/null @@ -1,133 +0,0 @@ -"""Patch evaluator for running tests in a deployment.""" - -import asyncio -import json -import os - -from swerex.deployment.docker import DockerDeployment -from swerex.runtime.abstract import BashAction, Command, CreateBashSessionRequest, UploadRequest - -from sdk.logger import logger - - -async def run_some_stuff(task_id, project_path, patch, test_method, deployment): - """Spoiler: This function will work with any deployment.""" - await deployment.start() - runtime = deployment.runtime - - # Issue a few one-off commands, similar to `subprocess.run()` - logger.info(await runtime.execute(Command(command=['echo', 'Hello, world!']))) - - # Create a bash session - await runtime.create_session(CreateBashSessionRequest()) - - # Run a command in the session - # The difference to the one-off commands is that environment state persists! 
- logger.info(await runtime.run_in_session(BashAction(command="export MYVAR='test'"))) - logger.info(await runtime.run_in_session(BashAction(command='echo $MYVAR'))) - - logger.info( - await runtime.upload( - UploadRequest( - source_path='./data/benchmark/projects', - target_path='/projects', - ) - ) - ) - logger.info( - await runtime.upload( - UploadRequest( - source_path=patch, - target_path='/patch.patch', - ) - ) - ) - - logger.info(await runtime.run_in_session(BashAction(command='export PATH=/usr/local/go/bin:${PATH}'))) - logger.info(await runtime.run_in_session(BashAction(command='export HOME=/tmp'))) - logger.info(await runtime.run_in_session(BashAction(command='go version'))) - logger.info(await runtime.run_in_session(BashAction(command='pip install pytest'))) - # logger.info(await runtime.run_in_session(BashAction(command="pytest -v"))) - - logger.info(await runtime.run_in_session(BashAction(command='ls /projects'))) - logger.info(await runtime.run_in_session(BashAction(command='ls /patch.patch'))) - - logger.info(await runtime.run_in_session(BashAction(command='cd /' + project_path))) - logger.info(await runtime.run_in_session(BashAction(command='git apply /patch.patch'))) - logger.info(await runtime.run_in_session(BashAction(command='pwd'))) - - try: - test_output = await runtime.run_in_session(BashAction(command=test_method)) - logger.info(test_output) - return { - 'task_id': task_id, - 'reop_location': project_path, - 'patch': patch, - 'test_method': test_method, - 'status': 'success', - 'output': test_output.output if hasattr(test_output, 'output') else str(test_output), - } - except Exception as e: - logger.info(f'Error running test method: {e}') - return { - 'task_id': task_id, - 'reop_location': project_path, - 'patch': patch, - 'test_method': test_method, - 'status': 'error', - 'output': str(e), - } - - # logger.info(await runtime.run_in_session(BashAction(command="cd projects/6.5840-golabs-2024/src/kvsrv"))) - # logger.info(await runtime.run_in_session(BashAction(command="go test"))) - - await deployment.stop() - - -def pacth_eval(task_id, project_path, patch, test_method, output_path, image): - """Evaluate a patch by running a test method in a deployment.""" - # deployment = LocalDeployment() - deployment = DockerDeployment(image=image) - if not os.path.exists(patch): - logger.error(f'Patch file {patch} does not exist.') - eval_out = { - 'task_id': task_id, - 'reop_location': project_path, - 'patch': '', - 'test_method': test_method, - 'status': 'no_patch', - 'output': 'Patch file does not exist.', - } - - else: - eval_out = asyncio.run(run_some_stuff(task_id, project_path, patch, test_method, deployment)) - - return eval_out - - -if __name__ == '__main__': - # add arguments via argparse - import argparse - - parser = argparse.ArgumentParser(description='Run some stuff in a deployment.') - parser.add_argument('--task_id', type=str, required=True, help='Task ID') - parser.add_argument('--project_path', type=str, required=True, help='Project path') - parser.add_argument('--patch', type=str, required=True, help='Patch file path') - parser.add_argument('--test_method', type=str, required=True, help='Test method command') - parser.add_argument('--output_path', type=str, default='eval_results', help='Output file path') - parser.add_argument('--image', type=str, default='xuafeng/swe-go-python:latest', help='Deployment type') - - # Parse the arguments - args = parser.parse_args() - task_id = args.task_id - project_path = args.project_path - patch = args.patch - test_method = 
args.test_method - output_path = args.output_path - image = args.image - - eval_out = pacth_eval(task_id, project_path, patch, test_method, output_path, image) - - with open(os.path.join(output_path, f'{task_id}_result.json'), 'w', encoding='utf-8') as fw: - fw.write(json.dumps(eval_out, indent=4)) - logger.info('Evaluation completed successfully.') diff --git a/benchmarks/course_lab_bench/src/run_eval_in_env.py b/benchmarks/course_lab_bench/src/run_eval_in_env.py deleted file mode 100644 index 823ebc0..0000000 --- a/benchmarks/course_lab_bench/src/run_eval_in_env.py +++ /dev/null @@ -1,165 +0,0 @@ -"""Patch evaluator for running tests in a deployment.""" - -import asyncio -import os -import sys - -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../'))) - -from swerex.deployment.docker import DockerDeployment -from swerex.runtime.abstract import BashAction, Command, CreateBashSessionRequest, UploadRequest - -from sdk.logger import logger - - -def write_to_file(file_path, content): - """Write content to a file.""" - with open(file_path, 'w') as f: - f.write(content) - - -async def run_eval_in_env(deployment, project_path, task_id, task, model, agent_path, test_method, save_path): - """Spoiler: This function will work with any deployment.""" - await deployment.start() - runtime = deployment.runtime - - # Issue a few one-off commands, similar to `subprocess.run()` - logger.info(await runtime.execute(Command(command=['echo', 'Hello, world!']))) - - # Create a bash session - await runtime.create_session(CreateBashSessionRequest()) - # Run a command in the session - # The difference to the one-off commands is that environment state persists! - logger.info(await runtime.run_in_session(BashAction(command="export MYVAR='test'"))) - logger.info(await runtime.run_in_session(BashAction(command='echo $MYVAR'))) - - logger.info('Uploading project files...') - logger.info( - await runtime.upload( - UploadRequest( - source_path=project_path, - target_path='/repo', - ) - ) - ) - logger.info('Project files uploaded.') - logger.info(await runtime.run_in_session(BashAction(command='ls /repo'))) - logger.info(await runtime.run_in_session(BashAction(command='cd /repo'))) - logger.info(await runtime.run_in_session(BashAction(command='ls'))) - - logger.info('Uploading agent runner script...') - logger.info( - await runtime.upload( - UploadRequest( - source_path=agent_path, - target_path='/agent', - ) - ) - ) - logger.info(await runtime.run_in_session(BashAction(command='ls /agent/runner.sh'))) - logger.info('Agent runner script uploaded.') - - # logger.info("Test Python and Go environment...") - # logger.info(await runtime.run_in_session(BashAction(command='export PATH=/usr/local/go/bin:${PATH}'))) - # logger.info(await runtime.run_in_session(BashAction(command='export HOME=/tmp'))) - # logger.info(await runtime.run_in_session(BashAction(command='go version'))) - # logger.info(await runtime.run_in_session(BashAction(command='pip install pytest'))) - # logger.info(await runtime.run_in_session(BashAction(command="pytest -v"))) - - logger.info('Setup the agent running environment...') - logger.info(await runtime.run_in_session(BashAction(command='chmod +x /agent/runner.sh /agent/install.sh'))) - logger.info(await runtime.run_in_session(BashAction(command='cat /agent/runner.sh'))) - logger.info(await runtime.run_in_session(BashAction(command='/agent/install.sh'))) - - logger.info('Running runner script...') - run_results = await runtime.run_in_session(BashAction(command='pwd && ls && ls 
/agent')) - logger.info(f'Current directory: {run_results}') - run_results = await runtime.run_in_session(BashAction(command=f'/agent/runner.sh "{model}" "{task}"')) - logger.info(f"agent's run results: {run_results}") - logger.info('Runner script finished.') - - # logger.info('Copying outputs to save path...') - # a = await runtime.run_in_session(BashAction(command='cat agent_trajectory.json')) - # output_file = os.path.join(save_path, f'{task_id}_agent_trajectory.json') - # os.makedirs(os.path.dirname(output_file), exist_ok=True) - # write_to_file(output_file, a.output if hasattr(a, 'output') else str(a)) - # logger.info(f'Output saved to: {output_file}') - - try: - test_output = await runtime.run_in_session(BashAction(command=test_method)) - logger.info(test_output) - return { - 'task': task, - 'project_path': project_path, - 'agent_run_results': run_results.output if hasattr(run_results, 'output') else str(run_results), - 'test_method': test_method, - 'status': 'success', - 'output': test_output.output if hasattr(test_output, 'output') else str(test_output), - } - except Exception as e: - logger.info(f'Error running test method: {e}') - return { - 'task': task, - 'project_path': project_path, - 'agent_run_results': run_results.output if hasattr(run_results, 'output') else str(run_results), - 'test_method': test_method, - 'status': 'error', - 'output': str(e), - } - - await deployment.stop() - - -def run_eval(deployment, project_path, task_id, task, model, agent_path, test_method, save_path): - deployment = ( - DockerDeployment(image=deployment) if deployment else DockerDeployment(image='xuafeng/swe-go-python:latest') - ) - return asyncio.run( - run_eval_in_env(deployment, project_path, task_id, task, model, agent_path, test_method, save_path) - ) - - -def test(): - task = 'The java is not installed. Can you please setup it? Note: you are in a docker with root permission. DO NOT use sudo.' - project_path = '../data/benchmark/projects/test-repo' - test_method = 'java -version' - deployment = 'xuafeng/swe-go-python:latest' - model = 'claude-sonnet-4-5-20250929' - agent_path = './agents/claudecode' - save_path = './eval_results' - task_id = 'test_task_1' - result = run_eval(deployment, project_path, task_id, task, model, agent_path, test_method, save_path) - print('Test result:', result) - - -# TODO: still work on add openhand agent -def test1(): - task = 'The java is not installed. Can you please setup it? Note: you are in a docker with root permission. DO NOT use sudo.' 
- project_path = '../data/benchmark/projects/test-repo' - test_method = 'java -version' - deployment = 'xuafeng/swe-go-python:latest' - model = 'claude-sonnet-4-5-20250929' - agent_path = './agents/openhand' - save_path = './eval_results' - task_id = 'test_task_1' - result = run_eval(deployment, project_path, task_id, task, model, agent_path, test_method, save_path) - print('Test result:', result) - - -def test2(): - task = "create a python file named hello.py that prints 'hello world'" - project_path = '../data/benchmark/projects/test-repo' - test_method = 'python hello.py' - deployment = 'xuafeng/swe-go-python:latest' - model = 'claude-sonnet-4-5-20250929' - agent_path = './agents/claudecode' - save_path = './eval_results' - task_id = 'test_task_1' - eval_out = asyncio.run( - run_eval_in_env(deployment, project_path, task_id, task, model, agent_path, test_method, save_path) - ) - print(eval_out) - - -if __name__ == '__main__': - test1() diff --git a/benchmarks/course_lab_bench/src/run_eval_sweagent.py b/benchmarks/course_lab_bench/src/run_eval_sweagent.py deleted file mode 100644 index aa5e86a..0000000 --- a/benchmarks/course_lab_bench/src/run_eval_sweagent.py +++ /dev/null @@ -1,53 +0,0 @@ -import sys -import subprocess - -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../'))) - -from patch_evaluator import pacth_eval - -from sdk.logger import logger - - -def run(task_id, repo_path, problem_path, test_method, image, save_path): - """Run the benchmark for a specific task.""" - output_dir = f'{save_path}/patch/{task_id}' - patch_file = os.path.join(output_dir, '1c2844', '1c2844.patch') - - # Use sweagent to generate a patch for the task - command = [ - 'sweagent', - 'run', - '--config', - './src/config_aoi.yaml', - '--env.repo.path', - './data/benchmark/' + repo_path, - '--problem_statement.path', - problem_path, - '--output_dir', - output_dir, - '--env.deployment.image', - image, - '--env.post_startup_commands', - '["export PATH=/usr/local/go/bin:${PATH} && export HOME=/tmp"]', - ] - - logger.info('Executing sweagent command...') - subprocess.run(command, check=True, timeout=600) - - logger.info('\n\n==========================') - logger.info(f'Patch file expected at: {patch_file}') - - # Evaluate the generated patch - eval_out = pacth_eval( - task_id=task_id, - project_path=repo_path, - patch=patch_file, - test_method=test_method, - output_path=output_dir, - image=image, - ) - logger.info('Patch evaluation completed.') - - with open(f'{save_path}/result.jsonl', 'a+', encoding='utf-8') as fw: - fw.write(json.dumps(eval_out) + '\n') - logger.info('Evaluation completed successfully.') diff --git a/benchmarks/course_lab_bench/test.sh b/benchmarks/course_lab_bench/test.sh deleted file mode 100644 index 00820da..0000000 --- a/benchmarks/course_lab_bench/test.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -set -e # Exit immediately on error. - -source envexamplebench/bin/activate -pytest --version -pytest -deactivate - -echo "==> ExampleBench test is done successfully." diff --git a/benchmarks/course_lab_bench/tests/__init__.py b/benchmarks/course_lab_bench/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/benchmarks/course_lab_bench/go-python.Dockerfile b/benchmarks/courselab_bench/go-python.Dockerfile similarity index 100% rename from benchmarks/course_lab_bench/go-python.Dockerfile rename to benchmarks/courselab_bench/go-python.Dockerfile