From 8843fb475ea2547914f47609572c53792924229a Mon Sep 17 00:00:00 2001
From: Chris Mangum
Date: Sat, 31 May 2025 13:39:41 -0700
Subject: [PATCH] Add Q-learning state initialization and new README documentation

This commit updates the `QAgent` class to ensure that the Q-table is
initialized for new states during the Q-learning update process.
Additionally, a new `README.md` file is added to the `agents` directory,
providing an overview of the agents module, including descriptions of the
various agent types and usage examples. New test files for `AlgoAgent`,
`DeepQAgent`, `QAgent`, and `RandomAgent` are also introduced to improve
test coverage of the agent implementations.
---
 agents/README.md                  | 92 +++++++++++++++++++++++++++++++
 agents/q_agent.py                 |  2 +
 tests/agents/__init__.py          |  1 +
 tests/agents/test_algo_agent.py   | 65 ++++++++++++++++++++++
 tests/agents/test_deep_q_agent.py | 70 +++++++++++++++++++++++
 tests/agents/test_q_agent.py      | 73 ++++++++++++++++++++++++
 tests/agents/test_random_agent.py | 63 +++++++++++++++++++++
 7 files changed, 366 insertions(+)
 create mode 100644 agents/README.md
 create mode 100644 tests/agents/__init__.py
 create mode 100644 tests/agents/test_algo_agent.py
 create mode 100644 tests/agents/test_deep_q_agent.py
 create mode 100644 tests/agents/test_q_agent.py
 create mode 100644 tests/agents/test_random_agent.py

diff --git a/agents/README.md b/agents/README.md
new file mode 100644
index 0000000..76e3cd6
--- /dev/null
+++ b/agents/README.md
@@ -0,0 +1,92 @@
+# Agents Module
+
+This module provides a variety of agent classes for use in reinforcement learning and maze navigation environments. Agents can be used as-is or extended for custom behaviors. Many agents have both standard and memory-augmented variants that leverage episodic and semantic memory for improved performance.
+
+## Agent Types
+
+### 1. `Agent` (Abstract Base Class)
+Defines the interface for all agents. To implement a custom agent, inherit from this class and implement the required methods.
+
+**API:**
+```python
+class Agent(ABC):
+    def __init__(self, agent_id: str, action_space, **kwargs): ...
+    @abstractmethod
+    def act(self, observation: MazeObservation, epsilon: float = 0.1) -> int: ...
+    @abstractmethod
+    def set_demo_path(self, path: list[int]) -> None: ...
+```
+
+### 2. `RandomAgent`
+Selects actions randomly from the action space. Useful as a baseline.
+
+### 3. `MemoryRandomAgent`
+A random agent that also stores and retrieves state/action information from a memory system, biasing action selection toward previously successful actions.
+
+### 4. `AlgoAgent`
+A planning agent that uses search algorithms (BFS/DFS or custom) to plan a path to the target. Well suited to deterministic environments.
+
+### 5. `MemoryAlgoAgent`
+A planning agent with memory augmentation. Retrieves similar states from memory to bias planning and action selection.
+
+### 6. `QAgent`
+Implements tabular Q-learning. Maintains a Q-table of state-action values and uses an epsilon-greedy policy.
+
+### 7. `MemoryQAgent`
+A Q-learning agent with memory augmentation. Stores and retrieves states, actions, and interactions from memory to bias exploration and exploitation.
+
+### 8. `DeepQAgent`
+Implements Deep Q-Learning using PyTorch. Uses a neural network to approximate Q-values and experience replay for training.
+
+### 9. `MemoryDeepQAgent`
+A deep Q-learning agent with memory augmentation. Stores and retrieves states and interactions from memory to bias action selection and learning.
+
+---
+
+## Usage
+
+> **Note:** Only the abstract `Agent` is exposed in `agents/__init__.py`. To use concrete agents, import them directly from their respective files:
+
+```python
+from agents.random_agent import RandomAgent, MemoryRandomAgent
+from agents.algo_agent import AlgoAgent, MemoryAlgoAgent
+from agents.q_agent import QAgent, MemoryQAgent
+from agents.deep_q_agent import DeepQAgent, MemoryDeepQAgent
+```
+
+## Example
+
+```python
+from agents.q_agent import QAgent
+from memory.api.models import MazeObservation
+
+agent = QAgent(agent_id="A1", action_space=4)
+obs = MazeObservation(position=(0,0), target=(3,3), steps=0, nearby_obstacles=[])
+action = agent.act(obs)
+```
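+
+After the environment applies the chosen action, Q-learning agents can learn from the resulting transition via `update_q_value`. The snippet below continues the example above and is an illustrative sketch only; the reward, next observation, and done flag are placeholder values standing in for whatever your environment actually returns:
+
+```python
+# Placeholder transition values; in practice they come from the environment
+# after taking `action`.
+next_obs = MazeObservation(position=(0,1), target=(3,3), steps=1, nearby_obstacles=[])
+agent.update_q_value(obs, action, 1.0, next_obs, False)  # reward=1.0, done=False
+# States not yet in the Q-table are initialized to zero Q-value vectors before the update.
+```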
+
+## Extending Agents
+To create your own agent, inherit from `Agent` and implement the `act` and `set_demo_path` methods.
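+
+For example, a minimal custom agent could look like the sketch below. This is illustrative only: it relies on the constructor inherited from `Agent` (signature shown above) and simply returns a fixed action regardless of the observation:
+
+```python
+from agents import Agent  # abstract base class exposed in agents/__init__.py
+from memory.api.models import MazeObservation
+
+
+class FixedActionAgent(Agent):
+    """Toy agent that ignores the observation and always picks action 0."""
+
+    def act(self, observation: MazeObservation, epsilon: float = 0.1) -> int:
+        return 0  # always the first action in the action space
+
+    def set_demo_path(self, path: list[int]) -> None:
+        # Store the demo path; this toy agent does not replay it.
+        self.demo_path = list(path)
+
+
+agent = FixedActionAgent(agent_id="demo", action_space=4)  # uses the inherited constructor
+```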
+
+## Memory-Augmented Agents
+Memory-augmented agents use a `MemorySpace` object to store and retrieve states, actions, and interactions. This enables:
+- Retrieval of similar past states for biasing action selection
+- Storing successful actions/interactions for future use
+- Episodic and semantic memory integration
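+
+The retrieval pattern these agents rely on looks roughly like the sketch below. This is illustrative pseudocode of the flow, not the exact `MemorySpace` API: the argument passed to `retrieve_similar_states` and the probability threshold are stand-ins:
+
+```python
+import numpy as np
+
+
+def memory_biased_action(agent, observation, fallback_action: int) -> int:
+    """Sketch: prefer an action that previously worked in a similar state."""
+    # Retrieved entries are shaped like {"content": {"action": int, "reward": float}}.
+    similar = agent.memory.retrieve_similar_states(observation)
+    if similar and np.random.random() > 0.2:  # threshold is illustrative
+        return similar[0]["content"]["action"]
+    return fallback_action
+```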
+
+## Requirements
+- `memory` module (for memory-augmented agents)
+- `numpy` (for `QAgent` and `DeepQAgent`) and `torch` (for `DeepQAgent`)
+
+---
+
+## File Overview
+- `base.py`: Abstract base class
+- `random_agent.py`: RandomAgent, MemoryRandomAgent
+- `algo_agent.py`: AlgoAgent, MemoryAlgoAgent
+- `q_agent.py`: QAgent, MemoryQAgent
+- `deep_q_agent.py`: DeepQAgent, MemoryDeepQAgent
+
+---
+
+For more details, see the docstrings in each agent class.
\ No newline at end of file
diff --git a/agents/q_agent.py b/agents/q_agent.py
index cffc8b5..f1cc5a7 100644
--- a/agents/q_agent.py
+++ b/agents/q_agent.py
@@ -118,6 +118,8 @@ def update_q_value(
         self.q_table[next_state_key] = np.zeros(self.action_space)
 
         # Q-learning update
+        if state_key not in self.q_table:
+            self.q_table[state_key] = np.zeros(self.action_space)
         current_q = self.q_table[state_key][action]
 
         if done:
diff --git a/tests/agents/__init__.py b/tests/agents/__init__.py
new file mode 100644
index 0000000..0519ecb
--- /dev/null
+++ b/tests/agents/__init__.py
@@ -0,0 +1 @@
+ 
\ No newline at end of file
diff --git a/tests/agents/test_algo_agent.py b/tests/agents/test_algo_agent.py
new file mode 100644
index 0000000..ddb9f34
--- /dev/null
+++ b/tests/agents/test_algo_agent.py
@@ -0,0 +1,65 @@
+import pytest
+import numpy as np
+from unittest.mock import MagicMock
+
+from agents.algo_agent import AlgoAgent, MemoryAlgoAgent
+from memory.api.models import MazeObservation
+
+@pytest.fixture
+def sample_observation():
+    return MazeObservation(
+        position=(1, 1),
+        target=(2, 2),
+        nearby_obstacles=[(0, 1), (1, 0)],
+        steps=5,
+    )
+
+
+def test_algo_agent_bfs_path(sample_observation):
+    agent = AlgoAgent(agent_id="test", action_space=4, search_algo="bfs")
+    action = agent.act(sample_observation)
+    assert 0 <= action < 4
+
+
+def test_algo_agent_dfs_path(sample_observation):
+    agent = AlgoAgent(agent_id="test", action_space=4, search_algo="dfs")
+    action = agent.act(sample_observation)
+    assert 0 <= action < 4
+
+
+def test_algo_agent_demo_path(sample_observation):
+    agent = AlgoAgent(agent_id="test", action_space=4)
+    agent.set_demo_path([1, 2])
+    assert agent.act(sample_observation) == 1
+    assert agent.act(sample_observation) == 2
+    # After the demo path is exhausted, the agent should revert to planning
+    action = agent.act(sample_observation)
+    assert 0 <= action < 4
+
+
+def test_memory_algo_agent_act_returns_valid_action(sample_observation):
+    agent = MemoryAlgoAgent(agent_id="test", action_space=4)
+    agent.memory = MagicMock()
+    agent.memory.retrieve_similar_states.return_value = []
+    action = agent.act(sample_observation)
+    assert 0 <= action < 4
+
+
+def test_memory_algo_agent_demo_path(sample_observation):
+    agent = MemoryAlgoAgent(agent_id="test", action_space=4)
+    agent.set_demo_path([3, 0])
+    assert agent.act(sample_observation) == 3
+    assert agent.act(sample_observation) == 0
+
+
+def test_memory_algo_agent_memory_action(sample_observation):
+    agent = MemoryAlgoAgent(agent_id="test", action_space=4)
+    agent.memory = MagicMock()
+    agent.memory.retrieve_similar_states.return_value = [
+        {"content": {"action": 1, "reward": 1}}
+    ]
+    np_random_backup = np.random.random
+    np.random.random = lambda: 0.5
+    action = agent.act(sample_observation)
+    np.random.random = np_random_backup
+    assert action == 1
\ No newline at end of file
diff --git a/tests/agents/test_deep_q_agent.py b/tests/agents/test_deep_q_agent.py
new file mode 100644
index 0000000..5b4b518
--- /dev/null
+++ b/tests/agents/test_deep_q_agent.py
@@ -0,0 +1,70 @@
+import pytest
+import numpy as np
+from unittest.mock import MagicMock
+import torch
+
+from agents.deep_q_agent import DeepQAgent, MemoryDeepQAgent
+from memory.api.models import MazeObservation
+
+@pytest.fixture
+def sample_observation():
+    return MazeObservation(
+        position=(0, 0),
+        target=(1, 1),
+        nearby_obstacles=[(0, 1)],
+        steps=1,
+    )
+
+@pytest.fixture
+def next_observation():
+    return MazeObservation(
+        position=(0, 1),
+        target=(1, 1),
+        nearby_obstacles=[(1, 1)],
+        steps=2,
+    )
+
+def test_deep_q_agent_epsilon_greedy_action(sample_observation):
+    agent = DeepQAgent(agent_id="test", action_space=4)
+    np_random_backup = np.random.random
+    np.random.random = lambda: 0.05
+    action = agent.act(sample_observation, epsilon=1.0)
+    np.random.random = np_random_backup
+    assert 0 <= action < 4
+
+def test_deep_q_agent_demo_path(sample_observation):
+    agent = DeepQAgent(agent_id="test", action_space=4)
+    agent.set_demo_path([2, 1])
+    assert agent.act(sample_observation) == 2
+    assert agent.act(sample_observation) == 1
+
+def test_deep_q_agent_experience_replay(sample_observation, next_observation):
+    agent = DeepQAgent(agent_id="test", action_space=4, batch_size=1)
+    agent.remember(sample_observation, 1, 1.0, next_observation, False)
+    # Should not raise an error
+    agent.update()
+
+def test_memory_deep_q_agent_act_returns_valid_action(sample_observation):
+    agent = MemoryDeepQAgent(agent_id="test", action_space=4)
+    agent.memory = MagicMock()
+    agent.memory.retrieve_similar_states.return_value = []
+    action = agent.act(sample_observation)
+    assert 0 <= action < 4
+
+def test_memory_deep_q_agent_demo_path(sample_observation):
+    agent = MemoryDeepQAgent(agent_id="test", action_space=4)
+    agent.set_demo_path([3, 0])
+    assert agent.act(sample_observation) == 3
+    assert agent.act(sample_observation) == 0
+
+def test_memory_deep_q_agent_memory_action(sample_observation):
+    agent = MemoryDeepQAgent(agent_id="test", action_space=4)
+    agent.memory = MagicMock()
+    agent.memory.retrieve_similar_states.return_value = [
+        {"content": {"action": 2, "reward": 1}}
+    ]
+    np_random_backup = np.random.random
+    np.random.random = lambda: 0.5
+    action = agent.act(sample_observation)
+    np.random.random = np_random_backup
+    assert action == 2
\ No newline at end of file
diff --git a/tests/agents/test_q_agent.py b/tests/agents/test_q_agent.py
new file mode 100644
index 0000000..0a3ed48
--- /dev/null
+++ b/tests/agents/test_q_agent.py
@@ -0,0 +1,73 @@
+import pytest
+import numpy as np
+from unittest.mock import MagicMock
+
+from agents.q_agent import QAgent, MemoryQAgent
+from memory.api.models import MazeObservation
+
+@pytest.fixture
+def sample_observation():
+    return MazeObservation(
+        position=(0, 0),
+        target=(1, 1),
+        nearby_obstacles=[(0, 1)],
+        steps=1,
+    )
+
+@pytest.fixture
+def next_observation():
+    return MazeObservation(
+        position=(0, 1),
+        target=(1, 1),
+        nearby_obstacles=[(1, 1)],
+        steps=2,
+    )
+
+def test_q_agent_epsilon_greedy_action(sample_observation):
+    agent = QAgent(agent_id="test", action_space=4)
+    # Force a random action
+    np_random_backup = np.random.random
+    np.random.random = lambda: 0.05
+    action = agent.act(sample_observation, epsilon=1.0)
+    np.random.random = np_random_backup
+    assert 0 <= action < 4
+
+def test_q_agent_demo_path(sample_observation):
+    agent = QAgent(agent_id="test", action_space=4)
+    agent.set_demo_path([2, 1])
+    assert agent.act(sample_observation) == 2
+    assert agent.act(sample_observation) == 1
+
+def test_q_agent_q_value_update(sample_observation, next_observation):
+    agent = QAgent(agent_id="test", action_space=4)
+    action = 1
+    reward = 1.0
+    done = False
+    agent.update_q_value(sample_observation, action, reward, next_observation, done)
+    state_key = agent._get_state_key(sample_observation)
+    assert agent.q_table[state_key][action] != 0
+
+def test_memory_q_agent_act_returns_valid_action(sample_observation):
+    agent = MemoryQAgent(agent_id="test", action_space=4)
+    agent.memory = MagicMock()
+    agent.memory.retrieve_similar_states.return_value = []
+    action = agent.act(sample_observation)
+    assert 0 <= action < 4
+
+def test_memory_q_agent_demo_path(sample_observation):
+    agent = MemoryQAgent(agent_id="test", action_space=4)
+    agent.set_demo_path([3, 0])
+    assert agent.act(sample_observation) == 3
+    assert agent.act(sample_observation) == 0
+
+def test_memory_q_agent_memory_action(sample_observation):
+    agent = MemoryQAgent(agent_id="test", action_space=4)
+    agent.memory = MagicMock()
+    agent.memory.retrieve_similar_states.return_value = [
+        {"content": {"action": 2, "reward": 1}}
+    ]
+    np_random_backup = np.random.random
+    np.random.random = lambda: 0.5
+    action = agent.act(sample_observation)
+    np.random.random = np_random_backup
+    assert action == 2
\ No newline at end of file
diff --git a/tests/agents/test_random_agent.py b/tests/agents/test_random_agent.py
new file mode 100644
index 0000000..bc45dfd
--- /dev/null
+++ b/tests/agents/test_random_agent.py
@@ -0,0 +1,63 @@
+import pytest
+import numpy as np
+from unittest.mock import MagicMock
+
+from agents.random_agent import RandomAgent, MemoryRandomAgent
+from memory.api.models import MazeObservation, MazeActionSpace
+
+@pytest.fixture
+def sample_observation():
+    return MazeObservation(
+        position=(1, 1),
+        target=(2, 2),
+        nearby_obstacles=[(0, 1), (1, 0)],
+        steps=5,
+    )
+
+
+def test_random_agent_act_returns_valid_action(sample_observation):
+    agent = RandomAgent(agent_id="test", action_space=4)
+    action = agent.act(sample_observation)
+    assert 0 <= action < 4
+
+
+def test_random_agent_demo_path(sample_observation):
+    agent = RandomAgent(agent_id="test", action_space=4)
+    agent.set_demo_path([2, 3, 1])
+    assert agent.act(sample_observation) == 2
+    assert agent.act(sample_observation) == 3
+    assert agent.act(sample_observation) == 1
+    # After the demo path is exhausted, the agent should revert to random actions
+    action = agent.act(sample_observation)
+    assert 0 <= action < 4
+
+
+def test_memory_random_agent_act_returns_valid_action(sample_observation):
+    agent = MemoryRandomAgent(agent_id="test", action_space=4)
+    # Replace memory so retrieve_similar_states returns no matches
+    agent.memory = MagicMock()
+    agent.memory.retrieve_similar_states.return_value = []
+    action = agent.act(sample_observation)
+    assert 0 <= action < 4
+
+
+def test_memory_random_agent_demo_path(sample_observation):
+    agent = MemoryRandomAgent(agent_id="test", action_space=4)
+    agent.set_demo_path([1, 0])
+    assert agent.act(sample_observation) == 1
+    assert agent.act(sample_observation) == 0
+
+
+def test_memory_random_agent_memory_action(sample_observation):
+    agent = MemoryRandomAgent(agent_id="test", action_space=4)
+    # Replace memory so retrieve_similar_states returns a memory with action 2
+    agent.memory = MagicMock()
+    agent.memory.retrieve_similar_states.return_value = [
+        {"content": {"action": 2, "reward": 1}}
+    ]
+    # Force np.random.random to always return 0.5 (> 0.2) so the memory action is taken
+    np_random_backup = np.random.random
+    np.random.random = lambda: 0.5
+    action = agent.act(sample_observation)
+    np.random.random = np_random_backup
+    assert action == 2
\ No newline at end of file