From fea1a6fd05f7da544de0ab03fc70e526a8f33dd6 Mon Sep 17 00:00:00 2001
From: Federico Galatolo
Date: Fri, 21 Feb 2020 13:54:09 +0100
Subject: [PATCH 1/3] Wrote test for discounting with vectorized environments

---
 tests/unit/rl_tests.py | 48 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/tests/unit/rl_tests.py b/tests/unit/rl_tests.py
index 6bd0263..ccd63b9 100644
--- a/tests/unit/rl_tests.py
+++ b/tests/unit/rl_tests.py
@@ -9,6 +9,8 @@
 TAU = 0.9
 NUM_SAMPLES = 10
 VECTOR_SIZE = 5
+TIME_STEPS = 10
+NUM_ENVS = 4
 
 """
 TODO: Should test each method to make sure that they properly handle different
@@ -61,6 +63,52 @@ def setUp(self):
 
     def tearDown(self):
         pass
+
+
+    def test_vectorized_discount(self):
+        state = th.randn(TIME_STEPS, NUM_ENVS, VECTOR_SIZE)
+        action = th.randn(TIME_STEPS, NUM_ENVS)
+        reward = th.randn(TIME_STEPS, NUM_ENVS)
+        bootstrap = th.randn(NUM_ENVS)
+        done = th.zeros_like(reward)
+        for i in list(reversed(range(TIME_STEPS)))[:4]:
+            done[i, i % NUM_ENVS] = 1
+
+
+        # Computing the discounted rewards
+        # as non-vectorized environments
+        nonvec_discounted_rewards = []
+        for i in range(NUM_ENVS):
+            replay = ch.ExperienceReplay()
+            for t in range(TIME_STEPS):
+                replay.append(
+                    state[t, i, :], action[t, i],
+                    reward[t, i], state[t, i, :], done[t, i]
+                )
+            nonvec_discounted_rewards.append(
+                ch.td.discount(
+                    GAMMA, replay.reward(), replay.done(), bootstrap[i]
+                )
+            )
+
+        # Computing the discounted rewards
+        # as a vectorized environment
+        replay = ch.ExperienceReplay()
+        for t in range(TIME_STEPS):
+            replay.append(
+                state[t, :, :], action[t, :],
+                reward[t, :], state[t, :, :], done[t, :]
+            )
+        vec_discounted_rewards = ch.td.discount(
+            GAMMA, replay.reward(), replay.done(), bootstrap
+        )
+
+        for i in range(NUM_ENVS):
+            assert th.all(
+                nonvec_discounted_rewards[i][:, 0]
+                ==
+                vec_discounted_rewards[:, i]
+            )
 
     def test_discount(self):
         vector = th.randn(VECTOR_SIZE)

From 89898c4d7a15a73a5e40c132b9613f32e8d70c1c Mon Sep 17 00:00:00 2001
From: Federico Galatolo
Date: Fri, 21 Feb 2020 14:05:39 +0100
Subject: [PATCH 2/3] Bugfix: td.discount now correctly supports vectorized
 replays

---
 cherry/td.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cherry/td.py b/cherry/td.py
index 03338d9..c62a64c 100644
--- a/cherry/td.py
+++ b/cherry/td.py
@@ -52,7 +52,7 @@ def discount(gamma, rewards, dones, bootstrap=0.0):
     msg = 'dones and rewards must have equal length.'
     assert rewards.size(0) == dones.size(0), msg
 
-    R = th.zeros_like(rewards[0]) + bootstrap
+    R = th.zeros_like(rewards) + bootstrap
     discounted = th.zeros_like(rewards)
     length = discounted.size(0)
     for t in reversed(range(length)):

From 576236089e93a3183499bcc4cd2b75aee4c142c0 Mon Sep 17 00:00:00 2001
From: Federico Galatolo
Date: Wed, 26 Feb 2020 13:00:08 +0100
Subject: [PATCH 3/3] Updated changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4edc402..6c578a2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,5 +18,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixed
 
+* Bugfix when using `td.discount` with replays coming from vectorized environments. (@galatolofederico)
 * Actor-critic integration test being too finicky.
 * `cherry.onehot` support for numpy's float and integer types. (thanks @ngoby)
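
A quick way to see the invariant the new test pins down, independent of cherry's
internals: discounting a (TIME_STEPS, NUM_ENVS) reward tensor with a
per-environment bootstrap vector must give the same result as discounting each
environment's column separately, because every step of the backward recursion
broadcasts elementwise over the environment dimension. The sketch below is a
minimal illustration in plain PyTorch; the helper `discounted_returns`, the
constants, and the episode-boundary placement are assumptions for the example,
not cherry's API.

import torch as th

GAMMA = 0.99
TIME_STEPS = 10
NUM_ENVS = 4

def discounted_returns(gamma, rewards, dones, bootstrap):
    # rewards, dones: (T, N) for N vectorized environments, or (T,)
    # for a single one; bootstrap matches the shape of rewards[0].
    R = th.zeros_like(rewards[0]) + bootstrap
    returns = th.zeros_like(rewards)
    for t in reversed(range(rewards.size(0))):
        # Elementwise over environments: the done mask resets the
        # running return only where an episode ended at step t.
        R = rewards[t] + gamma * (1.0 - dones[t]) * R
        returns[t] = R
    return returns

rewards = th.randn(TIME_STEPS, NUM_ENVS)
dones = th.zeros(TIME_STEPS, NUM_ENVS)
dones[3, 1] = 1.0  # env 1 ends an episode at step 3
bootstrap = th.randn(NUM_ENVS)

# Vectorized result vs. one environment at a time: they must agree.
vectorized = discounted_returns(GAMMA, rewards, dones, bootstrap)
for i in range(NUM_ENVS):
    per_env = discounted_returns(
        GAMMA, rewards[:, i], dones[:, i], bootstrap[i]
    )
    assert th.allclose(vectorized[:, i], per_env)

The done mask is what keeps the columns independent: at each step it zeroes the
running return only for the environments whose episode ended there, so no value
leaks across environments or across episode boundaries.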