import random


class RandomizedSet:
    """Set of values supporting insert, remove and uniform random selection.

    ``lst`` stores the members contiguously so one can be picked by position;
    ``hash_table`` maps each member to its current position in ``lst`` so a
    member can be located for removal without scanning the list.
    """

    def __init__(self):
        """Create an empty set."""
        self.hash_table = {}
        self.lst = []

    def insert(self, val: int) -> bool:
        """Add ``val``; return True if it was added, False if already present."""
        if val in self.hash_table:
            return False
        self.hash_table[val] = len(self.lst)
        self.lst.append(val)
        return True

    def remove(self, val: int) -> bool:
        """Delete ``val``; return True if it was present, False otherwise."""
        if val not in self.hash_table:
            return False
        position = self.hash_table.pop(val)
        tail = self.lst.pop()
        # If the removed value was not stored in the final slot, the element
        # just popped from the end is a different member: move it into the
        # vacated slot and record its new position.
        if position != len(self.lst):
            self.lst[position] = tail
            self.hash_table[tail] = position
        return True

    def getRandom(self) -> int:
        """Return a uniformly chosen member.

        Raises:
            IndexError: if the set is empty.
        """
        if len(self.lst) == 0:
            raise IndexError("getRandom() called on empty set")
        return random.choice(self.lst)
import random
import time


def inefficient_coverage_func(intervals, nums):
    """Brute-force reference: count the closed intervals containing each number.

    Args:
        intervals: iterable of ``(lo, hi)`` pairs; both ends are inclusive.
        nums: sequence of numbers to look up.

    Returns:
        A new list where entry ``i`` is the number of intervals that contain
        ``nums[i]``. ``nums`` itself is left untouched so the same input can
        be handed to another implementation for comparison.
    """
    start = time.perf_counter()
    coverage = [
        sum(1 for lo, hi in intervals if lo <= num <= hi)
        for num in nums
    ]
    end = time.perf_counter()
    print(f"{end-start} seconds to run inefficient.")
    return coverage


def identify_coverage(intervals, nums):
    """Sweep-line count of how many closed intervals contain each number.

    Every interval contributes an "open" event at ``lo`` and a "close" event
    at ``hi``; every number contributes a "query" event. The events are
    sorted by position and, at equal positions, in the order
    open < query < close, which makes both interval ends inclusive.

    Args:
        intervals: iterable of ``(lo, hi)`` pairs; both ends are inclusive.
            Pairs with ``lo > hi`` contain nothing and are ignored.
        nums: sequence of numbers to look up.

    Returns:
        A new list where entry ``i`` is the number of intervals that contain
        ``nums[i]``. ``nums`` itself is not modified.
    """
    start_time = time.perf_counter()
    open_kind, query_kind, close_kind = -1, 0, 1

    events = []
    for lo, hi in intervals:
        if lo > hi:
            # An inverted pair would close before it opens and push the
            # running counter negative; the brute-force version counts it
            # as covering nothing, so skip it here too.
            continue
        events.append((lo, open_kind, -1))
        events.append((hi, close_kind, -1))
    events.extend((num, query_kind, i) for i, num in enumerate(nums))
    events.sort()

    coverage = [0] * len(nums)
    active = 0
    for _, kind, idx in events:
        if kind == open_kind:
            active += 1
        elif kind == close_kind:
            active -= 1
        else:
            coverage[idx] = active

    end_time = time.perf_counter()
    print(f"Took {end_time-start_time} seconds to run efficient.")
    return coverage


if __name__ == "__main__":
    intervals = [
        [random.randint(1, 10000), random.randint(10001, 20000)]
        for _ in range(10000)
    ]
    numbers = [random.randint(1, 20000) for _ in range(2000)]
    coverages1 = identify_coverage(intervals, numbers)
    coverages2 = inefficient_coverage_func(intervals, numbers)
    print(coverages1 == coverages2)
from Algorithms.datastructs.hash_table import RandomizedSet


def test_randomized_set():
    """Exercise insert/remove/getRandom on RandomizedSet, including the empty case."""
    rs = RandomizedSet()

    # Insert, remove, and re-remove the same value.
    assert rs.insert(0) is True
    assert rs.remove(0) is True
    assert rs.remove(0) is False

    # A removed value can be inserted again and is the only candidate.
    assert rs.insert(0) is True
    assert rs.getRandom() == 0
    assert rs.remove(0) is True

    # Removing a value that is not present reports False.
    assert rs.insert(-1) is True
    assert rs.remove(0) is False

    # With a single member, every draw must return that member.
    for _ in range(10):
        val = rs.getRandom()
        assert val == -1, f"Expected -1 but got {val}"

    assert rs.remove(-1) is True

    # Drawing from an empty set must raise; fail the test if it does not,
    # instead of merely printing a hint.
    try:
        rs.getRandom()
    except IndexError:
        pass
    else:
        raise AssertionError(
            "getRandom() on an empty set should raise IndexError"
        )


if __name__ == "__main__":
    test_randomized_set()
    print("All tests passed!")
class RandomizedCollection:
    """Multiset supporting insert, remove and random selection.

    ``lst`` stores every occurrence contiguously, so drawing a random position
    returns each value with probability proportional to how many times it is
    stored. ``table`` maps each distinct value to the set of positions in
    ``lst`` that currently hold it.
    """

    def __init__(self):
        """Create an empty collection."""
        self.table = {}
        self.lst = []

    def insert(self, val: int) -> bool:
        """Add one occurrence of ``val``.

        Returns:
            True if ``val`` was not present before this call, False if it was
            already present (the occurrence is added either way).
        """
        is_new = val not in self.table
        self.table.setdefault(val, set()).add(len(self.lst))
        self.lst.append(val)
        return is_new

    def remove(self, val: int) -> bool:
        """Remove one occurrence of ``val``.

        Returns:
            True if an occurrence was removed, False if ``val`` was absent.
        """
        positions = self.table.get(val)
        if not positions:
            return False

        idx = positions.pop()
        last_idx = len(self.lst) - 1
        if idx != last_idx:
            # Fill the hole with the final element so the list stays
            # contiguous. Drop the old position before adding the new one:
            # when the final element equals ``val`` both live in the same set.
            last = self.lst[last_idx]
            self.lst[idx] = last
            last_positions = self.table[last]
            last_positions.remove(last_idx)
            last_positions.add(idx)
        self.lst.pop()

        if not self.table[val]:
            del self.table[val]
        return True

    def getRandom(self) -> int:
        """Return a randomly chosen stored occurrence.

        Raises:
            IndexError: if the collection is empty (same exception type as
                ``RandomizedSet.getRandom``).
        """
        if not self.lst:
            raise IndexError("getRandom() called on empty collection")
        return random.choice(self.lst)