Skip to content

Commit 19374dc

Browse files
committed
feat: add find_median_from_data_stream
1 parent d40fce4 commit 19374dc

File tree

8 files changed

+362
-1
lines changed

8 files changed

+362
-1
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
{
2+
"problem_name": "find_median_from_data_stream",
3+
"solution_class_name": "MedianFinder",
4+
"problem_number": "295",
5+
"problem_title": "Find Median from Data Stream",
6+
"difficulty": "Hard",
7+
"topics": "Two Pointers, Design, Sorting, Heap (Priority Queue), Data Stream",
8+
"tags": ["grind-75"],
9+
"readme_description": "The **median** is the middle value in an ordered integer list. If the size of the list is even, there is no middle value, and the median is the mean of the two middle values.\n\n- For example, for `arr = [2,3,4]`, the median is `3`.\n- For example, for `arr = [2,3]`, the median is `(2 + 3) / 2 = 2.5`.\n\nImplement the MedianFinder class:\n\n- `MedianFinder()` initializes the `MedianFinder` object.\n- `void addNum(int num)` adds the integer `num` from the data stream to the data structure.\n- `double findMedian()` returns the median of all elements so far. Answers within `10^-5` of the actual answer will be accepted.",
10+
"readme_examples": [
11+
{
12+
"content": "```\nInput\n[\"MedianFinder\", \"addNum\", \"addNum\", \"findMedian\", \"addNum\", \"findMedian\"]\n[[], [1], [2], [], [3], []]\nOutput\n[null, null, null, 1.5, null, 2.0]\n```\n\n**Explanation:**\n```\nMedianFinder medianFinder = new MedianFinder();\nmedianFinder.addNum(1); // arr = [1]\nmedianFinder.addNum(2); // arr = [1, 2]\nmedianFinder.findMedian(); // return 1.5 (i.e., (1 + 2) / 2)\nmedianFinder.addNum(3); // arr = [1, 2, 3]\nmedianFinder.findMedian(); // return 2.0\n```"
13+
}
14+
],
15+
"readme_constraints": "- `-10^5 <= num <= 10^5`\n- There will be at least one element in the data structure before calling `findMedian`.\n- At most `5 * 10^4` calls will be made to `addNum` and `findMedian`.",
16+
"readme_additional": "**Follow up:**\n\n- If all integer numbers from the stream are in the range `[0, 100]`, how would you optimize your solution?\n- If `99%` of all integer numbers from the stream are in the range `[0, 100]`, how would you optimize your solution?",
17+
"solution_imports": "",
18+
"solution_methods": [
19+
{ "name": "__init__", "parameters": "", "return_type": "None", "dummy_return": "" },
20+
{ "name": "add_num", "parameters": "num: int", "return_type": "None", "dummy_return": "" },
21+
{ "name": "find_median", "parameters": "", "return_type": "float", "dummy_return": "0.0" }
22+
],
23+
"test_imports": "import pytest\nfrom leetcode_py.test_utils import logged_test\nfrom .solution import MedianFinder",
24+
"test_class_name": "FindMedianFromDataStream",
25+
"test_helper_methods": [],
26+
"test_methods": [
27+
{
28+
"name": "test_median_finder",
29+
"parametrize": "operations, inputs, expected",
30+
"parametrize_typed": "operations: list[str], inputs: list[list[int]], expected: list[float | None]",
31+
"test_cases": "[([\"MedianFinder\", \"addNum\", \"addNum\", \"findMedian\", \"addNum\", \"findMedian\"], [[], [1], [2], [], [3], []], [None, None, None, 1.5, None, 2.0])]",
32+
"body": "mf: MedianFinder | None = None\nresults: list[float | None] = []\nfor i, op in enumerate(operations):\n if op == \"MedianFinder\":\n mf = MedianFinder()\n results.append(None)\n elif op == \"addNum\" and mf is not None:\n mf.add_num(inputs[i][0])\n results.append(None)\n elif op == \"findMedian\" and mf is not None:\n results.append(mf.find_median())\nassert results == expected"
33+
}
34+
],
35+
"playground_imports": "from solution import MedianFinder",
36+
"playground_test_case": "# Example test case\noperations = ['MedianFinder', 'addNum', 'addNum', 'findMedian', 'addNum', 'findMedian']\ninputs = [[], [1], [2], [], [3], []]\nexpected = [None, None, None, 1.5, None, 2.0]",
37+
"playground_execution": "mf = None\nresults: list[float | None] = []\nfor i, op in enumerate(operations):\n if op == 'MedianFinder':\n mf = MedianFinder()\n results.append(None)\n elif op == 'addNum' and mf is not None:\n mf.add_num(inputs[i][0])\n results.append(None)\n elif op == 'findMedian' and mf is not None:\n results.append(mf.find_median())\nresults",
38+
"playground_assertion": "assert results == expected"
39+
}

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
PYTHON_VERSION = 3.13
2-
PROBLEM ?= serialize_and_deserialize_binary_tree
2+
PROBLEM ?= find_median_from_data_stream
33
FORCE ?= 0
44
COMMA := ,
55

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Find Median from Data Stream
2+
3+
**Difficulty:** Hard
4+
**Topics:** Two Pointers, Design, Sorting, Heap (Priority Queue), Data Stream
5+
**Tags:** grind-75
6+
7+
**LeetCode:** [Problem 295](https://leetcode.com/problems/find-median-from-data-stream/description/)
8+
9+
## Problem Description
10+
11+
The **median** is the middle value in an ordered integer list. If the size of the list is even, there is no middle value, and the median is the mean of the two middle values.
12+
13+
- For example, for `arr = [2,3,4]`, the median is `3`.
14+
- For example, for `arr = [2,3]`, the median is `(2 + 3) / 2 = 2.5`.
15+
16+
Implement the MedianFinder class:
17+
18+
- `MedianFinder()` initializes the `MedianFinder` object.
19+
- `void addNum(int num)` adds the integer `num` from the data stream to the data structure.
20+
- `double findMedian()` returns the median of all elements so far. Answers within `10^-5` of the actual answer will be accepted.
21+
22+
## Examples
23+
24+
### Example 1:
25+
26+
```
27+
Input
28+
["MedianFinder", "addNum", "addNum", "findMedian", "addNum", "findMedian"]
29+
[[], [1], [2], [], [3], []]
30+
Output
31+
[null, null, null, 1.5, null, 2.0]
32+
```
33+
34+
**Explanation:**
35+
36+
```
37+
MedianFinder medianFinder = new MedianFinder();
38+
medianFinder.addNum(1); // arr = [1]
39+
medianFinder.addNum(2); // arr = [1, 2]
40+
medianFinder.findMedian(); // return 1.5 (i.e., (1 + 2) / 2)
41+
medianFinder.addNum(3); // arr = [1, 2, 3]
42+
medianFinder.findMedian(); // return 2.0
43+
```
44+
45+
## Constraints
46+
47+
- `-10^5 <= num <= 10^5`
48+
- There will be at least one element in the data structure before calling `findMedian`.
49+
- At most `5 * 10^4` calls will be made to `addNum` and `findMedian`.
50+
51+
**Follow up:**
52+
53+
- If all integer numbers from the stream are in the range `[0, 100]`, how would you optimize your solution?
54+
- If `99%` of all integer numbers from the stream are in the range `[0, 100]`, how would you optimize your solution?

leetcode/find_median_from_data_stream/__init__.py

Whitespace-only changes.
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"id": "imports",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"from solution import MedianFinder"
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": null,
16+
"id": "setup",
17+
"metadata": {},
18+
"outputs": [],
19+
"source": [
20+
"# Example test case\n",
21+
"operations = [\"MedianFinder\", \"addNum\", \"addNum\", \"findMedian\", \"addNum\", \"findMedian\"]\n",
22+
"inputs = [[], [1], [2], [], [3], []]\n",
23+
"expected = [None, None, None, 1.5, None, 2.0]"
24+
]
25+
},
26+
{
27+
"cell_type": "code",
28+
"execution_count": null,
29+
"id": "execute",
30+
"metadata": {},
31+
"outputs": [],
32+
"source": [
33+
"mf = None\n",
34+
"results: list[float | None] = []\n",
35+
"for i, op in enumerate(operations):\n",
36+
" if op == \"MedianFinder\":\n",
37+
" mf = MedianFinder()\n",
38+
" results.append(None)\n",
39+
" elif op == \"addNum\" and mf is not None:\n",
40+
" mf.add_num(inputs[i][0])\n",
41+
" results.append(None)\n",
42+
" elif op == \"findMedian\" and mf is not None:\n",
43+
" results.append(mf.find_median())\n",
44+
"results"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": null,
50+
"id": "test",
51+
"metadata": {},
52+
"outputs": [],
53+
"source": [
54+
"assert results == expected"
55+
]
56+
}
57+
],
58+
"metadata": {
59+
"kernelspec": {
60+
"display_name": "leetcode-py-py3.13",
61+
"language": "python",
62+
"name": "python3"
63+
},
64+
"language_info": {
65+
"codemirror_mode": {
66+
"name": "ipython",
67+
"version": 3
68+
},
69+
"file_extension": ".py",
70+
"mimetype": "text/x-python",
71+
"name": "python",
72+
"nbconvert_exporter": "python3",
73+
"pygments_lexer": "ipython3",
74+
"version": "3.13.7"
75+
}
76+
},
77+
"nbformat": 4,
78+
"nbformat_minor": 5
79+
}
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import heapq
2+
3+
4+
class MedianFinder:
    """Streaming median backed by two balanced heaps.

    ``small`` is a max-heap (values stored negated) holding the lower half of
    the numbers seen so far; ``large`` is a min-heap holding the upper half.
    After every insertion ``small`` holds either as many items as ``large``
    or exactly one more, so the median can be read from the heap tops.

    Space: O(n) for n inserted numbers.
    """

    # Time: O(1)
    # Space: O(1)
    def __init__(self) -> None:
        """Create an empty median finder."""
        self.small: list[int] = []  # max heap (negated): lower half
        self.large: list[int] = []  # min heap: upper half

    # Time: O(log n)
    # Space: O(1)
    def add_num(self, num: int) -> None:
        """Insert ``num`` into the stream."""
        # Route the new value through ``small`` first: whatever comes out is
        # the largest of the lower half (including ``num``), so pushing it to
        # ``large`` keeps max(small) <= min(large).
        largest_low = -heapq.heappushpop(self.small, -num)
        heapq.heappush(self.large, largest_low)

        # Restore the size invariant len(small) in {len(large), len(large) + 1}.
        if len(self.large) > len(self.small):
            heapq.heappush(self.small, -heapq.heappop(self.large))

    # Time: O(1)
    # Space: O(1)
    def find_median(self) -> float:
        """Return the median of all numbers added so far.

        Raises:
            IndexError: if no number has been added yet.
        """
        if not self.small and not self.large:
            raise IndexError("find_median() called before any add_num()")

        # Always hand back a float, as the annotation promises.
        if len(self.small) > len(self.large):
            return float(-self.small[0])
        if len(self.large) > len(self.small):
            return float(self.large[0])
        return (-self.small[0] + self.large[0]) / 2.0
33+
34+
35+
class MedianFinderHybrid:
    """Streaming median using a counting array plus outlier heaps.

    Values inside ``[min_val, max_val]`` are tallied in ``counts`` (one slot
    per integer). Values below ``min_val`` go to ``outliers_small`` and values
    above ``max_val`` go to ``outliers_large``. This suits streams where
    almost every value falls in a small, known range.

    Space: O(R + k) where R = max_val - min_val + 1 and k = number of outliers.
    """

    # Time: O(R)
    # Space: O(R)
    def __init__(self, min_val: int = 0, max_val: int = 100) -> None:
        """Create an empty finder whose counted range is ``[min_val, max_val]``."""
        self.min_val = min_val
        self.max_val = max_val
        self.counts = [0] * (max_val - min_val + 1)
        self.outliers_small: list[int] = []  # max heap (negated) for values < min_val
        self.outliers_large: list[int] = []  # min heap for values > max_val
        self.total = 0

    # Time: O(1) for in-range values, O(log k) for outliers
    # Space: O(1)
    def add_num(self, num: int) -> None:
        """Insert ``num`` into the stream."""
        if self.min_val <= num <= self.max_val:
            self.counts[num - self.min_val] += 1
        elif num < self.min_val:
            heapq.heappush(self.outliers_small, -num)
        else:
            heapq.heappush(self.outliers_large, num)
        self.total += 1

    # Time: O(R + k log k) worst case, O(R) when the median lies in the range
    # Space: O(k) when outliers have to be sorted
    def find_median(self) -> float:
        """Return the median of all numbers added so far.

        Raises:
            IndexError: if no number has been added yet.
        """
        if self.total == 0:
            raise IndexError("find_median() called before any add_num()")

        upper = self.total // 2
        if self.total % 2 == 1:
            return float(self._value_at(upper))
        # Even count: average the two middle order statistics.
        return (self._value_at(upper - 1) + self._value_at(upper)) / 2.0

    def _value_at(self, rank: int) -> int:
        """Return the value at 0-based sorted position ``rank``.

        ``rank`` must satisfy ``0 <= rank < self.total``. The three storage
        areas are ordered (small outliers < counted range < large outliers),
        so they are walked in that order.
        """
        below = len(self.outliers_small)
        if rank < below:
            # Outliers are only sorted when the requested rank lands in them.
            return sorted(-neg for neg in self.outliers_small)[rank]

        seen = below
        for offset, occurrences in enumerate(self.counts):
            if seen + occurrences > rank:
                return offset + self.min_val
            seen += occurrences

        return sorted(self.outliers_large)[rank - seen]

    def _get_prev_value(self, pos: int) -> int:
        """Return the value at sorted position ``pos`` (kept for compatibility)."""
        return self._value_at(pos)

    def _get_next_value(self, pos: int) -> int:
        """Return the value at sorted position ``pos + 1`` (kept for compatibility)."""
        return self._value_at(pos + 1)
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import pytest
2+
3+
from leetcode_py.test_utils import logged_test
4+
5+
from .solution import MedianFinder, MedianFinderHybrid
6+
7+
8+
class TestFindMedianFromDataStream:
    """Replay LeetCode-style operation scripts against both finder classes."""

    @pytest.mark.parametrize(
        "finder_class, operations, inputs, expected",
        [
            (
                MedianFinder,
                ["MedianFinder", "addNum", "addNum", "findMedian", "addNum", "findMedian"],
                [[], [1], [2], [], [3], []],
                [None, None, None, 1.5, None, 2.0],
            ),
            (
                MedianFinderHybrid,
                ["MedianFinder", "addNum", "addNum", "findMedian", "addNum", "findMedian"],
                [[], [1], [2], [], [3], []],
                [None, None, None, 1.5, None, 2.0],
            ),
            (
                MedianFinder,
                ["MedianFinder", "addNum", "findMedian"],
                [[], [5], []],
                [None, None, 5.0],
            ),
            (
                MedianFinderHybrid,
                ["MedianFinder", "addNum", "findMedian"],
                [[], [5], []],
                [None, None, 5.0],
            ),
            (
                MedianFinder,
                ["MedianFinder", "addNum", "addNum", "addNum", "addNum", "findMedian"],
                [[], [1], [3], [2], [4], []],
                [None, None, None, None, None, 2.5],
            ),
            (
                MedianFinderHybrid,
                ["MedianFinder", "addNum", "addNum", "addNum", "addNum", "findMedian"],
                [[], [1], [3], [2], [4], []],
                [None, None, None, None, None, 2.5],
            ),
            (
                MedianFinderHybrid,
                ["MedianFinder", "addNum", "addNum", "addNum", "findMedian"],
                [[], [-1], [50], [101], []],
                [None, None, None, None, 50.0],
            ),
        ],
    )
    @logged_test
    def test_median_finder(
        self, finder_class, operations: list[str], inputs: list[list[int]], expected: list[float | None]
    ):
        # One output slot is recorded per executed operation, mirroring the
        # LeetCode "Output" array (None for constructor / addNum calls).
        finder = None
        outputs: list[float | None] = []
        for index, operation in enumerate(operations):
            if operation == "MedianFinder":
                finder = finder_class()
                outputs.append(None)
                continue
            if finder is None:
                # Nothing to operate on until the constructor op has run.
                continue
            if operation == "addNum":
                finder.add_num(inputs[index][0])
                outputs.append(None)
            elif operation == "findMedian":
                outputs.append(finder.find_median())
        assert outputs == expected

leetcode/word_ladder/solution.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ def ladder_length(self, begin_word: str, end_word: str, word_list: list[str]) ->
55
if end_word not in word_list:
66
return 0
77

8+
if begin_word == end_word:
9+
return 1
10+
811
word_set = set(word_list)
912
begin_set = {begin_word}
1013
end_set = {end_word}

0 commit comments

Comments
 (0)