From b29283c20db16f2fddffd85d859d8f3b906729e5 Mon Sep 17 00:00:00 2001 From: Bharat Gusaiwal Date: Sat, 13 Dec 2025 19:54:22 +0530 Subject: [PATCH 1/5] Add median in a stream using heap-based approach --- data_structures/heap/median_in_a_stream.py | 98 ++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 data_structures/heap/median_in_a_stream.py diff --git a/data_structures/heap/median_in_a_stream.py b/data_structures/heap/median_in_a_stream.py new file mode 100644 index 000000000000..a8fb85c70605 --- /dev/null +++ b/data_structures/heap/median_in_a_stream.py @@ -0,0 +1,98 @@ +import heapq +from typing import List + + +def signum(a: int, b: int) -> int: + """ + Compare two integers. + + Returns: + 1 if a > b + -1 if a < b + 0 if a == b + """ + if a > b: + return 1 + if a < b: + return -1 + return 0 + + +def call_median( + element: int, + max_heap: List[int], + min_heap: List[int], + median: int, +) -> int: + """ + Insert an element into heaps and update median. + """ + case = signum(len(max_heap), len(min_heap)) + + # Case 0: both heaps have same size + if case == 0: + if element > median: + heapq.heappush(min_heap, element) + median = min_heap[0] + else: + heapq.heappush(max_heap, -element) + median = -max_heap[0] + + # Case 1: max heap has more elements + elif case == 1: + if element > median: + heapq.heappush(min_heap, element) + else: + heapq.heappush(min_heap, -heapq.heappop(max_heap)) + heapq.heappush(max_heap, -element) + median = (-max_heap[0] + min_heap[0]) // 2 + + # Case -1: min heap has more elements + else: + if element > median: + heapq.heappush(max_heap, -heapq.heappop(min_heap)) + heapq.heappush(min_heap, element) + else: + heapq.heappush(max_heap, -element) + median = (-max_heap[0] + min_heap[0]) // 2 + + return median + + +def median_in_a_stream(numbers: List[int]) -> List[int]: + """ + Find the median after each insertion in a stream of integers. + + Uses two heaps and follows the classic running median logic. + + Args: + numbers: List of integers + + Returns: + List of medians after each insertion + + Raises: + ValueError: If the input list is empty + + >>> median_in_a_stream([20, 14, 13, 16, 17]) + [20, 17, 14, 15, 16] + >>> median_in_a_stream([5, 15, 1, 3]) + [5, 10, 5, 4] + >>> median_in_a_stream([]) + Traceback (most recent call last): + ... + ValueError: Input list must not be empty + """ + if not numbers: + raise ValueError("Input list must not be empty") + + max_heap: List[int] = [] + min_heap: List[int] = [] + median = 0 + result: List[int] = [] + + for element in numbers: + median = call_median(element, max_heap, min_heap, median) + result.append(median) + + return result From 11162aea27baa4c290a836ac5bc603089cdcf068 Mon Sep 17 00:00:00 2001 From: Bharat Gusaiwal Date: Sat, 13 Dec 2025 19:59:56 +0530 Subject: [PATCH 2/5] Add reference link for running median algorithm --- data_structures/heap/median_in_a_stream.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/data_structures/heap/median_in_a_stream.py b/data_structures/heap/median_in_a_stream.py index a8fb85c70605..30e0cf292ff0 100644 --- a/data_structures/heap/median_in_a_stream.py +++ b/data_structures/heap/median_in_a_stream.py @@ -64,7 +64,10 @@ def median_in_a_stream(numbers: List[int]) -> List[int]: Find the median after each insertion in a stream of integers. Uses two heaps and follows the classic running median logic. - + + Reference: + https://en.wikipedia.org/wiki/Median#Running_median + Args: numbers: List of integers From 54f306dc8509e80b9b991806ce86cb4132b58487 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 13 Dec 2025 14:31:45 +0000 Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- data_structures/heap/median_in_a_stream.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data_structures/heap/median_in_a_stream.py b/data_structures/heap/median_in_a_stream.py index 30e0cf292ff0..d92bededa1d0 100644 --- a/data_structures/heap/median_in_a_stream.py +++ b/data_structures/heap/median_in_a_stream.py @@ -64,10 +64,10 @@ def median_in_a_stream(numbers: List[int]) -> List[int]: Find the median after each insertion in a stream of integers. Uses two heaps and follows the classic running median logic. - + Reference: https://en.wikipedia.org/wiki/Median#Running_median - + Args: numbers: List of integers From 399d6d72e43a2c4ee64269787ff65556401b7b75 Mon Sep 17 00:00:00 2001 From: Bharat Gusaiwal Date: Sat, 13 Dec 2025 20:06:44 +0530 Subject: [PATCH 4/5] Use built-in list type hints for Python 3.9+ --- data_structures/heap/median_in_a_stream.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/data_structures/heap/median_in_a_stream.py b/data_structures/heap/median_in_a_stream.py index d92bededa1d0..3af4d91c6432 100644 --- a/data_structures/heap/median_in_a_stream.py +++ b/data_structures/heap/median_in_a_stream.py @@ -1,5 +1,4 @@ import heapq -from typing import List def signum(a: int, b: int) -> int: @@ -20,16 +19,15 @@ def signum(a: int, b: int) -> int: def call_median( element: int, - max_heap: List[int], - min_heap: List[int], + max_heap: list[int], + min_heap: list[int], median: int, ) -> int: """ - Insert an element into heaps and update median. + Insert an element into heaps and update the median. """ case = signum(len(max_heap), len(min_heap)) - # Case 0: both heaps have same size if case == 0: if element > median: heapq.heappush(min_heap, element) @@ -38,7 +36,6 @@ def call_median( heapq.heappush(max_heap, -element) median = -max_heap[0] - # Case 1: max heap has more elements elif case == 1: if element > median: heapq.heappush(min_heap, element) @@ -47,7 +44,6 @@ def call_median( heapq.heappush(max_heap, -element) median = (-max_heap[0] + min_heap[0]) // 2 - # Case -1: min heap has more elements else: if element > median: heapq.heappush(max_heap, -heapq.heappop(min_heap)) @@ -59,7 +55,7 @@ def call_median( return median -def median_in_a_stream(numbers: List[int]) -> List[int]: +def median_in_a_stream(numbers: list[int]) -> list[int]: """ Find the median after each insertion in a stream of integers. @@ -89,10 +85,10 @@ def median_in_a_stream(numbers: List[int]) -> List[int]: if not numbers: raise ValueError("Input list must not be empty") - max_heap: List[int] = [] - min_heap: List[int] = [] + max_heap: list[int] = [] + min_heap: list[int] = [] median = 0 - result: List[int] = [] + result: list[int] = [] for element in numbers: median = call_median(element, max_heap, min_heap, median) From 77b882d4022c7a5a5607f90895e813452dcf4ec6 Mon Sep 17 00:00:00 2001 From: Bharat Gusaiwal Date: Sat, 13 Dec 2025 20:53:39 +0530 Subject: [PATCH 5/5] Add median in a stream using heap-based approach with doctests --- data_structures/heap/median_in_a_stream.py | 81 ++++++++++++---------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/data_structures/heap/median_in_a_stream.py b/data_structures/heap/median_in_a_stream.py index 3af4d91c6432..e534c5214c13 100644 --- a/data_structures/heap/median_in_a_stream.py +++ b/data_structures/heap/median_in_a_stream.py @@ -1,14 +1,16 @@ +""" +Median in a stream using a heap-based approach. + +Reference: +https://en.wikipedia.org/wiki/Median#Running_median +""" + import heapq def signum(a: int, b: int) -> int: """ - Compare two integers. - - Returns: - 1 if a > b - -1 if a < b - 0 if a == b + Return 1 if a > b, -1 if a < b, 0 if equal. """ if a > b: return 1 @@ -24,30 +26,35 @@ def call_median( median: int, ) -> int: """ - Insert an element into heaps and update the median. + Update heaps and median based on the new element. + + Args: + element (int): new element in stream + max_heap (list[int]): max heap (as negative numbers) + min_heap (list[int]): min heap + median (int): current median + + Returns: + int: updated median """ - case = signum(len(max_heap), len(min_heap)) + size_diff = signum(len(max_heap), len(min_heap)) - if case == 0: + if size_diff == 0: if element > median: heapq.heappush(min_heap, element) median = min_heap[0] else: heapq.heappush(max_heap, -element) median = -max_heap[0] - - elif case == 1: + elif size_diff == 1: if element > median: heapq.heappush(min_heap, element) else: - heapq.heappush(min_heap, -heapq.heappop(max_heap)) - heapq.heappush(max_heap, -element) + heapq.heappush(min_heap, -heapq.heappushpop(max_heap, -element)) median = (-max_heap[0] + min_heap[0]) // 2 - - else: + else: # size_diff == -1 if element > median: - heapq.heappush(max_heap, -heapq.heappop(min_heap)) - heapq.heappush(min_heap, element) + heapq.heappush(max_heap, -heapq.heappushpop(min_heap, element)) else: heapq.heappush(max_heap, -element) median = (-max_heap[0] + min_heap[0]) // 2 @@ -55,23 +62,15 @@ def call_median( return median -def median_in_a_stream(numbers: list[int]) -> list[int]: +def median_in_a_stream(arr: list[int]) -> list[int]: """ - Find the median after each insertion in a stream of integers. - - Uses two heaps and follows the classic running median logic. - - Reference: - https://en.wikipedia.org/wiki/Median#Running_median + Return the median after each new element in the stream. Args: - numbers: List of integers + arr (list[int]): list of integers Returns: - List of medians after each insertion - - Raises: - ValueError: If the input list is empty + list[int]: running medians >>> median_in_a_stream([20, 14, 13, 16, 17]) [20, 17, 14, 15, 16] @@ -82,16 +81,24 @@ def median_in_a_stream(numbers: list[int]) -> list[int]: ... ValueError: Input list must not be empty """ - if not numbers: + if not arr: raise ValueError("Input list must not be empty") - max_heap: list[int] = [] - min_heap: list[int] = [] - median = 0 - result: list[int] = [] + max_heap: list[int] = [] # left side (as negative numbers) + min_heap: list[int] = [] # right side + median = arr[0] + max_heap.append(-arr[0]) + medians: list[int] = [median] - for element in numbers: + for element in arr[1:]: median = call_median(element, max_heap, min_heap, median) - result.append(median) + medians.append(median) + + return medians + - return result +if __name__ == "__main__": + n = int(input("Enter number of elements: ").strip()) + arr = [int(input().strip()) for _ in range(n)] + result = median_in_a_stream(arr) + print("Running medians:", result)