 import functools
 import time
-from multiprocessing import get_context
+from multiprocessing import Process, Queue
 from typing import Iterable, List, Optional, Tuple
 from itertools import islice
 
@@ -106,40 +106,54 @@ def search_all(
         used_queries = queries_list
 
         if parallel == 1:
+            # Single-threaded execution
             start = time.perf_counter()
-            precisions, latencies = list(
-                zip(*[search_one(query) for query in tqdm.tqdm(used_queries)])
-            )
+            results = [search_one(query) for query in tqdm.tqdm(used_queries)]
+            total_time = time.perf_counter() - start
         else:
-            ctx = get_context(self.get_mp_start_method())
+            # Dynamically calculate chunk size
+            chunk_size = max(1, len(used_queries) // parallel)
+            query_chunks = list(chunked_iterable(used_queries, chunk_size))
 
-            def process_initializer():
-                """Initialize each process before starting the search."""
+            # Function to be executed by each worker process
+            def worker_function(chunk, result_queue):
                 self.__class__.init_client(
                     self.host,
                     distance,
                     self.connection_params,
                     self.search_params,
                 )
                 self.setup_search()
+                results = process_chunk(chunk, search_one)
+                result_queue.put(results)
 
-            # Dynamically chunk the generator
-            query_chunks = list(chunked_iterable(used_queries, max(1, len(used_queries) // parallel)))
-
-            with ctx.Pool(
-                processes=parallel,
-                initializer=process_initializer,
-            ) as pool:
-                if parallel > 10:
-                    time.sleep(15)  # Wait for all processes to start
-                start = time.perf_counter()
-                results = pool.starmap(
-                    process_chunk,
-                    [(chunk, search_one) for chunk in query_chunks],
-                )
-            precisions, latencies = zip(*[result for chunk in results for result in chunk])
+            # Create a queue to collect results
+            result_queue = Queue()
+
+            # Create and start worker processes
+            processes = []
+            for chunk in query_chunks:
+                process = Process(target=worker_function, args=(chunk, result_queue))
+                processes.append(process)
+                process.start()
+
+            # Start measuring time for the critical work
+            start = time.perf_counter()
 
-        total_time = time.perf_counter() - start
+            # Collect results from all worker processes
+            results = []
+            for _ in processes:
+                results.extend(result_queue.get())
+
+            # Wait for all worker processes to finish
+            for process in processes:
+                process.join()
+
+            # Stop measuring time for the critical work
+            total_time = time.perf_counter() - start
+
+        # Extract precisions and latencies (outside the timed section)
+        precisions, latencies = zip(*results)
 
         self.__class__.delete_client()
 
@@ -179,3 +193,8 @@ def chunked_iterable(iterable, size):
 def process_chunk(chunk, search_one):
     """Process a chunk of queries using the search_one function."""
     return [search_one(query) for query in chunk]
+
+
+def process_chunk_wrapper(chunk, search_one):
+    """Wrapper to process a chunk of queries."""
+    return process_chunk(chunk, search_one)