@@ -84,36 +84,69 @@ def search_all(
8484
8585 # Handle num_queries parameter
8686 if num_queries > 0 :
87- # If we need more queries than available, cycle through the list
87+ # If we need more queries than available, use a cycling generator
8888 if num_queries > len (queries_list ) and len (queries_list ) > 0 :
8989 print (f"Requested { num_queries } queries but only { len (queries_list )} are available." )
90- print (f"Extending queries by cycling through the available ones." )
91- # Calculate how many complete cycles and remaining items we need
92- complete_cycles = num_queries // len (queries_list )
93- remaining = num_queries % len (queries_list )
94-
95- # Create the extended list
96- extended_queries = []
97- for _ in range (complete_cycles ):
98- extended_queries .extend (queries_list )
99- extended_queries .extend (queries_list [:remaining ])
100-
101- used_queries = extended_queries
90+ print (f"Using a cycling generator to efficiently process queries." )
91+
92+ # Create a cycling generator function
93+ def cycling_query_generator (queries , total_count ):
94+ """Generate queries by cycling through the available ones."""
95+ count = 0
96+ while count < total_count :
97+ for query in queries :
98+ if count < total_count :
99+ yield query
100+ count += 1
101+ else :
102+ break
103+
104+ # Use the generator instead of creating a full list
105+ used_queries = cycling_query_generator (queries_list , num_queries )
106+ # We need to know the total count for the progress bar
107+ total_query_count = num_queries
102108 else :
103109 used_queries = queries_list [:num_queries ]
110+ total_query_count = len (used_queries )
104111 print (f"Using { num_queries } queries" )
105112 else :
106113 used_queries = queries_list
114+ total_query_count = len (used_queries )
107115
108116 if parallel == 1 :
109117 # Single-threaded execution
110118 start = time .perf_counter ()
111- results = [search_one (query ) for query in tqdm .tqdm (used_queries )]
119+
120+ # Create a progress bar with the correct total
121+ pbar = tqdm .tqdm (total = total_query_count , desc = "Processing queries" , unit = "queries" )
122+
123+ # Process queries with progress updates
124+ results = []
125+ for query in used_queries :
126+ results .append (search_one (query ))
127+ pbar .update (1 )
128+
129+ # Close the progress bar
130+ pbar .close ()
131+
112132 total_time = time .perf_counter () - start
113133 else :
114- # Dynamically calculate chunk size
115- chunk_size = max (1 , len (used_queries ) // parallel )
116- query_chunks = list (chunked_iterable (used_queries , chunk_size ))
134+ # Dynamically calculate chunk size based on total_query_count
135+ chunk_size = max (1 , total_query_count // parallel )
136+
137+ # If used_queries is a generator, we need to handle it differently
138+ if hasattr (used_queries , '__next__' ):
139+ # For generators, we'll create chunks on-the-fly
140+ query_chunks = []
141+ remaining = total_query_count
142+ while remaining > 0 :
143+ current_chunk_size = min (chunk_size , remaining )
144+ chunk = [next (used_queries ) for _ in range (current_chunk_size )]
145+ query_chunks .append (chunk )
146+ remaining -= current_chunk_size
147+ else :
148+ # For lists, we can use the chunked_iterable function
149+ query_chunks = list (chunked_iterable (used_queries , chunk_size ))
117150
118151 # Function to be executed by each worker process
119152 def worker_function (chunk , result_queue ):
@@ -141,8 +174,7 @@ def worker_function(chunk, result_queue):
141174 start = time .perf_counter ()
142175
143176 # Create a progress bar for the total number of queries
144- total_queries = len (used_queries )
145- pbar = tqdm .tqdm (total = total_queries , desc = "Processing queries" , unit = "queries" )
177+ pbar = tqdm .tqdm (total = total_query_count , desc = "Processing queries" , unit = "queries" )
146178
147179 # Collect results from all worker processes
148180 results = []
0 commit comments