@@ -172,6 +172,65 @@ def format_iso_date_for_language(iso_date: str, language: str) -> str:
172172
173173 return format_date (date_obj , format = "long" , locale = locale )
174174
def check_addresses_complete(df: pd.DataFrame) -> pd.DataFrame:
    """Return only the rows of ``df`` that have a complete address.

    The function works on a copy, so the caller's frame is left untouched.
    Each address column is converted to stripped text, with missing or blank
    values represented as ``pd.NA``. A combined ``ADDRESS`` column is built
    from the two street-address lines. A row is complete when ``ADDRESS``,
    ``CITY``, ``PROVINCE`` and ``POSTAL_CODE`` are all present.

    When at least one row is incomplete, a warning with the count is logged
    and the incomplete rows are written to
    ``output/incomplete_addresses.csv`` (the directory is created if needed).

    Args:
        df: Frame containing the columns ``STREET_ADDRESS_LINE_1``,
            ``STREET_ADDRESS_LINE_2``, ``CITY``, ``PROVINCE`` and
            ``POSTAL_CODE``.

    Returns:
        A new frame holding only the complete rows, with the normalized
        address columns and the added ``ADDRESS`` column. The temporary
        ``address_complete`` flag is dropped before returning.

    Raises:
        KeyError: If one of the address columns is absent from ``df``.
    """
    df = df.copy()

    address_cols = [
        "STREET_ADDRESS_LINE_1",
        "STREET_ADDRESS_LINE_2",
        "CITY",
        "PROVINCE",
        "POSTAL_CODE",
    ]

    # Normalize text fields. Missing values are detected BEFORE the string
    # conversion: astype(str) would otherwise turn None / pd.NA into the
    # literal text "None" / "<NA>", which would then count as "present".
    for col in address_cols:
        raw = df[col]
        was_missing = raw.isna()
        text = raw.astype(str).str.strip()
        is_blank = was_missing | text.isin(["", "nan"])
        df[col] = text.mask(is_blank, pd.NA)

    # Build the combined address line; a row with neither street line set
    # ends up as an empty string and is treated as missing.
    combined = (
        df["STREET_ADDRESS_LINE_1"].fillna("")
        + " "
        + df["STREET_ADDRESS_LINE_2"].fillna("")
    ).str.strip()
    df["ADDRESS"] = combined.replace({"": pd.NA})

    # STREET_ADDRESS_LINE_2 is optional on its own; it only contributes
    # through the combined ADDRESS column.
    df["address_complete"] = (
        df["ADDRESS"].notna()
        & df["CITY"].notna()
        & df["PROVINCE"].notna()
        & df["POSTAL_CODE"].notna()
    )

    if not df["address_complete"].all():
        incomplete_records = df.loc[~df["address_complete"]]
        LOG.warning(
            "There are %d records with incomplete address information.",
            len(incomplete_records),
        )

        incomplete_path = Path("output/incomplete_addresses.csv")
        # to_csv does not create parent directories, so make sure it exists.
        incomplete_path.parent.mkdir(parents=True, exist_ok=True)
        incomplete_records.to_csv(incomplete_path, index=False)
        LOG.info("Incomplete address records written to %s", incomplete_path)

    # Return only rows with complete addresses, without the helper flag.
    return df.loc[df["address_complete"]].drop(columns=["address_complete"])
233+
175234
176235def convert_date_iso (date_str : str ) -> str :
177236 """Convert a date from English display format to ISO format.
0 commit comments