88
99import structlog
1010
11- from api .API_ingest .volgistics import insert_volgistics_people
11+ from api .API_ingest .volgistics_db import insert_volgistics_people , insert_volgistics_shifts
1212logger = structlog .get_logger ()
1313
1414
15- from sqlalchemy import insert , Table , Column , MetaData , exc
16- from sqlalchemy .dialects .postgresql import Insert
15+ from sqlalchemy import Table , MetaData
1716
1817from sqlalchemy .orm import sessionmaker
1918
4140
4241
4342def open_volgistics (filename ):
44- logger .info ("Loading '%s' - this is slow " , filename .filename )
43+ logger .info ("Loading '%s' " , filename .filename )
4544 start = datetime .now ()
46- wb = load_workbook (filename ) # ,read_only=True should be faster but gets size incorrect
45+ wb = load_workbook (filename , read_only = True ) # read_only=True caused problems (got size wrong) in an earlier version
46+ # works fine now and is much faster
47+
4748 end = datetime .now ()
4849 logger .info ("Loaded '%s' complete in %d seconds" , filename .filename , (end - start ).seconds )
50+
51+ try :
52+ ws = wb ['Service' ]
53+ wp = wb ['Master' ]
54+
55+ except Exception as e :
56+ logger .error ("Could not open expected tab in '%s' - not a Volgistics xlsx file?: %s" , filename .filename , e )
57+ return None
58+
4959 return wb
5060
51- def validate_import_vs (workbook , conn ):
61+ def validate_import_vs (workbook ):
5262 """ Validate that the XLSX column names in the file are close enough to expectations that we can trust the data.
5363 If so, insert the data into the volgisticsshifts table.
5464 """
5565
56- # logger.info('------ Loading %s ', filename.filename )
57- # wb = load_workbook(filename) # ,read_only=True should be faster but gets size incorrect
66+
5867 ws = workbook ['Service' ] # Needs to be 'Service' sheet
5968 # ws.reset_dimensions() # Tells openpyxl to ignore what sheet says and check for itself
6069 ws .calculate_dimension ()
@@ -84,26 +93,19 @@ def validate_import_vs(workbook, conn):
8493
8594 if min_similarity >= MINIMUM_SIMILARITY : # Good enough to trust
8695
87- vs = Table ("volgisticsshifts" , metadata , autoload = True , autoload_with = engine )
88-
8996 seen_header = False # Used to skip header row
9097
9198 # Stats for import
92- dupes = 0
93- other_integrity = 0
94- other_exceptions = 0
9599 row_count = 0
96100 missing_volgistics_id = 0
97101
98-
99- #TODO: Perform bulk insert as for people_insert
100-
102+ shifts_rows = [];
101103
102104 for row in ws .values :
103105 if seen_header :
104106 row_count += 1
105- if (row_count % 1000 == 0 ) and (row_count % 5000 != 0 ):
106- logger .debug ("Row: %s" , str (row_count ) )
107+ # if (row_count % 1000 == 0) and (row_count % 5000 != 0):
108+ # logger.debug("Row: %s", str(row_count) )
107109 if row_count % 5000 == 0 :
108110 logger .info ("Row: %s" , str (row_count ) )
109111 zrow = dict (zip (expected_shifts_columns .values (), row ))
@@ -114,52 +116,23 @@ def validate_import_vs(workbook, conn):
114116 except KeyError :
115117 pass
116118
117- # Cleanup time! Many older imports have... peculiarities
118-
119- # End cleanup
120-
121119 if zrow ['volg_id' ] : # No point in importing if there's nothing to match
122- # Finally ready to insert row into the table
123- #
124-
125- stmt = Insert (vs ).values (zrow )
126-
127- skip_dupes = stmt .on_conflict_do_nothing (
128- constraint = 'uq_shift'
129- )
130- try :
131- result = conn .execute (skip_dupes )
132- except exc .IntegrityError as e : # Catch-all for several more specific exceptions
133- if re .match ('duplicate key value' , str (e .orig ) ):
134- dupes += 1
135- pass
136- else :
137- other_integrity += 1
138- logger .error (e )
139- except Exception as e :
140- other_exceptions += 1
141- logger .error (e )
142-
120+ shifts_rows .append (zrow )
143121 else : # Missing volg_id
144122 missing_volgistics_id += 1
145123
146-
147124 else : # Haven't seen header, so this was first row.
148125 seen_header = True
149126
150- # NOTE: we now run this in a engine.begin() context manager, so our
151- # parent will commit. Don't commit here!
152-
127+ rows = insert_volgistics_shifts (shifts_rows )
153128
154- logger .info ("Total rows: %s Dupes: %s Missing volgistics id: %s" , str (row_count ), str (dupes ), str (missing_volgistics_id ) )
155- logger .info ("Other integrity exceptions: %s Other exceptions: %s" , str (other_exceptions ), str (other_integrity ) )
156- # workbook.close()
129+ logger .info ("Total rows: %d Missing volgistics id: %d" , rows , missing_volgistics_id )
157130 return { True : "File imported" }
158131
159132
160- def volgistics_people_import (workbook , conn ):
133+ def volgistics_people_import (workbook ):
161134
162- ws = workbook ['Master' ] # Needs to be 'Service ' sheet
135+ ws = workbook ['Master' ] # Needs to be 'Master ' sheet
163136 # ws.reset_dimensions() # Tells openpyxl to ignore what sheet says and check for itself
164137 ws .calculate_dimension ()
165138
@@ -185,38 +158,56 @@ def volgistics_people_import(workbook,conn):
185158 col [cell .value ] = idx
186159 idx += 1
187160
188-
189-
161+ # This table has something like 115 columns - not interested in handling each even if empty
162+ # Get the column numbers of the ones we care about
163+ col_number = col ['Number' ]
164+ col_lastname = col ['Last name' ]
165+ col_firstname = col ['First name' ]
166+ col_middlename = col ['Middle name' ]
167+ col_complete_address = col ['Complete address' ]
168+ col_street1 = col ['Street 1' ]
169+ col_street2 = col ['Street 2' ]
170+ col_street3 = col ['Street 3' ]
171+ col_city = col ['City' ]
172+ col_state = col ['State' ]
173+ col_zip = col ['Zip' ]
174+ col_all_phones = col ['All phone numbers' ]
175+ col_home = col ['Home' ]
176+ col_work = col ['Work' ]
177+ col_cell = col ['Cell' ]
178+ col_email = col ['Email' ]
190179 time_stamp = datetime .utcnow ()
191180
192181 try :
193182 for r in ws .iter_rows (min_row = 2 , max_col = 42 ,values_only = True ):
194183 insert_list .append (
195184 {
196- "number" : r [col [ 'Number' ] ],
197- "last_name" : r [col [ 'Last name' ] ],
198- "first_name" : r [col [ 'First name' ] ],
199- "middle_name" : r [col [ 'Middle name' ] ],
200- "complete_address" : r [col [ 'Complete address' ] ],
201- "street_1" : r [col [ 'Street 1' ] ],
202- "street_2" : r [col [ 'Street 2' ] ],
203- "street_3" : r [col [ 'Street 3' ] ],
204- "city" : r [col [ 'City' ] ],
205- "state" : r [col [ 'State' ] ],
206- "zip" : r [col [ 'Zip' ] ],
207- "all_phone_numbers" : r [col [ 'All phone numbers' ] ],
208- "home" : r [col [ 'Home' ] ],
209- "work" : r [col [ 'Work' ] ],
210- "cell" : r [col [ 'Cell' ] ],
211- "email" : r [col [ 'Email' ] ],
185+ "number" : r [col_number ],
186+ "last_name" : r [col_lastname ],
187+ "first_name" : r [col_firstname ],
188+ "middle_name" : r [col_middlename ],
189+ "complete_address" : r [col_complete_address ],
190+ "street_1" : r [col_street1 ],
191+ "street_2" : r [col_street2 ],
192+ "street_3" : r [col_street3 ],
193+ "city" : r [col_city ],
194+ "state" : r [col_state ],
195+ "zip" : r [col_zip ],
196+ "all_phone_numbers" : r [col_all_phones ],
197+ "home" : r [col_home ],
198+ "work" : r [col_work ],
199+ "cell" : r [col_cell ],
200+ "email" : r [col_email ],
212201 "created_date" : time_stamp
213202 }
214203 )
215204 except KeyError as e :
216205 logger .error ("Volgistics source XLSX file 'Master' tab missing expected column (see following) - cannot import" )
217206 logger .exception (e )
218207
219-
208+ except Exception as e :
209+ logger .error ("Unhandled exception preparing Volgistics people records for import" )
210+ logger .exception (e )
220211
221212 rows = insert_volgistics_people (insert_list )
222213
0 commit comments