import re
from flask.globals import current_app
-
+ from datetime import datetime, timedelta
from openpyxl import load_workbook
from jellyfish import jaro_similarity

...

from sqlalchemy import insert, Table, Column, MetaData, exc
from sqlalchemy.dialects.postgresql import Insert
+
+ from sqlalchemy.orm import sessionmaker
+
+
metadata = MetaData()


MINIMUM_SIMILARITY = 0.85  # How good does the table match need to be?

- expected_columns = {
+ expected_shifts_columns = {
    'Number': 'volg_id',
    'Site': 'site',
    'Place': None,
    ...
    'Volunteers': None
}

- def validate_import_vs(filename, conn):
+
+ def open_volgistics(filename):
+     logger.info("Loading '%s' - this is slow", filename.filename)
+     start = datetime.now()
+     wb = load_workbook(filename)  # ,read_only=True should be faster but gets size incorrect
+     end = datetime.now()
+     logger.info("Loaded '%s' complete in %d seconds", filename.filename, (end - start).seconds)
+     return wb
+
+ def validate_import_vs(workbook, conn):
3750 """ Validate that the XLSX column names int the file are close enough to expectations that we can trust the data.
3851 If so, insert the data into the volgisticsshifts table.
3952 """

-     logger.info('------ Loading %s ', filename.filename)
-     wb = load_workbook(filename)  # ,read_only=True should be faster but gets size incorrect
-     ws = wb['Service']  # Needs to be 'Service' sheet
+     # logger.info('------ Loading %s ', filename.filename)
+     # wb = load_workbook(filename)  # ,read_only=True should be faster but gets size incorrect
+     ws = workbook['Service']  # Needs to be 'Service' sheet
    # ws.reset_dimensions()  # Tells openpyxl to ignore what sheet says and check for itself
    ws.calculate_dimension()

    columns = ws.max_column
    if columns > 26:
-         # TODO: Handle AA, AB, etc.
-         logger.warn("Column count > 26; columns after Z not processed")
+         # Only 13 actually populated
+         logger.info("Column count > 26; columns after Z not processed")
        columns = 26

    header = [cell.value for cell in ws[1]]

    min_similarity = 1.0
    min_column = None

-     for expected, got in zip(expected_columns.keys(), header):
+     for expected, got in zip(expected_shifts_columns.keys(), header):
        jsim = jaro_similarity(expected, got)
        if jsim < min_similarity:
            min_similarity = jsim
@@ -83,9 +96,11 @@ def validate_import_vs(filename, conn):
    for row in ws.values:
        if seen_header:
            row_count += 1
-             if row_count % 1000 == 0:
+             if (row_count % 1000 == 0) and (row_count % 5000 != 0):
                logger.debug("Row: %s", str(row_count))
-             zrow = dict(zip(expected_columns.values(), row))
+             if row_count % 5000 == 0:
+                 logger.info("Row: %s", str(row_count))
+             zrow = dict(zip(expected_shifts_columns.values(), row))
            # zrow is a dict of db_col:value pairs, with at most one key being None (as it overwrote any previous)
            # We need to remove the None item, if it exists
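            # For example: 'Place' and 'Volunteers' both map to None in
            # expected_shifts_columns, so zip() pairs both of those cells with
            # the key None and the later pair wins, leaving at most one None
            # entry to remove.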
            try:
@@ -132,5 +147,59 @@ def validate_import_vs(filename, conn):

    logger.info("Total rows: %s Dupes: %s Missing volgistics id: %s", str(row_count), str(dupes), str(missing_volgistics_id))
    logger.info("Other integrity exceptions: %s Other exceptions: %s", str(other_exceptions), str(other_integrity))
-     wb.close()
-     return { True: "File imported" }
+     # workbook.close()
+     return { True: "File imported" }
+
+
+ def volgistics_people_import(workbook, conn):
+
+     ws = workbook['Master']  # Needs to be the 'Master' sheet
+     # ws.reset_dimensions()  # Tells openpyxl to ignore what sheet says and check for itself
+     ws.calculate_dimension()
+
+     columns = ws.max_column
+
+     # TODO: Validate header row to ensure source cols haven't changed
+
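+     # A possible sketch for the TODO above, mirroring the jaro_similarity check
+     # that validate_import_vs() applies to the 'Service' sheet. The header names
+     # and positions here are assumptions inferred from the column indexes used
+     # in the insert below, not verified against a real Master sheet:
+     #
+     #     expected_master_columns = {3: 'Last name', 4: 'First name', 15: 'Number'}
+     #     header = [cell.value for cell in ws[1]]
+     #     for ix, expected in expected_master_columns.items():
+     #         if jaro_similarity(expected, str(header[ix])) < MINIMUM_SIMILARITY:
+     #             raise ValueError("Unexpected 'Master' header in column %d: %s" % (ix + 1, header[ix]))
+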
+     Session = sessionmaker(engine)
+     session = Session()
+     metadata = MetaData()
+     volg_table = Table("volgistics", metadata, autoload=True, autoload_with=engine)
+
+
+     # Cells are addressed as ws[row][col] with row being 1-based and col being 0-based
+
+     insert_list = []
+
+     # TODO: Create a dict from header row so can reference r["number"] instead of r[15]
+
+
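+     # One way the TODO above could be handled (sketch only; the key names depend
+     # on the actual header text in the workbook): build a header-name -> index
+     # map from row 1, then address row values by name instead of by position.
+     #
+     #     header_map = {str(cell.value).strip().lower(): ix
+     #                   for ix, cell in enumerate(ws[1]) if cell.value is not None}
+     #     number_ix = header_map['number']   # rather than the literal 15
+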
+     for r in ws.iter_rows(min_row=2, max_col=42, values_only=True):
+         insert_list.append(
+             {
+                 "number": r[15],
+                 "last_name": r[3],
+                 "first_name": r[4],
+                 "middle_name": r[5],
+                 "complete_address": r[16],
+                 "street_1": r[17],
+                 "street_2": r[18],
+                 "street_3": r[19],
+                 "city": r[20],
+                 "state": r[21],
+                 "zip": r[22],
+                 "all_phone_numbers": r[27],
+                 "home": r[28],
+                 "work": r[30],
+                 "cell": r[32],
+                 "email": r[41]
+             }
+         )
+
+
+     ret = session.execute(volg_table.insert(insert_list))
+
+     session.commit()  # Commit all inserted rows
+     session.close()
+
+     logger.debug('%d rows inserted', ret.rowcount)
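+
+
+ # How these helpers are expected to fit together (a sketch; the actual call
+ # sites, presumably the upload route, are outside this diff):
+ #
+ #     wb = open_volgistics(uploaded_file)
+ #     validate_import_vs(wb, conn)
+ #     volgistics_people_import(wb, conn)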