Skip to content

Commit ed63fe0

Browse files
committed
Polishing importers
1 parent 79331b7 commit ed63fe0

File tree

3 files changed

+121
-105
lines changed

3 files changed

+121
-105
lines changed
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
from sqlalchemy import Table, MetaData
2+
from sqlalchemy.orm import sessionmaker
3+
4+
from config import engine
5+
6+
import structlog
7+
logger = structlog.get_logger()
8+
9+
def insert_volgistics_people(row_list):
    """Replace the contents of the ``volgistics`` table with ``row_list``.

    The TRUNCATE and the insert are executed on the same session and are
    committed together only if the insert succeeds; on any failure the
    session is rolled back.

    Args:
        row_list: List of dicts, one per row to insert, keyed by column name.

    Returns:
        The row count reported by the insert, or 0 if there was nothing to
        insert or an exception occurred (the exception is logged, not raised).
    """
    if not row_list:
        # Nothing to load: leave the current table contents alone instead of
        # truncating the table and then issuing an insert with no rows.
        logger.warning("No volgistics people rows to insert")
        return 0

    row_count = 0
    session = None  # So the cleanup below is safe if session creation fails
    try:
        Session = sessionmaker(engine)
        session = Session()
        metadata = MetaData()
        volg_table = Table("volgistics", metadata, autoload=True, autoload_with=engine)

        session.execute("TRUNCATE table volgistics;")
        ret = session.execute(volg_table.insert(row_list))

        row_count = ret.rowcount

        session.commit()  # Commit the truncate and all inserted rows
    except Exception as e:
        row_count = 0
        if session is not None:
            session.rollback()  # Discard the uncommitted truncate/insert
        logger.error("Exception inserting volgistics people")
        logger.exception(e)
    finally:
        # Always release the connection, on success and on failure alike.
        if session is not None:
            session.close()
    return row_count
30+
31+
32+
def insert_volgistics_shifts(row_list):
    """Replace the contents of the ``volgisticsshifts`` table with ``row_list``.

    The TRUNCATE and the insert are executed on the same session and are
    committed together only if the insert succeeds; on any failure the
    session is rolled back.

    Args:
        row_list: List of dicts, one per row to insert, keyed by column name.

    Returns:
        The row count reported by the insert, or 0 if there was nothing to
        insert or an exception occurred (the exception is logged, not raised).
    """
    if not row_list:
        # Nothing to load: leave the current table contents alone instead of
        # truncating the table and then issuing an insert with no rows.
        logger.warning("No volgistics shifts rows to insert")
        return 0

    row_count = 0
    session = None  # So the cleanup below is safe if session creation fails
    try:
        Session = sessionmaker(engine)
        session = Session()
        metadata = MetaData()
        volg_table = Table("volgisticsshifts", metadata, autoload=True, autoload_with=engine)

        session.execute("TRUNCATE table volgisticsshifts;")
        ret = session.execute(volg_table.insert(row_list))

        row_count = ret.rowcount

        session.commit()  # Commit the truncate and all inserted rows
    except Exception as e:
        row_count = 0
        if session is not None:
            session.rollback()  # Discard the uncommitted truncate/insert
        logger.error("Exception inserting volgistics shifts")
        # Log the exception itself: a generic Exception is not guaranteed to
        # carry a `pgerror` attribute, and reading it here would raise
        # AttributeError from inside the handler.
        logger.exception(e)
    finally:
        # Always release the connection, on success and on failure alike.
        if session is not None:
            session.close()
    return row_count

src/server/api/file_uploader.py

Lines changed: 7 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from donations_importer import validate_import_sfd
44
from flask import current_app
55
from models import ManualMatches, SalesForceContacts, ShelterluvPeople, Volgistics
6-
from shifts_importer import open_volgistics, validate_import_vs, volgistics_people_import
6+
from volgistics_importer import open_volgistics, validate_import_vs, volgistics_people_import
77
from werkzeug.utils import secure_filename
88

99
import structlog
@@ -27,42 +27,15 @@ def determine_upload_type(file, file_extension, conn):
2727
# what kind of data we had.
2828
if file_extension == "csv":
2929
logger.warn("%s: We no longer support CSV files", file.filename)
30-
# df = pd.read_csv(file, dtype="string")
31-
32-
# if {"salesforcecontacts", "volgistics", "shelterluvpeople"}.issubset(df.columns):
33-
# logger.debug("File appears to be salesforcecontacts, volgistics, or shelterluvpeople (manual)")
34-
# ManualMatches.insert_from_df(df, conn)
35-
# return
36-
# elif {"Animal_ids", "Internal-ID"}.issubset(df.columns):
37-
# logger.debug("File appears to be shelterluvpeople")
38-
# ShelterluvPeople.insert_from_df(df, conn)
3930
return
4031

4132
if file_extension == "xlsx":
42-
# excel_file = pd.ExcelFile(file)
43-
# if {"Master", "Service"}.issubset(excel_file.sheet_names):
44-
logger.debug("File appears to be Volgistics")
45-
# Volgistics
46-
workbook = open_volgistics(file)
47-
validate_import_vs(workbook, conn)
48-
# Volgistics.insert_from_file(excel_file, conn)
49-
# Insert Volg people
50-
volgistics_people_import(workbook,conn)
33+
# Assume it's Volgistics
34+
workbook = open_volgistics(file)
35+
if workbook:
36+
validate_import_vs(workbook)
37+
volgistics_people_import(workbook)
5138
workbook.close()
52-
return
53-
54-
# df = pd.read_excel(excel_file)
55-
# if "Contact ID 18" in df.columns:
56-
# # Salesforce something-or-other
57-
# if "Amount" in df.columns:
58-
# # Salesforce donations
59-
# logger.debug("File appears to be Salesforce donations")
60-
# validate_import_sfd(file, conn)
61-
# return
62-
# else:
63-
# # Salesforce contacts
64-
# logger.debug("File appears to be Salesforce contacts")
65-
# SalesForceContacts.insert_from_file_df(df, conn)
66-
# return
39+
return
6740

6841
logger.error("Don't know how to process file: %s", file.filename)
Lines changed: 62 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,11 @@
88

99
import structlog
1010

11-
from api.API_ingest.volgistics import insert_volgistics_people
11+
from api.API_ingest.volgistics_db import insert_volgistics_people, insert_volgistics_shifts
1212
logger = structlog.get_logger()
1313

1414

15-
from sqlalchemy import insert, Table, Column, MetaData, exc
16-
from sqlalchemy.dialects.postgresql import Insert
15+
from sqlalchemy import Table, MetaData
1716

1817
from sqlalchemy.orm import sessionmaker
1918

@@ -41,20 +40,30 @@
4140

4241

4342
def open_volgistics(filename):
44-
logger.info("Loading '%s' - this is slow", filename.filename )
43+
logger.info("Loading '%s' ", filename.filename )
4544
start = datetime.now()
46-
wb = load_workbook(filename) # ,read_only=True should be faster but gets size incorrect
45+
wb = load_workbook(filename, read_only=True) # read_only=True caused problems (got size wrong) in an earlier version
46+
# works fine now and is much faster
47+
4748
end = datetime.now()
4849
logger.info("Loaded '%s' complete in %d seconds", filename.filename, (end-start).seconds )
50+
51+
try:
52+
ws = wb['Service']
53+
wp = wb['Master']
54+
55+
except Exception as e:
56+
logger.error("Could not open expected tab in '%s' - not a Volgistics xlsx file?: %s", filename.filename, e )
57+
return None
58+
4959
return wb
5060

51-
def validate_import_vs(workbook, conn):
61+
def validate_import_vs(workbook):
5262
""" Validate that the XLSX column names int the file are close enough to expectations that we can trust the data.
5363
If so, insert the data into the volgisticsshifts table.
5464
"""
5565

56-
# logger.info('------ Loading %s ', filename.filename )
57-
# wb = load_workbook(filename) # ,read_only=True should be faster but gets size incorrect
66+
5867
ws = workbook['Service'] # Needs to be 'Service' sheet
5968
# ws.reset_dimensions() # Tells openpyxl to ignore what sheet says and check for itself
6069
ws.calculate_dimension()
@@ -84,26 +93,19 @@ def validate_import_vs(workbook, conn):
8493

8594
if min_similarity >= MINIMUM_SIMILARITY : # Good enough to trust
8695

87-
vs = Table("volgisticsshifts", metadata, autoload=True, autoload_with=engine)
88-
8996
seen_header = False # Used to skip header row
9097

9198
# Stats for import
92-
dupes = 0
93-
other_integrity = 0
94-
other_exceptions = 0
9599
row_count = 0
96100
missing_volgistics_id = 0
97101

98-
99-
#TODO: Perform bulk insert as for people_insert
100-
102+
shifts_rows = [];
101103

102104
for row in ws.values:
103105
if seen_header:
104106
row_count += 1
105-
if (row_count % 1000 == 0) and (row_count % 5000 != 0):
106-
logger.debug("Row: %s", str(row_count) )
107+
# if (row_count % 1000 == 0) and (row_count % 5000 != 0):
108+
# logger.debug("Row: %s", str(row_count) )
107109
if row_count % 5000 == 0:
108110
logger.info("Row: %s", str(row_count) )
109111
zrow = dict(zip(expected_shifts_columns.values(), row))
@@ -114,52 +116,23 @@ def validate_import_vs(workbook, conn):
114116
except KeyError:
115117
pass
116118

117-
# Cleanup time! Many older imports have... peculiarities
118-
119-
# End cleanup
120-
121119
if zrow['volg_id'] : # No point in importing if there's nothing to match
122-
# Finally ready to insert row into the table
123-
#
124-
125-
stmt = Insert(vs).values(zrow)
126-
127-
skip_dupes = stmt.on_conflict_do_nothing(
128-
constraint='uq_shift'
129-
)
130-
try:
131-
result = conn.execute(skip_dupes)
132-
except exc.IntegrityError as e: # Catch-all for several more specific exceptions
133-
if re.match('duplicate key value', str(e.orig) ):
134-
dupes += 1
135-
pass
136-
else:
137-
other_integrity += 1
138-
logger.error(e)
139-
except Exception as e:
140-
other_exceptions += 1
141-
logger.error(e)
142-
120+
shifts_rows.append(zrow)
143121
else: # Missing contact_id
144122
missing_volgistics_id += 1
145123

146-
147124
else: # Haven't seen header, so this was first row.
148125
seen_header = True
149126

150-
# NOTE: we now run this in a engine.begin() context manager, so our
151-
# parent will commit. Don't commit here!
152-
127+
rows = insert_volgistics_shifts(shifts_rows)
153128

154-
logger.info("Total rows: %s Dupes: %s Missing volgistics id: %s", str(row_count), str(dupes), str(missing_volgistics_id) )
155-
logger.info("Other integrity exceptions: %s Other exceptions: %s", str(other_exceptions), str(other_integrity) )
156-
# workbook.close()
129+
logger.info("Total rows: %d Missing volgistics id: %d", rows, missing_volgistics_id )
157130
return { True : "File imported" }
158131

159132

160-
def volgistics_people_import(workbook,conn):
133+
def volgistics_people_import(workbook):
161134

162-
ws = workbook['Master'] # Needs to be 'Service' sheet
135+
ws = workbook['Master'] # Needs to be 'Master' sheet
163136
# ws.reset_dimensions() # Tells openpyxl to ignore what sheet says and check for itself
164137
ws.calculate_dimension()
165138

@@ -185,38 +158,56 @@ def volgistics_people_import(workbook,conn):
185158
col[cell.value] = idx
186159
idx += 1
187160

188-
189-
161+
# This table has something like 115 columns - not interested in handling each even if empty
162+
# Get the column numbers of the ones we care about
163+
col_number = col['Number']
164+
col_lastname = col['Last name']
165+
col_firstname = col['First name']
166+
col_middlename = col['Middle name']
167+
col_complete_address = col['Complete address']
168+
col_street1 = col['Street 1']
169+
col_street2 = col['Street 2']
170+
col_street3 = col['Street 3']
171+
col_city = col['City']
172+
col_state = col['State']
173+
col_zip = col['Zip']
174+
col_all_phones = col['All phone numbers']
175+
col_home = col['Home']
176+
col_work = col['Work']
177+
col_cell = col['Cell']
178+
col_email = col['Email']
190179
time_stamp = datetime.utcnow()
191180

192181
try:
193182
for r in ws.iter_rows(min_row=2, max_col=42,values_only=True):
194183
insert_list.append(
195184
{
196-
"number": r[col['Number']],
197-
"last_name": r[col['Last name']],
198-
"first_name": r[col['First name']],
199-
"middle_name": r[col['Middle name']],
200-
"complete_address": r[col['Complete address']],
201-
"street_1": r[col['Street 1']],
202-
"street_2": r[col['Street 2']],
203-
"street_3": r[col['Street 3']],
204-
"city": r[col['City']],
205-
"state": r[col['State']],
206-
"zip": r[col['Zip']],
207-
"all_phone_numbers": r[col['All phone numbers']],
208-
"home": r[col['Home']],
209-
"work": r[col['Work']],
210-
"cell": r[col['Cell']],
211-
"email": r[col['Email']],
185+
"number": r[col_number],
186+
"last_name": r[col_lastname],
187+
"first_name": r[col_firstname],
188+
"middle_name": r[col_middlename],
189+
"complete_address": r[col_complete_address],
190+
"street_1": r[col_street1],
191+
"street_2": r[col_street2],
192+
"street_3": r[col_street3],
193+
"city": r[col_city],
194+
"state": r[col_state],
195+
"zip": r[col_zip],
196+
"all_phone_numbers": r[col_all_phones],
197+
"home": r[col_home],
198+
"work": r[col_work],
199+
"cell": r[col_cell],
200+
"email": r[col_email],
212201
"created_date" : time_stamp
213202
}
214203
)
215204
except KeyError as e:
216205
logger.error("Volgistics source XLSX file 'Master' tab missing expected column (see following) - cannot import")
217206
logger.exception(e)
218207

219-
208+
except Exception as e:
209+
logger.error("Unhandled exception preparing Volgistics people records for import")
210+
logger.exception(e)
220211

221212
rows = insert_volgistics_people(insert_list)
222213

0 commit comments

Comments
 (0)