Skip to content

Commit 79331b7

Browse files
committed
Remove pandas & numpy; bulk insert Volgistics people
1 parent d1a6bb5 commit 79331b7

File tree

4 files changed

+112
-94
lines changed

4 files changed

+112
-94
lines changed

src/server/api/file_uploader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import pandas as pd
1+
# import pandas as pd
22
from config import engine
33
from donations_importer import validate_import_sfd
44
from flask import current_app

src/server/api/internal_api.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from api.API_ingest import ingest_sources_from_api, salesforce_contacts
77
from api.api import internal_api
8-
from rfm_funcs.create_scores import create_scores
8+
# from rfm_funcs.create_scores import create_scores
99
from api.API_ingest import updated_data
1010

1111
logger = structlog.get_logger()
@@ -37,12 +37,12 @@ def ingest_raw_data():
3737
return jsonify({'outcome': 'OK'}), 200
3838

3939

40-
@internal_api.route("/api/internal/create_scores", methods=["GET"])
41-
def hit_create_scores():
42-
logger.info("Hitting create_scores() ")
43-
tuple_count = create_scores()
44-
logger.info("create_scores() processed %s scores", str(tuple_count) )
45-
return jsonify(200)
40+
# @internal_api.route("/api/internal/create_scores", methods=["GET"])
41+
# def hit_create_scores():
42+
# logger.info("Hitting create_scores() ")
43+
# tuple_count = create_scores()
44+
# logger.info("create_scores() processed %s scores", str(tuple_count) )
45+
# return jsonify(200)
4646

4747

4848
@internal_api.route("/api/internal/get_updated_data", methods=["GET"])

src/server/models.py

Lines changed: 52 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import re
33
from itertools import combinations
44

5-
import pandas as pd
5+
# import pandas as pd
66
import sqlalchemy as sa
77
from sqlalchemy import (
88
Boolean,
@@ -296,35 +296,35 @@ class Volgistics(Base):
296296
json = Column(JSONB)
297297
created_date = Column(DateTime, default=datetime.datetime.utcnow)
298298

299-
@classmethod
300-
def insert_from_file(cls, xl_file, conn):
301-
df = pd.read_excel(xl_file, sheet_name="Master")
302-
303-
column_translation = get_source_column_translation(cls)
304-
df = df[column_translation.keys()]
305-
df = df.rename(columns=column_translation)
306-
307-
df["home"] = df["home"].apply(normalize_phone_number)
308-
df["work"] = df["work"].apply(normalize_phone_number)
309-
df["cell"] = df["cell"].apply(normalize_phone_number)
310-
311-
dedup_on = [col for col in cls.__table__.columns if col.name in df.columns]
312-
df["created_date"] = datetime.datetime.utcnow()
313-
df.to_sql(
314-
cls.__tablename__,
315-
conn,
316-
if_exists="append",
317-
index=False,
318-
)
319-
conn.execute(
320-
dedup_consecutive(
321-
cls.__table__,
322-
unique_id=cls._id,
323-
id=cls.number,
324-
order_by=cls.created_date,
325-
dedup_on=tuple_(*dedup_on),
326-
)
327-
)
299+
# @classmethod
300+
# def insert_from_file(cls, xl_file, conn):
301+
# df = pd.read_excel(xl_file, sheet_name="Master")
302+
303+
# column_translation = get_source_column_translation(cls)
304+
# df = df[column_translation.keys()]
305+
# df = df.rename(columns=column_translation)
306+
307+
# df["home"] = df["home"].apply(normalize_phone_number)
308+
# df["work"] = df["work"].apply(normalize_phone_number)
309+
# df["cell"] = df["cell"].apply(normalize_phone_number)
310+
311+
# dedup_on = [col for col in cls.__table__.columns if col.name in df.columns]
312+
# df["created_date"] = datetime.datetime.utcnow()
313+
# df.to_sql(
314+
# cls.__tablename__,
315+
# conn,
316+
# if_exists="append",
317+
# index=False,
318+
# )
319+
# conn.execute(
320+
# dedup_consecutive(
321+
# cls.__table__,
322+
# unique_id=cls._id,
323+
# id=cls.number,
324+
# order_by=cls.created_date,
325+
# dedup_on=tuple_(*dedup_on),
326+
# )
327+
# )
328328

329329
@classmethod
330330
def insert_into_pdp_contacts(cls):
@@ -356,29 +356,29 @@ class ManualMatches(Base):
356356
source_type_2 = Column(String, primary_key=True)
357357
source_id_2 = Column(String, primary_key=True)
358358

359-
@classmethod
360-
def insert_from_df(cls, df, conn):
361-
# Our input csv has columns like "salesforcecontacts," "volgistics," and
362-
# "shelterluvpeople," where two columns are non-null if there is an
363-
# association between those two ids. We massage this table into one that
364-
# is easier to join on.
359+
# @classmethod
360+
# def insert_from_df(cls, df, conn):
361+
# # Our input csv has columns like "salesforcecontacts," "volgistics," and
362+
# # "shelterluvpeople," where two columns are non-null if there is an
363+
# # association between those two ids. We massage this table into one that
364+
# # is easier to join on.
365365

366-
match_dicts = df.to_dict(orient="records")
367-
368-
matched_pairs = []
369-
for match in match_dicts:
370-
non_nulls = {k: v for (k, v) in match.items() if not pd.isna(v)}
371-
for ((st1, sid1), (st2, sid2)) in combinations(non_nulls.items(), 2):
372-
matched_pairs.append(
373-
{
374-
"source_type_1": st1,
375-
"source_id_1": sid1,
376-
"source_type_2": st2,
377-
"source_id_2": sid2,
378-
}
379-
)
380-
381-
conn.execute(insert(cls).values(matched_pairs).on_conflict_do_nothing())
366+
# match_dicts = df.to_dict(orient="records")
367+
368+
# matched_pairs = []
369+
# for match in match_dicts:
370+
# non_nulls = {k: v for (k, v) in match.items() if not pd.isna(v)}
371+
# for ((st1, sid1), (st2, sid2)) in combinations(non_nulls.items(), 2):
372+
# matched_pairs.append(
373+
# {
374+
# "source_type_1": st1,
375+
# "source_id_1": sid1,
376+
# "source_type_2": st2,
377+
# "source_id_2": sid2,
378+
# }
379+
# )
380+
381+
# conn.execute(insert(cls).values(matched_pairs).on_conflict_do_nothing())
382382

383383
class SalesforceDonations(Base):
384384
__tablename__ = "salesforcedonations"

src/server/shifts_importer.py

Lines changed: 52 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from config import engine
88

99
import structlog
10+
11+
from api.API_ingest.volgistics import insert_volgistics_people
1012
logger = structlog.get_logger()
1113

1214

@@ -93,6 +95,10 @@ def validate_import_vs(workbook, conn):
9395
row_count = 0
9496
missing_volgistics_id = 0
9597

98+
99+
# TODO: Perform a bulk insert here, as is done for the Volgistics people import
100+
101+
96102
for row in ws.values:
97103
if seen_header:
98104
row_count += 1
@@ -166,40 +172,52 @@ def volgistics_people_import(workbook,conn):
166172
metadata = MetaData()
167173
volg_table = Table("volgistics", metadata, autoload=True, autoload_with=engine)
168174

169-
170-
# Cells are addressed as ws[row][col] with row being 1-based and col being 0-based
175+
# Worksheet cells are addressed as ws[row][col] with row being 1-based and col being 0-based
171176

172177
insert_list = []
173178

174-
#TODO: Create a dict from header row so can reference r["number"] instead of r[15]
175-
176-
177-
for r in ws.iter_rows(min_row=2, max_col=42,values_only=True):
178-
insert_list.append(
179-
{
180-
"number": r[15],
181-
"last_name": r[3],
182-
"first_name": r[4],
183-
"middle_name": r[5],
184-
"complete_address": r[16],
185-
"street_1": r[17],
186-
"street_2": r[18],
187-
"street_3": r[19],
188-
"city": r[20],
189-
"state": r[21],
190-
"zip": r[22],
191-
"all_phone_numbers": r[27],
192-
"home": r[28],
193-
"work": r[30],
194-
"cell": r[32],
195-
"email": r[41]
196-
}
197-
)
198-
199-
200-
ret = session.execute(volg_table.insert(insert_list))
201-
202-
session.commit() # Commit all inserted rows
203-
session.close()
204-
205-
logger.debug('%d rows inserted', ret.rowcount)
179+
# Create a dict from the header row so we can reference columns by name
180+
# e.g., r[col['Number']] instead of r[15]
181+
header = ws[1]
182+
col = {};
183+
idx = 0
184+
for cell in header:
185+
col[cell.value] = idx
186+
idx += 1
187+
188+
189+
190+
time_stamp = datetime.utcnow()
191+
192+
try:
193+
for r in ws.iter_rows(min_row=2, max_col=42,values_only=True):
194+
insert_list.append(
195+
{
196+
"number": r[col['Number']],
197+
"last_name": r[col['Last name']],
198+
"first_name": r[col['First name']],
199+
"middle_name": r[col['Middle name']],
200+
"complete_address": r[col['Complete address']],
201+
"street_1": r[col['Street 1']],
202+
"street_2": r[col['Street 2']],
203+
"street_3": r[col['Street 3']],
204+
"city": r[col['City']],
205+
"state": r[col['State']],
206+
"zip": r[col['Zip']],
207+
"all_phone_numbers": r[col['All phone numbers']],
208+
"home": r[col['Home']],
209+
"work": r[col['Work']],
210+
"cell": r[col['Cell']],
211+
"email": r[col['Email']],
212+
"created_date" : time_stamp
213+
}
214+
)
215+
except KeyError as e:
216+
logger.error("Volgistics source XLSX file 'Master' tab missing expected column (see following) - cannot import")
217+
logger.exception(e)
218+
219+
220+
221+
rows = insert_volgistics_people(insert_list)
222+
223+
logger.debug('Inserted %d Volgistics people rows', rows)

0 commit comments

Comments
 (0)