From a0ba00b5b4e5eb6ab505d7c93783de9939d809ed Mon Sep 17 00:00:00 2001 From: TPI Marienburg Date: Tue, 11 Dec 2018 16:15:04 -0800 Subject: [PATCH 1/7] Update Housing Outcomes.py Updated versioning number to check out branch pushes. Atom is handling this strangely. --- Housing Outcomes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Housing Outcomes.py b/Housing Outcomes.py index 51414f6..c2ffe66 100644 --- a/Housing Outcomes.py +++ b/Housing Outcomes.py @@ -1,5 +1,5 @@ __author__ = "David Marienburg" -__version__ = "1.11" +__version__ = "1.12" """ This script is for processing the Housing Services - Housing Outcomes v2.0 report that is used by From 5eacdd7470400f6e3aaccbd0abca67ecfd3e8554 Mon Sep 17 00:00:00 2001 From: TPI Marienburg Date: Tue, 11 Dec 2018 17:01:12 -0800 Subject: [PATCH 2/7] RE: Issue #4 Add Comments Comments have been added to the main HousingOutcomes.py script as well as some cleanup to make the pre-existing comments easier to read. The spreadsheet will now have a default name and a default extension. The RunDate class, which was previously an inactive element of the HousingOutcomes.py script, has been moved into its own .py file so that it can be worked on independently without making the code overly confusing. I feel like having only a single class per .py file also conforms to best practices. --- Housing Outcomes.py | 57 +++++++++++++++++++-------------------------- RunDate.py | 34 +++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 33 deletions(-) create mode 100644 RunDate.py diff --git a/Housing Outcomes.py b/Housing Outcomes.py index c2ffe66..b6cbe73 100644 --- a/Housing Outcomes.py +++ b/Housing Outcomes.py @@ -3,71 +3,62 @@ """ This script is for processing the Housing Services - Housing Outcomes v2.0 report that is used by -the follow-ups specialistself. +the follow-ups specialist. 
This script should identify when it is being run and adjust its date parameters to reflect being run on the first of the month or after it. This well prevent future staff members from needing to fiddle with the code every time they run the report. """ +# import required libraries import pandas as pd -from datetime import date from datetime import datetime -from calendar import monthrange -from dateutil.relativedelta import relativedelta from tkinter.filedialog import askopenfilename from tkinter.filedialog import asksaveasfilename -class RunDate: - """ - This class is not currently used by the CreateRequiredFollowUps class but I would like to - eventually make it so that the sheetnames output by that class are modified by the relation to - the current month. - """ - def __init__(self): - self.today = datetime.now().date() - self.check_date() - - def check_date(self): - if self.today.day <= 5: - last_month = self.today + relativedelta(months=-1) - end_of_month = date( - year=last_month.year, - month=last_month.month, - day=monthrange(last_month.year, last_month.month)[1] - ) - return end_of_month - else: - end_of_month = date( - year=self.today.year, - month=self.today.month, - day=monthrange(self.today.year, self.today.month)[1] - ) - return end_of_month - - class CreateRequiredFollowUps: def __init__(self, file_path): + # read the excel report into a pandas data frame self.raw_data = pd.read_excel(file_path) + # not currently in use self.run_date = RunDate() + # create a immutable list of unique months during which follow-ups are + # due self.month_range = set( [value.strftime("%B") for value in self.raw_data["Follow Up Due Date(2512)"]] ) + # create month and year name variables for the name of the processed + # report self.current_month = datetime.now().month + self.current_year = datetime.now().year def process(self): + # create a local copy of the self.raw_data data frame data = self.raw_data + # initiate the ExcelWriter object variable writer = 
pd.ExcelWriter( - asksaveasfilename(title="Save the Required Follow-ups Report"), + asksaveasfilename( + title="Save the Required Follow-ups Report", + defaultextension=".xlsx", + initialfile="Required Follow-ups for {} {}".format( + self.current_month, + self.current_year + ) + ), engine="xlsxwriter" ) + # loop through the values of the self.month_range set creating dataframes + # where the value of Follow Up Due Date(2512) column is equal to the set + # item's value creating excel sheets for each of these data drames for month in self.month_range: month_data = data[ (data["Follow Up Due Date(2512)"].dt.strftime("%B") == month) & data["Actual Follow Up Date(2518)"].isna() ].drop_duplicates(subset="Client Uid") month_data.to_excel(writer, sheet_name="{} Follow-Ups".format(month), index=False) + # create an excel sheet containing the raw data data.to_excel(writer, sheet_name="Raw Data", index=False) + # save the spreadsheet writer.save() if __name__ == "__main__": diff --git a/RunDate.py b/RunDate.py new file mode 100644 index 0000000..f96a9c8 --- /dev/null +++ b/RunDate.py @@ -0,0 +1,34 @@ +__author__ == "David Marienburg" +__version__ == ".1" + +from datetime import date +from datetime import datetime +from calendar import monthrange +from dateutil.relativedelta import relativedelta + +class RunDate: + """ + This class is not currently used by the CreateRequiredFollowUps class but I would like to + eventually make it so that the sheetnames output by that class are modified by the relation to + the current month. 
+ """ + def __init__(self): + self.today = datetime.now().date() + self.check_date() + + def check_date(self): + if self.today.day <= 5: + last_month = self.today + relativedelta(months=-1) + end_of_month = date( + year=last_month.year, + month=last_month.month, + day=monthrange(last_month.year, last_month.month)[1] + ) + return end_of_month + else: + end_of_month = date( + year=self.today.year, + month=self.today.month, + day=monthrange(self.today.year, self.today.month)[1] + ) + return end_of_month From 6edc621fa7ded0cf195699a47bedac99b1b06559 Mon Sep 17 00:00:00 2001 From: TPI Marienburg Date: Wed, 12 Dec 2018 09:15:18 -0800 Subject: [PATCH 3/7] Program Identifiers Added! Placement program identifiers have been added to the report so that end users can filter for follow ups relevant to them. To that end the ART report was modified to include an additional sheet showing placement information. If the placement dates do not match the follow up dates this will cause an issue. An accompanying DQ report will be required to ensure that this all works as intended and that staff are entering data correctly. Additionally, the RunDate.py file has been removed as it seems to serve no purpose. I wonder what I was thinking when I added it. 
--- Housing Outcomes.py | 33 ++++++++++++++++++++++++--------- RunDate.py | 34 ---------------------------------- mid_month_addresses.py | 2 +- 3 files changed, 25 insertions(+), 44 deletions(-) delete mode 100644 RunDate.py diff --git a/Housing Outcomes.py b/Housing Outcomes.py index b6cbe73..6c514aa 100644 --- a/Housing Outcomes.py +++ b/Housing Outcomes.py @@ -18,14 +18,13 @@ class CreateRequiredFollowUps: def __init__(self, file_path): - # read the excel report into a pandas data frame - self.raw_data = pd.read_excel(file_path) - # not currently in use - self.run_date = RunDate() + # read the excel report into a pandas data frames + self.raw_fu_data = pd.read_excel(file_path, sheet_name="FollowUps") + self.raw_placement_data = pd.read_excel(file_path, sheet_name="Placements") # create a immutable list of unique months during which follow-ups are # due self.month_range = set( - [value.strftime("%B") for value in self.raw_data["Follow Up Due Date(2512)"]] + [value.strftime("%B") for value in self.raw_fu_data["Follow Up Due Date(2512)"]] ) # create month and year name variables for the name of the processed # report @@ -34,7 +33,12 @@ def __init__(self, file_path): def process(self): # create a local copy of the self.raw_data data frame - data = self.raw_data + data = self.raw_fu_data.merge( + self.raw_placement_data, + how="left", + left_on=["Client Unique Id", "Initial Placement/Eviction Prevention Date(2515)"], + right_on=["Client Unique Id", "Placement Date(3072)"] + ) # initiate the ExcelWriter object variable writer = pd.ExcelWriter( asksaveasfilename( @@ -54,13 +58,24 @@ def process(self): month_data = data[ (data["Follow Up Due Date(2512)"].dt.strftime("%B") == month) & data["Actual Follow Up Date(2518)"].isna() - ].drop_duplicates(subset="Client Uid") - month_data.to_excel(writer, sheet_name="{} Follow-Ups".format(month), index=False) + ].drop_duplicates( + subset="Client Unique Id" + ).drop( + ["Client Unique Id", "Client Uid_y", "Placement Date(3072)"], 
+ axis=1 + ) + month_data.to_excel( + writer, + sheet_name="{} Follow-Ups".format(month), + index=False + ) # create an excel sheet containing the raw data data.to_excel(writer, sheet_name="Raw Data", index=False) # save the spreadsheet writer.save() if __name__ == "__main__": - run = CreateRequiredFollowUps(askopenfilename(title="Open the Housing Outcomes v2.0 Report")) + run = CreateRequiredFollowUps( + askopenfilename(title="Open the Housing Outcomes v2.2 Report") + ) run.process() diff --git a/RunDate.py b/RunDate.py deleted file mode 100644 index f96a9c8..0000000 --- a/RunDate.py +++ /dev/null @@ -1,34 +0,0 @@ -__author__ == "David Marienburg" -__version__ == ".1" - -from datetime import date -from datetime import datetime -from calendar import monthrange -from dateutil.relativedelta import relativedelta - -class RunDate: - """ - This class is not currently used by the CreateRequiredFollowUps class but I would like to - eventually make it so that the sheetnames output by that class are modified by the relation to - the current month. 
- """ - def __init__(self): - self.today = datetime.now().date() - self.check_date() - - def check_date(self): - if self.today.day <= 5: - last_month = self.today + relativedelta(months=-1) - end_of_month = date( - year=last_month.year, - month=last_month.month, - day=monthrange(last_month.year, last_month.month)[1] - ) - return end_of_month - else: - end_of_month = date( - year=self.today.year, - month=self.today.month, - day=monthrange(self.today.year, self.today.month)[1] - ) - return end_of_month diff --git a/mid_month_addresses.py b/mid_month_addresses.py index 5709dcb..bcf77a5 100644 --- a/mid_month_addresses.py +++ b/mid_month_addresses.py @@ -14,7 +14,7 @@ class CreateAddressList: def __init__(self, file_path): - self.raw_data = pd.read_excel(file_path) + self.raw_data = pd.read_excel(file_path, sheet_name="FollowUps") self.current_month = dt.now().month self.current_year = dt.now().year From ebd610c8b7597182e40add14f942e2243b9feaee Mon Sep 17 00:00:00 2001 From: TPI Marienburg Date: Tue, 12 Feb 2019 08:10:44 -0800 Subject: [PATCH 4/7] The script will now only provide the most recent address. 
--- Housing Outcomes.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/Housing Outcomes.py b/Housing Outcomes.py index 6c514aa..25dae16 100644 --- a/Housing Outcomes.py +++ b/Housing Outcomes.py @@ -21,6 +21,7 @@ def __init__(self, file_path): # read the excel report into a pandas data frames self.raw_fu_data = pd.read_excel(file_path, sheet_name="FollowUps") self.raw_placement_data = pd.read_excel(file_path, sheet_name="Placements") + self.raw_address_data = pd.read_excel(file_path, sheet_name="Addresses") # create a immutable list of unique months during which follow-ups are # due self.month_range = set( @@ -32,10 +33,20 @@ def __init__(self, file_path): self.current_year = datetime.now().year def process(self): - # create a local copy of the self.raw_data data frame + # create a local copy of the self.raw_data data frame then merge that + # copy with the address and placement data frames to ensure that all + # followups are related to a TPI placement and that the Addresses + # provided are the newest addresses. data = self.raw_fu_data.merge( - self.raw_placement_data, + self.raw_address_data.sort_values( + by=["Client Unique Id", "Date Added (61-date_added)"], + ascending=False + ).drop_duplicates(subset="Client Unique Id"), how="left", + on="Client Unique Id" + ).merge( + self.raw_placement_data, + how="inner", left_on=["Client Unique Id", "Initial Placement/Eviction Prevention Date(2515)"], right_on=["Client Unique Id", "Placement Date(3072)"] ) From f992a09a167923cb355da9efdca28b97516cf4d1 Mon Sep 17 00:00:00 2001 From: TPI Marienburg Date: Tue, 12 Feb 2019 09:27:16 -0800 Subject: [PATCH 5/7] New "Contact Attempted" tab added to output. 
--- Housing Outcomes.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Housing Outcomes.py b/Housing Outcomes.py index 25dae16..6d60e0f 100644 --- a/Housing Outcomes.py +++ b/Housing Outcomes.py @@ -81,6 +81,10 @@ def process(self): index=False ) # create an excel sheet containing the raw data + data[ + (data["Follow-Up Status(2729)"] == "Attempted, Unable to contact client") | + (data["Is Client Still in Housing?(2519)"] == "Is Client Still in Housing?(2519)") + ].to_excel(writer, sheet_name="Contact Attempted", index=False) data.to_excel(writer, sheet_name="Raw Data", index=False) # save the spreadsheet writer.save() From 971a8b8c45439c7d4cf5a5f230bef7264ed266a7 Mon Sep 17 00:00:00 2001 From: TPI Marienburg Date: Tue, 19 Feb 2019 08:48:26 -0800 Subject: [PATCH 6/7] New version of the mid_month_addresses script that can now process the new raw data. --- Housing Outcomes.py | 2 +- mid_month_addresses.py | 40 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/Housing Outcomes.py b/Housing Outcomes.py index 6d60e0f..9ba69e2 100644 --- a/Housing Outcomes.py +++ b/Housing Outcomes.py @@ -91,6 +91,6 @@ def process(self): if __name__ == "__main__": run = CreateRequiredFollowUps( - askopenfilename(title="Open the Housing Outcomes v2.2 Report") + askopenfilename(title="Open the Housing Outcomes v2.3 Report") ) run.process() diff --git a/mid_month_addresses.py b/mid_month_addresses.py index bcf77a5..5d27ba1 100644 --- a/mid_month_addresses.py +++ b/mid_month_addresses.py @@ -15,11 +15,12 @@ class CreateAddressList: def __init__(self, file_path): self.raw_data = pd.read_excel(file_path, sheet_name="FollowUps") + self.raw_addresses = pd.read_excel(file_path, sheet_name="Addresses") self.current_month = dt.now().month self.current_year = dt.now().year def process(self): - data = self.raw_data[ + fu_data = self.raw_data[ ~(self.raw_data["Follow-Up Status(2729)"] == "Client contacted") & ~(self.raw_data["Follow-Up 
Status(2729)"] == "Other verifiable source contacted") & (self.raw_data["Follow Up Due Date(2512)"].dt.month == self.current_month) & @@ -30,7 +31,36 @@ def process(self): ).drop_duplicates( subset="Client Uid", keep="first" - ) + )[[ + "Client Uid", + "Client First Name", + "Client Last Name", + "Initial Placement/Eviction Prevention Date(2515)", + "Follow Up Due Date(2512)", + "Actual Follow Up Date(2518)", + "Follow-Up Status(2729)", + "Is Client Still in Housing?(2519)" + ]] + + address_data = self.raw_addresses( + self.raw_addresses["Client Uid"].isin(fu_data["Client Unique Id"]) + ).sort_values( + by=["Client Uid", "Date Added (61-date_added)"], + ascending=False + ).drop_duplicates( + subset="Client Uid", + keep="first" + )[[ + "Client Uid", + "Client's Street Address(62)", + "Client's Apartment Number(71)", + "Client's City(509)", + "Client's State(510)", + "Client's ZIP(496)", + "Home Phone Number(511)" + ]] + + writer = pd.ExcelWriter( asafn( title="Save the Non-Contacted Follow-Ups Report", @@ -39,7 +69,11 @@ def process(self): ), engine="xlsxwriter" ) - data.to_excel(writer, sheet_name="Data", index=False) + data.merge( + address_data, + on="Client Uid", + how="left" + ).to_excel(writer, sheet_name="Data", index=False) writer.save() if __name__ == "__main__": From 660edb22404a73878caa011630cfc149efcc4980 Mon Sep 17 00:00:00 2001 From: TPI Marienburg Date: Tue, 19 Feb 2019 08:55:43 -0800 Subject: [PATCH 7/7] typo fixes --- mid_month_addresses.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mid_month_addresses.py b/mid_month_addresses.py index 5d27ba1..81dd1f5 100644 --- a/mid_month_addresses.py +++ b/mid_month_addresses.py @@ -42,9 +42,9 @@ def process(self): "Is Client Still in Housing?(2519)" ]] - address_data = self.raw_addresses( - self.raw_addresses["Client Uid"].isin(fu_data["Client Unique Id"]) - ).sort_values( + address_data = self.raw_addresses[ + self.raw_addresses["Client Uid"].isin(fu_data["Client Uid"]) + 
].sort_values( by=["Client Uid", "Date Added (61-date_added)"], ascending=False ).drop_duplicates( @@ -69,7 +69,7 @@ def process(self): ), engine="xlsxwriter" ) - data.merge( + fu_data.merge( address_data, on="Client Uid", how="left"