Changes from all commits (26 commits)
b752705
moving Geography into models module; trimming api keys
jjackson12 Jun 20, 2023
5c2edb8
removing broken tests
jjackson12 Jun 20, 2023
5ba1dcc
templating get_elections
jjackson12 Jun 20, 2023
fab8491
templating elections more
jjackson12 Jun 20, 2023
eede1f6
passed tests for url building, api query
KathrynPanger Jul 5, 2023
8a0ce89
basic search function added for census tables
KathrynPanger Jul 5, 2023
89cfdd3
search function now includes variable (group level) names
KathrynPanger Jul 6, 2023
bd366a0
search function cleaned up
KathrynPanger Jul 6, 2023
2aa26aa
clean up comments
KathrynPanger Jul 6, 2023
dd8e25c
can now search variables without search string or number of results
KathrynPanger Jul 21, 2023
56680bf
query now possible with nested results
KathrynPanger Jul 25, 2023
75b757f
groups now accepted in api calls
KathrynPanger Jul 26, 2023
181bbec
repaired other tests to fit new geography structure
KathrynPanger Jul 26, 2023
862accb
dataframe creation under ideal conditions, functionality added
KathrynPanger Jul 26, 2023
55a2331
now possible to pull from more geographies with more unconventional a…
KathrynPanger Jul 26, 2023
bb63a0e
added examples
KathrynPanger Jul 31, 2023
23f9cac
fixed broken test, argument wasn't accepting groups
KathrynPanger Jul 31, 2023
8bbec97
started elections infrastructure
KathrynPanger Aug 1, 2023
7ea67a3
threading, but not grabbing everything yet
KathrynPanger Aug 14, 2023
ed40d8d
api functionality, with authentication
KathrynPanger Aug 24, 2023
e0f0639
can now download all csvs, also refactored
KathrynPanger Aug 26, 2023
3de666d
tool to collect all csv urls added,
KathrynPanger Aug 26, 2023
3277b7c
sql command functionality
KathrynPanger Sep 2, 2023
511f3c6
changed test main
KathrynPanger Sep 7, 2023
96a6ee2
Merge branch 'bluebonnet-data:master' into master
KathrynPanger Sep 7, 2023
0c85b39
cleaning up google cloud interface
KathrynPanger Dec 4, 2023
8 changes: 8 additions & 0 deletions .gitignore
@@ -6,6 +6,7 @@ __pycache__
.vscode
.tox
.python-version
.DS_Store

# Sublime environment
*.sublime-project
@@ -18,6 +19,8 @@ build

# User files
user
/local_files
/cached_csvs

# Shapefiles
*.dbf
@@ -31,6 +34,11 @@ user
*.json
*.html
*.csv
*.txt

# Shh, secrets
census_api_key.txt

#api key
config.py
*config.py
8 changes: 8 additions & 0 deletions .idea/.gitignore

19 changes: 19 additions & 0 deletions .idea/censusExplorer.iml

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

12 changes: 12 additions & 0 deletions .idea/misc.xml

8 changes: 8 additions & 0 deletions .idea/modules.xml

6 changes: 6 additions & 0 deletions .idea/vcs.xml

28 changes: 14 additions & 14 deletions README.md
@@ -38,14 +38,14 @@ For our working example, we'll use median household income (which is coded in th
We can simply download the census data like so:

```python
>>> from bbd import census
>>> data = census.get_acs(
>>> geography=census.Geography.CD,
>>> variables="group(DP03),NAME",
>>> year=2018,
>>> state="co",
>>> dataset=census.DataSets.ACS5_PROFILE,
>>> )
>>> from bbd import census
>>> data = census.get_acs(
>>> geography=census.GEOGRAPHY.CD,
>>> variables="group(DP03),NAME",
>>> year=2018,
>>> state="co",
>>> dataset=census.DataSet.ACS5_PROFILE,
>>> )
```

https://api.census.gov/data/2018/acs/acs1/profile?get=group(DP03),NAME&for=congressional%20district:*&in=state:08
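
For reference, that is roughly the raw query that `get_acs` builds and sends for you. A minimal sketch of the equivalent direct request using the `requests` library (an illustration only, not the library's internal implementation):

```python
import requests

# The same query, issued directly against the Census API.
url = (
    "https://api.census.gov/data/2018/acs/acs1/profile"
    "?get=group(DP03),NAME"
    "&for=congressional%20district:*"
    "&in=state:08"
)
rows = requests.get(url).json()  # first row is the header, the rest are data rows
```
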
@@ -86,12 +86,12 @@ When you request it from the API with `get_acs`, it is automatically converted t
Luckily, the census provides shapefiles for pretty much every `GEO_ID` you can find in the census API. You can automatically download them like this:

```python
>>> shapefile_path = census.get_shapefile(
>>> geography=census.Geography.CD,
>>> state="co",
>>> year=2018,
>>> cache=True,
>>> )
>>> shapefile_path = census.get_shapefile(
>>> geography=census.GEOGRAPHY.CD,
>>> state="co",
>>> year=2018,
>>> cache=True,
>>> )
```

To get these files manually, just head to [this website](https://www.census.gov/cgi-bin/geo/shapefiles/index.php) and select the relevant geography/location.
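
If you want to sanity-check the downloaded shapefile, you can open it with a library such as `geopandas` (a minimal sketch; `geopandas` is not part of `bbd` and must be installed separately):

```python
import geopandas as gpd  # pip install geopandas

gdf = gpd.read_file(shapefile_path)  # path returned by census.get_shapefile above
print(gdf.columns)  # inspect the available attributes, e.g. the GEOID used for joins
print(gdf.head())
```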
2 changes: 1 addition & 1 deletion examples/co_income.py
@@ -26,7 +26,7 @@
geography=census.Geography.TRACT,
variables=["NAME", "DP03_0062E"],
year=2018,
dataset=census.DataSets.ACS5_PROFILE,
dataset=census.DataSet.ACS5_PROFILE,
state="co",
county="069", # Larimer County
cache=True,
37 changes: 37 additions & 0 deletions examples/create_census_example.py
@@ -0,0 +1,37 @@
from pathlib import Path

from bbd.census import Census
from bbd.census.geographic_unit import GeographicUnit
# NOTE: the import path for these enums is an assumption; adjust it to match your bbd version.
from bbd.census import AnalysisLevel, DataSet, Geography

# HOW TO CREATE A CENSUS OBJECT FOR PERFORMING API PULLS
# 1. Read the API key securely into a variable
# 2. Create GeographicUnit objects
# 3. Create the Census object

###########################
# Import API key from file
###########################
api_key_file = Path(__file__).parent.absolute() / "census_api_key.txt"
with open(api_key_file, "r") as f:
    API_KEY = f.readline().strip()

###############################
# Create GeographicUnit Objects
###############################
# First, decide on the unit of analysis your final dataset will have.
# This determines how you set up your GeographicUnit objects.
# A GeographicUnit takes a geography, an analysis level, and a value.
# The analysis level indicates whether you want data "for" all locations of this type,
# or whether this geography is the top level: the place you want to find data "in".
# You can also use "none_level" for API pulls that require unorthodox arguments,
# which we will discuss in a future guide.

# Example: geographic units targeting data for all counties in the state of New York
# (36 = Census FIPS code for New York)
county = GeographicUnit(analysis_level=AnalysisLevel.FOR, geography=Geography.COUNTY, value="*")
state = GeographicUnit(analysis_level=AnalysisLevel.IN, geography=Geography.STATE, value="36")
geographic_units = [state, county]

########################
# Create a Census Object
########################
# Specify the year and dataset you want.
year = 2019
dataset = DataSet.ACS1
# Create the census object.
census = Census(api_key=API_KEY, geographic_units=geographic_units, year=year, dataset=dataset)
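
# The census object created above can be reused for multiple pulls in the same session;
# see get_data_example.py and search_variables_example.py for how to query data and
# search for variable codes with it.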
2 changes: 1 addition & 1 deletion examples/get_acs_example.py
@@ -14,7 +14,7 @@
geography=census.Geography.STATE,
variables="NAME,B03003_001E",
year=2018,
dataset=census.DataSets.ACS5_DETAIL,
dataset=census.DataSet.ACS5,
)

pprint(data)
33 changes: 33 additions & 0 deletions examples/get_data_example.py
@@ -0,0 +1,33 @@


# HOW TO PULL ACS DATA FROM A CENSUS OBJECT
# 1. Create a census object (see create_census_example.py for instructions)
# 2. Pull the data, as shown below

from pathlib import Path

from bbd.census import Census
from bbd.census.geographic_unit import GeographicUnit
# NOTE: the import path for these enums is an assumption; adjust it to match your bbd version.
from bbd.census import AnalysisLevel, DataSet, Geography

# Create a census object (see instructions in create_census_example.py)
api_key_file = Path(__file__).parent.absolute() / "census_api_key.txt"
with open(api_key_file, "r") as f:
    API_KEY = f.readline().strip()

county = GeographicUnit(analysis_level=AnalysisLevel.FOR, geography=Geography.COUNTY, value="*")
state = GeographicUnit(analysis_level=AnalysisLevel.IN, geography=Geography.STATE, value="36")
geographic_units = [state, county]
year = 2019
dataset = DataSet.ACS1
census = Census(api_key=API_KEY, geographic_units=geographic_units, year=year, dataset=dataset)


###############
# Pull the data
###############
# Specify any census variables you want.
variables = ["NAME", "B01001_001E"]
# Specify any census groups you want; results will include all variables belonging to that group.
groups = ["B02015"]
# Pull the data for those variables and groups.
result = census.get_data(variables=variables, groups=groups)
# The result will also be stored in census.results, which is a list of CensusResult objects created during this session.
# Get the dataframe out of the result object.
df = result.dataframe
print(df.head())
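
# Earlier pulls from this session can also be retrieved from census.results
# (assuming, for illustration, that results are appended in the order the pulls were made):
previous_result = census.results[-1]
print(previous_result.dataframe.head())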

30 changes: 30 additions & 0 deletions examples/search_variables_example.py
@@ -0,0 +1,30 @@
from pathlib import Path

from bbd.census import Census
from bbd.census.geographic_unit import GeographicUnit
# NOTE: the import path for these enums is an assumption; adjust it to match your bbd version.
from bbd.census import AnalysisLevel, DataSet, Geography

# HOW TO FIND THE CENSUS CODES FOR THE VARIABLES AND GROUPS YOU WANT TO EXAMINE
# Create a census object (see instructions in create_census_example.py)

api_key_file = Path(__file__).parent.absolute() / "census_api_key.txt"
with open(api_key_file, "r") as f:
    API_KEY = f.readline().strip()

county = GeographicUnit(analysis_level=AnalysisLevel.FOR, geography=Geography.COUNTY, value="*")
state = GeographicUnit(analysis_level=AnalysisLevel.IN, geography=Geography.STATE, value="36")
geographic_units = [state, county]
year = 2019
dataset = DataSet.ACS1
census = Census(api_key=API_KEY, geographic_units=geographic_units, year=year, dataset=dataset)

# Get a dataframe of all available variables in no particular order.
df = census.search_variables()
print(df)

# Get a dataframe of all available variables sorted by similarity of description to a given search string.
search_string = "female household"
df = census.search_variables(search_string=search_string)
print(df)

# Get a dataframe of variables sorted by similarity to a search string, limiting the number of results.
search_string = "housing cost"
number_of_results = 10
df = census.search_variables(search_string=search_string, number_of_results=number_of_results)
print(df)
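
# Once you have found the code you need, pass it to census.get_data (see get_data_example.py).
# For example, using the variable from that example (call sketch; whether `groups`
# can be omitted depends on your bbd version):
# result = census.get_data(variables=["B01001_001E"])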
2 changes: 1 addition & 1 deletion examples/tx_hispanic_or_latino.py
@@ -36,7 +36,7 @@
geography=census.Geography.BLOCKGROUP,
variables=["NAME", "B03003_001E", "B03003_002E", "B03003_003E"],
year=2018,
dataset=census.DataSets.ACS5_DETAIL,
dataset=census.DataSet.ACS5,
state="tx",
county="201", # Harris County
cache=True,
4 changes: 2 additions & 2 deletions examples/tx_zip_code_by_race.py
@@ -2,7 +2,7 @@

1) Make sure python is installed

2) Install the bluebonnet data package by running this command in your terminal:
2) Install the bbd data package by running this command in your terminal:
pip install bbd

3) Run this python file with the following command:
@@ -56,7 +56,7 @@
geography=census.Geography.ZCTA,
variables=list(variables.keys()),
year=2018,
dataset=census.DataSets.ACS5_DETAIL,
dataset=census.DataSet.ACS5,
# state="tx",
# county="201",  # Harris County
cache=True,