diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..b21f7f1 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,29 @@ +name: ci +on: + push: + branches: + - master + - main +permissions: + contents: write +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Configure Git Credentials + run: | + git config user.name github-actions[bot] + git config user.email 41898282+github-actions[bot]@users.noreply.github.com + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV + - uses: actions/cache@v4 + with: + key: ${{ github.ref }} + path: .cache + restore-keys: | + mkdocs-material- + - run: pip install mkdocs-material + - run: mkdocs gh-deploy --force \ No newline at end of file diff --git a/.gitignore b/.gitignore index 00d4014..b9e114b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # environment variables .env .Renviron +/venv # Output files /outputs diff --git a/README.md b/README.md index 6837ae5..1953459 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # Indicators of Potential Disadvantage -This project automates DVRPC's Indicators of Potential Disadvantage (IPD) analysis, including data download, processing, and export. For more on IPD analysis, see [Equity Analysis for the Greater Philadelphia Region v2.0](https://www.dvrpc.org/webmaps/ipd/). +This project automates DVRPC's Indicators of Potential Disadvantage (IPD) analysis, including data download, processing, and export. For more on IPD analysis, see [Equity Analysis for the Greater Philadelphia Region](https://www.dvrpc.org/webmaps/ipd/). ## Getting the Code and Software -1. Clone the repository. -2. Download and install R from https://www.r-project.org/ -3. Download and install R Studio from https://www.rstudio.com/products/rstudio/#Desktop +1. Clone the [Github repository](https://github.com/dvrpc/ipd) +2. 
[Download and install R](https://cran.rstudio.com/) +3. [Download and install R Studio](https://posit.co/download/rstudio-desktop/) ## Installing Package Dependencies @@ -49,14 +49,9 @@ Please provide your own API Key (this is required for the `tidycensus` package, After the code has finished, outputs are saved in the /outputs subdirectory of where you cloned the repository on your local machine, including: -- ipd.csv: tract-level statistics and scores for IPD's nine indicators -- ipd.shp: spatial version of ipd.csv -- breaks_by_indicator.csv: bin breaks by indicator -- counts_by_indicator.csv: census tract counts by bin and indicator -- summary_by_indicator.csv: basic summary stats by indicator -- mean_by_county.csv: population-weighted county means by indicator - -## Additional Information - -- [documentation/discussion.pdf](https://github.com/dvrpc/ipd/blob/master/documentation/discussion.pdf) shows the essential math required to compute IPD scores. -- [documentation/script_reference.pdf](https://github.com/dvrpc/ipd/blob/master/documentation/script_reference.pdf) is a companion document to the script and explains the way the script downloads data and implements IPD analysis, code chunk by code chunk. 
+- ipd_`ipd_year`.csv: tract-level statistics and scores for IPD's nine indicators +- ipd_`ipd_year`.shp: spatial version of ipd_`ipd_year`.csv +- breaks_by_indicator_`ipd_year`.csv: bin breaks by indicator +- counts_by_indicator_`ipd_year`.csv: census tract counts by bin and indicator +- summary_by_indicator_`ipd_year`.csv: basic summary stats by indicator +- means_by_county_`ipd_year`.csv: population-weighted county means by indicator \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..ef8940e --- /dev/null +++ b/docs/index.md @@ -0,0 +1,39 @@ +## Background +DVRPC's [Indicators of Potential Disadvantage](https://www.dvrpc.org/webmaps/ipd/#home) (IPD) identify populations of interest under Title VI of the Civil Rights Act and the Executive Order on Environmental Justice (#12898) using American Community Survey (ACS) five-year estimates from the U.S. Census Bureau. IPD analysis assists both DVRPC and outside organizations in equity work by identifying populations of interest, including youth, older adults, female, racial minority, ethnic minority, foreign-born, limited English proficiency, disabled, and low-income populations at the census tract level in DVRPC's nine-county region. + +The overview below describes the calculations required to conduct IPD analysis and is meant to provide enough information to replicate prior years of IPD analysis. 
+ +## IPD Indicators and ACS Data +| IPD Indicator | ACS Data Table | Protected Class Representation | Authorizing Source/Guiding Document | +| ------------- | -------------- | ------------------------------ | ----------------------------------- | +| Youth | B09001: Population Under 18 Years by Age | Age | FHWA's Title VI Program and Related Authorities: 23 CFR 200 | +| Older Adults | S0101: Age and Sex | Age | FHWA's Title VI Program and Related Authorities: 23 CFR 200 | +| Female | S0101: Age and Sex | Sex | FHWA's Title VI Program and Related Authorities: 23 CFR 200 | +| Racial Minority | B02001: Race | Race and Minority | Executive Order 12898, Title VI of the Civil Rights Act of 1964, FHWA's Title VI Program and Related Authorities: 23 CFR, and Title VI Requirements and Guidelines for FTA Recipients | +| Ethnic Minority | B03002: Hispanic or Latino Origin by Race | Minority and National Origin | Executive Order 12898, Title VI of the Civil Rights Act of 1964, FHWA's Title VI Program and Related Authorities: 23 CFR, and Title VI Requirements and Guidelines for FTA Recipients | +| Foreign-Born | B05012: Nativity in the United States | National Origin | Title VI of the Civil Rights Act of 1964, FHWA's Title VI Program and Related Authorities: 23 CFR, and Title VI Requirements and Guidelines for FTA Recipients | +| Limited English Proficiency | S1601: Language Spoken at Home | Limited English Proficiency and National Origin | Title VI of the Civil Rights Act of 1964, FHWA's Title VI Program and Related Authorities: 23 CFR, and Title VI Requirements and Guidelines for FTA Recipients | +| Disabled | S1810: Disability Characteristics | Disability | FHWA's Title VI Program and Related Authorities: 23 CFR | +| Low-Income | S1701: Poverty Status in Past 12 Months | Low-Income | Executive Order 12898 and FHWA's Title VI Program and Related Authorities: 23 CFR 200 | + +## IPD Score +IPD Scores can be presented in two ways: individual IPD score and composite IPD score. 
+ +### Individual IPD Score +For each indicator, percent estimates are split into five bins, which are detailed in the table below. There is one exception to the standard deviation classification: if `mean(pop) - (1.5 * stdev(pop))` is a negative value, it is manually reassigned to 0.1. This ensures that at least some census tracts fall in the bottom bin regardless of the spread of the indicator. + +`p` = percent estimate +`mean(pop)` = mean for indicator population +`stdev(pop)` = standard deviation for indicator population + +| IPD Score | IPD Classification | Standard Deviations | +|:---------:|:------------------:|:-------------------:| +| 0 | Well Below Average | p < mean(pop) - (1.5 * stdev(pop)) | +| 1 | Below Average | mean(pop) - (1.5 * stdev(pop)) <= p < mean(pop) - (0.5 * stdev(pop))| +| 2 | Average | mean(pop) - (0.5 * stdev(pop)) <= p < mean(pop) + (0.5 * stdev(pop))| +| 3 | Above Average | mean(pop) + (0.5 * stdev(pop)) <= p < mean(pop) + (1.5 * stdev(pop))| +| 4 | Well Above Average | p >= mean(pop) + (1.5 * stdev(pop))| + +### Composite IPD Score +The composite IPD score is computed by summing the individual IPD scores. In theory, the composite IPD score can range from 0 to 36, since each indicator's IPD score can be as high as 4. In practice, the mean composite score in 2021 is 17.57, and the highest observed composite score is 32. + diff --git a/docs/script_reference.md b/docs/script_reference.md new file mode 100644 index 0000000..6495eb4 --- /dev/null +++ b/docs/script_reference.md @@ -0,0 +1,570 @@ +## About +DVRPC's IPD analysis identifies populations of interest under Title VI of the Civil Rights Act and the Executive Order on Environmental Justice (#12898) using American Community Survey (ACS) five-year estimates from the U.S. Census Bureau. 
IPD analysis assists both DVRPC and outside organizations in equity work by identifying populations of interest, including youth, older adults, female, racial minority, ethnic minority, foreign-born, limited English proficiency, disabled, and low-income populations at the census tract level in DVRPC's nine-county region. + +There are many ways of identifying these populations of interest. This document discusses DVRPC's process, which is automated in an `R` script. + +### Output Abbreviation +Components of field names that you'll see in `outputs` and throughout the script. + + | Component | Equivalent | + | --------- | --------------------------------- | + | d | Disabled | + | em | Ethnic Minority | + | f | Female | + | fb | Foreign-Born | + | lep | Limited English Proficiency | + | li | Low-Income | + | oa | Older Adults | + | rm | Racial Minority | + | y | Youth | + + +Abbreviations of field names that you'll see in `outputs` *not* comprised of the above components. + + +| Abbreviation | Equivalent | +|:-------------|:-----------| +| GEOID | Census Tract Identifier | +| STATEFP | State FIPS Code | +| COUNTYFP | County FIPS Code | +| NAME | Census Tract FIPS Code | + + +## Setup +### Dependencies +Packages required to run this script. If you don't have the packages, you'll get the warning `Error in library () : there is no package called ''`, in which case you'll need to install the package before proceeding. + +```{r packages, message = FALSE} +library(plyr); library(here); library(sf); library(summarytools); +library(tidycensus); library(tidyverse); library(tigris); library(dplyr); library(descr); +``` + +### Census API Key +Placeholder if you have never installed an API key before. If this is your first time accessing the Census API using `R`, see `getting_started.pdf` in the `documentation` folder. 
+ +```{r api_key} +# Census API Key +# census_api_key("YOUR API KEY GOES HERE", install = TRUE) +``` + +### Inputs and Settings + +``` +ipd_year <- 2022 +ipd_states <- c("NJ", "PA") +dvrpc_counties <- c('^34005|^34007|^34015|^34021|^42017|^42029|^42045|^42091|^42101') +ipd_counties <- c("34005", "34007", "34015", "34021", "42017", "42029", "42045", "42091", "42101") +output_dir <- "data\\" +``` + +### Fields +The base information we need for IPD analysis consists of universes, counts, and percentages for nine indicators at the census tract level. For each indicator, the table below shows the indicator name, its abbreviation used in the script, its universe, its count, and its percentage field if applicable. Percentage estimates will be calculated for indicators if they are not provided in the ACS. + + +| Indicator | Abbreviation | Universe | Count | Percentage | +|:----------|:------------:|:--------:|:-----:|:----------:| +| Disabled | d | S1810_C01_001 | S1810_C02_001 | S1810_C03_001 | +| Ethnic Minority | em | B03002_001 | B03002_012 | N/A | +| Female | f | S0101_C01_001 | S0101_C05_001 | DP05_0003PE | +| Foreign-Born | fb | B05012_001 | B05012_003 | N/A | +| Limited English Proficiency | lep | S1601_C01_001 | S1601_C05_001 | S1601_C06_001 | +| Low-Income | li | S1701_C01_001 | S1701_C01_042 | N/A | +| Older Adults | oa | S0101_C01_001 | S0101_C01_030 | S0101_C02_030 | +| Racial Minority | rm | B02001_001 | B02001_003...008 | N/A | +| Youth | y | B03002_001 | B09001_001 | N/A | + + +The user should check that the field names point to the correct [API request](https://www.census.gov/data/developers/data-sets.html). For a history of the ACS variables used in previous IPD results, check prior [IPD releases](https://github.com/dvrpc/ipd/releases). + + +## Preparing Census Data + +### Data Table Lists +Fields are organized in vectors according to the data table each one is located in. This will make it easier to pull the data using the `get_acs` function. 
+ +`dt` = Detailed Tables +`st` = Subject Tables +`dp` = Data Profiles + +``` +acs5_dt_list <- c( + tot_pop = "B01003_001", # Total Population + em_uni = "B03002_001", # Ethnic Minority + em_est = "B03002_012", + fb_uni = "B05012_001", # Foreign-born + fb_est = "B05012_003", + rm_uni = "B02001_001", # Racial minority + blk_est = "B02001_003", # Black or African American alone + aia_est = "B02001_004", # American Indian and Alaska Native alone + asn_est = "B02001_005", # Asian alone + hpi_est = "B02001_006", # Native Hawaiian and Other Pacific Islander alone + oth_est = "B02001_007", # Some other race alone + two_est = "B02001_008", # Two or more races + y_est = "B09001_001" # Youth +) + +acs5_st_list <- c( + lep_uni = "S1601_C01_001", # Limited English Proficiency + lep_est = "S1601_C05_001", + lep_pct = "S1601_C06_001", + d_uni = "S1810_C01_001", # Disabled + d_est = "S1810_C02_001", + d_pct = "S1810_C03_001", + f_uni = "S0101_C01_001", + f_est = "S0101_C05_001", # Female + li_uni = "S1701_C01_001", # Low Income + li_est = "S1701_C01_042", + oa_uni = "S0101_C01_001", # Older Population + oa_est = "S0101_C01_030", + oa_pct = "S0101_C02_030" +) + +acs5_dp_list <- c( + f_pct = "DP05_0003P" +) +``` + +### Pull ACS Data +``` +raw_dt_data <- get_acs(geography = "tract", + variables = acs5_dt_list, + year = ipd_year, + state = ipd_states, + survey = "acs5", + output = "wide" +) %>% + mutate(year = ipd_year) %>% + filter(str_detect(GEOID, dvrpc_counties)) %>% + dplyr::select(-NAME) %>% + 'colnames<-'(str_replace(colnames(.), "E$", "")) %>% + 'colnames<-'(str_replace(colnames(.), "M$", "_MOE")) + +raw_st_data <- get_acs(geography = "tract", + variables = acs5_st_list, + year = ipd_year, + state = ipd_states, + survey = "acs5", + output = "wide" +) %>% + mutate(year = ipd_year) %>% + filter(str_detect(GEOID, dvrpc_counties)) %>% + dplyr::select(-NAME) %>% + 'colnames<-'(str_replace(colnames(.), "E$", "")) %>% + 'colnames<-'(str_replace(colnames(.), "M$", "_MOE")) + 
+raw_dp_data <- get_acs(geography = "tract", + variables = acs5_dp_list, + year = ipd_year, + state = ipd_states, + survey = "acs5", + output = "wide" +) %>% + mutate(year = ipd_year) %>% + filter(str_detect(GEOID, dvrpc_counties)) %>% + dplyr::select(-NAME) %>% + 'colnames<-'(str_replace(colnames(.), "E$", "")) %>% + 'colnames<-'(str_replace(colnames(.), "M$", "_MOE")) +``` + +### Combine Tables +``` +raw_data_combined <- raw_dt_data %>% + inner_join(raw_st_data) %>% + inner_join(raw_dp_data) +``` + +## Data Transformations +Before calculating IPD scores, the raw ACS data needs to be transformed. This includes calculating percent and margin of error (MOE) estimates that are not provided by the ACS and removing census tract geographies to reduce data skew. + +### Calculate Percentages and MOEs +The ACS does not provide percent or MOE estimates for the following IPD fields: Ethnic Minority, Foreign-Born, Low-Income, Youth. The percent estimates are calculated by dividing the count estimate for each variable by its population estimate. The MOE is calculated using the `moe_prop` function in R. 
+ +``` +estimates_table <- raw_data_combined %>% + mutate(rm_est = blk_est + aia_est + asn_est + hpi_est + oth_est + two_est) %>% # Racial minority calculation + select(-blk_est, -aia_est, -asn_est, -hpi_est, -oth_est, -two_est, -blk_est_MOE, -aia_est_MOE, -asn_est_MOE, -hpi_est_MOE, -oth_est_MOE, -two_est_MOE) %>% + mutate(rm_pct = round(100 * (rm_est/rm_uni), digits = 1)) %>% + mutate(em_pct = round(100 * (em_est/em_uni), digits = 1)) %>% + mutate(fb_pct = round(100 * (fb_est/fb_uni), digits = 1)) %>% + mutate(li_pct = round(100 * (li_est/li_uni), digits = 1)) %>% + mutate(y_pct = round(100 * (y_est/tot_pop), digits = 1)) %>% + mutate(em_pct_MOE = round(moe_prop(em_est,em_uni,em_est_MOE,em_uni_MOE) * 100,1)) %>% + mutate(fb_pct_MOE = round(moe_prop(fb_est,fb_uni,fb_est_MOE,fb_uni_MOE) * 100,1)) %>% + mutate(li_pct_MOE = round(moe_prop(li_est,li_uni,li_est_MOE,li_uni_MOE) * 100,1)) %>% + mutate(y_pct_MOE = round(moe_prop(y_est,tot_pop,y_est_MOE,tot_pop_MOE) * 100,1)) +``` + +### Calculate Racial Minority MOE +The racial minority indicator is created by summing up several subgroups in ACS Table B02001. This means that the MOE for the count has to be computed. While the ACS has issued guidance on computing the MOE by aggregating subgroups, using the approximation formula can artificially deflate the derived MOE. Variance replicate tables are used instead to account for covariance and compute a more accurate MOE. + +See the Census Bureau's [Variance Replicate Tables Documentation](https://www.census.gov/programs-surveys/acs/data/variance-tables.html) for additional guidance on working with variance replicates. + +``` +ipd_states_numeric <- fips_codes %>% + filter(state %in% ipd_states) %>% + select(state_code) %>% distinct(.) %>% pull(.) 
+var_rep <- NULL + +for (i in 1:length(ipd_states)){ + url <- paste0("https://www2.census.gov/programs-surveys/acs/replicate_estimates/", + ipd_year, + "/data/5-year/140/B02001_", + ipd_states_numeric[i], + ".csv.zip") + temp <- tempfile() + download.file(url, temp) + var_rep_i <- read.csv(unzip(temp)) + var_rep <- dplyr::bind_rows(var_rep, var_rep_i) +} + +# function to calculate sqdiff +sqdiff_fn <- function(v, e) (v - e) ^ 2 + +var_rep <- var_rep %>% + mutate_at(vars(GEOID), ~(str_sub(., 10, 20))) %>% + filter(str_sub(GEOID, 1, 5) %in% ipd_counties) %>% + select(-TBLID, -NAME, -ORDER, -MOE, -CME, -SE) %>% + filter(TITLE %in% c("Black or African American alone", + "American Indian and Alaska Native alone", + "Asian alone", + "Native Hawaiian and Other Pacific Islander alone", + "Some other race alone", + "Two or more races:")) %>% + group_by(GEOID) %>% + summarize_if(is.numeric, ~ sum(.)) + +ids <- var_rep %>% select(GEOID) %>% pull(.) +rep_estimates <- var_rep %>% select(ESTIMATE) +replicates <- var_rep %>% select(-GEOID, -ESTIMATE) + +sqdiff <- mapply(sqdiff_fn, replicates, rep_estimates) +sum_sqdiff <- rowSums(sqdiff, dims=1) +moe <- round(sqrt(0.05 * sum_sqdiff) * 1.645, 0) #sqrt(variance) * 1.645 +rm_moe <- cbind(ids, moe) %>% + as_tibble(.) %>% + rename(GEOID = ids, rm_est_MOE = moe) %>% + mutate_at(vars(rm_est_MOE), as.numeric) + +estimates_table <- estimates_table %>% + left_join(., rm_moe) %>% + mutate(rm_pct_MOE = round(moe_prop(rm_est,rm_uni,rm_est_MOE,rm_uni_MOE) * 100,1)) +``` + +### Drop Exceptional Census Tracts +There are 33 census tracts dropped from the IPD calculation. These tracts either have low population counts or contain correctional facilities or military bases. These tracts are removed from the IPD calculation to avoid skewing the standard deviation results. 
+ + +``` +low_pop_tracts <- c("34005981802","34005982200","34021980000","42017980000", + "42045980300","42045980000","42045980200","42091980100", + "42091980000","42091980200","42091980300","42101036901", + "42101980001","42101980002","42101980003","42101980300", + "42101980701","42101980702","42101980800","42101980100", + "42101980200", "42101980400","42101980500","42101980600", + "42101980901","42101980902","42101980903","42101980904", + "42101980905","42101980906", "42101989100","42101989200", + "42101989300") + + +estimates_table_clean <- estimates_table %>% + select(-matches("_uni")) %>% + filter(!GEOID %in% low_pop_tracts) +``` + +## Calculate IPD Score + +`p` = percent estimate +`mean(pop)` = mean for indicator population +`stdev(pop)` = standard deviation for indicator population + +| IPD Score | IPD Classification | Standard Deviations | +|:---------:|:------------------:|:-------------------:| +| 0 | Well Below Average | p < mean(pop) - (1.5 * stdev(pop)) | +| 1 | Below Average | mean(pop) - (1.5 * stdev(pop)) <= p < mean(pop) - (0.5 * stdev(pop))| +| 2 | Average | mean(pop) - (0.5 * stdev(pop)) <= p < mean(pop) + (0.5 * stdev(pop))| +| 3 | Above Average | mean(pop) + (0.5 * stdev(pop)) <= p < mean(pop) + (1.5 * stdev(pop))| +| 4 | Well Above Average | p >= mean(pop) + (1.5 * stdev(pop))| + +``` +# Define Test Table +test_table <- estimates_table_clean + + +# Variables +vars <- list("lep_pct", "d_pct", "oa_pct", "rm_pct", "f_pct", "em_pct", "fb_pct", "li_pct", "y_pct") + + +# Function to calculate indicator percentile and score +calculate_score <- function(data, var) { + means <- mean(data[[var]], na.rm = TRUE) + stdev <- sd(data[[var]], na.rm = TRUE) + score_col <- paste0(var, "_score") + class_col <- paste0(var, "_class") + pctile_col <- paste0(var, "_pctile") + data <- data %>% + mutate(!!score_col := case_when( + data[[var]] < ifelse(means - (1.5 * stdev) < 0, 0.1, means - (1.5 * stdev)) ~ 0, + data[[var]] >= means - (1.5 * stdev) & data[[var]] < 
means - (0.5 * stdev) ~ 1, + data[[var]] >= means - (0.5 * stdev) & data[[var]] < means + (0.5 * stdev) ~ 2, + data[[var]] >= means + (0.5 * stdev) & data[[var]] < means + (1.5 * stdev) ~ 3, + data[[var]] >= means + (1.5 * stdev) ~ 4 + )) %>% + mutate(!!class_col := case_when( + data[[var]] < ifelse(means - (1.5 * stdev) < 0, 0.1, means - (1.5 * stdev)) ~ "Well Below Average", + data[[var]] >= means - (1.5 * stdev) & data[[var]] < means - (0.5 * stdev) ~ "Below Average", + data[[var]] >= means - (0.5 * stdev) & data[[var]] < means + (0.5 * stdev) ~ "Average", + data[[var]] >= means + (0.5 * stdev) & data[[var]] < means + (1.5 * stdev) ~ "Above Average", + data[[var]] >= means + (1.5 * stdev) ~ "Well Above Average" + )) %>% + mutate(!!pctile_col := round(percent_rank(data[[var]]), 2)) + return(data) +} + +# Applying the function to each variable +for (var in vars) { + test_table <- calculate_score(test_table, var) +} + +# Calculate Total IPD Score +test_table$ipd_score <- rowSums(select(test_table, ends_with("_score")), na.rm = TRUE) +``` + + +## Spatial Data +``` +ipd_table <- ipd_table %>% + rename(GEOID20 = GEOID) %>% + mutate(STATEFP20 = str_sub(GEOID20, 1, 2)) %>% + mutate(COUNTYFP20 = str_sub(GEOID20, 3, 5)) %>% + mutate(NAME20 = str_sub(GEOID20, 6, 11)) %>% + mutate(namelsad = paste(substr(GEOID20, 6, 9), substr(GEOID20, 10, 11), sep = ".")) + +pa_tracts <- tracts("42", c("017", "029", "045", "091", "101")) +nj_tracts <- tracts("34", c("005", "007", "015", "021")) + +region_tracts <- rbind(pa_tracts, nj_tracts) %>% + st_transform(., 26918) + +ipd_shapefile <- region_tracts %>% + left_join(ipd_table, by=c("GEOID"="GEOID20")) + +# Import Tract to MCD Lookup +tract_mcd_lookup <- st_read("U:\\_OngoingProjects\\Census\\_Geographies\\Census_Boundaries_2020.gdb", layer="TractToMCD_Lookup20") %>% + select(geoid20, mun1, mun2, mun3, mcdgeo1, mcdgeo2, mcdgeo3) + +# Join IPD table with Lookup +ipd_shapefile <- ipd_shapefile %>% + left_join(tract_mcd_lookup, 
by=c("GEOID"="geoid20")) +``` + +## Summary Tables +This section generates a handful of other deliverables, including: + +### Counts by Indicator +``` +counts <- ipd_table %>% select(ends_with("_class")) + +export_counts <- apply(counts, 2, function(i) plyr::count(i)) +for(i in 1:length(export_counts)){ + export_counts[[i]]$var <- names(export_counts)[i] +} + +export_counts <- map_dfr(export_counts, `[`, c("var", "x", "freq")) + +colnames(export_counts) <- c("Variable", "Classification", "Count") + +export_counts$Classification <- factor(export_counts$Classification, + levels = c("Well Below Average", + "Below Average", + "Average", + "Above Average", + "Well Above Average", + "NA")) + +export_counts <- arrange(export_counts, Variable, Classification) + +counts_table <- export_counts %>% + spread(Classification, Count) %>% + mutate(TOTAL = rowSums(.[2:7], na.rm = TRUE)) +``` + +### Breaks by Indicator +``` +breaks_table <- ipd_table %>% + select(ends_with("_pct")) + +calculate_class_breaks <- function(input_df) { + breaks_df <- data.frame(matrix(NA, nrow = 6, ncol = ncol(input_df) + 1)) + colnames(breaks_df) <- c("Break", colnames(input_df)) + + breaks_df$Break <- c("Min", "1", "2", "3", "4", "Max") + + for (i in 1:ncol(input_df)) { + x <- input_df[[i]] + mean_x <- mean(x, na.rm = TRUE) + sd_x <- sd(x, na.rm = TRUE) + + min_break <- 0 + b1 <- round(mean_x - (1.5 * sd_x), 1) + if (b1 < 0) { + b1 <- 0.1 + } + b2 <- round(mean_x - (0.5 * sd_x), 1) + b3 <- round(mean_x + (0.5 * sd_x), 1) + b4 <- round(mean_x + (1.5 * sd_x), 1) + max_break <- round(max(x, na.rm = TRUE), 1) + + breaks_df[, i + 1] <- c(min_break, b1, b2, b3, b4, max_break) + } + + return(breaks_df) +} + + +class_breaks_table <- calculate_class_breaks(breaks_table) +``` +### Summary by Indicator +``` +description <- function(i) { + des <- as.numeric(summarytools::descr(i, na.rm = TRUE, + stats = c("min", "med", "mean", "sd", "max"))) + des <- c(des[1:4], des[4] / 2, des[5]) + return(des) +} + +pcts <- 
ipd_table %>% select(ends_with("_pct")) + +round_1 <- function(i) round(i, 1) +round_2 <- function(i) round(i, 2) + +summary_data <- apply(pcts, MARGIN=2, description) + +summary_table <- as_tibble(summary_data) %>% + mutate_all(round_2) %>% + mutate(Statistic = c("Minimum", "Median", "Mean", "SD", "Half-SD", "Maximum")) %>% + select(Statistic, tidyselect::peek_vars()) +``` +### County-Level Means by Indicator +``` +means_table <- estimates_table %>% + mutate(county_fips = str_sub(GEOID, 1, 5)) %>% + select(-GEOID, tot_pop, ends_with("_est"), ends_with("_uni"), -matches("MOE"), -year) %>% + group_by(county_fips) %>% + summarise( + d_pctest = sum(d_est)/sum(d_uni), + em_pctest = sum(em_est)/sum(em_uni), + f_pctest = sum(f_est)/sum(f_uni), + fb_pctest = sum(fb_est)/sum(fb_uni), + lep_pctest = sum(lep_est)/sum(lep_uni), + li_pctest = sum(li_est)/sum(li_uni), + oa_pctest = sum(oa_est)/sum(tot_pop), + rm_pctest = sum(rm_est)/sum(rm_uni), + y_pctest = sum(y_est)/sum(tot_pop) + ) %>% + mutate_if(is.numeric, ~ . * 100) %>% + mutate_if(is.numeric, round_1) +``` + +## Export Data +Results are saved in `outputs`. + +``` +write.csv(ipd_table, paste(output_dir,"ipd_", ipd_year, ".csv", sep="")) +st_write(ipd_shapefile, paste(output_dir,"ipd_", ipd_year, ".shp", sep="")) + +write.csv(counts_table, paste(output_dir,"counts_by_indicator_", ipd_year, ".csv", sep="")) +write.csv(class_breaks_table, paste(output_dir,"breaks_by_indicator_", ipd_year, ".csv", sep="")) +write.csv(summary_table, paste(output_dir,"summary_by_indicator_", ipd_year, ".csv", sep="")) +write.csv(means_table, paste(output_dir,"means_by_county_", ipd_year, ".csv", sep="")) +``` + +## Metadata Table +This is a table of the final output with some additional data such as municipality name and area added through GIS processes but not included in the R script. 
+ + +| Variable | Concept | acs table | acs variable | data source | Source Type | Universe Variable | +|------------|---------------------------------------------|-----------|--------------|-------------|-------------|-------------------| +| geoid20 | 11-digit tract GEOID | n/a | n/a | ACS 5-year | n/a | n/a | +| statefp20 | 2-digit state GEOID | n/a | n/a | ACS 5-year | n/a | n/a | +| countyfp20 | 3-digit county GEOID | n/a | n/a | ACS 5-year | n/a | n/a | +| name20 | Tract and county name | n/a | n/a | ACS 5-year | n/a | n/a | +| d_class | Disabled percentile class | n/a | n/a | calculated | calculated | S1810_C01_001 | +| d_cntest | Disabled count estimate | S1810 | S1810_C02_001_E | ACS 5-year | acs variable | S1810_C01_001 | +| d_cntmoe | Disabled count margin of error | S1810 | S1810_C02_001_M | ACS 5-year | acs variable | S1810_C01_001 | +| d_pctest | Disabled percent estimate | S1810 | S1810_C03_001_E | ACS 5-year | acs variable | S1810_C01_001 | +| d_pctile | Disabled percentile | n/a | n/a | calculated | calculated | S1810_C01_001 | +| d_pctmoe | Disabled percent margin of error | S1810 | S1810_C03_001_M | ACS 5-year | acs variable | S1810_C01_001 | +| d_score | Disabled percentile score | n/a | n/a | calculated | calculated | S1810_C01_001 | +| em_class | Ethnic minority percentile class | B03002 | n/a | calculated | calculated | B03002_001 | +| em_cntest | Ethnic minority count estimate | B03002 | B03002_012_E | ACS 5-year | acs variable | B03002_001 | +| em_cntmoe | Ethnic minority count margin of error | B03002 | B03002_012_M | ACS 5-year | acs variable | B03002_001 | +| em_pctest | Ethnic minority percent estimate | B03002 | n/a | calculated | calculated | B03002_001 | +| em_pctile | Ethnic minority percentile | B03002 | n/a | calculated | calculated | B03002_001 | +| em_pctmoe | Ethnic minority percent margin of error | B03002 | n/a | calculated | calculated | B03002_001 | +| em_score | Ethnic minority percentile score | 
B03002 | n/a | calculated | calculated | B03002_001 | +| f_class | Female percentile class | S0101 | n/a | calculated | calculated | S0101_C01_001 | +| f_cntest | Female count estimate | S0101 | S0101_C05_001_E | ACS 5-year | acs variable | S0101_C01_001 | +| f_cntmoe | Female count margin of error | S0101 | S0101_C05_001_M | ACS 5-year | acs variable | S0101_C01_001 | +| f_pctest | Female percent estimate | DP05 | DP05_0003PE | ACS 5-year | acs variable | S0101_C01_001 | +| f_pctile | Female percentile | S0101 | n/a | calculated | calculated | S0101_C01_001 | +| f_pctmoe | Female percent margin of error | DP05 | DP05_0003PM | ACS 5-year | acs variable | S0101_C01_001 | +| f_score | Female percentile score | S0101 | n/a | calculated | calculated | S0101_C01_001 | +| fb_class | Foreign-born percentile class | B05012 | n/a | calculated | calculated | B05012_001 | +| fb_cntest | Foreign-born count estimate | B05012 | B05012_003_E | ACS 5-year | acs variable | B05012_001 | +| fb_cntmoe | Foreign-born count margin of error | B05012 | B05012_003_M | ACS 5-year | acs variable | B05012_001 | +| fb_pctest | Foreign-born percent estimate | B05012 | n/a | calculated | calculated | B05012_001 | +| fb_pctile | Foreign-born percentile | B05012 | n/a | calculated | calculated | B05012_001 | +| fb_pctmoe | Foreign-born percent margin of error | B05012 | n/a | calculated | calculated | B05012_001 | +| fb_score | Foreign-born percentile score | B05012 | n/a | calculated | calculated | B05012_001 | +| lep_class | Limited English proficiency percentile class | S1601 | n/a | calculated | calculated | S1601_C01_001 | +| lep_cntest | Limited English proficiency count estimate | S1601 | S1601_C05_001_E | ACS 5-year | acs variable | S1601_C01_001 | +| lep_cntmoe | Limited English proficiency count margin of error | S1601 | S1601_C05_001_M | ACS 5-year | acs variable | S1601_C01_001 | +| lep_pctest | Limited English proficiency percent estimate | S1601 | S1601_C06_001_E | ACS 
5-year | acs variable | S1601_C01_001 | +| lep_pctile | Limited English proficiency percentile | S1601 | n/a | calculated | calculated | S1601_C01_001 | +| lep_pctmoe | Limited English proficiency percent margin of error | S1601 | S1601_C06_001_M | ACS 5-year | acs variable | S1601_C01_001 | +| lep_score | Limited English proficiency percentile score | S1601 | n/a | calculated | calculated | S1601_C01_001 | +| li_class | Low-income percentile class | n/a | n/a | calculated | calculated | S1701_C01_001 | +| li_cntest | Low-income count estimate | S1701 | S1701_C01_042_E | ACS 5-year | acs variable | S1701_C01_001 | +| li_cntmoe | Low-income count margin of error | S1701 | S1701_C01_042_M | ACS 5-year | acs variable | S1701_C01_001 | +| li_pctest | Low-income percent estimate | n/a | n/a | calculated | calculated | S1701_C01_001 | +| li_pctile | Low-income percentile | n/a | n/a | calculated | calculated | S1701_C01_001 | +| li_pctmoe | Low-income percent margin of error | n/a | n/a | calculated | calculated | S1701_C01_001 | +| li_score | Low-income percentile score | n/a | n/a | calculated | calculated | S1701_C01_001 | +| oa_class | Older adult percentile class | S0101 | n/a | calculated | calculated | S0101_C01_001 | +| oa_cntest | Older adult count estimate | S0101 | S0101_C01_030_E | ACS 5-year | acs variable | S0101_C01_001 | +| oa_cntmoe | Older adult count margin of error | S0101 | S0101_C01_030_M | ACS 5-year | acs variable | S0101_C01_001 | +| oa_pctest | Older adult percent estimate | S0101 | S0101_C02_030_E | ACS 5-year | acs variable | S0101_C01_001 | +| oa_pctile | Older adult percentile | S0101 | n/a | calculated | calculated | S0101_C01_001 | +| oa_pctmoe | Older adult percent margin of error | S0101 | S0101_C02_030_M | ACS 5-year | acs variable | S0101_C01_001 | +| oa_score | Older adult percentile score | S0101 | n/a | calculated | calculated | S0101_C01_001 | +| rm_class | Racial minority percentile class | B02001 | n/a | calculated | calculated | B02001_001 | +| 
rm_cntest | Racial minority count estimate | B02001 | B02001_002_E | ACS 5-year | acs variable | B02001_001 | +| rm_cntmoe | Racial minority count margin of error | B02001 | B02001_002_M | ACS 5-year | acs variable | B02001_001 | +| rm_pctest | Racial minority percent estimate | B02001 | n/a | calculated | calculated | B02001_001 | +| rm_pctile | Racial minority percentile | B02001 | n/a | calculated | calculated | B02001_001 | +| rm_pctmoe | Racial minority percent margin of error | B02001 | n/a | calculated | calculated | B02001_001 | +| rm_score | Racial minority percentile score | B02001 | n/a | calculated | calculated | B02001_001 | +| y_class | Youth percentile class | B09001 | n/a | calculated | calculated | B03002_001 | +| y_cntest | Youth count estimate | B09001 | B09001_001_E | ACS 5-year | acs variable | B03002_001 | +| y_cntmoe | Youth count margin of error | B09001 | B09001_001_M | ACS 5-year | acs variable | B03002_001 | +| y_pctest | Youth population percentage estimate | B09001 | n/a | calculated | calculated | B03002_001 | +| y_pctile | Youth population percentile | B09001 | n/a | calculated | calculated | B03002_001 | +| y_pctmoe | Youth population percentage margin of error | B09001 | n/a | calculated | calculated | B03002_001 | +| y_score | Youth percentile score | B09001 | n/a | calculated | calculated | B03002_001 | +| ipd_score | Indicator of potential disadvantage score | n/a | n/a | calculated | calculated | n/a | +| u_tpopest | Total population estimate | B02001 | B02001_001_E | ACS 5-year | acs variable | B02001_001 | +| u_tpopmoe | Total population margin of error | B02001 | B02001_001_M | ACS 5-year | acs variable | B02001_001 | +| u_pop6est | Population over 6 years of age estimate | S1601 | S1601_C01_001_E | ACS 5-year | acs variable | S1601_C01_001 | +| u_pop6moe | Population over 6 years of age margin of error | S1601 | S1601_C01_001_M | ACS 5-year | acs variable | S1601_C01_001 | +| u_ppovest | Total population poverty rate estimate | 
S1701 | S1701_C01_001_E | ACS 5-year | acs variable | S1701_C01_001 | +| u_ppovmoe | Total population poverty rate margin of error | S1701 | S1701_C01_001_M | ACS 5-year | acs variable | S1701_C01_001 | +| u_pnicest | Disabled universe total estimate | S1810 | S1810_C01_001_E | ACS 5-year | acs variable | S1810_C01_001 | +| u_pnicmoe | Disabled universe total margin of error | S1810 | S1810_C01_001_M | ACS 5-year | acs variable | S1810_C01_001 | +| namelsad | Geography name | n/a | n/a | ACS 5-year | acs variable | n/a | +| mun1 | First municipality name | n/a | n/a | calculated | calculated | n/a | +| mun2 | Second municipality name | n/a | n/a | calculated | calculated | n/a | +| mun3 | Third municipality name | n/a | n/a | calculated | calculated | n/a | +| co_name | County Name | n/a | n/a | calculated | calculated | n/a | +| state | State name | n/a | n/a | ACS 5-year | n/a | n/a | +| st_area(shape) | Area of a geometry | n/a | n/a | ACS 5-year | n/a | n/a | +| st_perimeter(shape) | Perimeter of the geometry | n/a | n/a | ACS 5-year | n/a | n/a | \ No newline at end of file diff --git a/docs/setup.md b/docs/setup.md new file mode 100644 index 0000000..235861b --- /dev/null +++ b/docs/setup.md @@ -0,0 +1,57 @@ +# Code Setup + +This project automates DVRPC's Indicators of Potential Disadvantage (IPD) analysis, including data download, processing, and export. For more on IPD analysis, see [Equity Analysis for the Greater Philadelphia Region](https://www.dvrpc.org/webmaps/ipd/). + +## Getting the Code and Software + +1. Clone the [Github repository](https://github.com/dvrpc/ipd) +2. [Download and install R](https://cran.rstudio.com/) +3. 
[Download and install R Studio](https://posit.co/download/rstudio-desktop/) + +## Installing Package Dependencies + +The R script has the following dependencies: + +- plyr +- here +- sf +- summarytools +- tidycensus +- tidyverse +- tigris +- dplyr +- descr + +If you have not previously installed the dependencies, you will need to do so. If you try to run the script without installing the packages, you will get an error message like +`Error in library (name_of_package) : there is no package called 'name_of_package'`. + +Install each package from R Studio's console (typically at the bottom of the screen in R Studio) with the command `install.packages('name_of_package')` (include the quotation marks). + +## Updating the Script for a New 5-Year Dataset + +If you are running the code against a newly released 5-year ACS dataset, do the following: + +1. Update `ipd_year` in `ipd.r` to the end year of the dataset. +2. Update `output_dir` with the output location for the files. +3. Verify the field names (listed under the `# Fields` section). Follow the link provided to check the schema for that dataset. + +## Running the Code + +1. Open RStudio. +2. Open the R file (File -> Open File). +3. Run the code by clicking the Source button or by pressing Ctrl+A followed by Ctrl+Enter. + +If you see an error about packages not being installed, see [Installing Package Dependencies](#installing-package-dependencies) above. + +Please provide your own Census API key, which the `tidycensus` package requires to download data; you can request one [here](https://api.census.gov/data/key_signup.html). 
+ +### Outputs + +After the code has finished, outputs are saved in the /outputs subdirectory of where you cloned the repository on your local machine, including: + +- ipd_`ipd_year`.csv: tract-level statistics and scores for IPD's nine indicators +- ipd_`ipd_year`.shp: spatial version of ipd_`ipd_year`.csv +- breaks_by_indicator_`ipd_year`.csv: bin breaks by indicator +- counts_by_indicator_`ipd_year`.csv: census tract counts by bin and indicator +- summary_by_indicator_`ipd_year`.csv: basic summary stats by indicator +- means_by_county_`ipd_year`.csv: population-weighted county means by indicator \ No newline at end of file diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 0000000..8019da1 --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,8 @@ +[data-md-color-scheme="custom"] { + --md-primary-fg-color: rgb(88, 34, 103); + --md-primary-bg-color: rgb(255, 255, 255); + --md-accent-fg-color: rgb(89, 89, 89); + --md-default-fg-color: rgb(88, 34, 103); + --md-typeset-a-color: rgb(88, 34, 103); + --md-code-hl-color--light: rgb(89, 89, 89); +} \ No newline at end of file diff --git a/documentation/ipdDiscussion.pdf b/documentation/ipdDiscussion.pdf deleted file mode 100644 index a073fd4..0000000 Binary files a/documentation/ipdDiscussion.pdf and /dev/null differ diff --git a/documentation/script_reference.md b/documentation/script_reference.md deleted file mode 100644 index cea6dfb..0000000 --- a/documentation/script_reference.md +++ /dev/null @@ -1,1061 +0,0 @@ -### Technical Reference ### - -# Outline - -1. [About](#about) - a. [Getting started](#one_a) - b. [Output abbreviations](#one_b) - c. [Project structure](#one_c) -2. [Setup](#setup) - a. [Dependencies](#two_a) - b. [Fields](#two_b) - c. [Year](#two_c) - d. [States](#two_d) - e. [Counties](#two_e) - f. [Census API key](#two_f) - f. [Functions](#two_g) - 1. [Override `base` and `stats` function defaults](#two_g_i) - 2. 
[Create custom half-standard deviation breaks](#two_g_ii) - 3. [*Exception*](#two_g_iii) - 4. [Move column or vector of columns to last position](#two_g_iv) - 5. [Summarize data](#two_g_v) -3. [Variance replicate table download](#variance_replicate_table_download) - a. [Download variance replicates from Census website](#three_a) - b. [Combine and format downloads](#three_b) -4. [Variance replicate table processing](#variance_replicate_table_processing) - a. [Compute racial minority count MOE](#four_a) - b. [Save results](#four_b) -5. [ACS estimates download](#acs_estimates_download) - a. [Fields](#five_a) - b. [Download counts and universes from Census API](#five_b) - 1. [*Exception*](#five_b_i) - c. [Download percentages from Census API](#five_c) - d. [Format downloads](#five_d) - 1. [*Exception*](#five_d_i) - 2. [*Exception*](#five_d_ii) - 3. [*Exception*](#five_d_iii) -6. [ACS estimates calculations](#acs_estimates_calculations) - a. [Percentages and percentage MOEs](#six_a) - 1. [Calculation](#six_a_i) - 2. [Result](#six_a_ii) - 3. [*Exception*](#six_a_iii) - 4. [*Exception*](#six_a_iv) - b. [Percentile](#six_b) - 1. [Calculation](#six_b_i) - 2. [Result](#six_b_ii) - c. [IPD score and classification](#six_c) - 1. [Calculation](#six_c_i) - 2. [Result](#six_c_ii) - d. [Composite IPD score](#six_d) - 1. [Calculation](#six_d_i) - 2. [Result](#six_d_ii) -7. [ACS estimates cleaning](#acs_estimates_cleaning) -8. [Summary tables](#summary_tables) - a. [Counts by indicator](#eight_a) - b. [Breaks by indicator](#eight_b) - c. [Summary by indicator](#eight_c) - d. [County means by indicator](#eight_d) -9. [Export](#export) - a. [Append to TIGER/LINE file](#nine_a) - b. [Export files](#nine_b) -10. [Metadata table with sources](#metadata) - - -# 1. 
About {#about} -DVRPC's IPD analysis identifies populations of interest under Title VI of the Civil Rights Act and the Executive Order on Environmental Justice (#12898) using 2013-2017 American Community Survey (ACS) five-year estimates from the U.S. Census Bureau. IPD analysis assists both DVRPC and outside organizations in equity work by identifying populations of interest, including youth, older adults, female, racial minority, ethnic minority, foreign-born, limited English proficiency, disabled, and low-income populations at the census tract level in DVRPC's nine-county region. - -There are many ways of identifying these populations of interest. This document discusses DVRPC's process, which is automated in an `R` script. - -## 1a. Getting started {#one_a} -For guidance on software prerequisites and how to run this script, see `getting_started.pdf` in the `documentation` folder. - -## 1b. Output abbreviations {#one_b} -Components of field names that you'll see in `outputs` and throughout the script. - -Component| Equivalent | -:--------| :-------------- | -D | Disabled | -EM | Ethnic Minority | -F | Female | -FB | Foreign-Born | -LEP | Limited English Proficiency | -LI | Low-Income | -OA | Older Adults | -RM | Racial Minority | -Y | Youth | -CntEst | Count Estimate | -CntMOE | Count MOE | -PctEst | Percentage Estimate | -PctMOE | Percentage MOE | -Pctile | Percentile | -Score | Score | -Class | Classification | -
- -Abbreviations of field names that you'll see in `outputs` *not* comprised of the above components. - - -| Abbreviation | Equivalent | -|:-------------|:-----------| -| GEOID | Census Tract Identifier | -| STATEFP | State FIPS Code | -| COUNTYFP | County FIPS Code | -| NAME | Census Tract FIPS Code | -| IPD_Score | Composite IPD Score | -| U_TPopEst | Total Population Estimate | -| U_TPopMOE | Total Population MOE | -| U_Pop5Est | Population 5+ Estimate | -| U_Pop5MOE | Population 5+ MOE | -| U_PPovEst | Poverty Status Population Estimate | -| U_PPovMOE | Poverty Status Population MOE | -| U_PNICEst | Non-Institutional Civilian Population Estimate | -| U_PNICMOE | Non-Institutional Civilian Population MOE | - -## 1c. Project structure {#one_c} -This script uses relative file paths based off the location of `ipd.Rproj`. As long as you download the entire repository, the script should have no trouble locating the correct subfolders. All of the subsequent years files are based on the same architecture. The project is structured as follows: - -```{r file_structure, eval = FALSE} -ipd -ipd.Rproj - script.R - documentation - discussion.pdf - getting_started.pdf - script_reference.pdf - script_reference.Rmd - variables.csv - outputs - breaks_by_indicator.csv - counts_by_indicator.csv - ipd.csv - ipd.dbf - ipd.prj - ipd.shp - ipd.shx - mean_by_county.csv - summary_by_indicator.csv -``` - -# 2. Setup {#setup} -## 2a. Dependencies {#two_a} -Packages required to run this script. If you don't have the packages, you'll get the warning `Error in library () : there is no package called ''`, in which case you'll need to install the package before proceeding. - -```{r packages, message = FALSE} -library(plyr); library(here); library(sf); library(summarytools); -library(tidycensus); library(tidyverse); library(tigris); library(dplyr); library(descr); -``` - -## 2b. 
Fields {#two_b} -The base information we need for IPD analysis are universes, counts, and percentages for nine indicators at the census tract level. For each indicator, the table below shows the indicator name, its abbreviation used in the script, its universe, its count, and its percentage field if applicable. Because the schemata of ACS tables can change with each annual ACS update, these field names are applicable *only* to 2013-2017 ACS Five-Year Estimates. - -Some percentage fields are empty. This is okay: we will compute the percentages when they are not directly available from the ACS. - -Note that variable B02001_002 ("Estimate; Total: - White alone") is listed as the count for Racial Minority. This is a mathematical shortcut: otherwise, we would need to add several subfields to compute the same estimate. The desired count is B02001_001 (Universe) $-$ B02001_002 ("Estimate; Total: - White alone"). The subtraction is computed after download in Section 5d.i., making a correct estimate and an incorrect MOE. The correct MOE for the count, as calculated in Section 4, will be appended later. - -| Indicator | Abbreviation | Universe | Count | Percentage | -|:----------|:------------:|:--------:|:-----:|:----------:| -| Disabled | D | S1810_C01_001 | S1810_C02_001 | S1810_C03_001 | -| Ethnic Minority | EM | B03002_001 | B03002_012 | N/A | -| Female | F | S0101_C01_001 | S0101_C05_001 | DP05_0003PE | -| Foreign-Born | FB | B05012_001 | B05012_003 | N/A | -| Limited English Proficiency | LEP | S1601_C01_001 | S1601_C05_001 | S1601_C06_001 | -| Low-Income | LI | S1701_C01_001 | S1701_C01_042 | N/A | -| Older Adults | OA | S0101_C01_001 | S0101_C01_030 | S0101_C02_030 | -| Racial Minority | RM | B02001_001 | B02001_002 | N/A | -| Youth | Y | B03002_001 | B09001_001 | N/A | -
- -While it's quicker to embed the names of the desired columns into the code, fields are explicitly spelled out in this script. This is a purposeful design choice. The user should check that the field names point to the correct API request with every IPD update. The best way to check the field names is to visit Census Developers [(link)](https://www.census.gov/developers/) and select the corresponding API. For a history of the ACS variables used in IPD 2015, 2016, and 2017, see `variables.csv` in the `documentation` folder. -
- - -disabled_universe <- "S1810_C01_001" - -disabled_count <- "S1810_C02_001" - -disabled_percent <- "S1810_C03_001" - -ethnic_minority_universe <- "B03002_001" - -ethnic_minority_count <- "B03002_012" - -ethnic_minority_percent <- NA - -female_universe <- "S0101_C01_001" - -female_count <- "S0101_C05_001" - -female_percent <- "DP05_0003PE" - -foreign_born_universe <- "B05012_001" - -foreign_born_count <- "B05012_003" - -foreign_born_percent <- NA - -limited_english_proficiency_universe <- "S1601_C01_001" - -limited_english_proficiency_count <- "S1601_C05_001" - -limited_english_proficiency_percent <- "S1601_C06_001" - -low_income_universe <- "S1701_C01_001" - -low_income_count <- "S1701_C01_042" - -low_income_percent <- NA - -older_adults_universe <- "S0101_C01_001" - -older_adults_count <- "S0101_C01_030" - -older_adults_percent <- "S0101_C02_030" - -racial_minority_universe <- "B02001_001" - -racial_minority_count <- "B02001_002" - -racial_minority_percent <- NA - -youth_universe <- "B03002_001" - -youth_count <- "B09001_001" - -youth_percent <- NA -
- -
- -## 2c. Year {#two_c} -The data download year. -```{r year} -ipd_year <- 2017 -``` -
- -## 2d. States {#two_d} -The data download state or states. Use the two-character text abbreviation. -
-```{r states} -ipd_states <- c("NJ", "PA") -``` -
- -## 2e. Counties {#two_e} -The counties in your study area. Use five-digit characters concatenating the two-digit state and three-digit county FIPS codes. -
-```{r counties} -ipd_counties <- c("34005", "34007", "34015", "34021", - "42017", "42029", "42045", "42091", "42101") -``` -
- -## 2f. Census API Key {#two_f} -Placeholder if you have never installed an API key before. If this is your first time accessing the Census API using `R`, see `getting_started.pdf` in the `documentation` folder. -
-```{r api_key} -# Census API Key -# census_api_key("YOUR API KEY GOES HERE", install = TRUE) -``` - -# *THE TYPICAL USER SHOULD NOT HAVE TO EDIT ANYTHING BELOW THIS POINT.* - -## 2g. Functions {#two_g} -Load custom functions. - -### 2g.i. Override `base` and `stats` function defaults {#two_g_i} -A time-saver so that it's not required to call `na.rm = TRUE` every time common functions are called. -
-```{r override} -min <- function(i, ..., na.rm = TRUE) { - base::min(i, ..., na.rm = na.rm) -} -mean <- function(i, ..., na.rm = TRUE) { - base::mean(i, ..., na.rm = na.rm) -} -sd <- function(i, ..., na.rm = TRUE) { - stats::sd(i, ..., na.rm = na.rm) -} -max <- function(i, ..., na.rm = TRUE) { - base::max(i, ..., na.rm = na.rm) -} -``` - -### 2g.ii. Create custom half-standard deviation breaks {#two_g_ii} -For a given vector of numbers `x` and a number of bins `i`, `st_dev_breaks` computes the bin breaks starting at $-0.5 \cdot st dev$ and $0.5 \cdot st dev$. For the purposes of IPD analysis, `i = 5`, and `st_dev_breaks` calculates the minimum, $-1.5 \cdot st dev$, $-0.5 \cdot st dev$, $0.5 \cdot st dev$, $1.5 \cdot st dev$, and maximum values. These values are later used to slice the vector into five bins. - -### 2g.iii. *Exception* {#two_g_iii} -All minima are coerced to equal zero. If the first bin break ($-1.5 \cdot st dev$) is negative, as happens when the data has a large spread and therefore a large standard deviation, then this bin break is coerced to equal 0.1. In these cases, only estimates of 0 percent will be placed in the bottom bin. -
-```{r st_dev_breaks} -st_dev_breaks <- function(x, i, na.rm = TRUE){ - half_st_dev_count <- c(-1 * rev(seq(1, i, by = 2)), - seq(1, i, by = 2)) - if((i %% 2) == 1) { - half_st_dev_breaks <- sapply(half_st_dev_count, - function(i) (0.5 * i * sd(x)) + mean(x)) - half_st_dev_breaks[[1]] <- 0 - half_st_dev_breaks[[2]] <- ifelse(half_st_dev_breaks[[2]] < 0, - 0.1, - half_st_dev_breaks[[2]]) - half_st_dev_breaks[[i + 1]] <- ifelse(max(x) > half_st_dev_breaks[[i + 1]], - max(x), half_st_dev_breaks[[i + 1]]) - } else { - half_st_dev_breaks <- NA - } - return(half_st_dev_breaks) -} -``` - -### 2g.iv. Move column or vector of columns to last position {#two_g_iv} -The requested schema for IPD data export renames and places all relevant universes in the final columns of the dataset. `move_last` moves a column or vector of column names to the last position(s) in a data frame. -
-```{r move_last} -move_last <- function(df, last_col) { - match(c(setdiff(names(df), last_col), last_col), names(df)) -} -``` - -### 2g.v. Summarize data {#two_g_v} -`description` tailors the exports from `summarytools::descr` to create summary tables with the requested fields. $0.5 \cdot st dev$ is returned after $stdev$. -
-```{r description} -description <- function(i) { - des <- as.numeric(descr(i, na.rm = TRUE, - stats = c("min", "med", "mean", "sd", "max"))) - des <- c(des[1:4], des[4] / 2, des[5]) - return(des) -} -``` - -# 3. Variance replicate table download {#variance_replicate_table_download} -This will feel out of order, but it's necessary. The racial minority indicator is created by summing up several subgroups in ACS Table B03002. This means that the MOE for the count has to be computed. While the ACS has issued guidance on computing the MOE by aggregating subgroups, using the approximation formula can artificially deflate the derived MOE. Variance replicate tables are used instead to account for covariance and compute a more accurate MOE. The MOE computed from variance replicates is substituted in for the racial minority count MOE in Section 5d.ii. - -See the Census Bureau's Variance Replicate Tables Documentation [(link)](https://www.census.gov/programs-surveys/acs/technical-documentation/variance-tables.html) for additional guidance on working with variance replicates. - -## 3a. Download variance replicates from Census website {#three_a} -Download, unzip, and read variance replicate tables for Table B02001. Results are combined into a single table called `var_rep`. -
-```{r varrep_download, tidy = TRUE, message = FALSE} -ipd_states_numeric <- fips_codes %>% - filter(state %in% ipd_states) %>% - select(state_code) %>% distinct(.) %>% pull(.) -var_rep <- NULL -for (i in 1:length(ipd_states)){ - url <- paste0("https://www2.census.gov/programs-surveys/acs/replicate_estimates/", - ipd_year, - "/data/5-year/140/B02001_", - ipd_states_numeric[i], - ".csv.gz") - temp <- tempfile() - download.file(url, temp) - var_rep_i <- read_csv(gzfile(temp)) - var_rep <- rbind(var_rep, var_rep_i) -} -``` - -## 3b. Combine and format downloads {#three_b} -Subset `var_rep` for the study area defined in `ipd_counties` and extract the necessary subgroups. -
-```{r varrep_merge, message = FALSE} -var_rep <- var_rep %>% - mutate_at(vars(GEOID), funs(str_sub(., 8, 18))) %>% - filter(str_sub(GEOID, 1, 5) %in% ipd_counties) %>% - select(-TBLID, -NAME, -ORDER, -moe, -CME, -SE) %>% - filter(TITLE %in% c("Black or African American alone", - "American Indian and Alaska Native alone", - "Asian alone", - "Native Hawaiian and Other Pacific Islander alone", - "Some other race alone", - "Two or more races:")) -``` - -# 4. Variance replicate table processing {#variance_replicate_table_processing} -## 4a. Compute racial minority count MOE {#four_a} -Add up the racial minority counts into a single count per census tract for the estimate and 80 variance replicates. Separate the resulting data frame into estimates and variance replicates. -
-```{r varrep_subset, message = FALSE} -num <- var_rep %>% - group_by(GEOID) %>% - summarize_if(is.numeric, funs(sum)) %>% - select(-GEOID) -estim <- num %>% select(estimate) -individual_replicate <- num %>% select(-estimate) -``` -Compute the variance replicate for the count. GEOIDs are stored as `id` to be re-appended to the MOEs after they are calculated. -
-```{r varrep_calc, message = FALSE} -id <- var_rep %>% select(GEOID) %>% distinct(.) %>% pull(.) -sqdiff_fun <- function(v, e) (v - e) ^ 2 -sqdiff <- mapply(sqdiff_fun, individual_replicate, estim) -sum_sqdiff <- rowSums(sqdiff) -variance <- 0.05 * sum_sqdiff -moe <- round(sqrt(variance) * 1.645, 0) -``` - -## 4b. Save results {#four_b} -Save the racial minority MOE. -
-```{r varrep_save, message = FALSE} -rm_moe <- cbind(id, moe) %>% - as_tibble(.) %>% - rename(GEOID10 = id, RM_CntMOE = moe) %>% - mutate_at(vars(RM_CntMOE), as.numeric) -``` -Here are the first few lines of `rm_moe`: -
-```{r varrep_preview} -head(rm_moe) -``` - -# 5. ACS estimates download {#acs_estimates_download} -## 5a. Fields {#five_a} -Fields for downloads from the ACS API were discussed in Section 2b. - -## 5b. Download counts and universes from Census API {#five_b} -Download counts and percentages for each of IPD's nine indicators. Note that the download is for all census tracts in `ipd_states`. - -Input data for IPD comes from ACS Subject Tables, Detailed Tables, and Data Profiles. While one can request all the fields for Subject Tables in one batch, mixing requests for two or more different types of tables will result in failure. For this reason, the counts and universe fields supplied by the user in Section 2b are evaluated for their contents and split into three batches: `s_counts` for Subject Tables, `d_counts` for Detailed Tables, and `dp_counts` for Data Profiles. - -The chunk below zips the user-defined calls from the API with the output abbreviations into a data frame called `counts_calls` and separates the calls into three batches. -
-```{r api_counts, message = FALSE} -counts <- c(disabled_count, disabled_universe, - ethnic_minority_count, ethnic_minority_universe, - female_count, female_universe, - foreign_born_count, foreign_born_universe, - limited_english_proficiency_count, limited_english_proficiency_universe, - low_income_count, low_income_universe, - older_adults_count, older_adults_universe, - racial_minority_count, racial_minority_universe, - youth_count, youth_universe) -counts_ids <- c("D_C", "D_U", "EM_C", "EM_U", "F_C", "F_U", - "FB_C", "FB_U", "LEP_C", "LEP_U", "LI_C", "LI_U", - "OA_C", "OA_U", "RM_C", "RM_U", "Y_C", "Y_U") -counts_calls <- tibble(id = counts_ids, api = counts) %>% - drop_na(.) -s_calls <- counts_calls %>% - filter(str_sub(api, 1, 1) == "S") -d_calls <- counts_calls %>% - filter(str_sub(api, 1, 1) == "B") -dp_calls <- counts_calls %>% - filter(str_sub(api, 1, 1) == "D") -``` -API calls are made separately for ACS Subject Tables, Detailed Tables, and Data Profiles and appended to `dl_counts`. Sometimes there are no requests for an ACS table type; in these situations, the script bypasses a download attempt. Then, information from `counts_calls` is used to rename the downloads to the appropriate abbreviation. -
-```{r api_counts_calls, message = FALSE} -dl_counts <- NULL -if(length(s_calls$id > 0)){ - s_counts <- get_acs(geography = "tract", - state = ipd_states, - output = "wide", - year = ipd_year, - variables = s_calls$api) %>% - select(-NAME) - dl_counts <- bind_cols(dl_counts, s_counts) -} -if(length(d_calls$id > 0)){ - d_counts <- get_acs(geography = "tract", - state = ipd_states, - output = "wide", - year = ipd_year, - variables = d_calls$api) %>% - select(-NAME) - dl_counts <- left_join(dl_counts, d_counts) -} -if(length(dp_calls$id > 0)){ - dp_counts <- get_acs(geography = "tract", - state = ipd_states, - output = "wide", - year = ipd_year, - variables = dp_calls$api) %>% - select(-NAME) - dl_counts <- left_join(dl_counts, dp_counts) -} -counts_calls$api <- str_replace(counts_calls$api, "E$", "") -for(i in 1:length(counts_calls$id)){ - names(dl_counts) <- str_replace(names(dl_counts), - counts_calls$api[i], - counts_calls$id[i]) -} -dl_counts <- dl_counts %>% - rename(GEOID10 = GEOID) -``` - -### 5b.i. *Exception* {#five_b_i} - -The API does not allow redundant downloads, so universes for Older Adults and Youth are duplicated after download. `duplicate_cols` identifies duplicate API calls, and `combined_rows` serves as a crosswalk to duplicate and rename fields. -
-```{r api_counts_duplicator} -duplicate_cols <- counts_calls %>% - group_by(api) %>% - filter(n()>1) %>% - summarize(orig = id[1], - duplicator = id[2]) -e_paste <- function(i) paste0(i, "E") -m_paste <- function(i) paste0(i, "M") -e_rows <- apply(duplicate_cols, 2, e_paste) -m_rows <- apply(duplicate_cols, 2, m_paste) -combined_rows <- as_tibble(rbind(e_rows, m_rows)) %>% - mutate_all(as.character) -for(i in 1:length(combined_rows$api)){ - dl_counts[combined_rows$duplicator[i]] <- dl_counts[combined_rows$orig[i]] -} -``` - -## 5c. Download percentages from Census API {#five_c} -Download percentage tables that are available for four of IPD's nine indicators. We will compute percentages and their associated MOEs for the rest of the dataset later. The procedure is identical to that described in Section 5b. -
-```{r api_percs, message = FALSE} -percs <- c(disabled_percent, - ethnic_minority_percent, - female_percent, - foreign_born_percent, - limited_english_proficiency_percent, - low_income_percent, - older_adults_percent, - racial_minority_percent, - youth_percent) -percs_ids <- c("D_P", "EM_P", "F_P", "FB_P", "LEP_P", - "LI_P", "OA_P", "RM_P", "Y_P") -percs_calls <- tibble(id = percs_ids, api = percs) %>% - drop_na(.) -s_calls <- percs_calls %>% - filter(str_sub(api, 1, 1) == "S") -d_calls <- percs_calls %>% - filter(str_sub(api, 1, 1) == "B") -dp_calls <- percs_calls %>% - filter(str_sub(api, 1, 1) == "D") -dl_percs <- NULL -if(length(s_calls$id > 0)){ - s_percs <- get_acs(geography = "tract", - state = ipd_states, - output = "wide", - year = ipd_year, - variables = s_calls$api) %>% - select(-NAME) - dl_percs <- bind_cols(dl_percs, s_percs) -} -if(length(d_calls$id > 0)){ - d_percs <- get_acs(geography = "tract", - state = ipd_states, - output = "wide", - year = ipd_year, - variables = d_calls$api) %>% - select(-NAME) - dl_percs <- left_join(dl_percs, d_percs) -} -if(length(dp_calls$id > 0)){ - dp_percs <- get_acs(geography = "tract", - state = ipd_states, - output = "wide", - year = ipd_year, - variables = dp_calls$api) %>% - select(-NAME) - dl_percs <- left_join(dl_percs, dp_percs) -} -percs_calls$api <- str_replace(percs_calls$api, "PE", "") -names(dl_percs) <- str_replace(names(dl_percs), "PE", "E") -names(dl_percs) <- str_replace(names(dl_percs), "PM", "M") -for(i in 1:length(percs_calls$id)){ - names(dl_percs) <- str_replace(names(dl_percs), - percs_calls$api[i], - percs_calls$id[i]) -} -dl_percs <- dl_percs %>% - rename(GEOID10 = GEOID) -``` - -## 5d. Format downloads {#five_d} -Subset `dl_counts` and `dl_percs` for DVRPC's nine-county region. Percentages should range from 0 to 100. -
-```{r dl_counts_dl_percs, message = FALSE} -dl_counts <- dl_counts %>% - filter(str_sub(GEOID10, 1, 5) %in% ipd_counties) -dl_percs <- dl_percs %>% - filter(str_sub(GEOID10, 1, 5) %in% ipd_counties) -``` - -### 5d.i. *Exception* {#five_d_i} -Note that variable B02001_002 ("Estimate; Total: - White alone") was downloaded as the count for racial minority. Compute B02001_001 (Universe) $-$ B02001_002 ("Estimate; Total: - White alone") and substitute for `RM_CE`. -
-```{r perc_excp_1, message = FALSE} -dl_counts <- dl_counts %>% mutate(x = RM_UE - RM_CE) %>% - select(-RM_CE) %>% - rename(RM_CE = x) -``` - -### 5d.ii. *Exception* {#five_d_ii} -Before computing percentages and percentage MOEs, import the count MOE for the racial minority variable computed from variance replicates. If `rm_moe` exists, then this chunk will substitute the correct count MOE in `dl_counts`; if not, this chunk will do nothing. -
-```{r perc_excp_2, message = FALSE} -if(exists("rm_moe")){ - dl_counts <- dl_counts %>% - select(-RM_CM) %>% - left_join(., rm_moe) %>% - rename(RM_CM = RM_CntMOE) %>% - mutate_at(vars(RM_CM), as.numeric) -} -``` - -### 5d.iii. *Exception* {#five_d_iii} -Half-standard deviations serve as the classification bins for IPD scores, and including zero-population tracts affects computed standard deviation values. Start by removing the 11 census tracts with zero population. -
-```{r perc_excp_3} -slicer <- c("34005981802","34005982200","34021980000","42017980000", - "42045980300","42045980000","42045980200","42091980100", - "42091980000","42091980200","42091980300","42101036901", - "42101980001","42101980002","42101980003","42101980300", - "42101980701","42101980702","42101980800","42101980100", - "42101980200", "42101980400","42101980500","42101980600", - "42101980901","42101980902","42101980903","42101980904", - "42101980905","42101980906", "42101989100","42101989200", - "42101989300") -dl_counts <- dl_counts %>% filter(!(GEOID10 %in% slicer)) -dl_percs <- dl_percs %>% filter(!(GEOID10 %in% slicer)) -``` -Here are the first few lines of `dl_counts` and `dl_percs`. Notice the naming convention: - -- `UE` = universe estimate -- `UM` = universe MOE -- `CE` = count estimate -- `CM` = count MOE -- `PE` = percentage estimate -- `PM` = percentage MOE - -We use these strings to select columns, so consistency is key. -
-```{r acs_preview} -head(dl_counts) -head(dl_percs) -``` - -# 6. ACS estimates calculations {#acs_estimates_calculations} -For all nine indicators, this section computes: - -a. Percentages and percentage MOEs -b. Percentile -c. IPD score and classification -d. Composite IPD score - -Split `dl_counts` into a list named `comp` for processing and arrange column names in alphabetical order. The name of the list, `comp`, is a nod to the "component parts" of `dl_counts`. The structure of `comp` is similar to a four-tab Excel spreadsheet: for example, `comp` is the name of the `.xlsx` file, `uni_est` is a tab for universe estimates, and `uni_est` has nine columns and 1,368 rows, where the column is the IPD indicator and the row is the census tract observation. - -The order of columns is important because processing is based on vector position. We want to make sure that the first column of every tab corresponds to the Disabled indicator, the second to Ethnic Minority, et cetera. -
-```{r comp} -comp <- list() -comp$uni_est <- dl_counts %>% select(ends_with("UE")) %>% select(sort(current_vars())) -comp$uni_moe <- dl_counts %>% select(ends_with("UM")) %>% select(sort(current_vars())) -comp$count_est <- dl_counts %>% select(ends_with("CE")) %>% select(sort(current_vars())) -comp$count_moe <- dl_counts %>% select(ends_with("CM")) %>% select(sort(current_vars())) -``` - -## 6a. Percentages and percentage MOEs {#six_a} -### 6a.i. Calculation {#six_a_i} -MOEs of the percentage values are obtained using the `tidycensus` function `moe_prop`. This chunk mentions `r` and `c` several times: continuing the spreadsheet analogy, think of `r` as the row number and `c` as the column number for a given spreadsheet tab. -
-```{r perc} -pct_matrix <- NULL -pct_moe_matrix <- NULL -for (c in 1:length(comp$uni_est)){ - pct <- unlist(comp$count_est[,c] / comp$uni_est[,c]) - pct_matrix <- cbind(pct_matrix, pct) - moe <- NULL - for (r in 1:length(comp$uni_est$LI_UE)){ - moe_indiv <- as.numeric(moe_prop(comp$count_est[r,c], - comp$uni_est[r,c], - comp$count_moe[r,c], - comp$uni_moe[r,c])) - moe <- append(moe, moe_indiv) - } - pct_moe_matrix <- cbind(pct_moe_matrix, moe) -} -``` - -### 6a.ii. Result {#six_a_ii} -`pct` and `pct_moe` store the percentages and associated MOEs for the nine indicator variables. Results are rounded to the tenths place and range from 0 to 100. -
-```{r perc_res, warning = FALSE} -pct <- as_tibble(pct_matrix) %>% mutate_all(funs(. * 100)) %>% mutate_all(round, 1) -names(pct) <- str_replace(names(comp$uni_est), "_UE", "_PctEst") -pct_moe <- as_tibble(pct_moe_matrix) %>% mutate_all(funs(. * 100)) %>% mutate_all(round, 1) -names(pct_moe) <- str_replace(names(comp$uni_est), "_UE", "_PctMOE") -``` - -### 6a.iii. *Exception* {#six_a_iii} -If the percentage MOE equals 0, then overwrite it to equal 0.1. This should be a rare occurrence with survey data at the census tract level. -
-```{r perc_excp_4} -pct_moe <- pct_moe %>% replace(., . == 0, 0.1) -``` - -### 6a.iv. *Exception* {#six_a_iv} -Substitute percentages and associated MOEs when available. This applies to the older adults, female, limited English proficiency, and disabled variables. -
-```{r perc_excp_5} -pct <- pct %>% mutate(D_PctEst = dl_percs$D_PE, - OA_PctEst = dl_percs$OA_PE, - LEP_PctEst = dl_percs$LEP_PE, - F_PctEst = dl_percs$F_PE) -pct_moe <- pct_moe %>% mutate(D_PctMOE = dl_percs$D_PM, - OA_PctMOE = dl_percs$OA_PM, - LEP_PctMOE = dl_percs$LEP_PM, - F_PctMOE = dl_percs$F_PM) -``` -Here are the first few lines of `pct` and `pct_moe`: -```{r pct_preview} -head(pct) -head(pct_moe) -``` - -## 6b. Percentile {#six_b} -### 6b.i. Calculation {#six__b_i} -Add percentiles (an additional "spreadsheet tab") to `comp`, making sure to first sort column names alphabetically. Compute the empirical cumulative distribution function for each of the nine indicator variables. The ECDF can range from 0 to 1, where 1 indicates the largest observed percentage. -
-```{r percentile} -comp$pct_est <- pct %>% select(sort(current_vars())) -percentile_matrix <- NULL -for (c in 1:length(comp$uni_est)){ - p <- unlist(comp$pct_est[,c]) - rank <- ecdf(p)(p) - percentile_matrix <- cbind(percentile_matrix, rank) -} -``` - -### 6b.ii. Result {#six_b_ii} -`percentile` stores the percentile for the nine indicator variables. Results are rounded to the hundredths place. -
-```{r percentile_res, warning = FALSE} -percentile <- as_tibble(percentile_matrix) %>% mutate_all(round, 2) -names(percentile) <- str_replace(names(comp$uni_est), "_UE", "_Pctile") -``` -Here are the first few lines of `percentile`: -
-```{r percentile_preview} -head(percentile) -``` - -## 6c. IPD score and classification {#six_c} -Each observation is assigned an IPD score for each indicator. The IPD score for an individual indicator can range from 0 to 4, which corresponds to the following classification and bin breaks: - -| IPD Score | IPD Classification | Standard Deviations | -|:---------:|:------------------:|:-------------------:| -| 0 | Well Below Average | x $< -1.5 \cdot stdev$ | -| 1 | Below Average | $-1.5 \cdot stdev \leq$ x $<-0.5 \cdot stdev$ | -| 2 | Average | $-0.5 \cdot stdev \leq$ x $<0.5 \cdot stdev$ | -| 3 | Above Average | $0.5 \cdot stdev \leq$ x $<1.5 \cdot stdev$ | -| 4 | Well Above Average | x $\geq 1.5 \cdot stdev$ | -
- -### 6c.i. Calculation {#six_c_i} -The function `st_dev_breaks` is called to compute the bin breaks for each indicator. These breaks determine the IPD score stored in `score`. Note that we bin the *rounded* `PctEst` values using the *unrounded* half-standard deviation breaks to compute the `score`. `class` is a textual explanation of the IPD score. -
-```{r score_class} -score_matrix <- NULL -class_matrix <- NULL -for (c in 1:length(comp$uni_est)){ - p <- unlist(comp$pct_est[,c]) - breaks <- st_dev_breaks(p, 5, na.rm = TRUE) - score <- case_when(p < breaks[2] ~ 0, - p >= breaks[2] & p < breaks[3] ~ 1, - p >= breaks[3] & p < breaks[4] ~ 2, - p >= breaks[4] & p < breaks[5] ~ 3, - p >= breaks[5] ~ 4) - class <- case_when(score == 0 ~ "Well Below Average", - score == 1 ~ "Below Average", - score == 2 ~ "Average", - score == 3 ~ "Above Average", - score == 4 ~ "Well Above Average") - score_matrix <- cbind(score_matrix, score) - class_matrix <- cbind(class_matrix, class) -} -``` - -### 6c.ii. Result {#six_c_ii} -`score` and `class` store the IPD scores and associated descriptions for the nine indicator variables. -
-```{r score_class_res, warning = FALSE} -score <- as_tibble(score_matrix) -names(score) <- str_replace(names(comp$uni_est), "_UE", "_Score") -class <- as_tibble(class_matrix) -names(class) <- str_replace(names(comp$uni_est), "_UE", "_Class") -``` -Here are the first few lines of `score` and `class`: -
-```{r score_preview} -head(score) -head(class) -``` - -## 6d. Composite IPD score {#six_d} -### 6d.i. Calculation {#six_d_i} -Sum the IPD scores for the nine indicator variables to determine the composite IPD score. -
-```{r ipd_score} -score <- score %>% mutate(IPD_Score = rowSums(.)) -``` - -### 6d.ii. Result {#six_d_ii} -Here are the first few records of the composite IPD score: -
-```{r ipd_score_preview} -head(score$IPD_Score) -``` - -# 7. ACS estimates cleaning {#acs_estimates_cleaning} -There is a specific output format for `ipd.csv`, including column names, column order, flags for missing data, and census tracts with insufficient data. This section ensures conformity with the output formatting. - -Merge the percentage estimates, percentage MOEs, percentile, score, and class data frames into a single data frame called `ipd`. -
-```{r merge} -ipd <- bind_cols(dl_counts, pct) %>% - bind_cols(., pct_moe) %>% - bind_cols(., percentile) %>% - bind_cols(., score) %>% - bind_cols(., class) -``` -Rename columns. -
-```{r rename} -names(ipd) <- str_replace(names(ipd), "_CE", "_CntEst") -names(ipd) <- str_replace(names(ipd), "_CM", "_CntMOE") -ipd <- ipd %>% mutate(STATEFP10 = str_sub(GEOID10, 1, 2), - COUNTYFP10 = str_sub(GEOID10, 3, 5), - NAME10 = str_sub(GEOID10, 6, 11), - U_TPopEst = F_UE, - U_TPopMOE = F_UM, - U_Pop5Est = LEP_UE, - U_Pop5MOE = LEP_UM, - U_PPovEst = LI_UE, - U_PPovMOE = LI_UM, - U_PNICEst = D_UE, - U_PNICMOE = D_UM) %>% - select(-ends_with("UE"), -ends_with("UM")) -``` -Reorder columns, with `GEOID` and FIPS codes first, the following variables in alphabetical order, and the total IPD score and universes at the end. -
-```{r reorder} -ipd <- ipd %>% select(GEOID10, STATEFP10, COUNTYFP10, NAME10, sort(current_vars())) %>% - select(move_last(., c("IPD_Score", "U_TPopEst", "U_TPopMOE", - "U_Pop5Est", "U_Pop5MOE", "U_PPovEst", - "U_PPovMOE", "U_PNICEst", "U_PNICMOE"))) -``` -At the beginning of processing, we removed the census tracts listed in `slicer` because their populations were equal to zero. Tack these back on to the dataset. -
-```{r tack} -slicer <- enframe(slicer, name = NULL, value = "GEOID10") -ipd <- plyr::rbind.fill(ipd, slicer) -``` -Replace `NA` values with `NoData` if character and `-99999` if numeric. -
-```{r replace} -ipd <- ipd %>% mutate_if(is.character, funs(ifelse(is.na(.), "NoData", .))) %>% - mutate_if(is.numeric, funs(ifelse(is.na(.), -99999, .))) -``` - -# 8. Summary Tables {#summary_tables} -This section generates a handful of other deliverables, including: - -a. Counts by indicator -b. Breaks by indicator -c. Summary by indicator -d. County means by indicator - -Replace `-99999` with `NA` for numeric columns to avoid distorting summary statistics. -
-```{r summary_prep} -ipd_summary <- ipd -ipd_summary[ipd_summary == -99999] <- NA -``` - -## 8a. Counts by indicator {#eight_a} -The number of census tracts that fall in each bin. Count census tracts by indicator and bin. Reorder factor levels so that "Well Below Average" appears before "Below Average," and the like. -
-```{r summary_counts, message = FALSE, warning = FALSE} -counts <- ipd_summary %>% select(ends_with("Class")) -export_counts <- apply(counts, 2, function(i) plyr::count(i)) -for(i in 1:length(export_counts)){ - export_counts[[i]]$var <- names(export_counts)[i] -} -export_counts <- map_dfr(export_counts, `[`, c("var", "x", "freq")) -colnames(export_counts) <- c("Variable", "Classification", "Count") -export_counts$Classification <- factor(export_counts$Classification, - levels = c("Well Below Average", - "Below Average", - "Average", - "Above Average", - "Well Above Average", - "NoData")) -export_counts <- arrange(export_counts, Variable, Classification) -export_counts <- export_counts %>% - spread(Classification, Count) %>% - mutate_all(funs(replace_na(., 0))) %>% - mutate(TOTAL = rowSums(.[2:7], na.rm = TRUE)) -``` - -## 8b. Breaks by indicator {#eight_b} -The bin breaks for each indicator. Apply the `st_dev_breaks` function to all percentage values and export results. -
-```{r summary_breaks} -breaks <- ipd_summary %>% select(ends_with("PctEst")) -export_breaks <- round(mapply(st_dev_breaks, x = breaks, i = 5, na.rm = TRUE), digits = 3) -export_breaks <- as_tibble(export_breaks) %>% - mutate(Class = c("Min", "1", "2", "3", "4", "Max")) %>% - select(Class, current_vars()) -``` - -## 8c. Summary by indicator {#eight_c} -Summary statistics of each indicator. Round results to two decimal places. -
-```{r summary_summary} -pcts <- ipd_summary %>% select(ends_with("PctEst")) -summary_data <- apply(pcts, 2, description) -export_summary <- as_tibble(summary_data) %>% - mutate_all(round, 2) %>% - mutate(Statistic = c("Minimum", "Median", "Mean", "SD", "Half-SD", "Maximum")) %>% - select(Statistic, current_vars()) -``` - -## 8d. County means by indicator {#eight_d} -Population-weighted means by county and indicator. For the most accurate percentage values, aggregate all counts back to the county level and compute percentages. In the export file, counties are referred to by the five-digit character supplied by the user to `ipd_counties`. -
-```{r summary_county, warning = FALSE, message = FALSE} -export_means <- dl_counts %>% select(GEOID10, ends_with("UE"), ends_with("CE")) %>% - select(GEOID10, sort(current_vars())) %>% - mutate(County = str_sub(GEOID10, 1, 5)) %>% - select(-GEOID10) %>% - group_by(County) %>% - summarize(D_PctEst = sum(D_CE) / sum(D_UE), - EM_PctEst = sum(EM_CE) / sum(EM_UE), - F_PctEst = sum(F_CE) / sum(F_UE), - FB_PctEst = sum(FB_CE) / sum(FB_UE), - LEP_PctEst = sum(LEP_CE) / sum(LEP_UE), - LI_PctEst = sum(LI_CE) / sum(LI_UE), - OA_PctEst = sum(OA_CE) / sum(OA_UE), - RM_PctEst = sum(RM_CE) / sum(RM_UE), - Y_PctEst = sum(Y_CE) / sum(Y_UE)) %>% - mutate_if(is.numeric, funs(. * 100)) %>% - mutate_if(is.numeric, round, 1) -``` - -# 9. Export {#export} -## 9a. Append to TIGER/LINE file {#nine_a} -Using the arguments supplied in `ipd_counties`, download the relevant census tracts and append `ipd` to them. Uncommenting `cb = TRUE` will greatly speed processing time by downloading generalized tract boundary shapefiles instead of detailed ones. -
-```{r shapefile, message = FALSE, warning = FALSE} -options(tigris_use_cache = TRUE, tigris_class = "sf") -st <- str_sub(ipd_counties, 1, 2) -cty <- str_sub(ipd_counties, 3, 5) -trct <- map2(st, cty, ~{tracts(state = .x, - county = .y, - #cb = TRUE, - year = ipd_year)}) %>% - rbind_tigris() %>% - st_transform(., 26918) %>% - select(GEOID) %>% - left_join(., ipd, by = c("GEOID" = "GEOID10")) %>% - rename(GEOID10 = GEOID) -``` - -## 9b. Export files {#nine_b} -Results are saved in `outputs`. -
-```{r happy_trails, message = FALSE, warning = FALSE} -st_write(trct, here("outputs", "ipd.shp"), delete_dsn = TRUE, quiet = TRUE) -write_csv(ipd, here("outputs", "ipd.csv")) -write_csv(export_counts, here("outputs", "counts_by_indicator.csv")) -write_csv(export_breaks, here("outputs", "breaks_by_indicator.csv")) -write_csv(export_summary, here("outputs", "summary_by_indicator.csv")) -write_csv(export_means, here("outputs", "mean_by_county.csv")) -``` - -# 10. Metadata table with sources {#metadata} -This is a table of the final output with some additional data such as municipality name and area added through GIS processes but not included in the R script. -
- -| Variable | Concept | acs table | acs variable | data source | Source Type | Universe Variable | -|------------|---------------------------------------------|-----------|--------------|-------------|-------------|-------------------| -| geoid20 | 11-digit tract GEOID | n/a | n/a | ACS 5-year | n/a | n/a | -| statefp20 | 2-digit state GEOID | n/a | n/a | ACS 5-year | n/a | n/a | -| countyfp20 | 3-digit county GEOID | n/a | n/a | ACS 5-year | n/a | n/a | -| name20 | Tract and county name | n/a | n/a | ACS 5-year | n/a | n/a | -| d_class | Disabled percentile class | n/a | n/a | calculated | calculated | S1810_C01_001 | -| d_cntest | Disabled count estimate | S1810 | S1810_C02_001_E | ACS 5-year | acs variable | S1810_C01_001 | -| d_cntmoe | Disabled count margin of error | S1810 | S1810_C02_001_M | ACS 5-year | acs variable | S1810_C01_001 | -| d_pctest | Disabled percent estimate | S1810 | S1810_C03_001_E | ACS 5-year | acs variable | S1810_C01_001 | -| d_pctile | Disabled percentile | n/a | n/a | calculated | calculated | S1810_C01_001 | -| d_pctmoe | Disabled percent margin of error | S1810 | S1810_C03_001_M | ACS 5-year | acs variable | S1810_C01_001 | -| d_score | Disabled percentile score | n/a | n/a | calculated | calculated | S1810_C01_001 | -| em_class | Ethnic minority percentile class | B03002 | n/a | calculated | calculated | B03002_001 | -| em_cntest | Ethnic minority count estimate | B03002 | B03002_012_E | ACS 5-year | acs variable | B03002_001 | -| em_cntmoe | Ethnic minority count margin of error | B03002 | B03002_012_M | ACS 5-year | acs variable | B03002_001 | -| em_pctest | Ethnic minority percent estimate | B03002 | n/a | calculated | calculated | B03002_001 | -| em_pctile | Ethnic minority percentile | B03002 | n/a | calculated | calculated | B03002_001 | -| em_pctmoe | Ethnic minority percent margin of error | B03002 | n/a | calculated | calculated | B03002_001 | -| em_score | Ethnic minority percentile score |
B03002 | n/a | calculated | calculated | B03002_001 | -| f_class | Female percentile class | S0101 | n/a | calculated | calculated | S0101_C01_001 | -| f_cntest | Female count estimate | S0101 | S0101_C05_001_E | ACS 5-year | acs variable | S0101_C01_001 | -| f_cntmoe | Female count margin of error | S0101 | S0101_C05_001_M | ACS 5-year | acs variable | S0101_C01_001 | -| f_pctest | Female percent estimate | S0101 | DP05_0003PE_E | ACS 5-year | acs variable | S0101_C01_001 | -| f_pctile | Female percentile | S0101 | n/a | calculated | calculated | S0101_C01_001 | -| f_pctmoe | Female percent margin of error | S0101 | DP05_0003PE_M | ACS 5-year | acs variable | S0101_C01_001 | -| f_score | Female percentile score | S0101 | n/a | calculated | calculated | S0101_C01_001 | -| fb_class | Foreign-born percentile class | B05012 | n/a | calculated | calculated | B05012_001 | -| fb_cntest | Foreign-born count estimate | B05012 | B05012_003_E | ACS 5-year | acs variable | B05012_001 | -| fb_cntmoe | Foreign-born count margin of error | B05012 | B05012_003_M | ACS 5-year | acs variable | B05012_001 | -| fb_pctest | Foreign-born percent estimate | B05012 | n/a | calculated | calculated | B05012_001 | -| fb_pctile | Foreign-born percentile | B05012 | n/a | calculated | calculated | B05012_001 | -| fb_pctmoe | Foreign-born percent margin of error | B05012 | n/a | calculated | calculated | B05012_001 | -| fb_score | Foreign-born percentile score | B05012 | n/a | calculated | calculated | B05012_001 | -| lep_class | Limited English proficiency percentile class | S1601 | n/a | calculated | calculated | S1601_C01_001 | -| lep_cntest | Limited English proficiency count estimate | S1601 | S1601_C05_001_E | ACS 5-year | acs variable | S1601_C01_001 | -| lep_cntmoe | Limited English proficiency count margin of error | S1601 | S1601_C05_001_M | ACS 5-year | acs variable | S1601_C01_001 | -| lep_pctest | Limited English proficiency percent estimate | S1601 | S1601_C06_001_E | ACS
5-year | acs variable | S1601_C01_001 | -| lep_pctile | Limited English proficiency percentile | S1601 | n/a | calculated | calculated | S1601_C01_001 | -| lep_pctmoe | Limited English proficiency percent margin of error | S1601 | S1601_C06_001_M | ACS 5-year | acs variable | S1601_C01_001 | -| lep_score | Limited English proficiency percentile score | S1601 | n/a | calculated | calculated | S1601_C01_001 | -| li_class | Low-income percentile class | n/a | n/a | calculated | calculated | S1701_C01_001 | -| li_cntest | Low-income count estimate | S1701 | S1701_C01_042_E | ACS 5-year | acs variable | S1701_C01_001 | -| li_cntmoe | Low-income count margin of error | S1701 | S1701_C01_042_M | ACS 5-year | acs variable | S1701_C01_001 | -| li_pctest | Low-income percent estimate | n/a | n/a | calculated | calculated | S1701_C01_001 | -| li_pctile | Low-income percentile | n/a | n/a | calculated | calculated | S1701_C01_001 | -| li_pctmoe | Low-income percent margin of error | n/a | n/a | calculated | calculated | S1701_C01_001 | -| li_score | Low-income percentile score | n/a | n/a | calculated | calculated | S1701_C01_001 | -| oa_class | Older adult percentile class | S0101 | n/a | calculated | calculated | B02001_001 | -| oa_cntest | Older adult count estimate | S0101 | S0101_C01_001_E | ACS 5-year | acs variable | B02001_001 | -| oa_cntmoe | Older adult count margin of error | S0101 | S0101_C01_001_M | ACS 5-year | acs variable | B02001_001 | -| oa_pctest | Older adult percent estimate | S0101 | S0101_C02_030_E | ACS 5-year | acs variable | B02001_001 | -| oa_pctile | Older adult percentile | S0101 | n/a | calculated | calculated | B02001_001 | -| oa_pctmoe | Older adult percent margin of error | S0101 | S0101_C02_030_M | ACS 5-year | acs variable | B02001_001 | -| oa_score | Older adult percentile score | S0101 | n/a | calculated | calculated | B02001_001 | -| rm_class | Racial minority percentile class | B02001 | n/a | calculated | calculated | B02001_001 | -| 
rm_cntest | Racial minority count estimate | B02001 | B02001_002_E | ACS 5-year | acs variable | B02001_001 | -| rm_cntmoe | Racial minority count margin of error | B02001 | B02001_002_M | ACS 5-year | acs variable | B02001_001 | -| rm_pctest | Racial minority percent estimate | B02001 | n/a | calculated | calculated | B02001_001 | -| rm_pctile | Racial minority percentile | B02001 | n/a | calculated | calculated | B02001_001 | -| rm_pctmoe | Racial minority percent margin of error | B02001 | n/a | calculated | calculated | B02001_001 | -| rm_score | Racial minority percentile score | B02001 | n/a | calculated | calculated | B02001_001 | -| y_class | Youth percentile class | B09001 | n/a | calculated | calculated | B03002_001 | -| y_cntest | Youth count estimate | B09001 | B09001_001_E | ACS 5-year | acs variable | B03002_001 | -| y_cntmoe | Youth count margin of error | B09001 | B09001_001_M | ACS 5-year | acs variable | B03002_001 | -| y_pctest | Youth population percentage estimate | B09001 | n/a | calculated | calculated | B03002_001 | -| y_pctile | Youth population percentile | B09001 | n/a | calculated | calculated | B03002_001 | -| y_pctmoe | Youth population percentage margin of error | B09001 | n/a | calculated | calculated | B03002_001 | -| y_score | Youth percentile score | B09001 | n/a | calculated | calculated | B03002_001 | -| ipd_score | Indicator of potential disadvantage score | n/a | n/a | calculated | calculated | n/a | -| u_tpopest | Total population estimate | B02001 | B02001_001_E | ACS 5-year | acs variable | B02001_001 | -| u_tpopmoe | Total population margin of error | B02001 | B02001_001_M | ACS 5-year | acs variable | B02001_001 | -| u_pop6est | Population over 6 years of age estimate | S1601 | S1601_C01_001_E | ACS 5-year | acs variable | S1601_C01_001 | -| u_pop6moe | Population over 6 years of age margin of error | S1601 | S1601_C01_001_M | ACS 5-year | acs variable | S1601_C01_001 | -| u_ppovest | Total population poverty rate estimate |
S1701 | S1701_C01_001_E | ACS 5-year | acs variable | S1701_C01_001 | -| u_ppovmoe | Total population poverty rate margin of error | S1701 | S1701_C01_001_M | ACS 5-year | acs variable | S1701_C01_001 | -| u_pnicest | Disabled universe total estimate | S1810 | S1810_C01_001_E | ACS 5-year | acs variable | S1810_C01_001 | -| u_pnicmoe | Disabled universe total margin of error | S1810 | S1810_C01_001_M | ACS 5-year | acs variable | S1810_C01_001 | -| namelsad | Geography name | n/a | n/a | ACS 5-year | acs variable | n/a | -| mun1 | First municipality name | n/a | n/a | calculated | calculated | n/a | -| mun2 | Second municipality name | n/a | n/a | calculated | calculated | n/a | -| mun3 | Third municipality name | n/a | n/a | calculated | calculated | n/a | -| co_name | County Name | n/a | n/a | calculated | calculated | n/a | -| state | State name | n/a | n/a | ACS 5-year | n/a | n/a | -| st_area(shape) | Area of a geometry | n/a | n/a | ACS 5-year | n/a | n/a | -| st_perimeter(shape) | Perimeter of the geometry | n/a | n/a | ACS 5-year | n/a | n/a | \ No newline at end of file diff --git a/documentation/script_reference.pdf b/documentation/script_reference.pdf deleted file mode 100644 index 116bd66..0000000 Binary files a/documentation/script_reference.pdf and /dev/null differ diff --git a/documentation/variables.csv b/documentation/variables.csv deleted file mode 100644 index 2270984..0000000 --- a/documentation/variables.csv +++ /dev/null @@ -1,28 +0,0 @@ -Field,year_2015,year_2016,year_2017,year_2018,year_2019,year_2020,year_2021 -disabled_universe,S1810_C01_001,S1810_C01_001,S1810_C01_001,S1810_C01_001,S1810_C01_001,S1810_C01_001,S1810_C01_001 -disabled_count,S1810_C02_001,S1810_C02_001,S1810_C02_001,S1810_C02_001,S1810_C02_001,S1810_C02_001,S1810_C02_001 -disabled_percent,S1810_C03_001,S1810_C03_001,S1810_C03_001,S1810_C03_001,S1810_C03_001,S1810_C03_001,S1810_C03_001 
-ethnic_minority_universe,B03002_001,B03002_001,B03002_001,B03002_001,B03002_001,B03002_001,B03002_001 -ethnic_minority_count,B03002_012,B03002_012,B03002_012,B03002_012,B03002_012,B03002_012,B03002_012 -ethnic_minority_percent,NA,NA,NA,NA,NA,NA,NA -female_universe,S0101_C01_001,S0101_C01_001,S0101_C01_001,S0101_C01_001,S0101_C01_001,S0101_C01_001,S0101_C01_001 -female_count,S0101_C03_001,S0101_C03_001,S0101_C05_001,S0101_C05_001,S0101_C05_001,S0101_C05_001,S0101_C05_001 -female_percent,DP05_0003PE,DP05_0003PE,DP05_0003PE,DP05_0003PE,DP05_0003PE,DP05_0003PE,DP05_0003PE -foreign_born_universe,B05012_001,B05012_001,B05012_001,B05012_001,B05012_001,B05012_001,B05012_001 -foreign_born_count,B05012_003,B05012_003,B05012_003,B05012_003,B05012_003,B05012_003,B05012_003 -foreign_born_percent,NA,NA,NA,NA,NA,NA,NA -limited_english_proficiency_universe,S1601_C01_001,S1601_C01_001,S1601_C01_001,S1601_C01_001,S1601_C01_001,S1601_C01_001,S1601_C01_001 -limited_english_proficiency_count,S1601_C05_001,S1601_C05_001,S1601_C05_001,S1601_C05_001,S1601_C05_001,S1601_C05_001,S1601_C05_001 -limited_english_proficiency_percent,S1601_C06_001,S1601_C06_001,S1601_C06_001,S1601_C06_001,S1601_C06_001,S1601_C06_001,S1601_C06_001 -low_income_universe,S1701_C01_001,S1701_C01_001,S1701_C01_001,S1701_C01_001,S1701_C01_001,S1701_C01_001,S1701_C01_001 -low_income_count,S1701_C01_042,S1701_C01_042,S1701_C01_042,S1701_C01_042,S1701_C01_042,S1701_C01_042,S1701_C01_042 -low_income_percent,NA,NA,NA,NA,NA,NA,NA -older_adults_universe,S0101_C01_001,S0101_C01_001,S0101_C01_001,S0101_C01_001,S0101_C01_001,S0101_C01_001,S0101_C01_001 -older_adults_count,DP05_0025E,DP05_0025E,DP05_0025E,S0101_C01_030,S0101_C01_030,S0101_C01_030,S0101_C01_030 -older_adults_percent,S0101_C02_028,S0101_C02_028,S0101_C02_030,S0101_C02_030,S0101_C02_030,S0101_C02_030,S0101_C02_030 -racial_minority_universe,B02001_001,B02001_001,B02001_001,B02001_001,B02001_001,B02001_001,B02001_001 
-racial_minority_count,B02001_002,B02001_002,B02001_002,B02001_002,B02001_002,B02001_002,B02001_003...B02001_008 -racial_minority_percent,NA,NA,NA,NA,NA,NA,NA -youth_universe,B03002_001,B03002_001,B03002_001,B03002_001,B03002_001,B03002_001,B03002_001 -youth_count,B09001_001,B09001_001,B09001_001,B09001_001,B09001_001,B09001_001,B09001_001 -youth_percent,NA,NA,NA,NA,NA,NA,NA diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..c12503a --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,56 @@ +site_name: DVRPC IPD +theme: + name: material + features: + - navigation.sections + - toc.integrate + - toc.follow + - navigation.top + - navigation.path + - navigation.tabs + - search.suggest + - search.highlight + - content.tabs.link + - content.code.annotation + - content.code.copy + language: en + palette: + - scheme: custom + accent: purple +extra_css: + - stylesheets/extra.css +markdown_extensions: + - admonition + - footnotes + - pymdownx.tabbed: + alternate_style: true + - pymdownx.tasklist: + custom_checkbox: true + - attr_list + - def_list + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.superfences + - pymdownx.details + - admonition + - pymdownx.arithmatex: + generic: true + - pymdownx.caret + - pymdownx.mark + - pymdownx.tilde + - pymdownx.critic + - pymdownx.mark + - pymdownx.details + - md_in_html + - toc: + permalink: true + - pymdownx.tasklist: + custom_checkbox: true +nav: + - About: index.md + - Documentation: script_reference.md + - Code Setup: setup.md diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..72ad464 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,75 @@ +asttokens==2.4.1 +attrs==23.2.0 +Babel==2.14.0 +backcall==0.2.0 +beautifulsoup4==4.12.3 +bleach==6.1.0 +certifi==2024.2.2 +charset-normalizer==3.3.2 +click==8.1.7 +colorama==0.4.6 +decorator==5.1.1 +defusedxml==0.7.1 +docopt==0.6.2 
+executing==2.0.1 +fastjsonschema==2.19.1 +ghp-import==2.1.0 +idna==3.6 +importlib_metadata==7.0.2 +importlib_resources==6.3.2 +ipython==8.12.3 +jedi==0.19.1 +Jinja2==3.1.3 +jsonschema==4.21.1 +jsonschema-specifications==2023.12.1 +jupyter_client==8.6.1 +jupyter_core==5.7.2 +jupyterlab_pygments==0.3.0 +Markdown==3.6 +MarkupSafe==2.1.5 +matplotlib-inline==0.1.6 +mergedeep==1.3.4 +mistune==3.0.2 +mkdocs==1.5.3 +mkdocs-material==9.5.14 +mkdocs-material-extensions==1.3.1 +nbclient==0.10.0 +nbconvert==7.16.2 +nbformat==5.10.3 +packaging==24.0 +paginate==0.5.6 +pandocfilters==1.5.1 +parso==0.8.3 +pathspec==0.12.1 +pickleshare==0.7.5 +pipreqs==0.5.0 +pkgutil_resolve_name==1.3.10 +platformdirs==4.2.0 +prompt-toolkit==3.0.43 +pure-eval==0.2.2 +Pygments==2.17.2 +pymdown-extensions==10.7.1 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +pytz==2024.1 +pywin32==306 +PyYAML==6.0.1 +pyyaml_env_tag==0.1 +pyzmq==25.1.2 +referencing==0.34.0 +regex==2023.12.25 +requests==2.31.0 +rpds-py==0.18.0 +six==1.16.0 +soupsieve==2.5 +stack-data==0.6.3 +tinycss2==1.2.1 +tornado==6.4 +traitlets==5.14.2 +typing_extensions==4.10.0 +urllib3==2.2.1 +watchdog==4.0.0 +wcwidth==0.2.13 +webencodings==0.5.1 +yarg==0.1.9 +zipp==3.18.1