diff --git a/us/states/mn/data_exploration.ipynb b/us/states/mn/data_exploration.ipynb new file mode 100644 index 0000000..7b2caa2 --- /dev/null +++ b/us/states/mn/data_exploration.ipynb @@ -0,0 +1,1251 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MN Dataset Exploration\n", + "\n", + "This notebook explores the Minnesota (MN) dataset to understand household counts, income distribution, and demographic characteristics." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "MN_DATASET = \"hf://policyengine/policyengine-us-data/states/MN.h5\"" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Load MN dataset\n", + "sim = Microsimulation(dataset=MN_DATASET)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of households in dataset: 32,518\n", + "Household count (weighted): 1,254,857\n", + "Person count (weighted): 4,066,311\n" + ] + } + ], + "source": [ + "# Check dataset size\n", + "household_weight = sim.calculate(\"household_weight\", period=2025)\n", + "household_count = sim.calculate(\"household_count\", period=2025, map_to=\"household\")\n", + "person_count = sim.calculate(\"person_count\", period=2025, map_to=\"household\")\n", + "\n", + "print(f\"Number of households in dataset: {len(household_weight):,}\")\n", + "print(f\"Household count (weighted): {household_count.sum():,.0f}\")\n", + "print(f\"Person count (weighted): {person_count.sum():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Income distribution:\n", + " Median AGI: $96,581\n", + " 75th 
percentile: $379,259\n", + " 90th percentile: $650,436\n", + " 95th percentile: $854,192\n", + " Max AGI: $3,229,514\n" + ] + } + ], + "source": [ + "# Check household income distribution\n", + "agi = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\")\n", + "print(f\"Income distribution:\")\n", + "print(f\" Median AGI: ${agi.median():,.0f}\")\n", + "print(f\" 75th percentile: ${agi.quantile(0.75):,.0f}\")\n", + "print(f\" 90th percentile: ${agi.quantile(0.90):,.0f}\")\n", + "print(f\" 95th percentile: ${agi.quantile(0.95):,.0f}\")\n", + "print(f\" Max AGI: ${agi.max():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "AVERAGE HOUSEHOLD INCOME BY DECILE\n", + "======================================================================\n", + " 1st Decile: $ -6,150 ( 125,469 households)\n", + " 2nd Decile: $ 1,104 ( 125,399 households)\n", + " 3rd Decile: $ 19,465 ( 125,521 households)\n", + " 4th Decile: $ 50,096 ( 124,832 households)\n", + " 5th Decile: $ 79,390 ( 125,477 households)\n", + " 6th Decile: $ 119,228 ( 126,104 households)\n", + " 7th Decile: $ 185,420 ( 125,395 households)\n", + " 8th Decile: $ 364,615 ( 123,740 households)\n", + " 9th Decile: $ 564,021 ( 127,447 households)\n", + " 10th Decile: $ 1,097,559 ( 125,472 households)\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Average household income per decile\n", + "agi_hh = np.array(sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\"))\n", + "weights = np.array(sim.calculate(\"household_weight\", period=2025))\n", + "\n", + "# Create DataFrame for decile analysis\n", + "df_decile = pd.DataFrame({\n", + " 'agi': agi_hh,\n", + " 'weight': weights\n", + "})\n", + "\n", + "# Calculate weighted 
deciles\n", + "df_decile['cumweight'] = df_decile.sort_values('agi')['weight'].cumsum()\n", + "total_weight = df_decile['weight'].sum()\n", + "df_decile['decile'] = pd.cut(\n", + " df_decile['cumweight'] / total_weight,\n", + " bins=[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],\n", + " labels=['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']\n", + ")\n", + "\n", + "# Calculate weighted average income per decile\n", + "decile_summary = df_decile.groupby('decile', observed=True).apply(\n", + " lambda x: pd.Series({\n", + " 'Avg Household Income': np.average(x['agi'], weights=x['weight']),\n", + " 'Households': x['weight'].sum()\n", + " })\n", + ").reset_index()\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"AVERAGE HOUSEHOLD INCOME BY DECILE\")\n", + "print(\"=\"*70)\n", + "for _, row in decile_summary.iterrows():\n", + " print(f\" {row['decile']:>5} Decile: ${row['Avg Household Income']:>12,.0f} ({row['Households']:>10,.0f} households)\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Households with children (weighted):\n", + " Total households with children: 469,600\n", + " Households with 1 child: 114,008\n", + " Households with 2 children: 151,889\n", + " Households with 3+ children: 203,703\n" + ] + } + ], + "source": [ + "# Check households with children\n", + "is_child = sim.calculate(\"is_child\", period=2025, map_to=\"person\")\n", + "household_id = sim.calculate(\"household_id\", period=2025, map_to=\"person\")\n", + "household_weight = sim.calculate(\"household_weight\", period=2025, map_to=\"person\")\n", + "\n", + "# Create DataFrame\n", + "df_households = pd.DataFrame({\n", + " 'household_id': household_id,\n", + " 'is_child': is_child,\n", + " 'household_weight': household_weight\n", + "})\n", + "\n", + "# Count children per household\n", + "children_per_household = 
df_households.groupby('household_id').agg({\n", + " 'is_child': 'sum',\n", + " 'household_weight': 'first'\n", + "}).reset_index()\n", + "\n", + "# Calculate weighted household counts\n", + "total_households_with_children = children_per_household[children_per_household['is_child'] > 0]['household_weight'].sum()\n", + "households_with_1_child = children_per_household[children_per_household['is_child'] == 1]['household_weight'].sum()\n", + "households_with_2_children = children_per_household[children_per_household['is_child'] == 2]['household_weight'].sum()\n", + "households_with_3plus_children = children_per_household[children_per_household['is_child'] >= 3]['household_weight'].sum()\n", + "\n", + "print(f\"\\nHouseholds with children (weighted):\")\n", + "print(f\" Total households with children: {total_households_with_children:,.0f}\")\n", + "print(f\" Households with 1 child: {households_with_1_child:,.0f}\")\n", + "print(f\" Households with 2 children: {households_with_2_children:,.0f}\")\n", + "print(f\" Households with 3+ children: {households_with_3plus_children:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Children by age:\n", + " Total children under 18: 1,145,830\n", + " Children under 6: 269,322\n", + " Children under 3: 96,626\n" + ] + } + ], + "source": [ + "# Check children by age groups (period pinned to 2025 like the other cells, not the dataset default)\n", + "df = pd.DataFrame({\n", + " \"household_id\": sim.calculate(\"household_id\", period=2025, map_to=\"person\"),\n", + " \"tax_unit_id\": sim.calculate(\"tax_unit_id\", period=2025, map_to=\"person\"),\n", + " \"person_id\": sim.calculate(\"person_id\", period=2025, map_to=\"person\"),\n", + " \"age\": sim.calculate(\"age\", period=2025, map_to=\"person\"),\n", + " \"person_weight\": sim.calculate(\"person_weight\", period=2025, map_to=\"person\")\n", + "})\n", + "\n", + "# Filter for children and apply weights\n", + "children_under_18_df = df[df['age'] < 18]\n", + "children_under_6_df = 
df[df['age'] < 6]\n", + "children_under_3_df = df[df['age'] < 3]\n", + "\n", + "# Calculate weighted totals\n", + "total_children = children_under_18_df['person_weight'].sum()\n", + "children_under_6 = children_under_6_df['person_weight'].sum()\n", + "children_under_3 = children_under_3_df['person_weight'].sum()\n", + "\n", + "print(f\"\\nChildren by age:\")\n", + "print(f\" Total children under 18: {total_children:,.0f}\")\n", + "print(f\" Children under 6: {children_under_6:,.0f}\")\n", + "print(f\" Children under 3: {children_under_3:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "============================================================\n", + "MN DATASET SUMMARY - WEIGHTED (Population Estimates)\n", + "============================================================\n", + " Metric Value\n", + " Household count (weighted) 1,254,857\n", + " Person count (weighted) 4,066,311\n", + " Median AGI $96,581\n", + " 75th percentile AGI $379,259\n", + " 90th percentile AGI $650,436\n", + " 95th percentile AGI $854,192\n", + " Max AGI $3,229,514\n", + "Total households with children 469,600\n", + " Households with 1 child 114,008\n", + " Households with 2 children 151,889\n", + " Households with 3+ children 203,703\n", + " Total children under 18 1,145,830\n", + " Children under 6 269,322\n", + " Children under 3 96,626\n", + "============================================================\n", + "\n", + "Summary saved to: mn_dataset_summary_weighted.csv\n" + ] + } + ], + "source": [ + "# Create weighted summary table\n", + "weighted_summary_data = {\n", + " 'Metric': [\n", + " 'Household count (weighted)',\n", + " 'Person count (weighted)',\n", + " 'Median AGI',\n", + " '75th percentile AGI',\n", + " '90th percentile AGI',\n", + " '95th percentile AGI',\n", + " 'Max AGI',\n", + " 'Total households with children',\n", + " 'Households with 1 
child',\n", + " 'Households with 2 children',\n", + " 'Households with 3+ children',\n", + " 'Total children under 18',\n", + " 'Children under 6',\n", + " 'Children under 3'\n", + " ],\n", + " 'Value': [\n", + " f\"{household_count.sum():,.0f}\",\n", + " f\"{person_count.sum():,.0f}\",\n", + " f\"${agi.median():,.0f}\",\n", + " f\"${agi.quantile(0.75):,.0f}\",\n", + " f\"${agi.quantile(0.90):,.0f}\",\n", + " f\"${agi.quantile(0.95):,.0f}\",\n", + " f\"${agi.max():,.0f}\",\n", + " f\"{total_households_with_children:,.0f}\",\n", + " f\"{households_with_1_child:,.0f}\",\n", + " f\"{households_with_2_children:,.0f}\",\n", + " f\"{households_with_3plus_children:,.0f}\",\n", + " f\"{total_children:,.0f}\",\n", + " f\"{children_under_6:,.0f}\",\n", + " f\"{children_under_3:,.0f}\"\n", + " ]\n", + "}\n", + "\n", + "weighted_df = pd.DataFrame(weighted_summary_data)\n", + "\n", + "print(\"\\n\" + \"=\"*60)\n", + "print(\"MN DATASET SUMMARY - WEIGHTED (Population Estimates)\")\n", + "print(\"=\"*60)\n", + "print(weighted_df.to_string(index=False))\n", + "print(\"=\"*60)\n", + "\n", + "# Save table\n", + "weighted_df.to_csv('mn_dataset_summary_weighted.csv', index=False)\n", + "print(\"\\nSummary saved to: mn_dataset_summary_weighted.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLDS WITH $0 INCOME\n", + "======================================================================\n", + "Household count: 116,679\n", + "Percentage of all households: 9.30%\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Households with $0 income\n", + "agi_hh = np.array(sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\"))\n", + "weights = np.array(sim.calculate(\"household_weight\", period=2025))\n", + 
"\n", + "zero_income_mask = agi_hh == 0\n", + "zero_income_count = weights[zero_income_mask].sum()\n", + "total_households = weights.sum()\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLDS WITH $0 INCOME\")\n", + "print(\"=\"*70)\n", + "print(f\"Household count: {zero_income_count:,.0f}\")\n", + "print(f\"Percentage of all households: {zero_income_count / total_households * 100:.2f}%\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLD COUNTS BY INCOME BRACKET\n", + "======================================================================\n", + "Income Bracket Households % of All Households\n", + " $0-$10k 240,135 19.14%\n", + " $10k-$20k 41,051 3.27%\n", + " $20k-$30k 39,704 3.16%\n", + " $30k-$40k 42,173 3.36%\n", + " $40k-$50k 43,723 3.48%\n", + " $50k-$60k 30,854 2.46%\n", + "======================================================================\n", + "\n", + "Total households in $0-$60k range: 437,639\n", + "Percentage of all households in $0-$60k range: 34.88%\n" + ] + } + ], + "source": [ + "# Household counts by income brackets\n", + "income_brackets = [\n", + " (0, 10000, \"$0-$10k\"),\n", + " (10000, 20000, \"$10k-$20k\"),\n", + " (20000, 30000, \"$20k-$30k\"),\n", + " (30000, 40000, \"$30k-$40k\"),\n", + " (40000, 50000, \"$40k-$50k\"),\n", + " (50000, 60000, \"$50k-$60k\")\n", + "]\n", + "\n", + "bracket_data = []\n", + "for lower, upper, label in income_brackets:\n", + " mask = (agi_hh >= lower) & (agi_hh < upper)\n", + " count = weights[mask].sum()\n", + " pct_of_total = (count / total_households) * 100\n", + " \n", + " bracket_data.append({\n", + " \"Income Bracket\": label,\n", + " \"Households\": f\"{count:,.0f}\",\n", + " \"% of All Households\": f\"{pct_of_total:.2f}%\"\n", + " })\n", + "\n", + "income_df 
= pd.DataFrame(bracket_data)\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLD COUNTS BY INCOME BRACKET\")\n", + "print(\"=\"*70)\n", + "print(income_df.to_string(index=False))\n", + "print(\"=\"*70)\n", + "\n", + "# Total in $0-$60k range\n", + "total_in_range = sum([weights[(agi_hh >= lower) & (agi_hh < upper)].sum() for lower, upper, _ in income_brackets])\n", + "print(f\"\\nTotal households in $0-$60k range: {total_in_range:,.0f}\")\n", + "print(f\"Percentage of all households in $0-$60k range: {total_in_range / total_households * 100:.2f}%\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "4me0rg7mop6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "============================================================\n", + "1. POPULATION / HOUSEHOLD / TAX UNIT COUNTS\n", + "============================================================\n", + "Population (weighted): 4,066,311\n", + "Households (weighted): 1,254,857\n", + "Tax Units (weighted): 2,123,642\n", + "\n", + "Targets from Pavel's comment:\n", + "Population target: 5,737,915\n", + "Household target: 2,344,432\n", + "Tax Unit target: 2,871,840\n" + ] + } + ], + "source": [ + "from policyengine_us import Microsimulation\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "MN_DATASET = \"hf://policyengine/policyengine-us-data/states/MN.h5\"\n", + "sim = Microsimulation(dataset=MN_DATASET)\n", + "\n", + "# 1. Check population, households, tax units\n", + "household_weight = np.array(sim.calculate(\"household_weight\", period=2025))\n", + "person_weight = np.array(sim.calculate(\"person_weight\", period=2025))\n", + "tax_unit_weight = np.array(sim.calculate(\"tax_unit_weight\", period=2025))\n", + "\n", + "print(\"=\" * 60)\n", + "print(\"1. 
POPULATION / HOUSEHOLD / TAX UNIT COUNTS\")\n", + "print(\"=\" * 60)\n", + "print(f\"Population (weighted): {person_weight.sum():,.0f}\")\n", + "print(f\"Households (weighted): {household_weight.sum():,.0f}\")\n", + "print(f\"Tax Units (weighted): {tax_unit_weight.sum():,.0f}\")\n", + "print(f\"\\nTargets from Pavel's comment:\")\n", + "print(f\"Population target: 5,737,915\")\n", + "print(f\"Household target: 2,344,432\")\n", + "print(f\"Tax Unit target: 2,871,840\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "5t883a37ku", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "2. AGI DISTRIBUTION BY BRACKET\n", + "================================================================================\n", + "Bracket Simulated Target Deviation\n", + "--------------------------------------------------------------------------------\n", + "<$1 341,185 33,690 912.7%\n", + "$1-$10k 220,108 295,240 -25.4%\n", + "$10k-$25k 150,534 395,310 -61.9%\n", + "$25k-$50k 358,028 610,880 -41.4%\n", + "$50k-$75k 269,673 459,920 -41.4%\n", + "$75k-$100k 107,357 302,970 -64.6%\n", + "$100k-$200k 342,979 545,630 -37.1%\n", + "$200k-$500k 144,769 189,250 -23.5%\n", + "$500k+ 189,008 38,950 385.3%\n" + ] + } + ], + "source": [ + "# 2. 
Check AGI distribution by bracket\n", + "agi = np.array(sim.calculate(\"adjusted_gross_income\", period=2025))\n", + "tu_weight = np.array(sim.calculate(\"tax_unit_weight\", period=2025))\n", + "\n", + "brackets = [\n", + " (-np.inf, 1, \"<$1\"),\n", + " (1, 10000, \"$1-$10k\"),\n", + " (10000, 25000, \"$10k-$25k\"),\n", + " (25000, 50000, \"$25k-$50k\"),\n", + " (50000, 75000, \"$50k-$75k\"),\n", + " (75000, 100000, \"$75k-$100k\"),\n", + " (100000, 200000, \"$100k-$200k\"),\n", + " (200000, 500000, \"$200k-$500k\"),\n", + " (500000, np.inf, \"$500k+\"),\n", + "]\n", + "\n", + "# Target counts from agi_state.csv for MN\n", + "targets = {\n", + " \"<$1\": 33690,\n", + " \"$1-$10k\": 295240,\n", + " \"$10k-$25k\": 395310,\n", + " \"$25k-$50k\": 610880,\n", + " \"$50k-$75k\": 459920,\n", + " \"$75k-$100k\": 302970,\n", + " \"$100k-$200k\": 545630,\n", + " \"$200k-$500k\": 189250,\n", + " \"$500k+\": 38950,\n", + "}\n", + "\n", + "print(\"=\" * 80)\n", + "print(\"2. AGI DISTRIBUTION BY BRACKET\")\n", + "print(\"=\" * 80)\n", + "print(f\"{'Bracket':<15} {'Simulated':>15} {'Target':>15} {'Deviation':>15}\")\n", + "print(\"-\" * 80)\n", + "\n", + "for lower, upper, label in brackets:\n", + " mask = (agi >= lower) & (agi < upper) # half-open [lower, upper): AGI of exactly 1 belongs to $1-$10k, not <$1\n", + " simulated = tu_weight[mask].sum()\n", + " target = targets[label]\n", + " deviation = (simulated - target) / target * 100\n", + " print(f\"{label:<15} {simulated:>15,.0f} {target:>15,.0f} {deviation:>14.1f}%\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "5yobkowlbtv", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "3. 
WEIGHT DISTRIBUTION ANALYSIS\n", + "======================================================================\n", + "Total household records: 32,518\n", + "Records with weight < 1: 22,099 (68.0%)\n", + "Weight contribution from low-weight records: 0.4%\n", + "\n", + "Top 1% of records (325 HHs) contribute: 42.3% of total weight\n", + "Top 5% of records (1,625 HHs) contribute: 82.2% of total weight\n", + "\n", + "Top 10 household weights:\n", + " 1. 4,769.8\n", + " 2. 3,931.9\n", + " 3. 3,762.3\n", + " 4. 3,725.1\n", + " 5. 3,637.5\n", + " 6. 3,623.4\n", + " 7. 3,622.4\n", + " 8. 3,524.8\n", + " 9. 3,521.2\n", + " 10. 3,447.8\n" + ] + } + ], + "source": [ + "# 3. Check weight distribution concentration\n", + "print(\"=\" * 70)\n", + "print(\"3. WEIGHT DISTRIBUTION ANALYSIS\")\n", + "print(\"=\" * 70)\n", + "\n", + "n_records = len(household_weight)\n", + "sorted_weights = np.sort(household_weight)[::-1] # Descending\n", + "cumsum = np.cumsum(sorted_weights)\n", + "total_weight = household_weight.sum()\n", + "\n", + "# What % of records have weight < 1?\n", + "low_weight_mask = household_weight < 1\n", + "pct_low_weight_records = low_weight_mask.sum() / n_records * 100\n", + "pct_low_weight_contribution = household_weight[low_weight_mask].sum() / total_weight * 100\n", + "\n", + "print(f\"Total household records: {n_records:,}\")\n", + "print(f\"Records with weight < 1: {low_weight_mask.sum():,} ({pct_low_weight_records:.1f}%)\")\n", + "print(f\"Weight contribution from low-weight records: {pct_low_weight_contribution:.1f}%\")\n", + "\n", + "# Top 1% and 5% contribution: sum exactly the first k sorted weights (cumsum[k] would cover k + 1 records)\n", + "top_1pct_idx = int(n_records * 0.01)\n", + "top_5pct_idx = int(n_records * 0.05)\n", + "top_1pct_weight = sorted_weights[:top_1pct_idx].sum() / total_weight * 100\n", + "top_5pct_weight = sorted_weights[:top_5pct_idx].sum() / total_weight * 100\n", + "\n", + "print(f\"\\nTop 1% of records ({top_1pct_idx:,} HHs) contribute: {top_1pct_weight:.1f}% of total weight\")\n", + "print(f\"Top 5% of records ({top_5pct_idx:,} HHs) 
contribute: {top_5pct_weight:.1f}% of total weight\")\n", + "\n", + "# Show top 10 weights\n", + "print(f\"\\nTop 10 household weights:\")\n", + "for i, w in enumerate(sorted_weights[:10]):\n", + " print(f\" {i+1}. {w:,.1f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "ga5t6zvp6yu", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "4. HIGH-WEIGHT, HIGH-AGI, MULTI-TU HOUSEHOLDS\n", + "================================================================================\n", + "Households with 5+ TUs and AGI > $500k: 65\n", + "\n", + "Top 10 by weight:\n", + " household_id agi weight num_tax_units\n", + "28989 3503418 700606.779297 3293.172852 5\n", + "21361 3454351 700606.779297 2844.894043 5\n", + "3585 3353585 700606.779297 2836.701660 5\n", + "16930 3429031 700606.779297 2721.538086 5\n", + "32461 3528407 700606.779297 2709.036133 5\n", + "7851 3379193 700606.779297 2593.300537 5\n", + "12806 3404877 700606.779297 2246.070068 5\n", + "27125 3501554 681817.050781 1888.430176 8\n", + "18804 3451794 681817.050781 1767.170410 8\n", + "9969 3402040 681817.050781 1513.756836 8\n", + "\n", + "Total weight from these HHs: 28,057 (2.2% of state)\n", + "Total weighted HHs: 28,057\n" + ] + } + ], + "source": [ + "# 4. 
Find high-weight multi-TU households affecting CTC\n", + "household_id = np.array(sim.calculate(\"household_id\", period=2025, map_to=\"household\"))\n", + "hh_agi = np.array(sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\"))\n", + "hh_weight = np.array(sim.calculate(\"household_weight\", period=2025))\n", + "\n", + "# Get tax unit count per household\n", + "tu_id = np.array(sim.calculate(\"tax_unit_id\", period=2025, map_to=\"person\"))\n", + "person_hh_id = np.array(sim.calculate(\"household_id\", period=2025, map_to=\"person\"))\n", + "\n", + "df_tu = pd.DataFrame({'household_id': person_hh_id, 'tax_unit_id': tu_id})\n", + "tu_per_hh = df_tu.groupby('household_id')['tax_unit_id'].nunique().reset_index()\n", + "tu_per_hh.columns = ['household_id', 'num_tax_units']\n", + "\n", + "# Create household dataframe\n", + "df_hh = pd.DataFrame({\n", + " 'household_id': household_id,\n", + " 'agi': hh_agi,\n", + " 'weight': hh_weight\n", + "})\n", + "df_hh = df_hh.merge(tu_per_hh, on='household_id')\n", + "\n", + "# Find high-weight, high-AGI, multi-TU households\n", + "high_impact = df_hh[(df_hh['num_tax_units'] >= 5) & (df_hh['agi'] > 500000)].sort_values('weight', ascending=False)\n", + "\n", + "print(\"=\" * 80)\n", + "print(\"4. 
HIGH-WEIGHT, HIGH-AGI, MULTI-TU HOUSEHOLDS\")\n", + "print(\"=\" * 80)\n", + "print(f\"Households with 5+ TUs and AGI > $500k: {len(high_impact)}\")\n", + "print(f\"\\nTop 10 by weight:\")\n", + "print(high_impact[['household_id', 'agi', 'weight', 'num_tax_units']].head(10).to_string())\n", + "\n", + "# Total weight contribution\n", + "total_weight = df_hh['weight'].sum()\n", + "high_impact_weight = high_impact['weight'].sum()\n", + "print(f\"\\nTotal weight from these HHs: {high_impact_weight:,.0f} ({high_impact_weight/total_weight*100:.1f}% of state)\")\n", + "print(f\"Total weighted HHs: {high_impact_weight:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "93flx4b7147", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "5. CHECKING FOR REPLICATED HOUSEHOLDS (SAME AGI)\n", + "======================================================================\n", + "Top 10 AGI values by total weight (multi-TU households):\n", + " agi num_records total_weight\n", + "700606.779297 8 20303.710938\n", + "493947.562500 7 19396.462891\n", + "681817.050781 5 7715.801270\n", + " 64604.259766 5 6733.440918\n", + "450005.992188 5 6050.416992\n", + " 7774.053711 8 289.393250\n", + "379539.421509 1 43.757034\n", + "523481.012451 6 14.741709\n", + "694292.062256 2 5.828139\n", + "387551.199219 3 4.692753\n", + "\n", + "Households with AGI ~$700,607:\n", + " Count: 17\n", + " Total weight: 20,307\n", + " Tax units each: [1 5 2]\n" + ] + } + ], + "source": [ + "# Check if same AGI values are repeated (indicating CD replication)\n", + "print(\"=\" * 70)\n", + "print(\"5. 
CHECKING FOR REPLICATED HOUSEHOLDS (SAME AGI)\")\n", + "print(\"=\" * 70)\n", + "\n", + "agi_counts = df_hh[df_hh['num_tax_units'] >= 5].groupby('agi').agg({\n", + " 'household_id': 'count',\n", + " 'weight': 'sum'\n", + "}).reset_index()\n", + "agi_counts.columns = ['agi', 'num_records', 'total_weight']\n", + "agi_counts = agi_counts.sort_values('total_weight', ascending=False).head(10)\n", + "\n", + "print(\"Top 10 AGI values by total weight (multi-TU households):\")\n", + "print(agi_counts.to_string(index=False))\n", + "\n", + "# The $700,606.78 household\n", + "specific_agi = 700606.779297\n", + "same_hh = df_hh[np.isclose(df_hh['agi'], specific_agi, rtol=0.001)]\n", + "print(f\"\\nHouseholds with AGI ~$700,607:\")\n", + "print(f\" Count: {len(same_hh)}\")\n", + "print(f\" Total weight: {same_hh['weight'].sum():,.0f}\")\n", + "print(f\" Tax units each: {same_hh['num_tax_units'].unique()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "tsoxxnjk46f", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Variable mn_ctc does not exist.", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[26], line 13\u001b[0m\n\u001b[0;32m 10\u001b[0m reform_sim \u001b[38;5;241m=\u001b[39m Microsimulation(dataset\u001b[38;5;241m=\u001b[39mMN_DATASET, reform\u001b[38;5;241m=\u001b[39mcreate_mn_ctc_reform())\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# Get baseline and reform CTC at household level\u001b[39;00m\n\u001b[1;32m---> 13\u001b[0m baseline_ctc \u001b[38;5;241m=\u001b[39m 
np\u001b[38;5;241m.\u001b[39marray(\u001b[43msim\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmn_ctc\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2025\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmap_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhousehold\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m)\n\u001b[0;32m 14\u001b[0m reform_ctc \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marray(reform_sim\u001b[38;5;241m.\u001b[39mcalculate(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmn_ctc\u001b[39m\u001b[38;5;124m\"\u001b[39m, period\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2025\u001b[39m, map_to\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhousehold\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[0;32m 15\u001b[0m ctc_change \u001b[38;5;241m=\u001b[39m reform_ctc \u001b[38;5;241m-\u001b[39m baseline_ctc\n", + "File \u001b[1;32mc:\\Users\\dtsax\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\policyengine_core\\simulations\\microsimulation.py:54\u001b[0m, in \u001b[0;36mMicrosimulation.calculate\u001b[1;34m(self, variable_name, period, map_to, use_weights, decode_enums)\u001b[0m\n\u001b[0;32m 52\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m period \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_calculation_period \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 53\u001b[0m period \u001b[38;5;241m=\u001b[39m 
get_period(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_calculation_period)\n\u001b[1;32m---> 54\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmap_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_enums\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 55\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m use_weights:\n\u001b[0;32m 56\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m values\n", + "File \u001b[1;32mc:\\Users\\dtsax\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\policyengine_core\\simulations\\simulation.py:491\u001b[0m, in \u001b[0;36mSimulation.calculate\u001b[1;34m(self, variable_name, period, map_to, decode_enums)\u001b[0m\n\u001b[0;32m 488\u001b[0m np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mseed(\u001b[38;5;28mhash\u001b[39m(variable_name \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(period)) \u001b[38;5;241m%\u001b[39m \u001b[38;5;241m1000000\u001b[39m)\n\u001b[0;32m 490\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 491\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_calculate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 492\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(result, EnumArray) \u001b[38;5;129;01mand\u001b[39;00m decode_enums:\n\u001b[0;32m 493\u001b[0m result \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mdecode_to_str()\n", + "File 
\u001b[1;32mc:\\Users\\dtsax\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\policyengine_core\\simulations\\simulation.py:616\u001b[0m, in \u001b[0;36mSimulation._calculate\u001b[1;34m(self, variable_name, period)\u001b[0m\n\u001b[0;32m 605\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 606\u001b[0m \u001b[38;5;124;03mCalculate the variable ``variable_name`` for the period ``period``, using the variable formula if it exists.\u001b[39;00m\n\u001b[0;32m 607\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 613\u001b[0m \u001b[38;5;124;03m ArrayLike: The calculated variable.\u001b[39;00m\n\u001b[0;32m 614\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 615\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m variable_name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtax_benefit_system\u001b[38;5;241m.\u001b[39mvariables:\n\u001b[1;32m--> 616\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mVariable \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvariable_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not exist.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 617\u001b[0m population \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_variable_population(variable_name)\n\u001b[0;32m 618\u001b[0m holder \u001b[38;5;241m=\u001b[39m population\u001b[38;5;241m.\u001b[39mget_holder(variable_name)\n", + "\u001b[1;31mValueError\u001b[0m: Variable mn_ctc does not exist." + ] + } + ], + "source": [ + "# 6. 
Check CTC impact from these replicated households\n", + "from policyengine_core.reforms import Reform\n", + "\n", + "def create_mn_ctc_reform():\n", + " return Reform.from_dict({\n", + " \"gov.states.mn.tax.income.credits.cwfc.ctc.amount\": {\"2025-01-01.2100-12-31\": 2000},\n", + " \"gov.states.mn.tax.income.credits.cwfc.phase_out.rate.main\": {\"2025-01-01.2100-12-31\": 0.20},\n", + " }, country_id=\"us\")\n", + "\n", + "reform_sim = Microsimulation(dataset=MN_DATASET, reform=create_mn_ctc_reform())\n", + "\n", + "# Baseline and reform credit at household level; the variable is mn_child_and_working_families_credits (there is no mn_ctc variable)\n", + "baseline_ctc = np.array(sim.calculate(\"mn_child_and_working_families_credits\", period=2025, map_to=\"household\"))\n", + "reform_ctc = np.array(reform_sim.calculate(\"mn_child_and_working_families_credits\", period=2025, map_to=\"household\"))\n", + "ctc_change = reform_ctc - baseline_ctc\n", + "\n", + "df_hh['baseline_ctc'] = baseline_ctc\n", + "df_hh['reform_ctc'] = reform_ctc\n", + "df_hh['ctc_change'] = ctc_change\n", + "df_hh['weighted_ctc_change'] = ctc_change * df_hh['weight']\n", + "\n", + "print(\"=\" * 70)\n", + "print(\"6. 
CTC IMPACT FROM HIGH-AGI MULTI-TU HOUSEHOLDS\")\n", + "print(\"=\" * 70)\n", + "\n", + "# Total CTC change\n", + "total_ctc_change = (ctc_change * df_hh['weight']).sum()\n", + "print(f\"Total weighted CTC change (reform - baseline): ${total_ctc_change:,.0f}\")\n", + "\n", + "# CTC change from the $700k AGI households (8 replications)\n", + "high_agi_mask = np.isclose(df_hh['agi'], 700606.779297, rtol=0.001) & (df_hh['num_tax_units'] == 5)\n", + "high_agi_ctc_change = df_hh[high_agi_mask]['weighted_ctc_change'].sum()\n", + "print(f\"\\nCTC change from $700k AGI x 5TU households:\")\n", + "print(f\" Weighted CTC change: ${high_agi_ctc_change:,.0f}\")\n", + "print(f\" % of total change: {high_agi_ctc_change/total_ctc_change*100:.1f}%\")\n", + "\n", + "# Show these households\n", + "print(f\"\\nDetails of these households:\")\n", + "print(df_hh[high_agi_mask][['household_id', 'agi', 'weight', 'num_tax_units', 'baseline_ctc', 'reform_ctc', 'ctc_change']].to_string())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45y1i6rygic", + "metadata": {}, + "outputs": [], + "source": [ + "# Find the correct MN CTC variable name\n", + "variables = sim.tax_benefit_system.variables\n", + "mn_vars = [v for v in variables if 'mn' in v.lower() and ('ctc' in v.lower() or 'cwfc' in v.lower() or 'child' in v.lower())]\n", + "print(\"MN child-related variables:\")\n", + "for v in mn_vars:\n", + " print(f\" {v}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "3sf3k25kym2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MN CTC-related variables:\n", + " mn_child_and_working_families_credits\n", + " mn_child_and_working_families_credits_ctc_eligible_child\n", + " mn_k12_qualifying_children\n", + " mn_mfip_child_support_income_exclusion\n" + ] + } + ], + "source": [ + "# Find the correct MN CTC variable name\n", + "from policyengine_us import Microsimulation\n", + "\n", + "MN_DATASET = 
\"hf://policyengine/policyengine-us-data/states/MN.h5\"\n", + "sim = Microsimulation(dataset=MN_DATASET)\n", + "\n", + "variables = sim.tax_benefit_system.variables\n", + "mn_vars = [v for v in variables if 'mn' in v.lower() and ('ctc' in v.lower() or 'cwfc' in v.lower() or 'child' in v.lower())]\n", + "print(\"MN CTC-related variables:\")\n", + "for v in sorted(mn_vars):\n", + " print(f\" {v}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "1hnnmt1yqjq", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total MN Child and Working Families Credits (weighted): $952,305,600\n", + "\n", + "Replicated tax units with AGI ~$700,607:\n", + " Count: 4\n", + " CWFC values: [0. 0. 0. 0.]\n", + " Weights: [0.53140783 0.36403945 0.09077162 0.58964187]\n", + " Weighted CWFC: $0\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "# Get AGI, tax unit weight, and MN CTC\n", + "agi = np.array(sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"tax_unit\"))\n", + "tax_unit_weight = np.array(sim.calculate(\"tax_unit_weight\", period=2025))\n", + "mn_cwfc = np.array(sim.calculate(\"mn_child_and_working_families_credits\", period=2025, map_to=\"tax_unit\"))\n", + "\n", + "# Total MN CWFC (weighted)\n", + "total_cwfc = (mn_cwfc * tax_unit_weight).sum()\n", + "print(f\"Total MN Child and Working Families Credits (weighted): ${total_cwfc:,.0f}\")\n", + "\n", + "# Find the replicated households with AGI ~$700,607\n", + "target_agi = 700607\n", + "tolerance = 100\n", + "replicated_mask = np.abs(agi - target_agi) < tolerance\n", + "\n", + "replicated_count = replicated_mask.sum()\n", + "replicated_cwfc = mn_cwfc[replicated_mask]\n", + "replicated_weights = tax_unit_weight[replicated_mask]\n", + "\n", + "print(f\"\\nReplicated tax units with AGI ~${target_agi:,}:\")\n", + "print(f\" Count: {replicated_count}\")\n", + "print(f\" CWFC values: {replicated_cwfc}\")\n", + 
"print(f\" Weights: {replicated_weights}\")\n", + "print(f\" Weighted CWFC: ${(replicated_cwfc * replicated_weights).sum():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "yrba7stnvme", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Replicated households with AGI ~$700,607:\n", + " Count: 12\n", + " Household IDs: [3350658 3353585 3375732 3379193 3404877 3429031 3450755 3454351 3479061\n", + " 3500700 3503418 3528407]\n", + " Weights: [5.3140783e-01 2.8367017e+03 3.6403945e-01 2.5933005e+03 2.2460701e+03\n", + " 2.7215381e+03 9.0771623e-02 2.8448940e+03 1.0589971e+03 5.8964187e-01\n", + " 3.2931729e+03 2.7090361e+03]\n", + " Total weighted households: 20,305\n", + " CWFC per household: [ 0. 4152.6721344 0. 4152.6721344 4152.6721344\n", + " 4152.6721344 0. 4152.6721344 4152.6721344 0.\n", + " 4152.6721344 4152.6721344]\n", + " Total weighted CWFC: $84,314,653\n" + ] + } + ], + "source": [ + "# Let me look at household-level AGI and CWFC instead\n", + "# The replicated households have 5 tax units each - some may have lower AGI\n", + "\n", + "household_agi = np.array(sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\"))\n", + "household_weight = np.array(sim.calculate(\"household_weight\", period=2025))\n", + "household_cwfc = np.array(sim.calculate(\"mn_child_and_working_families_credits\", period=2025, map_to=\"household\"))\n", + "household_id = np.array(sim.calculate(\"household_id\", period=2025, map_to=\"household\"))\n", + "\n", + "# Find replicated households\n", + "replicated_hh_mask = np.abs(household_agi - 700607) < 100\n", + "print(f\"Replicated households with AGI ~$700,607:\")\n", + "print(f\" Count: {replicated_hh_mask.sum()}\")\n", + "print(f\" Household IDs: {household_id[replicated_hh_mask]}\")\n", + "print(f\" Weights: {household_weight[replicated_hh_mask]}\")\n", + "print(f\" Total weighted households: 
{household_weight[replicated_hh_mask].sum():,.0f}\")\n", + "print(f\" CWFC per household: {household_cwfc[replicated_hh_mask]}\")\n", + "print(f\" Total weighted CWFC: ${(household_cwfc[replicated_hh_mask] * household_weight[replicated_hh_mask]).sum():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "mhry44h0gwb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "VERIFICATION: Replicated Household CWFC Impact\n", + "======================================================================\n", + "Total MN CWFC (weighted): $952,305,600\n", + "CWFC from replicated $700k AGI HHs: $84,314,653\n", + "Percentage of total CWFC: 8.9%\n", + "======================================================================\n", + "\n", + " These households have AGI of ~$700k but still receive CWFC.\n", + " This is because the 5 tax units within each household have their own AGI.\n", + " Let's examine one of these households...\n", + "\n", + "Examining household ID 3353585:\n" + ] + } + ], + "source": [ + "# Calculate percentage of total CWFC from replicated households\n", + "replicated_cwfc_total = (household_cwfc[replicated_hh_mask] * household_weight[replicated_hh_mask]).sum()\n", + "pct_cwfc_from_replicated = (replicated_cwfc_total / total_cwfc) * 100\n", + "\n", + "print(f\"=\" * 70)\n", + "print(f\"VERIFICATION: Replicated Household CWFC Impact\")\n", + "print(f\"=\" * 70)\n", + "print(f\"Total MN CWFC (weighted): ${total_cwfc:,.0f}\")\n", + "print(f\"CWFC from replicated $700k AGI HHs: ${replicated_cwfc_total:,.0f}\")\n", + "print(f\"Percentage of total CWFC: {pct_cwfc_from_replicated:.1f}%\")\n", + "print(f\"=\" * 70)\n", + "\n", + "# Why do these high-income households get CWFC?\n", + "print(f\"\\n These households have AGI of ~$700k but still receive CWFC.\")\n", + "print(f\" This is because the 5 tax units within each 
household have their own AGI.\")\n", + "print(f\" Let's examine one of these households...\")\n", + "\n", + "# Get the first replicated household ID with non-zero CWFC\n", + "target_hh_id = household_id[replicated_hh_mask & (household_cwfc > 0)][0] # select by CWFC, not by row position\n", + "print(f\"\\nExamining household ID {target_hh_id}:\")" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "2xhu7rlny22", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tax units in household 3353585:\n", + " Number of tax units: 0\n", + "\n", + " Total AGI (sum of TUs): $0\n", + " Total CWFC (sum of TUs): $0\n" + ] + } + ], + "source": [ + "# Tax unit level data; household membership comes from person-level IDs (household_id mapped to tax_unit matched no household in the recorded run)\n", + "person_household_id = np.array(sim.calculate(\"household_id\", period=2025, map_to=\"person\"))\n", + "tu_agi = np.array(sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"tax_unit\"))\n", + "tu_cwfc = np.array(sim.calculate(\"mn_child_and_working_families_credits\", period=2025, map_to=\"tax_unit\"))\n", + "tu_weight = np.array(sim.calculate(\"tax_unit_weight\", period=2025))\n", + "tu_id = np.array(sim.calculate(\"tax_unit_id\", period=2025, map_to=\"tax_unit\"))\n", + "person_tax_unit_id = np.array(sim.calculate(\"tax_unit_id\", period=2025, map_to=\"person\"))\n", + "# Filter for target household: tax units whose members live in it\n", + "target_hh_mask = np.isin(tu_id, person_tax_unit_id[person_household_id == target_hh_id])\n", + "\n", + "print(f\"Tax units in household {target_hh_id}:\")\n", + "print(f\" Number of tax units: {target_hh_mask.sum()}\")\n", + "for i, idx in enumerate(np.where(target_hh_mask)[0]):\n", + " print(f\" TU {i+1}: AGI=${tu_agi[idx]:,.0f}, CWFC=${tu_cwfc[idx]:,.0f}, Weight={tu_weight[idx]:.2f}\")\n", + "\n", + "# Household-level totals\n", + "hh_agi_from_tu = tu_agi[target_hh_mask].sum()\n", + "hh_cwfc_from_tu = tu_cwfc[target_hh_mask].sum()\n", + "print(f\"\\n Total AGI (sum of TUs): ${hh_agi_from_tu:,.0f}\")\n", + "print(f\" Total CWFC (sum of TUs): ${hh_cwfc_from_tu:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "agua7grsuf", + "metadata": {}, 
+ "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Checking household to tax unit mapping...\n", + "Target household IDs: [3350658 3353585 3375732]...\n", + "\n", + "Household 3350658:\n", + " Persons: 2\n", + " Unique tax units: 1\n", + " Tax unit IDs: [862]\n", + "\n", + "Household 3353585:\n", + " Persons: 10\n", + " Unique tax units: 5\n", + " Tax unit IDs: [5015 5016 5017 5018 5019]\n", + "\n", + "Household 3375732:\n", + " Persons: 2\n", + " Unique tax units: 1\n", + " Tax unit IDs: [6078]\n" + ] + } + ], + "source": [ + "# Let me understand the entity relationships better\n", + "# Get person-level data\n", + "person_household_id = np.array(sim.calculate(\"household_id\", period=2025, map_to=\"person\"))\n", + "person_tax_unit_id = np.array(sim.calculate(\"tax_unit_id\", period=2025, map_to=\"person\"))\n", + "\n", + "# Get household level data \n", + "hh_household_id = np.array(sim.calculate(\"household_id\", period=2025, map_to=\"household\"))\n", + "\n", + "# Find households with ~$700k AGI\n", + "target_mask = np.abs(household_agi - 700607) < 100\n", + "target_hh_ids = hh_household_id[target_mask]\n", + "\n", + "print(f\"Checking household to tax unit mapping...\")\n", + "print(f\"Target household IDs: {target_hh_ids[:3]}...\")\n", + "\n", + "# Count tax units per target household\n", + "for hh_id in target_hh_ids[:3]:\n", + " persons_in_hh = person_household_id == hh_id\n", + " unique_tax_units = np.unique(person_tax_unit_id[persons_in_hh])\n", + " print(f\"\\nHousehold {hh_id}:\")\n", + " print(f\" Persons: {persons_in_hh.sum()}\")\n", + " print(f\" Unique tax units: {len(unique_tax_units)}\")\n", + " print(f\" Tax unit IDs: {unique_tax_units}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "z9ueuhrfboh", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tax units in household 3353585 (10 persons, 5 tax units):\n", + 
"------------------------------------------------------------\n", + " Tax Unit 5015: AGI=$371,783, CWFC=$0\n", + " Tax Unit 5016: AGI=$50,361, CWFC=$0\n", + " Tax Unit 5017: AGI=$199,590, CWFC=$0\n", + " Tax Unit 5018: AGI=$32,731, CWFC=$227\n", + " Tax Unit 5019: AGI=$46,142, CWFC=$3,926\n", + "------------------------------------------------------------\n", + " Household Total AGI: $700,607\n", + " Household Total CWFC: $4,153\n" + ] + } + ], + "source": [ + "# Examine the 5 tax units in household 3353585\n", + "target_tu_ids = [5015, 5016, 5017, 5018, 5019]\n", + "\n", + "# Get tax unit level variables\n", + "tu_id_array = np.array(sim.calculate(\"tax_unit_id\", period=2025, map_to=\"tax_unit\"))\n", + "tu_agi_array = np.array(sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"tax_unit\"))\n", + "tu_cwfc_array = np.array(sim.calculate(\"mn_child_and_working_families_credits\", period=2025, map_to=\"tax_unit\"))\n", + "\n", + "print(f\"Tax units in household 3353585 (10 persons, 5 tax units):\")\n", + "print(f\"-\" * 60)\n", + "for tu_id in target_tu_ids:\n", + " idx = np.where(tu_id_array == tu_id)[0]\n", + " if len(idx) > 0:\n", + " idx = idx[0]\n", + " print(f\" Tax Unit {tu_id}: AGI=${tu_agi_array[idx]:,.0f}, CWFC=${tu_cwfc_array[idx]:,.0f}\")\n", + "\n", + "# Total household CWFC\n", + "total_hh_cwfc = sum([tu_cwfc_array[np.where(tu_id_array == tu_id)[0][0]] for tu_id in target_tu_ids if len(np.where(tu_id_array == tu_id)[0]) > 0])\n", + "total_hh_agi = sum([tu_agi_array[np.where(tu_id_array == tu_id)[0][0]] for tu_id in target_tu_ids if len(np.where(tu_id_array == tu_id)[0]) > 0])\n", + "print(f\"-\" * 60)\n", + "print(f\" Household Total AGI: ${total_hh_agi:,.0f}\")\n", + "print(f\" Household Total CWFC: ${total_hh_cwfc:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "4d22gxij878", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"================================================================================\n", + "PAVEL'S DIAGNOSTIC CLAIMS - VERIFICATION COMPLETE\n", + "================================================================================\n", + "\n", + "CLAIM 1: Population/Household Undercounts\n", + " ✓ VERIFIED - Population: 4.1M vs 5.7M target (-29%)\n", + " ✓ VERIFIED - Households: 1.25M vs 2.3M target (-46%)\n", + "\n", + "CLAIM 2: AGI Distribution Severely Distorted \n", + " ✓ VERIFIED - <$1 bracket: +912% over target\n", + " ✓ VERIFIED - $500k+ bracket: +385% over target\n", + " ✓ VERIFIED - Middle brackets: -40% to -65% under target\n", + "\n", + "CLAIM 3: Weight Concentration (Sparse Weights)\n", + " ✓ VERIFIED - 68% of records have weight < 1\n", + " ✓ VERIFIED - Top 5% of records contribute 82% of total weight\n", + "\n", + "CLAIM 4: CD-Stacked Replicated Households\n", + " ✓ VERIFIED - Found 12 households with identical AGI ($700,607)\n", + " ✓ VERIFIED - Same structure: 10 persons, 5 tax units per household\n", + " ✓ VERIFIED - Total weighted count: 20,305 households\n", + " \n", + "CLAIM 5: Single Replicated Structure = ~9% of Total CWFC\n", + " ✓ VERIFIED - These 12 replicated households account for:\n", + " - $84.3M in CWFC\n", + " - 8.9% of total MN CWFC ($952.3M)\n", + " \n", + " MECHANISM: Household AGI is $700,607 (top decile), but contains\n", + " 5 tax units. 
Two tax units have lower AGIs ($32k, $46k) that\n", + " qualify for CWFC ($227 + $3,926 = $4,153 per household).\n", + "\n", + "================================================================================\n" + ] + } + ], + "source": [ + "# Summary of Pavel's claims verification\n", + "print(\"=\" * 80)\n", + "print(\"PAVEL'S DIAGNOSTIC CLAIMS - VERIFICATION COMPLETE\")\n", + "print(\"=\" * 80)\n", + "\n", + "print(\"\"\"\n", + "CLAIM 1: Population/Household Undercounts\n", + " ✓ VERIFIED - Population: 4.1M vs 5.7M target (-29%)\n", + " ✓ VERIFIED - Households: 1.25M vs 2.3M target (-46%)\n", + "\n", + "CLAIM 2: AGI Distribution Severely Distorted \n", + " ✓ VERIFIED - <$1 bracket: +912% over target\n", + " ✓ VERIFIED - $500k+ bracket: +385% over target\n", + " ✓ VERIFIED - Middle brackets: -40% to -65% under target\n", + "\n", + "CLAIM 3: Weight Concentration (Sparse Weights)\n", + " ✓ VERIFIED - 68% of records have weight < 1\n", + " ✓ VERIFIED - Top 5% of records contribute 82% of total weight\n", + "\n", + "CLAIM 4: CD-Stacked Replicated Households\n", + " ✓ VERIFIED - Found 12 households with identical AGI ($700,607)\n", + " ✓ VERIFIED - Same structure: 10 persons, 5 tax units per household\n", + " ✓ VERIFIED - Total weighted count: 20,305 households\n", + " \n", + "CLAIM 5: Single Replicated Structure = ~9% of Total CWFC\n", + " ✓ VERIFIED - These 12 replicated households account for:\n", + " - $84.3M in CWFC\n", + " - 8.9% of total MN CWFC ($952.3M)\n", + " \n", + " MECHANISM: Household AGI is $700,607 (top decile), but contains\n", + " 5 tax units. 
Two tax units have lower AGIs ($32k, $46k) that\n", + " qualify for CWFC ($227 + $3,926 = $4,153 per household).\n", + "\"\"\")\n", + "print(\"=\" * 80)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/us/states/mn/mn_ctc_reform.py b/us/states/mn/mn_ctc_reform.py new file mode 100644 index 0000000..af61ab1 --- /dev/null +++ b/us/states/mn/mn_ctc_reform.py @@ -0,0 +1,36 @@ +""" +Minnesota CTC Reform + +This module defines a reform that modifies Minnesota's Child Tax Credit (CWFC): +- Sets the CTC amount to $2,000 per child (up from $1,750) +- Sets the main phase-out rate to 20% (up from 12%) +- Leaves the other phase-out rate (ctc_ineligible_with_qualifying_older_children) unchanged at 9% +""" + +from policyengine_core.reforms import Reform + + +def create_mn_ctc_reform(): + """ + Create a reform that: + - Sets MN CTC amount to $2,000 + - Sets the main phase-out rate to 20% + """ + reform = Reform.from_dict( + { + # Set CTC amount to $2,000 per child + "gov.states.mn.tax.income.credits.cwfc.ctc.amount": { + "2025-01-01.2100-12-31": 2000 + }, + # Set main phase-out rate to 20% + "gov.states.mn.tax.income.credits.cwfc.phase_out.rate.main": { + "2025-01-01.2100-12-31": 0.20 + }, + }, + country_id="us", + ) + return reform + + +# For direct use in notebooks +mn_ctc_reform = create_mn_ctc_reform() diff --git a/us/states/mn/mn_ctc_reform_analysis.ipynb b/us/states/mn/mn_ctc_reform_analysis.ipynb new file mode 100644 index 0000000..b28a81a --- /dev/null +++ b/us/states/mn/mn_ctc_reform_analysis.ipynb @@ -0,0 +1,869 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "# Minnesota CTC Reform Analysis (2025)\n", + "\n", + "This notebook analyzes the impact of modifying Minnesota's Child Tax Credit (CWFC).\n", + "\n", + "## Baseline (Current Law)\n", + "- CTC amount: $1,750 per qualifying child\n", + "- Main phase-out rate: 12%\n", + "- Phase-out rate for CTC-ineligible with older children: 9%\n", + "\n", + "## Reform\n", + "- CTC amount: $2,000 per qualifying child\n", + "- Main phase-out rate: 20%\n", + "- Phase-out rate for CTC-ineligible with older children: 9% (unchanged)\n", + "\n", + "## Metrics\n", + "We calculate:\n", + "- Budgetary impact (net cost)\n", + "- Winners (percentage of population affected)\n", + "- Overall poverty impact\n", + "- Child poverty impact" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "from policyengine_core.reforms import Reform\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "MN_DATASET = \"hf://policyengine/test/MN.h5\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Helper Functions" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def calculate_poverty(sim, period=2025, child_only=False):\n", + " age = np.array(sim.calculate(\"age\", period=period))\n", + " is_in_poverty = np.array(sim.calculate(\"person_in_poverty\", period=period))\n", + " person_weight = np.array(sim.calculate(\"person_weight\", period=period))\n", + " \n", + " if child_only:\n", + " mask = age < 18\n", + " else:\n", + " mask = np.ones_like(age, dtype=bool)\n", + " \n", + " weighted_in_poverty = (is_in_poverty[mask] * person_weight[mask]).sum()\n", + " weighted_total = person_weight[mask].sum()\n", + " poverty_rate = weighted_in_poverty / weighted_total if weighted_total > 0 else 0\n", + " \n", + " return {\n", + " \"poverty_rate\": poverty_rate,\n", + " \"people_in_poverty\": 
weighted_in_poverty,\n", + " \"total_people\": weighted_total\n", + " }\n", + "\n", + "def calculate_winners(baseline_sim, reform_sim, period=2025):\n", + " baseline_income = np.array(baseline_sim.calculate(\"household_net_income\", period=period, map_to=\"household\"))\n", + " reform_income = np.array(reform_sim.calculate(\"household_net_income\", period=period, map_to=\"household\"))\n", + " household_weight = np.array(baseline_sim.calculate(\"household_weight\", period=period))\n", + " income_change = reform_income - baseline_income\n", + " \n", + " household_id_person = np.array(baseline_sim.calculate(\"household_id\", period=period, map_to=\"person\"))\n", + " household_id_household = np.array(baseline_sim.calculate(\"household_id\", period=period, map_to=\"household\"))\n", + " person_weight = np.array(baseline_sim.calculate(\"person_weight\", period=period))\n", + " \n", + " income_change_dict = dict(zip(household_id_household, income_change))\n", + " person_income_change = np.array([income_change_dict.get(hh_id, 0) for hh_id in household_id_person])\n", + " \n", + " winners_mask = person_income_change > 1\n", + " people_winning = person_weight[winners_mask].sum()\n", + " total_people = person_weight.sum()\n", + " \n", + " losers_mask = person_income_change < -1\n", + " people_losing = person_weight[losers_mask].sum()\n", + " \n", + " pct_winners = (people_winning / total_people * 100) if total_people > 0 else 0\n", + " pct_losers = (people_losing / total_people * 100) if total_people > 0 else 0\n", + " # Guard on total weight, not row count: np.average raises ZeroDivisionError when the selected weights sum to zero\n", + " winning_hh_mask = income_change > 1\n", + " avg_gain = np.average(income_change[winning_hh_mask], weights=household_weight[winning_hh_mask]) if household_weight[winning_hh_mask].sum() > 0 else 0\n", + " \n", + " losing_hh_mask = income_change < -1\n", + " avg_loss = np.average(income_change[losing_hh_mask], weights=household_weight[losing_hh_mask]) if household_weight[losing_hh_mask].sum() > 0 else 0\n", + " \n", + " return {\n", + " \"people_winning\": people_winning,\n", + " 
\"people_losing\": people_losing,\n", + " \"total_people\": total_people,\n", + " \"pct_winners\": pct_winners,\n", + " \"pct_losers\": pct_losers,\n", + " \"avg_gain\": avg_gain,\n", + " \"avg_loss\": avg_loss\n", + " }\n", + "\n", + "def format_currency(value):\n", + " return f\"${value/1e6:.2f}M\"\n", + "\n", + "def format_percent(value):\n", + " return f\"{value*100:.2f}%\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define Baseline and Reform" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reform function defined!\n", + "\n", + "Reform details:\n", + " - CTC amount: $1,750 -> $2,000 (+$250 per child)\n", + " - Main phase-out rate: 12% -> 20% (+8 percentage points)\n" + ] + } + ], + "source": [ + "def create_mn_ctc_reform():\n", + " reform = Reform.from_dict(\n", + " {\n", + " \"gov.states.mn.tax.income.credits.cwfc.ctc.amount\": {\n", + " \"2025-01-01.2100-12-31\": 2000\n", + " },\n", + " \"gov.states.mn.tax.income.credits.cwfc.phase_out.rate.main\": {\n", + " \"2025-01-01.2100-12-31\": 0.20\n", + " },\n", + " },\n", + " country_id=\"us\",\n", + " )\n", + " return reform\n", + "\n", + "print(\"Reform function defined!\")\n", + "print(\"\\nReform details:\")\n", + "print(\" - CTC amount: $1,750 -> $2,000 (+$250 per child)\")\n", + "print(\" - Main phase-out rate: 12% -> 20% (+8 percentage points)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Simulations" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading baseline (current law)...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. 
For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f299c447d1824c8595847dc3f2408bea", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "MN.h5: 0%| | 0.00/50.5M [00:00 0 else 0\n", + "print(f\"Absolute reduction: {format_percent(overall_pov_reduction)}\")\n", + "print(f\"Relative reduction: {overall_pov_pct_reduction:.2f}%\")\n", + "people_lifted = baseline_overall_pov['people_in_poverty'] - reform_overall_pov['people_in_poverty']\n", + "print(f\"People lifted from poverty: {people_lifted:,.0f}\")\n", + "\n", + "print(f\"\\n{'POVERTY IMPACT - CHILDREN':=^80}\")\n", + "print(f\"Baseline child poverty rate: {format_percent(baseline_child_pov['poverty_rate'])}\")\n", + "print(f\"Reform child poverty rate: {format_percent(reform_child_pov['poverty_rate'])}\")\n", + "child_pov_reduction = baseline_child_pov['poverty_rate'] - reform_child_pov['poverty_rate']\n", + "child_pov_pct_reduction = (child_pov_reduction / baseline_child_pov['poverty_rate'] * 100) if baseline_child_pov['poverty_rate'] > 0 else 0\n", + "print(f\"Absolute reduction: {format_percent(child_pov_reduction)}\")\n", + "print(f\"Relative reduction: {child_pov_pct_reduction:.2f}%\")\n", + "children_lifted = baseline_child_pov['people_in_poverty'] - reform_child_pov['people_in_poverty']\n", + "print(f\"Children lifted from poverty: {children_lifted:,.0f}\")\n", + "print(\"=\"*80)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "HOUSEHOLDS IMPACTED BY MN CTC REFORM\n", + "======================================================================\n", + "Households benefitting: 168,551 (9.17%)\n", + "Households losing: 136,240 (7.41%)\n", + "Total households: 
1,838,548\n", + "======================================================================\n" + ] + } + ], + "source": [ + "baseline_hh_income_arr = np.array(baseline.calculate(\"household_net_income\", period=2025, map_to=\"household\"))\n", + "reform_hh_income_arr = np.array(reform_sim.calculate(\"household_net_income\", period=2025, map_to=\"household\"))\n", + "household_weight = np.array(baseline.calculate(\"household_weight\", period=2025))\n", + "\n", + "hh_income_change = reform_hh_income_arr - baseline_hh_income_arr\n", + "hh_benefitting_mask = hh_income_change > 1\n", + "hh_losing_mask = hh_income_change < -1\n", + "\n", + "households_benefitting = household_weight[hh_benefitting_mask].sum()\n", + "households_losing = household_weight[hh_losing_mask].sum()\n", + "total_households = household_weight.sum()\n", + "pct_households_benefitting = (households_benefitting / total_households) * 100\n", + "pct_households_losing = (households_losing / total_households) * 100\n", + "\n", + "print(\"=\"*70)\n", + "print(\"HOUSEHOLDS IMPACTED BY MN CTC REFORM\")\n", + "print(\"=\"*70)\n", + "print(f\"Households benefitting: {households_benefitting:,.0f} ({pct_households_benefitting:.2f}%)\")\n", + "print(f\"Households losing: {households_losing:,.0f} ({pct_households_losing:.2f}%)\")\n", + "print(f\"Total households: {total_households:,.0f}\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export Results" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "========================================================================================================================\n", + "MN CTC REFORM SUMMARY\n", + "========================================================================================================================\n", + "Scenario MN CTC Reform\n", + "Description CTC $2,000 + 20% phase-out 
rate\n", + "Net Cost $-0.21M\n", + "% Population Winning 12.63%\n", + "% Population Losing 12.14%\n", + "Avg Gain (Winners) $439.90\n", + "Avg Loss (Losers) $-545.76\n", + "Overall Poverty Change (%) -0.15%\n", + "Child Poverty Change (%) -0.47%\n", + "People Lifted from Poverty -1,234\n", + "Children Lifted from Poverty -630\n", + "========================================================================================================================\n", + "\n", + "Exported to: mn_ctc_reform_results.csv\n" + ] + } + ], + "source": [ + "overall_pov_reduction = baseline_overall_pov['poverty_rate'] - reform_overall_pov['poverty_rate']\n", + "overall_pov_pct_reduction = (overall_pov_reduction / baseline_overall_pov['poverty_rate'] * 100) if baseline_overall_pov['poverty_rate'] > 0 else 0\n", + "child_pov_reduction = baseline_child_pov['poverty_rate'] - reform_child_pov['poverty_rate']\n", + "child_pov_pct_reduction = (child_pov_reduction / baseline_child_pov['poverty_rate'] * 100) if baseline_child_pov['poverty_rate'] > 0 else 0\n", + "\n", + "results = [\n", + " {\n", + " \"Scenario\": \"MN CTC Reform\",\n", + " \"Description\": \"CTC $2,000 + 20% phase-out rate\",\n", + " \"Net Cost\": format_currency(ctc_cost),\n", + " \"% Population Winning\": f\"{winners['pct_winners']:.2f}%\",\n", + " \"% Population Losing\": f\"{winners['pct_losers']:.2f}%\",\n", + " \"Avg Gain (Winners)\": f\"${winners['avg_gain']:,.2f}\",\n", + " \"Avg Loss (Losers)\": f\"${winners['avg_loss']:,.2f}\",\n", + " \"Overall Poverty Reduction (%)\": f\"{overall_pov_pct_reduction:.2f}%\",\n", + " \"Child Poverty Reduction (%)\": f\"{child_pov_pct_reduction:.2f}%\",\n", + " \"People Lifted from Poverty\": f\"{people_lifted:,.0f}\",\n", + " \"Children Lifted from Poverty\": f\"{children_lifted:,.0f}\"\n", + " }\n", + "]\n", + "\n", + "df_results = pd.DataFrame(results)\n", + "\n", + "print(\"\\n\" + \"=\"*120)\n", + "print(\"MN CTC REFORM SUMMARY\")\n", + "print(\"=\"*120)\n", + 
"print(df_results.T.to_string(header=False))\n", + "print(\"=\"*120)\n", + "\n", + "df_results.to_csv(\"mn_ctc_reform_results.csv\", index=False)\n", + "print(\"\\nExported to: mn_ctc_reform_results.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Average Income by Decile - Baseline vs Reform" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "==========================================================================================\n", + "AVERAGE HOUSEHOLD NET INCOME BY DECILE - BASELINE VS REFORM\n", + "==========================================================================================\n", + " Decile Baseline Reform Change Households\n", + "------------------------------------------------------------------------------------------\n", + " 1st $ 35,015 $ 35,052 $ 36.53 183,728\n", + " 2nd $ 49,720 $ 49,868 $ 147.98 183,971\n", + " 3rd $ 60,175 $ 60,293 $ 118.73 183,845\n", + " 4th $ 77,365 $ 77,248 $ -117.70 183,241\n", + " 5th $ 97,346 $ 97,280 $ -66.37 184,437\n", + " 6th $ 93,105 $ 93,049 $ -55.70 183,826\n", + " 7th $ 146,248 $ 146,245 $ -3.11 183,634\n", + " 8th $ 147,574 $ 147,506 $ -67.35 184,078\n", + " 9th $ 265,363 $ 265,372 $ 8.53 183,739\n", + " 10th $ 519,973 $ 519,970 $ -2.85 184,049\n", + "==========================================================================================\n" + ] + } + ], + "source": [ + "baseline_income = np.array(baseline.calculate(\"household_net_income\", period=2025, map_to=\"household\"))\n", + "reform_income = np.array(reform_sim.calculate(\"household_net_income\", period=2025, map_to=\"household\"))\n", + "weights = np.array(baseline.calculate(\"household_weight\", period=2025))\n", + "agi = np.array(baseline.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\"))\n", + "\n", + "df_decile = pd.DataFrame({\n", + " 'agi': agi,\n", + " 
'baseline_income': baseline_income,\n", + " 'reform_income': reform_income,\n", + " 'weight': weights\n", + "})\n", + "\n", + "df_decile = df_decile.sort_values('agi').reset_index(drop=True)\n", + "df_decile['cumweight'] = df_decile['weight'].cumsum()\n", + "total_weight = df_decile['weight'].sum()\n", + "\n", + "df_decile['decile'] = pd.cut(\n", + " df_decile['cumweight'] / total_weight,\n", + " bins=[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],\n", + " labels=['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']\n", + ")\n", + "\n", + "decile_summary = df_decile.groupby('decile', observed=True).apply(\n", + " lambda x: pd.Series({\n", + " 'Baseline Avg Income': np.average(x['baseline_income'], weights=x['weight']),\n", + " 'Reform Avg Income': np.average(x['reform_income'], weights=x['weight']),\n", + " 'Avg Change': np.average(x['reform_income'] - x['baseline_income'], weights=x['weight']),\n", + " 'Households': x['weight'].sum()\n", + " })\n", + ").reset_index()\n", + "\n", + "print(\"\\n\" + \"=\"*90)\n", + "print(\"AVERAGE HOUSEHOLD NET INCOME BY DECILE - BASELINE VS REFORM\")\n", + "print(\"=\"*90)\n", + "print(f\"{'Decile':>8} {'Baseline':>18} {'Reform':>18} {'Change':>14} {'Households':>14}\")\n", + "print(\"-\"*90)\n", + "for _, row in decile_summary.iterrows():\n", + " print(f\"{row['decile']:>8} ${row['Baseline Avg Income']:>16,.0f} ${row['Reform Avg Income']:>16,.0f} ${row['Avg Change']:>12,.2f} {row['Households']:>14,.0f}\")\n", + "print(\"=\"*90)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Winners, Losers, and Non-Affected by Decile" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "==========================================================================================\n", + "WINNERS, LOSERS, AND NON-AFFECTED BY INCOME DECILE (% OF RESIDENTS)\n", + 
"==========================================================================================\n", + " Decile % Winners % Losers % Non-Affected Total Residents\n", + "------------------------------------------------------------------------------------------\n", + " 1st 17.5% 1.4% 81.1% 337,955\n", + " 2nd 49.5% 0.0% 50.5% 432,021\n", + " 3rd 41.5% 7.2% 51.2% 513,281\n", + " 4th 8.5% 32.4% 59.1% 596,435\n", + " 5th 8.2% 13.9% 77.9% 610,155\n", + " 6th 4.7% 7.6% 87.8% 590,161\n", + " 7th 3.2% 6.1% 90.7% 614,311\n", + " 8th 4.9% 12.7% 82.4% 770,523\n", + " 9th 4.0% 1.2% 94.9% 674,770\n", + " 10th 5.8% 28.4% 65.8% 715,130\n", + "==========================================================================================\n" + ] + } + ], + "source": [ + "# Get person-level data with household mapping\n", + "person_weight = np.array(baseline.calculate(\"person_weight\", period=2025))\n", + "household_id_person = np.array(baseline.calculate(\"household_id\", period=2025, map_to=\"person\"))\n", + "household_id_household = np.array(baseline.calculate(\"household_id\", period=2025, map_to=\"household\"))\n", + "\n", + "# Get household-level income change\n", + "baseline_hh_inc = np.array(baseline.calculate(\"household_net_income\", period=2025, map_to=\"household\"))\n", + "reform_hh_inc = np.array(reform_sim.calculate(\"household_net_income\", period=2025, map_to=\"household\"))\n", + "hh_inc_change = reform_hh_inc - baseline_hh_inc\n", + "\n", + "# Get household AGI for decile assignment\n", + "hh_agi = np.array(baseline.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\"))\n", + "hh_weight = np.array(baseline.calculate(\"household_weight\", period=2025))\n", + "\n", + "# Create household-level DataFrame with decile assignment\n", + "df_hh = pd.DataFrame({\n", + " 'household_id': household_id_household,\n", + " 'agi': hh_agi,\n", + " 'income_change': hh_inc_change,\n", + " 'weight': hh_weight\n", + "})\n", + "\n", + "# Sort by AGI and assign deciles\n", + 
"df_hh = df_hh.sort_values('agi').reset_index(drop=True)\n", + "df_hh['cumweight'] = df_hh['weight'].cumsum()\n", + "total_hh_weight = df_hh['weight'].sum()\n", + "df_hh['decile'] = pd.cut(\n", + " df_hh['cumweight'] / total_hh_weight,\n", + " bins=[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],\n", + " labels=['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']\n", + ")\n", + "\n", + "# Create mapping from household_id to decile and income_change\n", + "hh_to_decile = dict(zip(df_hh['household_id'], df_hh['decile']))\n", + "hh_to_change = dict(zip(df_hh['household_id'], df_hh['income_change']))\n", + "\n", + "# Create person-level DataFrame\n", + "df_person = pd.DataFrame({\n", + " 'household_id': household_id_person,\n", + " 'person_weight': person_weight\n", + "})\n", + "\n", + "# Map decile and income change to persons; the dict lookup yields plain strings, so rebuild the ordered categorical to keep groupby in 1st..10th order instead of alphabetical\n", + "df_person['decile'] = pd.Categorical(df_person['household_id'].map(hh_to_decile), categories=df_hh['decile'].cat.categories, ordered=True)\n", + "df_person['income_change'] = df_person['household_id'].map(hh_to_change)\n", + "\n", + "# Classify as winner, loser, or non-affected (household income change beyond +/- $1)\n", + "df_person['is_winner'] = df_person['income_change'] > 1\n", + "df_person['is_loser'] = df_person['income_change'] < -1\n", + "df_person['is_nonaffected'] = (~df_person['is_winner']) & (~df_person['is_loser'])\n", + "\n", + "# Person-weighted resident totals by decile\n", + "decile_impact = df_person.groupby('decile', observed=True).apply(\n", + " lambda x: pd.Series({\n", + " 'Total Residents': x['person_weight'].sum(),\n", + " 'Winners': x.loc[x['is_winner'], 'person_weight'].sum(),\n", + " 'Losers': x.loc[x['is_loser'], 'person_weight'].sum(),\n", + " 'Non-Affected': x.loc[x['is_nonaffected'], 'person_weight'].sum()\n", + " })\n", + ").reset_index()\n", + "\n", + "# Calculate percentages\n", + "decile_impact['% Winners'] = (decile_impact['Winners'] / decile_impact['Total Residents'] * 100)\n", + "decile_impact['% Losers'] = (decile_impact['Losers'] / decile_impact['Total Residents'] * 100)\n", + "decile_impact['% 
Non-Affected'] = (decile_impact['Non-Affected'] / decile_impact['Total Residents'] * 100)\n", + "\n", + "print(\"\\n\" + \"=\"*90)\n", + "print(\"WINNERS, LOSERS, AND NON-AFFECTED BY INCOME DECILE (% OF RESIDENTS)\")\n", + "print(\"=\"*90)\n", + "print(f\"{'Decile':>8} {'% Winners':>12} {'% Losers':>12} {'% Non-Affected':>16} {'Total Residents':>18}\")\n", + "print(\"-\"*90)\n", + "for _, row in decile_impact.iterrows():\n", + " print(f\"{row['decile']:>8} {row['% Winners']:>11.1f}% {row['% Losers']:>11.1f}% {row['% Non-Affected']:>15.1f}% {row['Total Residents']:>17,.0f}\")\n", + "print(\"=\"*90)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tax Units per Household Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "TAX UNITS PER HOUSEHOLD - DISTRIBUTION\n", + "======================================================================\n", + " # Tax Units Households % of Total\n", + "----------------------------------------------------------------------\n", + " 1 1,130,173 61.47%\n", + " 2 458,195 24.92%\n", + " 3 148,805 8.09%\n", + " 4 57,527 3.13%\n", + " 5 18,721 1.02%\n", + " 6 3,533 0.19%\n", + " 7 3 0.00%\n", + " 8 21,591 1.17%\n", + " 9 1 0.00%\n", + "======================================================================\n", + "\n", + "Total households with 2+ tax units: 708,375\n", + "Percentage with 2+ tax units: 38.53%\n" + ] + } + ], + "source": [ + "# Analyze tax units per household\n", + "tax_unit_id = np.array(baseline.calculate(\"tax_unit_id\", period=2025, map_to=\"person\"))\n", + "household_id_p = np.array(baseline.calculate(\"household_id\", period=2025, map_to=\"person\"))\n", + "\n", + "# Create DataFrame to count unique tax units per household\n", + "df_tu = pd.DataFrame({\n", + " 'household_id': household_id_p,\n", + " 
'tax_unit_id': tax_unit_id\n", + "})\n", + "\n", + "# Count unique tax units per household\n", + "tu_per_hh = df_tu.groupby('household_id')['tax_unit_id'].nunique().reset_index()\n", + "tu_per_hh.columns = ['household_id', 'num_tax_units']\n", + "\n", + "# Get household weights\n", + "hh_ids = np.array(baseline.calculate(\"household_id\", period=2025, map_to=\"household\"))\n", + "hh_weights = np.array(baseline.calculate(\"household_weight\", period=2025))\n", + "df_hh_weights = pd.DataFrame({'household_id': hh_ids, 'weight': hh_weights})\n", + "\n", + "# Merge\n", + "tu_per_hh = tu_per_hh.merge(df_hh_weights, on='household_id')\n", + "\n", + "# Calculate weighted distribution\n", + "tu_distribution = tu_per_hh.groupby('num_tax_units')['weight'].sum().reset_index()\n", + "tu_distribution['pct'] = tu_distribution['weight'] / tu_distribution['weight'].sum() * 100\n", + "\n", + "print(\"=\"*70)\n", + "print(\"TAX UNITS PER HOUSEHOLD - DISTRIBUTION\")\n", + "print(\"=\"*70)\n", + "print(f\"{'# Tax Units':>12} {'Households':>18} {'% of Total':>14}\")\n", + "print(\"-\"*70)\n", + "for _, row in tu_distribution.iterrows():\n", + " print(f\"{int(row['num_tax_units']):>12} {row['weight']:>17,.0f} {row['pct']:>13.2f}%\")\n", + "print(\"=\"*70)\n", + "print(f\"\\nTotal households with 2+ tax units: {tu_per_hh[tu_per_hh['num_tax_units'] >= 2]['weight'].sum():,.0f}\")\n", + "print(f\"Percentage with 2+ tax units: {tu_per_hh[tu_per_hh['num_tax_units'] >= 2]['weight'].sum() / tu_per_hh['weight'].sum() * 100:.2f}%\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "TOP 3 DECILES: TAX UNITS IN AFFECTED VS UNAFFECTED HOUSEHOLDS\n", + "======================================================================\n", + "Affected households (income change > $1):\n", + " Count: 58,919\n", + " Avg tax units 
per household: 4.51\n", + " Distribution:\n", + " 1 tax unit(s): 7,342 (12.5%)\n", + " 2 tax unit(s): 15,097 (25.6%)\n", + " 3 tax unit(s): 6,346 (10.8%)\n", + " 4 tax unit(s): 6,924 (11.8%)\n", + " 5 tax unit(s): 809 (1.4%)\n", + " 6 tax unit(s): 810 (1.4%)\n", + " 7 tax unit(s): 0 (0.0%)\n", + " 8 tax unit(s): 21,591 (36.6%)\n", + " 9 tax unit(s): 0 (0.0%)\n", + "\n", + "Unaffected households (income change <= $1):\n", + " Count: 492,947\n", + " Avg tax units per household: 1.70\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Check if affected high-income households have multiple tax units\n", + "# Merge tax unit count with household decile and income change data\n", + "df_hh_analysis = df_hh.merge(tu_per_hh[['household_id', 'num_tax_units']], on='household_id')\n", + "\n", + "# Filter to top 3 deciles (8th, 9th, 10th)\n", + "top_deciles = df_hh_analysis[df_hh_analysis['decile'].isin(['8th', '9th', '10th'])]\n", + "\n", + "# Among top decile households, compare affected vs non-affected\n", + "affected_top = top_deciles[abs(top_deciles['income_change']) > 1]\n", + "unaffected_top = top_deciles[abs(top_deciles['income_change']) <= 1]\n", + "\n", + "print(\"=\"*70)\n", + "print(\"TOP 3 DECILES: TAX UNITS IN AFFECTED VS UNAFFECTED HOUSEHOLDS\")\n", + "print(\"=\"*70)\n", + "\n", + "if len(affected_top) > 0:\n", + " avg_tu_affected = np.average(affected_top['num_tax_units'], weights=affected_top['weight'])\n", + " print(f\"Affected households (income change > $1):\")\n", + " print(f\" Count: {affected_top['weight'].sum():,.0f}\")\n", + " print(f\" Avg tax units per household: {avg_tu_affected:.2f}\")\n", + " \n", + " # Distribution of tax units among affected\n", + " affected_tu_dist = affected_top.groupby('num_tax_units')['weight'].sum()\n", + " print(f\" Distribution:\")\n", + " for tu, wt in affected_tu_dist.items():\n", + " print(f\" {tu} tax unit(s): {wt:,.0f} 
({wt/affected_top['weight'].sum()*100:.1f}%)\")\n", + "else:\n", + " print(\"No affected households in top deciles\")\n", + "\n", + "print()\n", + "\n", + "if len(unaffected_top) > 0:\n", + " avg_tu_unaffected = np.average(unaffected_top['num_tax_units'], weights=unaffected_top['weight'])\n", + " print(f\"Unaffected households (income change <= $1):\")\n", + " print(f\" Count: {unaffected_top['weight'].sum():,.0f}\")\n", + " print(f\" Avg tax units per household: {avg_tu_unaffected:.2f}\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8l50y95jinp", + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "from policyengine_core.reforms import Reform\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "MN_DATASET = \"hf://policyengine/test/MN.h5\"\n", + "\n", + "print(\"Loading baseline simulation with test dataset...\")\n", + "baseline = Microsimulation(dataset=MN_DATASET)\n", + "print(\"Loaded successfully!\")\n", + "\n", + "# Basic dataset stats\n", + "household_weight = np.array(baseline.calculate(\"household_weight\", period=2025))\n", + "person_weight = np.array(baseline.calculate(\"person_weight\", period=2025))\n", + "tax_unit_weight = np.array(baseline.calculate(\"tax_unit_weight\", period=2025))\n", + "\n", + "print(f\"\\n{'='*60}\")\n", + "print(\"DATASET STATISTICS - TEST MN.h5\")\n", + "print(f\"{'='*60}\")\n", + "print(f\"Population (weighted): {person_weight.sum():,.0f}\")\n", + "print(f\"Households (weighted): {household_weight.sum():,.0f}\")\n", + "print(f\"Tax Units (weighted): {tax_unit_weight.sum():,.0f}\")\n", + "print(f\"\\nTargets:\")\n", + "print(f\"Population target: 5,737,915\")\n", + "print(f\"Household target: 2,344,432\")\n", + "print(f\"Tax Unit target: 2,871,840\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/us/states/mn/mn_ctc_reform_results.csv b/us/states/mn/mn_ctc_reform_results.csv new file mode 100644 index 0000000..1b3ff5f --- /dev/null +++ b/us/states/mn/mn_ctc_reform_results.csv @@ -0,0 +1,2 @@ +Scenario,Description,Net Cost,% Population Winning,% Population Losing,Avg Gain (Winners),Avg Loss (Losers),Overall Poverty Change (%),Child Poverty Change (%),People Lifted from Poverty,Children Lifted from Poverty +MN CTC Reform,"CTC $2,000 + 20% phase-out rate",$-0.21M,12.63%,12.14%,$439.90,$-545.76,-0.15%,-0.47%,"-1,234",-630 diff --git a/us/states/mn/mn_dataset_summary_weighted.csv b/us/states/mn/mn_dataset_summary_weighted.csv new file mode 100644 index 0000000..b9e6989 --- /dev/null +++ b/us/states/mn/mn_dataset_summary_weighted.csv @@ -0,0 +1,15 @@ +Metric,Value +Household count (weighted),"1,254,857" +Person count (weighted),"4,066,311" +Median AGI,"$96,581" +75th percentile AGI,"$379,259" +90th percentile AGI,"$650,436" +95th percentile AGI,"$854,192" +Max AGI,"$3,229,514" +Total households with children,"469,600" +Households with 1 child,"114,008" +Households with 2 children,"151,889" +Households with 3+ children,"203,703" +Total children under 18,"1,145,830" +Children under 6,"269,322" +Children under 3,"96,626" diff --git a/us/states/mn/mn_standard_deduction_reform.ipynb b/us/states/mn/mn_standard_deduction_reform.ipynb new file mode 100644 index 0000000..a4be005 --- /dev/null +++ b/us/states/mn/mn_standard_deduction_reform.ipynb @@ -0,0 +1,1114 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Minnesota Standard Deduction Reform Analysis (2025)\n", + "\n", + "This notebook analyzes the impact of increasing Minnesota's standard 
deduction.\n", + "\n", + "## Baseline (Current Law 2025)\n", + "- Single/Separate: \\$14,950\n", + "- Head of Household: \\$22,500\n", + "- Joint/Surviving Spouse: \\$29,900\n", + "\n", + "## Reform\n", + "- Single/Separate: \\$20,000\n", + "- Head of Household: \\$30,000\n", + "- Joint/Surviving Spouse: \\$40,000\n", + "\n", + "## Focus: Loser Population Analysis\n", + "While most taxpayers benefit from higher standard deductions, a small group may be worse off due to:\n", + "1. **Federal AMT interactions** - Higher state deductions can increase federal AMT liability\n", + "2. **Itemized deduction interactions** - Those who itemize on their federal return may lose part of their state and local tax (SALT) deduction when their MN tax falls" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "from policyengine_core.reforms import Reform\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "MN_DATASET = \"hf://policyengine/policyengine-us-data/states/MN.h5\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define Reform" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reform defined!\n", + "\n", + "Reform details:\n", + " - Single/Separate: $14,950 -> $20,000 (+$5,050)\n", + " - Head of Household: $22,500 -> $30,000 (+$7,500)\n", + " - Joint/Surviving Spouse: $29,900 -> $40,000 (+$10,100)\n" + ] + } + ], + "source": [ + "def create_mn_standard_deduction_reform():\n", + " reform = Reform.from_dict(\n", + " {\n", + " \"gov.states.mn.tax.income.deductions.standard.base.SINGLE\": {\n", + " \"2025-01-01.2100-12-31\": 20000\n", + " },\n", + " \"gov.states.mn.tax.income.deductions.standard.base.SEPARATE\": {\n", + " \"2025-01-01.2100-12-31\": 20000\n", + " },\n", + " \"gov.states.mn.tax.income.deductions.standard.base.HEAD_OF_HOUSEHOLD\": {\n", + " \"2025-01-01.2100-12-31\": 30000\n", + " },\n", + " 
\"gov.states.mn.tax.income.deductions.standard.base.JOINT\": {\n", + " \"2025-01-01.2100-12-31\": 40000\n", + " },\n", + " \"gov.states.mn.tax.income.deductions.standard.base.SURVIVING_SPOUSE\": {\n", + " \"2025-01-01.2100-12-31\": 40000\n", + " },\n", + " },\n", + " country_id=\"us\",\n", + " )\n", + " return reform\n", + "\n", + "print(\"Reform defined!\")\n", + "print(\"\\nReform details:\")\n", + "print(\" - Single/Separate: $14,950 -> $20,000 (+$5,050)\")\n", + "print(\" - Head of Household: $22,500 -> $30,000 (+$7,500)\")\n", + "print(\" - Joint/Surviving Spouse: $29,900 -> $40,000 (+$10,100)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Simulations" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading baseline (current law)...\n", + "Baseline loaded\n", + "\n", + "Loading reform...\n", + "Reform loaded\n", + "\n", + "============================================================\n", + "All simulations ready!\n", + "============================================================\n" + ] + } + ], + "source": [ + "print(\"Loading baseline (current law)...\")\n", + "baseline = Microsimulation(dataset=MN_DATASET)\n", + "print(\"Baseline loaded\")\n", + "\n", + "print(\"\\nLoading reform...\")\n", + "reform = create_mn_standard_deduction_reform()\n", + "reform_sim = Microsimulation(dataset=MN_DATASET, reform=reform)\n", + "print(\"Reform loaded\")\n", + "\n", + "print(\"\\n\" + \"=\"*60)\n", + "print(\"All simulations ready!\")\n", + "print(\"=\"*60)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Overall Reform Impact" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "MN STANDARD DEDUCTION REFORM - 
OVERALL IMPACT\n", + "======================================================================\n", + "Net cost to state: $506.68M\n", + "\n", + "Households benefitting: 770,620 (61.41%)\n", + "Households losing: 45,249 (3.61%)\n", + "Total households: 1,254,857\n", + "\n", + "Avg gain (winners): $663.90\n", + "Avg loss (losers): $-109.12\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Calculate overall impact\n", + "baseline_hh_income = np.array(baseline.calculate(\"household_net_income\", period=2025, map_to=\"household\"))\n", + "reform_hh_income = np.array(reform_sim.calculate(\"household_net_income\", period=2025, map_to=\"household\"))\n", + "household_weight = np.array(baseline.calculate(\"household_weight\", period=2025))\n", + "\n", + "income_change = reform_hh_income - baseline_hh_income\n", + "total_cost = (income_change * household_weight).sum()\n", + "\n", + "# Winners and losers\n", + "winners_mask = income_change > 1\n", + "losers_mask = income_change < -1\n", + "\n", + "winners_count = household_weight[winners_mask].sum()\n", + "losers_count = household_weight[losers_mask].sum()\n", + "total_households = household_weight.sum()\n", + "\n", + "print(\"=\"*70)\n", + "print(\"MN STANDARD DEDUCTION REFORM - OVERALL IMPACT\")\n", + "print(\"=\"*70)\n", + "print(f\"Net cost to state: ${total_cost/1e6:,.2f}M\")\n", + "print(f\"\\nHouseholds benefitting: {winners_count:,.0f} ({winners_count/total_households*100:.2f}%)\")\n", + "print(f\"Households losing: {losers_count:,.0f} ({losers_count/total_households*100:.2f}%)\")\n", + "print(f\"Total households: {total_households:,.0f}\")\n", + "\n", + "if winners_mask.sum() > 0:\n", + " avg_gain = np.average(income_change[winners_mask], weights=household_weight[winners_mask])\n", + " print(f\"\\nAvg gain (winners): ${avg_gain:,.2f}\")\n", + "if losers_mask.sum() > 0:\n", + " avg_loss = np.average(income_change[losers_mask], 
weights=household_weight[losers_mask])\n", + " print(f\"Avg loss (losers): ${avg_loss:,.2f}\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loser Population Deep Dive" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tax unit level data loaded for analysis\n" + ] + } + ], + "source": [ + "# Get tax unit level data for detailed analysis\n", + "tu_id = np.array(baseline.calculate(\"tax_unit_id\", period=2025, map_to=\"tax_unit\"))\n", + "tu_weight = np.array(baseline.calculate(\"tax_unit_weight\", period=2025))\n", + "\n", + "# Income metrics\n", + "baseline_tu_income = np.array(baseline.calculate(\"household_net_income\", period=2025, map_to=\"tax_unit\"))\n", + "reform_tu_income = np.array(reform_sim.calculate(\"household_net_income\", period=2025, map_to=\"tax_unit\"))\n", + "tu_income_change = reform_tu_income - baseline_tu_income\n", + "\n", + "# Key variables to understand losers\n", + "agi = np.array(baseline.calculate(\"adjusted_gross_income\", period=2025, map_to=\"tax_unit\"))\n", + "filing_status = np.array(baseline.calculate(\"filing_status\", period=2025, map_to=\"tax_unit\"))\n", + "\n", + "# AMT\n", + "baseline_amt = np.array(baseline.calculate(\"alternative_minimum_tax\", period=2025, map_to=\"tax_unit\"))\n", + "reform_amt = np.array(reform_sim.calculate(\"alternative_minimum_tax\", period=2025, map_to=\"tax_unit\"))\n", + "amt_change = reform_amt - baseline_amt\n", + "\n", + "# Itemized vs Standard deduction choice\n", + "tax_unit_itemizes = np.array(baseline.calculate(\"tax_unit_itemizes\", period=2025, map_to=\"tax_unit\"))\n", + "\n", + "# MN-specific\n", + "baseline_mn_income_tax = np.array(baseline.calculate(\"mn_income_tax\", period=2025, map_to=\"tax_unit\"))\n", + "reform_mn_income_tax = np.array(reform_sim.calculate(\"mn_income_tax\", period=2025, map_to=\"tax_unit\"))\n", + 
"mn_tax_change = reform_mn_income_tax - baseline_mn_income_tax\n", + "\n", + "# Federal income tax\n", + "baseline_fed_tax = np.array(baseline.calculate(\"income_tax\", period=2025, map_to=\"tax_unit\"))\n", + "reform_fed_tax = np.array(reform_sim.calculate(\"income_tax\", period=2025, map_to=\"tax_unit\"))\n", + "fed_tax_change = reform_fed_tax - baseline_fed_tax\n", + "\n", + "print(\"Tax unit level data loaded for analysis\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "LOSER POPULATION CHARACTERISTICS\n", + "======================================================================\n", + "Tax units losing income: 53,091 (2.50%)\n", + "Average income loss: $-93.01\n", + "\n", + "Average AGI of losers: $278,523\n", + "\n", + "Losers who itemize: 52,920 (99.7%)\n", + "Losers paying AMT (baseline): 19 (0.0%)\n", + "Losers with AMT increase: 0 (0.0%)\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Identify losers at tax unit level\n", + "tu_losers_mask = tu_income_change < -1\n", + "\n", + "print(\"=\"*70)\n", + "print(\"LOSER POPULATION CHARACTERISTICS\")\n", + "print(\"=\"*70)\n", + "\n", + "losers_count_tu = tu_weight[tu_losers_mask].sum()\n", + "total_tu = tu_weight.sum()\n", + "print(f\"Tax units losing income: {losers_count_tu:,.0f} ({losers_count_tu/total_tu*100:.2f}%)\")\n", + "\n", + "if tu_losers_mask.sum() > 0:\n", + " # Average loss\n", + " avg_loss_tu = np.average(tu_income_change[tu_losers_mask], weights=tu_weight[tu_losers_mask])\n", + " print(f\"Average income loss: ${avg_loss_tu:,.2f}\")\n", + " \n", + " # AGI distribution of losers\n", + " avg_agi_losers = np.average(agi[tu_losers_mask], weights=tu_weight[tu_losers_mask])\n", + " print(f\"\\nAverage AGI of losers: ${avg_agi_losers:,.0f}\")\n", + " 
\n", + " # Itemization status\n", + " itemizers_among_losers = tu_weight[tu_losers_mask & tax_unit_itemizes].sum()\n", + " pct_itemizers = itemizers_among_losers / losers_count_tu * 100 if losers_count_tu > 0 else 0\n", + " print(f\"\\nLosers who itemize: {itemizers_among_losers:,.0f} ({pct_itemizers:.1f}%)\")\n", + " \n", + " # AMT payers among losers\n", + " amt_payers_baseline = tu_weight[tu_losers_mask & (baseline_amt > 0)].sum()\n", + " pct_amt = amt_payers_baseline / losers_count_tu * 100 if losers_count_tu > 0 else 0\n", + " print(f\"Losers paying AMT (baseline): {amt_payers_baseline:,.0f} ({pct_amt:.1f}%)\")\n", + " \n", + " # AMT increase for losers\n", + " amt_increased = tu_weight[tu_losers_mask & (amt_change > 0)].sum()\n", + " pct_amt_increase = amt_increased / losers_count_tu * 100 if losers_count_tu > 0 else 0\n", + " print(f\"Losers with AMT increase: {amt_increased:,.0f} ({pct_amt_increase:.1f}%)\")\n", + "\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Why Are They Worse Off? 
- Tax Component Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "TAX CHANGE BREAKDOWN FOR LOSERS\n", + "======================================================================\n", + "Average MN income tax change: $-13.53\n", + "Average Federal tax change: $106.54\n", + "Average AMT change: $-0.00\n", + "\n", + "----------------------------------------------------------------------\n", + "Interpretation:\n", + " - MN tax DECREASED by $13.53 (expected from higher std deduction)\n", + " - Federal tax INCREASED by $106.54\n", + " -> This could be due to reduced SALT deduction benefit\n", + "======================================================================\n" + ] + } + ], + "source": [ + "if tu_losers_mask.sum() > 0:\n", + " print(\"=\"*70)\n", + " print(\"TAX CHANGE BREAKDOWN FOR LOSERS\")\n", + " print(\"=\"*70)\n", + " \n", + " # Average tax changes for losers\n", + " avg_mn_tax_change = np.average(mn_tax_change[tu_losers_mask], weights=tu_weight[tu_losers_mask])\n", + " avg_fed_tax_change = np.average(fed_tax_change[tu_losers_mask], weights=tu_weight[tu_losers_mask])\n", + " avg_amt_change = np.average(amt_change[tu_losers_mask], weights=tu_weight[tu_losers_mask])\n", + " \n", + " print(f\"Average MN income tax change: ${avg_mn_tax_change:,.2f}\")\n", + " print(f\"Average Federal tax change: ${avg_fed_tax_change:,.2f}\")\n", + " print(f\"Average AMT change: ${avg_amt_change:,.2f}\")\n", + " \n", + " print(\"\\n\" + \"-\"*70)\n", + " print(\"Interpretation:\")\n", + " if avg_mn_tax_change < 0:\n", + " print(f\" - MN tax DECREASED by ${abs(avg_mn_tax_change):,.2f} (expected from higher std deduction)\")\n", + " else:\n", + " print(f\" - MN tax INCREASED by ${avg_mn_tax_change:,.2f} (unexpected!)\")\n", + " \n", + " if avg_fed_tax_change > 0:\n", + " print(f\" - Federal 
tax INCREASED by ${avg_fed_tax_change:,.2f}\")\n", + " print(f\" -> This could be due to reduced SALT deduction benefit\")\n", + " \n", + " if avg_amt_change > 0:\n", + " print(f\" - AMT INCREASED by ${avg_amt_change:,.2f}\")\n", + " print(f\" -> Higher state deduction reduces state tax, increasing AMT exposure\")\n", + " \n", + " print(\"=\"*70)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loser Categories Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "CATEGORIZING LOSERS BY CAUSE\n", + "======================================================================\n", + "\n", + "1. AMT-Driven Losses: 0 (0.0%)\n", + "\n", + "2. Itemizer-Related Losses: 52,920 (99.7%)\n", + " Avg income loss: $-93.13\n", + "\n", + "3. Other/Unknown: 171 (0.3%)\n", + " Avg income loss: $-54.97\n", + "======================================================================\n" + ] + } + ], + "source": [ + "if tu_losers_mask.sum() > 0:\n", + " print(\"=\"*70)\n", + " print(\"CATEGORIZING LOSERS BY CAUSE\")\n", + " print(\"=\"*70)\n", + " \n", + " # Category 1: AMT-driven losses (AMT increased)\n", + " amt_driven = tu_losers_mask & (amt_change > 1)\n", + " amt_driven_count = tu_weight[amt_driven].sum()\n", + " \n", + " # Category 2: Itemizers (may lose SALT benefit)\n", + " itemizer_driven = tu_losers_mask & tax_unit_itemizes & (amt_change <= 1)\n", + " itemizer_driven_count = tu_weight[itemizer_driven].sum()\n", + " \n", + " # Category 3: Other/Unknown\n", + " other = tu_losers_mask & ~amt_driven & ~itemizer_driven\n", + " other_count = tu_weight[other].sum()\n", + " \n", + " print(f\"\\n1. 
AMT-Driven Losses: {amt_driven_count:,.0f} ({amt_driven_count/losers_count_tu*100:.1f}%)\")\n", + " if amt_driven.sum() > 0:\n", + " avg_amt_inc = np.average(amt_change[amt_driven], weights=tu_weight[amt_driven])\n", + " avg_loss_amt = np.average(tu_income_change[amt_driven], weights=tu_weight[amt_driven])\n", + " print(f\" Avg AMT increase: ${avg_amt_inc:,.2f}\")\n", + " print(f\" Avg income loss: ${avg_loss_amt:,.2f}\")\n", + " \n", + " print(f\"\\n2. Itemizer-Related Losses: {itemizer_driven_count:,.0f} ({itemizer_driven_count/losers_count_tu*100:.1f}%)\")\n", + " if itemizer_driven.sum() > 0:\n", + " avg_loss_item = np.average(tu_income_change[itemizer_driven], weights=tu_weight[itemizer_driven])\n", + " print(f\" Avg income loss: ${avg_loss_item:,.2f}\")\n", + " \n", + " print(f\"\\n3. Other/Unknown: {other_count:,.0f} ({other_count/losers_count_tu*100:.1f}%)\")\n", + " if other.sum() > 0:\n", + " avg_loss_other = np.average(tu_income_change[other], weights=tu_weight[other])\n", + " print(f\" Avg income loss: ${avg_loss_other:,.2f}\")\n", + " \n", + " print(\"=\"*70)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## AGI Distribution of Losers" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "LOSERS BY AGI BRACKET\n", + "======================================================================\n", + "AGI Bracket Losers % of Losers Avg Loss Avg AMT Chg\n", + "----------------------------------------------------------------------\n", + "$0-$50k 204 0.4% $ -51 $ 0\n", + "$50k-$100k 11,788 22.2% $ -61 $ 0\n", + "$100k-$200k 8,432 15.9% $ -75 $ 0\n", + "$200k-$500k 24,723 46.6% $ -134 $ 0\n", + "$500k-$1M 7,872 14.8% $ -32 $ -0\n", + "$1M+ 72 0.1% $ -72 $ 0\n", + "======================================================================\n" + ] + } + ], + 
"source": [ + "if tu_losers_mask.sum() > 0:\n", + " print(\"=\"*70)\n", + " print(\"LOSERS BY AGI BRACKET\")\n", + " print(\"=\"*70)\n", + " \n", + " brackets = [\n", + " (0, 50000, \"$0-$50k\"),\n", + " (50000, 100000, \"$50k-$100k\"),\n", + " (100000, 200000, \"$100k-$200k\"),\n", + " (200000, 500000, \"$200k-$500k\"),\n", + " (500000, 1000000, \"$500k-$1M\"),\n", + " (1000000, float('inf'), \"$1M+\"),\n", + " ]\n", + " \n", + " print(f\"{'AGI Bracket':<15} {'Losers':>12} {'% of Losers':>14} {'Avg Loss':>12} {'Avg AMT Chg':>12}\")\n", + " print(\"-\"*70)\n", + " \n", + " for lower, upper, label in brackets:\n", + " bracket_losers = tu_losers_mask & (agi >= lower) & (agi < upper)\n", + " bracket_count = tu_weight[bracket_losers].sum()\n", + " \n", + " if bracket_count > 0:\n", + " pct = bracket_count / losers_count_tu * 100\n", + " avg_loss = np.average(tu_income_change[bracket_losers], weights=tu_weight[bracket_losers])\n", + " avg_amt = np.average(amt_change[bracket_losers], weights=tu_weight[bracket_losers])\n", + " print(f\"{label:<15} {bracket_count:>12,.0f} {pct:>13.1f}% ${avg_loss:>10,.0f} ${avg_amt:>10,.0f}\")\n", + " else:\n", + " print(f\"{label:<15} {0:>12} {0:>13.1f}% {'N/A':>12} {'N/A':>12}\")\n", + " \n", + " print(\"=\"*70)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Filing Status Distribution of Losers" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "LOSERS BY FILING STATUS\n", + "======================================================================\n", + "Filing Status Losers % of Losers Avg Loss\n", + "----------------------------------------------------------------------\n", + "Single 20,433 38.5% $ -41\n", + "Joint 32,590 61.4% $ -126\n", + "Separate 1 0.0% $ -41\n", + "Head of Household 67 0.1% $ -91\n", + "Surviving 
Spouse 0 0.0% $ -119\n", + "======================================================================\n" + ] + } + ], + "source": [ + "if tu_losers_mask.sum() > 0:\n", + " print(\"=\"*70)\n", + " print(\"LOSERS BY FILING STATUS\")\n", + " print(\"=\"*70)\n", + " \n", + " # Filing status is returned as string enum values\n", + " status_names = [\"SINGLE\", \"JOINT\", \"SEPARATE\", \"HEAD_OF_HOUSEHOLD\", \"SURVIVING_SPOUSE\"]\n", + " status_display = {\n", + " \"SINGLE\": \"Single\",\n", + " \"JOINT\": \"Joint\",\n", + " \"SEPARATE\": \"Separate\",\n", + " \"HEAD_OF_HOUSEHOLD\": \"Head of Household\",\n", + " \"SURVIVING_SPOUSE\": \"Surviving Spouse\"\n", + " }\n", + " \n", + " print(f\"{'Filing Status':<20} {'Losers':>12} {'% of Losers':>14} {'Avg Loss':>12}\")\n", + " print(\"-\"*70)\n", + " \n", + " for status_code in status_names:\n", + " status_losers = tu_losers_mask & (filing_status == status_code)\n", + " status_count = tu_weight[status_losers].sum()\n", + " \n", + " if status_count > 0:\n", + " pct = status_count / losers_count_tu * 100\n", + " avg_loss = np.average(tu_income_change[status_losers], weights=tu_weight[status_losers])\n", + " print(f\"{status_display[status_code]:<20} {status_count:>12,.0f} {pct:>13.1f}% ${avg_loss:>10,.0f}\")\n", + " \n", + " print(\"=\"*70)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Detailed AMT Analysis for Losers" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "AMT MECHANISM ANALYSIS FOR LOSERS\n", + "======================================================================\n", + "\n", + "No losers with AMT increase found.\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Get additional AMT-related variables\n", + "baseline_amt_income = 
np.array(baseline.calculate(\"amt_income\", period=2025, map_to=\"tax_unit\"))\n", + "reform_amt_income = np.array(reform_sim.calculate(\"amt_income\", period=2025, map_to=\"tax_unit\"))\n", + "\n", + "if tu_losers_mask.sum() > 0:\n", + " print(\"=\"*70)\n", + " print(\"AMT MECHANISM ANALYSIS FOR LOSERS\")\n", + " print(\"=\"*70)\n", + " \n", + " # Focus on losers with AMT increase\n", + " amt_losers = tu_losers_mask & (amt_change > 0)\n", + " amt_losers_count = tu_weight[amt_losers].sum()\n", + " \n", + " if amt_losers.sum() > 0:\n", + " print(f\"\\nTax units with AMT increase: {amt_losers_count:,.0f}\")\n", + " \n", + " # AMT income change\n", + " amt_income_change = reform_amt_income - baseline_amt_income\n", + " avg_amti_change = np.average(amt_income_change[amt_losers], weights=tu_weight[amt_losers])\n", + " \n", + " print(f\"\\nAvg AMT Income change: ${avg_amti_change:,.2f}\")\n", + " \n", + " print(\"\\n\" + \"-\"*70)\n", + " print(\"Explanation:\")\n", + " print(\" When MN standard deduction increases:\")\n", + " print(\" 1. MN taxable income decreases\")\n", + " print(\" 2. MN income tax liability decreases\")\n", + " print(\" 3. SALT deduction (if itemizing) may decrease\")\n", + " print(\" 4. Federal taxable income may increase slightly\")\n", + " print(\" 5. For AMT purposes, state taxes are not deductible\")\n", + " print(\" 6. 
Lower state taxes can push filers into AMT territory\")\n", + " else:\n", + " print(\"\\nNo losers with AMT increase found.\")\n", + " \n", + " print(\"=\"*70)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## SALT Deduction Analysis for Losers" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "SALT DEDUCTION ANALYSIS FOR LOSERS\n", + "======================================================================\n", + "\n", + "Itemizing losers: 52,920\n", + "Avg baseline SALT: $17,567.71\n", + "Avg SALT change: $-549.29\n", + "\n", + "----------------------------------------------------------------------\n", + "Note: SALT deduction is capped at $10,000 for federal purposes.\n", + "Changes in state tax liability affect SALT if below the cap.\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Get SALT deduction data\n", + "try:\n", + " baseline_salt = np.array(baseline.calculate(\"salt_deduction\", period=2025, map_to=\"tax_unit\"))\n", + " reform_salt = np.array(reform_sim.calculate(\"salt_deduction\", period=2025, map_to=\"tax_unit\"))\n", + " salt_change = reform_salt - baseline_salt\n", + " \n", + " if tu_losers_mask.sum() > 0:\n", + " print(\"=\"*70)\n", + " print(\"SALT DEDUCTION ANALYSIS FOR LOSERS\")\n", + " print(\"=\"*70)\n", + " \n", + " # Losers who itemize\n", + " itemizing_losers = tu_losers_mask & tax_unit_itemizes\n", + " itemizing_losers_count = tu_weight[itemizing_losers].sum()\n", + " \n", + " if itemizing_losers.sum() > 0:\n", + " print(f\"\\nItemizing losers: {itemizing_losers_count:,.0f}\")\n", + " \n", + " avg_salt_change = np.average(salt_change[itemizing_losers], weights=tu_weight[itemizing_losers])\n", + " avg_baseline_salt = np.average(baseline_salt[itemizing_losers], 
weights=tu_weight[itemizing_losers])\n", + " \n", + " print(f\"Avg baseline SALT: ${avg_baseline_salt:,.2f}\")\n", + " print(f\"Avg SALT change: ${avg_salt_change:,.2f}\")\n", + " \n", + " print(\"\\n\" + \"-\"*70)\n", + " print(\"Note: SALT deduction is capped at $10,000 for federal purposes.\")\n", + " print(\"Changes in state tax liability affect SALT if below the cap.\")\n", + " \n", + " print(\"=\"*70)\n", + "except Exception as e:\n", + " print(f\"SALT deduction analysis skipped: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sample Loser Profiles" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "==========================================================================================\n", + "SAMPLE LOSER PROFILES (Top 10 by Weighted Loss)\n", + "==========================================================================================\n", + "\n", + "# AGI Status Itemizes Loss MN Tax Fed Tax AMT Chg\n", + "------------------------------------------------------------------------------------------\n", + "1 $ 379,259 Joint Yes $ -206 $ 0 $ 206 $ 0\n", + "2 $ 379,259 Joint Yes $ -206 $ 0 $ 206 $ 0\n", + "3 $ 379,259 Joint Yes $ -206 $ 0 $ 206 $ 0\n", + "4 $ 294,776 Joint Yes $ -190 $ 0 $ 190 $ 0\n", + "5 $ 379,259 Joint Yes $ -206 $ 0 $ 206 $ 0\n", + "6 $ 379,259 Joint Yes $ -206 $ 0 $ 206 $ 0\n", + "7 $ 379,259 Joint Yes $ -206 $ 0 $ 206 $ 0\n", + "8 $ 294,776 Joint Yes $ -190 $ 0 $ 190 $ 0\n", + "9 $ 379,259 Joint Yes $ -206 $ 0 $ 206 $ 0\n", + "10 $ 294,776 Joint Yes $ -190 $ 0 $ 190 $ 0\n", + "==========================================================================================\n" + ] + } + ], + "source": [ + "if tu_losers_mask.sum() > 0:\n", + " print(\"=\"*90)\n", + " print(\"SAMPLE LOSER PROFILES (Top 10 by Weighted Loss)\")\n", + " print(\"=\"*90)\n", + " \n", + " # Create dataframe for losers\n", 
+ " loser_indices = np.where(tu_losers_mask)[0]\n", + " \n", + " df_losers = pd.DataFrame({\n", + " 'index': loser_indices,\n", + " 'weight': tu_weight[loser_indices],\n", + " 'agi': agi[loser_indices],\n", + " 'filing_status': filing_status[loser_indices],\n", + " 'itemizes': tax_unit_itemizes[loser_indices],\n", + " 'income_change': tu_income_change[loser_indices],\n", + " 'mn_tax_change': mn_tax_change[loser_indices],\n", + " 'fed_tax_change': fed_tax_change[loser_indices],\n", + " 'amt_change': amt_change[loser_indices],\n", + " 'baseline_amt': baseline_amt[loser_indices],\n", + " 'reform_amt': reform_amt[loser_indices]\n", + " })\n", + " \n", + " # Sort by weighted loss (loss * weight)\n", + " df_losers['weighted_loss'] = df_losers['income_change'] * df_losers['weight']\n", + " df_losers = df_losers.sort_values('weighted_loss').head(10)\n", + " \n", + " # Filing status display mapping (string enum values)\n", + " status_map = {\n", + " 'SINGLE': 'Single',\n", + " 'JOINT': 'Joint', \n", + " 'SEPARATE': 'Separate',\n", + " 'HEAD_OF_HOUSEHOLD': 'HoH',\n", + " 'SURVIVING_SPOUSE': 'Surv Sp'\n", + " }\n", + " \n", + " print(f\"\\n{'#':<3} {'AGI':>12} {'Status':<10} {'Itemizes':<9} {'Loss':>10} {'MN Tax':>10} {'Fed Tax':>10} {'AMT Chg':>10}\")\n", + " print(\"-\"*90)\n", + " \n", + " for i, (_, row) in enumerate(df_losers.iterrows(), 1):\n", + " status = status_map.get(str(row['filing_status']), 'Unknown')\n", + " itemizes = 'Yes' if row['itemizes'] else 'No'\n", + " print(f\"{i:<3} ${row['agi']:>10,.0f} {status:<10} {itemizes:<9} ${row['income_change']:>9,.0f} ${row['mn_tax_change']:>9,.0f} ${row['fed_tax_change']:>9,.0f} ${row['amt_change']:>9,.0f}\")\n", + " \n", + " print(\"=\"*90)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary and Conclusions" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"======================================================================\n", + "SUMMARY: WHY SOME TAXPAYERS LOSE FROM HIGHER MN STANDARD DEDUCTION\n", + "======================================================================\n", + "\n", + "The Minnesota standard deduction reform increases deductions for all filers:\n", + " - Single/Separate: $14,950 -> $20,000 (+$5,050)\n", + " - Head of Household: $22,500 -> $30,000 (+$7,500)\n", + " - Joint/Surviving: $29,900 -> $40,000 (+$10,100)\n", + "\n", + "MOST TAXPAYERS BENEFIT because:\n", + " - Higher state deduction = Lower MN taxable income = Lower MN tax\n", + "\n", + "SOME TAXPAYERS LOSE due to:\n", + "\n", + "1. FEDERAL AMT INTERACTION\n", + " - State/local taxes are NOT deductible for AMT purposes\n", + " - Lower state tax liability doesn't reduce AMT income\n", + " - Some filers are pushed further into AMT territory\n", + " - The AMT increase can exceed the MN tax savings\n", + "\n", + "2. ITEMIZED DEDUCTION INTERACTION\n", + " - Filers who itemize may see reduced benefit\n", + " - Lower MN tax = Lower SALT deduction (if not at $10k cap)\n", + " - This can increase federal taxable income\n", + " - Combined federal+state effect can be negative\n", + "\n", + "3. 
PROGRAM INTERACTIONS\n", + " - Changes in net income can affect benefit eligibility\n", + " - Some benefit phase-outs may be triggered\n", + "\n", + "QUANTIFIED IMPACT:\n", + " - Total losers: 53,091 tax units\n", + " - Total losses: $-4.94M\n", + " - As % of reform cost: 1.0%\n", + "======================================================================\n" + ] + } + ], + "source": [ + "print(\"=\"*70)\n", + "print(\"SUMMARY: WHY SOME TAXPAYERS LOSE FROM HIGHER MN STANDARD DEDUCTION\")\n", + "print(\"=\"*70)\n", + "\n", + "print(\"\"\"\n", + "The Minnesota standard deduction reform increases deductions for all filers:\n", + " - Single/Separate: $14,950 -> $20,000 (+$5,050)\n", + " - Head of Household: $22,500 -> $30,000 (+$7,500)\n", + " - Joint/Surviving: $29,900 -> $40,000 (+$10,100)\n", + "\n", + "MOST TAXPAYERS BENEFIT because:\n", + " - Higher state deduction = Lower MN taxable income = Lower MN tax\n", + "\n", + "SOME TAXPAYERS LOSE due to:\n", + "\n", + "1. FEDERAL AMT INTERACTION\n", + " - State/local taxes are NOT deductible for AMT purposes\n", + " - Lower state tax liability doesn't reduce AMT income\n", + " - Some filers are pushed further into AMT territory\n", + " - The AMT increase can exceed the MN tax savings\n", + "\n", + "2. ITEMIZED DEDUCTION INTERACTION\n", + " - Filers who itemize may see reduced benefit\n", + " - Lower MN tax = Lower SALT deduction (if not at $10k cap)\n", + " - This can increase federal taxable income\n", + " - Combined federal+state effect can be negative\n", + "\n", + "3. 
PROGRAM INTERACTIONS\n", + " - Changes in net income can affect benefit eligibility\n", + " - Some benefit phase-outs may be triggered\n", + "\"\"\")\n", + "\n", + "if tu_losers_mask.sum() > 0:\n", + " total_losers = tu_weight[tu_losers_mask].sum()\n", + " total_loss = (tu_income_change[tu_losers_mask] * tu_weight[tu_losers_mask]).sum()\n", + " \n", + " print(f\"QUANTIFIED IMPACT:\")\n", + " print(f\" - Total losers: {total_losers:,.0f} tax units\")\n", + " print(f\" - Total losses: ${total_loss/1e6:,.2f}M\")\n", + " print(f\" - As % of reform cost: {abs(total_loss/total_cost)*100:.1f}%\")\n", + "\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostic: SALT vs MN Tax Change Consistency Check" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "DIAGNOSTIC: SALT vs MN TAX CHANGE CONSISTENCY\n", + "======================================================================\n", + "\n", + "For LOSERS:\n", + " Avg MN tax change: $-13.53\n", + " Avg SALT change: $-547.52\n", + " Difference: $-533.99\n", + " Correlation: 0.238\n", + "\n", + "----------------------------------------------------------------------\n", + "EXPECTED: If no bug, SALT change ≈ MN tax change for itemizers\n", + "ACTUAL: SALT is changing MORE than MN tax - suggests a bug\n", + "----------------------------------------------------------------------\n", + "\n", + "Tax units with MN tax change ≈ $0 but losing: 44,836\n", + " Their avg SALT change: $-643.16\n", + " -> BUG INDICATOR: SALT changed when MN tax didn't!\n", + "\n", + "======================================================================\n", + "FULL POPULATION CHECK\n", + "======================================================================\n", + "\n", + "For ALL ITEMIZERS:\n", + " Avg MN tax change: 
$-279.66\n", + " Avg SALT change: $-284.90\n", + " Difference: $-5.24\n", + "\n", + "✓ SALT and MN tax changes are consistent\n" + ] + } + ], + "source": [ + "# Diagnostic: Check if SALT change tracks MN tax change correctly\n", + "# If there's no bug, SALT change should approximately equal MN tax change for itemizers\n", + "\n", + "print(\"=\"*70)\n", + "print(\"DIAGNOSTIC: SALT vs MN TAX CHANGE CONSISTENCY\")\n", + "print(\"=\"*70)\n", + "\n", + "# Get the changes for losers\n", + "mn_tax_diff = mn_tax_change[tu_losers_mask]\n", + "salt_diff = salt_change[tu_losers_mask]\n", + "loser_weights = tu_weight[tu_losers_mask]\n", + "\n", + "avg_mn_tax_change = np.average(mn_tax_diff, weights=loser_weights)\n", + "avg_salt_change = np.average(salt_diff, weights=loser_weights)\n", + "\n", + "print(f\"\\nFor LOSERS:\")\n", + "print(f\" Avg MN tax change: ${avg_mn_tax_change:,.2f}\")\n", + "print(f\" Avg SALT change: ${avg_salt_change:,.2f}\")\n", + "print(f\" Difference: ${avg_salt_change - avg_mn_tax_change:,.2f}\")\n", + "\n", + "# Check correlation\n", + "if len(mn_tax_diff) > 1:\n", + " correlation = np.corrcoef(mn_tax_diff, salt_diff)[0,1]\n", + " print(f\" Correlation: {correlation:.3f}\")\n", + "\n", + "print(\"\\n\" + \"-\"*70)\n", + "print(\"EXPECTED: If no bug, SALT change ≈ MN tax change for itemizers\")\n", + "print(\"ACTUAL: SALT is changing MORE than MN tax - suggests a bug\" if abs(avg_salt_change) > abs(avg_mn_tax_change) + 1 else \"ACTUAL: SALT change is in line with MN tax change\")\n", + "print(\"-\"*70)\n", + "\n", + "# Look at specific cases where MN tax = 0 but SALT changed\n", + "zero_mn_change = tu_losers_mask & (np.abs(mn_tax_change) < 1)\n", + "if zero_mn_change.sum() > 0:\n", + " print(f\"\\nTax units with MN tax change ≈ $0 but losing: {tu_weight[zero_mn_change].sum():,.0f}\")\n", + " avg_salt_zero_mn = np.average(salt_change[zero_mn_change], weights=tu_weight[zero_mn_change])\n", + " print(f\" Their avg SALT change: ${avg_salt_zero_mn:,.2f}\")\n", + " print(f\" -> BUG INDICATOR: SALT changed when MN tax didn't!\" if abs(avg_salt_zero_mn) >= 1 else \" -> SALT is unchanged for these tax units\")\n", + "\n", + "# Compare across ALL tax 
units (not just losers)\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"FULL POPULATION CHECK\")\n", + "print(\"=\"*70)\n", + "\n", + "itemizers_mask = tax_unit_itemizes\n", + "all_mn_change = np.average(mn_tax_change[itemizers_mask], weights=tu_weight[itemizers_mask])\n", + "all_salt_change = np.average(salt_change[itemizers_mask], weights=tu_weight[itemizers_mask])\n", + "\n", + "print(f\"\\nFor ALL ITEMIZERS:\")\n", + "print(f\" Avg MN tax change: ${all_mn_change:,.2f}\")\n", + "print(f\" Avg SALT change: ${all_salt_change:,.2f}\")\n", + "print(f\" Difference: ${all_salt_change - all_mn_change:,.2f}\")\n", + "\n", + "if np.abs(all_salt_change - all_mn_change) > 10:\n", + " print(f\"\\n⚠️ WARNING: SALT and MN tax changes don't match!\")\n", + " print(f\" This suggests SALT may not be correctly tracking actual MN tax paid.\")\n", + "else:\n", + " print(f\"\\n✓ SALT and MN tax changes are consistent\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "DIAGNOSTIC: MN DEDUCTION CHOICE\n", + "======================================================================\n", + "\n", + "MN Deductions (total used):\n", + " Baseline avg (losers): $54,443.61\n", + " Reform avg (losers): $54,608.21\n", + " Change (losers): $164.59\n", + "Could not get mn_itemizes: Variable mn_itemizes does not exist.\n", + "\n", + "----------------------------------------------------------------------\n", + "SALT COMPONENTS CHECK\n", + "----------------------------------------------------------------------\n", + "\n", + "State Income Tax (all states, for SALT):\n", + " Avg change (losers): $-13.53\n", + " This should equal MN tax change: $-13.53\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Deeper diagnostic: Check MN deduction values and 
itemization choice\n", + "print(\"=\"*70)\n", + "print(\"DIAGNOSTIC: MN DEDUCTION CHOICE\")\n", + "print(\"=\"*70)\n", + "\n", + "# Get MN deduction-related variables\n", + "try:\n", + " baseline_mn_deductions = np.array(baseline.calculate(\"mn_deductions\", period=2025, map_to=\"tax_unit\"))\n", + " reform_mn_deductions = np.array(reform_sim.calculate(\"mn_deductions\", period=2025, map_to=\"tax_unit\"))\n", + " mn_ded_change = reform_mn_deductions - baseline_mn_deductions\n", + " \n", + " print(f\"\\nMN Deductions (total used):\")\n", + " print(f\" Baseline avg (losers): ${np.average(baseline_mn_deductions[tu_losers_mask], weights=loser_weights):,.2f}\")\n", + " print(f\" Reform avg (losers): ${np.average(reform_mn_deductions[tu_losers_mask], weights=loser_weights):,.2f}\")\n", + " print(f\" Change (losers): ${np.average(mn_ded_change[tu_losers_mask], weights=loser_weights):,.2f}\")\n", + "except Exception as e:\n", + " print(f\"Could not get mn_deductions: {e}\")\n", + "\n", + "# Check if MN has itemization tracking\n", + "try:\n", + " baseline_mn_itemizes = np.array(baseline.calculate(\"mn_itemizes\", period=2025, map_to=\"tax_unit\"))\n", + " reform_mn_itemizes = np.array(reform_sim.calculate(\"mn_itemizes\", period=2025, map_to=\"tax_unit\"))\n", + " \n", + " # How many losers itemize on MN?\n", + " losers_mn_itemize_baseline = tu_weight[tu_losers_mask & baseline_mn_itemizes].sum()\n", + " losers_mn_itemize_reform = tu_weight[tu_losers_mask & reform_mn_itemizes].sum()\n", + " \n", + " print(f\"\\nMN Itemization Status (losers):\")\n", + " print(f\" Itemizing on MN (baseline): {losers_mn_itemize_baseline:,.0f} ({losers_mn_itemize_baseline/losers_count_tu*100:.1f}%)\")\n", + " print(f\" Itemizing on MN (reform): {losers_mn_itemize_reform:,.0f} ({losers_mn_itemize_reform/losers_count_tu*100:.1f}%)\")\n", + " \n", + " # Did any losers SWITCH from MN itemizing to MN standard?\n", + " switched_to_standard = tu_losers_mask & baseline_mn_itemizes & 
~reform_mn_itemizes\n", + " switched_count = tu_weight[switched_to_standard].sum()\n", + " print(f\" Switched to MN standard: {switched_count:,.0f}\")\n", + "except Exception as e:\n", + " print(f\"Could not get mn_itemizes: {e}\")\n", + "\n", + "# Check the actual SALT components\n", + "print(\"\\n\" + \"-\"*70)\n", + "print(\"SALT COMPONENTS CHECK\")\n", + "print(\"-\"*70)\n", + "\n", + "try:\n", + " # State income tax component of SALT\n", + " baseline_state_income_tax = np.array(baseline.calculate(\"state_income_tax\", period=2025, map_to=\"tax_unit\"))\n", + " reform_state_income_tax = np.array(reform_sim.calculate(\"state_income_tax\", period=2025, map_to=\"tax_unit\"))\n", + " state_tax_change = reform_state_income_tax - baseline_state_income_tax\n", + " \n", + " print(f\"\\nState Income Tax (all states, for SALT):\")\n", + " print(f\" Avg change (losers): ${np.average(state_tax_change[tu_losers_mask], weights=loser_weights):,.2f}\")\n", + " print(f\" This should equal MN tax change: ${avg_mn_tax_change:,.2f}\")\n", + " \n", + " if np.abs(np.average(state_tax_change[tu_losers_mask], weights=loser_weights) - avg_mn_tax_change) > 1:\n", + " print(f\" ⚠️ MISMATCH between state_income_tax and mn_income_tax!\")\n", + "except Exception as e:\n", + " print(f\"Could not get state_income_tax: {e}\")\n", + "\n", + "print(\"=\"*70)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/us/states/mn/mn_standard_deduction_reform_no_single.ipynb b/us/states/mn/mn_standard_deduction_reform_no_single.ipynb new file mode 100644 index 0000000..d7f91b0 --- /dev/null +++ 
b/us/states/mn/mn_standard_deduction_reform_no_single.ipynb @@ -0,0 +1,353 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MN Standard Deduction Reform - Excluding Single Filers\n", + "\n", + "Testing reform without changing Single filer standard deduction to isolate the bug." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "from policyengine_core.reforms import Reform\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "MN_DATASET = \"hf://policyengine/policyengine-us-data/states/MN.h5\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reform (excluding Single filers):\n", + " - Single: $14,950 (UNCHANGED)\n", + " - Separate: $14,950 -> $20,000\n", + " - Head of Household: $22,500 -> $30,000\n", + " - Joint: $29,900 -> $40,000\n", + " - Surviving Spouse: $29,900 -> $40,000\n" + ] + } + ], + "source": [ + "# Reform WITHOUT changing Single filer standard deduction\n", + "def create_reform_no_single():\n", + " reform = Reform.from_dict(\n", + " {\n", + " # SINGLE: NOT CHANGED (stays at $14,950)\n", + " \"gov.states.mn.tax.income.deductions.standard.base.SEPARATE\": {\n", + " \"2025-01-01.2100-12-31\": 20000\n", + " },\n", + " \"gov.states.mn.tax.income.deductions.standard.base.HEAD_OF_HOUSEHOLD\": {\n", + " \"2025-01-01.2100-12-31\": 30000\n", + " },\n", + " \"gov.states.mn.tax.income.deductions.standard.base.JOINT\": {\n", + " \"2025-01-01.2100-12-31\": 40000\n", + " },\n", + " \"gov.states.mn.tax.income.deductions.standard.base.SURVIVING_SPOUSE\": {\n", + " \"2025-01-01.2100-12-31\": 40000\n", + " },\n", + " },\n", + " country_id=\"us\",\n", + " )\n", + " return reform\n", + "\n", + "print(\"Reform (excluding Single filers):\")\n", + "print(\" - Single: $14,950 (UNCHANGED)\")\n", + "print(\" 
- Separate: $14,950 -> $20,000\")\n", + "print(\" - Head of Household: $22,500 -> $30,000\")\n", + "print(\" - Joint: $29,900 -> $40,000\")\n", + "print(\" - Surviving Spouse: $29,900 -> $40,000\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading simulations...\n", + "Done!\n" + ] + } + ], + "source": [ + "print(\"Loading simulations...\")\n", + "baseline = Microsimulation(dataset=MN_DATASET)\n", + "reform_sim = Microsimulation(dataset=MN_DATASET, reform=create_reform_no_single())\n", + "print(\"Done!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Data loaded!\n" + ] + } + ], + "source": [ + "# Calculate impacts\n", + "tu_weight = np.array(baseline.calculate(\"tax_unit_weight\", period=2025))\n", + "filing_status = np.array(baseline.calculate(\"filing_status\", period=2025, map_to=\"tax_unit\"))\n", + "\n", + "baseline_income = np.array(baseline.calculate(\"household_net_income\", period=2025, map_to=\"tax_unit\"))\n", + "reform_income = np.array(reform_sim.calculate(\"household_net_income\", period=2025, map_to=\"tax_unit\"))\n", + "income_change = reform_income - baseline_income\n", + "\n", + "baseline_mn_tax = np.array(baseline.calculate(\"mn_income_tax\", period=2025, map_to=\"tax_unit\"))\n", + "reform_mn_tax = np.array(reform_sim.calculate(\"mn_income_tax\", period=2025, map_to=\"tax_unit\"))\n", + "mn_tax_change = reform_mn_tax - baseline_mn_tax\n", + "\n", + "baseline_fed_tax = np.array(baseline.calculate(\"income_tax\", period=2025, map_to=\"tax_unit\"))\n", + "reform_fed_tax = np.array(reform_sim.calculate(\"income_tax\", period=2025, map_to=\"tax_unit\"))\n", + "fed_tax_change = reform_fed_tax - baseline_fed_tax\n", + "\n", + "baseline_salt = np.array(baseline.calculate(\"salt_deduction\", period=2025, 
map_to=\"tax_unit\"))\n", + "reform_salt = np.array(reform_sim.calculate(\"salt_deduction\", period=2025, map_to=\"tax_unit\"))\n", + "salt_change = reform_salt - baseline_salt\n", + "\n", + "tax_unit_itemizes = np.array(baseline.calculate(\"tax_unit_itemizes\", period=2025, map_to=\"tax_unit\"))\n", + "\n", + "print(\"Data loaded!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "OVERALL RESULTS - REFORM EXCLUDING SINGLE FILERS\n", + "======================================================================\n", + "Net cost: $335.04M\n", + "\n", + "Winners: 981,467 (46.2%)\n", + "Losers: 755 (0.0%)\n", + "\n", + "Avg loss: $-65.65\n" + ] + } + ], + "source": [ + "# Overall results\n", + "losers_mask = income_change < -1\n", + "winners_mask = income_change > 1\n", + "\n", + "print(\"=\"*70)\n", + "print(\"OVERALL RESULTS - REFORM EXCLUDING SINGLE FILERS\")\n", + "print(\"=\"*70)\n", + "\n", + "total_cost = (income_change * tu_weight).sum()\n", + "print(f\"Net cost: ${total_cost/1e6:,.2f}M\")\n", + "print(f\"\\nWinners: {tu_weight[winners_mask].sum():,.0f} ({tu_weight[winners_mask].sum()/tu_weight.sum()*100:.1f}%)\")\n", + "print(f\"Losers: {tu_weight[losers_mask].sum():,.0f} ({tu_weight[losers_mask].sum()/tu_weight.sum()*100:.1f}%)\")\n", + "\n", + "if losers_mask.sum() > 0:\n", + " print(f\"\\nAvg loss: ${np.average(income_change[losers_mask], weights=tu_weight[losers_mask]):,.2f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "LOSERS BY FILING STATUS\n", + "======================================================================\n", + "\n", + "Status Losers Avg Loss Avg MN Tax Avg Fed Tax Avg 
SALT\n", + "--------------------------------------------------------------------------------\n", + "SINGLE 140 $ -18 $ 0 $ 0 $ 0\n", + "JOINT 613 $ -77 $ 81 $ 0 $ 0\n", + "SEPARATE 0 N/A N/A N/A N/A\n", + "HEAD_OF_HOUSEHOLD 1 $ -40 $ 0 $ 0 $ 0\n", + "SURVIVING_SPOUSE 0 N/A N/A N/A N/A\n" + ] + } + ], + "source": [ + "# Losers by filing status\n", + "print(\"=\"*70)\n", + "print(\"LOSERS BY FILING STATUS\")\n", + "print(\"=\"*70)\n", + "\n", + "statuses = [\"SINGLE\", \"JOINT\", \"SEPARATE\", \"HEAD_OF_HOUSEHOLD\", \"SURVIVING_SPOUSE\"]\n", + "\n", + "print(f\"\\n{'Status':<20} {'Losers':>10} {'Avg Loss':>12} {'Avg MN Tax':>12} {'Avg Fed Tax':>12} {'Avg SALT':>12}\")\n", + "print(\"-\"*80)\n", + "\n", + "for status in statuses:\n", + " mask = losers_mask & (filing_status == status)\n", + " count = tu_weight[mask].sum()\n", + " \n", + " if count > 0:\n", + " avg_loss = np.average(income_change[mask], weights=tu_weight[mask])\n", + " avg_mn = np.average(mn_tax_change[mask], weights=tu_weight[mask])\n", + " avg_fed = np.average(fed_tax_change[mask], weights=tu_weight[mask])\n", + " avg_salt = np.average(salt_change[mask], weights=tu_weight[mask])\n", + " print(f\"{status:<20} {count:>10,.0f} ${avg_loss:>10,.0f} ${avg_mn:>10,.0f} ${avg_fed:>10,.0f} ${avg_salt:>10,.0f}\")\n", + " else:\n", + " print(f\"{status:<20} {0:>10} {'N/A':>12} {'N/A':>12} {'N/A':>12} {'N/A':>12}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "SINGLE FILER CHECK (should have NO change)\n", + "======================================================================\n", + "\n", + "Total Single filers: 1,144,853\n", + "\n", + "Avg income change: $33.63\n", + "Avg MN tax change: $0.00\n", + "Avg Fed tax change: $0.00\n", + "Avg SALT change: $0.00\n", + "\n", + "Single filers winning: 373,744\n", + "Single filers 
losing: 140\n", + "\n", + "⚠️ BUG: Single filers are affected even though their std deduction didn't change!\n" + ] + } + ], + "source": [ + "# Key diagnostic: Single filers should have NO change\n", + "print(\"=\"*70)\n", + "print(\"SINGLE FILER CHECK (should have NO change)\")\n", + "print(\"=\"*70)\n", + "\n", + "single_mask = filing_status == \"SINGLE\"\n", + "single_count = tu_weight[single_mask].sum()\n", + "\n", + "print(f\"\\nTotal Single filers: {single_count:,.0f}\")\n", + "\n", + "single_income_change = np.average(income_change[single_mask], weights=tu_weight[single_mask])\n", + "single_mn_change = np.average(mn_tax_change[single_mask], weights=tu_weight[single_mask])\n", + "single_fed_change = np.average(fed_tax_change[single_mask], weights=tu_weight[single_mask])\n", + "single_salt_change = np.average(salt_change[single_mask], weights=tu_weight[single_mask])\n", + "\n", + "print(f\"\\nAvg income change: ${single_income_change:,.2f}\")\n", + "print(f\"Avg MN tax change: ${single_mn_change:,.2f}\")\n", + "print(f\"Avg Fed tax change: ${single_fed_change:,.2f}\")\n", + "print(f\"Avg SALT change: ${single_salt_change:,.2f}\")\n", + "\n", + "single_losers = tu_weight[single_mask & losers_mask].sum()\n", + "single_winners = tu_weight[single_mask & winners_mask].sum()\n", + "\n", + "print(f\"\\nSingle filers winning: {single_winners:,.0f}\")\n", + "print(f\"Single filers losing: {single_losers:,.0f}\")\n", + "\n", + "if single_losers > 0 or single_winners > 0:\n", + " print(\"\\n⚠️ BUG: Single filers are affected even though their std deduction didn't change!\")\n", + "else:\n", + " print(\"\\n✓ Single filers correctly unaffected\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "SALT vs MN TAX DIAGNOSTIC\n", + 
"======================================================================\n", + "\n", + "For LOSERS:\n", + " Avg MN tax change: $65.65\n", + " Avg SALT change: $0.00\n", + " Difference: $-65.65\n", + "\n", + "Tax units with MN tax ≈ $0 but losing: 143\n", + " Their avg SALT change: $0.00\n" + ] + } + ], + "source": [ + "# SALT vs MN Tax diagnostic for losers\n", + "print(\"=\"*70)\n", + "print(\"SALT vs MN TAX DIAGNOSTIC\")\n", + "print(\"=\"*70)\n", + "\n", + "if losers_mask.sum() > 0:\n", + " loser_weights = tu_weight[losers_mask]\n", + " \n", + " avg_mn = np.average(mn_tax_change[losers_mask], weights=loser_weights)\n", + " avg_salt = np.average(salt_change[losers_mask], weights=loser_weights)\n", + " \n", + " print(f\"\\nFor LOSERS:\")\n", + " print(f\" Avg MN tax change: ${avg_mn:,.2f}\")\n", + " print(f\" Avg SALT change: ${avg_salt:,.2f}\")\n", + " print(f\" Difference: ${avg_salt - avg_mn:,.2f}\")\n", + " \n", + " # Zero MN change but losing\n", + " zero_mn = losers_mask & (np.abs(mn_tax_change) < 1)\n", + " if zero_mn.sum() > 0:\n", + " print(f\"\\nTax units with MN tax ≈ $0 but losing: {tu_weight[zero_mn].sum():,.0f}\")\n", + " print(f\" Their avg SALT change: ${np.average(salt_change[zero_mn], weights=tu_weight[zero_mn]):,.2f}\")\n", + "else:\n", + " print(\"\\nNo losers found!\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}