From 5ecfc1f4755a443f9d1d81a6a05d2406e457ccea Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Mon, 5 Jan 2026 14:22:39 -0500 Subject: [PATCH] Analyze Mayor-elect Zohran Mamdani's NYC income tax proposal Fixes #101 --- .../Congressional-Hackathon-2025 | 1 + .../mamdani_income_tax/data_exploration.ipynb | 447 ++++++++++++++++++ .../nyc_dataset_summary_weighted.csv | 13 + .../nyc_mamdani_decile_impacts.csv | 11 + .../nyc_mamdani_income_tax_analysis.ipynb | 431 +++++++++++++++++ .../nyc_mamdani_income_tax_results.csv | 2 + 6 files changed, 905 insertions(+) create mode 160000 obbba_district_impacts/Congressional-Hackathon-2025 create mode 100644 us/states/ny/nyc/mamdani_income_tax/data_exploration.ipynb create mode 100644 us/states/ny/nyc/mamdani_income_tax/nyc_dataset_summary_weighted.csv create mode 100644 us/states/ny/nyc/mamdani_income_tax/nyc_mamdani_decile_impacts.csv create mode 100644 us/states/ny/nyc/mamdani_income_tax/nyc_mamdani_income_tax_analysis.ipynb create mode 100644 us/states/ny/nyc/mamdani_income_tax/nyc_mamdani_income_tax_results.csv diff --git a/obbba_district_impacts/Congressional-Hackathon-2025 b/obbba_district_impacts/Congressional-Hackathon-2025 new file mode 160000 index 0000000..3f6d05e --- /dev/null +++ b/obbba_district_impacts/Congressional-Hackathon-2025 @@ -0,0 +1 @@ +Subproject commit 3f6d05e76400c6e396a3a4eddd34a7b3f6919fc3 diff --git a/us/states/ny/nyc/mamdani_income_tax/data_exploration.ipynb b/us/states/ny/nyc/mamdani_income_tax/data_exploration.ipynb new file mode 100644 index 0000000..0d76b42 --- /dev/null +++ b/us/states/ny/nyc/mamdani_income_tax/data_exploration.ipynb @@ -0,0 +1,447 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# NYC Dataset Exploration\n", + "\n", + "This notebook explores the New York City (NYC) dataset to understand household counts, income distribution, and demographic characteristics relevant to the Mamdani Millionaire Income Tax analysis." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "NYC_DATASET = \"hf://policyengine/policyengine-us-data/cities/NYC.h5\"\n", + "YEAR = 2026" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Load NYC dataset\n", + "sim = Microsimulation(dataset=NYC_DATASET)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of households in dataset: 51,495\n", + "Household count (weighted): 2,353,653\n", + "Person count (weighted): 6,891,060\n", + "\n", + "County values in dataset: ['ALBANY_COUNTY_NY']...\n", + "\n", + "Households in NYC (in_nyc=True, weighted): 0\n", + "in_nyc unique values: [False]\n", + "State names in dataset: ['NY']\n", + "\n", + "NY taxable income - median: $183,056\n", + "NY taxable income - max: $3,067,754\n" + ] + } + ], + "source": [ + "# Check dataset size\n", + "household_weight = sim.calculate(\"household_weight\", period=YEAR)\n", + "household_count = sim.calculate(\"household_count\", period=YEAR, map_to=\"household\")\n", + "person_count = sim.calculate(\"person_count\", period=YEAR, map_to=\"household\")\n", + "\n", + "print(f\"Number of households in dataset: {len(household_weight):,}\")\n", + "print(f\"Household count (weighted): {household_count.sum():,.0f}\")\n", + "print(f\"Person count (weighted): {person_count.sum():,.0f}\")\n", + "\n", + "# Check county info (determines in_nyc)\n", + "county_str = sim.calculate(\"county_str\", period=YEAR, map_to=\"household\")\n", + "print(f\"\\nCounty values in dataset: {np.unique(county_str)[:10]}...\") # First 10\n", + "\n", + "# Check if in_nyc is set\n", + "in_nyc = sim.calculate(\"in_nyc\", period=YEAR, map_to=\"household\")\n", + "in_nyc_weighted = (np.array(in_nyc) * np.array(household_weight)).sum()\n", + "print(f\"\\nHouseholds in NYC (in_nyc=True, weighted): {in_nyc_weighted:,.0f}\")\n", + "print(f\"in_nyc unique values: {np.unique(in_nyc)}\")\n", + "\n", + "# Check state\n", + "state_name = sim.calculate(\"state_name\", period=YEAR, map_to=\"household\")\n", + "print(f\"State names in dataset: {np.unique(state_name)}\")\n", + "\n", + "# Check NY taxable income\n", + "ny_taxable = sim.calculate(\"ny_taxable_income\", period=YEAR, map_to=\"household\")\n", + "print(f\"\\nNY taxable income - median: ${np.median(ny_taxable):,.0f}\")\n", + "print(f\"NY taxable income - max: ${np.max(ny_taxable):,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Income distribution:\n", + " Median AGI: $40,031\n", + " 75th percentile: $207,486\n", + " 90th percentile: $821,751\n", + " 95th percentile: $1,864,716\n", + " 99th percentile: $2,118,524\n", + " Max AGI: $3,152,176\n" + ] + } + ], + "source": [ + "# Check household income distribution\n", + "agi = sim.calculate(\"adjusted_gross_income\", period=YEAR, map_to=\"household\")\n", + "print(f\"Income distribution:\")\n", + "print(f\" Median AGI: ${agi.median():,.0f}\")\n", + "print(f\" 75th percentile: ${agi.quantile(0.75):,.0f}\")\n", + "print(f\" 90th percentile: ${agi.quantile(0.90):,.0f}\")\n", + "print(f\" 95th percentile: ${agi.quantile(0.95):,.0f}\")\n", + "print(f\" 99th percentile: ${agi.quantile(0.99):,.0f}\")\n", + "print(f\" Max AGI: ${agi.max():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NYC Taxable Income distribution:\n", + " Median: $0\n", + " 75th percentile: $0\n", + " 90th percentile: $0\n", + " 95th percentile: $0\n", + " 99th percentile: $0\n", + " Max: $0\n" + ] + } + ], + "source": [ + "# Check NYC taxable income distribution (relevant for Mamdani tax)\n", + "nyc_taxable_income = sim.calculate(\"nyc_taxable_income\", period=YEAR, map_to=\"household\")\n", + "print(f\"NYC Taxable Income distribution:\")\n", + "print(f\" Median: ${nyc_taxable_income.median():,.0f}\")\n", + "print(f\" 75th percentile: ${nyc_taxable_income.quantile(0.75):,.0f}\")\n", + "print(f\" 90th percentile: ${nyc_taxable_income.quantile(0.90):,.0f}\")\n", + "print(f\" 95th percentile: ${nyc_taxable_income.quantile(0.95):,.0f}\")\n", + "print(f\" 99th percentile: ${nyc_taxable_income.quantile(0.99):,.0f}\")\n", + "print(f\" Max: ${nyc_taxable_income.max():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HIGH INCOME HOUSEHOLDS (NYC Taxable Income)\n", + "======================================================================\n", + "Households with income >= $500K: 0 (0.00%)\n", + "Households with income >= $1M: 0 (0.00%)\n", + "Households with income >= $2M: 0 (0.00%)\n", + "Households with income >= $5M: 0 (0.00%)\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# High income households (relevant for millionaire tax)\n", + "weights = np.array(sim.calculate(\"household_weight\", period=YEAR))\n", + "nyc_taxable = np.array(nyc_taxable_income)\n", + "total_households = weights.sum()\n", + "\n", + "# Households above $1M (Mamdani threshold)\n", + "above_1m_mask = nyc_taxable >= 1_000_000\n", + "above_1m_count = weights[above_1m_mask].sum()\n", + "\n", + "# Households in various high-income brackets\n", + "above_500k_mask = nyc_taxable >= 500_000\n", + "above_500k_count = weights[above_500k_mask].sum()\n", + "\n", + "above_2m_mask = nyc_taxable >= 2_000_000\n", + "above_2m_count = weights[above_2m_mask].sum()\n", + "\n", + "above_5m_mask = nyc_taxable >= 5_000_000\n", + "above_5m_count = weights[above_5m_mask].sum()\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HIGH INCOME HOUSEHOLDS (NYC Taxable Income)\")\n", + "print(\"=\"*70)\n", + "print(f\"Households with income >= $500K: {above_500k_count:,.0f} ({above_500k_count/total_households*100:.2f}%)\")\n", + "print(f\"Households with income >= $1M: {above_1m_count:,.0f} ({above_1m_count/total_households*100:.2f}%)\")\n", + "print(f\"Households with income >= $2M: {above_2m_count:,.0f} ({above_2m_count/total_households*100:.2f}%)\")\n", + "print(f\"Households with income >= $5M: {above_5m_count:,.0f} ({above_5m_count/total_households*100:.2f}%)\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLD COUNTS BY NYC TAXABLE INCOME BRACKET\n", + "======================================================================\n", + "Income Bracket Households % of All Households\n", + " $0-$50k 2,353,654 100.00%\n", + " $50k-$100k 0 0.00%\n", + " $100k-$200k 0 0.00%\n", + " $200k-$500k 0 0.00%\n", + " $500k-$1M 0 0.00%\n", + " $1M-$2M 0 0.00%\n", + " $2M-$5M 0 0.00%\n", + " $5M+ 0 0.00%\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Household counts by income brackets (focus on high income for millionaire tax)\n", + "income_brackets = [\n", + " (0, 50000, \"$0-$50k\"),\n", + " (50000, 100000, \"$50k-$100k\"),\n", + " (100000, 200000, \"$100k-$200k\"),\n", + " (200000, 500000, \"$200k-$500k\"),\n", + " (500000, 1000000, \"$500k-$1M\"),\n", + " (1000000, 2000000, \"$1M-$2M\"),\n", + " (2000000, 5000000, \"$2M-$5M\"),\n", + " (5000000, float('inf'), \"$5M+\")\n", + "]\n", + "\n", + "bracket_data = []\n", + "for lower, upper, label in income_brackets:\n", + " if upper == float('inf'):\n", + " mask = nyc_taxable >= lower\n", + " else:\n", + " mask = (nyc_taxable >= lower) & (nyc_taxable < upper)\n", + " count = weights[mask].sum()\n", + " pct_of_total = (count / total_households) * 100\n", + " \n", + " bracket_data.append({\n", + " \"Income Bracket\": label,\n", + " \"Households\": f\"{count:,.0f}\",\n", + " \"% of All Households\": f\"{pct_of_total:.2f}%\"\n", + " })\n", + "\n", + "income_df = pd.DataFrame(bracket_data)\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLD COUNTS BY NYC TAXABLE INCOME BRACKET\")\n", + "print(\"=\"*70)\n", + "print(income_df.to_string(index=False))\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "============================================================\n", + "NYC DATASET SUMMARY - WEIGHTED (Population Estimates)\n", + "============================================================\n", + " Metric Value\n", + " Household count (weighted) 2,353,653\n", + " Person count (weighted) 6,891,060\n", + " Median AGI $40,031\n", + " 75th percentile AGI $207,486\n", + " 90th percentile AGI $821,751\n", + " 95th percentile AGI $1,864,716\n", + " 99th percentile AGI $2,118,524\n", + " Max AGI $3,152,176\n", + " Median NYC Taxable Income $0\n", + " 99th percentile NYC Taxable Income $0\n", + " Households with income >= $1M 0\n", + "Pct of households with income >= $1M 0.00%\n", + "============================================================\n", + "\n", + "Summary saved to: nyc_dataset_summary_weighted.csv\n" + ] + } + ], + "source": [ + "# Create weighted summary table\n", + "weighted_summary_data = {\n", + " 'Metric': [\n", + " 'Household count (weighted)',\n", + " 'Person count (weighted)',\n", + " 'Median AGI',\n", + " '75th percentile AGI',\n", + " '90th percentile AGI',\n", + " '95th percentile AGI',\n", + " '99th percentile AGI',\n", + " 'Max AGI',\n", + " 'Median NYC Taxable Income',\n", + " '99th percentile NYC Taxable Income',\n", + " 'Households with income >= $1M',\n", + " 'Pct of households with income >= $1M'\n", + " ],\n", + " 'Value': [\n", + " f\"{household_count.sum():,.0f}\",\n", + " f\"{person_count.sum():,.0f}\",\n", + " f\"${agi.median():,.0f}\",\n", + " f\"${agi.quantile(0.75):,.0f}\",\n", + " f\"${agi.quantile(0.90):,.0f}\",\n", + " f\"${agi.quantile(0.95):,.0f}\",\n", + " f\"${agi.quantile(0.99):,.0f}\",\n", + " f\"${agi.max():,.0f}\",\n", + " f\"${nyc_taxable_income.median():,.0f}\",\n", + " f\"${nyc_taxable_income.quantile(0.99):,.0f}\",\n", + " f\"{above_1m_count:,.0f}\",\n", + " f\"{above_1m_count/total_households*100:.2f}%\"\n", + " ]\n", + "}\n", + "\n", + "weighted_df = pd.DataFrame(weighted_summary_data)\n", + "\n", + "print(\"\\n\" + \"=\"*60)\n", + "print(\"NYC DATASET SUMMARY - WEIGHTED (Population Estimates)\")\n", + "print(\"=\"*60)\n", + "print(weighted_df.to_string(index=False))\n", + "print(\"=\"*60)\n", + "\n", + "# Save table\n", + "weighted_df.to_csv('nyc_dataset_summary_weighted.csv', index=False)\n", + "print(\"\\nSummary saved to: nyc_dataset_summary_weighted.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## County Discrepancy Investigation\n", + "\n", + "Ben reported seeing different county results when running the same dataset. His results showed multiple NYC counties (Queens, Bronx, etc.) instead of just Albany County. This may be related to local branch differences." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "County value counts:\n", + "ALBANY_COUNTY_NY 51495\n", + "Name: count, dtype: int64\n", + "\n", + "DataFrame columns and sample:\n", + " household_id household_weight congressional_district_geoid state_fips \\\n", + "0 4550000 0.052011 3603 36 \n", + "1 4550001 0.021776 3603 36 \n", + "2 4550002 0.063064 3603 36 \n", + "3 4550003 0.013337 3603 36 \n", + "4 4550004 29.656933 3603 36 \n", + "\n", + " county county_str in_nyc \n", + "0 QUEENS_COUNTY_NY QUEENS_COUNTY_NY True \n", + "1 QUEENS_COUNTY_NY QUEENS_COUNTY_NY True \n", + "2 QUEENS_COUNTY_NY QUEENS_COUNTY_NY True \n", + "3 QUEENS_COUNTY_NY QUEENS_COUNTY_NY True \n", + "4 QUEENS_COUNTY_NY QUEENS_COUNTY_NY True \n", + "\n", + "Unique counties: ['QUEENS_COUNTY_NY' 'KINGS_COUNTY_NY' 'NEW_YORK_COUNTY_NY'\n", + " 'RICHMOND_COUNTY_NY' 'BRONX_COUNTY_NY']\n", + "in_nyc values: [ True]\n" + ] + } + ], + "source": [ + "# Ben's results (from his local environment - may be related to local branches):\n", + "#\n", + "# In [5]: from policyengine_us import Microsimulation\n", + "#\n", + "# In [6]: sim = Microsimulation(dataset=\"hf://policyengine/policyengine-us-data/cities/NYC.h5\")\n", + "#\n", + "# In [7]: sim.calculate(\"county\")\n", + "# Out[7]: \n", + "# value weight\n", + "# 0 QUEENS_COUNTY_NY 0.052011\n", + "# 1 QUEENS_COUNTY_NY 0.021776\n", + "# 2 QUEENS_COUNTY_NY 0.063064\n", + "# 3 QUEENS_COUNTY_NY 0.013337\n", + "# 4 QUEENS_COUNTY_NY 29.656933\n", + "# ... ... ...\n", + "# 51490 BRONX_COUNTY_NY 0.050521\n", + "# 51491 BRONX_COUNTY_NY 249.701233\n", + "# 51492 BRONX_COUNTY_NY 10.212130\n", + "# 51493 BRONX_COUNTY_NY 0.592128\n", + "# 51494 BRONX_COUNTY_NY 0.095578\n", + "#\n", + "# [51495 rows x 2 columns]\n", + "\n", + "# Compare with our results\n", + "county = sim.calculate(\"county\", period=YEAR)\n", + "print(\"County value counts:\")\n", + "print(county.value_counts())\n", + "\n", + "# Filtering demonstration (from Ben's testing)\n", + "df = sim.calculate_dataframe(['household_id', 'household_weight', 'congressional_district_geoid', 'state_fips', 'county', 'county_str', 'in_nyc'])\n", + "pdf = pd.DataFrame(df)\n", + "print(\"\\nDataFrame columns and sample:\")\n", + "print(pdf.head())\n", + "print(f\"\\nUnique counties: {pdf['county'].unique()}\")\n", + "print(f\"in_nyc values: {pdf['in_nyc'].unique()}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/us/states/ny/nyc/mamdani_income_tax/nyc_dataset_summary_weighted.csv b/us/states/ny/nyc/mamdani_income_tax/nyc_dataset_summary_weighted.csv new file mode 100644 index 0000000..e48fde4 --- /dev/null +++ b/us/states/ny/nyc/mamdani_income_tax/nyc_dataset_summary_weighted.csv @@ -0,0 +1,13 @@ +Metric,Value +Household count (weighted),"2,353,653" +Person count (weighted),"6,891,060" +Median AGI,"$40,031" +75th percentile AGI,"$207,486" +90th percentile AGI,"$821,751" +95th percentile AGI,"$1,864,716" +99th percentile AGI,"$2,118,524" +Max AGI,"$3,152,176" +Median NYC Taxable Income,$0 +99th percentile NYC Taxable Income,$0 +Households with income >= $1M,0 +Pct of households with income >= $1M,0.00% diff --git a/us/states/ny/nyc/mamdani_income_tax/nyc_mamdani_decile_impacts.csv b/us/states/ny/nyc/mamdani_income_tax/nyc_mamdani_decile_impacts.csv new file mode 100644 index 0000000..96f8d5d --- /dev/null +++ b/us/states/ny/nyc/mamdani_income_tax/nyc_mamdani_decile_impacts.csv @@ -0,0 +1,11 @@ +Decile,Average Income Change +1,0.0 +2,0.0 +3,0.0 +4,0.0 +5,0.0 +6,0.0 +7,0.0 +8,0.0 +9,-6.73497988787043 +10,-51204.50957144633 diff --git a/us/states/ny/nyc/mamdani_income_tax/nyc_mamdani_income_tax_analysis.ipynb b/us/states/ny/nyc/mamdani_income_tax/nyc_mamdani_income_tax_analysis.ipynb new file mode 100644 index 0000000..e9b0c84 --- /dev/null +++ b/us/states/ny/nyc/mamdani_income_tax/nyc_mamdani_income_tax_analysis.ipynb @@ -0,0 +1,431 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# NYC Mamdani Millionaire Income Tax Analysis (2026)\n", + "\n", + "This notebook analyzes the fiscal and distributional impacts of Mayor-elect Zohran Mamdani's proposed income tax for New York City.\n", + "\n", + "## Baseline (Current Law)\n", + "- Current NYC income tax structure\n", + "\n", + "## Reform\n", + "- Mamdani Millionaire Income Tax proposal\n", + "\n", + "## Metrics\n", + "We calculate:\n", + "- Budgetary impact (revenue raised)\n", + "- Number and percentage of people/households affected\n", + "- Average change in net income for those affected\n", + "- Distributional analysis by income decile" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "from policyengine_us.reforms.local.ny.mamdani_income_tax import nyc_mamdani_income_tax\n", + "from policyengine_core.reforms import Reform\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "NYC_DATASET = \"hf://policyengine/policyengine-us-data/cities/NYC.h5\"\n", + "YEAR = 2026\n", + "\n", + "# Create combined reform: structural reform + enable the parameter\n", + "param_reform = Reform.from_dict(\n", + " {\n", + " \"gov.local.ny.mamdani_income_tax.in_effect\": {\n", + " \"2026-01-01.2100-12-31\": True\n", + " }\n", + " },\n", + " country_id=\"us\",\n", + ")\n", + "\n", + "# Combine reforms: parameter reform first, then structural reform\n", + "mamdani_reform = (param_reform, nyc_mamdani_income_tax)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Helper Functions" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def calculate_affected(baseline_sim, reform_sim, period=YEAR):\n", + " \"\"\"\n", + " Calculate people affected by the reform (losers who pay more taxes).\n", + " Returns weighted counts, percentages, and average changes.\n", + " \"\"\"\n", + " # Get household-level income change\n", + " baseline_income = np.array(baseline_sim.calculate(\"household_net_income\", period=period, map_to=\"household\"))\n", + " reform_income = np.array(reform_sim.calculate(\"household_net_income\", period=period, map_to=\"household\"))\n", + " household_weight = np.array(baseline_sim.calculate(\"household_weight\", period=period))\n", + " income_change = reform_income - baseline_income\n", + " \n", + " # Get person-level data\n", + " household_id_person = np.array(baseline_sim.calculate(\"household_id\", period=period, map_to=\"person\"))\n", + " household_id_household = np.array(baseline_sim.calculate(\"household_id\", period=period, map_to=\"household\"))\n", + " person_weight = np.array(baseline_sim.calculate(\"person_weight\", period=period))\n", + " \n", + " # Create mapping of household_id to income_change\n", + " income_change_dict = dict(zip(household_id_household, income_change))\n", + " \n", + " # Map income change to each person\n", + " person_income_change = np.array([income_change_dict.get(hh_id, 0) for hh_id in household_id_person])\n", + " \n", + " # Weighted count of people who are losers (lost more than $1 - paying more taxes)\n", + " losers_mask = person_income_change < -1\n", + " people_losing = person_weight[losers_mask].sum()\n", + " \n", + " total_people = person_weight.sum()\n", + " \n", + " # Calculate percentage\n", + " pct_losers = (people_losing / total_people * 100) if total_people > 0 else 0\n", + " \n", + " # Households affected\n", + " losing_hh_mask = income_change < -1\n", + " households_losing = household_weight[losing_hh_mask].sum()\n", + " total_households = household_weight.sum()\n", + " pct_households_losing = (households_losing / total_households * 100) if total_households > 0 else 0\n", + " \n", + " # Average loss for affected households (weighted)\n", + " avg_loss = np.average(income_change[losing_hh_mask], weights=household_weight[losing_hh_mask]) if losing_hh_mask.sum() > 0 else 0\n", + " \n", + " return {\n", + " \"people_losing\": people_losing,\n", + " \"total_people\": total_people,\n", + " \"pct_losers\": pct_losers,\n", + " \"households_losing\": households_losing,\n", + " \"total_households\": total_households,\n", + " \"pct_households_losing\": pct_households_losing,\n", + " \"avg_loss\": avg_loss\n", + " }\n", + "\n", + "def calculate_decile_impacts(baseline_sim, reform_sim, period=YEAR):\n", + " \"\"\"\n", + " Calculate average income change by income decile.\n", + " \"\"\"\n", + " from microdf import MicroSeries\n", + " \n", + " baseline_net_income = baseline_sim.calculate(\"household_net_income\", map_to=\"household\", period=period)\n", + " reform_net_income = reform_sim.calculate(\"household_net_income\", map_to=\"household\", period=period)\n", + " \n", + " count_people = baseline_sim.calculate(\"household_count_people\", period=period)\n", + " household_weight = baseline_sim.calculate(\"household_weight\", period=period)\n", + " \n", + " weighted_income = MicroSeries(\n", + " baseline_net_income, weights=household_weight * count_people\n", + " )\n", + " decile = weighted_income.decile_rank().values\n", + " \n", + " household_income_decile = (np.where(baseline_net_income < 0, -1, decile)).astype(int)\n", + " \n", + " income_change = reform_net_income - baseline_net_income\n", + " \n", + " # Calculate average change by decile\n", + " average_change = income_change.groupby(household_income_decile).mean()\n", + " \n", + " # Filter to valid deciles (1-10)\n", + " average_change = average_change[average_change.index > 0]\n", + " \n", + " return average_change\n", + "\n", + "def format_currency(value):\n", + " \"\"\"Format value as currency in millions or billions.\"\"\"\n", + " if abs(value) >= 1e9:\n", + " return f\"${value/1e9:.2f}B\"\n", + " else:\n", + " return f\"${value/1e6:.2f}M\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Simulations" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading baseline (current NYC income tax structure)...\n", + "Baseline loaded\n", + "\n", + "Loading reform (Mamdani Millionaire Income Tax)...\n", + "Reform loaded\n", + "\n", + "============================================================\n", + "All simulations ready!\n", + "============================================================\n" + ] + } + ], + "source": [ + "print(\"Loading baseline (current NYC income tax structure)...\")\n", + "baseline = Microsimulation(dataset=NYC_DATASET)\n", + "print(\"Baseline loaded\")\n", + "\n", + "print(\"\\nLoading reform (Mamdani Millionaire Income Tax)...\")\n", + "reform_sim = Microsimulation(dataset=NYC_DATASET, reform=mamdani_reform)\n", + "print(\"Reform loaded\")\n", + "\n", + "print(\"\\n\" + \"=\"*60)\n", + "print(\"All simulations ready!\")\n", + "print(\"=\"*60)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Calculate Impacts" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "All impacts calculated\n" + ] + } + ], + "source": [ + "# Revenue impact - calculated as decrease in household net income (revenue to city)\n", + "baseline_hh_income = baseline.calculate(\"household_net_income\", period=YEAR, map_to=\"household\").sum()\n", + "reform_hh_income = reform_sim.calculate(\"household_net_income\", period=YEAR, map_to=\"household\").sum()\n", + "revenue_raised = baseline_hh_income - reform_hh_income # Positive if reform raises revenue\n", + "\n", + "# Affected population\n", + "affected = calculate_affected(baseline, reform_sim)\n", + "\n", + "# Decile impacts\n", + "decile_impacts = calculate_decile_impacts(baseline, reform_sim)\n", + "\n", + "print(\"All impacts calculated\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results Summary" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "NYC MAMDANI MILLIONAIRE INCOME TAX IMPACTS (2026)\n", + "Baseline: Current NYC income tax | Reform: Mamdani Millionaire Income Tax\n", + "================================================================================\n", + "\n", + "================================BUDGETARY IMPACT================================\n", + "Revenue raised: $10.13B\n", + "\n", + "==============================AFFECTED POPULATION===============================\n", + "People paying more taxes: 628,033 (9.11% of population)\n", + "Households paying more taxes: 184,633 (7.84% of households)\n", + "Average change for affected: $-54,859.30\n", + "================================================================================\n" + ] + } + ], + "source": [ + "print(\"\\n\" + \"=\"*80)\n", + "print(f\"NYC MAMDANI MILLIONAIRE INCOME TAX IMPACTS ({YEAR})\")\n", + "print(\"Baseline: Current NYC income tax | Reform: Mamdani Millionaire Income Tax\")\n", + "print(\"=\"*80)\n", + "\n", + "print(f\"\\n{'BUDGETARY IMPACT':=^80}\")\n", + "print(f\"Revenue raised: {format_currency(revenue_raised)}\")\n", + "\n", + "print(f\"\\n{'AFFECTED POPULATION':=^80}\")\n", + "print(f\"People paying more taxes: {affected['people_losing']:,.0f} ({affected['pct_losers']:.2f}% of population)\")\n", + "print(f\"Households paying more taxes: {affected['households_losing']:,.0f} ({affected['pct_households_losing']:.2f}% of households)\")\n", + "print(f\"Average change for affected: ${affected['avg_loss']:,.2f}\")\n", + "print(\"=\"*80)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Distributional Analysis by Income Decile" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "AVERAGE HOUSEHOLD INCOME CHANGE BY DECILE\n", + "================================================================================\n", + "Decile Average Change\n", + "------------------------------\n", + "1 $ 0.00\n", + "2 $ 0.00\n", + "3 $ 0.00\n", + "4 $ 0.00\n", + "5 $ 0.00\n", + "6 $ 0.00\n", + "7 $ 0.00\n", + "8 $ 0.00\n", + "9 $ -6.73\n", + "10 $ -51,204.51\n", + "================================================================================\n", + "Note: Negative values indicate income loss (higher taxes)\n" + ] + } + ], + "source": [ + "print(\"\\n\" + \"=\"*80)\n", + "print(\"AVERAGE HOUSEHOLD INCOME CHANGE BY DECILE\")\n", + "print(\"=\"*80)\n", + "print(f\"{'Decile':<10} {'Average Change':>20}\")\n", + "print(\"-\"*30)\n", + "for decile in range(1, 11):\n", + " if decile in decile_impacts.index:\n", + " change = decile_impacts[decile]\n", + " print(f\"{decile:<10} ${change:>18,.2f}\")\n", + "print(\"=\"*80)\n", + "print(\"Note: Negative values indicate income loss (higher taxes)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export Results" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "==============================================================================================================\n", + "NYC MAMDANI INCOME TAX REFORM SUMMARY\n", + "==============================================================================================================\n", + " Scenario Description Year Revenue Raised % Population Affected % Households Affected Avg Change for Affected\n", + "Mamdani Income Tax Millionaire Income Tax for NYC 2026 $10.13B 9.11% 7.84% $-54,859.30\n", + "==============================================================================================================\n", + "\n", + "Exported to: nyc_mamdani_income_tax_results.csv\n" + ] + } + ], + "source": [ + "# Create results DataFrame\n", + "results = [\n", + " {\n", + " \"Scenario\": \"Mamdani Income Tax\",\n", + " \"Description\": \"Millionaire Income Tax for NYC\",\n", + " \"Year\": YEAR,\n", + " \"Revenue Raised\": format_currency(revenue_raised),\n", + " \"% Population Affected\": f\"{affected['pct_losers']:.2f}%\",\n", + " \"% Households Affected\": f\"{affected['pct_households_losing']:.2f}%\",\n", + " \"Avg Change for Affected\": f\"${affected['avg_loss']:,.2f}\"\n", + " }\n", + "]\n", + "\n", + "df_results = pd.DataFrame(results)\n", + "\n", + "print(\"\\n\" + \"=\"*110)\n", + "print(\"NYC MAMDANI INCOME TAX REFORM SUMMARY\")\n", + "print(\"=\"*110)\n", + "print(df_results.to_string(index=False))\n", + "print(\"=\"*110)\n", + "\n", + "# Export to CSV\n", + "df_results.to_csv(\"nyc_mamdani_income_tax_results.csv\", index=False)\n", + "print(\"\\nExported to: nyc_mamdani_income_tax_results.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Decile impacts exported to: nyc_mamdani_decile_impacts.csv\n" + ] + } + ], + "source": [ + "# Export decile impacts\n", + "decile_df = pd.DataFrame({\n", + " 'Decile': decile_impacts.index,\n", + " 'Average Income Change': decile_impacts.values\n", + "})\n", + "decile_df.to_csv(\"nyc_mamdani_decile_impacts.csv\", index=False)\n", + "print(\"Decile impacts exported to: nyc_mamdani_decile_impacts.csv\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/us/states/ny/nyc/mamdani_income_tax/nyc_mamdani_income_tax_results.csv b/us/states/ny/nyc/mamdani_income_tax/nyc_mamdani_income_tax_results.csv new file mode 100644 index 0000000..3f34296 --- /dev/null +++ b/us/states/ny/nyc/mamdani_income_tax/nyc_mamdani_income_tax_results.csv @@ -0,0 +1,2 @@ +Scenario,Description,Year,Revenue Raised,% Population Affected,% Households Affected,Avg Change for Affected +Mamdani Income Tax,Millionaire Income Tax for NYC,2026,$10.13B,9.11%,7.84%,"$-54,859.30"