Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
- bump: minor
changes:
added:
- Benchmarking experiments for wealth imputation paper draft.
- MDN model to experiments run in imputing-from-scf-to-cps.ipynb.
- Privacy & Terms to microimputation-dashboard.
59 changes: 59 additions & 0 deletions microimputation-dashboard/app/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,61 @@ import { parseImputationCSV } from '@/utils/csvParser';
import { ImputationDataPoint } from '@/types/imputation';
import { parseDeeplinkParams, GitHubArtifactInfo } from '@/utils/deeplinks';

/**
 * Modal overlay showing the dashboard's privacy statement, disclaimer and
 * user-responsibility notice.
 *
 * Renders nothing while `isOpen` is false. `onClose` is invoked when the
 * "Close" button is pressed; the parent owns the open/closed state.
 *
 * The panel is exposed to assistive technology as a modal dialog labelled by
 * its heading, so screen readers announce it as a dialog instead of as
 * anonymous page content.
 */
function PrivacyModal({ isOpen, onClose }: { isOpen: boolean; onClose: () => void }) {
  if (!isOpen) return null;

  return (
    <div className="fixed inset-0 bg-black bg-opacity-50 z-50 flex items-center justify-center p-4">
      <div
        role="dialog"
        aria-modal="true"
        aria-labelledby="privacy-modal-title"
        className="bg-white rounded-lg max-w-lg w-full p-6 shadow-xl"
      >
        <h2 id="privacy-modal-title" className="text-xl font-bold text-gray-900 mb-4">Privacy & Terms of Use</h2>

        <div className="space-y-4 text-sm text-gray-700">
          <div>
            <h3 className="font-semibold text-gray-900 mb-1">Data Privacy</h3>
            <p>
              All data uploaded to this dashboard is processed entirely within your browser.
              No data is transmitted to or stored on PolicyEngine servers. When you close or
              refresh this page, all loaded data is cleared from memory.
            </p>
          </div>

          <div>
            <h3 className="font-semibold text-gray-900 mb-1">Disclaimer</h3>
            <p>
              This tool is provided &quot;as is&quot; without warranty of any kind, express or implied.
              PolicyEngine assumes no responsibility for the security, accuracy, or confidentiality
              of any data you choose to load into this application.
            </p>
          </div>

          <div>
            <h3 className="font-semibold text-gray-900 mb-1">User Responsibility</h3>
            <p>
              Users are solely responsible for ensuring they have appropriate rights to use any
              data loaded into this dashboard and for compliance with applicable data protection
              regulations.
            </p>
          </div>
        </div>

        {/* Explicit type so the button never acts as a submit control if nested in a form. */}
        <button
          type="button"
          onClick={onClose}
          className="mt-6 w-full bg-blue-600 hover:bg-blue-700 text-white font-medium py-2 px-4 rounded-md transition-colors"
        >
          Close
        </button>
      </div>
    </div>
  );
}

function HomeContent() {
const [data, setData] = useState<ImputationDataPoint[]>([]);
const [fileName, setFileName] = useState<string>('');
const [showDashboard, setShowDashboard] = useState(false);
const [isLoadingFromDeeplink, setIsLoadingFromDeeplink] = useState(false);
const [githubArtifactInfo, setGithubArtifactInfo] = useState<GitHubArtifactInfo | null>(null);
const [showPrivacyModal, setShowPrivacyModal] = useState(false);

const searchParams = useSearchParams();
const deeplinkParams = parseDeeplinkParams(searchParams);
Expand Down Expand Up @@ -109,10 +158,20 @@ function HomeContent() {
>
PolicyEngine.org
</a>
{' • '}
<button
onClick={() => setShowPrivacyModal(true)}
className="text-blue-600 hover:text-blue-800"
>
Privacy & Terms
</button>
</p>
</div>
</div>
</footer>

{/* Privacy Modal */}
<PrivacyModal isOpen={showPrivacyModal} onClose={() => setShowPrivacyModal(false)} />
</main>
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ export default function BenchmarkLossCharts({ data }: BenchmarkLossChartsProps)
<span className="font-semibold text-gray-900">{bestModel.quantileLoss.toFixed(6)}</span>
</div>
{bestModel.quantileTrainTestRatio !== undefined && (
<span className={`text-xs ${bestModel.quantileTrainTestRatio > 1.1 ? 'text-amber-600' : 'text-gray-700'}`}>
<span className={`text-xs ${bestModel.quantileTrainTestRatio > 1.1 ? 'text-gray-700' : 'text-gray-900'}`}>
Train/test ratio: {bestModel.quantileTrainTestRatio.toFixed(3)}
</span>
)}
Expand Down
51 changes: 45 additions & 6 deletions microimputation-dashboard/components/DistributionOverlay.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,26 @@ import {
Tooltip,
Legend,
ResponsiveContainer,
Brush,
} from 'recharts';

/**
 * Turn a number into a compact label string.
 *
 * - Magnitudes of 100,000 and above, or non-zero magnitudes of 0.0001 and
 *   below, are rendered in exponential notation with `precision` fraction digits.
 * - Other non-zero magnitudes below 1 are rendered in fixed notation with
 *   `precision + 2` fraction digits.
 * - Everything else (including 0) is rendered in fixed notation with
 *   `precision` fraction digits.
 */
function formatLargeNumber(value: number, precision: number = 2): string {
  const magnitude = Math.abs(value);
  const isNonZero = magnitude > 0;

  const isVeryLarge = magnitude >= 100000;
  const isVerySmall = isNonZero && magnitude <= 0.0001;
  if (isVeryLarge || isVerySmall) {
    return value.toExponential(precision);
  }

  // Fractions get two extra digits so they do not collapse to "0.00".
  const fractionDigits = isNonZero && magnitude < 1 ? precision + 2 : precision;
  return value.toFixed(fractionDigits);
}

interface DistributionOverlayProps {
data: ImputationDataPoint[];
}
Expand Down Expand Up @@ -72,14 +90,16 @@ export default function DistributionOverlay({
const info = JSON.parse(d.additional_info);

if (d.metric_name === 'histogram_distribution') {
// Numerical variable
// Numerical variable - use scientific notation for large values
const binStartFormatted = formatLargeNumber(info.bin_start);
const binEndFormatted = formatLargeNumber(info.bin_end);
(distributions[variable].data as BinData[]).push({
binIndex: info.bin_index,
binStart: info.bin_start,
binEnd: info.bin_end,
donorHeight: info.donor_height,
receiverHeight: info.receiver_height,
binLabel: `${info.bin_start.toFixed(2)}-${info.bin_end.toFixed(2)}`,
binLabel: `${binStartFormatted}-${binEndFormatted}`,
});
distributions[variable].nSamplesDonor = info.n_samples_donor;
distributions[variable].nSamplesReceiver = info.n_samples_receiver;
Expand Down Expand Up @@ -130,7 +150,7 @@ export default function DistributionOverlay({

return (
<div>
<ResponsiveContainer width="100%" height={400}>
<ResponsiveContainer width="100%" height={580}>
<BarChart
data={chartData}
margin={{ top: 20, right: 30, left: 20, bottom: 60 }}
Expand Down Expand Up @@ -161,11 +181,19 @@ export default function DistributionOverlay({
/>
<Tooltip
formatter={(value: number) => [`${value.toFixed(2)}%`, '']}
labelFormatter={(label) => `Bin: ${label}`}
labelFormatter={(_label, payload) => {
if (payload && payload.length > 0 && payload[0].payload) {
const { binStart, binEnd } = payload[0].payload;
// Show full values with commas in tooltip for readability
const startStr = binStart.toLocaleString(undefined, { maximumFractionDigits: 2 });
const endStr = binEnd.toLocaleString(undefined, { maximumFractionDigits: 2 });
return `Bin: ${startStr} - ${endStr}`;
}
return `Bin: ${_label}`;
}}
contentStyle={{ color: '#000000' }}
labelStyle={{ color: '#000000' }}
/>
<Legend wrapperStyle={{ color: '#000000', paddingTop: '10px' }} />
<Bar
dataKey="Donor"
fill="#3b82f6"
Expand All @@ -178,10 +206,21 @@ export default function DistributionOverlay({
fillOpacity={0.7}
name={`Receiver (n=${dist.nSamplesReceiver})`}
/>
<Brush
dataKey="name"
height={30}
stroke="#8884d8"
fill="#f3f4f6"
tickFormatter={() => ''}
/>
<Legend
verticalAlign="bottom"
wrapperStyle={{ color: '#000000', paddingTop: '45px' }}
/>
</BarChart>
</ResponsiveContainer>
<p className="text-xs text-gray-600 mt-2 text-center">
Histogram with {(dist.data as BinData[]).length} bins. Each bin shows the percentage of values falling within that range.
Histogram with {(dist.data as BinData[]).length} bins. Drag the handles below to zoom into a specific range.
Overlapping bars indicate similar distributions.
</p>
</div>
Expand Down
97 changes: 78 additions & 19 deletions microimputation-dashboard/components/ImputationResults.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ interface DistributionMetric {
method: string;
metricName: string;
value: number;
normalizedValue?: number; // Wasserstein distance as percentage of variable range
variableRange?: number; // Range of the variable for context
}

export default function ImputationResults({ data }: ImputationResultsProps) {
Expand All @@ -22,6 +24,34 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
return data.filter(d => d.type === 'distribution_distance');
}, [data]);

// Extract variable ranges from distribution_bins data.
// For every variable with histogram_distribution rows, track the smallest
// bin_start and the largest bin_end found in each row's additional_info JSON.
// Variables with no usable bounds keep the Infinity/-Infinity sentinels.
const variableRanges = useMemo(() => {
  const ranges: Record<string, { min: number; max: number }> = {};
  const distributionBins = data.filter(d => d.type === 'distribution_bins' && d.metric_name === 'histogram_distribution');

  // Only finite numbers may widen a range. A JSON `null` would otherwise be
  // coerced to 0 by Math.min/Math.max and silently distort the range, and a
  // string or NaN would poison it.
  const isFiniteNumber = (candidate: unknown): candidate is number =>
    typeof candidate === 'number' && Number.isFinite(candidate);

  distributionBins.forEach(d => {
    try {
      const info = JSON.parse(d.additional_info);
      const variable = d.variable;

      if (!ranges[variable]) {
        ranges[variable] = { min: Infinity, max: -Infinity };
      }

      if (isFiniteNumber(info.bin_start)) {
        ranges[variable].min = Math.min(ranges[variable].min, info.bin_start);
      }
      if (isFiniteNumber(info.bin_end)) {
        ranges[variable].max = Math.max(ranges[variable].max, info.bin_end);
      }
    } catch {
      // Best effort: rows whose additional_info cannot be parsed are skipped.
    }
  });

  return ranges;
}, [data]);

// Group by metric type
const { wassersteinData, klDivergenceData } = useMemo(() => {
const wasserstein: DistributionMetric[] = [];
Expand All @@ -36,21 +66,28 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
};

if (d.metric_name === 'wasserstein_distance') {
// Calculate normalized value as percentage of variable range
const range = variableRanges[d.variable];
if (range && range.max > range.min) {
const variableRange = range.max - range.min;
metric.variableRange = variableRange;
metric.normalizedValue = (metric.value / variableRange) * 100;
}
wasserstein.push(metric);
} else if (d.metric_name === 'kl_divergence') {
klDiv.push(metric);
}
});

// Sort by value (ascending - lower is better)
wasserstein.sort((a, b) => a.value - b.value);
// Sort by normalized value if available, otherwise by raw value (ascending - lower is better)
wasserstein.sort((a, b) => (a.normalizedValue ?? a.value) - (b.normalizedValue ?? b.value));
klDiv.sort((a, b) => a.value - b.value);

return {
wassersteinData: wasserstein,
klDivergenceData: klDiv
};
}, [distributionData]);
}, [distributionData, variableRanges]);

const hasWasserstein = wassersteinData.length > 0;
const hasKLDivergence = klDivergenceData.length > 0;
Expand All @@ -59,13 +96,17 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
return null;
}

// Color function based on value quality (lower is better)
const getWassersteinColor = (value: number): string => {
if (value < 0.01) return '#16a34a'; // Dark green - excellent
if (value < 0.05) return '#22c55e'; // Green - good
if (value < 0.1) return '#eab308'; // Yellow - moderate
if (value < 0.2) return '#f97316'; // Orange - fair
return '#ef4444'; // Red - poor
// Pick the bar colour for a Wasserstein distance (lower is better).
// The score is the distance as a percentage of the variable range when one is
// supplied; otherwise the raw distance multiplied by 100 is used instead.
const getWassersteinColor = (normalizedValue: number | undefined, rawValue: number): string => {
  const pctOfRange = normalizedValue ?? (rawValue * 100);

  // Exclusive upper bounds (in % of range) paired with their colour, best first.
  const bands: ReadonlyArray<readonly [number, string]> = [
    [1, '#16a34a'], // Dark green - excellent (<1% of range)
    [3, '#22c55e'], // Green - good (<3% of range)
    [5, '#eab308'], // Yellow - moderate (<5% of range)
    [10, '#f97316'], // Orange - fair (<10% of range)
  ];

  for (const [upperBound, color] of bands) {
    if (pctOfRange < upperBound) return color;
  }
  return '#ef4444'; // Red - poor (>=10% of range)
};

const getKLColor = (value: number): string => {
Expand Down Expand Up @@ -112,9 +153,9 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
greater differences between imputed and true distributions.
</p>
<p className="text-sm text-gray-700">
<strong>Interpretation:</strong> Values closer to 0 are better. Generally, values below
0.05 indicate good imputation quality, while values above 0.2 suggest significant
distributional differences.
<strong>Interpretation:</strong> Since Wasserstein distance is scale-dependent, quality is assessed
relative to each variable&apos;s range. A distance of &lt;1% of the variable range is excellent,
&lt;3% is good, &lt;5% is moderate, &lt;10% is fair, and &ge;10% suggests poor distributional match.
</p>
</div>

Expand All @@ -130,14 +171,21 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
<XAxis type="number" tick={{ fill: '#000000' }} />
<YAxis type="category" dataKey="variable" width={90} tick={{ fill: '#000000' }} />
<Tooltip
formatter={(value: number) => [value.toFixed(6), 'Wasserstein Distance']}
formatter={(value: number, _name: string, props: { payload?: DistributionMetric }) => {
const normalizedValue = props.payload?.normalizedValue;
const distanceStr = value.toFixed(6);
const pctStr = normalizedValue !== undefined ? ` (${normalizedValue.toFixed(2)}% of range)` : '';
return [`${distanceStr}${pctStr}`, 'Wasserstein Distance'];
}}
contentStyle={{ color: '#000000' }}
labelStyle={{ color: '#000000' }}
/>
<Legend wrapperStyle={{ color: '#000000' }} />
<Bar dataKey="value" name="Wasserstein Distance">
{wassersteinData.map((entry, index) => (
<Cell
key={`cell-${index}`}
fill={getWassersteinColor(entry.value)}
fill={getWassersteinColor(entry.normalizedValue, entry.value)}
/>
))}
</Bar>
Expand All @@ -156,6 +204,9 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
Wasserstein Distance
</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
% of Range
</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
Quality Assessment
</th>
Expand All @@ -166,16 +217,19 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
let assessment = '';
let assessmentColor = '';

if (item.value < 0.01) {
// Use normalized value (percentage of range) for assessment
const normalizedValue = item.normalizedValue ?? (item.value * 100);

if (normalizedValue < 1) {
assessment = 'Excellent';
assessmentColor = 'text-green-700 font-semibold';
} else if (item.value < 0.05) {
} else if (normalizedValue < 3) {
assessment = 'Good';
assessmentColor = 'text-green-600';
} else if (item.value < 0.1) {
} else if (normalizedValue < 5) {
assessment = 'Moderate';
assessmentColor = 'text-yellow-600';
} else if (item.value < 0.2) {
} else if (normalizedValue < 10) {
assessment = 'Fair';
assessmentColor = 'text-orange-600';
} else {
Expand All @@ -191,6 +245,9 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
<td className="px-4 py-3 whitespace-nowrap text-sm text-gray-700">
{item.value.toFixed(6)}
</td>
<td className="px-4 py-3 whitespace-nowrap text-sm text-gray-700">
{item.normalizedValue !== undefined ? `${item.normalizedValue.toFixed(2)}%` : 'N/A'}
</td>
<td className={`px-4 py-3 whitespace-nowrap text-sm ${assessmentColor}`}>
{assessment}
</td>
Expand Down Expand Up @@ -243,6 +300,8 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
<YAxis type="category" dataKey="variable" width={90} tick={{ fill: '#000000' }} />
<Tooltip
formatter={(value: number) => [value.toFixed(6), 'KL-Divergence']}
contentStyle={{ color: '#000000' }}
labelStyle={{ color: '#000000' }}
/>
<Legend wrapperStyle={{ color: '#000000' }} />
<Bar dataKey="value" name="KL-Divergence">
Expand Down
Loading