+ )}
+
+ {/* Note about methods without data */}
+ {methodsWithoutData.length > 0 && (
+
+
+ Note: {methodsWithoutData.length === 1 ? 'The following method does' : 'The following methods do'} not appear in visualizations because {methodsWithoutData.length === 1 ? 'it does' : 'they do'} not support imputation of the selected variables given their variable types: {methodsWithoutData.join(', ')}
+
+ Quantile loss measures how well the imputation method predicts different quantiles of the distribution for numerical variables. It is an asymmetric loss that penalizes under-prediction more heavily for higher quantiles and over-prediction more heavily for lower quantiles.
+
+ Lower values indicate better performance.
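For concreteness, a minimal sketch of the pinball (quantile) loss described above; the function and sample values are illustrative, not part of this codebase:

```ts
// Pinball (quantile) loss for one observation. For quantile q,
// under-prediction (actual > predicted) is weighted by q and
// over-prediction by (1 - q), which produces the asymmetry described above.
function quantileLoss(q: number, actual: number, predicted: number): number {
  const diff = actual - predicted;
  return diff >= 0 ? q * diff : (q - 1) * diff;
}

// Averaged over observations, e.g. for the 0.9 quantile:
const pairs: [number, number][] = [[10, 8], [5, 6]];
const mean =
  pairs.reduce((sum, [y, yHat]) => sum + quantileLoss(0.9, y, yHat), 0) / pairs.length;
```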
+
+ Log loss measures how well the imputation method predicts categorical and boolean variables by evaluating the accuracy of predicted probabilities. It heavily penalizes confident misclassifications: a perfect classifier has a log loss of 0, while worse predictions yield increasingly higher values.
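And a matching sketch of multi-class log loss, again purely illustrative:

```ts
// Log loss: the negative log of the probability assigned to the true class,
// averaged over observations. Probability 1 on the true class gives 0;
// a confident wrong prediction (true-class probability near 0) blows up.
function logLoss(trueLabels: number[], probs: number[][], eps = 1e-15): number {
  const total = trueLabels.reduce((sum, label, i) => {
    const p = Math.min(Math.max(probs[i][label], eps), 1 - eps); // clip to avoid log(0)
    return sum - Math.log(p);
  }, 0);
  return total / trueLabels.length;
}
```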
+
+ Overfitting assessment: When test performance (green bars) is significantly worse than train performance (cyan bars), it suggests the model may be overfitting to the training data and not generalizing well to unseen data. If both train and test performance are poor, the model may be underfitting and failing to capture the underlying patterns.
+
+ Healthy performance is indicated by similar train and test metrics, with both being reasonably low.
+
+
+
+ )}
+
+
+ );
+}
diff --git a/microimputation-dashboard/components/FileUpload.tsx b/microimputation-dashboard/components/FileUpload.tsx
index 581a2f3..fad0ec7 100644
--- a/microimputation-dashboard/components/FileUpload.tsx
+++ b/microimputation-dashboard/components/FileUpload.tsx
@@ -8,11 +8,10 @@ import { DeeplinkParams, GitHubArtifactInfo } from '@/utils/deeplinks';
interface FileUploadProps {
onFileLoad: (content: string, filename: string) => void;
onViewDashboard: () => void;
- onCompareLoad?: (content1: string, filename1: string, content2: string, filename2: string) => void;
deeplinkParams?: DeeplinkParams | null;
isLoadingFromDeeplink?: boolean;
- onDeeplinkLoadComplete?: (primary: GitHubArtifactInfo | null, secondary?: GitHubArtifactInfo | null | undefined) => void;
- onGithubLoad?: (primary: GitHubArtifactInfo | null, secondary?: GitHubArtifactInfo | null) => void;
+ onDeeplinkLoadComplete?: (primary: GitHubArtifactInfo | null) => void;
+ onGithubLoad?: (primary: GitHubArtifactInfo | null) => void;
}
interface GitHubCommit {
@@ -43,7 +42,6 @@ interface GitHubArtifact {
export default function FileUpload({
onFileLoad,
onViewDashboard,
- onCompareLoad,
deeplinkParams,
isLoadingFromDeeplink,
onDeeplinkLoadComplete,
@@ -66,70 +64,33 @@ export default function FileUpload({
const [selectedArtifact, setSelectedArtifact] = useState('');
const [isLoadingGithubData, setIsLoadingGithubData] = useState(false);
- // Comparison mode state
- const [comparisonMode, setComparisonMode] = useState(false);
- const [selectedSecondBranch, setSelectedSecondBranch] = useState('');
- const [secondCommits, setSecondCommits] = useState<GitHubCommit[]>([]);
- const [selectedSecondCommit, setSelectedSecondCommit] = useState('');
- const [secondArtifacts, setSecondArtifacts] = useState<GitHubArtifact[]>([]);
- const [selectedSecondArtifact, setSelectedSecondArtifact] = useState('');
-
// Helper function to load a single artifact from deeplink parameters
- const loadArtifactFromDeeplink = useCallback(async (artifactInfo: GitHubArtifactInfo, githubToken: string): Promise<string> => {
- // First, get the artifacts for the specific commit
- const [owner, repo] = artifactInfo.repo.split('/');
- const runsResponse = await fetch(`https://api.github.com/repos/${owner}/${repo}/actions/runs?head_sha=${artifactInfo.commit}`, {
- headers: {
- 'Authorization': `Bearer ${githubToken}`,
- 'Accept': 'application/vnd.github.v3+json',
- 'User-Agent': 'PolicyEngine-Dashboard/1.0'
- }
- });
-
- if (!runsResponse.ok) {
- throw new Error(`Failed to fetch workflow runs: ${runsResponse.status} ${runsResponse.statusText}`);
+ const loadArtifactFromDeeplink = useCallback(async (artifactInfo: GitHubArtifactInfo): Promise<string> => {
+ // Get artifacts for the specific commit using API route
+ const artifactsResponse = await fetch(
+ `/api/github/artifacts?repo=${encodeURIComponent(artifactInfo.repo)}&commit=${encodeURIComponent(artifactInfo.commit)}`
+ );
+
+ if (!artifactsResponse.ok) {
+ throw new Error(`Failed to fetch artifacts: ${artifactsResponse.status}`);
}
- const runsData = await runsResponse.json();
- const completedRuns = runsData.workflow_runs.filter((run: { status: string }) => run.status === 'completed');
-
- if (completedRuns.length === 0) {
- throw new Error('No completed workflow runs found for this commit');
- }
+ const artifacts = await artifactsResponse.json();
// Find the artifact by name
- let targetArtifact = null;
- for (const run of completedRuns) {
- const artifactsResponse = await fetch(`https://api.github.com/repos/${owner}/${repo}/actions/runs/${run.id}/artifacts`, {
- headers: {
- 'Authorization': `Bearer ${githubToken}`,
- 'Accept': 'application/vnd.github.v3+json',
- 'User-Agent': 'PolicyEngine-Dashboard/1.0'
- }
- });
-
- if (artifactsResponse.ok) {
- const artifactsData = await artifactsResponse.json();
- targetArtifact = artifactsData.artifacts.find((artifact: { name: string }) => artifact.name === artifactInfo.artifact);
- if (targetArtifact) break;
- }
- }
+ const targetArtifact = artifacts.find((artifact: { name: string }) => artifact.name === artifactInfo.artifact);
if (!targetArtifact) {
throw new Error(`Artifact "${artifactInfo.artifact}" not found for commit ${artifactInfo.commit}`);
}
- // Download and extract the artifact
- const downloadResponse = await fetch(targetArtifact.archive_download_url, {
- headers: {
- 'Authorization': `Bearer ${githubToken}`,
- 'Accept': 'application/vnd.github.v3+json',
- 'User-Agent': 'PolicyEngine-Dashboard/1.0'
- }
- });
+ // Download and extract the artifact using API route
+ const downloadResponse = await fetch(
+ `/api/github/download?url=${encodeURIComponent(targetArtifact.archive_download_url)}`
+ );
if (!downloadResponse.ok) {
- throw new Error(`Failed to download artifact: ${downloadResponse.status} ${downloadResponse.statusText}`);
+ throw new Error(`Failed to download artifact: ${downloadResponse.status}`);
}
const zipBuffer = await downloadResponse.arrayBuffer();
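This refactor assumes server-side routes under `/api/github/*` that attach the token and proxy GitHub, so the secret never reaches the browser. The routes themselves are not shown in this diff; a hypothetical sketch of the download proxy (the route path, validation, and `GITHUB_TOKEN` env var are assumptions):

```ts
// app/api/github/download/route.ts — hypothetical Next.js route handler that
// proxies an artifact download with a server-side token.
import { NextRequest, NextResponse } from 'next/server';

export async function GET(request: NextRequest) {
  const url = request.nextUrl.searchParams.get('url');
  // Only proxy GitHub API URLs to avoid becoming an open relay.
  if (!url || !url.startsWith('https://api.github.com/')) {
    return NextResponse.json({ error: 'Invalid download URL' }, { status: 400 });
  }
  const upstream = await fetch(url, {
    headers: {
      Authorization: `Bearer ${process.env.GITHUB_TOKEN}`, // server-side secret
      Accept: 'application/vnd.github.v3+json',
    },
  });
  if (!upstream.ok) {
    return NextResponse.json(
      { error: `GitHub responded ${upstream.status}` },
      { status: upstream.status }
    );
  }
  // Buffer the ZIP and return it unchanged (fine for modest artifact sizes).
  const zip = await upstream.arrayBuffer();
  return new NextResponse(zip, { headers: { 'Content-Type': 'application/zip' } });
}
```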
@@ -154,13 +115,7 @@ export default function FileUpload({
}, []);
// Load GitHub artifacts directly from deeplink parameters
- const loadDeeplinkArtifacts = useCallback(async (primary: GitHubArtifactInfo, secondary?: GitHubArtifactInfo) => {
- const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN;
- if (!githubToken) {
- setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.');
- return;
- }
-
+ const loadDeeplinkArtifacts = useCallback(async (primary: GitHubArtifactInfo) => {
setIsLoading(true);
setError('');
@@ -168,28 +123,16 @@ export default function FileUpload({
setError('🔄 Loading data from GitHub artifacts...');
// Load primary artifact
- const primaryData = await loadArtifactFromDeeplink(primary, githubToken);
-
- if (secondary && onCompareLoad) {
- // Load secondary artifact for comparison
- const secondaryData = await loadArtifactFromDeeplink(secondary, githubToken);
+ const primaryData = await loadArtifactFromDeeplink(primary);
- // Generate display names with commit info
- const primaryDisplayName = `${primary.repo}@${primary.branch} (${primary.commit.substring(0, 7)}) - ${primary.artifact}`;
- const secondaryDisplayName = `${secondary.repo}@${secondary.branch} (${secondary.commit.substring(0, 7)}) - ${secondary.artifact}`;
-
- onCompareLoad(primaryData, primaryDisplayName, secondaryData, secondaryDisplayName);
- setLoadedFile(`Comparison: ${primaryDisplayName} vs ${secondaryDisplayName}`);
- } else {
- // Single artifact load
- const displayName = `${primary.repo}@${primary.branch} (${primary.commit.substring(0, 7)}) - ${primary.artifact}`;
- onFileLoad(primaryData, displayName);
- setLoadedFile(displayName);
- }
+ // Single artifact load
+ const displayName = `${primary.repo}@${primary.branch} (${primary.commit.substring(0, 7)}) - ${primary.artifact}`;
+ onFileLoad(primaryData, displayName);
+ setLoadedFile(displayName);
// Notify parent component that deeplink loading is complete
if (onDeeplinkLoadComplete) {
- onDeeplinkLoadComplete(primary, secondary);
+ onDeeplinkLoadComplete(primary);
}
setError('');
@@ -201,35 +144,19 @@ export default function FileUpload({
} finally {
setIsLoading(false);
}
- }, [onFileLoad, onCompareLoad, onDeeplinkLoadComplete, loadArtifactFromDeeplink]);
+ }, [onFileLoad, onDeeplinkLoadComplete, loadArtifactFromDeeplink]);
// Handle deeplink loading on mount
useEffect(() => {
- if (deeplinkParams && isLoadingFromDeeplink) {
+ if (deeplinkParams && isLoadingFromDeeplink && deeplinkParams.primary) {
setActiveTab('github');
+ setGithubRepo(deeplinkParams.primary.repo);
+ setSelectedBranch(deeplinkParams.primary.branch);
+ setSelectedCommit(deeplinkParams.primary.commit);
+ setSelectedArtifact(deeplinkParams.primary.artifact);
- if (deeplinkParams.mode === 'comparison' && deeplinkParams.primary && deeplinkParams.secondary) {
- setComparisonMode(true);
- setGithubRepo(deeplinkParams.primary.repo);
- setSelectedBranch(deeplinkParams.primary.branch);
- setSelectedCommit(deeplinkParams.primary.commit);
- setSelectedArtifact(deeplinkParams.primary.artifact);
- setSelectedSecondBranch(deeplinkParams.secondary.branch);
- setSelectedSecondCommit(deeplinkParams.secondary.commit);
- setSelectedSecondArtifact(deeplinkParams.secondary.artifact);
-
- // Auto-load comparison data
- loadDeeplinkArtifacts(deeplinkParams.primary, deeplinkParams.secondary);
- } else if (deeplinkParams.primary) {
- setComparisonMode(false);
- setGithubRepo(deeplinkParams.primary.repo);
- setSelectedBranch(deeplinkParams.primary.branch);
- setSelectedCommit(deeplinkParams.primary.commit);
- setSelectedArtifact(deeplinkParams.primary.artifact);
-
- // Auto-load single artifact data
- loadDeeplinkArtifacts(deeplinkParams.primary);
- }
+ // Auto-load artifact data
+ loadDeeplinkArtifacts(deeplinkParams.primary);
}
}, [deeplinkParams, isLoadingFromDeeplink, loadDeeplinkArtifacts]);
@@ -436,14 +363,24 @@ export default function FileUpload({
}
let url: URL;
+ const finalUrl = urlInput.trim();
+
try {
- url = new URL(urlInput.trim());
+ url = new URL(finalUrl);
} catch {
setError('Invalid URL format. Please enter a valid URL (e.g., https://example.com/data.csv).');
return;
}
- if (!url.pathname.toLowerCase().endsWith('.csv') && !urlInput.toLowerCase().includes('csv')) {
+ // Handle Google Drive URLs
+ if (url.hostname === 'drive.google.com') {
+ setError(
+ 'Google Drive links are not supported due to CORS restrictions. Please download the file and use the "Drop file" tab instead or host the file on a different public server.'
+ );
+ return;
+ }
+
+ if (!url.pathname.toLowerCase().endsWith('.csv') && !finalUrl.toLowerCase().includes('csv')) {
setError('URL should point to a CSV file. Please ensure the URL ends with .csv or contains CSV data.');
return;
}
@@ -460,7 +397,7 @@ export default function FileUpload({
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 30_000); // 30 s timeout
- const response = await fetch(urlInput.trim(), {
+ const response = await fetch(finalUrl, {
signal: controller.signal,
headers: { Accept: 'text/csv, text/plain, */*' }
});
@@ -557,62 +494,23 @@ export default function FileUpload({
return;
}
- const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN;
- if (!githubToken) {
- setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.');
- return;
- }
-
setIsLoadingGithubData(true);
setError('');
try {
- // Fetch all branches with pagination support
- const allBranches: GitHubBranch[] = [];
- let page = 1;
- const perPage = 100; // Maximum allowed by GitHub API
-
- while (true) {
- const response = await fetch(`https://api.github.com/repos/${githubRepo}/branches?per_page=${perPage}&page=${page}`, {
- headers: {
- 'Authorization': `Bearer ${githubToken}`,
- 'Accept': 'application/vnd.github.v3+json',
- 'User-Agent': 'PolicyEngine-Dashboard/1.0'
- }
- });
+ const response = await fetch(`/api/github/branches?repo=${encodeURIComponent(githubRepo)}`);
if (!response.ok) {
if (response.status === 404) {
- throw new Error('Repository not found. Please check the repository name and ensure it is accessible.');
+ throw new Error('Repository not found. Please check the repository name and ensure it is publicly accessible.');
} else if (response.status === 403) {
throw new Error('Access forbidden. Please check your GitHub token permissions or repository access.');
}
- throw new Error(`Failed to fetch branches: ${response.status} ${response.statusText}`);
- }
-
- const branches: GitHubBranch[] = await response.json();
-
- if (branches.length === 0) {
- // No more branches to fetch
- break;
- }
-
- allBranches.push(...branches);
-
- // If we got fewer branches than requested, we've reached the end
- if (branches.length < perPage) {
- break;
+ const errorData = await response.json();
+ throw new Error(errorData.error || `Failed to fetch branches: ${response.status}`);
}
- page++;
-
- // Safety check to prevent infinite loops (GitHub repos rarely have more than 1000 branches)
- if (page > 10) {
- console.warn('Stopped fetching branches after 10 pages (1000 branches) to prevent excessive API calls');
- break;
- }
- }
-
+ const allBranches: GitHubBranch[] = await response.json();
setGithubBranches(allBranches);
// Auto-select main/master branch if available
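The `/api/github/branches` route called above is likewise not part of this diff; it would be the natural home for the pagination loop the client just lost. A sketch under the same assumptions:

```ts
// app/api/github/branches/route.ts — hypothetical handler that pages through
// the GitHub branches API server-side and returns one flat JSON array.
import { NextRequest, NextResponse } from 'next/server';

export async function GET(request: NextRequest) {
  const repo = request.nextUrl.searchParams.get('repo');
  if (!repo) {
    return NextResponse.json({ error: 'Missing repo parameter' }, { status: 400 });
  }
  const branches: unknown[] = [];
  for (let page = 1; page <= 10; page++) { // cap at 1000 branches, as before
    const res = await fetch(
      `https://api.github.com/repos/${repo}/branches?per_page=100&page=${page}`,
      {
        headers: {
          Authorization: `Bearer ${process.env.GITHUB_TOKEN}`,
          Accept: 'application/vnd.github.v3+json',
        },
      }
    );
    if (!res.ok) {
      return NextResponse.json(
        { error: `Failed to fetch branches: ${res.status}` },
        { status: res.status }
      );
    }
    const batch: unknown[] = await res.json();
    branches.push(...batch);
    if (batch.length < 100) break; // a short page means we reached the end
  }
  return NextResponse.json(branches);
}
```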
@@ -631,28 +529,20 @@ export default function FileUpload({
async function fetchGithubCommits(branch: string) {
if (!githubRepo.trim() || !branch) return;
- const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN;
- if (!githubToken) {
- setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.');
- return;
- }
-
setIsLoadingGithubData(true);
try {
- const response = await fetch(`https://api.github.com/repos/${githubRepo}/commits?sha=${branch}&per_page=20`, {
- headers: {
- 'Authorization': `Bearer ${githubToken}`,
- 'Accept': 'application/vnd.github.v3+json',
- 'User-Agent': 'PolicyEngine-Dashboard/1.0'
- }
- });
+ const response = await fetch(
+ `/api/github/commits?repo=${encodeURIComponent(githubRepo)}&branch=${encodeURIComponent(branch)}`
+ );
+
if (!response.ok) {
if (response.status === 404) {
throw new Error('Branch not found or repository is private.');
} else if (response.status === 403) {
throw new Error('Access forbidden. Please check your GitHub token permissions or repository access.');
}
- throw new Error(`Failed to fetch commits: ${response.status} ${response.statusText}`);
+ const errorData = await response.json();
+ throw new Error(errorData.error || `Failed to fetch commits: ${response.status}`);
}
const commits: GitHubCommit[] = await response.json();
@@ -673,98 +563,32 @@ export default function FileUpload({
async function fetchGithubArtifacts(commitSha: string) {
if (!githubRepo.trim() || !commitSha) return;
- const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN;
- if (!githubToken) {
- setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.');
- return;
- }
-
setIsLoadingGithubData(true);
setAvailableArtifacts([]);
setSelectedArtifact('');
try {
- const [owner, repo] = githubRepo.split('/');
-
- // Get workflow runs for the commit
- const runsResponse = await fetch(
- `https://api.github.com/repos/${owner}/${repo}/actions/runs?head_sha=${commitSha}`,
- {
- headers: {
- 'Authorization': `Bearer ${githubToken}`,
- 'Accept': 'application/vnd.github.v3+json',
- 'User-Agent': 'PolicyEngine-Dashboard/1.0'
- }
- }
+ const response = await fetch(
+ `/api/github/artifacts?repo=${encodeURIComponent(githubRepo)}&commit=${encodeURIComponent(commitSha)}`
);
- if (!runsResponse.ok) {
- if (runsResponse.status === 403) {
+ if (!response.ok) {
+ if (response.status === 403) {
throw new Error(`GitHub API rate limit exceeded or token permissions insufficient (403). Please try again later or check your token permissions.`);
- } else if (runsResponse.status === 404) {
+ } else if (response.status === 404) {
throw new Error(`Repository or commit not found (404). Please check the repository name and commit SHA.`);
- } else {
- throw new Error(`Failed to fetch workflow runs: ${runsResponse.status} ${runsResponse.statusText}`);
}
+ const errorData = await response.json();
+ throw new Error(errorData.error || `Failed to fetch artifacts: ${response.status}`);
}
- const runsData = await runsResponse.json();
- const runs = runsData.workflow_runs;
-
- if (!runs || runs.length === 0) {
- setError('No workflow runs found for this commit.');
- return;
- }
+ const uniqueArtifacts: GitHubArtifact[] = await response.json();
- // Collect all imputation artifacts from completed runs
- const allArtifacts: GitHubArtifact[] = [];
-
- for (const run of runs) {
- if (run.status !== 'completed') continue;
-
- try {
- const artifactsResponse = await fetch(
- `https://api.github.com/repos/${owner}/${repo}/actions/runs/${run.id}/artifacts`,
- {
- headers: {
- 'Authorization': `Bearer ${githubToken}`,
- 'Accept': 'application/vnd.github.v3+json',
- 'User-Agent': 'PolicyEngine-Dashboard/1.0'
- }
- }
- );
-
- if (!artifactsResponse.ok) continue;
-
- const artifactsData = await artifactsResponse.json();
- const artifacts = artifactsData.artifacts;
-
- // Filter for imputation artifacts
- const imputationArtifacts = artifacts.filter((artifact: GitHubArtifact) =>
- artifact.name.toLowerCase().includes('impute') ||
- artifact.name.toLowerCase().includes('imputation') ||
- artifact.name.toLowerCase().includes('result') ||
- artifact.name.toLowerCase().includes('.csv')
- );
-
- allArtifacts.push(...imputationArtifacts);
- } catch {
- continue;
- }
- }
-
- if (allArtifacts.length === 0) {
+ if (uniqueArtifacts.length === 0) {
setError('No imputation artifacts found for this commit.');
return;
}
- // Remove duplicates and sort by creation date (newest first)
- const uniqueArtifacts = allArtifacts
- .filter((artifact, index, self) =>
- index === self.findIndex(a => a.name === artifact.name)
- )
- .sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime());
-
setAvailableArtifacts(uniqueArtifacts);
// Auto-select the first artifact
@@ -791,25 +615,15 @@ export default function FileUpload({
return;
}
- const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN;
- if (!githubToken) {
- setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.');
- return;
- }
-
setIsLoading(true);
setError('');
try {
setError('🔄 Downloading and extracting CSV from artifact...');
- const downloadResponse = await fetch(artifact.archive_download_url, {
- headers: {
- 'Authorization': `Bearer ${githubToken}`,
- 'Accept': 'application/vnd.github.v3+json',
- 'User-Agent': 'PolicyEngine-Dashboard/1.0'
- }
- });
+ const downloadResponse = await fetch(
+ `/api/github/download?url=${encodeURIComponent(artifact.archive_download_url)}`
+ );
if (!downloadResponse.ok) {
throw new Error(`Failed to download artifact: ${downloadResponse.status}`);
@@ -864,7 +678,7 @@ export default function FileUpload({
commit: selectedCommit,
artifact: artifact.name
};
- onGithubLoad(artifactInfo, null);
+ onGithubLoad(artifactInfo);
}
// Clear the GitHub state since we successfully loaded the file
@@ -884,286 +698,31 @@ export default function FileUpload({
}
}
- async function fetchSecondBranchCommits(branch: string) {
- if (!githubRepo.trim() || !branch) return;
-
- const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN;
- if (!githubToken) {
- setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.');
- return;
- }
-
- setIsLoadingGithubData(true);
- try {
- const response = await fetch(`https://api.github.com/repos/${githubRepo}/commits?sha=${branch}&per_page=20`, {
- headers: {
- 'Authorization': `Bearer ${githubToken}`,
- 'Accept': 'application/vnd.github.v3+json',
- 'User-Agent': 'PolicyEngine-Dashboard/1.0'
- }
- });
- if (!response.ok) {
- if (response.status === 404) {
- throw new Error('Branch not found or repository is private.');
- } else if (response.status === 403) {
- throw new Error('Access forbidden. Please check your GitHub token permissions or repository access.');
- }
- throw new Error(`Failed to fetch commits: ${response.status} ${response.statusText}`);
- }
-
- const commits: GitHubCommit[] = await response.json();
- setSecondCommits(commits);
-
- // Auto-select latest commit and fetch its artifacts
- if (commits.length > 0) {
- setSelectedSecondCommit(commits[0].sha);
- await fetchSecondArtifacts(commits[0].sha);
- }
- } catch (err) {
- setError(`GitHub API error: ${err instanceof Error ? err.message : 'Unknown error'}`);
- } finally {
- setIsLoadingGithubData(false);
- }
- }
-
- async function fetchSecondArtifacts(commitSha: string) {
- if (!githubRepo.trim() || !commitSha) return;
-
- const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN;
- if (!githubToken) {
- setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.');
- return;
- }
-
- setIsLoadingGithubData(true);
- setSecondArtifacts([]);
- setSelectedSecondArtifact('');
-
- try {
- const [owner, repo] = githubRepo.split('/');
-
- // Get workflow runs for the commit
- const runsResponse = await fetch(
- `https://api.github.com/repos/${owner}/${repo}/actions/runs?head_sha=${commitSha}`,
- {
- headers: {
- 'Authorization': `Bearer ${githubToken}`,
- 'Accept': 'application/vnd.github.v3+json',
- 'User-Agent': 'PolicyEngine-Dashboard/1.0'
- }
- }
- );
-
- if (!runsResponse.ok) {
- if (runsResponse.status === 403) {
- throw new Error(`GitHub API rate limit exceeded or token permissions insufficient (403). Please try again later or check your token permissions.`);
- } else if (runsResponse.status === 404) {
- throw new Error(`Repository or commit not found (404). Please check the repository name and commit SHA.`);
- } else {
- throw new Error(`Failed to fetch workflow runs: ${runsResponse.status} ${runsResponse.statusText}`);
- }
- }
-
- const runsData = await runsResponse.json();
- const runs = runsData.workflow_runs;
-
- if (!runs || runs.length === 0) {
- setError('No workflow runs found for this commit.');
- return;
- }
-
- // Collect all imputation artifacts from completed runs
- const allArtifacts: GitHubArtifact[] = [];
-
- for (const run of runs) {
- if (run.status !== 'completed') continue;
-
- try {
- const artifactsResponse = await fetch(
- `https://api.github.com/repos/${owner}/${repo}/actions/runs/${run.id}/artifacts`,
- {
- headers: {
- 'Authorization': `Bearer ${githubToken}`,
- 'Accept': 'application/vnd.github.v3+json',
- 'User-Agent': 'PolicyEngine-Dashboard/1.0'
- }
- }
- );
-
- if (!artifactsResponse.ok) continue;
-
- const artifactsData = await artifactsResponse.json();
- const artifacts = artifactsData.artifacts;
-
- // Filter for imputation artifacts
- const imputationArtifacts = artifacts.filter((artifact: GitHubArtifact) =>
- artifact.name.toLowerCase().includes('impute') ||
- artifact.name.toLowerCase().includes('imputation') ||
- artifact.name.toLowerCase().includes('result') ||
- artifact.name.toLowerCase().includes('.csv')
- );
-
- allArtifacts.push(...imputationArtifacts);
- } catch {
- continue;
- }
- }
-
- if (allArtifacts.length === 0) {
- setError('No imputation artifacts found for this commit.');
- return;
- }
-
- // Remove duplicates and sort by creation date (newest first)
- const uniqueArtifacts = allArtifacts
- .filter((artifact, index, self) =>
- index === self.findIndex(a => a.name === artifact.name)
- )
- .sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime());
-
- setSecondArtifacts(uniqueArtifacts);
-
- // Auto-select the first artifact
- if (uniqueArtifacts.length > 0) {
- setSelectedSecondArtifact(uniqueArtifacts[0].id.toString());
- }
-
- } catch (err) {
- setError(`Failed to fetch artifacts: ${err instanceof Error ? err.message : 'Unknown error'}`);
- } finally {
- setIsLoadingGithubData(false);
- }
- }
-
- async function loadComparisonData() {
- if (!selectedArtifact || !selectedSecondArtifact || !onCompareLoad) {
- setError('Please select artifacts from both commits to compare');
- return;
- }
-
- const firstArtifact = availableArtifacts.find(a => a.id.toString() === selectedArtifact);
- const secondArtifact = secondArtifacts.find(a => a.id.toString() === selectedSecondArtifact);
-
- if (!firstArtifact || !secondArtifact) {
- setError('Selected artifacts not found');
- return;
- }
-
- const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN;
- if (!githubToken) {
- setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.');
- return;
- }
-
- setIsLoading(true);
- setError('');
-
- try {
- setError('🔄 Downloading and extracting CSV files for comparison...');
-
- // Download both artifacts
- const [firstDownload, secondDownload] = await Promise.all([
- fetch(firstArtifact.archive_download_url, {
- headers: {
- 'Authorization': `Bearer ${githubToken}`,
- 'Accept': 'application/vnd.github.v3+json',
- 'User-Agent': 'PolicyEngine-Dashboard/1.0'
- }
- }),
- fetch(secondArtifact.archive_download_url, {
- headers: {
- 'Authorization': `Bearer ${githubToken}`,
- 'Accept': 'application/vnd.github.v3+json',
- 'User-Agent': 'PolicyEngine-Dashboard/1.0'
- }
- })
- ]);
-
- if (!firstDownload.ok || !secondDownload.ok) {
- throw new Error('Failed to download one or both artifacts');
- }
-
- // Extract CSVs from both artifacts
- const [firstZipBuffer, secondZipBuffer] = await Promise.all([
- firstDownload.arrayBuffer(),
- secondDownload.arrayBuffer()
- ]);
-
- const firstZip = new JSZip();
- const secondZip = new JSZip();
- const [firstZipContent, secondZipContent] = await Promise.all([
- firstZip.loadAsync(firstZipBuffer),
- secondZip.loadAsync(secondZipBuffer)
- ]);
-
- // Find CSV files in both ZIPs
- const firstCsvFiles = Object.keys(firstZipContent.files).filter(filename =>
- filename.toLowerCase().endsWith('.csv') && !firstZipContent.files[filename].dir
- );
- const secondCsvFiles = Object.keys(secondZipContent.files).filter(filename =>
- filename.toLowerCase().endsWith('.csv') && !secondZipContent.files[filename].dir
- );
-
- if (firstCsvFiles.length === 0 || secondCsvFiles.length === 0) {
- throw new Error('No CSV files found in one or both artifacts');
- }
-
- // Extract CSV content
- const [firstCsvContent, secondCsvContent] = await Promise.all([
- firstZipContent.files[firstCsvFiles[0]].async('text'),
- secondZipContent.files[secondCsvFiles[0]].async('text')
- ]);
-
- // Create display names with commit info
- const firstCommitShort = selectedCommit.slice(0, 8);
- const secondCommitShort = selectedSecondCommit.slice(0, 8);
-
- const firstBranchInfo = selectedBranch !== selectedSecondBranch ? ` (${selectedBranch})` : '';
- const secondBranchInfo = selectedBranch !== selectedSecondBranch ? ` (${selectedSecondBranch})` : '';
-
- const firstName = `${firstCsvFiles[0]} @ ${firstCommitShort}${firstBranchInfo}`;
- const secondName = `${secondCsvFiles[0]} @ ${secondCommitShort}${secondBranchInfo}`;
-
- // Load into comparison mode
- onCompareLoad(firstCsvContent, firstName, secondCsvContent, secondName);
-
- // Notify parent component about GitHub artifact info for sharing
- if (onGithubLoad) {
- const primaryArtifactInfo: GitHubArtifactInfo = {
- repo: githubRepo,
- branch: selectedBranch,
- commit: selectedCommit,
- artifact: firstArtifact.name
- };
- const secondaryArtifactInfo: GitHubArtifactInfo = {
- repo: githubRepo,
- branch: selectedSecondBranch,
- commit: selectedSecondCommit,
- artifact: secondArtifact.name
- };
- onGithubLoad(primaryArtifactInfo, secondaryArtifactInfo);
- }
-
- setError('');
-
- } catch (extractError) {
- console.error('Comparison extraction error:', extractError);
- setError(`❌ Failed to extract comparison data: ${extractError instanceof Error ? extractError.message : 'Unknown error'}`);
- } finally {
- setIsLoading(false);
- }
- }
return (
-
-
-
Load imputation data
-
Choose how you would like to load your CSV file
+
+ {/* Page Title */}
+
+
Microimpute Dashboard
+
Microimputation quality and model benchmarking assessment
+ What is Wasserstein distance? Also known as "Earth Mover's Distance",
+ this metric measures how much "work" is needed to transform one probability distribution
+ into another. Think of it as the minimum cost to rearrange one pile of dirt to match
+ another pile's shape.
+
+
+ Why use it for imputation? Wasserstein distance is ideal for numerical
+ variables because it considers the actual distances between values, not just whether
+ they match exactly. A value of 0 means perfect imputation, and larger values indicate
+ greater differences between imputed and true distributions.
+
+
+ Interpretation: Values closer to 0 are better. Generally, values below
+ 0.05 indicate good imputation quality, while values above 0.2 suggest significant
+ distributional differences.
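To make the earth-moving intuition concrete, here is an illustrative 1-D Wasserstein-1 computation for two equal-sized samples (a sketch for intuition, not how the dashboard's input metrics were produced):

```ts
// Empirical 1-D Wasserstein-1 distance for equal-sized samples: sort both
// samples and average the absolute quantile-wise differences.
function wasserstein1(a: number[], b: number[]): number {
  if (a.length !== b.length) throw new Error('Samples must be the same size');
  const sa = [...a].sort((x, y) => x - y);
  const sb = [...b].sort((x, y) => x - y);
  return sa.reduce((sum, v, i) => sum + Math.abs(v - sb[i]), 0) / sa.length;
}

wasserstein1([1, 2, 3], [1, 2, 3]); // 0 — identical distributions
wasserstein1([1, 2, 3], [2, 3, 4]); // 1 — every unit of "dirt" moves by 1
```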
+
+ What is KL-divergence? Kullback-Leibler divergence measures how much
+ one probability distribution differs from another. It quantifies the "information lost"
+ when using the imputed distribution to approximate the true distribution.
+
+
+ Why use it for categorical variables? KL-divergence is particularly
+ useful for categorical data because it compares probability distributions across
+ categories. It's sensitive to differences in how probabilities are distributed across
+ all possible categories.
+
+
+ Interpretation: A value of 0 means perfect match. Values below 0.5
+ indicate good imputation, while values above 5.0 suggest substantial distributional
+ differences. Note that KL-divergence is not symmetric and can range from 0 to infinity.
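An illustrative computation over a shared set of categories (again a sketch, not the dashboard's metric pipeline):

```ts
// KL-divergence D(P || Q). An epsilon guards against zero probabilities in Q,
// where the divergence would otherwise be infinite; note the asymmetry —
// swapping p and q generally changes the result.
function klDivergence(p: number[], q: number[], eps = 1e-12): number {
  return p.reduce((sum, pi, i) => {
    if (pi === 0) return sum; // 0 * log(0 / q) is taken as 0
    return sum + pi * Math.log(pi / Math.max(q[i], eps));
  }, 0);
}

klDivergence([0.5, 0.5], [0.5, 0.5]); // 0 — identical distributions
klDivergence([0.9, 0.1], [0.1, 0.9]); // ≈ 1.76 — substantial mismatch
```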
+
+
+ {/* Legend - only for correlation metrics (not mutual_info) */}
+ {selectedMetric !== 'mutual_info' && (
+
+
+ Interpretation: Correlation values range from -1 to 1. Positive values (blue) indicate variables that increase together, negative values (red) indicate variables that move in opposite directions, and values near 0 (white) indicate little to no linear relationship.
+
+
+ Color scale:
+
+
+
+
+
+
+
+
+
+
+ ◄ Negative
+ |
+ Positive ►
+
+
+
+
+ Pearson vs Spearman: Pearson correlation measures linear relationships between variables and is sensitive to outliers. Spearman correlation measures monotonic relationships (whether variables consistently increase or decrease together) by ranking the data first, making it more robust to outliers and non-linear but monotonic relationships. Use Pearson for linear relationships and Spearman when the relationship may be non-linear or when data contains outliers.
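A compact sketch of the two estimators, to make the rank-based robustness concrete (illustrative only; tie handling is ignored):

```ts
// Pearson correlation: linear association on the raw values.
function pearson(x: number[], y: number[]): number {
  const n = x.length;
  const mx = x.reduce((a, b) => a + b, 0) / n;
  const my = y.reduce((a, b) => a + b, 0) / n;
  let cov = 0, vx = 0, vy = 0;
  for (let i = 0; i < n; i++) {
    cov += (x[i] - mx) * (y[i] - my);
    vx += (x[i] - mx) ** 2;
    vy += (y[i] - my) ** 2;
  }
  return cov / Math.sqrt(vx * vy);
}

// Spearman correlation: Pearson on the ranks — ranking is what makes it
// robust to outliers and to monotonic but non-linear relationships.
function spearman(x: number[], y: number[]): number {
  const rank = (v: number[]): number[] => {
    const order = v.map((val, i) => [val, i] as const).sort((a, b) => a[0] - b[0]);
    const r: number[] = new Array(v.length).fill(0);
    order.forEach(([, idx], pos) => { r[idx] = pos + 1; });
    return r;
  };
  return pearson(rank(x), rank(y));
}
```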
+
+ What is mutual information? Mutual information measures how much information one variable provides about another. Unlike correlation, it captures both linear and non-linear relationships between variables. Values range from 0 (independent variables) to higher positive values (strong dependency).
+
+
+ Why measure it for imputed variables? Mutual information between predictors and imputed variables reveals which predictors are most informative for imputation. High mutual information indicates that a predictor strongly influences the imputed variable's distribution, making it crucial for accurate imputation. This helps validate that your imputation models are using the most relevant predictors and can identify when key predictive relationships exist in your data.
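For reference, discrete mutual information from a joint contingency table looks like this sketch (illustrative; the dashboard consumes precomputed values from the CSV):

```ts
// MI = Σ p(x, y) · log( p(x, y) / (p(x) · p(y)) ), from a table of joint counts.
// 0 means independence; larger values mean a predictor carries more
// information about the imputed variable.
function mutualInformation(jointCounts: number[][]): number {
  const total = jointCounts.flat().reduce((a, b) => a + b, 0);
  const rowSums = jointCounts.map(row => row.reduce((a, b) => a + b, 0));
  const colSums = jointCounts[0].map((_, j) =>
    jointCounts.reduce((a, row) => a + row[j], 0)
  );
  let mi = 0;
  for (let i = 0; i < jointCounts.length; i++) {
    for (let j = 0; j < jointCounts[i].length; j++) {
      const pxy = jointCounts[i][j] / total;
      if (pxy === 0) continue; // 0 · log(0) contributes nothing
      mi += pxy * Math.log(pxy / ((rowSums[i] / total) * (colSums[j] / total)));
    }
  }
  return mi;
}
```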
+
+
+ {/* Color scale within explanation box */}
+
+
+ Color scale:
+
+
+
+
+
+
+
+
+
+ Weak
+ →
+ Strong ►
+
+
+
+
+
+ {/* Message when no predictor-target data is available */}
+ {!hasPredictorTargetMI && (
+
+
+ Note: No predictor-to-imputed-variable mutual information data was found in this CSV file. It is recommended to include this data in your analysis to understand which predictors are most informative for imputing each variable. This helps validate that your imputation models are leveraging the most relevant predictive relationships in your data.
+
+ How this works: This analysis adds predictors one at a time,
+ choosing the predictor that improves performance the most at each step. This
+ step-by-step approach is efficient but doesn't test every possible combination of predictors.
+ Note that results may differ depending on the model type passed to the `progressive_predictor_inclusion` function that produced them.
+
+
+ Reading the chart: The bars show cumulative improvement from
+ baseline as predictors are added. Larger improvements indicate more valuable
+ predictor combinations.
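The greedy procedure described above amounts to the following sketch — `evaluate` stands in for whatever fit-and-score routine `progressive_predictor_inclusion` actually uses, so treat this as illustrative logic rather than the library's implementation:

```ts
// Greedy forward selection: at each step, add the predictor whose inclusion
// improves the score the most. Cheap relative to testing all subsets, but
// it can miss combinations that only help together.
function progressiveInclusion(
  predictors: string[],
  evaluate: (selected: string[]) => number // lower is better, e.g. a loss
): { selected: string[]; scores: number[] } {
  const selected: string[] = [];
  const scores: number[] = [];
  const remaining = new Set(predictors);
  while (remaining.size > 0) {
    let best: string | null = null;
    let bestScore = Infinity;
    for (const p of remaining) {
      const score = evaluate([...selected, p]);
      if (score < bestScore) { bestScore = score; best = p; }
    }
    if (best === null) break;
    selected.push(best);
    scores.push(bestScore);
    remaining.delete(best);
  }
  return { selected, scores };
}
```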
+
+ Cumulative improvement:{' '}
+
+ {(bestCombination.cumulativeImprovement * 100).toFixed(3)}%
+
+ {' '}
+
+ (relative to the first predictor added, which was the best single predictor)
+
+
+ What this shows: This analysis measures how much performance
+ degrades when each predictor is removed. Predictors that cause large performance
+ drops when removed are critical to the model's accuracy.
+
+
+ Reading the chart: Positive values (bars pointing right) indicate
+ performance worsens when the predictor is removed, meaning the predictor is helpful.
+ Negative values suggest removing the predictor might actually improve performance.
+