diff --git a/.changeset/brave-kiwis-notice.md b/.changeset/brave-kiwis-notice.md new file mode 100644 index 000000000..a514e5684 --- /dev/null +++ b/.changeset/brave-kiwis-notice.md @@ -0,0 +1,5 @@ +--- +"ensrainbow": patch +--- + +feat: add CSV conversion command to ensrainbow CLI to convert rainbow tables from CSV format to ensrainbow format diff --git a/apps/ensrainbow/package.json b/apps/ensrainbow/package.json index 88e149cc8..704a88cf7 100644 --- a/apps/ensrainbow/package.json +++ b/apps/ensrainbow/package.json @@ -19,6 +19,7 @@ "validate:lite": "tsx src/cli.ts validate --lite", "purge": "tsx src/cli.ts purge", "convert": "tsx src/cli.ts convert", + "convert-sql": "tsx src/cli.ts convert-sql", "test": "vitest", "test:coverage": "vitest --coverage", "lint": "biome check --write .", @@ -38,7 +39,8 @@ "progress": "^2.0.3", "protobufjs": "^7.4.0", "viem": "catalog:", - "yargs": "^17.7.2" + "yargs": "^17.7.2", + "@fast-csv/parse": "^5.0.0" }, "devDependencies": { "@ensnode/shared-configs": "workspace:*", diff --git a/apps/ensrainbow/src/cli.test.ts b/apps/ensrainbow/src/cli.test.ts index ff9364a32..dedf1b88a 100644 --- a/apps/ensrainbow/src/cli.test.ts +++ b/apps/ensrainbow/src/cli.test.ts @@ -107,32 +107,29 @@ describe("CLI", () => { const ensrainbowFile = join(TEST_FIXTURES_DIR, "test_ens_names_0.ensrainbow"); const ensrainbowOutputFile = join(tempDir, "test_ens_names_0.ensrainbow"); const labelSetId = "test-ens-names"; // Needed for convert - const labelSetVersion = 0; // Needed for convert expect(() => cli.parse([ - "convert", + "convert-sql", "--input-file", sqlInputFile, "--output-file", ensrainbowOutputFile, ]), - ).toThrow(/Missing required arguments: label-set-id, label-set-version/); + ).toThrow(/Missing required argument: label-set-id/); // Successful convert with args const ingestCli = createCLI({ exitProcess: false }); await ingestCli.parse([ - "convert", + "convert-sql", "--input-file", sqlInputFile, "--output-file", ensrainbowOutputFile, "--label-set-id", labelSetId, - "--label-set-version", - labelSetVersion.toString(), ]); - //command: pnpm convert --input-file test/fixtures/test_ens_names.sql.gz --output-file test/fixtures/test_ens_names_0.ensrainbow --label-set-id test-ens-names --label-set-version 0 + //command: pnpm convert-sql --input-file test/fixtures/test_ens_names.sql.gz --output-file test/fixtures/test_ens_names_0.ensrainbow --label-set-id test-ens-names --label-set-version 0 //verify that the file is created await expect(stat(ensrainbowOutputFile)).resolves.toBeDefined(); @@ -159,32 +156,29 @@ describe("CLI", () => { const sqlInputFile = join(TEST_FIXTURES_DIR, "ens_test_env_names.sql.gz"); const ensrainbowOutputFile = join(tempDir, "ens_test_env_0.ensrainbow"); const labelSetId = "ens-test-env"; // Needed for convert - const labelSetVersion = 0; // Needed for convert expect(() => cli.parse([ - "convert", + "convert-sql", "--input-file", sqlInputFile, "--output-file", ensrainbowOutputFile, ]), - ).toThrow(/Missing required arguments: label-set-id, label-set-version/); + ).toThrow(/Missing required argument: label-set-id/); // Successful convert with args const ingestCli = createCLI({ exitProcess: false }); await ingestCli.parse([ - "convert", + "convert-sql", "--input-file", sqlInputFile, "--output-file", ensrainbowOutputFile, "--label-set-id", labelSetId, - "--label-set-version", - labelSetVersion.toString(), ]); - //command: pnpm convert --input-file test_ens_names.sql.gz --output-file test_ens_names_0.ensrainbow --label-set-id test-ens-names 
--label-set-version 0 + //command: pnpm convert-sql --input-file test_ens_names.sql.gz --output-file test_ens_names_0.ensrainbow --label-set-id test-ens-names --label-set-version 0 //verify that the file is created await expect(stat(ensrainbowOutputFile)).resolves.toBeDefined(); @@ -207,30 +201,56 @@ describe("CLI", () => { const sqlInputFile = join(TEST_FIXTURES_DIR, "test_ens_names.sql.gz"); const ensrainbowOutputFile = join(tempDir, "test_ens_names_1.ensrainbow"); const labelSetId = "test-ens-names"; // Needed for convert - const labelSetVersion = 1; // Needed for convert expect(() => cli.parse([ - "convert", + "convert-sql", "--input-file", sqlInputFile, "--output-file", ensrainbowOutputFile, ]), - ).toThrow(/Missing required arguments: label-set-id, label-set-version/); + ).toThrow(/Missing required argument: label-set-id/); const ingestCli2 = createCLI({ exitProcess: false }); - // Successful convert with args + // Successful convert with args (convert-sql always creates version 0) + // To test version 1, we need to use convert command with existing database + // But for this test, we'll create version 0 and then manually test the ingestion failure + const csvInputFile = join(TEST_FIXTURES_DIR, "test_labels_2col.csv"); + const tempDbDirForV1 = join(tempDir, "temp-db-for-v1"); + const version0FileForV1 = join(tempDir, "test_ens_names_0_for_v1.ensrainbow"); + + // Create version 0 file await ingestCli2.parse([ "convert", "--input-file", - sqlInputFile, + csvInputFile, + "--output-file", + version0FileForV1, + "--label-set-id", + labelSetId, + ]); + + // Ingest version 0 to create database + await ingestCli2.parse([ + "ingest-ensrainbow", + "--input-file", + version0FileForV1, + "--data-dir", + tempDbDirForV1, + ]); + + // Create version 1 file using existing database + await ingestCli2.parse([ + "convert", + "--input-file", + csvInputFile, "--output-file", ensrainbowOutputFile, "--label-set-id", labelSetId, - "--label-set-version", - labelSetVersion.toString(), + "--existing-db-path", + tempDbDirForV1, ]); //verify it is created await expect(stat(ensrainbowOutputFile)).resolves.toBeDefined(); @@ -254,38 +274,99 @@ describe("CLI", () => { }); it("should ingest first file successfully but reject second file with label set version not being 1 higher than the current highest label set version", async () => { - // First, ingest a valid file with label set version 0 - const firstInputFile = join(TEST_FIXTURES_DIR, "test_ens_names_0.ensrainbow"); + // First, we'll create a version 0 file and then a version 2 file const secondInputFile = join(tempDir, "test_ens_names_2.ensrainbow"); // Create an ensrainbow file with label set version 2 - const sqlInputFile = join(TEST_FIXTURES_DIR, "test_ens_names.sql.gz"); + // To create version 2, we need to create version 0, ingest it, create version 1, ingest it, then create version 2 + const csvInputFile = join(TEST_FIXTURES_DIR, "test_labels_2col.csv"); const labelSetId = "test-ens-names"; - const labelSetVersion = 2; // Higher than 1 - // Successful convert with label set version 2 + // Create temporary directory for building up versions sequentially + const tempDbDir = join(tempDir, "temp-db"); + const version0File = join(tempDir, "test_ens_names_0_temp.ensrainbow"); + const version1File = join(tempDir, "test_ens_names_1_temp.ensrainbow"); + const convertCli = createCLI({ exitProcess: false }); + + // Step 1: Create version 0 file await convertCli.parse([ "convert", "--input-file", - sqlInputFile, + csvInputFile, + "--output-file", + version0File, + 
"--label-set-id", + labelSetId, + ]); + + // Step 2: Ingest version 0 to create database (database now has version 0) + await convertCli.parse([ + "ingest-ensrainbow", + "--input-file", + version0File, + "--data-dir", + tempDbDir, + ]); + + // Step 3: Create version 1 file using existing database (will be version 1) + await convertCli.parse([ + "convert", + "--input-file", + csvInputFile, + "--output-file", + version1File, + "--label-set-id", + labelSetId, + "--existing-db-path", + tempDbDir, + ]); + + // Step 4: Ingest version 1 into the same database (database now has versions 0 and 1, highest is 1) + await convertCli.parse([ + "ingest-ensrainbow", + "--input-file", + version1File, + "--data-dir", + tempDbDir, + ]); + + // Step 5: Create version 2 file using existing database (will be version 2, since highest is 1) + await convertCli.parse([ + "convert", + "--input-file", + csvInputFile, "--output-file", secondInputFile, "--label-set-id", labelSetId, - "--label-set-version", - labelSetVersion.toString(), + "--existing-db-path", + tempDbDir, ]); // Verify the file with label set version 2 was created await expect(stat(secondInputFile)).resolves.toBeDefined(); + // Create a completely separate version 0 file for the final test + // Use a fresh CLI instance and ensure no existing-db-path is used + const finalTestCli = createCLI({ exitProcess: false }); + const finalTestVersion0File = join(tempDir, "final_test_v0.ensrainbow"); + await finalTestCli.parse([ + "convert", + "--input-file", + csvInputFile, + "--output-file", + finalTestVersion0File, + "--label-set-id", + labelSetId, + ]); + // First ingest succeeds with label set version 0 const ingestCli = createCLI({ exitProcess: false }); await ingestCli.parse([ "ingest-ensrainbow", "--input-file", - firstInputFile, + finalTestVersion0File, "--data-dir", testDataDir, ]); @@ -311,35 +392,45 @@ describe("CLI", () => { const thirdInputFile = join(tempDir, "different_label_set_id_1.ensrainbow"); // Create an ensrainbow file with different label set id - const sqlInputFile = join(TEST_FIXTURES_DIR, "test_ens_names.sql.gz"); + const csvInputFile = join(TEST_FIXTURES_DIR, "test_labels_2col.csv"); const labelSetId = "different-label-set-id"; // Different from test-ens-names - const labelSetVersion = 0; + + // Create temporary directory for version 0 database + const tempDbDir0 = join(tempDir, "temp-db-different-v0"); // Create second file with different label set id and label set version 0 const convertCli = createCLI({ exitProcess: false }); await convertCli.parse([ "convert", "--input-file", - sqlInputFile, + csvInputFile, "--output-file", secondInputFile, "--label-set-id", labelSetId, - "--label-set-version", - labelSetVersion.toString(), ]); // Create third file with different label set id and label set version 1 + // First, ingest version 0 to create database + await convertCli.parse([ + "ingest-ensrainbow", + "--input-file", + secondInputFile, + "--data-dir", + tempDbDir0, + ]); + + // Then create version 1 using existing database await convertCli.parse([ "convert", "--input-file", - sqlInputFile, + csvInputFile, "--output-file", thirdInputFile, "--label-set-id", labelSetId, - "--label-set-version", - "1", + "--existing-db-path", + tempDbDir0, ]); // Verify the file with different label set id was created diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index 3fdc0d530..445f41cbe 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -5,14 +5,10 @@ import type { ArgumentsCamelCase, Argv } from "yargs"; 
import { hideBin } from "yargs/helpers"; import yargs from "yargs/yargs"; -import { - buildLabelSetId, - buildLabelSetVersion, - type LabelSetId, - type LabelSetVersion, -} from "@ensnode/ensnode-sdk"; - -import { convertCommand } from "@/commands/convert-command"; +import { buildLabelSetId, type LabelSetId } from "@ensnode/ensnode-sdk"; + +import { convertCommand } from "@/commands/convert-command-sql"; +import { convertCsvCommand } from "@/commands/convert-csv-command"; // import { ingestCommand } from "@/commands/ingest-command"; import { ingestProtobufCommand } from "@/commands/ingest-protobuf-command"; import { purgeCommand } from "@/commands/purge-command"; @@ -56,9 +52,17 @@ interface PurgeArgs { interface ConvertArgs { "input-file": string; - "output-file": string; + "output-file"?: string; + "label-set-id": LabelSetId; +} + +interface ConvertCsvArgs { + "input-file": string; + "output-file"?: string; "label-set-id": LabelSetId; - "label-set-version": LabelSetVersion; + "progress-interval"?: number; + "existing-db-path"?: string; + silent?: boolean; } export interface CLIOptions { @@ -184,38 +188,81 @@ export function createCLI(options: CLIOptions = {}) { ) .command( "convert", - "Convert rainbow tables from SQL dump to protobuf format", + "Convert rainbow tables from CSV format to ensrainbow format", (yargs: Argv) => { return yargs .option("input-file", { type: "string", - description: "Path to the gzipped SQL dump file", - default: join(process.cwd(), "ens_names.sql.gz"), - }) - .option("output-file", { - type: "string", - description: "Path to the output protobuf file", - default: join(process.cwd(), "rainbow-records.ensrainbow"), + description: "Path to the CSV input file", + demandOption: true, }) .option("label-set-id", { type: "string", - description: "Label set id for the rainbow record collection", + description: "Label set id for the generated ensrainbow file", demandOption: true, }) .coerce("label-set-id", buildLabelSetId) - .option("label-set-version", { + .option("output-file", { + type: "string", + description: + "Path to where the resulting ensrainbow file will be output (if not provided, will be generated automatically)", + }) + .option("progress-interval", { type: "number", - description: "Label set version for the rainbow record collection", + description: "Number of records to process before logging progress", + default: 50000, + }) + .option("existing-db-path", { + type: "string", + description: + "Path to existing ENSRainbow database to filter out existing labels and determine the next label set version (if not provided, version will be 0)", + }) + .option("silent", { + type: "boolean", + description: "Disable progress bar (useful for scripts)", + default: false, + }); + }, + async (argv: ArgumentsCamelCase) => { + await convertCsvCommand({ + inputFile: argv["input-file"], + outputFile: argv["output-file"], + labelSetId: argv["label-set-id"], + progressInterval: argv["progress-interval"], + existingDbPath: argv["existing-db-path"], + silent: argv["silent"], + }); + }, + ) + .command( + "convert-sql", + "Convert rainbow tables from legacy SQL dump to ensrainbow format", + (yargs: Argv) => { + return yargs + .option("input-file", { + type: "string", + description: "Path to the gzipped SQL dump file", + default: join(process.cwd(), "ens_names.sql.gz"), + }) + .option("label-set-id", { + type: "string", + description: "Label set id for the generated ensrainbow file", demandOption: true, }) - .coerce("label-set-version", buildLabelSetVersion); + 
.coerce("label-set-id", buildLabelSetId) + .option("output-file", { + type: "string", + description: "Path to where the resulting ensrainbow file will be output", + }); }, async (argv: ArgumentsCamelCase) => { + const outputFile = + argv["output-file"] ?? join(process.cwd(), `${argv["label-set-id"]}_0.ensrainbow`); await convertCommand({ inputFile: argv["input-file"], - outputFile: argv["output-file"], + outputFile, labelSetId: argv["label-set-id"], - labelSetVersion: argv["label-set-version"], + labelSetVersion: 0, }); }, ) diff --git a/apps/ensrainbow/src/commands/convert-command.ts b/apps/ensrainbow/src/commands/convert-command-sql.ts similarity index 100% rename from apps/ensrainbow/src/commands/convert-command.ts rename to apps/ensrainbow/src/commands/convert-command-sql.ts diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts new file mode 100644 index 000000000..42b1cdd47 --- /dev/null +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -0,0 +1,704 @@ +import { mkdtemp, rm, stat, writeFile } from "fs/promises"; +import { tmpdir } from "os"; +import { join } from "path"; + +import { labelhash } from "viem"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +import { type LabelSetId, labelHashToBytes } from "@ensnode/ensnode-sdk"; + +import { createCLI } from "@/cli"; +import { ENSRainbowDB } from "@/lib/database"; + +import { convertCsvCommand } from "./convert-csv-command"; + +// Path to test fixtures +const TEST_FIXTURES_DIR = join(__dirname, "..", "..", "test", "fixtures"); + +describe("convert-csv-command", () => { + let tempDir: string; + + beforeEach(async () => { + vi.stubEnv("NODE_ENV", "test"); + tempDir = await mkdtemp(join(tmpdir(), "ensrainbow-csv-test-")); + }); + + afterEach(async () => { + vi.unstubAllEnvs(); + vi.restoreAllMocks(); + await rm(tempDir, { recursive: true, force: true }); + }); + + describe("CSV conversion and ingestion", () => { + it("should convert single column CSV and successfully ingest into database", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); + const outputFile = join(tempDir, "output_1col.ensrainbow"); + const dataDir = join(tempDir, "db_1col"); + + // Convert CSV to ensrainbow format + await convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-csv-one-col" as LabelSetId, + silent: true, + }); + + // Verify the output file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + + // Ingest the converted file into database + const cli = createCLI({ exitProcess: false }); + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + const recordsCount = await db.getPrecalculatedRainbowRecordCount(); + expect(recordsCount).toBe(11); + expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("123"))))?.label).toBe( + "123", + ); + expect(await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("1234")))).toBe(null); + await db.close(); + }); + + it("should convert two column CSV with provided hashes and ingest successfully", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_2col.csv"); + const outputFile = join(tempDir, "output_2col.ensrainbow"); + const dataDir = join(tempDir, "db_2col"); + + // Convert CSV to ensrainbow format + await 
convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-csv-two-col" as LabelSetId, + silent: true, + }); + + // Verify the output file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + + // Ingest the converted file into database + const cli = createCLI({ exitProcess: false }); + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + const recordsCount = await db.getPrecalculatedRainbowRecordCount(); + expect(recordsCount).toBe(10); + expect( + (await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("test123"))))?.label, + ).toBe("test123"); + expect(await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("1234")))).toBe(null); + await db.close(); + }); + + it("should fail when CSV has inconsistent column count", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_invalid_first.csv"); + const outputFile = join(tempDir, "output_invalid.ensrainbow"); + + // Convert CSV to ensrainbow format (should fail on inconsistent columns) + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-csv-invalid" as LabelSetId, + }), + ).rejects.toThrow(/Failed on line 1: Expected 1 or 2 col/); + }); + + it("should handle CSV with special characters, emojis, unicode, and quoted fields", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_special_chars.csv"); + const outputFile = join(tempDir, "output_special.ensrainbow"); + const dataDir = join(tempDir, "db_special"); + + // Convert CSV to ensrainbow format + await convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-csv-special" as LabelSetId, + silent: true, + }); + + // Verify output file was created + const outputStats = await stat(outputFile); + expect(outputStats.isFile()).toBe(true); + expect(outputStats.size).toBeGreaterThan(0); + + // Ingest the converted file into database + const cli = createCLI({ exitProcess: false }); + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + const recordsCount = await db.getPrecalculatedRainbowRecordCount(); + expect(recordsCount).toBe(10); + const labels = [ + "πŸ”₯emoji-labelπŸš€", + 'special"quotes"inside', + "label with newline\n character", // new line + "label-with-null\0byte", // null byte + ]; + for (const label of labels) { + expect( + (await db.getVersionedRainbowRecord(labelHashToBytes(labelhash(label))))?.label, + ).toBe(label); + } + expect(await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("1234")))).toBe(null); + await db.close(); + }); + + it("should fail when CSV contains invalid labelhash format", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_invalid_hash.csv"); + const outputFile = join(tempDir, "output_invalid_hash.ensrainbow"); + + // Convert CSV to ensrainbow format (should fail on invalid hash format) + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-csv-invalid-hash" as LabelSetId, + }), + ).rejects.toThrow(/Failed on line 2: Invalid labelHash/); + }); + }); + + describe("Error handling", () => { + it("should throw error for non-existent input file", async () => { + const inputFile = join(tempDir, "non-existent.csv"); + const outputFile = join(tempDir, "output.ensrainbow"); + + await 
expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-missing" as LabelSetId, + }), + ).rejects.toThrow(); + }); + }); + + describe("CLI integration", () => { + it("should work through the full CLI pipeline", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); + const outputFile = join(tempDir, "cli_output.ensrainbow"); + const dataDir = join(tempDir, "cli_db"); + + const cli = createCLI({ exitProcess: false }); + + // Test convert-csv command through CLI + await cli.parse([ + "convert", + "--input-file", + inputFile, + "--output-file", + outputFile, + "--label-set-id", + "test-cli-csv", + ]); + + // Verify file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + + // Test ingestion through CLI + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + // Verify database was created + const dbStats = await stat(dataDir); + expect(dbStats.isDirectory()).toBe(true); + }); + }); + + describe("Filtering functionality", () => { + it("should filter out labels that already exist in the database", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); + const outputFile = join(tempDir, "output_filtered.ensrainbow"); + const dataDir = join(tempDir, "db_filtered"); + + // First, create an initial database with some labels + const initialOutputFile = join(tempDir, "initial.ensrainbow"); + await convertCsvCommand({ + inputFile, + outputFile: initialOutputFile, + labelSetId: "test-filtering" as LabelSetId, + silent: true, + }); + + // Ingest the initial file + const cli = createCLI({ exitProcess: false }); + await cli.parse([ + "ingest-ensrainbow", + "--input-file", + initialOutputFile, + "--data-dir", + dataDir, + ]); + + // Verify initial database + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + const initialCount = await db.getPrecalculatedRainbowRecordCount(); + expect(initialCount).toBe(11); + await db.close(); + + // Now convert the same CSV file again, but with filtering enabled + // This should automatically determine version 1 from the existing database + await convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-filtering" as LabelSetId, + existingDbPath: dataDir, + silent: true, + }); + + // Verify the filtered output file was created + const outputStats = await stat(outputFile); + expect(outputStats.isFile()).toBe(true); + + // The filtered file should be smaller than the original since it excludes existing labels + const initialStats = await stat(initialOutputFile); + expect(outputStats.size).toBeLessThan(initialStats.size); + + // Verify that ingesting the filtered file (version 1) into a new database fails + // because new databases require version 0 for initial ingestion + const filteredDataDir = join(tempDir, "db_filtered_result"); + await expect( + cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", filteredDataDir]), + ).rejects.toThrow(/Initial ingestion must use a file with label set version 0/); + }); + + it("should filter out duplicate labels within the same conversion", async () => { + // Create a CSV file with duplicate labels + const csvContent = "label1\nlabel2\nlabel1\nlabel3\nlabel2\nlabel4"; + const inputFile = join(tempDir, "duplicates.csv"); + await writeFile(inputFile, csvContent); + + const outputFile = join(tempDir, "output_no_duplicates.ensrainbow"); + + // Convert CSV with duplicate filtering 
+ await convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-duplicates" as LabelSetId, + silent: true, + }); + + // Verify the output file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + + // Ingest and verify only unique labels were processed + const dataDir = join(tempDir, "db_no_duplicates"); + const cli = createCLI({ exitProcess: false }); + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + + // Should have 4 unique labels (label1, label2, label3, label4) + const recordsCount = await db.getPrecalculatedRainbowRecordCount(); + expect(recordsCount).toBe(4); + + // Verify specific labels exist + expect( + (await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("label1"))))?.label, + ).toBe("label1"); + expect( + (await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("label2"))))?.label, + ).toBe("label2"); + expect( + (await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("label3"))))?.label, + ).toBe("label3"); + expect( + (await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("label4"))))?.label, + ).toBe("label4"); + + await db.close(); + }); + + it("should throw error when existing database path cannot be opened", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); + const outputFile = join(tempDir, "output_no_db.ensrainbow"); + const nonExistentDbPath = join(tempDir, "non-existent-db"); + + // Should throw error when database path is provided but cannot be opened + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-no-db" as LabelSetId, + existingDbPath: nonExistentDbPath, + }), + ).rejects.toThrow(/Database is not open/); + }); + + it("should throw error when label set ID mismatches existing database", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); + const outputFile = join(tempDir, "output_mismatch.ensrainbow"); + const dataDir = join(tempDir, "db_mismatch"); + + // First, create a database with one label set ID + const initialOutputFile = join(tempDir, "initial_mismatch.ensrainbow"); + await convertCsvCommand({ + inputFile, + outputFile: initialOutputFile, + labelSetId: "test-label-set-a" as LabelSetId, + silent: true, + }); + + // Ingest the initial file to create the database + const cli = createCLI({ exitProcess: false }); + await cli.parse([ + "ingest-ensrainbow", + "--input-file", + initialOutputFile, + "--data-dir", + dataDir, + ]); + + // Verify initial database was created + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + const labelSet = await db.getLabelSet(); + expect(labelSet.labelSetId).toBe("test-label-set-a"); + await db.close(); + + // Now try to convert with a different label set ID and the existing database path + // This should throw an error about label set ID mismatch + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-label-set-b" as LabelSetId, + existingDbPath: dataDir, + silent: true, + }), + ).rejects.toThrow( + /Label set ID mismatch! 
Database label set id: test-label-set-a, provided label set id: test-label-set-b/, + ); + }); + + it("should work through CLI with existing database path", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); + const outputFile = join(tempDir, "cli_output_with_db.ensrainbow"); + const dataDir = join(tempDir, "cli_db_with_filtering"); + + // First create a database + const initialOutputFile = join(tempDir, "initial_cli.ensrainbow"); + const cli = createCLI({ exitProcess: false }); + + await cli.parse([ + "convert", + "--input-file", + inputFile, + "--output-file", + initialOutputFile, + "--label-set-id", + "test-cli-filtering", + ]); + + await cli.parse([ + "ingest-ensrainbow", + "--input-file", + initialOutputFile, + "--data-dir", + dataDir, + ]); + + // Now test CLI with existing database path + await cli.parse([ + "convert", + "--input-file", + inputFile, + "--output-file", + outputFile, + "--label-set-id", + "test-cli-filtering", + "--existing-db-path", + dataDir, + ]); + + // Verify file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + }); + }); + + describe("Streaming performance", () => { + it("should handle small CSV files efficiently", async () => { + const inputFile = join(tempDir, "small_test.csv"); + const outputFile = join(tempDir, "output_small.ensrainbow"); + const dataDir = join(tempDir, "db_small"); + + // Create a CSV with 100 records to test streaming + const records = []; + for (let i = 0; i < 100; i++) { + records.push(`label${i}`); + } + await writeFile(inputFile, records.join("\n")); + + const startTime = Date.now(); + + // Convert CSV + await convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-small" as LabelSetId, + silent: true, + }); + + const conversionTime = Date.now() - startTime; + + // Should complete conversion quickly (less than 2 seconds for 100 records) + expect(conversionTime).toBeLessThan(2000); + + // Verify file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + + // Test ingestion + const cli = createCLI({ exitProcess: false }); + const ingestStartTime = Date.now(); + + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + const ingestTime = Date.now() - ingestStartTime; + + // Should complete ingestion quickly (less than 3 seconds for 100 records) + expect(ingestTime).toBeLessThan(3000); + + // Verify database was created + const dbStats = await stat(dataDir); + expect(dbStats.isDirectory()).toBe(true); + }); + + it("should handle CSV files with many unique labels", async () => { + const inputFile = join(tempDir, "many_labels.csv"); + const outputFile = join(tempDir, "output_many_labels.ensrainbow"); + + // Create a CSV with 50,000 unique labels (tests deduplication with increased memory limit) + const records = []; + for (let i = 0; i < 50_000; i++) { + records.push(`label${i}`); + } + await writeFile(inputFile, records.join("\n")); + + // This should work without memory issues + await convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-many-labels" as LabelSetId, + silent: true, + }); + + // Verify file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + }, 60000); // 60 second timeout for large file test + }); + + describe("Edge cases", () => { + it("should handle empty CSV file", async () => { + const 
inputFile = join(tempDir, "empty.csv"); + const outputFile = join(tempDir, "output_empty.ensrainbow"); + await writeFile(inputFile, ""); + + // Should not throw error for empty file + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-empty" as LabelSetId, + silent: true, + }), + ).resolves.not.toThrow(); + + // Verify the output file was created (should have header only) + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + + // Ingest and verify no records were written + const dataDir = join(tempDir, "db_empty"); + const cli = createCLI({ exitProcess: false }); + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + const recordsCount = await db.getPrecalculatedRainbowRecordCount(); + expect(recordsCount).toBe(0); + await db.close(); + }); + + it("should handle CSV file with only whitespace", async () => { + const inputFile = join(tempDir, "whitespace.csv"); + const outputFile = join(tempDir, "output_whitespace.ensrainbow"); + await writeFile(inputFile, " \n \n\t\n "); + + // Should not throw error for whitespace-only file + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-whitespace" as LabelSetId, + silent: true, + }), + ).resolves.not.toThrow(); + + // Verify the output file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + }); + + it("should process all CSV rows including potential headers", async () => { + const inputFile = join(tempDir, "with_header.csv"); + const outputFile = join(tempDir, "output_header.ensrainbow"); + const csvContent = + "label,labelhash\nalice,0x9c0257114eb9399a2985f8e75dad7600c5d89fe3824ffa99ec1c3eb8bf3b0501\nbob,0x38e47a7b719dce63662aeaf43440326f551b8a7ee198cee35cb5d517f2d296a2"; + await writeFile(inputFile, csvContent); + + // Should process the file (header will be treated as a regular row and fail validation) + // Actually, the header row will be processed and fail because "label" is not a valid hex hash + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-header" as LabelSetId, + silent: true, + }), + ).rejects.toThrow(/Invalid labelHash/); + + // For a proper test, let's create a CSV where the header is valid data + const csvContentValid = "label\nlabel1\nlabel2"; + await writeFile(inputFile, csvContentValid); + + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-header-valid" as LabelSetId, + silent: true, + }), + ).resolves.not.toThrow(); + + // Verify records were created (including "label" as a label) + const dataDir = join(tempDir, "db_header"); + const cli = createCLI({ exitProcess: false }); + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + const recordsCount = await db.getPrecalculatedRainbowRecordCount(); + // Should have 3 records: "label", "label1", "label2" + expect(recordsCount).toBe(3); + await db.close(); + }); + + it("should handle CSV with malformed rows (extra columns)", async () => { + const inputFile = join(tempDir, "malformed_extra_cols.csv"); + const outputFile = join(tempDir, "output_malformed.ensrainbow"); + const csvContent = + "alice\nbob,0x38e47a7b719dce63662aeaf43440326f551b8a7ee198cee35cb5d517f2d296a2,extra\ncharlie"; + await 
writeFile(inputFile, csvContent); + + // Should fail when column count is inconsistent + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-malformed" as LabelSetId, + silent: true, + }), + ).rejects.toThrow(/Expected \d+ columns/); + }); + + it("should handle CSV with malformed rows (missing columns)", async () => { + const inputFile = join(tempDir, "malformed_missing_cols.csv"); + const outputFile = join(tempDir, "output_malformed2.ensrainbow"); + const csvContent = + "alice,0x9c0257114eb9399a2985f8e75dad7600c5d89fe3824ffa99ec1c3eb8bf3b0501\nbob\ncharlie,0x87a213ce1ee769e28decedefb98f6fe48890a74ba84957ebf877fb591e37e0de"; + await writeFile(inputFile, csvContent); + + // Should fail when column count is inconsistent + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-malformed2" as LabelSetId, + silent: true, + }), + ).rejects.toThrow(/Expected \d+ columns/); + }); + + it("should handle CSV with quoted fields containing commas", async () => { + const inputFile = join(tempDir, "quoted_fields.csv"); + const outputFile = join(tempDir, "output_quoted.ensrainbow"); + // CSV with quoted fields that contain commas - use single column format to auto-compute hashes + const csvContent = '"label,with,commas"\n"another,label"'; + await writeFile(inputFile, csvContent); + + // Should handle quoted fields correctly + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-quoted" as LabelSetId, + silent: true, + }), + ).resolves.not.toThrow(); + + // Verify the output file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + + // Ingest and verify records + const dataDir = join(tempDir, "db_quoted"); + const cli = createCLI({ exitProcess: false }); + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + const recordsCount = await db.getPrecalculatedRainbowRecordCount(); + expect(recordsCount).toBe(2); + + // Verify the labels were stored correctly + const label1 = "label,with,commas"; + const label2 = "another,label"; + expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash(label1))))?.label).toBe( + label1, + ); + expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash(label2))))?.label).toBe( + label2, + ); + await db.close(); + }); + + it("should handle CSV with empty labelhash column (should fail validation)", async () => { + const inputFile = join(tempDir, "empty_hash.csv"); + const outputFile = join(tempDir, "output_empty_hash.ensrainbow"); + const csvContent = + "alice,0x9c0257114eb9399a2985f8e75dad7600c5d89fe3824ffa99ec1c3eb8bf3b0501\nbob,\ncharlie,0x87a213ce1ee769e28decedefb98f6fe48890a74ba84957ebf877fb591e37e0de"; + await writeFile(inputFile, csvContent); + + // Should fail when labelhash is empty + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-empty-hash" as LabelSetId, + silent: true, + }), + ).rejects.toThrow(/LabelHash cannot be empty/); + }); + }); +}); diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts new file mode 100644 index 000000000..edeabfbba --- /dev/null +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -0,0 +1,656 @@ +/** + * ENSRAINBOW CSV FILE CREATION COMMAND + * + * Converts CSV files to .ensrainbow format with fast-csv + * Supports 
1-column (label only) and 2-column (label,labelhash) formats + */ + +import { createReadStream, createWriteStream, rmSync, statSync } from "fs"; +import { join } from "path"; + +import { parse } from "@fast-csv/parse"; +import { ClassicLevel } from "classic-level"; +import ProgressBar from "progress"; +import { labelhash } from "viem"; + +import { type LabelHash, labelHashToBytes } from "@ensnode/ensnode-sdk"; + +import { ENSRainbowDB } from "../lib/database.js"; +import { logger } from "../utils/logger.js"; +import { + CURRENT_ENSRAINBOW_FILE_FORMAT_VERSION, + createRainbowProtobufRoot, +} from "../utils/protobuf-schema.js"; +import type { RainbowRecord } from "../utils/rainbow-record.js"; + +/** + * Estimate memory usage of a Map (rough approximation) + */ +function estimateMapMemory(map: Map<string, unknown>): number { + let total = 0; + for (const [key, value] of map) { + // Rough estimate: key size + value size + Map overhead (48 bytes per entry) + total += key.length * 2 + (typeof value === "string" ? value.length * 2 : 8) + 48; + } + return total; +} + +/** + * Simple deduplication database using ClassicLevel directly + */ +class DeduplicationDB { + private pendingWrites: Map<string, string> = new Map(); + + constructor(private db: ClassicLevel) { + // No in-memory cache - LevelDB has its own internal cache + } + + async has(key: string): Promise<boolean> { + // Check pending writes first (not yet flushed to DB) + if (this.pendingWrites.has(key)) { + return true; + } + + // Check database (LevelDB has its own internal cache) + try { + await this.db.get(key); + return true; + } catch (error) { + return false; + } + } + + async add(key: string, value: string): Promise<void> { + this.pendingWrites.set(key, value); + + // Flush frequently to keep pendingWrites small + if (this.pendingWrites.size >= DEDUP_PENDING_WRITES_FLUSH_THRESHOLD) { + await this.flush(); + } + } + + async flush(): Promise<void> { + if (this.pendingWrites.size === 0) return; + + const batch = this.db.batch(); + for (const [key, value] of this.pendingWrites) { + batch.put(key, value); + } + await batch.write(); + this.pendingWrites.clear(); + + // Hint to garbage collector after large batch + if (global.gc) { + global.gc(); + } + } + + async close(): Promise<void> { + await this.flush(); + await this.db.close(); + } + + getMemoryStats(): { + pendingWrites: number; + cache: number; + pendingWritesMB: number; + cacheMB: number; + } { + return { + pendingWrites: this.pendingWrites.size, + cache: 0, // Cache disabled - using LevelDB's internal cache + pendingWritesMB: estimateMapMemory(this.pendingWrites) / 1024 / 1024, + cacheMB: 0, + }; + } +} + +/** + * Sets up a simple progress bar that shows speed without total count.
+ */ +function setupProgressBar(): ProgressBar { + return new ProgressBar("Processing CSV [:bar] :current lines - :rate lines/sec", { + complete: "=", + incomplete: " ", + width: 40, + total: PROGRESS_BAR_LARGE_TOTAL, + }); +} + +/** + * Options for CSV conversion command + */ +export interface ConvertCsvCommandOptions { + inputFile: string; + outputFile?: string; // Optional - will be generated if not provided + labelSetId: string; + progressInterval?: number; + existingDbPath?: string; // Path to existing ENSRainbow database to check for existing labels and determine next version + silent?: boolean; // Disable progress bar for tests +} + +// Configuration constants +const DEFAULT_PROGRESS_INTERVAL = 50000; // Increased from 10k to 50k to reduce logging load +const PROGRESS_BAR_LARGE_TOTAL = 300_000_000; // Very large total for progress bar to handle big files +const DEDUP_PENDING_WRITES_FLUSH_THRESHOLD = 1000; // Flush deduplication DB when pending writes reach this count +const OUTPUT_STREAM_BUFFER_SIZE = 16 * 1024; // 16KB buffer - very small to catch backpressure early +const LARGE_FILE_SIZE_THRESHOLD_MB = 1024; // 1GB - warn user about very large files +const PROGRESS_BAR_UPDATE_INTERVAL = 1000; // Update progress bar every N lines + +interface ConversionStats { + totalLines: number; + processedRecords: number; + filteredExistingLabels: number; + filteredDuplicates: number; + outputBackpressureEvents: number; + startTime: Date; + endTime?: Date; +} + +/** + * Setup output stream for writing protobuf + */ +function setupWriteStream(outputFile: string) { + // Use very small highWaterMark (16KB) to trigger backpressure early and frequently + // This prevents unbounded buffer growth when writes are faster than disk I/O + // Smaller buffer = more frequent backpressure = better memory control + return createWriteStream(outputFile, { + highWaterMark: OUTPUT_STREAM_BUFFER_SIZE, + }); +} + +/** + * Write protobuf header + */ +function writeHeader( + outputStream: NodeJS.WritableStream, + RainbowRecordCollectionType: any, + labelSetId: string, + labelSetVersion: number, +) { + const headerCollection = RainbowRecordCollectionType.fromObject({ + format_identifier: "ensrainbow", + ensrainbow_file_format_version: CURRENT_ENSRAINBOW_FILE_FORMAT_VERSION, + label_set_id: labelSetId, + label_set_version: labelSetVersion, + records: [], // Header has no records + }); + // Encode and write the header collection with length-prefix encoding + outputStream.write( + Buffer.from(RainbowRecordCollectionType.encodeDelimited(headerCollection).finish()), + ); + logger.info("Wrote header message with version, label set id and label set version."); +} + +/** + * Log conversion summary + */ +function logSummary(stats: ConversionStats) { + stats.endTime = new Date(); + const duration = stats.endTime.getTime() - stats.startTime.getTime(); + + logger.info("=== Conversion Summary ==="); + logger.info(`Total lines processed: ${stats.totalLines}`); + logger.info(`Valid records: ${stats.processedRecords}`); + logger.info(`Filtered existing labels: ${stats.filteredExistingLabels}`); + logger.info(`Filtered duplicates: ${stats.filteredDuplicates}`); + logger.info(`Output backpressure events: ${stats.outputBackpressureEvents}`); + logger.info(`Duration: ${duration}ms`); +} + +/** + * Check if a labelhash exists in the ENSRainbow database + */ +async function checkLabelHashExists(db: ENSRainbowDB, labelHashBytes: Buffer): Promise<boolean> { + try { + const record = await db.getVersionedRainbowRecord(labelHashBytes); + return record
!== null; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger.error( + `Error while checking if labelhash exists in ENSRainbow database: ${errorMessage}`, + ); + throw error; + } +} + +/** + * Get the label set version and open database connection if needed + * Returns both the version and the open database connection (if opened) to avoid redundant opens + */ +async function getLabelSetVersionAndDatabase( + existingDbPath: string | undefined, + labelSetId: string, +): Promise<{ version: number; existingDb: ENSRainbowDB | null }> { + if (!existingDbPath) { + return { version: 0, existingDb: null }; + } + + try { + logger.info(`Opening existing database to determine next label set version: ${existingDbPath}`); + const existingDb = await ENSRainbowDB.open(existingDbPath); + const labelSet = await existingDb.getLabelSet(); + + // Validate that the label set ID matches + if (labelSet.labelSetId !== labelSetId) { + await existingDb.close(); + throw new Error( + `Label set ID mismatch! Database label set id: ${labelSet.labelSetId}, provided label set id: ${labelSetId}`, + ); + } + + const nextVersion = labelSet.highestLabelSetVersion + 1; + logger.info( + `Determined next label set version: ${nextVersion} (current highest: ${labelSet.highestLabelSetVersion})`, + ); + // Return the open database connection instead of closing it + return { version: nextVersion, existingDb }; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + throw new Error( + `Failed to determine label set version from existing database at ${existingDbPath}: ${errorMessage}`, + ); + } +} + +/** + * Generate output file name from label set ID and version + */ +function generateOutputFileName(labelSetId: string, labelSetVersion: number): string { + return `${labelSetId}_${labelSetVersion}.ensrainbow`; +} + +/** + * Initialize conversion setup and logging + */ +async function initializeConversion( + options: ConvertCsvCommandOptions, + labelSetVersion: number, + outputFile: string, + existingDb: ENSRainbowDB | null, +) { + logger.info("Starting conversion from CSV to .ensrainbow format..."); + logger.info(`Input file: ${options.inputFile}`); + logger.info(`Output file: ${outputFile}`); + logger.info(`Label set id: ${options.labelSetId}`); + logger.info(`Label set version: ${labelSetVersion}`); + + // Check file size and warn for very large files + try { + const stats = statSync(options.inputFile); + const fileSizeMB = (stats.size / (1024 * 1024)).toFixed(2); + logger.info(`Input file size: ${fileSizeMB} MB`); + + if (stats.size > LARGE_FILE_SIZE_THRESHOLD_MB * 1024 * 1024) { + logger.warn("⚠️ Processing a very large file - using SEQUENTIAL mode."); + } + } catch (error) { + logger.warn(`Could not determine file size: ${error}`); + } + + // Log if using existing database for filtering + if (existingDb) { + logger.info("Using existing database connection for label filtering"); + } + + const { RainbowRecordType, RainbowRecordCollectionType } = createRainbowProtobufRoot(); + const outputStream = setupWriteStream(outputFile); + + writeHeader(outputStream, RainbowRecordCollectionType, options.labelSetId, labelSetVersion); + + logger.info("Reading and processing CSV file line by line with streaming..."); + + return { RainbowRecordType, outputStream, existingDb }; +} + +/** + * Create rainbow record from parsed CSV row + */ +function createRainbowRecord(row: string[]): RainbowRecord { + const label = String(row[0]); + + if (row.length 
=== 1) { + // Single column: compute labelhash using labelhash function + const labelHashBytes = labelHashToBytes(labelhash(label)); + return { + labelHash: labelHashBytes, + label: label, + }; + } else if (row.length === 2) { + // Two columns: validate labelhash format and use provided hash + // Trim whitespace from hash (metadata), but preserve label as-is + const providedHash = String(row[1]).trim(); + if (providedHash === "") { + throw new Error("LabelHash cannot be empty"); + } + const maybeLabelHash = providedHash.startsWith("0x") ? providedHash : `0x${providedHash}`; + try { + const labelHash = labelHashToBytes(maybeLabelHash as LabelHash); // performs labelhash format validation + return { + labelHash, + label, + }; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + throw new Error(`Invalid labelHash: ${errorMessage}`); + } + } else { + throw new Error(`Expected 1 or 2 columns, but found ${row.length} columns`); + } +} + +/** + * Process a single CSV record with LevelDB-based deduplication + */ +async function processRecord( + row: string[], + expectedColumns: number, + RainbowRecordType: any, + outputStream: NodeJS.WritableStream, + lineNumber: number, + existingDb: ENSRainbowDB | null, + dedupDb: DeduplicationDB, + stats: ConversionStats, +): Promise<boolean> { + // Validate column count + if (row.length !== expectedColumns) { + throw new Error( + `Expected ${expectedColumns} columns, but found ${row.length} in line ${lineNumber}`, + ); + } + + const rainbowRecord = createRainbowRecord(row); + const label = rainbowRecord.label; + const labelHashBytes = Buffer.from(rainbowRecord.labelHash); + + // Check if labelhash already exists in the existing database + if (existingDb) { + const existsInDb = await checkLabelHashExists(existingDb, labelHashBytes); + if (existsInDb) { + stats.filteredExistingLabels++; + return false; // Skip this record + } + } + + // Check if label is a duplicate within this conversion using LevelDB + const existsInDedupDb = await dedupDb.has(label); + if (existsInDedupDb) { + stats.filteredDuplicates++; + return false; // Skip this record + } + + // Add label to deduplication database + await dedupDb.add(label, ""); + + // Create protobuf message and write with backpressure handling + // Map RainbowRecord (labelHash) to protobuf format (labelhash) + const recordMessage = RainbowRecordType.fromObject({ + labelhash: Buffer.from(rainbowRecord.labelHash), + label: rainbowRecord.label, + }); + const buffer = Buffer.from(RainbowRecordType.encodeDelimited(recordMessage).finish()); + + // Check if write returns false (buffer full) - if so, wait for drain + const canContinue = outputStream.write(buffer); + if (!canContinue) { + // Buffer is full - signal backpressure + stats.outputBackpressureEvents++; + // Wait for drain event before continuing + // Note: The CSV stream should be paused by the caller when backpressure is detected + await new Promise((resolve) => { + outputStream.once("drain", resolve); + }); + } + + return true; // Record was processed +} + +/** + * Process the entire CSV file - COMPLETELY SEQUENTIAL (one row at a time) + */ +async function processCSVFile( + inputFile: string, + RainbowRecordType: any, + outputStream: NodeJS.WritableStream, + progressInterval: number, + existingDb: ENSRainbowDB | null, + dedupDb: DeduplicationDB, + stats: ConversionStats, + progressBar: ProgressBar | null, +): Promise<{ totalLines: number; processedRecords: number }> { + let expectedColumns: number | null = null; + let
lineNumber = 0; + let processedRecords = 0; + let lastLoggedLine = 0; + let lastLogTime = Date.now(); + + const fileStream = createReadStream(inputFile, { encoding: "utf8" }); + + return new Promise((resolve, reject) => { + const csvStream = parse(); // Sequential processing via pause/resume + let isProcessing = false; + let streamEnded = false; + + const checkAndResolve = () => { + if (streamEnded && !isProcessing) { + logger.info(`Sequential processing complete`); + resolve({ totalLines: lineNumber, processedRecords }); + } + }; + + csvStream + .on("data", async (row: string[]) => { + // PAUSE IMMEDIATELY - process one row at a time + csvStream.pause(); + isProcessing = true; + + lineNumber++; + + try { + // Skip empty rows (no columns or all empty strings) + const isEmptyRow = row.length === 0 || row.every((cell) => cell === ""); + if (isEmptyRow) { + isProcessing = false; + csvStream.resume(); + checkAndResolve(); + return; + } + + // Detect column count on first non-empty row + if (expectedColumns === null) { + expectedColumns = row.length; + logger.info(`Detected ${expectedColumns} columns - SEQUENTIAL processing mode`); + } + + // Log progress (less frequently to avoid logger crashes) + if (lineNumber % progressInterval === 0 && lineNumber !== lastLoggedLine) { + const currentTime = Date.now(); + const chunkTime = currentTime - lastLogTime; + const linesPerSecond = ((progressInterval / chunkTime) * 1000).toFixed(0); + + lastLoggedLine = lineNumber; + lastLogTime = currentTime; + + const memUsage = process.memoryUsage(); + const memInfo = `RSS=${(memUsage.rss / 1024 / 1024).toFixed(0)}MB, Heap=${(memUsage.heapUsed / 1024 / 1024).toFixed(0)}MB`; + + const dedupStats = dedupDb.getMemoryStats(); + const dedupInfo = ` | Dedup: ${dedupStats.pendingWrites}/${dedupStats.cache}`; + + // Use console.log instead of logger to avoid worker thread issues + console.log( + `[${new Date().toISOString()}] Line ${lineNumber}, written ${processedRecords} | ` + + `${linesPerSecond} lines/sec | ${memInfo}${dedupInfo}`, + ); + } + + // Process this one record + const wasProcessed = await processRecord( + row, + expectedColumns, + RainbowRecordType, + outputStream, + lineNumber, + existingDb, + dedupDb, + stats, + ); + + if (wasProcessed) { + processedRecords++; + } + + // Update progress bar + if (lineNumber % PROGRESS_BAR_UPDATE_INTERVAL === 0 && progressBar) { + progressBar.tick(PROGRESS_BAR_UPDATE_INTERVAL); + progressBar.curr = lineNumber; + } + + // Done processing - resume for next row + isProcessing = false; + csvStream.resume(); + checkAndResolve(); + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + csvStream.destroy(); + fileStream.destroy(); + reject(new Error(`Failed on line ${lineNumber}: ${errorMessage}`)); + } + }) + .on("error", (error: Error) => { + reject(new Error(`CSV parsing error: ${error.message}`)); + }) + .on("end", () => { + streamEnded = true; + checkAndResolve(); + }); + + fileStream + .on("error", (error: Error) => { + reject(error); + }) + .pipe(csvStream); + }); +} + +/** + * Main CSV conversion command with true streaming using fast-csv + */ +export async function convertCsvCommand(options: ConvertCsvCommandOptions): Promise<void> { + // Get label set version from existing database or default to 0 + // This also opens the database if needed, and we'll reuse that connection + const { version: labelSetVersion, existingDb: openedDb } = await getLabelSetVersionAndDatabase( + options.existingDbPath, + options.labelSetId, + ); + + // Generate output file name if not provided + const outputFile = + options.outputFile ?? generateOutputFileName(options.labelSetId, labelSetVersion); + + const stats: ConversionStats = { + totalLines: 0, + processedRecords: 0, + filteredExistingLabels: 0, + filteredDuplicates: 0, + outputBackpressureEvents: 0, + startTime: new Date(), + }; + + let existingDb: ENSRainbowDB | null = openedDb; + let dedupDb: DeduplicationDB | undefined; + let temporaryDedupDir: string | null = null; + + try { + const { + RainbowRecordType, + outputStream, + existingDb: db, + } = await initializeConversion(options, labelSetVersion, outputFile, existingDb); + existingDb = db; + + // Create temporary deduplication database + temporaryDedupDir = join(process.cwd(), "temp-dedup-" + Date.now()); + logger.info(`Creating temporary deduplication database at: ${temporaryDedupDir}`); + const tempDb = new ClassicLevel(temporaryDedupDir, { + keyEncoding: "utf8", + valueEncoding: "utf8", + createIfMissing: true, + // Aggressive memory limits + cacheSize: 2 * 1024 * 1024, // 2MB block cache (minimal) + writeBufferSize: 4 * 1024 * 1024, // 4MB write buffer (minimal) + maxOpenFiles: 100, // Limit open files + compression: false, // Disable compression to reduce CPU/memory + }); + await tempDb.open(); + dedupDb = new DeduplicationDB(tempDb); + + const progressInterval = options.progressInterval ?? DEFAULT_PROGRESS_INTERVAL; + + // Set up progress bar (only if not silent) + const progressBar = options.silent ? null : setupProgressBar(); + + // Process the CSV file + const { totalLines, processedRecords } = await processCSVFile( + options.inputFile, + RainbowRecordType, + outputStream, + progressInterval, + existingDb, + dedupDb, + stats, + progressBar, + ); + + stats.totalLines = totalLines; + stats.processedRecords = processedRecords; + + // Log final progress for large files + if (totalLines > 10_000) { + logger.info( + `βœ… Completed processing ${totalLines.toLocaleString()} lines, wrote ${processedRecords.toLocaleString()} records (LevelDB dedup active)`, + ); + } + + // Close output stream + outputStream.end(); + + logger.info(`βœ… Processed ${processedRecords} records with streaming fast-csv`); + logSummary(stats); + logger.info("βœ… CSV conversion completed successfully!"); + } catch (error) { + const errorMessage = error instanceof Error ?
error.message : String(error); + logger.error(`❌ CSV conversion failed: ${errorMessage}`); + throw error; + } finally { + // Clean up deduplication database + if (dedupDb !== undefined) { + try { + await dedupDb.close(); + logger.info("Closed deduplication database"); + } catch (error) { + logger.warn(`Failed to close deduplication database: ${error}`); + } + } + + // Clean up existing database connection + if (existingDb) { + try { + await existingDb.close(); + logger.info("Closed existing database connection"); + } catch (error) { + logger.warn(`Failed to close existing database: ${error}`); + } + } + + // Remove temporary deduplication database directory + if (temporaryDedupDir) { + try { + rmSync(temporaryDedupDir, { recursive: true, force: true }); + logger.info(`Removed temporary deduplication database: ${temporaryDedupDir}`); + } catch (error) { + logger.warn(`Failed to remove temporary deduplication database: ${error}`); + } + } + } +} diff --git a/apps/ensrainbow/test/fixtures/test_labels_1col.csv b/apps/ensrainbow/test/fixtures/test_labels_1col.csv new file mode 100644 index 000000000..302ef8d63 --- /dev/null +++ b/apps/ensrainbow/test/fixtures/test_labels_1col.csv @@ -0,0 +1,11 @@ +alice +bob +charlie +domaintest +example +foundation +governance +hello +world +test123 +123 diff --git a/apps/ensrainbow/test/fixtures/test_labels_2col.csv b/apps/ensrainbow/test/fixtures/test_labels_2col.csv new file mode 100644 index 000000000..e02a65762 --- /dev/null +++ b/apps/ensrainbow/test/fixtures/test_labels_2col.csv @@ -0,0 +1,10 @@ +alice,0x9c0257114eb9399a2985f8e75dad7600c5d89fe3824ffa99ec1c3eb8bf3b0501 +bob,0x38e47a7b719dce63662aeaf43440326f551b8a7ee198cee35cb5d517f2d296a2 +charlie,0x87a213ce1ee769e28decedefb98f6fe48890a74ba84957ebf877fb591e37e0de +domaintest,0x56827be2a1678c2593e2a613fe8c4138ec451ab019d70cd890e007f99b513be1 +example,0x6fd43e7cffc31bb581d7421c8698e29aa2bd8e7186a394b85299908b4eb9b175 +foundation,0x0d5c1bd818a4086f28314415cb375a937593efab66f8f7d2903bf2a13ed35070 +governance,0xabea6fd3db56a6e6d0242111b43ebb13d1c42709651c032c7894962023a1f90a +hello,0x1c8aff950685c2ed4bc3174f3472287b56d9517b9c948127319a09a7a36deac8 +world,0x8452c9b9140222b08593a26daa782707297be9f7b3e8281d7b4974769f19afd0 +test123,0xf81b517a242b218999ec8eec0ea6e2ddbef2a367a14e93f4a32a39e260f686ad diff --git a/apps/ensrainbow/test/fixtures/test_labels_invalid_first.csv b/apps/ensrainbow/test/fixtures/test_labels_invalid_first.csv new file mode 100644 index 000000000..3d0b7b7e0 --- /dev/null +++ b/apps/ensrainbow/test/fixtures/test_labels_invalid_first.csv @@ -0,0 +1,3 @@ +label1,hash1,extra_column +validlabel +another_valid diff --git a/apps/ensrainbow/test/fixtures/test_labels_invalid_hash.csv b/apps/ensrainbow/test/fixtures/test_labels_invalid_hash.csv new file mode 100644 index 000000000..484983db9 --- /dev/null +++ b/apps/ensrainbow/test/fixtures/test_labels_invalid_hash.csv @@ -0,0 +1,4 @@ +validlabel,0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef +invalidhash,not-a-hex-hash +anotherlabel,0x123 +toolong,0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef123456789 diff --git a/apps/ensrainbow/test/fixtures/test_labels_special_chars.csv b/apps/ensrainbow/test/fixtures/test_labels_special_chars.csv new file mode 100644 index 000000000..ac2a1f80d Binary files /dev/null and b/apps/ensrainbow/test/fixtures/test_labels_special_chars.csv differ diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/architecture.mdx 
b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/architecture.mdx index b578aca07..8fb49b75a 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/architecture.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/architecture.mdx @@ -10,7 +10,7 @@ import { LinkCard } from '@astrojs/starlight/components'; ENSRainbow consists of four primary layers working together to "heal" unknown labels: -1. **Data Generation & Conversion** – legacy `.sql.gz` rainbow tables are converted to the modern `.ensrainbow` format. +1. **Data Generation & Conversion** – CSV files are converted to the modern `.ensrainbow` format (SQL conversion is available only for migrating legacy ENS Subgraph data). 2. **Data Ingestion** – the `.ensrainbow` files are ingested into a LevelDB database using the `ingest-ensrainbow` CLI. 3. **HTTP API Service** – state in the database is exposed through a lightweight HTTP API. 4. **Client Integration** – applications call the API directly or via the TypeScript SDK. @@ -18,10 +18,13 @@ ENSRainbow consists of four primary layers working together to "heal" unknown la ```mermaid flowchart TD subgraph Data_Generation - SQL[".sql.gz files"] + CSV["CSV files"] + SQL[".sql.gz files
(legacy only)"] ENSRB[".ensrainbow files"] - SQL --> Convert["convert" command] - Convert --> ENSRB + CSV --> ConvertCSV["convert command"] + SQL --> ConvertSQL["convert-sql command
(legacy migration)"] + ConvertCSV --> ENSRB + ConvertSQL --> ENSRB end ENSRB --> Ingest["ingest-ensrainbow"] diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx new file mode 100644 index 000000000..335a112b5 --- /dev/null +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx @@ -0,0 +1,661 @@ +--- +title: Creating ENSRainbow Files +description: Complete guide to creating .ensrainbow files. +sidebar: + label: Creating Files + order: 3 +keywords: [ensrainbow, file creation, conversion, csv] +--- + +ENSRainbow provides two methods for creating `.ensrainbow` files from different data sources. This guide helps you choose the right method and provides step-by-step instructions. + +## Prerequisites + +Before creating `.ensrainbow` files, ensure you have: + +1. **ENSNode repository cloned**: + ```bash + git clone https://github.com/namehash/ensnode.git + cd ensnode + ``` + +2. **Dependencies installed**: + ```bash + pnpm install + ``` + +3. **Working directory**: Navigate to the ENSRainbow directory: + ```bash + cd apps/ensrainbow + ``` + +All commands in this guide assume you're in the `apps/ensrainbow` directory unless otherwise specified. + +## Overview + +A `.ensrainbow` file is ENSRainbow's binary format for storing label-to-labelhash mappings. It uses Protocol Buffers for efficient serialization and supports streaming for large datasets. + +For detailed information about the file format structure, see the [Data Model](/ensrainbow/concepts/data-model) documentation. + +## Choosing Your Conversion Method + +| Method | Input Format | Use Case | Command | +|--------|-------------|----------|---------| +| **CSV Conversion** | CSV file (1 or 2 columns) | Building new ENS rainbow tables | `pnpm run convert` | +| **SQL Conversion** | Gzipped SQL dump (`ens_names.sql.gz`) | Converting legacy ENS Subgraph data | `pnpm run convert-sql` | + +### When to Use CSV Conversion + +- Creating new rainbow tables for ENSRainbow + +### When to Use SQL Conversion + +- **Legacy migration only**: Converting existing `ens_names.sql.gz` file from the legacy ENS Subgraph. This file can be obtained from [The Graph's ENS Rainbow repository](https://github.com/graphprotocol/ens-rainbow). +- **Note**: We recommend using CSV conversion for all new label sets. The SQL conversion method exists primarily for migrating away from legacy subgraph data, not for creating new subgraph-based label sets. + +## Method 1: Converting from CSV Files + +The `convert` command processes CSV files with flexible column formats. 
+
+### Command Syntax
+
+```bash
+pnpm run convert \
+  --input-file <path-to-csv-file> \
+  --output-file <path-to-output-file> \
+  --label-set-id <label-set-id> \
+  [--progress-interval <number>] \
+  [--existing-db-path <path-to-existing-db>] \
+  [--silent]
+```
+
+### Required Parameters
+
+- `--input-file`: Path to the CSV file
+- `--label-set-id`: Identifier for the output `.ensrainbow` file that will be created (used in file naming and metadata)
+
+### Optional Parameters
+
+- `--output-file`: Output file path (defaults to `{label-set-id}_{label-set-version}.ensrainbow`)
+- `--progress-interval`: Progress logging frequency (default: 50000 records)
+- `--existing-db-path`: Path to an existing ENSRainbow database, used to filter out existing labels from the generated ensrainbow file and to determine the next label set version
+- `--silent`: Disable progress bar (useful for scripts and automated workflows)
+
+### CSV Format Support
+
+The CSV converter supports two formats and expects CSV files **without a header row**.
+
+#### Single Column Format (Label Only)
+```csv
+ethereum
+vitalik
+ens
+```
+
+The converter automatically computes labelhashes using the `labelhash()` function.
+
+#### Two Column Format (Label + Labelhash)
+```csv
+ethereum,0x541111248b45b7a8dc3f5579f630e74cb01456ea6ac067d3f4d793245a255155
+vitalik,0xaf2caa1c2ca1d027f1ac823b529d0a67cd144264b2789fa2ea4d63a67c7103cc
+ens,0x5cee339e13375638553bdf5a6e36ba80fb9f6a4f0783680884d92b558aa471da
+```
+
+The converter validates the format of provided labelhashes (it does not verify that they match the label).
+
+### Label Filtering
+
+The CSV converter includes built-in filtering capabilities to prevent duplicate labels:
+
+#### Filtering Existing Labels
+Use `--existing-db-path` to filter out labels that already exist in an existing ENSRainbow database:
+
+```bash
+pnpm run convert \
+  --input-file new-labels.csv \
+  --output-file incremental_1.ensrainbow \
+  --label-set-id my-dataset \
+  --existing-db-path data-my-dataset
+```
+
+This will:
+- Check each label against the existing database
+- Skip labels that already exist (avoiding duplicates)
+- Only write new labels to the output file
+- Log filtering statistics in the conversion summary
+
+#### Filtering Duplicate Labels Within CSV
+The converter automatically filters duplicate labels within the same CSV file, keeping only the first occurrence of each label.
+
+#### Filtering Statistics
+The conversion process logs detailed statistics:
+```
+=== Conversion Summary ===
+Total lines processed: 1000
+Valid records: 850
+Filtered existing labels: 100
+Filtered duplicates: 50
+Duration: 150ms
+```
+
+### Example: Creating Test Dataset
+
+```bash
+# Create test dataset from CSV
+pnpm run convert \
+  --input-file test-labels.csv \
+  --output-file test-dataset_0.ensrainbow \
+  --label-set-id test-dataset
+```
+
+### Example: Creating Discovery Dataset
+
+```bash
+# Create discovery dataset (initially empty)
+echo "" > empty.csv
+pnpm run convert \
+  --input-file empty.csv \
+  --output-file discovery-a_0.ensrainbow \
+  --label-set-id discovery-a
+```
+
+### How It Works
+
+1. **Detects** CSV format automatically (1 or 2 columns)
+2. **Streams** CSV parsing using fast-csv for memory efficiency
+3. **Validates** column count and data format
+4. **Computes** or validates labelhashes as needed (see the sketch after this list)
+   - For single-column format: Computes labelhash using the `labelhash()` function
+   - For two-column format: Validates the format of the provided labelhash (does not verify it matches the label)
+   - Invalid labelhashes are rejected if they don't meet format requirements (66 characters including "0x" prefix, lowercase hex, valid hex format)
+5. **Filters** existing labels if `--existing-db-path` is provided
+6. **Filters** duplicate labels within the same CSV file
+7. **Writes** .ensrainbow file as output
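+
+If you want to sanity-check a two-column CSV before running the converter, the format rule and the hash itself from step 4 can be reproduced offline. A minimal sketch, assuming `labelhash` from viem and an in-memory list of rows (illustrative only — not the converter's implementation):
+
+```typescript
+import { labelhash } from "viem";
+
+// Rows as they would appear in a two-column CSV: [label, labelhash]
+const rows: [string, string][] = [
+  ["alice", "0x9c0257114eb9399a2985f8e75dad7600c5d89fe3824ffa99ec1c3eb8bf3b0501"],
+];
+
+// Format requirement: "0x" prefix + 64 lowercase hex digits (66 characters total).
+const LABELHASH_FORMAT = /^0x[0-9a-f]{64}$/;
+
+for (const [label, hash] of rows) {
+  if (!LABELHASH_FORMAT.test(hash)) {
+    console.warn(`Invalid labelhash format for "${label}": ${hash}`);
+  } else if (labelhash(label) !== hash) {
+    // Note: the converter only checks the format; it does not perform this comparison.
+    console.warn(`Labelhash does not match label "${label}"`);
+  }
+}
+```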
+
+## Method 2: Converting from SQL Dumps
+
+:::warning[Legacy Method]
+The `convert-sql` command processes a gzipped SQL dump file from the legacy ENS Subgraph. This method exists for migrating away from legacy subgraph data. **For all new label sets, we strongly recommend using CSV conversion (Method 1) instead.**
+:::
+
+### Command Syntax
+
+```bash
+pnpm run convert-sql \
+  --input-file <path-to-sql-dump> \
+  --output-file <path-to-output-file> \
+  --label-set-id <label-set-id> \
+  --label-set-version <version-number>
+```
+
+### Required Parameters
+
+- `--input-file`: Path to the gzipped SQL dump file
+- `--label-set-id`: Identifier for the output `.ensrainbow` file that will be created (used in file naming and metadata, e.g., `subgraph`)
+- `--label-set-version`: Version number for the output `.ensrainbow` file that will be created (used in file naming and metadata, non-negative integer)
+
+### Optional Parameters
+
+- `--output-file`: Output file path (defaults to `{label-set-id}_{label-set-version}.ensrainbow`)
+
+### Example: Converting Legacy ENS Subgraph Data
+
+:::note[Legacy Migration Only]
+This example shows how to convert existing legacy subgraph data. For new label sets, use CSV conversion instead.
+:::
+
+```bash
+# Convert legacy ENS Subgraph data (migration use case only)
+pnpm run convert-sql \
+  --input-file ens_names.sql.gz \
+  --output-file subgraph_0.ensrainbow \
+  --label-set-id subgraph \
+  --label-set-version 0
+```
+
+### How It Works
+
+1. **Streams** the gzipped SQL file to avoid memory issues
+2. **Parses** SQL COPY statements to extract label/labelhash pairs
+3. **Validates** each record and skips invalid entries
+   - **Invalid line format**: Lines that don't contain exactly 2 tab-separated columns (labelHash and label)
+   - **Invalid labelHash format**: LabelHash values that:
+     - Don't have exactly 66 characters (must be "0x" prefix + 64 hex digits)
+     - Are not in lowercase (must be all lowercase hexadecimal)
+     - Don't start with "0x" prefix
+     - Contain invalid hexadecimal characters
+   - Invalid entries are safely skipped as they would be unreachable by the ENS Subgraph
+4. **Writes** .ensrainbow file as output
+
+## Common Workflows
+
+### Workflow 1: Migrating from Legacy ENS Subgraph
+
+:::warning[Legacy Migration Only]
+This workflow is for migrating away from legacy ENS Subgraph data. For creating new label sets, use CSV conversion (see Workflow 3) instead.
+:::
+
+```bash
+# 1. Convert legacy SQL dump to .ensrainbow
+pnpm run convert-sql \
+  --input-file ens_names.sql.gz \
+  --output-file subgraph_0.ensrainbow \
+  --label-set-id subgraph \
+  --label-set-version 0
+
+# 2. Ingest into LevelDB
+pnpm run ingest-ensrainbow \
+  --input-file subgraph_0.ensrainbow \
+  --data-dir data-subgraph
+
+# 3. Validate the database
+pnpm run validate --data-dir data-subgraph
+
+# 4. Start the API server
+pnpm run serve --data-dir data-subgraph --port 3223
+```
+
+### Workflow 2: Creating Test Environment
+
+```bash
+# 1. 
Convert test data +pnpm run convert \ + --input-file test/fixtures/ens_test_env_names.csv \ + --output-file ens-test-env_0.ensrainbow \ + --label-set-id ens-test-env + +# 2. Ingest test data +pnpm run ingest-ensrainbow \ + --input-file ens-test-env_0.ensrainbow \ + --data-dir data-test-env + +# 3. Run with test data +pnpm run serve --data-dir data-test-env --port 3223 +``` + +### Workflow 3: Create a new Labelset + +```bash +# 1. Create CSV with your labels +echo "mylabel1 +mylabel2 +mylabel3" > custom-labels.csv + +# 2. Convert to .ensrainbow +pnpm run convert \ + --input-file custom-labels.csv \ + --output-file custom_0.ensrainbow \ + --label-set-id custom + +# 3. Ingest and serve +pnpm run ingest-ensrainbow \ + --input-file custom_0.ensrainbow \ + --data-dir data-custom + +pnpm run serve --data-dir data-custom --port 3223 +``` + +### Workflow 4: Creating Incremental Label Set Versions + +```bash +# 1. Create initial labelset +pnpm run convert \ + --input-file initial-labels.csv \ + --output-file my-dataset_0.ensrainbow \ + --label-set-id my-dataset + +# 2. Ingest initial data +pnpm run ingest-ensrainbow \ + --input-file my-dataset_0.ensrainbow \ + --data-dir data-my-dataset + +# 3. Create incremental update (filtering existing labels) +pnpm run convert \ + --input-file new-labels.csv \ + --output-file my-dataset_1.ensrainbow \ + --label-set-id my-dataset \ + --existing-db-path data-my-dataset + +# 4. Ingest incremental update +pnpm run ingest-ensrainbow \ + --input-file my-dataset_1.ensrainbow \ + --data-dir data-my-dataset + +# 5. Serve updated data +pnpm run serve --data-dir data-my-dataset --port 3223 +``` + +### Workflow 5: Using Custom Label Set Server + +```bash +# 1. Configure custom label set server +export ENSRAINBOW_LABELSET_SERVER_URL="https://my-label-set-server.com" + +# 2. Download from custom server +# The script downloads to labelsets/ subdirectory +./scripts/download-ensrainbow-files.sh my-dataset 0 + +# 3. Ingest and serve +# Files are downloaded to labelsets/ by the script +pnpm run ingest-ensrainbow \ + --input-file labelsets/my-dataset_0.ensrainbow \ + --data-dir data-my-dataset + +pnpm run serve --data-dir data-my-dataset --port 3223 +``` + +:::note[Script Output Locations] +ENSRainbow download scripts save files to specific subdirectories: +- **`.ensrainbow` files**: `labelsets/` +- **Database archives**: `databases/{schema_version}/` +- **Checksums and licenses**: Same directory as the downloaded file +::: + +## File Naming Conventions + +Follow the naming convention: `{label-set-id}_{label-set-version}.ensrainbow` + +**Examples:** +- `subgraph_0.ensrainbow` - Legacy ENS data, version 0 +- `discovery-a_0.ensrainbow` - Discovery dataset, version 0 +- `ens-test-env_0.ensrainbow` - Test environment data, version 0 + +## Next Steps + +After creating your `.ensrainbow` file: + +1. **[Ingest the data](/ensrainbow/contributing/index#data-ingestion-ingest-ensrainbow)** into a ENSRainbow database +2. **[Validate the database](/ensrainbow/contributing/index#database-validation-validate)** to ensure integrity +3. **[Start the API server](/ensrainbow/contributing/index#api-server-serve)** to serve the data + +For complete CLI reference information, see the [CLI Reference](/ensrainbow/contributing/cli-reference) documentation. + +## Creating and Publishing Custom .ensrainbow Files + +If you want to create, publish, and distribute your own `.ensrainbow` files, follow these steps: + +### 1. 
Create Your Dataset + +First, prepare your data in CSV format, then convert it using the `convert` command: + +```bash +pnpm run convert \ + --input-file my-labels.csv \ + --output-file my-dataset_0.ensrainbow \ + --label-set-id my-dataset + +# to create an incremental update, you can use the `--existing-db-path` flag to filter out existing labels: +pnpm run convert \ + --input-file my-labels2.csv \ + --output-file my-dataset_1.ensrainbow \ + --label-set-id my-dataset \ + --existing-db-path data-my-dataset +``` + +### 2. Validate Your File + +Test your `.ensrainbow` file by ingesting it locally: + +```bash +# Ingest your custom dataset +pnpm run ingest-ensrainbow \ + --input-file my-dataset_0.ensrainbow \ + --data-dir data-my-dataset + +# Validate the database +pnpm run validate --data-dir data-my-dataset + +# Test the API +pnpm run serve --data-dir data-my-dataset --port 3223 +``` + +### 3. Publish Your File + +#### Option A: Direct File Sharing +- Upload your `.ensrainbow` file to a web server or cloud storage +- Provide a direct download URL +- Share checksums for integrity verification + +#### Option B: Package as Database Archive +For better performance, package your data as a pre-built database: + +```bash +# Ingest your .ensrainbow file +pnpm run ingest-ensrainbow \ + --input-file my-dataset_0.ensrainbow \ + --data-dir data-my-dataset + +# Package the database +tar -czvf my-dataset_0.tgz ./data-my-dataset + +# Calculate checksum +sha256sum my-dataset_0.tgz > my-dataset_0.tgz.sha256sum +``` + +### 4. Document Your Label Set + +Create documentation for your custom label set including: + +- **Label Set ID**: The identifier users will specify +- **Description**: What labels are included and their source +- **Version**: Current version number +- **Download URLs**: Where to get the files +- **Checksums**: For integrity verification +- **Usage Examples**: How to use your dataset + +### Example Documentation Format + +```markdown +## Custom Label Set: my-dataset + +**Label Set ID**: `my-dataset` +**Current Version**: `0` +**Description**: Custom ENS labels from [source description] + +### Download +- Database Archive: `https://example.com/my-dataset_0.tgz` +- Checksum: `https://example.com/my-dataset_0.tgz.sha256sum` + +### Usage +``` + +```bash +# Using with Docker +docker run -d \ + -e DB_SCHEMA_VERSION="3" \ + -e LABEL_SET_ID="my-dataset" \ + -e LABEL_SET_VERSION="0" \ + -p 3223:3223 \ + ghcr.io/namehash/ensnode/ensrainbow:latest +``` + +## Setting Up Your Own Label Set Server + +A **Label Set Server** is a storage and hosting service for `.ensrainbow` files and prebuilt database archives. It's not the ENSRainbow API server itself, but rather a way to distribute your custom datasets for others to download and use. + +### 1. Choose Your Hosting Platform + +You can host your label set files on any web server or cloud storage service: + +- **AWS S3**: Industry standard with versioning +- **Cloudflare R2**: Cost-effective alternative to S3 +- **Simple HTTP server**: For internal/private use + +### 2. 
Organize Your Files + +Structure your label set files following ENSRainbow conventions: + +``` +my-label-set-server/ +β”œβ”€β”€ labelsets/ +β”‚ β”œβ”€β”€ my-dataset_0.ensrainbow +β”‚ β”œβ”€β”€ my-dataset_0.ensrainbow.sha256sum +β”‚ β”œβ”€β”€ my-dataset_1.ensrainbow +β”‚ └── my-dataset_1.ensrainbow.sha256sum +└── databases/ + β”œβ”€β”€ 3/ # Schema version + β”‚ β”œβ”€β”€ my-dataset_0.tgz + β”‚ β”œβ”€β”€ my-dataset_0.tgz.sha256sum + β”‚ β”œβ”€β”€ my-dataset_1.tgz + β”‚ └── my-dataset_1.tgz.sha256sum + └── 4/ # Future schema version +``` + +### 3. Use Existing Download Scripts + +ENSRainbow provides ready-to-use download scripts that users can configure to download from your label set server: + +#### Download .ensrainbow Files +```bash +# Configure your label set server URL +export ENSRAINBOW_LABELSET_SERVER_URL="https://my-label-set-server.com" + +# Download .ensrainbow file using the existing script +./scripts/download-ensrainbow-files.sh my-dataset 0 +``` + +#### Download Prebuilt Database Archives +```bash +# Configure your label set server URL +export ENSRAINBOW_LABELSET_SERVER_URL="https://my-label-set-server.com" + +# Download prebuilt database using the existing script +./scripts/download-prebuilt-database.sh 3 my-dataset 0 +``` + +#### Script Features +The existing scripts automatically handle: +- **Checksum verification** for data integrity +- **Resume downloads** if files already exist and are valid +- **License file downloads** (optional) +- **Progress reporting** for large files +- **Error handling** with cleanup of partial downloads + +### 4. Document Your Label Set Server + +Create a README or documentation page for your label set server: + +```markdown +# My Label Set Server + +This server hosts custom ENS label sets for ENSRainbow. + +## Available Label Sets + +### my-dataset +- **Description**: Custom ENS labels from [source] +- **Versions**: 0, 1 +- **Schema Versions**: 3 +- **Base URL**: `https://my-label-set-server.com` + +### another-dataset +- **Description**: Additional labels from [source] +- **Versions**: 0 +- **Schema Versions**: 3 +- **Base URL**: `https://my-label-set-server.com` +``` + +## Usage + +Users should have the ENSNode repository cloned and be in the `apps/ensrainbow` directory. + +### Option 1: Download .ensrainbow Files + +```bash +# Configure your label set server +export ENSRAINBOW_LABELSET_SERVER_URL="https://my-label-set-server.com" + +# Download .ensrainbow file +./scripts/download-ensrainbow-files.sh my-dataset 0 + +# Ingest into ENSRainbow +pnpm run ingest-ensrainbow \ + --input-file labelsets/my-dataset_0.ensrainbow \ + --data-dir data-my-dataset + +# Start ENSRainbow server +pnpm run serve --data-dir data-my-dataset --port 3223 +``` + +### Option 2: Download Prebuilt Databases (Faster) + +```bash +# Configure your label set server +export ENSRAINBOW_LABELSET_SERVER_URL="https://my-label-set-server.com" + +# Download prebuilt database +./scripts/download-prebuilt-database.sh 3 my-dataset 0 + +# Extract database +tar -xzf databases/3/my-dataset_0.tgz -C data-my-dataset --strip-components=1 + +# Start ENSRainbow server +pnpm run serve --data-dir data-my-dataset --port 3223 +``` + +### 5. 
Version Management + +Implement proper versioning for your label sets: + +```bash +# When releasing a new version +LABEL_SET_ID="my-dataset" +NEW_VERSION="1" + +# Create new .ensrainbow file +pnpm run convert \ + --input-file updated-labels.csv \ + --output-file ${LABEL_SET_ID}_${NEW_VERSION}.ensrainbow \ + --label-set-id ${LABEL_SET_ID} + +# Create prebuilt database +pnpm run ingest-ensrainbow \ + --input-file ${LABEL_SET_ID}_${NEW_VERSION}.ensrainbow \ + --data-dir data-${LABEL_SET_ID}-${NEW_VERSION} + +tar -czvf ${LABEL_SET_ID}_${NEW_VERSION}.tgz ./data-${LABEL_SET_ID}-${NEW_VERSION} + +# Calculate checksums +sha256sum ${LABEL_SET_ID}_${NEW_VERSION}.ensrainbow > ${LABEL_SET_ID}_${NEW_VERSION}.ensrainbow.sha256sum +sha256sum ${LABEL_SET_ID}_${NEW_VERSION}.tgz > ${LABEL_SET_ID}_${NEW_VERSION}.tgz.sha256sum + +# Upload to your label set server +# (implementation depends on your hosting platform) +``` + +### 6. Testing Your Label Set Server + +Before publishing, test that your label set server works correctly: + +```bash +# Set your test server URL +export ENSRAINBOW_LABELSET_SERVER_URL="https://my-label-set-server.com" + +# Test downloading .ensrainbow file +./scripts/download-ensrainbow-files.sh my-dataset 0 + +# Verify checksum was validated +# The script will fail if checksums don't match + +# Test downloading prebuilt database +./scripts/download-prebuilt-database.sh 3 my-dataset 0 + +# Verify the database works by ingesting the downloaded file +pnpm run ingest-ensrainbow \ + --input-file labelsets/my-dataset_0.ensrainbow \ + --data-dir test-data + +pnpm run validate --data-dir test-data +``` + +## Running Your Own ENSRainbow Server + +If you want to run your own ENSRainbow API server (separate from the label set server), see the [Local Development](/ensrainbow/contributing/local-development) guide for instructions on setting up and running ENSRainbow locally or in production. + +## Related Documentation + +- **[Data Model](/ensrainbow/concepts/data-model)** - Understanding the `.ensrainbow` file format +- **[Label Sets & Versioning](/ensrainbow/concepts/label-sets-and-versioning)** - Managing label set versions +- **[CLI Reference](/ensrainbow/contributing/cli-reference)** - Complete command documentation +- **[Local Development](/ensrainbow/contributing/local-development)** - Setting up your development environment diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx index 8978ca5a9..dc5d4beaf 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx @@ -104,15 +104,14 @@ subgraph_0.ensrainbow # labelSetId = "subgraph", version = 0 subgraph_1.ensrainbow # next version with incremental labelhash-to-label mappings added ``` -## Converting Legacy SQL Data +## Creating ENSRainbow Files -If you have a legacy gzipped rainbow table (`ens_names.sql.gz`) from the ENS Subgraph, you can convert it to the `.ensrainbow` format: +ENSRainbow provides two methods for creating `.ensrainbow` files from different data sources: -```bash title="Convert legacy SQL data" -pnpm run convert --input-file path/to/ens_names.sql.gz --output-file subgraph-0.ensrainbow -``` +- **CSV Conversion**: Convert custom datasets from CSV files using `pnpm run convert` +- **SQL Conversion**: Convert legacy ENS Subgraph data (`ens_names.sql.gz`) using `pnpm run convert-sql`. 
These legacy data files can be obtained from [The Graph's ENS Rainbow repository](https://github.com/graphprotocol/ens-rainbow). -This conversion process allows you to migrate existing rainbow table data that was previously stored in SQL format to ENSRainbow's optimized binary format. The resulting `.ensrainbow` file will be equivalent to the rainbow tables used by the ENS Subgraph, maintaining the same label-to-labelhash mappings while providing better performance and storage efficiency. +For complete instructions, examples, and workflow guidance, see the [Creating ENSRainbow Files](/ensrainbow/concepts/creating-files) guide. ## Ingestion Process diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/contributing/cli-reference.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/contributing/cli-reference.mdx index 6326f7b8b..58023497a 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/contributing/cli-reference.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/contributing/cli-reference.mdx @@ -8,26 +8,38 @@ keywords: [ensrainbow, cli] | Command | Purpose | Most useful flags | Example | |---------|---------|-------------------|---------| -| `convert` | Convert legacy `.sql.gz` rainbow tables to `.ensrainbow` format. **This is currently the only way to create new .ensrainbow files.** | `--input-file`, `--output-file`, `--label-set-id`, `--label-set-version` | `pnpm run convert --input-file ens_names.sql.gz --output-file subgraph-0.ensrainbow` | -| `ingest-ensrainbow` | Stream a `.ensrainbow` file into LevelDB | `--input-file`, `--data-dir` | `pnpm run ingest-ensrainbow --input-file subgraph-0.ensrainbow --data-dir ./data` | +| `convert` | Convert CSV files to `.ensrainbow` format. **This is the primary method for creating new .ensrainbow files.** | `--input-file`, `--output-file`, `--label-set-id`, `--existing-db-path`, `--silent` | `pnpm run convert --input-file labels.csv --output-file my-dataset_0.ensrainbow --label-set-id my-dataset` | +| `convert-sql` | Convert legacy `.sql.gz` rainbow tables (ENS Subgraph data) to `.ensrainbow` format | `--input-file`, `--output-file`, `--label-set-id`, `--label-set-version` | `pnpm run convert-sql --input-file ens_names.sql.gz --output-file subgraph_0.ensrainbow --label-set-id subgraph --label-set-version 0` | +| `ingest-ensrainbow` | Stream a `.ensrainbow` file into LevelDB | `--input-file`, `--data-dir` | `pnpm run ingest-ensrainbow --input-file my-dataset_0.ensrainbow --data-dir ./data` | | `validate` | Verify DB integrity | `--data-dir`, `--lite` | `pnpm run validate --lite` | | `purge` | Delete all DB files in a directory | `--data-dir` | `pnpm run purge --data-dir ./data` | | `serve` | Launch the HTTP API server | `--data-dir`, `--port` | `pnpm run serve --port 3223` | ## Creating .ensrainbow Files -:::note[Important] -The `convert` command is **the only way** to create new `.ensrainbow` files from scratch. If you need to create custom label sets with your own data, you must use this command to convert from PostgreSQL dump format. +### CSV Conversion (Recommended) -You can download existing `.ensrainbow` files using the download scripts, but for creating entirely new files, `convert` is your only option. -::: +The `convert` command is the **primary method** for creating new `.ensrainbow` files from CSV data. 
**Full convert command syntax:** ```bash pnpm run convert \ - --input-file path/to/your_data.sql.gz \ + --input-file path/to/labels.csv \ --output-file path/to/output.ensrainbow \ --label-set-id your-label-set-id \ + [--existing-db-path path/to/existing/database] \ + [--silent] +``` + +### SQL Conversion (Legacy) + +For converting legacy ENS Subgraph data from SQL dumps: + +```bash +pnpm run convert-sql \ + --input-file path/to/ens_names.sql.gz \ + --output-file path/to/output.ensrainbow \ + --label-set-id subgraph \ --label-set-version 0 ``` diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx index 64556f1eb..984b5d683 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx @@ -17,6 +17,7 @@ This guide covers running ENSRainbow locally for development and contributions. For focused guidance on specific topics, check out these dedicated pages: + @@ -24,6 +25,7 @@ For focused guidance on specific topics, check out these dedicated pages: :::tip[Choose Your Path] - **New to the project?** Start with [Local Development](/ensrainbow/contributing/local-development) +- **Creating custom datasets?** See [Creating ENSRainbow Files](/ensrainbow/concepts/creating-files) - **Need CLI help?** Check the [CLI Reference](/ensrainbow/contributing/cli-reference) - **Building for production?** See [Building Docker Images](/ensrainbow/contributing/building) ::: @@ -41,6 +43,7 @@ Follow these steps to start contributing to ENSRainbow: ## Quick Reference - **Need to build from source?** β†’ [Building Docker Images](/ensrainbow/contributing/building) +- **Creating custom datasets?** β†’ [Creating ENSRainbow Files](/ensrainbow/concepts/creating-files) - **Looking for CLI commands?** β†’ [CLI Reference](/ensrainbow/contributing/cli-reference) - **Running into issues?** β†’ [Troubleshooting](/ensrainbow/usage/troubleshooting) - **Want to understand the data flow?** β†’ [Data Model](/ensrainbow/concepts/data-model) @@ -102,9 +105,9 @@ Starts the API server. The process will exit with: ## Using ENSRainbow with ens-test-env -The ens-test-env project provides a test environment for ENS development. It includes a small dataset of ENS names in the `ens_test_env_names.sql.gz` file that can be used with ENSRainbow for testing purposes. +The ens-test-env project provides a test environment for ENS development. It includes a small dataset of ENS names in the `ens_test_env_names.csv` file that can be used with ENSRainbow for testing purposes. -### Ingesting ens_test_env_names.sql.gz +### Ingesting ens_test_env_names.csv To ingest the test data into ENSRainbow: @@ -112,7 +115,7 @@ To ingest the test data into ENSRainbow: If you don't have a pre-converted `ens-test-env-0.ensrainbow` file: ```bash # Navigate to apps/ensrainbow or adjust paths accordingly - pnpm run convert --input-file test/fixtures/ens_test_env_names.sql.gz --output-file ens-test-env-0.ensrainbow + pnpm run convert --input-file test/fixtures/ens_test_env_names.csv --output-file ens-test-env-0.ensrainbow --label-set-id ens-test-env ``` This creates `ens-test-env-0.ensrainbow`. @@ -265,27 +268,27 @@ These steps are typically performed by project maintainers for releasing officia ### 1. Prepare `.ensrainbow` Files -This section covers the conversion of source data (like SQL dumps or empty files for initial datasets) into the `.ensrainbow` format. 
The `time` command is used here to measure the duration of potentially long-running conversion processes. +This section covers the conversion of source data into the `.ensrainbow` format. For detailed conversion instructions and examples, see the [Creating ENSRainbow Files](/ensrainbow/concepts/creating-files) guide. -**For the `subgraph` Label Set (main dataset):** -This command converts a SQL dump file (`ens_names.sql.gz`) into an `.ensrainbow` file for version 0 of the `subgraph` Label Set. +**For the `subgraph` Label Set (legacy migration only):** +This command converts a SQL dump file (`ens_names.sql.gz`) from the legacy ENS Subgraph into an `.ensrainbow` file for version 0 of the `subgraph` Label Set. **Note:** SQL conversion is only for migrating legacy ENS Subgraph data. For all new label sets, use CSV conversion instead. ```bash -# Assuming ens_names.sql.gz contains the primary dataset -time pnpm run convert --input-file ens_names.sql.gz --output-file subgraph_0.ensrainbow --label-set-id subgraph --label-set-version 0 +# Assuming ens_names.sql.gz contains the dataset +time pnpm run convert-sql --input-file ens_names.sql.gz --output-file subgraph_0.ensrainbow --label-set-id subgraph --label-set-version 0 ``` **For the `discovery-a` Label Set (initially empty for discovered labels):** This creates an empty `.ensrainbow` file for version 0 of the `discovery-a` Label Set, which is used for labels discovered dynamically. ```bash -touch empty.sql -gzip empty.sql -time pnpm run convert --input-file empty.sql.gz --output-file discovery-a_0.ensrainbow --label-set-id discovery-a --label-set-version 0 +# Create empty CSV file for discovery dataset +echo "" > empty.csv +time pnpm run convert --input-file empty.csv --output-file discovery-a_0.ensrainbow --label-set-id discovery-a ``` **For the `ens-test-env` Label Set (for testing):** -This converts a test dataset SQL dump into an `.ensrainbow` file for version 0 of the `ens-test-env` Label Set. +This converts a test dataset CSV file into an `.ensrainbow` file for version 0 of the `ens-test-env` Label Set. ```bash -time pnpm run convert --input-file test/fixtures/ens_test_env_names.sql.gz --output-file ens-test-env_0.ensrainbow --label-set-id ens-test-env --label-set-version 0 +time pnpm run convert --input-file test/fixtures/ens_test_env_names.csv --output-file ens-test-env_0.ensrainbow --label-set-id ens-test-env ``` ### 2. Upload `.ensrainbow` Files to R2 Storage diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx index fa0d5704b..fbf621e7c 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx @@ -22,18 +22,15 @@ To stay informed about new versions, monitor the [Available Label Sets](/ensrain ## How can I create my own .ensrainbow file? -Currently, the `convert` command is the **only way** to create new `.ensrainbow` files from scratch. This command converts PostgreSQL rainbow table dumps (`.sql.gz` format) into the binary protobuf format that ENSRainbow uses. +You can create your own `.ensrainbow` files from CSV data using the `convert` command, which generates new `.ensrainbow` files from your supplied CSV input. -**To create a custom .ensrainbow file:** - -1. **Prepare your data** as a PostgreSQL dump file (`.sql.gz`) with ENS labels and labelhashes +1. **Prepare your data** as a CSV file with labels (1 column) or labels and labelhashes (2 columns) 2. 
**Run the convert command:** ```bash - pnpm run convert --input-file your_data.sql.gz --output-file custom.ensrainbow + pnpm run convert --input-file your_labels.csv --output-file custom.ensrainbow --label-set-id my-dataset ``` -3. **Specify the label set details** using `--label-set-id` and `--label-set-version` flags -**Note:** You can download existing `.ensrainbow` files using the download scripts, but for creating entirely new files with your own data, the `convert` command is currently the only option available. +**Note:** You can also download existing `.ensrainbow` files using the download scripts. See the [CLI Reference](/ensrainbow/contributing/cli-reference/) for detailed command usage. diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index de9b17e96..7a179413b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -486,6 +486,9 @@ importers: '@ensnode/ensrainbow-sdk': specifier: workspace:* version: link:../../packages/ensrainbow-sdk + '@fast-csv/parse': + specifier: ^5.0.0 + version: 5.0.5 '@hono/node-server': specifier: ^1.4.1 version: 1.19.5(hono@4.10.3) @@ -1865,6 +1868,9 @@ packages: '@expressive-code/plugin-text-markers@0.41.3': resolution: {integrity: sha512-SN8tkIzDpA0HLAscEYD2IVrfLiid6qEdE9QLlGVSxO1KEw7qYvjpbNBQjUjMr5/jvTJ7ys6zysU2vLPHE0sb2g==} + '@fast-csv/parse@5.0.5': + resolution: {integrity: sha512-M0IbaXZDbxfOnpVE5Kps/a6FGlILLhtLsvWd9qNH3d2TxNnpbNkFf3KD26OmJX6MHq7PdQAl5htStDwnuwHx6w==} + '@fastify/busboy@3.2.0': resolution: {integrity: sha512-m9FVDXU3GT2ITSe0UaMA5rU3QkfC/UXtCU8y0gSN/GugTqtVldOBWIB5V6V3sbmenVZUIpU6f+mPEO2+m5iTaA==} @@ -6171,12 +6177,30 @@ packages: lodash.debounce@4.0.8: resolution: {integrity: sha512-FT1yDzDYEoYWhnSGnpE/4Kj1fLZkDFyqRb7fNt6FdYOSxlUWAtp42Eh6Wb0rGIv/m9Bgo7x4GhQbm5Ys4SG5ow==} + lodash.escaperegexp@4.1.2: + resolution: {integrity: sha512-TM9YBvyC84ZxE3rgfefxUWiQKLilstD6k7PTGt6wfbtXF8ixIJLOL3VYyV/z+ZiPLsVxAsKAFVwWlWeb2Y8Yyw==} + + lodash.groupby@4.6.0: + resolution: {integrity: sha512-5dcWxm23+VAoz+awKmBaiBvzox8+RqMgFhi7UvX9DHZr2HdxHXM/Wrf8cfKpsW37RNrvtPn6hSwNqurSILbmJw==} + + lodash.isfunction@3.0.9: + resolution: {integrity: sha512-AirXNj15uRIMMPihnkInB4i3NHeb4iBtNg9WRWuK2o31S+ePwwNmDPaTL3o7dTJ+VXNZim7rFs4rxN4YU1oUJw==} + + lodash.isnil@4.0.0: + resolution: {integrity: sha512-up2Mzq3545mwVnMhTDMdfoG1OurpA/s5t88JmQX809eH3C8491iu2sfKhTfhQtKY78oPNhiaHJUpT/dUDAAtng==} + + lodash.isundefined@3.0.1: + resolution: {integrity: sha512-MXB1is3s899/cD8jheYYE2V9qTHwKvt+npCwpD+1Sxm3Q3cECXCiYHjeHWXNwr6Q0SOBPrYUDxendrO6goVTEA==} + lodash.sortby@4.7.0: resolution: {integrity: sha512-HDWXG8isMntAyRF5vZ7xKuEvOhT4AhlRt/3czTSjvGUxjYCBVRQY48ViDHyfYz9VIoBkW4TMGQNapx+l3RUwdA==} lodash.startcase@4.4.0: resolution: {integrity: sha512-+WKqsK294HMSc2jEbNgpHpd0JfIBhp7rEV4aqXWqFr6AlXov+SlcgB1Fv01y2kGe3Gc8nMW7VA0SrGuSkRfIEg==} + lodash.uniq@4.5.0: + resolution: {integrity: sha512-xfBaXQd9ryd9dlSDvnvI0lvxfLJlYAZzXomUYzLKtUeOQvOP5piqAWuGtrhWeqaXK9hhoM/iyJc5AV+XfsX3HQ==} + lodash@4.17.21: resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==} @@ -9854,6 +9878,15 @@ snapshots: dependencies: '@expressive-code/core': 0.41.3 + '@fast-csv/parse@5.0.5': + dependencies: + lodash.escaperegexp: 4.1.2 + lodash.groupby: 4.6.0 + lodash.isfunction: 3.0.9 + lodash.isnil: 4.0.0 + lodash.isundefined: 3.0.1 + lodash.uniq: 4.5.0 + '@fastify/busboy@3.2.0': {} '@floating-ui/core@1.7.3': @@ -14673,10 +14706,22 @@ snapshots: lodash.debounce@4.0.8: {} + lodash.escaperegexp@4.1.2: {} + + lodash.groupby@4.6.0: {} + + 
lodash.isfunction@3.0.9: {} + + lodash.isnil@4.0.0: {} + + lodash.isundefined@3.0.1: {} + lodash.sortby@4.7.0: {} lodash.startcase@4.4.0: {} + lodash.uniq@4.5.0: {} + lodash@4.17.21: {} long@5.3.2: {}