From 2006959ea745adf5cb5704ee87c1b00f9b425d5f Mon Sep 17 00:00:00 2001 From: djstrong Date: Tue, 30 Sep 2025 14:27:50 +0200 Subject: [PATCH 01/30] feat: add CSV conversion command to ensrainbow CLI - Introduced `convert-csv` command for converting CSV files to .ensrainbow format. - Added support for single and two-column CSV formats. - Implemented error handling for invalid CSV data. - Created tests for various CSV scenarios, including special characters and invalid formats. - Updated package dependencies to include `csv-simple-parser` for CSV parsing. --- apps/ensrainbow/package.json | 3 +- apps/ensrainbow/src/cli.ts | 49 +++- .../src/commands/convert-csv-command.test.ts | 241 +++++++++++++++++ .../src/commands/convert-csv-command.ts | 248 ++++++++++++++++++ .../test/fixtures/test_labels_1col.csv | 10 + .../test/fixtures/test_labels_2col.csv | 10 + .../fixtures/test_labels_invalid_first.csv | 3 + .../fixtures/test_labels_invalid_hash.csv | 4 + .../fixtures/test_labels_special_chars.csv | 10 + pnpm-lock.yaml | 17 +- 10 files changed, 591 insertions(+), 4 deletions(-) create mode 100644 apps/ensrainbow/src/commands/convert-csv-command.test.ts create mode 100644 apps/ensrainbow/src/commands/convert-csv-command.ts create mode 100644 apps/ensrainbow/test/fixtures/test_labels_1col.csv create mode 100644 apps/ensrainbow/test/fixtures/test_labels_2col.csv create mode 100644 apps/ensrainbow/test/fixtures/test_labels_invalid_first.csv create mode 100644 apps/ensrainbow/test/fixtures/test_labels_invalid_hash.csv create mode 100644 apps/ensrainbow/test/fixtures/test_labels_special_chars.csv diff --git a/apps/ensrainbow/package.json b/apps/ensrainbow/package.json index ea7c2b95c..af46315e9 100644 --- a/apps/ensrainbow/package.json +++ b/apps/ensrainbow/package.json @@ -38,7 +38,8 @@ "progress": "^2.0.3", "protobufjs": "^7.4.0", "viem": "catalog:", - "yargs": "^17.7.2" + "yargs": "^17.7.2", + "csv-simple-parser": "^2.0.2" }, "devDependencies": { "@ensnode/shared-configs": "workspace:*", diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index 3fdc0d530..063c48df2 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -13,6 +13,7 @@ import { } from "@ensnode/ensnode-sdk"; import { convertCommand } from "@/commands/convert-command"; +import { convertCsvCommand } from "@/commands/convert-csv-command"; // import { ingestCommand } from "@/commands/ingest-command"; import { ingestProtobufCommand } from "@/commands/ingest-protobuf-command"; import { purgeCommand } from "@/commands/purge-command"; @@ -61,6 +62,13 @@ interface ConvertArgs { "label-set-version": LabelSetVersion; } +interface ConvertCsvArgs { + "input-file": string; + "output-file": string; + "label-set-id": LabelSetId; + "label-set-version": LabelSetVersion; +} + export interface CLIOptions { exitProcess?: boolean; } @@ -184,7 +192,7 @@ export function createCLI(options: CLIOptions = {}) { ) .command( "convert", - "Convert rainbow tables from SQL dump to protobuf format", + "Convert rainbow tables from SQL dump to ensrainbow format", (yargs: Argv) => { return yargs .option("input-file", { @@ -194,7 +202,7 @@ export function createCLI(options: CLIOptions = {}) { }) .option("output-file", { type: "string", - description: "Path to the output protobuf file", + description: "Path to the output ensrainbow file", default: join(process.cwd(), "rainbow-records.ensrainbow"), }) .option("label-set-id", { @@ -219,6 +227,43 @@ export function createCLI(options: CLIOptions = {}) { }); }, ) + .command( + "convert-csv", + 
"Convert rainbow tables from CSV format to ensrainbow format", + (yargs: Argv) => { + return yargs + .option("input-file", { + type: "string", + description: "Path to the CSV input file", + demandOption: true, + }) + .option("output-file", { + type: "string", + description: "Path to the output ensrainbow file", + default: join(process.cwd(), "rainbow-records.ensrainbow"), + }) + .option("label-set-id", { + type: "string", + description: "Label set id for the rainbow record collection", + demandOption: true, + }) + .coerce("label-set-id", buildLabelSetId) + .option("label-set-version", { + type: "number", + description: "Label set version for the rainbow record collection", + demandOption: true, + }) + .coerce("label-set-version", buildLabelSetVersion); + }, + async (argv: ArgumentsCamelCase) => { + await convertCsvCommand({ + inputFile: argv["input-file"], + outputFile: argv["output-file"], + labelSetId: argv["label-set-id"], + labelSetVersion: argv["label-set-version"], + }); + }, + ) .demandCommand(1, "You must specify a command") .fail((msg, err, yargs) => { if (process.env.VITEST) { diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts new file mode 100644 index 000000000..2be46d924 --- /dev/null +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -0,0 +1,241 @@ +import { tmpdir } from "os"; +import { join } from "path"; +import { mkdtemp, rm, stat, writeFile } from "fs/promises"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +import { createCLI } from "@/cli"; +import { type LabelSetId, type LabelSetVersion } from "@ensnode/ensnode-sdk"; +import { convertCsvCommand } from "./convert-csv-command"; + +// Path to test fixtures +const TEST_FIXTURES_DIR = join(__dirname, "..", "..", "test", "fixtures"); + +describe("convert-csv-command", () => { + let tempDir: string; + + beforeEach(async () => { + vi.stubEnv("NODE_ENV", "test"); + tempDir = await mkdtemp(join(tmpdir(), "ensrainbow-csv-test-")); + }); + + afterEach(async () => { + vi.unstubAllEnvs(); + vi.restoreAllMocks(); + await rm(tempDir, { recursive: true, force: true }); + }); + + describe("CSV conversion and ingestion", () => { + it("should convert single column CSV and successfully ingest into database", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); + const outputFile = join(tempDir, "output_1col.ensrainbow"); + const dataDir = join(tempDir, "db_1col"); + + // Convert CSV to ensrainbow format + await convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-csv-one-col" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + }); + + // Verify the output file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + + // Ingest the converted file into database + const cli = createCLI({ exitProcess: false }); + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + // Verify database was created + const dbStats = await stat(dataDir); + expect(dbStats.isDirectory()).toBe(true); + + // Verify database contents by validating it + await cli.parse(["validate", "--data-dir", dataDir, "--lite"]); + + // Database validation passed, which means records are accessible + }); + + it("should convert two column CSV with provided hashes and ingest successfully", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_2col.csv"); + const outputFile = 
join(tempDir, "output_2col.ensrainbow"); + const dataDir = join(tempDir, "db_2col"); + + // Convert CSV to ensrainbow format + await convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-csv-two-col" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + }); + + // Verify the output file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + + // Ingest the converted file into database + const cli = createCLI({ exitProcess: false }); + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + // Verify database was created + const dbStats = await stat(dataDir); + expect(dbStats.isDirectory()).toBe(true); + }); + + it("should fail when CSV has inconsistent column count", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_invalid_first.csv"); + const outputFile = join(tempDir, "output_invalid.ensrainbow"); + + // Convert CSV to ensrainbow format (should fail on inconsistent columns) + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-csv-invalid" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + }), + ).rejects.toThrow(/CSV conversion failed due to invalid data/); + }); + + it("should handle CSV with special characters, emojis, unicode, and quoted fields", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_special_chars.csv"); + const outputFile = join(tempDir, "output_special.ensrainbow"); + + // Convert CSV to ensrainbow format + await convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-csv-special" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + }); + + // Verify output file was created + const outputStats = await stat(outputFile); + expect(outputStats.isFile()).toBe(true); + expect(outputStats.size).toBeGreaterThan(0); + + // Verify special characters were processed correctly by checking logs + // The conversion completed successfully, which means csv-simple-parser + // handled emojis, unicode, quoted fields with commas, etc. 
+ expect(true).toBe(true); // Test passes if conversion doesn't crash + }); + + it("should fail when CSV contains invalid labelhash format", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_invalid_hash.csv"); + const outputFile = join(tempDir, "output_invalid_hash.ensrainbow"); + + // Convert CSV to ensrainbow format (should fail on invalid hash format) + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-csv-invalid-hash" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + }), + ).rejects.toThrow(/CSV conversion failed due to invalid data/); + }); + }); + + describe("Error handling", () => { + it("should throw error for non-existent input file", async () => { + const inputFile = join(tempDir, "non-existent.csv"); + const outputFile = join(tempDir, "output.ensrainbow"); + + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-missing" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + }), + ).rejects.toThrow(); + }); + }); + + describe("CLI integration", () => { + it("should work through the full CLI pipeline", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); + const outputFile = join(tempDir, "cli_output.ensrainbow"); + const dataDir = join(tempDir, "cli_db"); + + const cli = createCLI({ exitProcess: false }); + + // Test convert-csv command through CLI + await cli.parse([ + "convert-csv", + "--input-file", + inputFile, + "--output-file", + outputFile, + "--label-set-id", + "test-cli-csv", + "--label-set-version", + "0", + ]); + + // Verify file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + + // Test ingestion through CLI + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + // Verify database was created + const dbStats = await stat(dataDir); + expect(dbStats.isDirectory()).toBe(true); + }); + }); + + describe("Streaming performance", () => { + it("should handle small CSV files efficiently", async () => { + const inputFile = join(tempDir, "small_test.csv"); + const outputFile = join(tempDir, "output_small.ensrainbow"); + const dataDir = join(tempDir, "db_small"); + + // Create a CSV with 100 records to test streaming + const records = []; + for (let i = 0; i < 100; i++) { + records.push(`label${i}`); + } + await writeFile(inputFile, records.join("\n")); + + const startTime = Date.now(); + + // Convert CSV + await convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-small" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + }); + + const conversionTime = Date.now() - startTime; + + // Should complete conversion quickly (less than 2 seconds for 100 records) + expect(conversionTime).toBeLessThan(2000); + + // Verify file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + + // Test ingestion + const cli = createCLI({ exitProcess: false }); + const ingestStartTime = Date.now(); + + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + const ingestTime = Date.now() - ingestStartTime; + + // Should complete ingestion quickly (less than 3 seconds for 100 records) + expect(ingestTime).toBeLessThan(3000); + + // Verify database was created + const dbStats = await stat(dataDir); + expect(dbStats.isDirectory()).toBe(true); + }); + }); +}); diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts 
b/apps/ensrainbow/src/commands/convert-csv-command.ts new file mode 100644 index 000000000..1c04fbf5c --- /dev/null +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -0,0 +1,248 @@ +/** + * ENSRAINBOW CSV FILE CREATION COMMAND + * + * Converts CSV files to .ensrainbow format with csv-simple-parser + * Supports 1-column (label only) and 2-column (label,labelhash) formats + */ + +import { createReadStream, createWriteStream } from "fs"; +import { createInterface } from "readline"; +import { type LabelHash, labelHashToBytes } from "@ensnode/ensnode-sdk"; +import parse from "csv-simple-parser"; +import { labelhash } from "viem"; +import { logger } from "../utils/logger.js"; +import { + CURRENT_ENSRAINBOW_FILE_FORMAT_VERSION, + createRainbowProtobufRoot, +} from "../utils/protobuf-schema.js"; + +/** + * Parse CSV using csv-simple-parser + */ +function parseCsvLine(line: string): string[] { + const result = parse(line); + return result.length > 0 ? (result[0] as string[]) : []; +} + +// No label validation - ENS accepts any UTF-8 string + +export interface ConvertCsvCommandOptions { + inputFile: string; + outputFile: string; + labelSetId: string; + labelSetVersion: number; +} + +interface ConversionStats { + totalLines: number; + processedRecords: number; + skippedRecords: number; + invalidLabels: number; + duplicates: number; + startTime: Date; + endTime?: Date; +} + +/** + * Process a single CSV line with csv-simple-parser and validation + */ +function processStreamingCsvLine(line: string, expectedColumns: number): string[] { + if (line.trim() === "") { + throw new Error("Empty line"); + } + + const parsedLine = parseCsvLine(line); + + // Validate column count + if (parsedLine.length !== expectedColumns) { + throw new Error( + `Expected ${expectedColumns} columns, but found ${parsedLine.length} in line: ${line}`, + ); + } + + return parsedLine; +} + +/** + * Setup input stream for reading CSV line by line + */ +function setupReadStream(inputFile: string) { + const fileStream = createReadStream(inputFile, { encoding: "utf8" }); + return createInterface({ + input: fileStream, + crlfDelay: Infinity, + }); +} + +/** + * Setup output stream for writing protobuf + */ +function setupWriteStream(outputFile: string) { + // For now, just write directly to file without gzip compression + return createWriteStream(outputFile); +} + +/** + * Write protobuf header + */ +function writeHeader( + outputStream: NodeJS.WritableStream, + RainbowRecordCollectionType: any, + labelSetId: string, + labelSetVersion: number, +) { + const headerCollection = RainbowRecordCollectionType.fromObject({ + format_identifier: "ensrainbow", + ensrainbow_file_format_version: CURRENT_ENSRAINBOW_FILE_FORMAT_VERSION, + label_set_id: labelSetId, + label_set_version: labelSetVersion, + records: [], // Header has no records + }); + // Encode and write the header collection with length-prefix encoding + outputStream.write( + Buffer.from(RainbowRecordCollectionType.encodeDelimited(headerCollection).finish()), + ); + logger.info("Wrote header message with version, label set id and label set version."); +} + +/** + * Log conversion summary + */ +function logSummary(stats: ConversionStats) { + stats.endTime = new Date(); + const duration = stats.endTime.getTime() - stats.startTime.getTime(); + + logger.info("=== Conversion Summary ==="); + logger.info(`Total lines processed: ${stats.totalLines}`); + logger.info(`Valid records: ${stats.processedRecords}`); + logger.info(`Skipped records: ${stats.skippedRecords}`); + 
logger.info(`Invalid labels: ${stats.invalidLabels}`); + logger.info(`Duplicates found: ${stats.duplicates}`); + logger.info(`Duration: ${duration}ms`); +} + +/** + * Main CSV conversion command with true streaming using csv-simple-parser + */ +export async function convertCsvCommand(options: ConvertCsvCommandOptions): Promise { + const stats: ConversionStats = { + totalLines: 0, + processedRecords: 0, + skippedRecords: 0, + invalidLabels: 0, + duplicates: 0, + startTime: new Date(), + }; + + try { + logger.info("Starting conversion from CSV to protobuf format..."); + logger.info(`Input file: ${options.inputFile}`); + logger.info(`Output file: ${options.outputFile}`); + logger.info(`Label set id: ${options.labelSetId}`); + logger.info(`Label set version: ${options.labelSetVersion}`); + + // Setup protobuf schema + const { RainbowRecordType, RainbowRecordCollectionType } = createRainbowProtobufRoot(); + + // Setup streams + const outputStream = setupWriteStream(options.outputFile); + + // Write header + writeHeader( + outputStream, + RainbowRecordCollectionType, + options.labelSetId, + options.labelSetVersion, + ); + + logger.info("Reading and processing CSV file line by line with streaming..."); + + // Setup streaming CSV reader + const rl = setupReadStream(options.inputFile); + + let expectedColumns: number | null = null; + let lineNumber = 0; + let processedRecords = 0; + + // Process line by line with csv-simple-parser + for await (const line of rl) { + lineNumber++; + + // Skip empty lines + if (line.trim() === "") { + continue; + } + + try { + // For the first line, detect column count + if (expectedColumns === null) { + const firstLineParsed = parseCsvLine(line); + expectedColumns = firstLineParsed.length; + logger.info(`Detected ${expectedColumns} columns using csv-simple-parser`); + } + + // Parse current line with csv-simple-parser + const parsedColumns = processStreamingCsvLine(line, expectedColumns); + + // Get label (no validation - ENS accepts any UTF-8 string) + const label = parsedColumns[0]; + + // Build rainbow record immediately (streaming) + let rainbowRecord; + + if (parsedColumns.length === 1) { + // Single column: compute labelhash using labelhash function + const labelHashBytes = labelHashToBytes(labelhash(label)); + + rainbowRecord = { + labelhash: Buffer.from(labelHashBytes), + label: label, + }; + } else { + // Two columns: validate and use provided hash + const [, providedHash] = parsedColumns; + + // Ensure the hash has 0x prefix for labelHashToBytes + const maybeLabelHash = providedHash.startsWith("0x") ? providedHash : `0x${providedHash}`; + const labelHash = labelHashToBytes(maybeLabelHash as LabelHash); + + rainbowRecord = { + labelhash: Buffer.from(labelHash), + label: label, + }; + } + + // Create protobuf message and write immediately + const recordMessage = RainbowRecordType.fromObject(rainbowRecord); + outputStream.write(Buffer.from(RainbowRecordType.encodeDelimited(recordMessage).finish())); + + processedRecords++; + + // Log progress for large files + if (processedRecords % 10000 === 0) { + logger.info(`Processed ${processedRecords} records so far...`); + } + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + throw new Error( + `CSV conversion failed due to invalid data on line ${lineNumber}: ${errorMessage}`, + ); + } + } + + stats.totalLines = lineNumber; + stats.processedRecords = processedRecords; + + // Close output stream + outputStream.end(); + + logger.info(`✅ Processed ${processedRecords} records with streaming csv-simple-parser`); + + logSummary(stats); + logger.info("✅ CSV conversion completed successfully!"); + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger.error("❌ CSV conversion failed:", errorMessage); + throw error; + } +} diff --git a/apps/ensrainbow/test/fixtures/test_labels_1col.csv b/apps/ensrainbow/test/fixtures/test_labels_1col.csv new file mode 100644 index 000000000..d809bd116 --- /dev/null +++ b/apps/ensrainbow/test/fixtures/test_labels_1col.csv @@ -0,0 +1,10 @@ +alice +bob +charlie +domaintest +example +foundation +governance +hello +world +test123 diff --git a/apps/ensrainbow/test/fixtures/test_labels_2col.csv b/apps/ensrainbow/test/fixtures/test_labels_2col.csv new file mode 100644 index 000000000..f410bf758 --- /dev/null +++ b/apps/ensrainbow/test/fixtures/test_labels_2col.csv @@ -0,0 +1,10 @@ +alice,0x9c0257114eb9399a2985f8e75dad7600c5d89fe3824ffa99ec1c3eb8bf3b0501 +bob,0x38e47a7b719dce63662aeaf43440326f551b8a7ee198cee35cb5d517f2d296a2 +charlie,0x87a213ce1ee769e28decedefb98f6fe48890a74ba84957ebf877fb591e37e0de +domaintest,0xc2d1b32ab4268fbba175baa3dcab1eb8299bc784030b080f28eaf1b9336c0445 +example,0x6fd43e7cffc31bb581d7421c8698e29aa2bd8e7186a394b85299908b4eb9b175 +foundation,0x0d5c1bd818a4086f28314415cb375a937593efab66f8f7d2903bf2a13ed35070 +governance,0xabea6fd3db56a6e6d0242111b43ebb13d1c42709651c032c7894962023a1f90a +hello,0x1c8aff950685c2ed4bc3174f3472287b56d9517b9c948127319a09a7a36deac8 +world,0x8452c9b9140222b08593a26daa782707297be9f7b3e8281d7b4974769f19afd0 +test123,0xf81b517a242b218999ec8eec0ea6e2ddbef2a367a14e93f4a32a39e260f686ad diff --git a/apps/ensrainbow/test/fixtures/test_labels_invalid_first.csv b/apps/ensrainbow/test/fixtures/test_labels_invalid_first.csv new file mode 100644 index 000000000..3d0b7b7e0 --- /dev/null +++ b/apps/ensrainbow/test/fixtures/test_labels_invalid_first.csv @@ -0,0 +1,3 @@ +label1,hash1,extra_column +validlabel +another_valid diff --git a/apps/ensrainbow/test/fixtures/test_labels_invalid_hash.csv b/apps/ensrainbow/test/fixtures/test_labels_invalid_hash.csv new file mode 100644 index 000000000..484983db9 --- /dev/null +++ b/apps/ensrainbow/test/fixtures/test_labels_invalid_hash.csv @@ -0,0 +1,4 @@ +validlabel,0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef +invalidhash,not-a-hex-hash +anotherlabel,0x123 +toolong,0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef123456789 diff --git a/apps/ensrainbow/test/fixtures/test_labels_special_chars.csv b/apps/ensrainbow/test/fixtures/test_labels_special_chars.csv new file mode 100644 index 000000000..a1cc2a55f --- /dev/null +++ b/apps/ensrainbow/test/fixtures/test_labels_special_chars.csv @@ -0,0 +1,10 @@ +🔥emoji-label🚀 +"label,with,commas" +"label with newline\n character" +Ąśćžłñ-unicode +"label-with-null\0byte" +"quoted label with spaces" +中文-chinese +😀😁😂🤣-multiple-emojis +"special""quotes""inside" +café-àçćént diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2045e22d5..8c8c0b79b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -462,6 +462,9 @@ importers: classic-level: specifier: ^1.4.1 version: 1.4.1 + csv-simple-parser: + specifier: ^2.0.2 + version: 
2.0.2 hono: specifier: 'catalog:' version: 4.10.3 @@ -4161,6 +4164,9 @@ packages: csstype@3.2.3: resolution: {integrity: sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==} + csv-simple-parser@2.0.2: + resolution: {integrity: sha512-G9KUSB7Bh8mRjZcg340FJM96tJYPPfb+UjR6T+dOcdRLChmwOTP6jB9+rJwmqDoaPHMJW/CXabYbJ1ZEjbkrrg==} + cytoscape-cose-bilkent@4.1.0: resolution: {integrity: sha512-wgQlVIUJF13Quxiv5e1gstZ08rnZj2XaLHGoFMYXz7SkNfCDOOteKBE6SYRfA9WxxI/iBc3ajfDoc6hb/MRAHQ==} peerDependencies: @@ -4404,6 +4410,9 @@ packages: destr@2.0.5: resolution: {integrity: sha512-ugFTXCtDZunbzasqBxrK93Ik/DRYsO6S/fedkWEMKqt04xZ4csmnmwGDBAb07QWNaGMAmnTIemsYZCksjATwsA==} + detect-eol@3.0.1: + resolution: {integrity: sha512-ncnuLiZCKO7Kt+3CpwUIV8QnnwpBsSFxGQBY6Nve18K2aOrTim2xpzDa8YunHkePt39OCfV2qOX+b7xjYSDRWg==} + detect-indent@6.1.0: resolution: {integrity: sha512-reYkTUJAZb9gUuZ2RvVCNhVHdg62RHnJ7WJl8ftMi4diZ6NWlciOzQN88pUhSELEwflJht4oQDv0F0BMlwaYtA==} engines: {node: '>=8'} @@ -8672,7 +8681,7 @@ snapshots: '@expressive-code/plugin-shiki@0.41.3': dependencies: '@expressive-code/core': 0.41.3 - shiki: 3.14.0 + shiki: 3.15.0 '@expressive-code/plugin-text-markers@0.41.3': dependencies: @@ -11662,6 +11671,10 @@ snapshots: csstype@3.2.3: {} + csv-simple-parser@2.0.2: + dependencies: + detect-eol: 3.0.1 + cytoscape-cose-bilkent@4.1.0(cytoscape@3.33.1): dependencies: cose-base: 1.0.3 @@ -11905,6 +11918,8 @@ snapshots: destr@2.0.5: {} + detect-eol@3.0.1: {} + detect-indent@6.1.0: {} detect-libc@2.1.2: {} From b49144124c2550bc02385f8a4268b6462cd8dbec Mon Sep 17 00:00:00 2001 From: djstrong Date: Tue, 30 Sep 2025 17:25:09 +0200 Subject: [PATCH 02/30] refactor --- apps/ensrainbow/src/cli.ts | 9 +- .../src/commands/convert-csv-command.ts | 242 ++++++++++-------- 2 files changed, 148 insertions(+), 103 deletions(-) diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index 063c48df2..940692729 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -67,6 +67,7 @@ interface ConvertCsvArgs { "output-file": string; "label-set-id": LabelSetId; "label-set-version": LabelSetVersion; + "progress-interval"?: number; } export interface CLIOptions { @@ -253,7 +254,12 @@ export function createCLI(options: CLIOptions = {}) { description: "Label set version for the rainbow record collection", demandOption: true, }) - .coerce("label-set-version", buildLabelSetVersion); + .coerce("label-set-version", buildLabelSetVersion) + .option("progress-interval", { + type: "number", + description: "Number of records to process before logging progress", + default: 10000, + }); }, async (argv: ArgumentsCamelCase) => { await convertCsvCommand({ @@ -261,6 +267,7 @@ export function createCLI(options: CLIOptions = {}) { outputFile: argv["output-file"], labelSetId: argv["label-set-id"], labelSetVersion: argv["label-set-version"], + progressInterval: argv["progress-interval"], }); }, ) diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index 1c04fbf5c..0b4ed5d6b 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -17,11 +17,14 @@ import { } from "../utils/protobuf-schema.js"; /** - * Parse CSV using csv-simple-parser + * Parse CSV using csv-simple-parser with proper type safety */ function parseCsvLine(line: string): string[] { const result = parse(line); - return result.length > 0 ? 
(result[0] as string[]) : []; + if (result.length === 0) return []; + const firstRow = result[0]; + if (!Array.isArray(firstRow)) return []; + return firstRow.filter((item) => typeof item === "string"); } // No label validation - ENS accepts any UTF-8 string @@ -31,14 +34,15 @@ export interface ConvertCsvCommandOptions { outputFile: string; labelSetId: string; labelSetVersion: number; + progressInterval?: number; } +// Configuration constants +const DEFAULT_PROGRESS_INTERVAL = 10000; + interface ConversionStats { totalLines: number; processedRecords: number; - skippedRecords: number; - invalidLabels: number; - duplicates: number; startTime: Date; endTime?: Date; } @@ -115,12 +119,123 @@ function logSummary(stats: ConversionStats) { logger.info("=== Conversion Summary ==="); logger.info(`Total lines processed: ${stats.totalLines}`); logger.info(`Valid records: ${stats.processedRecords}`); - logger.info(`Skipped records: ${stats.skippedRecords}`); - logger.info(`Invalid labels: ${stats.invalidLabels}`); - logger.info(`Duplicates found: ${stats.duplicates}`); logger.info(`Duration: ${duration}ms`); } +/** + * Initialize conversion setup and logging + */ +function initializeConversion(options: ConvertCsvCommandOptions) { + logger.info("Starting conversion from CSV to protobuf format..."); + logger.info(`Input file: ${options.inputFile}`); + logger.info(`Output file: ${options.outputFile}`); + logger.info(`Label set id: ${options.labelSetId}`); + logger.info(`Label set version: ${options.labelSetVersion}`); + + const { RainbowRecordType, RainbowRecordCollectionType } = createRainbowProtobufRoot(); + const outputStream = setupWriteStream(options.outputFile); + + writeHeader( + outputStream, + RainbowRecordCollectionType, + options.labelSetId, + options.labelSetVersion, + ); + + logger.info("Reading and processing CSV file line by line with streaming..."); + + return { RainbowRecordType, outputStream }; +} + +/** + * Create rainbow record from parsed CSV columns + */ +function createRainbowRecord(parsedColumns: string[]): { labelhash: Buffer; label: string } { + const label = parsedColumns[0]; + + if (parsedColumns.length === 1) { + // Single column: compute labelhash using labelhash function + const labelHashBytes = labelHashToBytes(labelhash(label)); + return { + labelhash: Buffer.from(labelHashBytes), + label: label, + }; + } else { + // Two columns: validate and use provided hash + const [, providedHash] = parsedColumns; + const maybeLabelHash = providedHash.startsWith("0x") ? 
providedHash : `0x${providedHash}`; + const labelHash = labelHashToBytes(maybeLabelHash as LabelHash); + return { + labelhash: Buffer.from(labelHash), + label: label, + }; + } +} + +/** + * Process a single CSV record + */ +function processRecord( + line: string, + expectedColumns: number, + RainbowRecordType: any, + outputStream: NodeJS.WritableStream, +): void { + const parsedColumns = processStreamingCsvLine(line, expectedColumns); + const rainbowRecord = createRainbowRecord(parsedColumns); + + // Create protobuf message and write immediately + const recordMessage = RainbowRecordType.fromObject(rainbowRecord); + outputStream.write(Buffer.from(RainbowRecordType.encodeDelimited(recordMessage).finish())); +} + +/** + * Process the entire CSV file + */ +async function processCSVFile( + rl: ReturnType, + RainbowRecordType: any, + outputStream: NodeJS.WritableStream, + progressInterval: number, +): Promise<{ totalLines: number; processedRecords: number }> { + let expectedColumns: number | null = null; + let lineNumber = 0; + let processedRecords = 0; + + for await (const line of rl) { + lineNumber++; + + // Skip empty lines + if (line.trim() === "") { + continue; + } + + try { + // For the first line, detect column count + if (expectedColumns === null) { + const firstLineParsed = parseCsvLine(line); + expectedColumns = firstLineParsed.length; + logger.info(`Detected ${expectedColumns} columns using csv-simple-parser`); + } + + processRecord(line, expectedColumns, RainbowRecordType, outputStream); + processedRecords++; + + // Log progress for large files + if (processedRecords % progressInterval === 0) { + logger.info(`Processed ${processedRecords} records so far...`); + } + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + throw new Error( + `CSV conversion failed due to invalid data on line ${lineNumber}: ${errorMessage}`, + ); + } + } + + return { totalLines: lineNumber, processedRecords }; +} + /** * Main CSV conversion command with true streaming using csv-simple-parser */ @@ -128,121 +243,44 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom const stats: ConversionStats = { totalLines: 0, processedRecords: 0, - skippedRecords: 0, - invalidLabels: 0, - duplicates: 0, startTime: new Date(), }; + let rl: ReturnType | null = null; + try { - logger.info("Starting conversion from CSV to protobuf format..."); - logger.info(`Input file: ${options.inputFile}`); - logger.info(`Output file: ${options.outputFile}`); - logger.info(`Label set id: ${options.labelSetId}`); - logger.info(`Label set version: ${options.labelSetVersion}`); + const { RainbowRecordType, outputStream } = initializeConversion(options); - // Setup protobuf schema - const { RainbowRecordType, RainbowRecordCollectionType } = createRainbowProtobufRoot(); + // Setup streaming CSV reader + rl = setupReadStream(options.inputFile); - // Setup streams - const outputStream = setupWriteStream(options.outputFile); + const progressInterval = options.progressInterval ?? 
DEFAULT_PROGRESS_INTERVAL; - // Write header - writeHeader( + // Process the CSV file + const { totalLines, processedRecords } = await processCSVFile( + rl, + RainbowRecordType, outputStream, - RainbowRecordCollectionType, - options.labelSetId, - options.labelSetVersion, + progressInterval, ); - logger.info("Reading and processing CSV file line by line with streaming..."); - - // Setup streaming CSV reader - const rl = setupReadStream(options.inputFile); - - let expectedColumns: number | null = null; - let lineNumber = 0; - let processedRecords = 0; - - // Process line by line with csv-simple-parser - for await (const line of rl) { - lineNumber++; - - // Skip empty lines - if (line.trim() === "") { - continue; - } - - try { - // For the first line, detect column count - if (expectedColumns === null) { - const firstLineParsed = parseCsvLine(line); - expectedColumns = firstLineParsed.length; - logger.info(`Detected ${expectedColumns} columns using csv-simple-parser`); - } - - // Parse current line with csv-simple-parser - const parsedColumns = processStreamingCsvLine(line, expectedColumns); - - // Get label (no validation - ENS accepts any UTF-8 string) - const label = parsedColumns[0]; - - // Build rainbow record immediately (streaming) - let rainbowRecord; - - if (parsedColumns.length === 1) { - // Single column: compute labelhash using labelhash function - const labelHashBytes = labelHashToBytes(labelhash(label)); - - rainbowRecord = { - labelhash: Buffer.from(labelHashBytes), - label: label, - }; - } else { - // Two columns: validate and use provided hash - const [, providedHash] = parsedColumns; - - // Ensure the hash has 0x prefix for labelHashToBytes - const maybeLabelHash = providedHash.startsWith("0x") ? providedHash : `0x${providedHash}`; - const labelHash = labelHashToBytes(maybeLabelHash as LabelHash); - - rainbowRecord = { - labelhash: Buffer.from(labelHash), - label: label, - }; - } - - // Create protobuf message and write immediately - const recordMessage = RainbowRecordType.fromObject(rainbowRecord); - outputStream.write(Buffer.from(RainbowRecordType.encodeDelimited(recordMessage).finish())); - - processedRecords++; - - // Log progress for large files - if (processedRecords % 10000 === 0) { - logger.info(`Processed ${processedRecords} records so far...`); - } - } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error); - throw new Error( - `CSV conversion failed due to invalid data on line ${lineNumber}: ${errorMessage}`, - ); - } - } - - stats.totalLines = lineNumber; + stats.totalLines = totalLines; stats.processedRecords = processedRecords; // Close output stream outputStream.end(); logger.info(`✅ Processed ${processedRecords} records with streaming csv-simple-parser`); - logSummary(stats); logger.info("✅ CSV conversion completed successfully!"); } catch (error) { const errorMessage = error instanceof Error ? 
error.message : String(error); logger.error("❌ CSV conversion failed:", errorMessage); throw error; + } finally { + // Ensure readline interface is properly closed to prevent resource leaks + if (rl) { + rl.close(); + } } } From 4c18e0b904791a51fb1baf0d3092b58908361629 Mon Sep 17 00:00:00 2001 From: "kwrobel.eth" Date: Tue, 30 Sep 2025 14:29:40 +0200 Subject: [PATCH 03/30] Create brave-kiwis-notice.md --- .changeset/brave-kiwis-notice.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/brave-kiwis-notice.md diff --git a/.changeset/brave-kiwis-notice.md b/.changeset/brave-kiwis-notice.md new file mode 100644 index 000000000..fbdba8bfc --- /dev/null +++ b/.changeset/brave-kiwis-notice.md @@ -0,0 +1,5 @@ +--- +"ensrainbow": patch +--- + +feat: add CSV conversion command to ensrainbow CLI From 5aefe9dab4bff69fbcadf14879186838edd78184 Mon Sep 17 00:00:00 2001 From: djstrong Date: Wed, 1 Oct 2025 17:21:11 +0200 Subject: [PATCH 04/30] fix tests --- .../src/commands/convert-csv-command.test.ts | 54 ++++++++++++++----- .../src/commands/convert-csv-command.ts | 7 ++- .../test/fixtures/test_labels_1col.csv | 1 + .../test/fixtures/test_labels_2col.csv | 2 +- .../fixtures/test_labels_special_chars.csv | 3 +- 5 files changed, 47 insertions(+), 20 deletions(-) diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index 2be46d924..16a6c5cdb 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -4,8 +4,10 @@ import { mkdtemp, rm, stat, writeFile } from "fs/promises"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { createCLI } from "@/cli"; -import { type LabelSetId, type LabelSetVersion } from "@ensnode/ensnode-sdk"; +import { labelHashToBytes, type LabelSetId, type LabelSetVersion } from "@ensnode/ensnode-sdk"; import { convertCsvCommand } from "./convert-csv-command"; +import { ENSRainbowDB } from "@/lib/database"; +import { labelhash } from "viem"; // Path to test fixtures const TEST_FIXTURES_DIR = join(__dirname, "..", "..", "test", "fixtures"); @@ -47,14 +49,13 @@ describe("convert-csv-command", () => { const cli = createCLI({ exitProcess: false }); await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); - // Verify database was created - const dbStats = await stat(dataDir); - expect(dbStats.isDirectory()).toBe(true); - - // Verify database contents by validating it - await cli.parse(["validate", "--data-dir", dataDir, "--lite"]); - - // Database validation passed, which means records are accessible + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + const recordsCount = await db.getPrecalculatedRainbowRecordCount(); + expect(recordsCount).toBe(11); + expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("123"))))?.label).toBe("123"); + expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("1234"))))).toBe(null); + await db.close(); }); it("should convert two column CSV with provided hashes and ingest successfully", async () => { @@ -79,9 +80,13 @@ describe("convert-csv-command", () => { const cli = createCLI({ exitProcess: false }); await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); - // Verify database was created - const dbStats = await stat(dataDir); - expect(dbStats.isDirectory()).toBe(true); + const db = await ENSRainbowDB.open(dataDir); + 
expect(await db.validate()).toBe(true); + const recordsCount = await db.getPrecalculatedRainbowRecordCount(); + expect(recordsCount).toBe(10); + expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("test123"))))?.label).toBe("test123"); + expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("1234"))))).toBe(null); + await db.close(); }); it("should fail when CSV has inconsistent column count", async () => { @@ -99,9 +104,10 @@ describe("convert-csv-command", () => { ).rejects.toThrow(/CSV conversion failed due to invalid data/); }); - it("should handle CSV with special characters, emojis, unicode, and quoted fields", async () => { + it.only("should handle CSV with special characters, emojis, unicode, and quoted fields", async () => { const inputFile = join(TEST_FIXTURES_DIR, "test_labels_special_chars.csv"); const outputFile = join(tempDir, "output_special.ensrainbow"); + const dataDir = join(tempDir, "db_special"); // Convert CSV to ensrainbow format await convertCsvCommand({ @@ -119,7 +125,27 @@ describe("convert-csv-command", () => { // Verify special characters were processed correctly by checking logs // The conversion completed successfully, which means csv-simple-parser // handled emojis, unicode, quoted fields with commas, etc. - expect(true).toBe(true); // Test passes if conversion doesn't crash + + // Ingest the converted file into database + const cli = createCLI({ exitProcess: false }); + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + const recordsCount = await db.getPrecalculatedRainbowRecordCount(); + expect(recordsCount).toBe(10); + const labels = [ + "🔥emoji-label🚀", + "special\"quotes\"inside", + "label with newline\n character", + "label-with-null\0byte", + ]; + for (const label of labels) { + expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash(label))))?.label).toBe(label); + } + expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("1234"))))).toBe(null); + await db.close(); + }); it("should fail when CSV contains invalid labelhash format", async () => { diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index 0b4ed5d6b..7b08da655 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -20,15 +20,13 @@ import { * Parse CSV using csv-simple-parser with proper type safety */ function parseCsvLine(line: string): string[] { - const result = parse(line); + const result = parse(line, {optimistic: false}); if (result.length === 0) return []; const firstRow = result[0]; if (!Array.isArray(firstRow)) return []; - return firstRow.filter((item) => typeof item === "string"); + return firstRow.map((item) => String(item)); } -// No label validation - ENS accepts any UTF-8 string - export interface ConvertCsvCommandOptions { inputFile: string; outputFile: string; @@ -156,6 +154,7 @@ function createRainbowRecord(parsedColumns: string[]): { labelhash: Buffer; labe if (parsedColumns.length === 1) { // Single column: compute labelhash using labelhash function const labelHashBytes = labelHashToBytes(labelhash(label)); + console.log(label); return { labelhash: Buffer.from(labelHashBytes), label: label, diff --git a/apps/ensrainbow/test/fixtures/test_labels_1col.csv b/apps/ensrainbow/test/fixtures/test_labels_1col.csv index d809bd116..302ef8d63 100644 --- 
a/apps/ensrainbow/test/fixtures/test_labels_1col.csv +++ b/apps/ensrainbow/test/fixtures/test_labels_1col.csv @@ -8,3 +8,4 @@ governance hello world test123 +123 diff --git a/apps/ensrainbow/test/fixtures/test_labels_2col.csv b/apps/ensrainbow/test/fixtures/test_labels_2col.csv index f410bf758..e02a65762 100644 --- a/apps/ensrainbow/test/fixtures/test_labels_2col.csv +++ b/apps/ensrainbow/test/fixtures/test_labels_2col.csv @@ -1,7 +1,7 @@ alice,0x9c0257114eb9399a2985f8e75dad7600c5d89fe3824ffa99ec1c3eb8bf3b0501 bob,0x38e47a7b719dce63662aeaf43440326f551b8a7ee198cee35cb5d517f2d296a2 charlie,0x87a213ce1ee769e28decedefb98f6fe48890a74ba84957ebf877fb591e37e0de -domaintest,0xc2d1b32ab4268fbba175baa3dcab1eb8299bc784030b080f28eaf1b9336c0445 +domaintest,0x56827be2a1678c2593e2a613fe8c4138ec451ab019d70cd890e007f99b513be1 example,0x6fd43e7cffc31bb581d7421c8698e29aa2bd8e7186a394b85299908b4eb9b175 foundation,0x0d5c1bd818a4086f28314415cb375a937593efab66f8f7d2903bf2a13ed35070 governance,0xabea6fd3db56a6e6d0242111b43ebb13d1c42709651c032c7894962023a1f90a diff --git a/apps/ensrainbow/test/fixtures/test_labels_special_chars.csv b/apps/ensrainbow/test/fixtures/test_labels_special_chars.csv index a1cc2a55f..300cfc70a 100644 --- a/apps/ensrainbow/test/fixtures/test_labels_special_chars.csv +++ b/apps/ensrainbow/test/fixtures/test_labels_special_chars.csv @@ -1,6 +1,7 @@ 🔥emoji-label🚀 "label,with,commas" -"label with newline\n character" +"label with newline + character" Ąśćžłñ-unicode "label-with-null\0byte" "quoted label with spaces" From f2c8f20309c1d5e3f40c3ad8dc530e1200f697de Mon Sep 17 00:00:00 2001 From: djstrong Date: Wed, 1 Oct 2025 18:07:33 +0200 Subject: [PATCH 05/30] use fast-csv package --- apps/ensrainbow/package.json | 2 +- .../src/commands/convert-csv-command.test.ts | 29 +-- .../src/commands/convert-csv-command.ts | 175 +++++++----------- .../fixtures/test_labels_special_chars.csv | Bin 235 -> 234 bytes pnpm-lock.yaml | 60 ++++-- 5 files changed, 134 insertions(+), 132 deletions(-) diff --git a/apps/ensrainbow/package.json b/apps/ensrainbow/package.json index af46315e9..046cb2e2e 100644 --- a/apps/ensrainbow/package.json +++ b/apps/ensrainbow/package.json @@ -39,7 +39,7 @@ "protobufjs": "^7.4.0", "viem": "catalog:", "yargs": "^17.7.2", - "csv-simple-parser": "^2.0.2" + "@fast-csv/parse": "^5.0.0" }, "devDependencies": { "@ensnode/shared-configs": "workspace:*", diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index 16a6c5cdb..795e53bdc 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -4,10 +4,10 @@ import { mkdtemp, rm, stat, writeFile } from "fs/promises"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { createCLI } from "@/cli"; -import { labelHashToBytes, type LabelSetId, type LabelSetVersion } from "@ensnode/ensnode-sdk"; -import { convertCsvCommand } from "./convert-csv-command"; import { ENSRainbowDB } from "@/lib/database"; +import { type LabelSetId, type LabelSetVersion, labelHashToBytes } from "@ensnode/ensnode-sdk"; import { labelhash } from "viem"; +import { convertCsvCommand } from "./convert-csv-command"; // Path to test fixtures const TEST_FIXTURES_DIR = join(__dirname, "..", "..", "test", "fixtures"); @@ -53,8 +53,10 @@ describe("convert-csv-command", () => { expect(await db.validate()).toBe(true); const recordsCount = await db.getPrecalculatedRainbowRecordCount(); 
expect(recordsCount).toBe(11); - expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("123"))))?.label).toBe("123"); - expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("1234"))))).toBe(null); + expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("123"))))?.label).toBe( + "123", + ); + expect(await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("1234")))).toBe(null); await db.close(); }); @@ -84,8 +86,10 @@ describe("convert-csv-command", () => { expect(await db.validate()).toBe(true); const recordsCount = await db.getPrecalculatedRainbowRecordCount(); expect(recordsCount).toBe(10); - expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("test123"))))?.label).toBe("test123"); - expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("1234"))))).toBe(null); + expect( + (await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("test123"))))?.label, + ).toBe("test123"); + expect(await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("1234")))).toBe(null); await db.close(); }); @@ -104,7 +108,7 @@ describe("convert-csv-command", () => { ).rejects.toThrow(/CSV conversion failed due to invalid data/); }); - it.only("should handle CSV with special characters, emojis, unicode, and quoted fields", async () => { + it("should handle CSV with special characters, emojis, unicode, and quoted fields", async () => { const inputFile = join(TEST_FIXTURES_DIR, "test_labels_special_chars.csv"); const outputFile = join(tempDir, "output_special.ensrainbow"); const dataDir = join(tempDir, "db_special"); @@ -135,17 +139,18 @@ describe("convert-csv-command", () => { const recordsCount = await db.getPrecalculatedRainbowRecordCount(); expect(recordsCount).toBe(10); const labels = [ - "🔥emoji-label🚀", - "special\"quotes\"inside", + "🔥emoji-label🚀", + 'special"quotes"inside', "label with newline\n character", "label-with-null\0byte", ]; for (const label of labels) { - expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash(label))))?.label).toBe(label); + expect( + (await db.getVersionedRainbowRecord(labelHashToBytes(labelhash(label))))?.label, + ).toBe(label); } - expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("1234"))))).toBe(null); + expect(await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("1234")))).toBe(null); await db.close(); - }); it("should fail when CSV contains invalid labelhash format", async () => { diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index 7b08da655..14ae2d4b3 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -1,14 +1,13 @@ /** * ENSRAINBOW CSV FILE CREATION COMMAND * - * Converts CSV files to .ensrainbow format with csv-simple-parser + * Converts CSV files to .ensrainbow format with fast-csv * Supports 1-column (label only) and 2-column (label,labelhash) formats */ import { createReadStream, createWriteStream } from "fs"; -import { createInterface } from "readline"; import { type LabelHash, labelHashToBytes } from "@ensnode/ensnode-sdk"; -import parse from "csv-simple-parser"; +import { parse } from "@fast-csv/parse"; import { labelhash } from "viem"; import { logger } from "../utils/logger.js"; import { @@ -16,17 +15,6 @@ import { createRainbowProtobufRoot, } from "../utils/protobuf-schema.js"; -/** - * Parse CSV using csv-simple-parser with proper type safety - */ -function parseCsvLine(line: 
string): string[] { - const result = parse(line, {optimistic: false}); - if (result.length === 0) return []; - const firstRow = result[0]; - if (!Array.isArray(firstRow)) return []; - return firstRow.map((item) => String(item)); -} - export interface ConvertCsvCommandOptions { inputFile: string; outputFile: string; @@ -45,37 +33,6 @@ interface ConversionStats { endTime?: Date; } -/** - * Process a single CSV line with csv-simple-parser and validation - */ -function processStreamingCsvLine(line: string, expectedColumns: number): string[] { - if (line.trim() === "") { - throw new Error("Empty line"); - } - - const parsedLine = parseCsvLine(line); - - // Validate column count - if (parsedLine.length !== expectedColumns) { - throw new Error( - `Expected ${expectedColumns} columns, but found ${parsedLine.length} in line: ${line}`, - ); - } - - return parsedLine; -} - -/** - * Setup input stream for reading CSV line by line - */ -function setupReadStream(inputFile: string) { - const fileStream = createReadStream(inputFile, { encoding: "utf8" }); - return createInterface({ - input: fileStream, - crlfDelay: Infinity, - }); -} - /** * Setup output stream for writing protobuf */ @@ -146,12 +103,12 @@ function initializeConversion(options: ConvertCsvCommandOptions) { } /** - * Create rainbow record from parsed CSV columns + * Create rainbow record from parsed CSV row */ -function createRainbowRecord(parsedColumns: string[]): { labelhash: Buffer; label: string } { - const label = parsedColumns[0]; +function createRainbowRecord(row: string[]): { labelhash: Buffer; label: string } { + const label = String(row[0]); - if (parsedColumns.length === 1) { + if (row.length === 1) { // Single column: compute labelhash using labelhash function const labelHashBytes = labelHashToBytes(labelhash(label)); console.log(label); @@ -161,7 +118,7 @@ function createRainbowRecord(parsedColumns: string[]): { labelhash: Buffer; labe }; } else { // Two columns: validate and use provided hash - const [, providedHash] = parsedColumns; + const providedHash = String(row[1]); const maybeLabelHash = providedHash.startsWith("0x") ? 
providedHash : `0x${providedHash}`; const labelHash = labelHashToBytes(maybeLabelHash as LabelHash); return { @@ -175,13 +132,20 @@ function createRainbowRecord(parsedColumns: string[]): { labelhash: Buffer; labe * Process a single CSV record */ function processRecord( - line: string, + row: string[], expectedColumns: number, RainbowRecordType: any, outputStream: NodeJS.WritableStream, + lineNumber: number, ): void { - const parsedColumns = processStreamingCsvLine(line, expectedColumns); - const rainbowRecord = createRainbowRecord(parsedColumns); + // Validate column count + if (row.length !== expectedColumns) { + throw new Error( + `Expected ${expectedColumns} columns, but found ${row.length} in line ${lineNumber}`, + ); + } + + const rainbowRecord = createRainbowRecord(row); // Create protobuf message and write immediately const recordMessage = RainbowRecordType.fromObject(rainbowRecord); @@ -189,54 +153,67 @@ function processRecord( } /** - * Process the entire CSV file + * Process the entire CSV file using fast-csv */ async function processCSVFile( - rl: ReturnType, + inputFile: string, RainbowRecordType: any, outputStream: NodeJS.WritableStream, progressInterval: number, ): Promise<{ totalLines: number; processedRecords: number }> { - let expectedColumns: number | null = null; - let lineNumber = 0; - let processedRecords = 0; - - for await (const line of rl) { - lineNumber++; - - // Skip empty lines - if (line.trim() === "") { - continue; - } - - try { - // For the first line, detect column count - if (expectedColumns === null) { - const firstLineParsed = parseCsvLine(line); - expectedColumns = firstLineParsed.length; - logger.info(`Detected ${expectedColumns} columns using csv-simple-parser`); - } - - processRecord(line, expectedColumns, RainbowRecordType, outputStream); - processedRecords++; - - // Log progress for large files - if (processedRecords % progressInterval === 0) { - logger.info(`Processed ${processedRecords} records so far...`); - } - } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error); - throw new Error( - `CSV conversion failed due to invalid data on line ${lineNumber}: ${errorMessage}`, - ); - } - } - - return { totalLines: lineNumber, processedRecords }; + return new Promise((resolve, reject) => { + let expectedColumns: number | null = null; + let lineNumber = 0; + let processedRecords = 0; + + const fileStream = createReadStream(inputFile, { encoding: "utf8" }); + + const csvStream = parse() + .on("data", (row: string[]) => { + lineNumber++; + + try { + // For the first row, detect column count + if (expectedColumns === null) { + expectedColumns = row.length; + logger.info(`Detected ${expectedColumns} columns using fast-csv`); + } + + processRecord(row, expectedColumns, RainbowRecordType, outputStream, lineNumber); + processedRecords++; + + // Log progress for large files + if (processedRecords % progressInterval === 0) { + logger.info(`Processed ${processedRecords} records so far...`); + } + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + csvStream.destroy(); + fileStream.destroy(); + reject( + new Error( + `CSV conversion failed due to invalid data on line ${lineNumber}: ${errorMessage}`, + ), + ); + } + }) + .on("error", (error: Error) => { + reject(new Error(`CSV parsing error: ${error.message}`)); + }) + .on("end", () => { + resolve({ totalLines: lineNumber, processedRecords }); + }); + + fileStream + .on("error", (error: Error) => { + reject(error); + }) + .pipe(csvStream); + }); } /** - * Main CSV conversion command with true streaming using csv-simple-parser + * Main CSV conversion command with true streaming using fast-csv */ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Promise { const stats: ConversionStats = { @@ -245,19 +222,14 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom startTime: new Date(), }; - let rl: ReturnType | null = null; - try { const { RainbowRecordType, outputStream } = initializeConversion(options); - // Setup streaming CSV reader - rl = setupReadStream(options.inputFile); - const progressInterval = options.progressInterval ?? DEFAULT_PROGRESS_INTERVAL; // Process the CSV file const { totalLines, processedRecords } = await processCSVFile( - rl, + options.inputFile, RainbowRecordType, outputStream, progressInterval, @@ -269,17 +241,12 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom // Close output stream outputStream.end(); - logger.info(`✅ Processed ${processedRecords} records with streaming csv-simple-parser`); + logger.info(`✅ Processed ${processedRecords} records with streaming fast-csv`); logSummary(stats); logger.info("✅ CSV conversion completed successfully!"); } catch (error) { const errorMessage = error instanceof Error ? 
error.message : String(error); logger.error("❌ CSV conversion failed:", errorMessage); throw error; - } finally { - // Ensure readline interface is properly closed to prevent resource leaks - if (rl) { - rl.close(); - } } } diff --git a/apps/ensrainbow/test/fixtures/test_labels_special_chars.csv b/apps/ensrainbow/test/fixtures/test_labels_special_chars.csv index 300cfc70a9f1230c7346e7b38832f742eb463706..ac2a1f80d8fad7fafbcde1febbe21d95dd15e545 100644 GIT binary patch delta 11 ScmaFO_=<5tE+fOl{2Blp00dG1 delta 12 TcmaFG_?mG-E>n!b#Jm~+A!G#J diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 8c8c0b79b..3dea391e0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -456,15 +456,15 @@ importers: '@ensnode/ensrainbow-sdk': specifier: workspace:* version: link:../../packages/ensrainbow-sdk + '@fast-csv/parse': + specifier: ^5.0.0 + version: 5.0.5 '@hono/node-server': specifier: ^1.4.1 version: 1.19.5(hono@4.10.3) classic-level: specifier: ^1.4.1 version: 1.4.1 - csv-simple-parser: - specifier: ^2.0.2 - version: 2.0.2 hono: specifier: 'catalog:' version: 4.10.3 @@ -1518,6 +1518,9 @@ packages: '@expressive-code/plugin-text-markers@0.41.3': resolution: {integrity: sha512-SN8tkIzDpA0HLAscEYD2IVrfLiid6qEdE9QLlGVSxO1KEw7qYvjpbNBQjUjMr5/jvTJ7ys6zysU2vLPHE0sb2g==} + '@fast-csv/parse@5.0.5': + resolution: {integrity: sha512-M0IbaXZDbxfOnpVE5Kps/a6FGlILLhtLsvWd9qNH3d2TxNnpbNkFf3KD26OmJX6MHq7PdQAl5htStDwnuwHx6w==} + '@fastify/busboy@3.2.0': resolution: {integrity: sha512-m9FVDXU3GT2ITSe0UaMA5rU3QkfC/UXtCU8y0gSN/GugTqtVldOBWIB5V6V3sbmenVZUIpU6f+mPEO2+m5iTaA==} @@ -4164,9 +4167,6 @@ packages: csstype@3.2.3: resolution: {integrity: sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==} - csv-simple-parser@2.0.2: - resolution: {integrity: sha512-G9KUSB7Bh8mRjZcg340FJM96tJYPPfb+UjR6T+dOcdRLChmwOTP6jB9+rJwmqDoaPHMJW/CXabYbJ1ZEjbkrrg==} - cytoscape-cose-bilkent@4.1.0: resolution: {integrity: sha512-wgQlVIUJF13Quxiv5e1gstZ08rnZj2XaLHGoFMYXz7SkNfCDOOteKBE6SYRfA9WxxI/iBc3ajfDoc6hb/MRAHQ==} peerDependencies: @@ -4410,9 +4410,6 @@ packages: destr@2.0.5: resolution: {integrity: sha512-ugFTXCtDZunbzasqBxrK93Ik/DRYsO6S/fedkWEMKqt04xZ4csmnmwGDBAb07QWNaGMAmnTIemsYZCksjATwsA==} - detect-eol@3.0.1: - resolution: {integrity: sha512-ncnuLiZCKO7Kt+3CpwUIV8QnnwpBsSFxGQBY6Nve18K2aOrTim2xpzDa8YunHkePt39OCfV2qOX+b7xjYSDRWg==} - detect-indent@6.1.0: resolution: {integrity: sha512-reYkTUJAZb9gUuZ2RvVCNhVHdg62RHnJ7WJl8ftMi4diZ6NWlciOzQN88pUhSELEwflJht4oQDv0F0BMlwaYtA==} engines: {node: '>=8'} @@ -5480,12 +5477,30 @@ packages: lodash.debounce@4.0.8: resolution: {integrity: sha512-FT1yDzDYEoYWhnSGnpE/4Kj1fLZkDFyqRb7fNt6FdYOSxlUWAtp42Eh6Wb0rGIv/m9Bgo7x4GhQbm5Ys4SG5ow==} + lodash.escaperegexp@4.1.2: + resolution: {integrity: sha512-TM9YBvyC84ZxE3rgfefxUWiQKLilstD6k7PTGt6wfbtXF8ixIJLOL3VYyV/z+ZiPLsVxAsKAFVwWlWeb2Y8Yyw==} + + lodash.groupby@4.6.0: + resolution: {integrity: sha512-5dcWxm23+VAoz+awKmBaiBvzox8+RqMgFhi7UvX9DHZr2HdxHXM/Wrf8cfKpsW37RNrvtPn6hSwNqurSILbmJw==} + + lodash.isfunction@3.0.9: + resolution: {integrity: sha512-AirXNj15uRIMMPihnkInB4i3NHeb4iBtNg9WRWuK2o31S+ePwwNmDPaTL3o7dTJ+VXNZim7rFs4rxN4YU1oUJw==} + + lodash.isnil@4.0.0: + resolution: {integrity: sha512-up2Mzq3545mwVnMhTDMdfoG1OurpA/s5t88JmQX809eH3C8491iu2sfKhTfhQtKY78oPNhiaHJUpT/dUDAAtng==} + + lodash.isundefined@3.0.1: + resolution: {integrity: sha512-MXB1is3s899/cD8jheYYE2V9qTHwKvt+npCwpD+1Sxm3Q3cECXCiYHjeHWXNwr6Q0SOBPrYUDxendrO6goVTEA==} + lodash.sortby@4.7.0: resolution: {integrity: 
sha512-HDWXG8isMntAyRF5vZ7xKuEvOhT4AhlRt/3czTSjvGUxjYCBVRQY48ViDHyfYz9VIoBkW4TMGQNapx+l3RUwdA==} lodash.startcase@4.4.0: resolution: {integrity: sha512-+WKqsK294HMSc2jEbNgpHpd0JfIBhp7rEV4aqXWqFr6AlXov+SlcgB1Fv01y2kGe3Gc8nMW7VA0SrGuSkRfIEg==} + lodash.uniq@4.5.0: + resolution: {integrity: sha512-xfBaXQd9ryd9dlSDvnvI0lvxfLJlYAZzXomUYzLKtUeOQvOP5piqAWuGtrhWeqaXK9hhoM/iyJc5AV+XfsX3HQ==} + lodash@4.17.21: resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==} @@ -8687,6 +8702,15 @@ snapshots: dependencies: '@expressive-code/core': 0.41.3 + '@fast-csv/parse@5.0.5': + dependencies: + lodash.escaperegexp: 4.1.2 + lodash.groupby: 4.6.0 + lodash.isfunction: 3.0.9 + lodash.isnil: 4.0.0 + lodash.isundefined: 3.0.1 + lodash.uniq: 4.5.0 + '@fastify/busboy@3.2.0': {} '@floating-ui/core@1.7.3': @@ -11671,10 +11695,6 @@ snapshots: csstype@3.2.3: {} - csv-simple-parser@2.0.2: - dependencies: - detect-eol: 3.0.1 - cytoscape-cose-bilkent@4.1.0(cytoscape@3.33.1): dependencies: cose-base: 1.0.3 @@ -11918,8 +11938,6 @@ snapshots: destr@2.0.5: {} - detect-eol@3.0.1: {} - detect-indent@6.1.0: {} detect-libc@2.1.2: {} @@ -13029,10 +13047,22 @@ snapshots: lodash.debounce@4.0.8: {} + lodash.escaperegexp@4.1.2: {} + + lodash.groupby@4.6.0: {} + + lodash.isfunction@3.0.9: {} + + lodash.isnil@4.0.0: {} + + lodash.isundefined@3.0.1: {} + lodash.sortby@4.7.0: {} lodash.startcase@4.4.0: {} + lodash.uniq@4.5.0: {} + lodash@4.17.21: {} long@5.3.2: {} From e20932db1e0c53549aa1a35aecd5eb76be8564cc Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 6 Oct 2025 16:44:32 +0200 Subject: [PATCH 06/30] add documentation for csv convert --- .../src/commands/convert-csv-command.test.ts | 8 +- .../ensrainbow/concepts/creating-files.mdx | 593 ++++++++++++++++++ .../docs/ensrainbow/concepts/data-model.mdx | 11 +- .../docs/ensrainbow/contributing/index.mdx | 5 +- 4 files changed, 604 insertions(+), 13 deletions(-) create mode 100644 docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index 795e53bdc..58c7af900 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -126,10 +126,6 @@ describe("convert-csv-command", () => { expect(outputStats.isFile()).toBe(true); expect(outputStats.size).toBeGreaterThan(0); - // Verify special characters were processed correctly by checking logs - // The conversion completed successfully, which means csv-simple-parser - // handled emojis, unicode, quoted fields with commas, etc. 
- // Ingest the converted file into database const cli = createCLI({ exitProcess: false }); await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); @@ -141,8 +137,8 @@ describe("convert-csv-command", () => { const labels = [ "🔥emoji-label🚀", 'special"quotes"inside', - "label with newline\n character", - "label-with-null\0byte", + "label with newline\n character", // new line + "label-with-null\0byte", // null byte ]; for (const label of labels) { expect( diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx new file mode 100644 index 000000000..f2c9c34cf --- /dev/null +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx @@ -0,0 +1,593 @@ +--- +title: Creating ENSRainbow Files +description: Complete guide to creating .ensrainbow files from SQL dumps and CSV data. +sidebar: + label: Creating Files + order: 3 +keywords: [ensrainbow, file creation, conversion, sql, csv] +--- + +ENSRainbow provides two methods for creating `.ensrainbow` files from different data sources. This guide helps you choose the right method and provides step-by-step instructions. + +## Prerequisites + +Before creating `.ensrainbow` files, ensure you have: + +1. **ENSNode repository cloned**: + ```bash + git clone https://github.com/namehash/ensnode.git + cd ensnode + ``` + +2. **Dependencies installed**: + ```bash + pnpm install + ``` + +3. **Working directory**: Navigate to the ENSRainbow directory: + ```bash + cd apps/ensrainbow + ``` + +All commands in this guide assume you're in the `apps/ensrainbow` directory unless otherwise specified. + +## Overview + +A `.ensrainbow` file is ENSRainbow's binary format for storing label-to-labelhash mappings. It uses Protocol Buffers for efficient serialization and supports streaming for large datasets. + +For detailed information about the file format structure, see the [Data Model](/ensrainbow/concepts/data-model) documentation. + +## Choosing Your Conversion Method + +| Method | Input Format | Use Case | Command | +|--------|-------------|----------|---------| +| **SQL Conversion** | Gzipped SQL dump (`ens_names.sql.gz`) | Converting legacy ENS Subgraph data | `pnpm run convert` | +| **CSV Conversion** | CSV file (1 or 2 columns) | Custom datasets, test data, external sources | `pnpm run convert-csv` | + +### When to Use SQL Conversion + +- Converting existing ENS Subgraph rainbow tables +- Working with legacy `ens_names.sql.gz` files +- Migrating from previous ENS data formats + +### When to Use CSV Conversion + +- Creating test datasets +- Converting data from external sources +- Working with custom label collections +- Building incremental label sets + +## Method 1: Converting from SQL Dumps + +The `convert` command processes gzipped SQL dump files from the ENS Subgraph. 
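+
+For orientation, here is a hypothetical excerpt of the kind of dump the command consumes, assuming the Postgres `COPY` format produced by ENS Subgraph exports; the table and column names in your dump may differ:
+
+```sql
+COPY public.ens_names (hashed_name, name) FROM stdin;
+0xaf2caa1c2ca1d027f1ac823b529d0a67cd144264b2789fa2ea4d63a67c7103cc	vitalik
+0x5cee339e13375638553bdf5a6e36ba80fb9f6a4f0783680884d92b558aa471da	ens
+\.
+```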
+ +### Command Syntax + +```bash +pnpm run convert \ + --input-file \ + --output-file \ + --label-set-id \ + --label-set-version +``` + +### Required Parameters + +- `--input-file`: Path to the gzipped SQL dump file +- `--label-set-id`: Identifier for the label set (e.g., `subgraph`, `discovery-a`) +- `--label-set-version`: Version number for the label set (non-negative integer) + +### Optional Parameters + +- `--output-file`: Output file path (defaults to `rainbow-records.ensrainbow`) + +### Example: Converting ENS Subgraph Data + +```bash +# Convert main ENS Subgraph data +pnpm run convert \ + --input-file ens_names.sql.gz \ + --output-file subgraph_0.ensrainbow \ + --label-set-id subgraph \ + --label-set-version 0 +``` + +### Example: Converting Test Data + +```bash +# Convert ens-test-env data +pnpm run convert \ + --input-file test/fixtures/ens_test_env_names.sql.gz \ + --output-file ens-test-env_0.ensrainbow \ + --label-set-id ens-test-env \ + --label-set-version 0 +``` + +### How It Works + +1. **Streams** the gzipped SQL file to avoid memory issues +2. **Parses** SQL COPY statements to extract label/labelhash pairs +3. **Validates** each record and skips invalid entries +4. **Writes** protobuf messages with length-delimited encoding +5. **Creates** a header message followed by individual record messages + +## Method 2: Converting from CSV Files + +The `convert-csv` command processes CSV files with flexible column formats. + +### Command Syntax + +```bash +pnpm run convert-csv \ + --input-file \ + --output-file \ + --label-set-id \ + --label-set-version \ + [--progress-interval ] +``` + +### Required Parameters + +- `--input-file`: Path to the CSV file +- `--label-set-id`: Identifier for the label set +- `--label-set-version`: Version number for the label set + +### Optional Parameters + +- `--output-file`: Output file path (defaults to `rainbow-records.ensrainbow`) +- `--progress-interval`: Progress logging frequency (default: 10000 records) + +### CSV Format Support + +The CSV converter supports two formats: + +#### Single Column Format (Label Only) +```csv +ethereum +vitalik +ens +``` + +The converter automatically computes labelhashes using the `labelhash()` function. + +#### Two Column Format (Label + Labelhash) +```csv +ethereum,0x541111248b45b7a8dc3f5579f630e74cb01456ea6ac067d3f4d793245a255155 +vitalik,0xaf2caa1c2ca1d027f1ac823b529d0a67cd144264b2789fa2ea4d63a67c7103cc +ens,0x5cee339e13375638553bdf5a6e36ba80fb9f6a4f0783680884d92b558aa471da +``` + +The converter validates that provided labelhashes match the computed hash for each label. + +### Example: Creating Test Dataset + +```bash +# Create test dataset from CSV +pnpm run convert-csv \ + --input-file test-labels.csv \ + --output-file test-dataset_0.ensrainbow \ + --label-set-id test-dataset \ + --label-set-version 0 +``` + +### Example: Creating Discovery Dataset + +```bash +# Create discovery dataset (initially empty) +echo "" > empty.csv +pnpm run convert-csv \ + --input-file empty.csv \ + --output-file discovery-a_0.ensrainbow \ + --label-set-id discovery-a \ + --label-set-version 0 +``` + +### How It Works + +1. **Detects** CSV format automatically (1 or 2 columns) +2. **Streams** CSV parsing using fast-csv for memory efficiency +3. **Validates** column count and data format +4. **Computes** or validates labelhashes as needed +5. **Writes** protobuf messages with the same format as SQL conversion + +## Common Workflows + +### Workflow 1: Migrating from ENS Subgraph + +```bash +# 1. 
Convert SQL dump to .ensrainbow +pnpm run convert \ + --input-file ens_names.sql.gz \ + --output-file subgraph_0.ensrainbow \ + --label-set-id subgraph \ + --label-set-version 0 + +# 2. Ingest into LevelDB +pnpm run ingest-ensrainbow \ + --input-file subgraph_0.ensrainbow \ + --data-dir data-subgraph + +# 3. Validate the database +pnpm run validate --data-dir data-subgraph + +# 4. Start the API server +pnpm run serve --data-dir data-subgraph --port 3223 +``` + +### Workflow 2: Creating Test Environment + +```bash +# 1. Convert test data +pnpm run convert \ + --input-file test/fixtures/ens_test_env_names.sql.gz \ + --output-file ens-test-env_0.ensrainbow \ + --label-set-id ens-test-env \ + --label-set-version 0 + +# 2. Ingest test data +pnpm run ingest-ensrainbow \ + --input-file ens-test-env_0.ensrainbow \ + --data-dir data-test-env + +# 3. Run with test data +pnpm run serve --data-dir data-test-env --port 3223 +``` + +### Workflow 3: Building Custom Dataset + +```bash +# 1. Create CSV with your labels +echo "mylabel1 +mylabel2 +mylabel3" > custom-labels.csv + +# 2. Convert to .ensrainbow +pnpm run convert-csv \ + --input-file custom-labels.csv \ + --output-file custom_0.ensrainbow \ + --label-set-id custom \ + --label-set-version 0 + +# 3. Ingest and serve +pnpm run ingest-ensrainbow \ + --input-file custom_0.ensrainbow \ + --data-dir data-custom + +pnpm run serve --data-dir data-custom --port 3223 +``` + +### Workflow 4: Using Custom Label Set Server + +```bash +# 1. Configure custom label set server +export ENSRAINBOW_LABELSET_SERVER_URL="https://my-label-set-server.com" + +# 2. Download from custom server +# The script downloads to labelsets/ subdirectory +./scripts/download-ensrainbow-files.sh my-dataset 0 + +# 3. Ingest and serve +# Files are downloaded to labelsets/ by the script +pnpm run ingest-ensrainbow \ + --input-file labelsets/my-dataset_0.ensrainbow \ + --data-dir data-my-dataset + +pnpm run serve --data-dir data-my-dataset --port 3223 +``` + +:::note[Script Output Locations] +ENSRainbow download scripts save files to specific subdirectories: +- **`.ensrainbow` files**: `labelsets/` +- **Database archives**: `databases/{schema_version}/` +- **Checksums and licenses**: Same directory as the downloaded file +::: + +## File Naming Conventions + +Follow the naming convention: `{label-set-id}_{label-set-version}.ensrainbow` + +**Examples:** +- `subgraph_0.ensrainbow` - Main ENS data, version 0 +- `subgraph_1.ensrainbow` - Main ENS data, version 1 (incremental update) +- `discovery-a_0.ensrainbow` - Discovery dataset, version 0 +- `ens-test-env_0.ensrainbow` - Test environment data, version 0 + +## Next Steps + +After creating your `.ensrainbow` file: + +1. **[Ingest the data](/ensrainbow/contributing/index#data-ingestion-ingest-ensrainbow)** into a ENSRainbow database +2. **[Validate the database](/ensrainbow/contributing/index#database-validation-validate)** to ensure integrity +3. **[Start the API server](/ensrainbow/contributing/index#api-server-serve)** to serve the data + +For complete CLI reference information, see the [CLI Reference](/ensrainbow/contributing/cli-reference) documentation. + +## Creating and Publishing Custom .ensrainbow Files + +If you want to create, publish, and distribute your own `.ensrainbow` files, follow these steps: + +### 1. 
Create Your Dataset + +First, prepare your data in either SQL or CSV (recommended) format, then convert it using the appropriate method: + +```bash +# For CSV data +pnpm run convert-csv \ + --input-file my-labels.csv \ + --output-file my-dataset_0.ensrainbow \ + --label-set-id my-dataset \ + --label-set-version 0 + +# For SQL data +pnpm run convert \ + --input-file my-data.sql.gz \ + --output-file my-dataset_0.ensrainbow \ + --label-set-id my-dataset \ + --label-set-version 0 +``` + +### 2. Validate Your File + +Test your `.ensrainbow` file by ingesting it locally: + +```bash +# Ingest your custom dataset +pnpm run ingest-ensrainbow \ + --input-file my-dataset_0.ensrainbow \ + --data-dir data-my-dataset + +# Validate the database +pnpm run validate --data-dir data-my-dataset + +# Test the API +pnpm run serve --data-dir data-my-dataset --port 3223 +``` + +### 3. Publish Your File + +#### Option A: Direct File Sharing +- Upload your `.ensrainbow` file to a web server or cloud storage +- Provide a direct download URL +- Share checksums for integrity verification + +#### Option B: Package as Database Archive +For better performance, package your data as a pre-built database: + +```bash +# Ingest your .ensrainbow file +pnpm run ingest-ensrainbow \ + --input-file my-dataset_0.ensrainbow \ + --data-dir data-my-dataset + +# Package the database +tar -czvf my-dataset_0.tgz ./data-my-dataset + +# Calculate checksum +sha256sum my-dataset_0.tgz > my-dataset_0.tgz.sha256sum +``` + +### 4. Document Your Label Set + +Create documentation for your custom label set including: + +- **Label Set ID**: The identifier users will specify +- **Description**: What labels are included and their source +- **Version**: Current version number +- **Download URLs**: Where to get the files +- **Checksums**: For integrity verification +- **Usage Examples**: How to use your dataset + +### Example Documentation Format + +```markdown +## Custom Label Set: my-dataset + +**Label Set ID**: `my-dataset` +**Current Version**: `0` +**Description**: Custom ENS labels from [source description] + +### Download +- Database Archive: `https://example.com/my-dataset_0.tgz` +- Checksum: `https://example.com/my-dataset_0.tgz.sha256sum` + +### Usage +```bash +# Using with Docker +docker run -d \ + -e DB_SCHEMA_VERSION="3" \ + -e LABEL_SET_ID="my-dataset" \ + -e LABEL_SET_VERSION="0" \ + -p 3223:3223 \ + ghcr.io/namehash/ensnode/ensrainbow:latest +``` + +## Setting Up Your Own Label Set Server + +A **Label Set Server** is a storage and hosting service for `.ensrainbow` files and prebuilt database archives. It's not the ENSRainbow API server itself, but rather a way to distribute your custom datasets for others to download and use. + +### 1. Choose Your Hosting Platform + +You can host your label set files on any web server or cloud storage service: + +- **AWS S3**: Industry standard with versioning +- **Cloudflare R2**: Cost-effective alternative to S3 +- **Simple HTTP server**: For internal/private use + +### 2. Organize Your Files + +Structure your label set files following ENSRainbow conventions: + +``` +my-label-set-server/ +├── labelsets/ +│ ├── my-dataset_0.ensrainbow +│ ├── my-dataset_0.ensrainbow.sha256sum +│ ├── my-dataset_1.ensrainbow +│ └── my-dataset_1.ensrainbow.sha256sum +└── databases/ + ├── 3/ # Schema version + │ ├── my-dataset_0.tgz + │ ├── my-dataset_0.tgz.sha256sum + │ ├── my-dataset_1.tgz + │ └── my-dataset_1.tgz.sha256sum + └── 4/ # Future schema version +``` + +### 3. 
Use Existing Download Scripts + +ENSRainbow provides ready-to-use download scripts that users can configure to download from your label set server: + +#### Download .ensrainbow Files +```bash +# Configure your label set server URL +export ENSRAINBOW_LABELSET_SERVER_URL="https://my-label-set-server.com" + +# Download .ensrainbow file using the existing script +./scripts/download-ensrainbow-files.sh my-dataset 0 +``` + +#### Download Prebuilt Database Archives +```bash +# Configure your label set server URL +export ENSRAINBOW_LABELSET_SERVER_URL="https://my-label-set-server.com" + +# Download prebuilt database using the existing script +./scripts/download-prebuilt-database.sh 3 my-dataset 0 +``` + +#### Script Features +The existing scripts automatically handle: +- **Checksum verification** for data integrity +- **Resume downloads** if files already exist and are valid +- **License file downloads** (optional) +- **Progress reporting** for large files +- **Error handling** with cleanup of partial downloads + +### 4. Document Your Label Set Server + +Create a README or documentation page for your label set server: + +```markdown +# My Label Set Server + +This server hosts custom ENS label sets for ENSRainbow. + +## Available Label Sets + +### my-dataset +- **Description**: Custom ENS labels from [source] +- **Versions**: 0, 1 +- **Schema Versions**: 3 +- **Base URL**: `https://my-label-set-server.com` + +### another-dataset +- **Description**: Additional labels from [source] +- **Versions**: 0 +- **Schema Versions**: 3 +- **Base URL**: `https://my-label-set-server.com` +``` + +## Usage + +Users should have the ENSNode repository cloned and be in the `apps/ensrainbow` directory. + +### Option 1: Download .ensrainbow Files + +```bash +# Configure your label set server +export ENSRAINBOW_LABELSET_SERVER_URL="https://my-label-set-server.com" + +# Download .ensrainbow file +./scripts/download-ensrainbow-files.sh my-dataset 0 + +# Ingest into ENSRainbow +pnpm run ingest-ensrainbow \ + --input-file labelsets/my-dataset_0.ensrainbow \ + --data-dir data-my-dataset + +# Start ENSRainbow server +pnpm run serve --data-dir data-my-dataset --port 3223 +``` + +### Option 2: Download Prebuilt Databases (Faster) + +```bash +# Configure your label set server +export ENSRAINBOW_LABELSET_SERVER_URL="https://my-label-set-server.com" + +# Download prebuilt database +./scripts/download-prebuilt-database.sh 3 my-dataset 0 + +# Extract database +tar -xzf databases/3/my-dataset_0.tgz -C data-my-dataset --strip-components=1 + +# Start ENSRainbow server +pnpm run serve --data-dir data-my-dataset --port 3223 +``` + +### 5. 
Version Management + +Implement proper versioning for your label sets: + +```bash +# When releasing a new version +LABEL_SET_ID="my-dataset" +NEW_VERSION="1" + +# Create new .ensrainbow file +pnpm run convert-csv \ + --input-file updated-labels.csv \ + --output-file ${LABEL_SET_ID}_${NEW_VERSION}.ensrainbow \ + --label-set-id ${LABEL_SET_ID} \ + --label-set-version ${NEW_VERSION} + +# Create prebuilt database +pnpm run ingest-ensrainbow \ + --input-file ${LABEL_SET_ID}_${NEW_VERSION}.ensrainbow \ + --data-dir data-${LABEL_SET_ID}-${NEW_VERSION} + +tar -czvf ${LABEL_SET_ID}_${NEW_VERSION}.tgz ./data-${LABEL_SET_ID}-${NEW_VERSION} + +# Calculate checksums +sha256sum ${LABEL_SET_ID}_${NEW_VERSION}.ensrainbow > ${LABEL_SET_ID}_${NEW_VERSION}.ensrainbow.sha256sum +sha256sum ${LABEL_SET_ID}_${NEW_VERSION}.tgz > ${LABEL_SET_ID}_${NEW_VERSION}.tgz.sha256sum + +# Upload to your label set server +# (implementation depends on your hosting platform) +``` + +### 6. Testing Your Label Set Server + +Before publishing, test that your label set server works correctly: + +```bash +# Set your test server URL +export ENSRAINBOW_LABELSET_SERVER_URL="https://my-label-set-server.com" + +# Test downloading .ensrainbow file +./scripts/download-ensrainbow-files.sh my-dataset 0 + +# Verify checksum was validated +# The script will fail if checksums don't match + +# Test downloading prebuilt database +./scripts/download-prebuilt-database.sh 3 my-dataset 0 + +# Verify the database works +pnpm run ingest-ensrainbow \ + --input-file labelsets/my-dataset_0.ensrainbow \ + --data-dir test-data + +pnpm run validate --data-dir test-data +``` + +## Running Your Own ENSRainbow Server + +If you want to run your own ENSRainbow API server (separate from the label set server), see the [Local Development](/ensrainbow/contributing/local-development) guide for instructions on setting up and running ENSRainbow locally or in production. 
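+
+Once a server is up, a quick smoke test is to heal a known labelhash over HTTP, assuming the `/v1/heal/{labelhash}` endpoint exposed by the ENSRainbow API (the hash below is the labelhash of `vitalik` used earlier in this guide):
+
+```bash
+curl http://localhost:3223/v1/heal/0xaf2caa1c2ca1d027f1ac823b529d0a67cd144264b2789fa2ea4d63a67c7103cc
+```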
+ +## Related Documentation + +- **[Data Model](/ensrainbow/concepts/data-model)** - Understanding the `.ensrainbow` file format +- **[Label Sets & Versioning](/ensrainbow/concepts/label-sets-and-versioning)** - Managing label set versions +- **[CLI Reference](/ensrainbow/contributing/cli-reference)** - Complete command documentation +- **[Local Development](/ensrainbow/contributing/local-development)** - Setting up your development environment diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx index 8978ca5a9..e1df686d0 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx @@ -104,15 +104,14 @@ subgraph_0.ensrainbow # labelSetId = "subgraph", version = 0 subgraph_1.ensrainbow # next version with incremental labelhash-to-label mappings added ``` -## Converting Legacy SQL Data +## Creating ENSRainbow Files -If you have a legacy gzipped rainbow table (`ens_names.sql.gz`) from the ENS Subgraph, you can convert it to the `.ensrainbow` format: +ENSRainbow provides two methods for creating `.ensrainbow` files from different data sources: -```bash title="Convert legacy SQL data" -pnpm run convert --input-file path/to/ens_names.sql.gz --output-file subgraph-0.ensrainbow -``` +- **SQL Conversion**: Convert legacy ENS Subgraph data (`ens_names.sql.gz`) using `pnpm run convert` +- **CSV Conversion**: Convert custom datasets from CSV files using `pnpm run convert-csv` -This conversion process allows you to migrate existing rainbow table data that was previously stored in SQL format to ENSRainbow's optimized binary format. The resulting `.ensrainbow` file will be equivalent to the rainbow tables used by the ENS Subgraph, maintaining the same label-to-labelhash mappings while providing better performance and storage efficiency. +For complete instructions, examples, and workflow guidance, see the [Creating ENSRainbow Files](/ensrainbow/concepts/creating-files) guide. ## Ingestion Process diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx index 64556f1eb..401a0f986 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx @@ -17,6 +17,7 @@ This guide covers running ENSRainbow locally for development and contributions. 
For focused guidance on specific topics, check out these dedicated pages: + @@ -24,6 +25,7 @@ For focused guidance on specific topics, check out these dedicated pages: :::tip[Choose Your Path] - **New to the project?** Start with [Local Development](/ensrainbow/contributing/local-development) +- **Creating custom datasets?** See [Creating ENSRainbow Files](/ensrainbow/concepts/creating-files) - **Need CLI help?** Check the [CLI Reference](/ensrainbow/contributing/cli-reference) - **Building for production?** See [Building Docker Images](/ensrainbow/contributing/building) ::: @@ -41,6 +43,7 @@ Follow these steps to start contributing to ENSRainbow: ## Quick Reference - **Need to build from source?** → [Building Docker Images](/ensrainbow/contributing/building) +- **Creating custom datasets?** → [Creating ENSRainbow Files](/ensrainbow/concepts/creating-files) - **Looking for CLI commands?** → [CLI Reference](/ensrainbow/contributing/cli-reference) - **Running into issues?** → [Troubleshooting](/ensrainbow/usage/troubleshooting) - **Want to understand the data flow?** → [Data Model](/ensrainbow/concepts/data-model) @@ -265,7 +268,7 @@ These steps are typically performed by project maintainers for releasing officia ### 1. Prepare `.ensrainbow` Files -This section covers the conversion of source data (like SQL dumps or empty files for initial datasets) into the `.ensrainbow` format. The `time` command is used here to measure the duration of potentially long-running conversion processes. +This section covers the conversion of source data (like SQL dumps or empty files for initial datasets) into the `.ensrainbow` format. For detailed conversion instructions and examples, see the [Creating ENSRainbow Files](/ensrainbow/concepts/creating-files) guide. **For the `subgraph` Label Set (main dataset):** This command converts a SQL dump file (`ens_names.sql.gz`) into an `.ensrainbow` file for version 0 of the `subgraph` Label Set. From b9c31b08422a1b71100bcec7ac2940a11bb5e35b Mon Sep 17 00:00:00 2001 From: djstrong Date: Fri, 17 Oct 2025 22:45:34 +0200 Subject: [PATCH 07/30] feat: add filtering capabilities to CSV conversion - Introduced `--existing-db-path` option to filter out existing labels from an ENSRainbow database during CSV conversion. - Enhanced conversion process to skip duplicate labels within the same CSV file. - Updated logging to include statistics on filtered labels. - Added comprehensive tests for filtering functionality and updated documentation to reflect new features. 
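
An illustrative invocation of the new option (file and directory names are examples only):

    convert-csv --input-file new-labels.csv --output-file my-dataset_1.ensrainbow \
      --label-set-id my-dataset --label-set-version 1 --existing-db-path data-my-dataset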
--- apps/ensrainbow/src/cli.ts | 6 + .../src/commands/convert-csv-command.test.ts | 189 ++++++++++++++++++ .../src/commands/convert-csv-command.ts | 111 +++++++++- .../ensrainbow/concepts/creating-files.mdx | 86 +++++++- 4 files changed, 379 insertions(+), 13 deletions(-) diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index 940692729..d9d38c4f9 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -68,6 +68,7 @@ interface ConvertCsvArgs { "label-set-id": LabelSetId; "label-set-version": LabelSetVersion; "progress-interval"?: number; + "existing-db-path"?: string; } export interface CLIOptions { @@ -259,6 +260,10 @@ export function createCLI(options: CLIOptions = {}) { type: "number", description: "Number of records to process before logging progress", default: 10000, + }) + .option("existing-db-path", { + type: "string", + description: "Path to existing ENSRainbow database to filter out existing labels", }); }, async (argv: ArgumentsCamelCase) => { @@ -268,6 +273,7 @@ export function createCLI(options: CLIOptions = {}) { labelSetId: argv["label-set-id"], labelSetVersion: argv["label-set-version"], progressInterval: argv["progress-interval"], + existingDbPath: argv["existing-db-path"], }); }, ) diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index 58c7af900..9e2569ab2 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -216,6 +216,195 @@ describe("convert-csv-command", () => { }); }); + describe("Filtering functionality", () => { + it("should filter out labels that already exist in the database", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); + const outputFile = join(tempDir, "output_filtered.ensrainbow"); + const dataDir = join(tempDir, "db_filtered"); + + // First, create an initial database with some labels + const initialOutputFile = join(tempDir, "initial.ensrainbow"); + await convertCsvCommand({ + inputFile, + outputFile: initialOutputFile, + labelSetId: "test-filtering" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + }); + + // Ingest the initial file + const cli = createCLI({ exitProcess: false }); + await cli.parse([ + "ingest-ensrainbow", + "--input-file", + initialOutputFile, + "--data-dir", + dataDir, + ]); + + // Verify initial database + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + const initialCount = await db.getPrecalculatedRainbowRecordCount(); + expect(initialCount).toBe(11); + await db.close(); + + // Now convert the same CSV file again, but with filtering enabled + await convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-filtering" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, // Use same version as initial + existingDbPath: dataDir, + }); + + // Verify the filtered output file was created + const outputStats = await stat(outputFile); + expect(outputStats.isFile()).toBe(true); + + // The filtered file should be smaller than the original since it excludes existing labels + const initialStats = await stat(initialOutputFile); + expect(outputStats.size).toBeLessThan(initialStats.size); + + // Verify that the filtered file contains fewer records + const filteredDataDir = join(tempDir, "db_filtered_result"); + await cli.parse([ + "ingest-ensrainbow", + "--input-file", + outputFile, + "--data-dir", + filteredDataDir, + ]); + + const filteredDb = await 
ENSRainbowDB.open(filteredDataDir); + expect(await filteredDb.validate()).toBe(true); + const filteredCount = await filteredDb.getPrecalculatedRainbowRecordCount(); + expect(filteredCount).toBe(0); // All labels should be filtered out since they already exist + await filteredDb.close(); + }); + + it("should filter out duplicate labels within the same conversion", async () => { + // Create a CSV file with duplicate labels + const csvContent = "label1\nlabel2\nlabel1\nlabel3\nlabel2\nlabel4"; + const inputFile = join(tempDir, "duplicates.csv"); + await writeFile(inputFile, csvContent); + + const outputFile = join(tempDir, "output_no_duplicates.ensrainbow"); + + // Convert CSV with duplicate filtering + await convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-duplicates" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + }); + + // Verify the output file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + + // Ingest and verify only unique labels were processed + const dataDir = join(tempDir, "db_no_duplicates"); + const cli = createCLI({ exitProcess: false }); + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + + // Should have 4 unique labels (label1, label2, label3, label4) + const recordsCount = await db.getPrecalculatedRainbowRecordCount(); + expect(recordsCount).toBe(4); + + // Verify specific labels exist + expect( + (await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("label1"))))?.label, + ).toBe("label1"); + expect( + (await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("label2"))))?.label, + ).toBe("label2"); + expect( + (await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("label3"))))?.label, + ).toBe("label3"); + expect( + (await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("label4"))))?.label, + ).toBe("label4"); + + await db.close(); + }); + + it("should handle non-existent database path gracefully", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); + const outputFile = join(tempDir, "output_no_db.ensrainbow"); + const nonExistentDbPath = join(tempDir, "non-existent-db"); + + // Should not throw error even with non-existent database path + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-no-db" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + existingDbPath: nonExistentDbPath, + }), + ).resolves.not.toThrow(); + + // Verify the output file was still created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + }); + + it("should work through CLI with existing database path", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); + const outputFile = join(tempDir, "cli_output_with_db.ensrainbow"); + const dataDir = join(tempDir, "cli_db_with_filtering"); + + // First create a database + const initialOutputFile = join(tempDir, "initial_cli.ensrainbow"); + const cli = createCLI({ exitProcess: false }); + + await cli.parse([ + "convert-csv", + "--input-file", + inputFile, + "--output-file", + initialOutputFile, + "--label-set-id", + "test-cli-filtering", + "--label-set-version", + "0", + ]); + + await cli.parse([ + "ingest-ensrainbow", + "--input-file", + initialOutputFile, + "--data-dir", + dataDir, + ]); + + // Now test CLI with existing 
database path + await cli.parse([ + "convert-csv", + "--input-file", + inputFile, + "--output-file", + outputFile, + "--label-set-id", + "test-cli-filtering", + "--label-set-version", + "1", + "--existing-db-path", + dataDir, + ]); + + // Verify file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + }); + }); + describe("Streaming performance", () => { it("should handle small CSV files efficiently", async () => { const inputFile = join(tempDir, "small_test.csv"); diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index 14ae2d4b3..34f64d935 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -9,6 +9,7 @@ import { createReadStream, createWriteStream } from "fs"; import { type LabelHash, labelHashToBytes } from "@ensnode/ensnode-sdk"; import { parse } from "@fast-csv/parse"; import { labelhash } from "viem"; +import { ENSRainbowDB } from "../lib/database.js"; import { logger } from "../utils/logger.js"; import { CURRENT_ENSRAINBOW_FILE_FORMAT_VERSION, @@ -21,6 +22,7 @@ export interface ConvertCsvCommandOptions { labelSetId: string; labelSetVersion: number; progressInterval?: number; + existingDbPath?: string; // Path to existing ENSRainbow database to check for existing labels } // Configuration constants @@ -29,6 +31,8 @@ const DEFAULT_PROGRESS_INTERVAL = 10000; interface ConversionStats { totalLines: number; processedRecords: number; + filteredExistingLabels: number; + filteredDuplicates: number; startTime: Date; endTime?: Date; } @@ -74,19 +78,47 @@ function logSummary(stats: ConversionStats) { logger.info("=== Conversion Summary ==="); logger.info(`Total lines processed: ${stats.totalLines}`); logger.info(`Valid records: ${stats.processedRecords}`); + logger.info(`Filtered existing labels: ${stats.filteredExistingLabels}`); + logger.info(`Filtered duplicates: ${stats.filteredDuplicates}`); logger.info(`Duration: ${duration}ms`); } +/** + * Check if a labelhash exists in the ENSRainbow database + */ +async function checkLabelHashExists(db: ENSRainbowDB, labelHashBytes: Buffer): Promise { + try { + const record = await db.getVersionedRainbowRecord(labelHashBytes); + return record !== null; + } catch (error) { + // If there's an error checking, assume it doesn't exist + return false; + } +} + /** * Initialize conversion setup and logging */ -function initializeConversion(options: ConvertCsvCommandOptions) { +async function initializeConversion(options: ConvertCsvCommandOptions) { logger.info("Starting conversion from CSV to protobuf format..."); logger.info(`Input file: ${options.inputFile}`); logger.info(`Output file: ${options.outputFile}`); logger.info(`Label set id: ${options.labelSetId}`); logger.info(`Label set version: ${options.labelSetVersion}`); + // Open existing database if path is provided + let existingDb: ENSRainbowDB | null = null; + if (options.existingDbPath) { + try { + logger.info(`Opening existing database for filtering: ${options.existingDbPath}`); + existingDb = await ENSRainbowDB.open(options.existingDbPath); + logger.info("Successfully opened existing database for label filtering"); + } catch (error) { + logger.warn(`Failed to open existing database at ${options.existingDbPath}: ${error}`); + logger.warn("Proceeding without filtering existing labels"); + } + } + const { RainbowRecordType, RainbowRecordCollectionType } = 
createRainbowProtobufRoot(); const outputStream = setupWriteStream(options.outputFile); @@ -99,7 +131,7 @@ function initializeConversion(options: ConvertCsvCommandOptions) { logger.info("Reading and processing CSV file line by line with streaming..."); - return { RainbowRecordType, outputStream }; + return { RainbowRecordType, outputStream, existingDb }; } /** @@ -131,13 +163,16 @@ function createRainbowRecord(row: string[]): { labelhash: Buffer; label: string /** * Process a single CSV record */ -function processRecord( +async function processRecord( row: string[], expectedColumns: number, RainbowRecordType: any, outputStream: NodeJS.WritableStream, lineNumber: number, -): void { + existingDb: ENSRainbowDB | null, + writtenLabels: Set, + stats: ConversionStats, +): Promise { // Validate column count if (row.length !== expectedColumns) { throw new Error( @@ -146,10 +181,32 @@ function processRecord( } const rainbowRecord = createRainbowRecord(row); + const label = rainbowRecord.label; + const labelHashBytes = rainbowRecord.labelhash; + + // Check if labelhash already exists in the database + if (existingDb) { + const existsInDb = await checkLabelHashExists(existingDb, labelHashBytes); + if (existsInDb) { + stats.filteredExistingLabels++; + return false; // Skip this record + } + } + + // Check if label is a duplicate within this conversion + if (writtenLabels.has(label)) { + stats.filteredDuplicates++; + return false; // Skip this record + } + + // Add label to written set to track duplicates + writtenLabels.add(label); // Create protobuf message and write immediately const recordMessage = RainbowRecordType.fromObject(rainbowRecord); outputStream.write(Buffer.from(RainbowRecordType.encodeDelimited(recordMessage).finish())); + + return true; // Record was processed } /** @@ -160,16 +217,19 @@ async function processCSVFile( RainbowRecordType: any, outputStream: NodeJS.WritableStream, progressInterval: number, + existingDb: ENSRainbowDB | null, + stats: ConversionStats, ): Promise<{ totalLines: number; processedRecords: number }> { return new Promise((resolve, reject) => { let expectedColumns: number | null = null; let lineNumber = 0; let processedRecords = 0; + const writtenLabels = new Set(); // Track labels written in this conversion const fileStream = createReadStream(inputFile, { encoding: "utf8" }); const csvStream = parse() - .on("data", (row: string[]) => { + .on("data", async (row: string[]) => { lineNumber++; try { @@ -179,12 +239,26 @@ async function processCSVFile( logger.info(`Detected ${expectedColumns} columns using fast-csv`); } - processRecord(row, expectedColumns, RainbowRecordType, outputStream, lineNumber); - processedRecords++; + const wasProcessed = await processRecord( + row, + expectedColumns, + RainbowRecordType, + outputStream, + lineNumber, + existingDb, + writtenLabels, + stats, + ); + + if (wasProcessed) { + processedRecords++; + } // Log progress for large files - if (processedRecords % progressInterval === 0) { - logger.info(`Processed ${processedRecords} records so far...`); + if (lineNumber % progressInterval === 0) { + logger.info( + `Processed ${lineNumber} lines, written ${processedRecords} records so far...`, + ); } } catch (error) { const errorMessage = error instanceof Error ? 
error.message : String(error); @@ -219,11 +293,16 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom const stats: ConversionStats = { totalLines: 0, processedRecords: 0, + filteredExistingLabels: 0, + filteredDuplicates: 0, startTime: new Date(), }; + let existingDb: ENSRainbowDB | null = null; + try { - const { RainbowRecordType, outputStream } = initializeConversion(options); + const { RainbowRecordType, outputStream, existingDb: db } = await initializeConversion(options); + existingDb = db; const progressInterval = options.progressInterval ?? DEFAULT_PROGRESS_INTERVAL; @@ -233,6 +312,8 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom RainbowRecordType, outputStream, progressInterval, + existingDb, + stats, ); stats.totalLines = totalLines; @@ -248,5 +329,15 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom const errorMessage = error instanceof Error ? error.message : String(error); logger.error("❌ CSV conversion failed:", errorMessage); throw error; + } finally { + // Clean up database connection + if (existingDb) { + try { + await existingDb.close(); + logger.info("Closed existing database connection"); + } catch (error) { + logger.warn(`Failed to close existing database: ${error}`); + } + } } } diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx index f2c9c34cf..125e9916a 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx @@ -123,7 +123,8 @@ pnpm run convert-csv \ --output-file \ --label-set-id \ --label-set-version \ - [--progress-interval ] + [--progress-interval ] \ + [--existing-db-path ] ``` ### Required Parameters @@ -136,6 +137,7 @@ pnpm run convert-csv \ - `--output-file`: Output file path (defaults to `rainbow-records.ensrainbow`) - `--progress-interval`: Progress logging frequency (default: 10000 records) +- `--existing-db-path`: Path to existing ENSRainbow database to filter out existing labels ### CSV Format Support @@ -159,6 +161,42 @@ ens,0x5cee339e13375638553bdf5a6e36ba80fb9f6a4f0783680884d92b558aa471da The converter validates that provided labelhashes match the computed hash for each label. +### Label Filtering + +The CSV converter includes built-in filtering capabilities to prevent duplicate labels: + +#### Filtering Existing Labels +Use `--existing-db-path` to filter out labels that already exist in an existing ENSRainbow database: + +```bash +pnpm run convert-csv \ + --input-file new-labels.csv \ + --output-file incremental_1.ensrainbow \ + --label-set-id my-dataset \ + --label-set-version 1 \ + --existing-db-path data-my-dataset +``` + +This will: +- Check each label against the existing database +- Skip labels that already exist (avoiding duplicates) +- Only write new labels to the output file +- Log filtering statistics in the conversion summary + +#### Filtering Duplicate Labels Within CSV +The converter automatically filters duplicate labels within the same CSV file, keeping only the first occurrence of each label. + +#### Filtering Statistics +The conversion process logs detailed statistics: +``` +=== Conversion Summary === +Total lines processed: 1000 +Valid records: 850 +Filtered existing labels: 100 +Filtered duplicates: 50 +Duration: 150ms +``` + ### Example: Creating Test Dataset ```bash @@ -188,7 +226,9 @@ pnpm run convert-csv \ 2. 
**Streams** CSV parsing using fast-csv for memory efficiency 3. **Validates** column count and data format 4. **Computes** or validates labelhashes as needed -5. **Writes** protobuf messages with the same format as SQL conversion +5. **Filters** existing labels if `--existing-db-path` is provided +6. **Filters** duplicate labels within the same CSV file +7. **Writes** protobuf messages with the same format as SQL conversion ## Common Workflows @@ -256,7 +296,39 @@ pnpm run ingest-ensrainbow \ pnpm run serve --data-dir data-custom --port 3223 ``` -### Workflow 4: Using Custom Label Set Server +### Workflow 4: Creating Incremental Updates + +```bash +# 1. Create initial dataset +pnpm run convert-csv \ + --input-file initial-labels.csv \ + --output-file my-dataset_0.ensrainbow \ + --label-set-id my-dataset \ + --label-set-version 0 + +# 2. Ingest initial data +pnpm run ingest-ensrainbow \ + --input-file my-dataset_0.ensrainbow \ + --data-dir data-my-dataset + +# 3. Create incremental update (filtering existing labels) +pnpm run convert-csv \ + --input-file new-labels.csv \ + --output-file my-dataset_1.ensrainbow \ + --label-set-id my-dataset \ + --label-set-version 1 \ + --existing-db-path data-my-dataset + +# 4. Ingest incremental update +pnpm run ingest-ensrainbow \ + --input-file my-dataset_1.ensrainbow \ + --data-dir data-my-dataset + +# 5. Serve updated data +pnpm run serve --data-dir data-my-dataset --port 3223 +``` + +### Workflow 5: Using Custom Label Set Server ```bash # 1. Configure custom label set server @@ -318,6 +390,14 @@ pnpm run convert-csv \ --label-set-id my-dataset \ --label-set-version 0 +# For CSV data with filtering (if you have an existing database) +pnpm run convert-csv \ + --input-file my-labels.csv \ + --output-file my-dataset_1.ensrainbow \ + --label-set-id my-dataset \ + --label-set-version 1 \ + --existing-db-path data-my-dataset + # For SQL data pnpm run convert \ --input-file my-data.sql.gz \ From e2b9255224621dac9208bb6c6f2ca00b6fbaf75c Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 24 Nov 2025 13:26:01 +0100 Subject: [PATCH 08/30] feat: enhance CSV conversion with Bloom filter and deduplication options - Added new command-line options for CSV conversion: `--silent`, `--disable-dedup`, `--cache-size`, `--use-bloom-filter`, and `--bloom-filter-size`. - Implemented a deduplication database using ClassicLevel with optional Bloom filter for faster processing. - Updated the conversion process to support deduplication and improved memory management. - Enhanced logging for large file processing and added tests for new deduplication features. 
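
An illustrative invocation combining the new options (all values are examples only):

    convert-csv --input-file big.csv --output-file big_0.ensrainbow \
      --label-set-id big --label-set-version 0 \
      --use-bloom-filter --bloom-filter-size 50000000 --cache-size 10000 --silent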
--- apps/ensrainbow/package.json | 4 +- apps/ensrainbow/src/cli.ts | 47 ++- .../src/commands/convert-csv-command.test.ts | 39 +- .../src/commands/convert-csv-command.ts | 342 +++++++++++++++--- pnpm-lock.yaml | 51 +++ 5 files changed, 427 insertions(+), 56 deletions(-) diff --git a/apps/ensrainbow/package.json b/apps/ensrainbow/package.json index 046cb2e2e..341e0d440 100644 --- a/apps/ensrainbow/package.json +++ b/apps/ensrainbow/package.json @@ -19,7 +19,8 @@ "validate:lite": "tsx src/cli.ts validate --lite", "purge": "tsx src/cli.ts purge", "convert": "tsx src/cli.ts convert", - "test": "vitest", + "convert-csv": "NODE_OPTIONS='--expose-gc --max-old-space-size=4096' tsx src/cli.ts convert-csv", + "test": "NODE_OPTIONS='--max-old-space-size=8192' vitest", "test:coverage": "vitest --coverage", "lint": "biome check --write .", "lint:ci": "biome ci", @@ -32,6 +33,7 @@ "@ensnode/ensrainbow-sdk": "workspace:*", "@ensnode/ensnode-sdk": "workspace:*", "@hono/node-server": "^1.4.1", + "bloom-filters": "^3.0.4", "classic-level": "^1.4.1", "hono": "catalog:", "pino": "catalog:", diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index d9d38c4f9..6e6bb4f32 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -69,6 +69,11 @@ interface ConvertCsvArgs { "label-set-version": LabelSetVersion; "progress-interval"?: number; "existing-db-path"?: string; + "silent"?: boolean; + "disable-dedup"?: boolean; + "cache-size"?: number; + "use-bloom-filter"?: boolean; + "bloom-filter-size"?: number; } export interface CLIOptions { @@ -261,10 +266,35 @@ export function createCLI(options: CLIOptions = {}) { description: "Number of records to process before logging progress", default: 10000, }) - .option("existing-db-path", { - type: "string", - description: "Path to existing ENSRainbow database to filter out existing labels", - }); + .option("existing-db-path", { + type: "string", + description: "Path to existing ENSRainbow database to filter out existing labels", + }) + .option("silent", { + type: "boolean", + description: "Disable progress bar (useful for scripts)", + default: false, + }) + .option("disable-dedup", { + type: "boolean", + description: "Disable deduplication within CSV file (faster but may create duplicates)", + default: false, + }) + .option("cache-size", { + type: "number", + description: "Cache size for deduplication (default: 5000)", + default: 5000, + }) + .option("use-bloom-filter", { + type: "boolean", + description: "Use Bloom filter for faster deduplication (default: false)", + default: false, + }) + .option("bloom-filter-size", { + type: "number", + description: "Expected number of items for Bloom filter (default: 10000000)", + default: 10000000, + }); }, async (argv: ArgumentsCamelCase) => { await convertCsvCommand({ @@ -272,8 +302,13 @@ export function createCLI(options: CLIOptions = {}) { outputFile: argv["output-file"], labelSetId: argv["label-set-id"], labelSetVersion: argv["label-set-version"], - progressInterval: argv["progress-interval"], - existingDbPath: argv["existing-db-path"], + progressInterval: argv["progress-interval"], + existingDbPath: argv["existing-db-path"], + silent: argv["silent"], + noDedup: argv["disable-dedup"], + cacheSize: argv["cache-size"], + useBloomFilter: argv["use-bloom-filter"], + bloomFilterSize: argv["bloom-filter-size"], }); }, ) diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index 9e2569ab2..c6ddadb03 100644 --- 
a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -38,6 +38,7 @@ describe("convert-csv-command", () => { outputFile, labelSetId: "test-csv-one-col" as LabelSetId, labelSetVersion: 0 as LabelSetVersion, + silent: true, }); // Verify the output file was created @@ -71,6 +72,7 @@ describe("convert-csv-command", () => { outputFile, labelSetId: "test-csv-two-col" as LabelSetId, labelSetVersion: 0 as LabelSetVersion, + silent: true, }); // Verify the output file was created @@ -119,6 +121,7 @@ describe("convert-csv-command", () => { outputFile, labelSetId: "test-csv-special" as LabelSetId, labelSetVersion: 0 as LabelSetVersion, + silent: true, }); // Verify output file was created @@ -229,6 +232,7 @@ describe("convert-csv-command", () => { outputFile: initialOutputFile, labelSetId: "test-filtering" as LabelSetId, labelSetVersion: 0 as LabelSetVersion, + silent: true, }); // Ingest the initial file @@ -255,6 +259,7 @@ describe("convert-csv-command", () => { labelSetId: "test-filtering" as LabelSetId, labelSetVersion: 0 as LabelSetVersion, // Use same version as initial existingDbPath: dataDir, + silent: true, }); // Verify the filtered output file was created @@ -296,6 +301,7 @@ describe("convert-csv-command", () => { outputFile, labelSetId: "test-duplicates" as LabelSetId, labelSetVersion: 0 as LabelSetVersion, + silent: true, }); // Verify the output file was created @@ -400,10 +406,10 @@ describe("convert-csv-command", () => { // Verify file was created const stats = await stat(outputFile); - expect(stats.isFile()).toBe(true); - expect(stats.size).toBeGreaterThan(0); - }); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); }); +}); describe("Streaming performance", () => { it("should handle small CSV files efficiently", async () => { @@ -426,6 +432,7 @@ describe("convert-csv-command", () => { outputFile, labelSetId: "test-small" as LabelSetId, labelSetVersion: 0 as LabelSetVersion, + silent: true, }); const conversionTime = Date.now() - startTime; @@ -453,5 +460,31 @@ describe("convert-csv-command", () => { const dbStats = await stat(dataDir); expect(dbStats.isDirectory()).toBe(true); }); + + it("should handle CSV files with many unique labels", async () => { + const inputFile = join(tempDir, "many_labels.csv"); + const outputFile = join(tempDir, "output_many_labels.ensrainbow"); + + // Create a CSV with 50,000 unique labels (tests deduplication with increased memory limit) + const records = []; + for (let i = 0; i < 50_000; i++) { + records.push(`label${i}`); + } + await writeFile(inputFile, records.join("\n")); + + // This should work without memory issues + await convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-many-labels" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + silent: true, + }); + + // Verify file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + }, 60000); // 60 second timeout for large file test }); }); diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index 34f64d935..0e0c8ac0e 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -5,10 +5,15 @@ * Supports 1-column (label only) and 2-column (label,labelhash) formats */ -import { createReadStream, createWriteStream } from "fs"; +import { createReadStream, createWriteStream, 
statSync } from "fs";
+import { rmSync } from "fs";
+import { join } from "path";
 import { type LabelHash, labelHashToBytes } from "@ensnode/ensnode-sdk";
 import { parse } from "@fast-csv/parse";
 import { labelhash } from "viem";
+import { ClassicLevel } from "classic-level";
+import ProgressBar from "progress";
+import bloomFilters from "bloom-filters";
 import { ENSRainbowDB } from "../lib/database.js";
 import { logger } from "../utils/logger.js";
 import {
@@ -16,6 +21,129 @@ import {
   createRainbowProtobufRoot,
 } from "../utils/protobuf-schema.js";
 
+/**
+ * Simple deduplication database using ClassicLevel directly
+ */
+class DeduplicationDB {
+  private pendingWrites: Map<string, string> = new Map();
+  private cache: Map<string, boolean> = new Map();
+  private cacheSize: number;
+  private bloomFilter: InstanceType<typeof bloomFilters.BloomFilter> | null = null;
+
+  constructor(private db: ClassicLevel<string, string>, cacheSize: number = 10000, useBloomFilter: boolean = false, expectedItems: number = 10000000) {
+    this.cacheSize = cacheSize;
+
+    if (useBloomFilter) {
+      // Create Bloom filter with a 1% false positive rate
+      this.bloomFilter = bloomFilters.BloomFilter.create(expectedItems, 0.01);
+      logger.info(`Created Bloom filter for ${expectedItems} items (~${(this.bloomFilter.size / 8 / 1024 / 1024).toFixed(2)} MB)`);
+    }
+  }
+
+  async has(key: string): Promise<boolean> {
+    // Check cache first
+    if (this.cache.has(key)) {
+      return this.cache.get(key)!;
+    }
+
+    // Check pending writes
+    if (this.pendingWrites.has(key)) {
+      this.cache.set(key, true);
+      return true;
+    }
+
+    // Use Bloom filter if available
+    if (this.bloomFilter) {
+      // If Bloom filter says "not present", we can skip LevelDB check
+      if (!this.bloomFilter.has(key)) {
+        this.cache.set(key, false);
+        return false;
+      }
+      // Bloom filter says "maybe present" - need to check LevelDB
+    }
+
+    // Check database
+    try {
+      await this.db.get(key);
+      this.cache.set(key, true);
+      return true;
+    } catch (error) {
+      this.cache.set(key, false);
+      return false;
+    }
+  }
+
+  async add(key: string, value: string): Promise<void> {
+    this.pendingWrites.set(key, value);
+    this.cache.set(key, true); // Cache the fact that this key exists
+
+    // Add to Bloom filter if available
+    if (this.bloomFilter) {
+      this.bloomFilter.add(key);
+    }
+
+    // Check cache size periodically (not on every add)
+    this.evictCacheIfNeeded();
+
+    // Flush to database periodically (smaller batch to reduce memory usage)
+    if (this.pendingWrites.size >= 5000) {
+      await this.flush();
+    }
+  }
+
+  private evictCacheIfNeeded(): void {
+    // Limit cache size - only evict when significantly exceeded
+    if (this.cache.size > this.cacheSize * 1.2) {
+      // Remove oldest 20% of entries
+      let toRemove = Math.floor(this.cacheSize * 0.2);
+      for (const key of this.cache.keys()) {
+        if (toRemove-- <= 0) break;
+        this.cache.delete(key);
+      }
+    }
+  }
+
+  async flush(): Promise<void> {
+    if (this.pendingWrites.size === 0) return;
+
+    const batch = this.db.batch();
+    for (const [key, value] of this.pendingWrites) {
+      batch.put(key, value);
+    }
+    await batch.write();
+    this.pendingWrites.clear();
+
+    // Hint to garbage collector after large batch
+    if (global.gc) {
+      global.gc();
+    }
+  }
+
+  async close(): Promise<void> {
+    await this.flush();
+    await this.db.close();
+  }
+}
+
+
+/**
+ * Sets up a simple progress bar that shows speed without total count.
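+ * The underlying "progress" package requires a fixed total, so the bar below is
+ * given a deliberately huge total and is read for its line count and rate rather
+ * than for percentage complete.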
+ */ +function setupProgressBar(): ProgressBar { + return new ProgressBar( + "Processing CSV [:bar] :current lines - :rate lines/sec", + { + complete: "=", + incomplete: " ", + width: 40, + total: 200000000, // Very large total for big files + }, + ); +} + +/** + * Options for CSV conversion command + */ export interface ConvertCsvCommandOptions { inputFile: string; outputFile: string; @@ -23,6 +151,11 @@ export interface ConvertCsvCommandOptions { labelSetVersion: number; progressInterval?: number; existingDbPath?: string; // Path to existing ENSRainbow database to check for existing labels + silent?: boolean; // Disable progress bar for tests + noDedup?: boolean; // Disable deduplication within CSV file + cacheSize?: number; // Cache size for deduplication (default: 10000) + useBloomFilter?: boolean; // Use Bloom filter for faster deduplication (default: false) + bloomFilterSize?: number; // Expected number of items for Bloom filter (default: 10000000) } // Configuration constants @@ -106,6 +239,20 @@ async function initializeConversion(options: ConvertCsvCommandOptions) { logger.info(`Label set id: ${options.labelSetId}`); logger.info(`Label set version: ${options.labelSetVersion}`); + // Check file size and warn for very large files + try { + const stats = statSync(options.inputFile); + const fileSizeMB = (stats.size / (1024 * 1024)).toFixed(2); + logger.info(`Input file size: ${fileSizeMB} MB`); + + if (stats.size > 1024 * 1024 * 1024) { // > 1GB + logger.warn("⚠️ Processing a very large file. This may take significant time and memory."); + logger.warn("💡 Consider using --existing-db-path to filter out existing labels for better performance."); + } + } catch (error) { + logger.warn(`Could not determine file size: ${error}`); + } + // Open existing database if path is provided let existingDb: ENSRainbowDB | null = null; if (options.existingDbPath) { @@ -143,7 +290,6 @@ function createRainbowRecord(row: string[]): { labelhash: Buffer; label: string if (row.length === 1) { // Single column: compute labelhash using labelhash function const labelHashBytes = labelHashToBytes(labelhash(label)); - console.log(label); return { labelhash: Buffer.from(labelHashBytes), label: label, @@ -161,7 +307,7 @@ function createRainbowRecord(row: string[]): { labelhash: Buffer; label: string } /** - * Process a single CSV record + * Process a single CSV record with LevelDB-based deduplication */ async function processRecord( row: string[], @@ -170,7 +316,7 @@ async function processRecord( outputStream: NodeJS.WritableStream, lineNumber: number, existingDb: ENSRainbowDB | null, - writtenLabels: Set, + dedupDb: DeduplicationDB | null, stats: ConversionStats, ): Promise { // Validate column count @@ -184,7 +330,7 @@ async function processRecord( const label = rainbowRecord.label; const labelHashBytes = rainbowRecord.labelhash; - // Check if labelhash already exists in the database + // Check if labelhash already exists in the existing database if (existingDb) { const existsInDb = await checkLabelHashExists(existingDb, labelHashBytes); if (existsInDb) { @@ -193,14 +339,17 @@ async function processRecord( } } - // Check if label is a duplicate within this conversion - if (writtenLabels.has(label)) { - stats.filteredDuplicates++; - return false; // Skip this record - } + // Check if label is a duplicate within this conversion using LevelDB (if enabled) + if (dedupDb) { + const existsInDedupDb = await dedupDb.has(label); + if (existsInDedupDb) { + stats.filteredDuplicates++; + return false; // Skip this record + } 
- // Add label to written set to track duplicates - writtenLabels.add(label); + // Add label to deduplication database + await dedupDb.add(label, ""); + } // Create protobuf message and write immediately const recordMessage = RainbowRecordType.fromObject(rainbowRecord); @@ -218,49 +367,89 @@ async function processCSVFile( outputStream: NodeJS.WritableStream, progressInterval: number, existingDb: ENSRainbowDB | null, + dedupDb: DeduplicationDB | null, stats: ConversionStats, + progressBar: ProgressBar | null, ): Promise<{ totalLines: number; processedRecords: number }> { - return new Promise((resolve, reject) => { - let expectedColumns: number | null = null; - let lineNumber = 0; - let processedRecords = 0; - const writtenLabels = new Set(); // Track labels written in this conversion + let expectedColumns: number | null = null; + let lineNumber = 0; + let processedRecords = 0; + let lastLoggedLine = 0; // Track last logged line to avoid duplicate logs + const startTime = Date.now(); // Track start time for overall processing + let lastLogTime = Date.now(); // Track time of last log for chunk timing + + // LevelDB-based deduplication: Uses temporary database to avoid RAM limits - const fileStream = createReadStream(inputFile, { encoding: "utf8" }); + const fileStream = createReadStream(inputFile, { encoding: "utf8" }); + + return new Promise((resolve, reject) => { + let pendingCount = 0; + const MAX_PENDING = 100; // Smaller limit to reduce memory const csvStream = parse() - .on("data", async (row: string[]) => { + .on("data", (row: string[]) => { lineNumber++; - try { - // For the first row, detect column count - if (expectedColumns === null) { - expectedColumns = row.length; - logger.info(`Detected ${expectedColumns} columns using fast-csv`); - } + // For the first row, detect column count + if (expectedColumns === null) { + expectedColumns = row.length; + logger.info(`Detected ${expectedColumns} columns using fast-csv`); + } - const wasProcessed = await processRecord( - row, - expectedColumns, - RainbowRecordType, - outputStream, - lineNumber, - existingDb, - writtenLabels, - stats, + // Log progress synchronously when line is read (not in async callback) + // This ensures logs appear at the correct intervals + if (lineNumber % progressInterval === 0 && lineNumber !== lastLoggedLine) { + const currentTime = Date.now(); + const chunkTime = currentTime - lastLogTime; // Time for this 10k chunk + const totalElapsed = currentTime - startTime; // Total time since start + const chunkTimeSeconds = (chunkTime / 1000).toFixed(2); + const totalTimeSeconds = (totalElapsed / 1000).toFixed(2); + const linesPerSecond = ((progressInterval / chunkTime) * 1000).toFixed(0); + + lastLoggedLine = lineNumber; + lastLogTime = currentTime; + + // Note: processedRecords may be slightly behind due to async processing + logger.info( + `Processed ${lineNumber} lines, written ${processedRecords} records | ` + + `Chunk: ${chunkTimeSeconds}s (${linesPerSecond} lines/sec) | ` + + `Total: ${totalTimeSeconds}s` ); + } + + // Backpressure: pause if too many pending + if (pendingCount >= MAX_PENDING) { + csvStream.pause(); + } + pendingCount++; + processRecord( + row, + expectedColumns, + RainbowRecordType, + outputStream, + lineNumber, + existingDb, + dedupDb, + stats, + ).then((wasProcessed) => { if (wasProcessed) { processedRecords++; } - - // Log progress for large files - if (lineNumber % progressInterval === 0) { - logger.info( - `Processed ${lineNumber} lines, written ${processedRecords} records so far...`, - ); + + 
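// A short illustrative sketch (annotation only; names are assumed, not from the patch)
// of the pause/resume backpressure rule used around this callback: reading pauses once
// MAX_PENDING rows are in flight and resumes after the backlog half-drains:
function applyBackpressure(
  stream: { pause(): void; resume(): void; isPaused(): boolean },
  pendingCount: number,
  maxPending: number,
): void {
  if (pendingCount >= maxPending) {
    stream.pause(); // too many unfinished rows: stop emitting "data" events
  } else if (stream.isPaused() && pendingCount < maxPending / 2) {
    stream.resume(); // backlog half-drained: start reading again
  }
}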
// Update progress bar every 1000 lines + if (lineNumber % 1000 === 0 && progressBar) { + progressBar.tick(1000); + progressBar.curr = lineNumber; + } + + pendingCount--; + + // Resume when under threshold + if (csvStream.isPaused() && pendingCount < MAX_PENDING / 2) { + csvStream.resume(); } - } catch (error) { + }).catch((error) => { const errorMessage = error instanceof Error ? error.message : String(error); csvStream.destroy(); fileStream.destroy(); @@ -269,12 +458,18 @@ async function processCSVFile( `CSV conversion failed due to invalid data on line ${lineNumber}: ${errorMessage}`, ), ); - } + }); }) .on("error", (error: Error) => { reject(new Error(`CSV parsing error: ${error.message}`)); }) - .on("end", () => { + .on("end", async () => { + // Wait for all pending to complete + while (pendingCount > 0) { + await new Promise(resolve => setTimeout(resolve, 10)); + } + const dedupStatus = dedupDb ? "LevelDB deduplication completed" : "Deduplication disabled"; + logger.info(dedupStatus); resolve({ totalLines: lineNumber, processedRecords }); }); @@ -299,13 +494,38 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom }; let existingDb: ENSRainbowDB | null = null; + let dedupDb: DeduplicationDB | null = null; + let tempDedupDir: string | null = null; try { const { RainbowRecordType, outputStream, existingDb: db } = await initializeConversion(options); existingDb = db; + // Create temporary deduplication database (if not disabled) + if (!options.noDedup) { + tempDedupDir = join(process.cwd(), 'temp-dedup-' + Date.now()); + logger.info(`Creating temporary deduplication database at: ${tempDedupDir}`); + const tempDb = new ClassicLevel(tempDedupDir, { + keyEncoding: 'utf8', + valueEncoding: 'utf8', + createIfMissing: true, + }); + await tempDb.open(); + dedupDb = new DeduplicationDB( + tempDb, + options.cacheSize ?? 10000, + options.useBloomFilter ?? false, + options.bloomFilterSize ?? 10000000 + ); + } else { + logger.info("Deduplication disabled - processing all records"); + } + const progressInterval = options.progressInterval ?? DEFAULT_PROGRESS_INTERVAL; + // Set up progress bar (only if not silent) + const progressBar = options.silent ? null : setupProgressBar(); + // Process the CSV file const { totalLines, processedRecords } = await processCSVFile( options.inputFile, @@ -313,11 +533,21 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom outputStream, progressInterval, existingDb, + dedupDb, stats, + progressBar, ); - stats.totalLines = totalLines; - stats.processedRecords = processedRecords; + stats.totalLines = totalLines; + stats.processedRecords = processedRecords; + + // Log final progress for large files + if (totalLines > 10_000) { + const dedupStatus = options.noDedup ? 
"dedup disabled" : "LevelDB dedup active"; + logger.info( + `✅ Completed processing ${totalLines.toLocaleString()} lines, wrote ${processedRecords.toLocaleString()} records (${dedupStatus})`, + ); + } // Close output stream outputStream.end(); @@ -330,7 +560,17 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom logger.error("❌ CSV conversion failed:", errorMessage); throw error; } finally { - // Clean up database connection + // Clean up deduplication database + if (dedupDb) { + try { + await dedupDb.close(); + logger.info("Closed deduplication database"); + } catch (error) { + logger.warn(`Failed to close deduplication database: ${error}`); + } + } + + // Clean up existing database connection if (existingDb) { try { await existingDb.close(); @@ -339,5 +579,15 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom logger.warn(`Failed to close existing database: ${error}`); } } + + // Remove temporary deduplication database directory + if (tempDedupDir) { + try { + rmSync(tempDedupDir, { recursive: true, force: true }); + logger.info(`Removed temporary deduplication database: ${tempDedupDir}`); + } catch (error) { + logger.warn(`Failed to remove temporary deduplication database: ${error}`); + } + } } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3dea391e0..3ccf052d8 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -462,6 +462,9 @@ importers: '@hono/node-server': specifier: ^1.4.1 version: 1.19.5(hono@4.10.3) + bloom-filters: + specifier: ^3.0.4 + version: 3.0.4 classic-level: specifier: ^1.4.1 version: 1.4.1 @@ -3435,6 +3438,9 @@ packages: '@types/sax@1.2.7': resolution: {integrity: sha512-rO73L89PJxeYM3s3pPPjiPgVVcymqU490g0YO5n5By0k2Erzj6tay/4lr1CHAAU4JyOWd1rpQ8bCf6cZfHU96A==} + '@types/seedrandom@3.0.8': + resolution: {integrity: sha512-TY1eezMU2zH2ozQoAFAQFOPpvP15g+ZgSfTZt31AUUH/Rxtnz3H+A/Sv1Snw2/amp//omibc+AEkTaA8KUeOLQ==} + '@types/tar@6.1.13': resolution: {integrity: sha512-IznnlmU5f4WcGTh2ltRu/Ijpmk8wiWXfF0VA4s+HPjHZgvFggk1YaIkbo5krX/zUCzWF8N/l4+W/LNxnvAJ8nw==} @@ -3783,6 +3789,10 @@ packages: base-64@1.0.0: resolution: {integrity: sha512-kwDPIFCGx0NZHog36dj+tHiwP4QMzsZ3AgMViUBKI0+V5n4U0ufTCUMhnQ04diaRI8EX/QcPfql7zlhZ7j4zgg==} + base64-arraybuffer@1.0.2: + resolution: {integrity: sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==} + engines: {node: '>= 0.6.0'} + base64-js@1.5.1: resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} @@ -3813,6 +3823,10 @@ packages: bintrees@1.0.2: resolution: {integrity: sha512-VOMgTMwjAaUG580SXn3LacVgjurrbMme7ZZNYGSSV7mmtY6QQRh0Eg3pwIcntQ77DErK1L0NxkbetjcoXzVwKw==} + bloom-filters@3.0.4: + resolution: {integrity: sha512-BdnPWo2OpYhlvuP2fRzJBdioMCkm7Zp0HCf8NJgF5Mbyqy7VQ/CnTiVWMMyq4EZCBHwj0Kq6098gW2/3RsZsrA==} + engines: {node: '>=12'} + boolbase@1.0.0: resolution: {integrity: sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==} @@ -4167,6 +4181,9 @@ packages: csstype@3.2.3: resolution: {integrity: sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==} + cuint@0.2.2: + resolution: {integrity: sha512-d4ZVpCW31eWwCMe1YT3ur7mUDnTXbgwyzaL320DrcRT45rfjYxkt5QWLrmOJ+/UEAI2+fQgKe/fCjR8l4TpRgw==} + cytoscape-cose-bilkent@4.1.0: resolution: {integrity: sha512-wgQlVIUJF13Quxiv5e1gstZ08rnZj2XaLHGoFMYXz7SkNfCDOOteKBE6SYRfA9WxxI/iBc3ajfDoc6hb/MRAHQ==} peerDependencies: @@ -6537,6 +6554,9 @@ packages: 
recma-stringify@1.0.0: resolution: {integrity: sha512-cjwII1MdIIVloKvC9ErQ+OgAtwHBmcZ0Bg4ciz78FtbT8In39aAYbaA7zvxQ61xVMSPE8WxhLwLbhif4Js2C+g==} + reflect-metadata@0.1.14: + resolution: {integrity: sha512-ZhYeb6nRaXCfhnndflDK8qI6ZQ/YcWZCISRAWICW9XYqMUwjZM9Z0DveWX/ABN01oxSHwVxKQmxeYZSsm0jh5A==} + regex-recursion@6.0.2: resolution: {integrity: sha512-0YCaSCq2VRIebiaUviZNs0cBz1kg5kVS2UKUfNIx8YVs1cN3AV7NTctO5FOKBA+UT2BPJIWZauYHPqJODG50cg==} @@ -6708,6 +6728,9 @@ packages: secure-json-parse@4.1.0: resolution: {integrity: sha512-l4KnYfEyqYJxDwlNVyRfO2E4NTHfMKAWdUuA8J0yve2Dz/E/PdBepY03RvyJpssIpRFwJoCD55wA+mEDs6ByWA==} + seedrandom@3.0.5: + resolution: {integrity: sha512-8OwmbklUNzwezjGInmZ+2clQmExQPvomqjL7LFqOYqtmuxRgQYqOD3mHaU+MvZn5FLUeVxVfQjwLZW/n/JFuqg==} + semver-compare@1.0.0: resolution: {integrity: sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow==} @@ -7767,6 +7790,9 @@ packages: xxhash-wasm@1.1.0: resolution: {integrity: sha512-147y/6YNh+tlp6nd/2pWq38i9h6mz/EuQ6njIrmW8D1BS5nCqs0P6DG+m6zTGnNz5I+uhZ0SHxBs9BsPrwcKDA==} + xxhashjs@0.2.2: + resolution: {integrity: sha512-AkTuIuVTET12tpsVIQo+ZU6f/qDmKuRUcjaqR+OIvm+aCBsZ95i7UVY5WJ9TMsSaZ0DA2WxoZ4acu0sPH+OKAw==} + y18n@5.0.8: resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==} engines: {node: '>=10'} @@ -10818,6 +10844,8 @@ snapshots: dependencies: '@types/node': 22.18.13 + '@types/seedrandom@3.0.8': {} + '@types/tar@6.1.13': dependencies: '@types/node': 22.18.13 @@ -11294,6 +11322,8 @@ snapshots: base-64@1.0.0: {} + base64-arraybuffer@1.0.2: {} + base64-js@1.5.1: {} baseline-browser-mapping@2.8.21: {} @@ -11320,6 +11350,17 @@ snapshots: bintrees@1.0.2: {} + bloom-filters@3.0.4: + dependencies: + '@types/seedrandom': 3.0.8 + base64-arraybuffer: 1.0.2 + is-buffer: 2.0.5 + lodash: 4.17.21 + long: 5.3.2 + reflect-metadata: 0.1.14 + seedrandom: 3.0.5 + xxhashjs: 0.2.2 + boolbase@1.0.0: {} boring-avatars@1.11.2: {} @@ -11695,6 +11736,8 @@ snapshots: csstype@3.2.3: {} + cuint@0.2.2: {} + cytoscape-cose-bilkent@4.1.0(cytoscape@3.33.1): dependencies: cose-base: 1.0.3 @@ -14442,6 +14485,8 @@ snapshots: unified: 11.0.5 vfile: 6.0.3 + reflect-metadata@0.1.14: {} + regex-recursion@6.0.2: dependencies: regex-utilities: 2.3.0 @@ -14708,6 +14753,8 @@ snapshots: secure-json-parse@4.1.0: {} + seedrandom@3.0.5: {} + semver-compare@1.0.0: {} semver@6.3.1: {} @@ -15814,6 +15861,10 @@ snapshots: xxhash-wasm@1.1.0: {} + xxhashjs@0.2.2: + dependencies: + cuint: 0.2.2 + y18n@5.0.8: {} yallist@3.1.1: {} From 2c94d417a9d8fc631c2035e9c245a49410fe727b Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 24 Nov 2025 13:27:31 +0100 Subject: [PATCH 09/30] refactor: simplify command options in package.json --- apps/ensrainbow/package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/ensrainbow/package.json b/apps/ensrainbow/package.json index 341e0d440..7379e93d3 100644 --- a/apps/ensrainbow/package.json +++ b/apps/ensrainbow/package.json @@ -19,8 +19,8 @@ "validate:lite": "tsx src/cli.ts validate --lite", "purge": "tsx src/cli.ts purge", "convert": "tsx src/cli.ts convert", - "convert-csv": "NODE_OPTIONS='--expose-gc --max-old-space-size=4096' tsx src/cli.ts convert-csv", - "test": "NODE_OPTIONS='--max-old-space-size=8192' vitest", + "convert-csv": "tsx src/cli.ts convert-csv", + "test": "vitest", "test:coverage": "vitest --coverage", "lint": "biome check --write .", "lint:ci": "biome ci", From 721a50d4507261fcf4efb93c347c430e2c364a1d 
Mon Sep 17 00:00:00 2001
From: djstrong
Date: Thu, 11 Dec 2025 20:56:40 +0100
Subject: [PATCH 10/30] refactor: improve memory management and logging in CSV
 conversion

- Added a function to estimate memory usage of Maps for better tracking.
- Reduced default cache size in DeduplicationDB from 10000 to 1000.
- Enhanced backpressure handling during CSV writing to prevent memory overflow.
- Updated logging to report output backpressure events and reduced logging overhead for large files.
- Streamlined the CSV processing to operate in a completely sequential manner.
---
 .../src/commands/convert-csv-command.ts       | 245 +++++++++---------
 1 file changed, 128 insertions(+), 117 deletions(-)

diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts
index 0e0c8ac0e..db7478664 100644
--- a/apps/ensrainbow/src/commands/convert-csv-command.ts
+++ b/apps/ensrainbow/src/commands/convert-csv-command.ts
@@ -21,17 +21,27 @@ import {
   createRainbowProtobufRoot,
 } from "../utils/protobuf-schema.js";
 
+/**
+ * Estimate memory usage of a Map (rough approximation)
+ */
+function estimateMapMemory(map: Map<string, string>): number {
+  let total = 0;
+  for (const [key, value] of map) {
+    // Rough estimate: key size + value size + Map overhead (48 bytes per entry)
+    total += (key.length * 2) + (typeof value === 'string' ? value.length * 2 : 8) + 48;
+  }
+  return total;
+}
+
 /**
  * Simple deduplication database using ClassicLevel directly
 */
 class DeduplicationDB {
   private pendingWrites: Map<string, string> = new Map();
-  private cache: Map<string, boolean> = new Map();
-  private cacheSize: number;
   private bloomFilter: typeof bloomFilters.BloomFilter | null = null;

-  constructor(private db: ClassicLevel, cacheSize: number = 10000, useBloomFilter: boolean = false, expectedItems: number = 10000000) {
-    this.cacheSize = cacheSize;
+  constructor(private db: ClassicLevel, cacheSize: number = 1000, useBloomFilter: boolean = false, expectedItems: number = 10000000) {
+    // No in-memory cache - LevelDB has its own internal cache

     if (useBloomFilter) {
       // Create Bloom filter with 0.1% false positive rate
@@ -41,68 +51,41 @@ class DeduplicationDB {
   }

   async has(key: string): Promise<boolean> {
-    // Check cache first
-    if (this.cache.has(key)) {
-      return this.cache.get(key)!;
-    }
-
-    // Check pending writes
+    // Check pending writes first (not yet flushed to DB)
     if (this.pendingWrites.has(key)) {
-      this.cache.set(key, true);
       return true;
     }

-    // Use Bloom filter if available
+    // Use Bloom filter if available (skip expensive DB lookup)
     if (this.bloomFilter) {
-      // If Bloom filter says "not present", we can skip LevelDB check
       if (!this.bloomFilter.has(key)) {
-        this.cache.set(key, false);
        return false;
      }
-      // Bloom filter says "maybe present" - need to check LevelDB
    }

-    // Check database
+    // Check database (LevelDB has its own internal cache)
    try {
      await this.db.get(key);
-      this.cache.set(key, true);
      return true;
    } catch (error) {
-      this.cache.set(key, false);
      return false;
    }
  }

  async add(key: string, value: string): Promise<void> {
    this.pendingWrites.set(key, value);
-    this.cache.set(key, true); // Cache the fact that this key exists

    // Add to Bloom filter if available
    if (this.bloomFilter) {
      this.bloomFilter.add(key);
    }

-    // Check cache size periodically (not on every add)
-    this.evictCacheIfNeeded();
-
-    // Flush to database periodically (smaller batch to reduce memory usage)
-    if (this.pendingWrites.size >= 5000) {
+    // Flush frequently to keep pendingWrites small
+    if (this.pendingWrites.size >= 1000) {
      await this.flush();
    }
  }

-  private evictCacheIfNeeded(): void {
-    // Limit cache size - only evict when significantly exceeded
-    if (this.cache.size > this.cacheSize * 1.2) {
-      // Remove oldest 20% of entries
-      let toRemove = Math.floor(this.cacheSize * 0.2);
-      for (const key of this.cache.keys()) {
-        if (toRemove-- <= 0) break;
-        this.cache.delete(key);
-      }
-    }
-  }
-
  async flush(): Promise<void> {
    if (this.pendingWrites.size === 0) return;

@@ -123,6 +106,15 @@ class DeduplicationDB {
    await this.flush();
    await this.db.close();
  }
+
+  getMemoryStats(): { pendingWrites: number; cache: number; pendingWritesMB: number; cacheMB: number } {
+    return {
+      pendingWrites: this.pendingWrites.size,
+      cache: 0, // Cache disabled - using LevelDB's internal cache
+      pendingWritesMB: estimateMapMemory(this.pendingWrites) / 1024 / 1024,
+      cacheMB: 0,
+    };
+  }
 }
 
 
@@ -159,13 +151,14 @@ export interface ConvertCsvCommandOptions {
 }
 
 // Configuration constants
-const DEFAULT_PROGRESS_INTERVAL = 10000;
+const DEFAULT_PROGRESS_INTERVAL = 50000; // Increased from 10k to 50k to reduce logging load
 
 interface ConversionStats {
   totalLines: number;
   processedRecords: number;
   filteredExistingLabels: number;
   filteredDuplicates: number;
+  outputBackpressureEvents: number;
   startTime: Date;
   endTime?: Date;
 }
@@ -174,8 +167,12 @@ interface ConversionStats {
 * Setup output stream for writing protobuf
 */
 function setupWriteStream(outputFile: string) {
-  // For now, just write directly to file without gzip compression
-  return createWriteStream(outputFile);
+  // Use very small highWaterMark (16KB) to trigger backpressure early and frequently
+  // This prevents unbounded buffer growth when writes are faster than disk I/O
+  // Smaller buffer = more frequent backpressure = better memory control
+  return createWriteStream(outputFile, {
+    highWaterMark: 16 * 1024, // 16KB buffer - very small to catch backpressure early
+  });
 }
 
/**
@@ -213,6 +210,7 @@ function logSummary(stats: ConversionStats) {
  logger.info(`Valid records: ${stats.processedRecords}`);
  logger.info(`Filtered existing labels: ${stats.filteredExistingLabels}`);
  logger.info(`Filtered duplicates: ${stats.filteredDuplicates}`);
+  logger.info(`Output backpressure events: ${stats.outputBackpressureEvents}`);
  logger.info(`Duration: ${duration}ms`);
}

@@ -246,8 +244,8 @@ async function initializeConversion(options: ConvertCsvCommandOptions) {
     logger.info(`Input file size: ${fileSizeMB} MB`);
 
     if (stats.size > 1024 * 1024 * 1024) { // > 1GB
-      logger.warn("⚠️ Processing a very large file. 
This may take significant time and memory."); - logger.warn("💡 Consider using --existing-db-path to filter out existing labels for better performance."); + logger.warn("⚠️ Processing a very large file - using SEQUENTIAL mode."); + logger.warn("💡 Use --existing-db-path to filter existing labels and speed up processing."); } } catch (error) { logger.warn(`Could not determine file size: ${error}`); @@ -351,15 +349,27 @@ async function processRecord( await dedupDb.add(label, ""); } - // Create protobuf message and write immediately + // Create protobuf message and write with backpressure handling const recordMessage = RainbowRecordType.fromObject(rainbowRecord); - outputStream.write(Buffer.from(RainbowRecordType.encodeDelimited(recordMessage).finish())); + const buffer = Buffer.from(RainbowRecordType.encodeDelimited(recordMessage).finish()); + + // Check if write returns false (buffer full) - if so, wait for drain + const canContinue = outputStream.write(buffer); + if (!canContinue) { + // Buffer is full - signal backpressure + stats.outputBackpressureEvents++; + // Wait for drain event before continuing + // Note: The CSV stream should be paused by the caller when backpressure is detected + await new Promise((resolve) => { + outputStream.once('drain', resolve); + }); + } return true; // Record was processed } /** - * Process the entire CSV file using fast-csv + * Process the entire CSV file - COMPLETELY SEQUENTIAL (one row at a time) */ async function processCSVFile( inputFile: string, @@ -374,102 +384,97 @@ async function processCSVFile( let expectedColumns: number | null = null; let lineNumber = 0; let processedRecords = 0; - let lastLoggedLine = 0; // Track last logged line to avoid duplicate logs - const startTime = Date.now(); // Track start time for overall processing - let lastLogTime = Date.now(); // Track time of last log for chunk timing - - // LevelDB-based deduplication: Uses temporary database to avoid RAM limits + let lastLoggedLine = 0; + const startTime = Date.now(); + let lastLogTime = Date.now(); const fileStream = createReadStream(inputFile, { encoding: "utf8" }); return new Promise((resolve, reject) => { - let pendingCount = 0; - const MAX_PENDING = 100; // Smaller limit to reduce memory + const csvStream = parse(); // Sequential processing via pause/resume + let isProcessing = false; + + csvStream + .on("data", async (row: string[]) => { + // PAUSE IMMEDIATELY - process one row at a time + csvStream.pause(); + isProcessing = true; - const csvStream = parse() - .on("data", (row: string[]) => { lineNumber++; - // For the first row, detect column count - if (expectedColumns === null) { - expectedColumns = row.length; - logger.info(`Detected ${expectedColumns} columns using fast-csv`); - } + try { + // Detect column count on first row + if (expectedColumns === null) { + expectedColumns = row.length; + logger.info(`Detected ${expectedColumns} columns - SEQUENTIAL processing mode`); + } - // Log progress synchronously when line is read (not in async callback) - // This ensures logs appear at the correct intervals - if (lineNumber % progressInterval === 0 && lineNumber !== lastLoggedLine) { - const currentTime = Date.now(); - const chunkTime = currentTime - lastLogTime; // Time for this 10k chunk - const totalElapsed = currentTime - startTime; // Total time since start - const chunkTimeSeconds = (chunkTime / 1000).toFixed(2); - const totalTimeSeconds = (totalElapsed / 1000).toFixed(2); - const linesPerSecond = ((progressInterval / chunkTime) * 1000).toFixed(0); - - 
lastLoggedLine = lineNumber; - lastLogTime = currentTime; - - // Note: processedRecords may be slightly behind due to async processing - logger.info( - `Processed ${lineNumber} lines, written ${processedRecords} records | ` + - `Chunk: ${chunkTimeSeconds}s (${linesPerSecond} lines/sec) | ` + - `Total: ${totalTimeSeconds}s` - ); - } + // Log progress (less frequently to avoid logger crashes) + if (lineNumber % progressInterval === 0 && lineNumber !== lastLoggedLine) { + const currentTime = Date.now(); + const chunkTime = currentTime - lastLogTime; + const totalElapsed = currentTime - startTime; + const chunkTimeSeconds = (chunkTime / 1000).toFixed(2); + const totalTimeSeconds = (totalElapsed / 1000).toFixed(2); + const linesPerSecond = ((progressInterval / chunkTime) * 1000).toFixed(0); + + lastLoggedLine = lineNumber; + lastLogTime = currentTime; + + const memUsage = process.memoryUsage(); + const memInfo = `RSS=${(memUsage.rss / 1024 / 1024).toFixed(0)}MB, Heap=${(memUsage.heapUsed / 1024 / 1024).toFixed(0)}MB`; + + let dedupInfo = ""; + if (dedupDb) { + const dedupStats = dedupDb.getMemoryStats(); + dedupInfo = ` | Dedup: ${dedupStats.pendingWrites}/${dedupStats.cache}`; + } + + // Use console.log instead of logger to avoid worker thread issues + console.log( + `[${new Date().toISOString()}] Line ${lineNumber}, written ${processedRecords} | ` + + `${linesPerSecond} lines/sec | ${memInfo}${dedupInfo}` + ); + } - // Backpressure: pause if too many pending - if (pendingCount >= MAX_PENDING) { - csvStream.pause(); - } + // Process this one record + const wasProcessed = await processRecord( + row, + expectedColumns, + RainbowRecordType, + outputStream, + lineNumber, + existingDb, + dedupDb, + stats, + ); - pendingCount++; - processRecord( - row, - expectedColumns, - RainbowRecordType, - outputStream, - lineNumber, - existingDb, - dedupDb, - stats, - ).then((wasProcessed) => { if (wasProcessed) { processedRecords++; } - - // Update progress bar every 1000 lines + + // Update progress bar if (lineNumber % 1000 === 0 && progressBar) { progressBar.tick(1000); progressBar.curr = lineNumber; } - - pendingCount--; - - // Resume when under threshold - if (csvStream.isPaused() && pendingCount < MAX_PENDING / 2) { - csvStream.resume(); - } - }).catch((error) => { + + // Done processing - resume for next row + isProcessing = false; + csvStream.resume(); + + } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); csvStream.destroy(); fileStream.destroy(); - reject( - new Error( - `CSV conversion failed due to invalid data on line ${lineNumber}: ${errorMessage}`, - ), - ); - }); + reject(new Error(`Failed on line ${lineNumber}: ${errorMessage}`)); + } }) .on("error", (error: Error) => { reject(new Error(`CSV parsing error: ${error.message}`)); }) - .on("end", async () => { - // Wait for all pending to complete - while (pendingCount > 0) { - await new Promise(resolve => setTimeout(resolve, 10)); - } - const dedupStatus = dedupDb ? 
"LevelDB deduplication completed" : "Deduplication disabled"; - logger.info(dedupStatus); + .on("end", () => { + logger.info(`Sequential processing complete`); resolve({ totalLines: lineNumber, processedRecords }); }); @@ -490,6 +495,7 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom processedRecords: 0, filteredExistingLabels: 0, filteredDuplicates: 0, + outputBackpressureEvents: 0, startTime: new Date(), }; @@ -509,11 +515,16 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom keyEncoding: 'utf8', valueEncoding: 'utf8', createIfMissing: true, + // Aggressive memory limits + cacheSize: 2 * 1024 * 1024, // 2MB block cache (minimal) + writeBufferSize: 4 * 1024 * 1024, // 4MB write buffer (minimal) + maxOpenFiles: 100, // Limit open files + compression: false, // Disable compression to reduce CPU/memory }); await tempDb.open(); dedupDb = new DeduplicationDB( tempDb, - options.cacheSize ?? 10000, + options.cacheSize ?? 1000, // Reduced default from 10000 to 1000 options.useBloomFilter ?? false, options.bloomFilterSize ?? 10000000 ); From 56bc3563a512da001524cd501368dde8816d3118 Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 15 Dec 2025 15:44:59 +0100 Subject: [PATCH 11/30] refactor: streamline CSV conversion CLI options and improve logging - Removed unused command-line options for deduplication and Bloom filter from the CLI interface. - Updated default progress interval from 10000 to 50000 records for improved performance. - Enhanced logging for file processing and memory management during CSV conversion. - Cleaned up code for better readability and maintainability. --- apps/ensrainbow/src/cli.ts | 56 ++----- .../src/commands/convert-csv-command.test.ts | 15 +- .../src/commands/convert-csv-command.ts | 141 +++++++----------- .../ensrainbow/concepts/creating-files.mdx | 6 +- 4 files changed, 82 insertions(+), 136 deletions(-) diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index 6e6bb4f32..de84a0963 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -69,11 +69,7 @@ interface ConvertCsvArgs { "label-set-version": LabelSetVersion; "progress-interval"?: number; "existing-db-path"?: string; - "silent"?: boolean; - "disable-dedup"?: boolean; - "cache-size"?: number; - "use-bloom-filter"?: boolean; - "bloom-filter-size"?: number; + silent?: boolean; } export interface CLIOptions { @@ -264,37 +260,17 @@ export function createCLI(options: CLIOptions = {}) { .option("progress-interval", { type: "number", description: "Number of records to process before logging progress", - default: 10000, + default: 50000, }) - .option("existing-db-path", { - type: "string", - description: "Path to existing ENSRainbow database to filter out existing labels", - }) - .option("silent", { - type: "boolean", - description: "Disable progress bar (useful for scripts)", - default: false, - }) - .option("disable-dedup", { - type: "boolean", - description: "Disable deduplication within CSV file (faster but may create duplicates)", - default: false, - }) - .option("cache-size", { - type: "number", - description: "Cache size for deduplication (default: 5000)", - default: 5000, - }) - .option("use-bloom-filter", { - type: "boolean", - description: "Use Bloom filter for faster deduplication (default: false)", - default: false, - }) - .option("bloom-filter-size", { - type: "number", - description: "Expected number of items for Bloom filter (default: 10000000)", - default: 10000000, - }); + .option("existing-db-path", 
{ + type: "string", + description: "Path to existing ENSRainbow database to filter out existing labels", + }) + .option("silent", { + type: "boolean", + description: "Disable progress bar (useful for scripts)", + default: false, + }); }, async (argv: ArgumentsCamelCase) => { await convertCsvCommand({ @@ -302,13 +278,9 @@ export function createCLI(options: CLIOptions = {}) { outputFile: argv["output-file"], labelSetId: argv["label-set-id"], labelSetVersion: argv["label-set-version"], - progressInterval: argv["progress-interval"], - existingDbPath: argv["existing-db-path"], - silent: argv["silent"], - noDedup: argv["disable-dedup"], - cacheSize: argv["cache-size"], - useBloomFilter: argv["use-bloom-filter"], - bloomFilterSize: argv["bloom-filter-size"], + progressInterval: argv["progress-interval"], + existingDbPath: argv["existing-db-path"], + silent: argv["silent"], }); }, ) diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index c6ddadb03..4f5b37eb6 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -1,12 +1,15 @@ +import { mkdtemp, rm, stat, writeFile } from "fs/promises"; import { tmpdir } from "os"; import { join } from "path"; -import { mkdtemp, rm, stat, writeFile } from "fs/promises"; + +import { labelhash } from "viem"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { type LabelSetId, type LabelSetVersion, labelHashToBytes } from "@ensnode/ensnode-sdk"; + import { createCLI } from "@/cli"; import { ENSRainbowDB } from "@/lib/database"; -import { type LabelSetId, type LabelSetVersion, labelHashToBytes } from "@ensnode/ensnode-sdk"; -import { labelhash } from "viem"; + import { convertCsvCommand } from "./convert-csv-command"; // Path to test fixtures @@ -406,10 +409,10 @@ describe("convert-csv-command", () => { // Verify file was created const stats = await stat(outputFile); - expect(stats.isFile()).toBe(true); - expect(stats.size).toBeGreaterThan(0); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + }); }); -}); describe("Streaming performance", () => { it("should handle small CSV files efficiently", async () => { diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index db7478664..47d790a69 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -5,15 +5,16 @@ * Supports 1-column (label only) and 2-column (label,labelhash) formats */ -import { createReadStream, createWriteStream, statSync } from "fs"; -import { rmSync } from "fs"; +import { createReadStream, createWriteStream, rmSync, statSync } from "fs"; import { join } from "path"; -import { type LabelHash, labelHashToBytes } from "@ensnode/ensnode-sdk"; + import { parse } from "@fast-csv/parse"; -import { labelhash } from "viem"; import { ClassicLevel } from "classic-level"; import ProgressBar from "progress"; -import bloomFilters from "bloom-filters"; +import { labelhash } from "viem"; + +import { type LabelHash, labelHashToBytes } from "@ensnode/ensnode-sdk"; + import { ENSRainbowDB } from "../lib/database.js"; import { logger } from "../utils/logger.js"; import { @@ -28,7 +29,7 @@ function estimateMapMemory(map: Map): number { let total = 0; for (const [key, value] of map) { // Rough estimate: key size + value size + Map overhead (48 bytes per entry) - total += 
(key.length * 2) + (typeof value === 'string' ? value.length * 2 : 8) + 48;
+    total += key.length * 2 + (typeof value === "string" ? value.length * 2 : 8) + 48;
   }
   return total;
 }
@@ -38,16 +39,9 @@ function estimateMapMemory(map: Map<string, string>): number {
 */
 class DeduplicationDB {
   private pendingWrites: Map<string, string> = new Map();
-  private bloomFilter: typeof bloomFilters.BloomFilter | null = null;

-  constructor(private db: ClassicLevel, cacheSize: number = 1000, useBloomFilter: boolean = false, expectedItems: number = 10000000) {
+  constructor(private db: ClassicLevel) {
     // No in-memory cache - LevelDB has its own internal cache
-
-    if (useBloomFilter) {
-      // Create Bloom filter with 0.1% false positive rate
-      this.bloomFilter = bloomFilters.BloomFilter.create(expectedItems, 0.01);
-      logger.info(`Created Bloom filter for ${expectedItems} items (~${(this.bloomFilter.size / 8 / 1024 / 1024).toFixed(2)} MB)`);
-    }
   }

   async has(key: string): Promise<boolean> {
@@ -56,13 +50,6 @@
    return true;
  }

-    // Use Bloom filter if available (skip expensive DB lookup)
-    if (this.bloomFilter) {
-      if (!this.bloomFilter.has(key)) {
-        return false;
-      }
-    }
-
     // Check database (LevelDB has its own internal cache)
    try {
      await this.db.get(key);
@@ -74,12 +61,7 @@

  async add(key: string, value: string): Promise<void> {
    this.pendingWrites.set(key, value);
-
-    // Add to Bloom filter if available
-    if (this.bloomFilter) {
-      this.bloomFilter.add(key);
-    }
-
+
    // Flush frequently to keep pendingWrites small
    if (this.pendingWrites.size >= 1000) {
      await this.flush();
@@ -95,7 +77,7 @@
    }
    await batch.write();
    this.pendingWrites.clear();
-
+
    // Hint to garbage collector after large batch
    if (global.gc) {
      global.gc();
@@ -107,7 +89,12 @@
    await this.db.close();
  }

-  getMemoryStats(): { pendingWrites: number; cache: number; pendingWritesMB: number; cacheMB: number } {
+  getMemoryStats(): {
+    pendingWrites: number;
+    cache: number;
+    pendingWritesMB: number;
+    cacheMB: number;
+  } {
    return {
      pendingWrites: this.pendingWrites.size,
      cache: 0, // Cache disabled - using LevelDB's internal cache
@@ -117,20 +104,16 @@
  }
 }
 
-
 /**
 * Sets up a simple progress bar that shows speed without total count. 
*/ function setupProgressBar(): ProgressBar { - return new ProgressBar( - "Processing CSV [:bar] :current lines - :rate lines/sec", - { - complete: "=", - incomplete: " ", - width: 40, - total: 200000000, // Very large total for big files - }, - ); + return new ProgressBar("Processing CSV [:bar] :current lines - :rate lines/sec", { + complete: "=", + incomplete: " ", + width: 40, + total: 200000000, // Very large total for big files + }); } /** @@ -144,10 +127,6 @@ export interface ConvertCsvCommandOptions { progressInterval?: number; existingDbPath?: string; // Path to existing ENSRainbow database to check for existing labels silent?: boolean; // Disable progress bar for tests - noDedup?: boolean; // Disable deduplication within CSV file - cacheSize?: number; // Cache size for deduplication (default: 10000) - useBloomFilter?: boolean; // Use Bloom filter for faster deduplication (default: false) - bloomFilterSize?: number; // Expected number of items for Bloom filter (default: 10000000) } // Configuration constants @@ -242,8 +221,9 @@ async function initializeConversion(options: ConvertCsvCommandOptions) { const stats = statSync(options.inputFile); const fileSizeMB = (stats.size / (1024 * 1024)).toFixed(2); logger.info(`Input file size: ${fileSizeMB} MB`); - - if (stats.size > 1024 * 1024 * 1024) { // > 1GB + + if (stats.size > 1024 * 1024 * 1024) { + // > 1GB logger.warn("⚠️ Processing a very large file - using SEQUENTIAL mode."); logger.warn("💡 Use --existing-db-path to filter existing labels and speed up processing."); } @@ -352,7 +332,7 @@ async function processRecord( // Create protobuf message and write with backpressure handling const recordMessage = RainbowRecordType.fromObject(rainbowRecord); const buffer = Buffer.from(RainbowRecordType.encodeDelimited(recordMessage).finish()); - + // Check if write returns false (buffer full) - if so, wait for drain const canContinue = outputStream.write(buffer); if (!canContinue) { @@ -361,7 +341,7 @@ async function processRecord( // Wait for drain event before continuing // Note: The CSV stream should be paused by the caller when backpressure is detected await new Promise((resolve) => { - outputStream.once('drain', resolve); + outputStream.once("drain", resolve); }); } @@ -417,23 +397,23 @@ async function processCSVFile( const chunkTimeSeconds = (chunkTime / 1000).toFixed(2); const totalTimeSeconds = (totalElapsed / 1000).toFixed(2); const linesPerSecond = ((progressInterval / chunkTime) * 1000).toFixed(0); - + lastLoggedLine = lineNumber; lastLogTime = currentTime; - + const memUsage = process.memoryUsage(); const memInfo = `RSS=${(memUsage.rss / 1024 / 1024).toFixed(0)}MB, Heap=${(memUsage.heapUsed / 1024 / 1024).toFixed(0)}MB`; - + let dedupInfo = ""; if (dedupDb) { const dedupStats = dedupDb.getMemoryStats(); dedupInfo = ` | Dedup: ${dedupStats.pendingWrites}/${dedupStats.cache}`; } - + // Use console.log instead of logger to avoid worker thread issues console.log( `[${new Date().toISOString()}] Line ${lineNumber}, written ${processedRecords} | ` + - `${linesPerSecond} lines/sec | ${memInfo}${dedupInfo}` + `${linesPerSecond} lines/sec | ${memInfo}${dedupInfo}`, ); } @@ -462,7 +442,6 @@ async function processCSVFile( // Done processing - resume for next row isProcessing = false; csvStream.resume(); - } catch (error) { const errorMessage = error instanceof Error ? 
error.message : String(error); csvStream.destroy(); @@ -507,30 +486,21 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom const { RainbowRecordType, outputStream, existingDb: db } = await initializeConversion(options); existingDb = db; - // Create temporary deduplication database (if not disabled) - if (!options.noDedup) { - tempDedupDir = join(process.cwd(), 'temp-dedup-' + Date.now()); - logger.info(`Creating temporary deduplication database at: ${tempDedupDir}`); - const tempDb = new ClassicLevel(tempDedupDir, { - keyEncoding: 'utf8', - valueEncoding: 'utf8', - createIfMissing: true, - // Aggressive memory limits - cacheSize: 2 * 1024 * 1024, // 2MB block cache (minimal) - writeBufferSize: 4 * 1024 * 1024, // 4MB write buffer (minimal) - maxOpenFiles: 100, // Limit open files - compression: false, // Disable compression to reduce CPU/memory - }); - await tempDb.open(); - dedupDb = new DeduplicationDB( - tempDb, - options.cacheSize ?? 1000, // Reduced default from 10000 to 1000 - options.useBloomFilter ?? false, - options.bloomFilterSize ?? 10000000 - ); - } else { - logger.info("Deduplication disabled - processing all records"); - } + // Create temporary deduplication database + tempDedupDir = join(process.cwd(), "temp-dedup-" + Date.now()); + logger.info(`Creating temporary deduplication database at: ${tempDedupDir}`); + const tempDb = new ClassicLevel(tempDedupDir, { + keyEncoding: "utf8", + valueEncoding: "utf8", + createIfMissing: true, + // Aggressive memory limits + cacheSize: 2 * 1024 * 1024, // 2MB block cache (minimal) + writeBufferSize: 4 * 1024 * 1024, // 4MB write buffer (minimal) + maxOpenFiles: 100, // Limit open files + compression: false, // Disable compression to reduce CPU/memory + }); + await tempDb.open(); + dedupDb = new DeduplicationDB(tempDb); const progressInterval = options.progressInterval ?? DEFAULT_PROGRESS_INTERVAL; @@ -549,16 +519,15 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom progressBar, ); - stats.totalLines = totalLines; - stats.processedRecords = processedRecords; + stats.totalLines = totalLines; + stats.processedRecords = processedRecords; - // Log final progress for large files - if (totalLines > 10_000) { - const dedupStatus = options.noDedup ? 
"dedup disabled" : "LevelDB dedup active"; - logger.info( - `✅ Completed processing ${totalLines.toLocaleString()} lines, wrote ${processedRecords.toLocaleString()} records (${dedupStatus})`, - ); - } + // Log final progress for large files + if (totalLines > 10_000) { + logger.info( + `✅ Completed processing ${totalLines.toLocaleString()} lines, wrote ${processedRecords.toLocaleString()} records (LevelDB dedup active)`, + ); + } // Close output stream outputStream.end(); diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx index 125e9916a..2d9ec8c10 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx @@ -124,7 +124,8 @@ pnpm run convert-csv \ --label-set-id \ --label-set-version \ [--progress-interval ] \ - [--existing-db-path ] + [--existing-db-path ] \ + [--silent] ``` ### Required Parameters @@ -136,8 +137,9 @@ pnpm run convert-csv \ ### Optional Parameters - `--output-file`: Output file path (defaults to `rainbow-records.ensrainbow`) -- `--progress-interval`: Progress logging frequency (default: 10000 records) +- `--progress-interval`: Progress logging frequency (default: 50000 records) - `--existing-db-path`: Path to existing ENSRainbow database to filter out existing labels +- `--silent`: Disable progress bar (useful for scripts and automated workflows) ### CSV Format Support From 11992d7abab25e36184d504bd9a82ab660b9e111 Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 15 Dec 2025 16:33:19 +0100 Subject: [PATCH 12/30] fix: improve error handling and logging in CSV conversion tests --- .../src/commands/convert-csv-command.test.ts | 4 +- .../src/commands/convert-csv-command.ts | 47 +++++++++++++++---- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index 4f5b37eb6..f3e85f6fa 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -110,7 +110,7 @@ describe("convert-csv-command", () => { labelSetId: "test-csv-invalid" as LabelSetId, labelSetVersion: 0 as LabelSetVersion, }), - ).rejects.toThrow(/CSV conversion failed due to invalid data/); + ).rejects.toThrow(/Failed on line 1: Invalid labelHash/); }); it("should handle CSV with special characters, emojis, unicode, and quoted fields", async () => { @@ -167,7 +167,7 @@ describe("convert-csv-command", () => { labelSetId: "test-csv-invalid-hash" as LabelSetId, labelSetVersion: 0 as LabelSetVersion, }), - ).rejects.toThrow(/CSV conversion failed due to invalid data/); + ).rejects.toThrow(/Failed on line 2: Invalid labelHash/); }); }); diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index 47d790a69..3a0f14d84 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -274,13 +274,22 @@ function createRainbowRecord(row: string[]): { labelhash: Buffer; label: string }; } else { // Two columns: validate and use provided hash - const providedHash = String(row[1]); + // Trim whitespace from hash (metadata), but preserve label as-is + const providedHash = String(row[1]).trim(); + if (providedHash === "") { + throw new Error("LabelHash cannot be empty"); + } const maybeLabelHash = 
providedHash.startsWith("0x") ? providedHash : `0x${providedHash}`; - const labelHash = labelHashToBytes(maybeLabelHash as LabelHash); - return { - labelhash: Buffer.from(labelHash), - label: label, - }; + try { + const labelHash = labelHashToBytes(maybeLabelHash as LabelHash); + return { + labelhash: Buffer.from(labelHash), + label: label, + }; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + throw new Error(`Invalid labelHash: ${errorMessage}`); + } } } @@ -373,6 +382,14 @@ async function processCSVFile( return new Promise((resolve, reject) => { const csvStream = parse(); // Sequential processing via pause/resume let isProcessing = false; + let streamEnded = false; + + const checkAndResolve = () => { + if (streamEnded && !isProcessing) { + logger.info(`Sequential processing complete`); + resolve({ totalLines: lineNumber, processedRecords }); + } + }; csvStream .on("data", async (row: string[]) => { @@ -383,7 +400,16 @@ async function processCSVFile( lineNumber++; try { - // Detect column count on first row + // Skip empty rows (no columns or all empty strings) + const isEmptyRow = row.length === 0 || row.every((cell) => cell === ""); + if (isEmptyRow) { + isProcessing = false; + csvStream.resume(); + checkAndResolve(); + return; + } + + // Detect column count on first non-empty row if (expectedColumns === null) { expectedColumns = row.length; logger.info(`Detected ${expectedColumns} columns - SEQUENTIAL processing mode`); @@ -442,6 +468,7 @@ async function processCSVFile( // Done processing - resume for next row isProcessing = false; csvStream.resume(); + checkAndResolve(); } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); csvStream.destroy(); @@ -453,8 +480,8 @@ async function processCSVFile( reject(new Error(`CSV parsing error: ${error.message}`)); }) .on("end", () => { - logger.info(`Sequential processing complete`); - resolve({ totalLines: lineNumber, processedRecords }); + streamEnded = true; + checkAndResolve(); }); fileStream @@ -537,7 +564,7 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom logger.info("✅ CSV conversion completed successfully!"); } catch (error) { const errorMessage = error instanceof Error ? 
error.message : String(error); - logger.error("❌ CSV conversion failed:", errorMessage); + logger.error(`❌ CSV conversion failed: ${errorMessage}`); throw error; } finally { // Clean up deduplication database From 3dea60ecf687863f3de34a3589512720618373e7 Mon Sep 17 00:00:00 2001 From: djstrong Date: Tue, 16 Dec 2025 21:59:20 +0100 Subject: [PATCH 13/30] refactor: update CSV conversion logic and improve deduplication handling --- .../src/commands/convert-csv-command.ts | 44 +++++++++---------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index 3a0f14d84..4770a0b1e 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -112,7 +112,7 @@ function setupProgressBar(): ProgressBar { complete: "=", incomplete: " ", width: 40, - total: 200000000, // Very large total for big files + total: 300000000, // Very large total for big files }); } @@ -225,7 +225,6 @@ async function initializeConversion(options: ConvertCsvCommandOptions) { if (stats.size > 1024 * 1024 * 1024) { // > 1GB logger.warn("⚠️ Processing a very large file - using SEQUENTIAL mode."); - logger.warn("💡 Use --existing-db-path to filter existing labels and speed up processing."); } } catch (error) { logger.warn(`Could not determine file size: ${error}`); @@ -303,7 +302,7 @@ async function processRecord( outputStream: NodeJS.WritableStream, lineNumber: number, existingDb: ENSRainbowDB | null, - dedupDb: DeduplicationDB | null, + dedupDb: DeduplicationDB, stats: ConversionStats, ): Promise { // Validate column count @@ -326,18 +325,16 @@ async function processRecord( } } - // Check if label is a duplicate within this conversion using LevelDB (if enabled) - if (dedupDb) { - const existsInDedupDb = await dedupDb.has(label); - if (existsInDedupDb) { - stats.filteredDuplicates++; - return false; // Skip this record - } - - // Add label to deduplication database - await dedupDb.add(label, ""); + // Check if label is a duplicate within this conversion using LevelDB + const existsInDedupDb = await dedupDb.has(label); + if (existsInDedupDb) { + stats.filteredDuplicates++; + return false; // Skip this record } + // Add label to deduplication database + await dedupDb.add(label, ""); + // Create protobuf message and write with backpressure handling const recordMessage = RainbowRecordType.fromObject(rainbowRecord); const buffer = Buffer.from(RainbowRecordType.encodeDelimited(recordMessage).finish()); @@ -366,7 +363,7 @@ async function processCSVFile( outputStream: NodeJS.WritableStream, progressInterval: number, existingDb: ENSRainbowDB | null, - dedupDb: DeduplicationDB | null, + dedupDb: DeduplicationDB, stats: ConversionStats, progressBar: ProgressBar | null, ): Promise<{ totalLines: number; processedRecords: number }> { @@ -419,9 +416,6 @@ async function processCSVFile( if (lineNumber % progressInterval === 0 && lineNumber !== lastLoggedLine) { const currentTime = Date.now(); const chunkTime = currentTime - lastLogTime; - const totalElapsed = currentTime - startTime; - const chunkTimeSeconds = (chunkTime / 1000).toFixed(2); - const totalTimeSeconds = (totalElapsed / 1000).toFixed(2); const linesPerSecond = ((progressInterval / chunkTime) * 1000).toFixed(0); lastLoggedLine = lineNumber; @@ -430,11 +424,8 @@ async function processCSVFile( const memUsage = process.memoryUsage(); const memInfo = `RSS=${(memUsage.rss / 1024 / 1024).toFixed(0)}MB, 
Heap=${(memUsage.heapUsed / 1024 / 1024).toFixed(0)}MB`; - let dedupInfo = ""; - if (dedupDb) { - const dedupStats = dedupDb.getMemoryStats(); - dedupInfo = ` | Dedup: ${dedupStats.pendingWrites}/${dedupStats.cache}`; - } + const dedupStats = dedupDb.getMemoryStats(); + const dedupInfo = ` | Dedup: ${dedupStats.pendingWrites}/${dedupStats.cache}`; // Use console.log instead of logger to avoid worker thread issues console.log( @@ -496,6 +487,11 @@ async function processCSVFile( * Main CSV conversion command with true streaming using fast-csv */ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Promise { + // Validate that existingDbPath is provided when labelSetVersion > 0 + if (options.labelSetVersion > 0 && !options.existingDbPath) { + throw new Error("existingDbPath must be specified if label set version is higher than 0"); + } + const stats: ConversionStats = { totalLines: 0, processedRecords: 0, @@ -506,7 +502,7 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom }; let existingDb: ENSRainbowDB | null = null; - let dedupDb: DeduplicationDB | null = null; + let dedupDb: DeduplicationDB | undefined; let tempDedupDir: string | null = null; try { @@ -568,7 +564,7 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom throw error; } finally { // Clean up deduplication database - if (dedupDb) { + if (dedupDb !== undefined) { try { await dedupDb.close(); logger.info("Closed deduplication database"); From b02b7f17cbd261af372f831061a9f6e75266a76d Mon Sep 17 00:00:00 2001 From: djstrong Date: Wed, 17 Dec 2025 16:48:52 +0100 Subject: [PATCH 14/30] refactor: remove unused dependencies and enhance CSV conversion tests --- apps/ensrainbow/package.json | 1 - .../src/commands/convert-csv-command.test.ts | 204 ++++++++++++++++++ .../src/commands/convert-csv-command.ts | 18 +- pnpm-lock.yaml | 51 ----- 4 files changed, 215 insertions(+), 59 deletions(-) diff --git a/apps/ensrainbow/package.json b/apps/ensrainbow/package.json index 387e79cbd..024d6f567 100644 --- a/apps/ensrainbow/package.json +++ b/apps/ensrainbow/package.json @@ -33,7 +33,6 @@ "@ensnode/ensrainbow-sdk": "workspace:*", "@ensnode/ensnode-sdk": "workspace:*", "@hono/node-server": "^1.4.1", - "bloom-filters": "^3.0.4", "classic-level": "^1.4.1", "hono": "catalog:", "pino": "catalog:", diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index f3e85f6fa..e45c8712c 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -490,4 +490,208 @@ describe("convert-csv-command", () => { expect(stats.size).toBeGreaterThan(0); }, 60000); // 60 second timeout for large file test }); + + describe("Edge cases", () => { + it("should handle empty CSV file", async () => { + const inputFile = join(tempDir, "empty.csv"); + const outputFile = join(tempDir, "output_empty.ensrainbow"); + await writeFile(inputFile, ""); + + // Should not throw error for empty file + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-empty" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + silent: true, + }), + ).resolves.not.toThrow(); + + // Verify the output file was created (should have header only) + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + + // Ingest and verify no records were written + const dataDir = join(tempDir, 
"db_empty"); + const cli = createCLI({ exitProcess: false }); + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + const recordsCount = await db.getPrecalculatedRainbowRecordCount(); + expect(recordsCount).toBe(0); + await db.close(); + }); + + it("should handle CSV file with only whitespace", async () => { + const inputFile = join(tempDir, "whitespace.csv"); + const outputFile = join(tempDir, "output_whitespace.ensrainbow"); + await writeFile(inputFile, " \n \n\t\n "); + + // Should not throw error for whitespace-only file + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-whitespace" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + silent: true, + }), + ).resolves.not.toThrow(); + + // Verify the output file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + }); + + it("should skip CSV header row if present", async () => { + const inputFile = join(tempDir, "with_header.csv"); + const outputFile = join(tempDir, "output_header.ensrainbow"); + const csvContent = + "label,labelhash\nalice,0x9c0257114eb9399a2985f8e75dad7600c5d89fe3824ffa99ec1c3eb8bf3b0501\nbob,0x38e47a7b719dce63662aeaf43440326f551b8a7ee198cee35cb5d517f2d296a2"; + await writeFile(inputFile, csvContent); + + // Should process the file (header will be treated as a regular row and fail validation) + // Actually, the header row will be processed and fail because "label" is not a valid hex hash + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-header" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + silent: true, + }), + ).rejects.toThrow(/Invalid labelHash/); + + // For a proper test, let's create a CSV where the header is valid data + const csvContentValid = "label\nlabel1\nlabel2"; + await writeFile(inputFile, csvContentValid); + + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-header-valid" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + silent: true, + }), + ).resolves.not.toThrow(); + + // Verify records were created (including "label" as a label) + const dataDir = join(tempDir, "db_header"); + const cli = createCLI({ exitProcess: false }); + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + const recordsCount = await db.getPrecalculatedRainbowRecordCount(); + // Should have 3 records: "label", "label1", "label2" + expect(recordsCount).toBe(3); + await db.close(); + }); + + it("should handle CSV with malformed rows (extra columns)", async () => { + const inputFile = join(tempDir, "malformed_extra_cols.csv"); + const outputFile = join(tempDir, "output_malformed.ensrainbow"); + const csvContent = + "alice\nbob,0x38e47a7b719dce63662aeaf43440326f551b8a7ee198cee35cb5d517f2d296a2,extra\ncharlie"; + await writeFile(inputFile, csvContent); + + // Should fail when column count is inconsistent + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-malformed" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + silent: true, + }), + ).rejects.toThrow(/Expected \d+ columns/); + }); + + it("should handle CSV with malformed rows (missing columns)", async () => { + const inputFile = join(tempDir, "malformed_missing_cols.csv"); + const outputFile = join(tempDir, 
"output_malformed2.ensrainbow"); + const csvContent = + "alice,0x9c0257114eb9399a2985f8e75dad7600c5d89fe3824ffa99ec1c3eb8bf3b0501\nbob\ncharlie,0x87a213ce1ee769e28decedefb98f6fe48890a74ba84957ebf877fb591e37e0de"; + await writeFile(inputFile, csvContent); + + // Should fail when column count is inconsistent + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-malformed2" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + silent: true, + }), + ).rejects.toThrow(/Expected \d+ columns/); + }); + + it("should handle CSV with quoted fields containing commas", async () => { + const inputFile = join(tempDir, "quoted_fields.csv"); + const outputFile = join(tempDir, "output_quoted.ensrainbow"); + // CSV with quoted fields that contain commas - use single column format to auto-compute hashes + const csvContent = '"label,with,commas"\n"another,label"'; + await writeFile(inputFile, csvContent); + + // Should handle quoted fields correctly + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-quoted" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + silent: true, + }), + ).resolves.not.toThrow(); + + // Verify the output file was created + const stats = await stat(outputFile); + expect(stats.isFile()).toBe(true); + expect(stats.size).toBeGreaterThan(0); + + // Ingest and verify records + const dataDir = join(tempDir, "db_quoted"); + const cli = createCLI({ exitProcess: false }); + await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); + + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + const recordsCount = await db.getPrecalculatedRainbowRecordCount(); + expect(recordsCount).toBe(2); + + // Verify the labels were stored correctly + const label1 = "label,with,commas"; + const label2 = "another,label"; + expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash(label1))))?.label).toBe( + label1, + ); + expect((await db.getVersionedRainbowRecord(labelHashToBytes(labelhash(label2))))?.label).toBe( + label2, + ); + await db.close(); + }); + + it("should handle CSV with empty labelhash column (should fail validation)", async () => { + const inputFile = join(tempDir, "empty_hash.csv"); + const outputFile = join(tempDir, "output_empty_hash.ensrainbow"); + const csvContent = + "alice,0x9c0257114eb9399a2985f8e75dad7600c5d89fe3824ffa99ec1c3eb8bf3b0501\nbob,\ncharlie,0x87a213ce1ee769e28decedefb98f6fe48890a74ba84957ebf877fb591e37e0de"; + await writeFile(inputFile, csvContent); + + // Should fail when labelhash is empty + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-empty-hash" as LabelSetId, + labelSetVersion: 0 as LabelSetVersion, + silent: true, + }), + ).rejects.toThrow(/LabelHash cannot be empty/); + }); + }); }); diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index 4770a0b1e..87995971e 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -63,7 +63,7 @@ class DeduplicationDB { this.pendingWrites.set(key, value); // Flush frequently to keep pendingWrites small - if (this.pendingWrites.size >= 1000) { + if (this.pendingWrites.size >= DEDUP_PENDING_WRITES_FLUSH_THRESHOLD) { await this.flush(); } } @@ -112,7 +112,7 @@ function setupProgressBar(): ProgressBar { complete: "=", incomplete: " ", width: 40, - total: 300000000, // Very large total for big files + total: 
PROGRESS_BAR_LARGE_TOTAL, }); } @@ -131,6 +131,11 @@ export interface ConvertCsvCommandOptions { // Configuration constants const DEFAULT_PROGRESS_INTERVAL = 50000; // Increased from 10k to 50k to reduce logging load +const PROGRESS_BAR_LARGE_TOTAL = 300_000_000; // Very large total for progress bar to handle big files +const DEDUP_PENDING_WRITES_FLUSH_THRESHOLD = 1000; // Flush deduplication DB when pending writes reach this count +const OUTPUT_STREAM_BUFFER_SIZE = 16 * 1024; // 16KB buffer - very small to catch backpressure early +const LARGE_FILE_SIZE_THRESHOLD_MB = 1024; // 1GB - warn user about very large files +const PROGRESS_BAR_UPDATE_INTERVAL = 1000; // Update progress bar every N lines interface ConversionStats { totalLines: number; @@ -150,7 +155,7 @@ function setupWriteStream(outputFile: string) { // This prevents unbounded buffer growth when writes are faster than disk I/O // Smaller buffer = more frequent backpressure = better memory control return createWriteStream(outputFile, { - highWaterMark: 16 * 1024, // 16KB buffer - very small to catch backpressure early + highWaterMark: OUTPUT_STREAM_BUFFER_SIZE, }); } @@ -222,8 +227,7 @@ async function initializeConversion(options: ConvertCsvCommandOptions) { const fileSizeMB = (stats.size / (1024 * 1024)).toFixed(2); logger.info(`Input file size: ${fileSizeMB} MB`); - if (stats.size > 1024 * 1024 * 1024) { - // > 1GB + if (stats.size > LARGE_FILE_SIZE_THRESHOLD_MB * 1024 * 1024) { logger.warn("⚠️ Processing a very large file - using SEQUENTIAL mode."); } } catch (error) { @@ -451,8 +455,8 @@ async function processCSVFile( } // Update progress bar - if (lineNumber % 1000 === 0 && progressBar) { - progressBar.tick(1000); + if (lineNumber % PROGRESS_BAR_UPDATE_INTERVAL === 0 && progressBar) { + progressBar.tick(PROGRESS_BAR_UPDATE_INTERVAL); progressBar.curr = lineNumber; } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b38eaba1b..2d4600d82 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -462,9 +462,6 @@ importers: '@hono/node-server': specifier: ^1.4.1 version: 1.19.5(hono@4.10.3) - bloom-filters: - specifier: ^3.0.4 - version: 3.0.4 classic-level: specifier: ^1.4.1 version: 1.4.1 @@ -3438,9 +3435,6 @@ packages: '@types/sax@1.2.7': resolution: {integrity: sha512-rO73L89PJxeYM3s3pPPjiPgVVcymqU490g0YO5n5By0k2Erzj6tay/4lr1CHAAU4JyOWd1rpQ8bCf6cZfHU96A==} - '@types/seedrandom@3.0.8': - resolution: {integrity: sha512-TY1eezMU2zH2ozQoAFAQFOPpvP15g+ZgSfTZt31AUUH/Rxtnz3H+A/Sv1Snw2/amp//omibc+AEkTaA8KUeOLQ==} - '@types/tar@6.1.13': resolution: {integrity: sha512-IznnlmU5f4WcGTh2ltRu/Ijpmk8wiWXfF0VA4s+HPjHZgvFggk1YaIkbo5krX/zUCzWF8N/l4+W/LNxnvAJ8nw==} @@ -3789,10 +3783,6 @@ packages: base-64@1.0.0: resolution: {integrity: sha512-kwDPIFCGx0NZHog36dj+tHiwP4QMzsZ3AgMViUBKI0+V5n4U0ufTCUMhnQ04diaRI8EX/QcPfql7zlhZ7j4zgg==} - base64-arraybuffer@1.0.2: - resolution: {integrity: sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==} - engines: {node: '>= 0.6.0'} - base64-js@1.5.1: resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} @@ -3823,10 +3813,6 @@ packages: bintrees@1.0.2: resolution: {integrity: sha512-VOMgTMwjAaUG580SXn3LacVgjurrbMme7ZZNYGSSV7mmtY6QQRh0Eg3pwIcntQ77DErK1L0NxkbetjcoXzVwKw==} - bloom-filters@3.0.4: - resolution: {integrity: sha512-BdnPWo2OpYhlvuP2fRzJBdioMCkm7Zp0HCf8NJgF5Mbyqy7VQ/CnTiVWMMyq4EZCBHwj0Kq6098gW2/3RsZsrA==} - engines: {node: '>=12'} - boolbase@1.0.0: resolution: {integrity: 
sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==} @@ -4181,9 +4167,6 @@ packages: csstype@3.2.3: resolution: {integrity: sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==} - cuint@0.2.2: - resolution: {integrity: sha512-d4ZVpCW31eWwCMe1YT3ur7mUDnTXbgwyzaL320DrcRT45rfjYxkt5QWLrmOJ+/UEAI2+fQgKe/fCjR8l4TpRgw==} - cytoscape-cose-bilkent@4.1.0: resolution: {integrity: sha512-wgQlVIUJF13Quxiv5e1gstZ08rnZj2XaLHGoFMYXz7SkNfCDOOteKBE6SYRfA9WxxI/iBc3ajfDoc6hb/MRAHQ==} peerDependencies: @@ -6554,9 +6537,6 @@ packages: recma-stringify@1.0.0: resolution: {integrity: sha512-cjwII1MdIIVloKvC9ErQ+OgAtwHBmcZ0Bg4ciz78FtbT8In39aAYbaA7zvxQ61xVMSPE8WxhLwLbhif4Js2C+g==} - reflect-metadata@0.1.14: - resolution: {integrity: sha512-ZhYeb6nRaXCfhnndflDK8qI6ZQ/YcWZCISRAWICW9XYqMUwjZM9Z0DveWX/ABN01oxSHwVxKQmxeYZSsm0jh5A==} - regex-recursion@6.0.2: resolution: {integrity: sha512-0YCaSCq2VRIebiaUviZNs0cBz1kg5kVS2UKUfNIx8YVs1cN3AV7NTctO5FOKBA+UT2BPJIWZauYHPqJODG50cg==} @@ -6728,9 +6708,6 @@ packages: secure-json-parse@4.1.0: resolution: {integrity: sha512-l4KnYfEyqYJxDwlNVyRfO2E4NTHfMKAWdUuA8J0yve2Dz/E/PdBepY03RvyJpssIpRFwJoCD55wA+mEDs6ByWA==} - seedrandom@3.0.5: - resolution: {integrity: sha512-8OwmbklUNzwezjGInmZ+2clQmExQPvomqjL7LFqOYqtmuxRgQYqOD3mHaU+MvZn5FLUeVxVfQjwLZW/n/JFuqg==} - semver-compare@1.0.0: resolution: {integrity: sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow==} @@ -7790,9 +7767,6 @@ packages: xxhash-wasm@1.1.0: resolution: {integrity: sha512-147y/6YNh+tlp6nd/2pWq38i9h6mz/EuQ6njIrmW8D1BS5nCqs0P6DG+m6zTGnNz5I+uhZ0SHxBs9BsPrwcKDA==} - xxhashjs@0.2.2: - resolution: {integrity: sha512-AkTuIuVTET12tpsVIQo+ZU6f/qDmKuRUcjaqR+OIvm+aCBsZ95i7UVY5WJ9TMsSaZ0DA2WxoZ4acu0sPH+OKAw==} - y18n@5.0.8: resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==} engines: {node: '>=10'} @@ -10844,8 +10818,6 @@ snapshots: dependencies: '@types/node': 22.18.13 - '@types/seedrandom@3.0.8': {} - '@types/tar@6.1.13': dependencies: '@types/node': 22.18.13 @@ -11322,8 +11294,6 @@ snapshots: base-64@1.0.0: {} - base64-arraybuffer@1.0.2: {} - base64-js@1.5.1: {} baseline-browser-mapping@2.8.21: {} @@ -11350,17 +11320,6 @@ snapshots: bintrees@1.0.2: {} - bloom-filters@3.0.4: - dependencies: - '@types/seedrandom': 3.0.8 - base64-arraybuffer: 1.0.2 - is-buffer: 2.0.5 - lodash: 4.17.21 - long: 5.3.2 - reflect-metadata: 0.1.14 - seedrandom: 3.0.5 - xxhashjs: 0.2.2 - boolbase@1.0.0: {} boring-avatars@1.11.2: {} @@ -11736,8 +11695,6 @@ snapshots: csstype@3.2.3: {} - cuint@0.2.2: {} - cytoscape-cose-bilkent@4.1.0(cytoscape@3.33.1): dependencies: cose-base: 1.0.3 @@ -14485,8 +14442,6 @@ snapshots: unified: 11.0.5 vfile: 6.0.3 - reflect-metadata@0.1.14: {} - regex-recursion@6.0.2: dependencies: regex-utilities: 2.3.0 @@ -14753,8 +14708,6 @@ snapshots: secure-json-parse@4.1.0: {} - seedrandom@3.0.5: {} - semver-compare@1.0.0: {} semver@6.3.1: {} @@ -15861,10 +15814,6 @@ snapshots: xxhash-wasm@1.1.0: {} - xxhashjs@0.2.2: - dependencies: - cuint: 0.2.2 - y18n@5.0.8: {} yallist@3.1.1: {} From 35a05cb08d576053f1b4192beac4b2597bc8f30c Mon Sep 17 00:00:00 2001 From: "kwrobel.eth" Date: Mon, 5 Jan 2026 14:19:28 +0100 Subject: [PATCH 15/30] Apply suggestions from code review Co-authored-by: lightwalker.eth <126201998+lightwalker-eth@users.noreply.github.com> --- .changeset/brave-kiwis-notice.md | 2 +- apps/ensrainbow/src/cli.ts | 8 ++++---- 
.../ensrainbow/concepts/creating-files.mdx | 19 ++++++++----------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/.changeset/brave-kiwis-notice.md b/.changeset/brave-kiwis-notice.md index fbdba8bfc..a514e5684 100644 --- a/.changeset/brave-kiwis-notice.md +++ b/.changeset/brave-kiwis-notice.md @@ -2,4 +2,4 @@ "ensrainbow": patch --- -feat: add CSV conversion command to ensrainbow CLI +feat: add CSV conversion command to ensrainbow CLI to convert rainbow tables from CSV format to ensrainbow format diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index de84a0963..b010bed15 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -242,18 +242,18 @@ export function createCLI(options: CLIOptions = {}) { }) .option("output-file", { type: "string", - description: "Path to the output ensrainbow file", + description: "Path to where the resulting ensrainbow file will be output", default: join(process.cwd(), "rainbow-records.ensrainbow"), }) .option("label-set-id", { type: "string", - description: "Label set id for the rainbow record collection", + description: "Label set id for the generated ensrainbow file", demandOption: true, }) .coerce("label-set-id", buildLabelSetId) .option("label-set-version", { type: "number", - description: "Label set version for the rainbow record collection", + description: "Label set version for the generated ensrainbow file", demandOption: true, }) .coerce("label-set-version", buildLabelSetVersion) @@ -264,7 +264,7 @@ export function createCLI(options: CLIOptions = {}) { }) .option("existing-db-path", { type: "string", - description: "Path to existing ENSRainbow database to filter out existing labels", + description: "Path to existing ENSRainbow database to filter out existing labels from the generated ensrainbow file", }) .option("silent", { type: "boolean", diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx index 2d9ec8c10..d914a344d 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx @@ -1,10 +1,10 @@ --- title: Creating ENSRainbow Files -description: Complete guide to creating .ensrainbow files from SQL dumps and CSV data. +description: Complete guide to creating .ensrainbow files. sidebar: label: Creating Files order: 3 -keywords: [ensrainbow, file creation, conversion, sql, csv] +keywords: [ensrainbow, file creation, conversion, csv] --- ENSRainbow provides two methods for creating `.ensrainbow` files from different data sources. This guide helps you choose the right method and provides step-by-step instructions. @@ -42,20 +42,17 @@ For detailed information about the file format structure, see the [Data Model](/ | Method | Input Format | Use Case | Command | |--------|-------------|----------|---------| | **SQL Conversion** | Gzipped SQL dump (`ens_names.sql.gz`) | Converting legacy ENS Subgraph data | `pnpm run convert` | -| **CSV Conversion** | CSV file (1 or 2 columns) | Custom datasets, test data, external sources | `pnpm run convert-csv` | +| **CSV Conversion** | CSV file (1 or 2 columns) | Building new ENS rainbow tables | `pnpm run convert-csv` | ### When to Use SQL Conversion - Converting existing ENS Subgraph rainbow tables -- Working with legacy `ens_names.sql.gz` files +- Working with the legacy `ens_names.sql.gz` file. 
- Migrating from previous ENS data formats ### When to Use CSV Conversion -- Creating test datasets -- Converting data from external sources -- Working with custom label collections -- Building incremental label sets +- Creating new rainbow tables for ENSRainbow ## Method 1: Converting from SQL Dumps @@ -275,7 +272,7 @@ pnpm run ingest-ensrainbow \ pnpm run serve --data-dir data-test-env --port 3223 ``` -### Workflow 3: Building Custom Dataset +### Workflow 3: Create a new Labelset ```bash # 1. Create CSV with your labels @@ -298,10 +295,10 @@ pnpm run ingest-ensrainbow \ pnpm run serve --data-dir data-custom --port 3223 ``` -### Workflow 4: Creating Incremental Updates +### Workflow 4: Creating Incremental Label Set Versions ```bash -# 1. Create initial dataset +# 1. Create initial labelset pnpm run convert-csv \ --input-file initial-labels.csv \ --output-file my-dataset_0.ensrainbow \ From 2cc8cad606029d1ccd3e7d30225b9a94a70c48f3 Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 5 Jan 2026 15:31:23 +0100 Subject: [PATCH 16/30] refactor: rename convert command to convert-sql and update CLI documentation --- apps/ensrainbow/package.json | 2 +- apps/ensrainbow/src/cli.test.ts | 22 +-- apps/ensrainbow/src/cli.ts | 73 ++++---- .../src/commands/convert-csv-command.test.ts | 6 +- .../ensrainbow/concepts/creating-files.mdx | 162 +++++++++--------- .../docs/ensrainbow/concepts/data-model.mdx | 4 +- .../ensrainbow/contributing/cli-reference.mdx | 27 ++- .../docs/ensrainbow/contributing/index.mdx | 12 +- .../src/content/docs/ensrainbow/faq.mdx | 21 ++- 9 files changed, 177 insertions(+), 152 deletions(-) diff --git a/apps/ensrainbow/package.json b/apps/ensrainbow/package.json index 024d6f567..704a88cf7 100644 --- a/apps/ensrainbow/package.json +++ b/apps/ensrainbow/package.json @@ -19,7 +19,7 @@ "validate:lite": "tsx src/cli.ts validate --lite", "purge": "tsx src/cli.ts purge", "convert": "tsx src/cli.ts convert", - "convert-csv": "tsx src/cli.ts convert-csv", + "convert-sql": "tsx src/cli.ts convert-sql", "test": "vitest", "test:coverage": "vitest --coverage", "lint": "biome check --write .", diff --git a/apps/ensrainbow/src/cli.test.ts b/apps/ensrainbow/src/cli.test.ts index ff9364a32..596b35663 100644 --- a/apps/ensrainbow/src/cli.test.ts +++ b/apps/ensrainbow/src/cli.test.ts @@ -111,7 +111,7 @@ describe("CLI", () => { expect(() => cli.parse([ - "convert", + "convert-sql", "--input-file", sqlInputFile, "--output-file", @@ -122,7 +122,7 @@ describe("CLI", () => { // Successful convert with args const ingestCli = createCLI({ exitProcess: false }); await ingestCli.parse([ - "convert", + "convert-sql", "--input-file", sqlInputFile, "--output-file", @@ -132,7 +132,7 @@ describe("CLI", () => { "--label-set-version", labelSetVersion.toString(), ]); - //command: pnpm convert --input-file test/fixtures/test_ens_names.sql.gz --output-file test/fixtures/test_ens_names_0.ensrainbow --label-set-id test-ens-names --label-set-version 0 + //command: pnpm convert-sql --input-file test/fixtures/test_ens_names.sql.gz --output-file test/fixtures/test_ens_names_0.ensrainbow --label-set-id test-ens-names --label-set-version 0 //verify that the file is created await expect(stat(ensrainbowOutputFile)).resolves.toBeDefined(); @@ -163,7 +163,7 @@ describe("CLI", () => { expect(() => cli.parse([ - "convert", + "convert-sql", "--input-file", sqlInputFile, "--output-file", @@ -174,7 +174,7 @@ describe("CLI", () => { // Successful convert with args const ingestCli = createCLI({ exitProcess: false }); await ingestCli.parse([ 
- "convert", + "convert-sql", "--input-file", sqlInputFile, "--output-file", @@ -184,7 +184,7 @@ describe("CLI", () => { "--label-set-version", labelSetVersion.toString(), ]); - //command: pnpm convert --input-file test_ens_names.sql.gz --output-file test_ens_names_0.ensrainbow --label-set-id test-ens-names --label-set-version 0 + //command: pnpm convert-sql --input-file test_ens_names.sql.gz --output-file test_ens_names_0.ensrainbow --label-set-id test-ens-names --label-set-version 0 //verify that the file is created await expect(stat(ensrainbowOutputFile)).resolves.toBeDefined(); @@ -211,7 +211,7 @@ describe("CLI", () => { expect(() => cli.parse([ - "convert", + "convert-sql", "--input-file", sqlInputFile, "--output-file", @@ -222,7 +222,7 @@ describe("CLI", () => { const ingestCli2 = createCLI({ exitProcess: false }); // Successful convert with args await ingestCli2.parse([ - "convert", + "convert-sql", "--input-file", sqlInputFile, "--output-file", @@ -266,7 +266,7 @@ describe("CLI", () => { // Successful convert with label set version 2 const convertCli = createCLI({ exitProcess: false }); await convertCli.parse([ - "convert", + "convert-sql", "--input-file", sqlInputFile, "--output-file", @@ -318,7 +318,7 @@ describe("CLI", () => { // Create second file with different label set id and label set version 0 const convertCli = createCLI({ exitProcess: false }); await convertCli.parse([ - "convert", + "convert-sql", "--input-file", sqlInputFile, "--output-file", @@ -331,7 +331,7 @@ describe("CLI", () => { // Create third file with different label set id and label set version 1 await convertCli.parse([ - "convert", + "convert-sql", "--input-file", sqlInputFile, "--output-file", diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index b010bed15..75dc53587 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -195,92 +195,93 @@ export function createCLI(options: CLIOptions = {}) { ) .command( "convert", - "Convert rainbow tables from SQL dump to ensrainbow format", + "Convert rainbow tables from CSV format to ensrainbow format", (yargs: Argv) => { return yargs .option("input-file", { type: "string", - description: "Path to the gzipped SQL dump file", - default: join(process.cwd(), "ens_names.sql.gz"), + description: "Path to the CSV input file", + demandOption: true, }) .option("output-file", { type: "string", - description: "Path to the output ensrainbow file", + description: "Path to where the resulting ensrainbow file will be output", default: join(process.cwd(), "rainbow-records.ensrainbow"), }) .option("label-set-id", { type: "string", - description: "Label set id for the rainbow record collection", + description: "Label set id for the generated ensrainbow file", demandOption: true, }) .coerce("label-set-id", buildLabelSetId) .option("label-set-version", { type: "number", - description: "Label set version for the rainbow record collection", + description: "Label set version for the generated ensrainbow file", demandOption: true, }) - .coerce("label-set-version", buildLabelSetVersion); + .coerce("label-set-version", buildLabelSetVersion) + .option("progress-interval", { + type: "number", + description: "Number of records to process before logging progress", + default: 50000, + }) + .option("existing-db-path", { + type: "string", + description: + "Path to existing ENSRainbow database to filter out existing labels from the generated ensrainbow file", + }) + .option("silent", { + type: "boolean", + description: "Disable progress bar (useful for 
scripts)", + default: false, + }); }, - async (argv: ArgumentsCamelCase) => { - await convertCommand({ + async (argv: ArgumentsCamelCase) => { + await convertCsvCommand({ inputFile: argv["input-file"], outputFile: argv["output-file"], labelSetId: argv["label-set-id"], labelSetVersion: argv["label-set-version"], + progressInterval: argv["progress-interval"], + existingDbPath: argv["existing-db-path"], + silent: argv["silent"], }); }, ) .command( - "convert-csv", - "Convert rainbow tables from CSV format to ensrainbow format", + "convert-sql", + "Convert rainbow tables from legacy SQL dump to ensrainbow format", (yargs: Argv) => { return yargs .option("input-file", { type: "string", - description: "Path to the CSV input file", - demandOption: true, + description: "Path to the gzipped SQL dump file", + default: join(process.cwd(), "ens_names.sql.gz"), }) .option("output-file", { type: "string", - description: "Path to where the resulting ensrainbow file will be output", + description: "Path to the output ensrainbow file", default: join(process.cwd(), "rainbow-records.ensrainbow"), }) .option("label-set-id", { type: "string", - description: "Label set id for the generated ensrainbow file", + description: "Label set id for the rainbow record collection", demandOption: true, }) .coerce("label-set-id", buildLabelSetId) .option("label-set-version", { type: "number", - description: "Label set version for the generated ensrainbow file", + description: "Label set version for the rainbow record collection", demandOption: true, }) - .coerce("label-set-version", buildLabelSetVersion) - .option("progress-interval", { - type: "number", - description: "Number of records to process before logging progress", - default: 50000, - }) - .option("existing-db-path", { - type: "string", - description: "Path to existing ENSRainbow database to filter out existing labels from the generated ensrainbow file", - }) - .option("silent", { - type: "boolean", - description: "Disable progress bar (useful for scripts)", - default: false, - }); + .coerce("label-set-version", buildLabelSetVersion); }, - async (argv: ArgumentsCamelCase) => { - await convertCsvCommand({ + async (argv: ArgumentsCamelCase) => { + await convertCommand({ inputFile: argv["input-file"], outputFile: argv["output-file"], labelSetId: argv["label-set-id"], labelSetVersion: argv["label-set-version"], - progressInterval: argv["progress-interval"], - existingDbPath: argv["existing-db-path"], - silent: argv["silent"], }); }, ) diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index e45c8712c..685ff6da7 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -197,7 +197,7 @@ describe("convert-csv-command", () => { // Test convert-csv command through CLI await cli.parse([ - "convert-csv", + "convert", "--input-file", inputFile, "--output-file", @@ -373,7 +373,7 @@ describe("convert-csv-command", () => { const cli = createCLI({ exitProcess: false }); await cli.parse([ - "convert-csv", + "convert", "--input-file", inputFile, "--output-file", @@ -394,7 +394,7 @@ describe("convert-csv-command", () => { // Now test CLI with existing database path await cli.parse([ - "convert-csv", + "convert", "--input-file", inputFile, "--output-file", diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx index 
d914a344d..cb1ac514e 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx @@ -41,81 +41,29 @@ For detailed information about the file format structure, see the [Data Model](/ | Method | Input Format | Use Case | Command | |--------|-------------|----------|---------| -| **SQL Conversion** | Gzipped SQL dump (`ens_names.sql.gz`) | Converting legacy ENS Subgraph data | `pnpm run convert` | -| **CSV Conversion** | CSV file (1 or 2 columns) | Building new ENS rainbow tables | `pnpm run convert-csv` | - -### When to Use SQL Conversion - -- Converting existing ENS Subgraph rainbow tables -- Working with the legacy `ens_names.sql.gz` file. -- Migrating from previous ENS data formats +| **CSV Conversion** | CSV file (1 or 2 columns) | Building new ENS rainbow tables | `pnpm run convert` | +| **SQL Conversion** | Gzipped SQL dump (`ens_names.sql.gz`) | Converting legacy ENS Subgraph data | `pnpm run convert-sql` | ### When to Use CSV Conversion - Creating new rainbow tables for ENSRainbow +- Building custom label sets +- Standard data ingestion workflow -## Method 1: Converting from SQL Dumps - -The `convert` command processes gzipped SQL dump files from the ENS Subgraph. - -### Command Syntax - -```bash -pnpm run convert \ - --input-file \ - --output-file \ - --label-set-id \ - --label-set-version -``` - -### Required Parameters - -- `--input-file`: Path to the gzipped SQL dump file -- `--label-set-id`: Identifier for the label set (e.g., `subgraph`, `discovery-a`) -- `--label-set-version`: Version number for the label set (non-negative integer) - -### Optional Parameters - -- `--output-file`: Output file path (defaults to `rainbow-records.ensrainbow`) - -### Example: Converting ENS Subgraph Data - -```bash -# Convert main ENS Subgraph data -pnpm run convert \ - --input-file ens_names.sql.gz \ - --output-file subgraph_0.ensrainbow \ - --label-set-id subgraph \ - --label-set-version 0 -``` - -### Example: Converting Test Data - -```bash -# Convert ens-test-env data -pnpm run convert \ - --input-file test/fixtures/ens_test_env_names.sql.gz \ - --output-file ens-test-env_0.ensrainbow \ - --label-set-id ens-test-env \ - --label-set-version 0 -``` - -### How It Works +### When to Use SQL Conversion -1. **Streams** the gzipped SQL file to avoid memory issues -2. **Parses** SQL COPY statements to extract label/labelhash pairs -3. **Validates** each record and skips invalid entries -4. **Writes** protobuf messages with length-delimited encoding -5. **Creates** a header message followed by individual record messages +- Converting existing ENS Subgraph rainbow tables +- Working with the legacy `ens_names.sql.gz` file +- Migrating from previous ENS data formats -## Method 2: Converting from CSV Files +## Method 1: Converting from CSV Files -The `convert-csv` command processes CSV files with flexible column formats. +The `convert` command processes CSV files with flexible column formats. 
### Command Syntax ```bash -pnpm run convert-csv \ +pnpm run convert \ --input-file \ --output-file \ --label-set-id \ @@ -168,7 +116,7 @@ The CSV converter includes built-in filtering capabilities to prevent duplicate Use `--existing-db-path` to filter out labels that already exist in an existing ENSRainbow database: ```bash -pnpm run convert-csv \ +pnpm run convert \ --input-file new-labels.csv \ --output-file incremental_1.ensrainbow \ --label-set-id my-dataset \ @@ -200,7 +148,7 @@ Duration: 150ms ```bash # Create test dataset from CSV -pnpm run convert-csv \ +pnpm run convert \ --input-file test-labels.csv \ --output-file test-dataset_0.ensrainbow \ --label-set-id test-dataset \ @@ -212,7 +160,7 @@ pnpm run convert-csv \ ```bash # Create discovery dataset (initially empty) echo "" > empty.csv -pnpm run convert-csv \ +pnpm run convert \ --input-file empty.csv \ --output-file discovery-a_0.ensrainbow \ --label-set-id discovery-a \ @@ -227,7 +175,61 @@ pnpm run convert-csv \ 4. **Computes** or validates labelhashes as needed 5. **Filters** existing labels if `--existing-db-path` is provided 6. **Filters** duplicate labels within the same CSV file -7. **Writes** protobuf messages with the same format as SQL conversion +7. **Writes** protobuf messages with length-delimited encoding + +## Method 2: Converting from SQL Dumps + +The `convert-sql` command processes gzipped SQL dump files from the ENS Subgraph. + +### Command Syntax + +```bash +pnpm run convert-sql \ + --input-file \ + --output-file \ + --label-set-id \ + --label-set-version +``` + +### Required Parameters + +- `--input-file`: Path to the gzipped SQL dump file +- `--label-set-id`: Identifier for the label set (e.g., `subgraph`, `discovery-a`) +- `--label-set-version`: Version number for the label set (non-negative integer) + +### Optional Parameters + +- `--output-file`: Output file path (defaults to `rainbow-records.ensrainbow`) + +### Example: Converting ENS Subgraph Data + +```bash +# Convert main ENS Subgraph data +pnpm run convert-sql \ + --input-file ens_names.sql.gz \ + --output-file subgraph_0.ensrainbow \ + --label-set-id subgraph \ + --label-set-version 0 +``` + +### Example: Converting Test Data + +```bash +# Convert ens-test-env data +pnpm run convert-sql \ + --input-file test/fixtures/ens_test_env_names.sql.gz \ + --output-file ens-test-env_0.ensrainbow \ + --label-set-id ens-test-env \ + --label-set-version 0 +``` + +### How It Works + +1. **Streams** the gzipped SQL file to avoid memory issues +2. **Parses** SQL COPY statements to extract label/labelhash pairs +3. **Validates** each record and skips invalid entries +4. **Writes** protobuf messages with length-delimited encoding +5. **Creates** a header message followed by individual record messages ## Common Workflows @@ -235,7 +237,7 @@ pnpm run convert-csv \ ```bash # 1. Convert SQL dump to .ensrainbow -pnpm run convert \ +pnpm run convert-sql \ --input-file ens_names.sql.gz \ --output-file subgraph_0.ensrainbow \ --label-set-id subgraph \ @@ -257,7 +259,7 @@ pnpm run serve --data-dir data-subgraph --port 3223 ```bash # 1. Convert test data -pnpm run convert \ +pnpm run convert-sql \ --input-file test/fixtures/ens_test_env_names.sql.gz \ --output-file ens-test-env_0.ensrainbow \ --label-set-id ens-test-env \ @@ -281,7 +283,7 @@ mylabel2 mylabel3" > custom-labels.csv # 2. 
Convert to .ensrainbow -pnpm run convert-csv \ +pnpm run convert \ --input-file custom-labels.csv \ --output-file custom_0.ensrainbow \ --label-set-id custom \ @@ -299,7 +301,7 @@ pnpm run serve --data-dir data-custom --port 3223 ```bash # 1. Create initial labelset -pnpm run convert-csv \ +pnpm run convert \ --input-file initial-labels.csv \ --output-file my-dataset_0.ensrainbow \ --label-set-id my-dataset \ @@ -311,7 +313,7 @@ pnpm run ingest-ensrainbow \ --data-dir data-my-dataset # 3. Create incremental update (filtering existing labels) -pnpm run convert-csv \ +pnpm run convert \ --input-file new-labels.csv \ --output-file my-dataset_1.ensrainbow \ --label-set-id my-dataset \ @@ -379,26 +381,26 @@ If you want to create, publish, and distribute your own `.ensrainbow` files, fol ### 1. Create Your Dataset -First, prepare your data in either SQL or CSV (recommended) format, then convert it using the appropriate method: +First, prepare your data in either CSV (recommended) or SQL format, then convert it using the appropriate method: ```bash -# For CSV data -pnpm run convert-csv \ +# For CSV data (recommended) +pnpm run convert \ --input-file my-labels.csv \ --output-file my-dataset_0.ensrainbow \ --label-set-id my-dataset \ --label-set-version 0 # For CSV data with filtering (if you have an existing database) -pnpm run convert-csv \ +pnpm run convert \ --input-file my-labels.csv \ --output-file my-dataset_1.ensrainbow \ --label-set-id my-dataset \ --label-set-version 1 \ --existing-db-path data-my-dataset -# For SQL data -pnpm run convert \ +# For legacy SQL data +pnpm run convert-sql \ --input-file my-data.sql.gz \ --output-file my-dataset_0.ensrainbow \ --label-set-id my-dataset \ @@ -614,7 +616,7 @@ LABEL_SET_ID="my-dataset" NEW_VERSION="1" # Create new .ensrainbow file -pnpm run convert-csv \ +pnpm run convert \ --input-file updated-labels.csv \ --output-file ${LABEL_SET_ID}_${NEW_VERSION}.ensrainbow \ --label-set-id ${LABEL_SET_ID} \ @@ -652,7 +654,7 @@ export ENSRAINBOW_LABELSET_SERVER_URL="https://my-label-set-server.com" # Test downloading prebuilt database ./scripts/download-prebuilt-database.sh 3 my-dataset 0 -# Verify the database works +# Verify the database works by ingesting the downloaded file pnpm run ingest-ensrainbow \ --input-file labelsets/my-dataset_0.ensrainbow \ --data-dir test-data diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx index e1df686d0..64189a07c 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx @@ -108,8 +108,8 @@ subgraph_1.ensrainbow # next version with incremental labelhash-to-label map ENSRainbow provides two methods for creating `.ensrainbow` files from different data sources: -- **SQL Conversion**: Convert legacy ENS Subgraph data (`ens_names.sql.gz`) using `pnpm run convert` -- **CSV Conversion**: Convert custom datasets from CSV files using `pnpm run convert-csv` +- **CSV Conversion**: Convert custom datasets from CSV files using `pnpm run convert` +- **SQL Conversion**: Convert legacy ENS Subgraph data (`ens_names.sql.gz`) using `pnpm run convert-sql` For complete instructions, examples, and workflow guidance, see the [Creating ENSRainbow Files](/ensrainbow/concepts/creating-files) guide. 
diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/contributing/cli-reference.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/contributing/cli-reference.mdx index 6326f7b8b..31b8c686a 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/contributing/cli-reference.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/contributing/cli-reference.mdx @@ -8,26 +8,39 @@ keywords: [ensrainbow, cli] | Command | Purpose | Most useful flags | Example | |---------|---------|-------------------|---------| -| `convert` | Convert legacy `.sql.gz` rainbow tables to `.ensrainbow` format. **This is currently the only way to create new .ensrainbow files.** | `--input-file`, `--output-file`, `--label-set-id`, `--label-set-version` | `pnpm run convert --input-file ens_names.sql.gz --output-file subgraph-0.ensrainbow` | -| `ingest-ensrainbow` | Stream a `.ensrainbow` file into LevelDB | `--input-file`, `--data-dir` | `pnpm run ingest-ensrainbow --input-file subgraph-0.ensrainbow --data-dir ./data` | +| `convert` | Convert CSV files to `.ensrainbow` format. **This is the primary method for creating new .ensrainbow files.** | `--input-file`, `--output-file`, `--label-set-id`, `--label-set-version`, `--existing-db-path`, `--silent` | `pnpm run convert --input-file labels.csv --output-file my-dataset_0.ensrainbow --label-set-id my-dataset --label-set-version 0` | +| `convert-sql` | Convert legacy `.sql.gz` rainbow tables (ENS Subgraph data) to `.ensrainbow` format | `--input-file`, `--output-file`, `--label-set-id`, `--label-set-version` | `pnpm run convert-sql --input-file ens_names.sql.gz --output-file subgraph_0.ensrainbow --label-set-id subgraph --label-set-version 0` | +| `ingest-ensrainbow` | Stream a `.ensrainbow` file into LevelDB | `--input-file`, `--data-dir` | `pnpm run ingest-ensrainbow --input-file my-dataset_0.ensrainbow --data-dir ./data` | | `validate` | Verify DB integrity | `--data-dir`, `--lite` | `pnpm run validate --lite` | | `purge` | Delete all DB files in a directory | `--data-dir` | `pnpm run purge --data-dir ./data` | | `serve` | Launch the HTTP API server | `--data-dir`, `--port` | `pnpm run serve --port 3223` | ## Creating .ensrainbow Files -:::note[Important] -The `convert` command is **the only way** to create new `.ensrainbow` files from scratch. If you need to create custom label sets with your own data, you must use this command to convert from PostgreSQL dump format. +### CSV Conversion (Recommended) -You can download existing `.ensrainbow` files using the download scripts, but for creating entirely new files, `convert` is your only option. -::: +The `convert` command is the **primary method** for creating new `.ensrainbow` files from CSV data. 
**Full convert command syntax:** ```bash pnpm run convert \ - --input-file path/to/your_data.sql.gz \ + --input-file path/to/labels.csv \ --output-file path/to/output.ensrainbow \ --label-set-id your-label-set-id \ + --label-set-version 0 \ + [--existing-db-path path/to/existing/database] \ + [--silent] +``` + +### SQL Conversion (Legacy) + +For converting legacy ENS Subgraph data from SQL dumps: + +```bash +pnpm run convert-sql \ + --input-file path/to/ens_names.sql.gz \ + --output-file path/to/output.ensrainbow \ + --label-set-id subgraph \ --label-set-version 0 ``` diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx index 401a0f986..03d213258 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx @@ -115,7 +115,7 @@ To ingest the test data into ENSRainbow: If you don't have a pre-converted `ens-test-env-0.ensrainbow` file: ```bash # Navigate to apps/ensrainbow or adjust paths accordingly - pnpm run convert --input-file test/fixtures/ens_test_env_names.sql.gz --output-file ens-test-env-0.ensrainbow + pnpm run convert-sql --input-file test/fixtures/ens_test_env_names.sql.gz --output-file ens-test-env-0.ensrainbow --label-set-id ens-test-env --label-set-version 0 ``` This creates `ens-test-env-0.ensrainbow`. @@ -274,21 +274,21 @@ This section covers the conversion of source data (like SQL dumps or empty files This command converts a SQL dump file (`ens_names.sql.gz`) into an `.ensrainbow` file for version 0 of the `subgraph` Label Set. ```bash # Assuming ens_names.sql.gz contains the primary dataset -time pnpm run convert --input-file ens_names.sql.gz --output-file subgraph_0.ensrainbow --label-set-id subgraph --label-set-version 0 +time pnpm run convert-sql --input-file ens_names.sql.gz --output-file subgraph_0.ensrainbow --label-set-id subgraph --label-set-version 0 ``` **For the `discovery-a` Label Set (initially empty for discovered labels):** This creates an empty `.ensrainbow` file for version 0 of the `discovery-a` Label Set, which is used for labels discovered dynamically. ```bash -touch empty.sql -gzip empty.sql -time pnpm run convert --input-file empty.sql.gz --output-file discovery-a_0.ensrainbow --label-set-id discovery-a --label-set-version 0 +# Create empty CSV file for discovery dataset +echo "" > empty.csv +time pnpm run convert --input-file empty.csv --output-file discovery-a_0.ensrainbow --label-set-id discovery-a --label-set-version 0 ``` **For the `ens-test-env` Label Set (for testing):** This converts a test dataset SQL dump into an `.ensrainbow` file for version 0 of the `ens-test-env` Label Set. ```bash -time pnpm run convert --input-file test/fixtures/ens_test_env_names.sql.gz --output-file ens-test-env_0.ensrainbow --label-set-id ens-test-env --label-set-version 0 +time pnpm run convert-sql --input-file test/fixtures/ens_test_env_names.sql.gz --output-file ens-test-env_0.ensrainbow --label-set-id ens-test-env --label-set-version 0 ``` ### 2. 
Upload `.ensrainbow` Files to R2 Storage diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx index fa0d5704b..5262fd4c1 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx @@ -22,18 +22,27 @@ To stay informed about new versions, monitor the [Available Label Sets](/ensrain ## How can I create my own .ensrainbow file? -Currently, the `convert` command is the **only way** to create new `.ensrainbow` files from scratch. This command converts PostgreSQL rainbow table dumps (`.sql.gz` format) into the binary protobuf format that ENSRainbow uses. +ENSRainbow provides two methods for creating `.ensrainbow` files: -**To create a custom .ensrainbow file:** +**Method 1: CSV Conversion (Recommended)** -1. **Prepare your data** as a PostgreSQL dump file (`.sql.gz`) with ENS labels and labelhashes +The `convert` command is the **primary method** for creating new `.ensrainbow` files from CSV data: + +1. **Prepare your data** as a CSV file with labels (1 column) or labels and labelhashes (2 columns) 2. **Run the convert command:** ```bash - pnpm run convert --input-file your_data.sql.gz --output-file custom.ensrainbow + pnpm run convert --input-file your_labels.csv --output-file custom.ensrainbow --label-set-id my-dataset --label-set-version 0 ``` -3. **Specify the label set details** using `--label-set-id` and `--label-set-version` flags -**Note:** You can download existing `.ensrainbow` files using the download scripts, but for creating entirely new files with your own data, the `convert` command is currently the only option available. +**Method 2: SQL Conversion (Legacy)** + +For converting legacy ENS Subgraph data from PostgreSQL dumps: + +```bash +pnpm run convert-sql --input-file ens_names.sql.gz --output-file custom.ensrainbow --label-set-id subgraph --label-set-version 0 +``` + +**Note:** You can also download existing `.ensrainbow` files using the download scripts. See the [CLI Reference](/ensrainbow/contributing/cli-reference/) for detailed command usage. 
From bbc2786e75bd462f51899cc38a5c08dc405fefe0 Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 5 Jan 2026 15:39:30 +0100 Subject: [PATCH 17/30] refactor: update CLI documentation for output file and label set descriptions --- apps/ensrainbow/src/cli.ts | 6 +++--- .../src/content/docs/ensrainbow/concepts/creating-files.mdx | 4 +--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index 75dc53587..07e4a46dd 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -260,18 +260,18 @@ export function createCLI(options: CLIOptions = {}) { }) .option("output-file", { type: "string", - description: "Path to the output ensrainbow file", + description: "Path to where the resulting ensrainbow file will be output", default: join(process.cwd(), "rainbow-records.ensrainbow"), }) .option("label-set-id", { type: "string", - description: "Label set id for the rainbow record collection", + description: "Label set id for the generated ensrainbow file", demandOption: true, }) .coerce("label-set-id", buildLabelSetId) .option("label-set-version", { type: "number", - description: "Label set version for the rainbow record collection", + description: "Label set version for the generated ensrainbow file", demandOption: true, }) .coerce("label-set-version", buildLabelSetVersion); diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx index cb1ac514e..9d3655397 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx @@ -47,8 +47,6 @@ For detailed information about the file format structure, see the [Data Model](/ ### When to Use CSV Conversion - Creating new rainbow tables for ENSRainbow -- Building custom label sets -- Standard data ingestion workflow ### When to Use SQL Conversion @@ -83,7 +81,7 @@ pnpm run convert \ - `--output-file`: Output file path (defaults to `rainbow-records.ensrainbow`) - `--progress-interval`: Progress logging frequency (default: 50000 records) -- `--existing-db-path`: Path to existing ENSRainbow database to filter out existing labels +- `--existing-db-path`: Path to existing ENSRainbow database to filter out existing labels from the generated ensrainbow file - `--silent`: Disable progress bar (useful for scripts and automated workflows) ### CSV Format Support From af4b04175fec4d022fb0f9b693f2b52b18b36ee2 Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 5 Jan 2026 15:54:36 +0100 Subject: [PATCH 18/30] docs: enhance SQL conversion section with repository link for legacy data files --- .../src/content/docs/ensrainbow/concepts/creating-files.mdx | 4 +--- .../src/content/docs/ensrainbow/concepts/data-model.mdx | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx index 9d3655397..eedb4487c 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx @@ -50,9 +50,7 @@ For detailed information about the file format structure, see the [Data Model](/ ### When to Use SQL Conversion -- Converting existing ENS Subgraph rainbow tables -- Working with the legacy `ens_names.sql.gz` file -- Migrating from previous ENS data formats +- Working with 
the legacy `ens_names.sql.gz` file. These legacy data files can be obtained from [The Graph's ENS Rainbow repository](https://github.com/graphprotocol/ens-rainbow). ## Method 1: Converting from CSV Files diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx index 64189a07c..dc5d4beaf 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/data-model.mdx @@ -109,7 +109,7 @@ subgraph_1.ensrainbow # next version with incremental labelhash-to-label map ENSRainbow provides two methods for creating `.ensrainbow` files from different data sources: - **CSV Conversion**: Convert custom datasets from CSV files using `pnpm run convert` -- **SQL Conversion**: Convert legacy ENS Subgraph data (`ens_names.sql.gz`) using `pnpm run convert-sql` +- **SQL Conversion**: Convert legacy ENS Subgraph data (`ens_names.sql.gz`) using `pnpm run convert-sql`. These legacy data files can be obtained from [The Graph's ENS Rainbow repository](https://github.com/graphprotocol/ens-rainbow). For complete instructions, examples, and workflow guidance, see the [Creating ENSRainbow Files](/ensrainbow/concepts/creating-files) guide. From 0ebee692bb432cfecdff643d8601a9bddde2b31e Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 5 Jan 2026 17:38:40 +0100 Subject: [PATCH 19/30] refactor: update CLI to make output-file optional and enhance documentation for file naming conventions --- apps/ensrainbow/src/cli.ts | 34 +++++++------- .../ensrainbow/concepts/creating-files.mdx | 45 ++++++++++--------- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index 07e4a46dd..f6368b78d 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -57,14 +57,14 @@ interface PurgeArgs { interface ConvertArgs { "input-file": string; - "output-file": string; + "output-file"?: string; "label-set-id": LabelSetId; "label-set-version": LabelSetVersion; } interface ConvertCsvArgs { "input-file": string; - "output-file": string; + "output-file"?: string; "label-set-id": LabelSetId; "label-set-version": LabelSetVersion; "progress-interval"?: number; @@ -203,11 +203,6 @@ export function createCLI(options: CLIOptions = {}) { description: "Path to the CSV input file", demandOption: true, }) - .option("output-file", { - type: "string", - description: "Path to where the resulting ensrainbow file will be output", - default: join(process.cwd(), "rainbow-records.ensrainbow"), - }) .option("label-set-id", { type: "string", description: "Label set id for the generated ensrainbow file", @@ -220,6 +215,10 @@ export function createCLI(options: CLIOptions = {}) { demandOption: true, }) .coerce("label-set-version", buildLabelSetVersion) + .option("output-file", { + type: "string", + description: "Path to where the resulting ensrainbow file will be output", + }) .option("progress-interval", { type: "number", description: "Number of records to process before logging progress", @@ -237,9 +236,12 @@ export function createCLI(options: CLIOptions = {}) { }); }, async (argv: ArgumentsCamelCase) => { + const outputFile = + argv["output-file"] ?? 
+ join(process.cwd(), `${argv["label-set-id"]}_${argv["label-set-version"]}.ensrainbow`); await convertCsvCommand({ inputFile: argv["input-file"], - outputFile: argv["output-file"], + outputFile, labelSetId: argv["label-set-id"], labelSetVersion: argv["label-set-version"], progressInterval: argv["progress-interval"], @@ -258,11 +260,6 @@ export function createCLI(options: CLIOptions = {}) { description: "Path to the gzipped SQL dump file", default: join(process.cwd(), "ens_names.sql.gz"), }) - .option("output-file", { - type: "string", - description: "Path to where the resulting ensrainbow file will be output", - default: join(process.cwd(), "rainbow-records.ensrainbow"), - }) .option("label-set-id", { type: "string", description: "Label set id for the generated ensrainbow file", @@ -274,12 +271,19 @@ export function createCLI(options: CLIOptions = {}) { description: "Label set version for the generated ensrainbow file", demandOption: true, }) - .coerce("label-set-version", buildLabelSetVersion); + .coerce("label-set-version", buildLabelSetVersion) + .option("output-file", { + type: "string", + description: "Path to where the resulting ensrainbow file will be output", + }); }, async (argv: ArgumentsCamelCase) => { + const outputFile = + argv["output-file"] ?? + join(process.cwd(), `${argv["label-set-id"]}_${argv["label-set-version"]}.ensrainbow`); await convertCommand({ inputFile: argv["input-file"], - outputFile: argv["output-file"], + outputFile, labelSetId: argv["label-set-id"], labelSetVersion: argv["label-set-version"], }); diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx index eedb4487c..1d8d61621 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx @@ -50,7 +50,8 @@ For detailed information about the file format structure, see the [Data Model](/ ### When to Use SQL Conversion -- Working with the legacy `ens_names.sql.gz` file. These legacy data files can be obtained from [The Graph's ENS Rainbow repository](https://github.com/graphprotocol/ens-rainbow). +- **Legacy migration only**: Converting existing `ens_names.sql.gz` file from the legacy ENS Subgraph. This file can be obtained from [The Graph's ENS Rainbow repository](https://github.com/graphprotocol/ens-rainbow). +- **Note**: We recommend using CSV conversion for all new label sets. The SQL conversion method exists primarily for migrating away from legacy subgraph data, not for creating new subgraph-based label sets. 
 ## Method 1: Converting from CSV Files
 
@@ -72,12 +73,12 @@ pnpm run convert \
 ### Required Parameters
 
 - `--input-file`: Path to the CSV file
-- `--label-set-id`: Identifier for the label set
-- `--label-set-version`: Version number for the label set
+- `--label-set-id`: Identifier for the output `.ensrainbow` file that will be created (used in file naming and metadata)
+- `--label-set-version`: Version number for the output `.ensrainbow` file that will be created (used in file naming and metadata)
 
 ### Optional Parameters
 
-- `--output-file`: Output file path (defaults to `rainbow-records.ensrainbow`)
+- `--output-file`: Output file path (defaults to `{label-set-id}_{label-set-version}.ensrainbow` in the current working directory)
 - `--progress-interval`: Progress logging frequency (default: 50000 records)
 - `--existing-db-path`: Path to existing ENSRainbow database to filter out existing labels from the generated ensrainbow file
 - `--silent`: Disable progress bar (useful for scripts and automated workflows)
@@ -175,7 +176,9 @@ pnpm run convert \
 
 ## Method 2: Converting from SQL Dumps
 
-The `convert-sql` command processes gzipped SQL dump files from the ENS Subgraph.
+:::warning[Legacy Method]
+The `convert-sql` command processes the gzipped SQL dump file from the legacy ENS Subgraph. This method exists for migrating away from legacy subgraph data. **For all new label sets, we strongly recommend using CSV conversion (Method 1) instead.**
+:::
 
 ### Command Syntax
 
@@ -190,17 +193,21 @@ pnpm run convert-sql \
 ### Required Parameters
 
 - `--input-file`: Path to the gzipped SQL dump file
-- `--label-set-id`: Identifier for the label set (e.g., `subgraph`, `discovery-a`)
-- `--label-set-version`: Version number for the label set (non-negative integer)
+- `--label-set-id`: Identifier for the output `.ensrainbow` file that will be created (used in file naming and metadata, e.g., `subgraph`)
+- `--label-set-version`: Version number for the output `.ensrainbow` file that will be created (used in file naming and metadata, non-negative integer)
 
 ### Optional Parameters
 
-- `--output-file`: Output file path (defaults to `rainbow-records.ensrainbow`)
+- `--output-file`: Output file path (defaults to `{label-set-id}_{label-set-version}.ensrainbow` in the current working directory)
+
+### Example: Converting Legacy ENS Subgraph Data
 
-### Example: Converting ENS Subgraph Data
+:::note[Legacy Migration Only]
+This example shows how to convert existing legacy subgraph data. For new label sets, use CSV conversion instead.
+:::
 
 ```bash
-# Convert main ENS Subgraph data
+# Convert legacy ENS Subgraph data (migration use case only)
 pnpm run convert-sql \
   --input-file ens_names.sql.gz \
   --output-file subgraph_0.ensrainbow \
   --label-set-id subgraph \
   --label-set-version 0
 ```
@@ -229,10 +236,14 @@ pnpm run convert-sql \
 
 ## Common Workflows
 
-### Workflow 1: Migrating from ENS Subgraph
+### Workflow 1: Migrating from Legacy ENS Subgraph
+
+:::warning[Legacy Migration Only]
+This workflow is for migrating away from legacy ENS Subgraph data. For creating new label sets, use CSV conversion (see Workflow 3) instead.
+:::
 
 ```bash
-# 1. 
Convert legacy SQL dump to .ensrainbow pnpm run convert-sql \ --input-file ens_names.sql.gz \ --output-file subgraph_0.ensrainbow \ @@ -356,8 +367,7 @@ ENSRainbow download scripts save files to specific subdirectories: Follow the naming convention: `{label-set-id}_{label-set-version}.ensrainbow` **Examples:** -- `subgraph_0.ensrainbow` - Main ENS data, version 0 -- `subgraph_1.ensrainbow` - Main ENS data, version 1 (incremental update) +- `subgraph_0.ensrainbow` - Legacy ENS data, version 0 - `discovery-a_0.ensrainbow` - Discovery dataset, version 0 - `ens-test-env_0.ensrainbow` - Test environment data, version 0 @@ -395,13 +405,6 @@ pnpm run convert \ --label-set-version 1 \ --existing-db-path data-my-dataset -# For legacy SQL data -pnpm run convert-sql \ - --input-file my-data.sql.gz \ - --output-file my-dataset_0.ensrainbow \ - --label-set-id my-dataset \ - --label-set-version 0 -``` ### 2. Validate Your File From 9cdbb39a1184fe7e7489c8d96ead6f8fce12d740 Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 5 Jan 2026 18:15:06 +0100 Subject: [PATCH 20/30] fix: enforce existing database path requirement in CLI and improve error handling for database access --- apps/ensrainbow/src/cli.ts | 15 ++++++++++++++- .../src/commands/convert-csv-command.test.ts | 11 +++-------- .../src/commands/convert-csv-command.ts | 9 +++++++-- .../docs/ensrainbow/concepts/creating-files.mdx | 5 ++--- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index f6368b78d..849057647 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -227,12 +227,25 @@ export function createCLI(options: CLIOptions = {}) { .option("existing-db-path", { type: "string", description: - "Path to existing ENSRainbow database to filter out existing labels from the generated ensrainbow file", + "Path to existing ENSRainbow database to filter out existing labels from the generated ensrainbow file (required when --label-set-version > 0)", }) .option("silent", { type: "boolean", description: "Disable progress bar (useful for scripts)", default: false, + }) + .check((argv) => { + const labelSetVersion = argv["label-set-version"]; + if ( + labelSetVersion !== undefined && + labelSetVersion > 0 && + !argv["existing-db-path"] + ) { + throw new Error( + "--existing-db-path is required when --label-set-version is greater than 0", + ); + } + return true; }); }, async (argv: ArgumentsCamelCase) => { diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index 685ff6da7..39b3cafbc 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -341,12 +341,12 @@ describe("convert-csv-command", () => { await db.close(); }); - it("should handle non-existent database path gracefully", async () => { + it("should throw error when existing database path cannot be opened", async () => { const inputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); const outputFile = join(tempDir, "output_no_db.ensrainbow"); const nonExistentDbPath = join(tempDir, "non-existent-db"); - // Should not throw error even with non-existent database path + // Should throw error when database path is provided but cannot be opened await expect( convertCsvCommand({ inputFile, @@ -355,12 +355,7 @@ describe("convert-csv-command", () => { labelSetVersion: 0 as LabelSetVersion, existingDbPath: nonExistentDbPath, }), - ).resolves.not.toThrow(); - - // 
Verify the output file was still created - const stats = await stat(outputFile); - expect(stats.isFile()).toBe(true); - expect(stats.size).toBeGreaterThan(0); + ).rejects.toThrow("Cannot proceed without existing database"); }); it("should work through CLI with existing database path", async () => { diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index 87995971e..dc9020e44 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -242,8 +242,13 @@ async function initializeConversion(options: ConvertCsvCommandOptions) { existingDb = await ENSRainbowDB.open(options.existingDbPath); logger.info("Successfully opened existing database for label filtering"); } catch (error) { - logger.warn(`Failed to open existing database at ${options.existingDbPath}: ${error}`); - logger.warn("Proceeding without filtering existing labels"); + const errorMessage = error instanceof Error ? error.message : String(error); + logger.error( + `Failed to open existing database at ${options.existingDbPath}: ${errorMessage}`, + ); + throw new Error( + `Cannot proceed without existing database. Failed to open database at ${options.existingDbPath}: ${errorMessage}`, + ); } } diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx index 1d8d61621..7b8b6b2f3 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx @@ -172,7 +172,7 @@ pnpm run convert \ 4. **Computes** or validates labelhashes as needed 5. **Filters** existing labels if `--existing-db-path` is provided 6. **Filters** duplicate labels within the same CSV file -7. **Writes** protobuf messages with length-delimited encoding +7. **Writes** .ensrainbow file as output ## Method 2: Converting from SQL Dumps @@ -231,8 +231,7 @@ pnpm run convert-sql \ 1. **Streams** the gzipped SQL file to avoid memory issues 2. **Parses** SQL COPY statements to extract label/labelhash pairs 3. **Validates** each record and skips invalid entries -4. **Writes** protobuf messages with length-delimited encoding -5. **Creates** a header message followed by individual record messages +4. 
**Writes** .ensrainbow file as output ## Common Workflows From a7fd4f3beccc3a4d176e8353bea4f029442fb554 Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 5 Jan 2026 18:26:30 +0100 Subject: [PATCH 21/30] refactor: update createRainbowRecord function to use RainbowRecord type and improve labelhash handling --- .../src/commands/convert-csv-command.ts | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index dc9020e44..4d49403c5 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -21,6 +21,7 @@ import { CURRENT_ENSRAINBOW_FILE_FORMAT_VERSION, createRainbowProtobufRoot, } from "../utils/protobuf-schema.js"; +import type { RainbowRecord } from "../utils/rainbow-record.js"; /** * Estimate memory usage of a Map (rough approximation) @@ -270,18 +271,18 @@ async function initializeConversion(options: ConvertCsvCommandOptions) { /** * Create rainbow record from parsed CSV row */ -function createRainbowRecord(row: string[]): { labelhash: Buffer; label: string } { +function createRainbowRecord(row: string[]): RainbowRecord { const label = String(row[0]); if (row.length === 1) { // Single column: compute labelhash using labelhash function const labelHashBytes = labelHashToBytes(labelhash(label)); return { - labelhash: Buffer.from(labelHashBytes), + labelHash: labelHashBytes, label: label, }; } else { - // Two columns: validate and use provided hash + // Two columns: validate labelhash format and use provided hash // Trim whitespace from hash (metadata), but preserve label as-is const providedHash = String(row[1]).trim(); if (providedHash === "") { @@ -291,7 +292,7 @@ function createRainbowRecord(row: string[]): { labelhash: Buffer; label: string try { const labelHash = labelHashToBytes(maybeLabelHash as LabelHash); return { - labelhash: Buffer.from(labelHash), + labelHash: labelHash, label: label, }; } catch (error) { @@ -323,7 +324,7 @@ async function processRecord( const rainbowRecord = createRainbowRecord(row); const label = rainbowRecord.label; - const labelHashBytes = rainbowRecord.labelhash; + const labelHashBytes = Buffer.from(rainbowRecord.labelHash); // Check if labelhash already exists in the existing database if (existingDb) { @@ -345,7 +346,11 @@ async function processRecord( await dedupDb.add(label, ""); // Create protobuf message and write with backpressure handling - const recordMessage = RainbowRecordType.fromObject(rainbowRecord); + // Map RainbowRecord (labelHash) to protobuf format (labelhash) + const recordMessage = RainbowRecordType.fromObject({ + labelhash: Buffer.from(rainbowRecord.labelHash), + label: rainbowRecord.label, + }); const buffer = Buffer.from(RainbowRecordType.encodeDelimited(recordMessage).finish()); // Check if write returns false (buffer full) - if so, wait for drain From aac678950ab8686f6280e10bdd5b57b509e02bc8 Mon Sep 17 00:00:00 2001 From: djstrong Date: Tue, 6 Jan 2026 01:00:13 +0100 Subject: [PATCH 22/30] refactor: remove label set version requirement from CLI and enhance output file handling --- apps/ensrainbow/src/cli.ts | 31 +------ .../src/commands/convert-csv-command.test.ts | 50 ++--------- .../src/commands/convert-csv-command.ts | 90 +++++++++++++++---- 3 files changed, 83 insertions(+), 88 deletions(-) diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index 849057647..cb4c18b23 100644 --- a/apps/ensrainbow/src/cli.ts +++ 
b/apps/ensrainbow/src/cli.ts @@ -66,7 +66,6 @@ interface ConvertCsvArgs { "input-file": string; "output-file"?: string; "label-set-id": LabelSetId; - "label-set-version": LabelSetVersion; "progress-interval"?: number; "existing-db-path"?: string; silent?: boolean; @@ -209,15 +208,10 @@ export function createCLI(options: CLIOptions = {}) { demandOption: true, }) .coerce("label-set-id", buildLabelSetId) - .option("label-set-version", { - type: "number", - description: "Label set version for the generated ensrainbow file", - demandOption: true, - }) - .coerce("label-set-version", buildLabelSetVersion) .option("output-file", { type: "string", - description: "Path to where the resulting ensrainbow file will be output", + description: + "Path to where the resulting ensrainbow file will be output (if not provided, will be generated automatically)", }) .option("progress-interval", { type: "number", @@ -227,36 +221,19 @@ export function createCLI(options: CLIOptions = {}) { .option("existing-db-path", { type: "string", description: - "Path to existing ENSRainbow database to filter out existing labels from the generated ensrainbow file (required when --label-set-version > 0)", + "Path to existing ENSRainbow database to filter out existing labels and determine the next label set version (if not provided, version will be 0)", }) .option("silent", { type: "boolean", description: "Disable progress bar (useful for scripts)", default: false, - }) - .check((argv) => { - const labelSetVersion = argv["label-set-version"]; - if ( - labelSetVersion !== undefined && - labelSetVersion > 0 && - !argv["existing-db-path"] - ) { - throw new Error( - "--existing-db-path is required when --label-set-version is greater than 0", - ); - } - return true; }); }, async (argv: ArgumentsCamelCase) => { - const outputFile = - argv["output-file"] ?? 
- join(process.cwd(), `${argv["label-set-id"]}_${argv["label-set-version"]}.ensrainbow`); await convertCsvCommand({ inputFile: argv["input-file"], - outputFile, + outputFile: argv["output-file"], labelSetId: argv["label-set-id"], - labelSetVersion: argv["label-set-version"], progressInterval: argv["progress-interval"], existingDbPath: argv["existing-db-path"], silent: argv["silent"], diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index 39b3cafbc..7646f5a77 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -5,7 +5,7 @@ import { join } from "path"; import { labelhash } from "viem"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import { type LabelSetId, type LabelSetVersion, labelHashToBytes } from "@ensnode/ensnode-sdk"; +import { type LabelSetId, labelHashToBytes } from "@ensnode/ensnode-sdk"; import { createCLI } from "@/cli"; import { ENSRainbowDB } from "@/lib/database"; @@ -40,7 +40,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-csv-one-col" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, silent: true, }); @@ -74,7 +73,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-csv-two-col" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, silent: true, }); @@ -108,7 +106,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-csv-invalid" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, }), ).rejects.toThrow(/Failed on line 1: Invalid labelHash/); }); @@ -123,7 +120,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-csv-special" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, silent: true, }); @@ -165,7 +161,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-csv-invalid-hash" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, }), ).rejects.toThrow(/Failed on line 2: Invalid labelHash/); }); @@ -181,7 +176,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-missing" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, }), ).rejects.toThrow(); }); @@ -204,8 +198,6 @@ describe("convert-csv-command", () => { outputFile, "--label-set-id", "test-cli-csv", - "--label-set-version", - "0", ]); // Verify file was created @@ -234,7 +226,6 @@ describe("convert-csv-command", () => { inputFile, outputFile: initialOutputFile, labelSetId: "test-filtering" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, silent: true, }); @@ -256,11 +247,11 @@ describe("convert-csv-command", () => { await db.close(); // Now convert the same CSV file again, but with filtering enabled + // This should automatically determine version 1 from the existing database await convertCsvCommand({ inputFile, outputFile, labelSetId: "test-filtering" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, // Use same version as initial existingDbPath: dataDir, silent: true, }); @@ -273,21 +264,12 @@ describe("convert-csv-command", () => { const initialStats = await stat(initialOutputFile); expect(outputStats.size).toBeLessThan(initialStats.size); - // Verify that the filtered file contains fewer records + // Verify that ingesting the filtered file (version 1) into a new database fails + // because new databases require version 0 for initial ingestion const filteredDataDir = join(tempDir, 
"db_filtered_result"); - await cli.parse([ - "ingest-ensrainbow", - "--input-file", - outputFile, - "--data-dir", - filteredDataDir, - ]); - - const filteredDb = await ENSRainbowDB.open(filteredDataDir); - expect(await filteredDb.validate()).toBe(true); - const filteredCount = await filteredDb.getPrecalculatedRainbowRecordCount(); - expect(filteredCount).toBe(0); // All labels should be filtered out since they already exist - await filteredDb.close(); + await expect( + cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", filteredDataDir]), + ).rejects.toThrow(/Initial ingestion must use a file with label set version 0/); }); it("should filter out duplicate labels within the same conversion", async () => { @@ -303,7 +285,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-duplicates" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, silent: true, }); @@ -352,10 +333,9 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-no-db" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, existingDbPath: nonExistentDbPath, }), - ).rejects.toThrow("Cannot proceed without existing database"); + ).rejects.toThrow(/Database is not open/); }); it("should work through CLI with existing database path", async () => { @@ -375,8 +355,6 @@ describe("convert-csv-command", () => { initialOutputFile, "--label-set-id", "test-cli-filtering", - "--label-set-version", - "0", ]); await cli.parse([ @@ -396,8 +374,6 @@ describe("convert-csv-command", () => { outputFile, "--label-set-id", "test-cli-filtering", - "--label-set-version", - "1", "--existing-db-path", dataDir, ]); @@ -429,7 +405,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-small" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, silent: true, }); @@ -475,7 +450,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-many-labels" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, silent: true, }); @@ -498,7 +472,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-empty" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, silent: true, }), ).resolves.not.toThrow(); @@ -531,7 +504,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-whitespace" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, silent: true, }), ).resolves.not.toThrow(); @@ -555,7 +527,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-header" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, silent: true, }), ).rejects.toThrow(/Invalid labelHash/); @@ -569,7 +540,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-header-valid" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, silent: true, }), ).resolves.not.toThrow(); @@ -600,7 +570,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-malformed" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, silent: true, }), ).rejects.toThrow(/Expected \d+ columns/); @@ -619,7 +588,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-malformed2" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, silent: true, }), ).rejects.toThrow(/Expected \d+ columns/); @@ -638,7 +606,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-quoted" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, silent: true, }), 
).resolves.not.toThrow(); @@ -683,7 +650,6 @@ describe("convert-csv-command", () => { inputFile, outputFile, labelSetId: "test-empty-hash" as LabelSetId, - labelSetVersion: 0 as LabelSetVersion, silent: true, }), ).rejects.toThrow(/LabelHash cannot be empty/); diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index 4d49403c5..e44eb2f9a 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -122,11 +122,10 @@ function setupProgressBar(): ProgressBar { */ export interface ConvertCsvCommandOptions { inputFile: string; - outputFile: string; + outputFile?: string; // Optional - will be generated if not provided labelSetId: string; - labelSetVersion: number; progressInterval?: number; - existingDbPath?: string; // Path to existing ENSRainbow database to check for existing labels + existingDbPath?: string; // Path to existing ENSRainbow database to check for existing labels and determine next version silent?: boolean; // Disable progress bar for tests } @@ -212,15 +211,64 @@ async function checkLabelHashExists(db: ENSRainbowDB, labelHashBytes: Buffer): P } } +/** + * Determine the label set version based on existing database or default to 0 + */ +async function determineLabelSetVersion( + existingDbPath: string | undefined, + labelSetId: string, +): Promise { + if (!existingDbPath) { + return 0; + } + + try { + logger.info(`Opening existing database to determine next label set version: ${existingDbPath}`); + const existingDb = await ENSRainbowDB.open(existingDbPath); + const labelSet = await existingDb.getLabelSet(); + + // Validate that the label set ID matches + if (labelSet.labelSetId !== labelSetId) { + await existingDb.close(); + throw new Error( + `Label set ID mismatch! Database label set id: ${labelSet.labelSetId}, provided label set id: ${labelSetId}`, + ); + } + + const nextVersion = labelSet.highestLabelSetVersion + 1; + await existingDb.close(); + logger.info( + `Determined next label set version: ${nextVersion} (current highest: ${labelSet.highestLabelSetVersion})`, + ); + return nextVersion; + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + throw new Error( + `Failed to determine label set version from existing database at ${existingDbPath}: ${errorMessage}`, + ); + } +} + +/** + * Generate output file name from label set ID and version + */ +function generateOutputFileName(labelSetId: string, labelSetVersion: number): string { + return `${labelSetId}_${labelSetVersion}.ensrainbow`; +} + /** * Initialize conversion setup and logging */ -async function initializeConversion(options: ConvertCsvCommandOptions) { +async function initializeConversion( + options: ConvertCsvCommandOptions, + labelSetVersion: number, + outputFile: string, +) { logger.info("Starting conversion from CSV to protobuf format..."); logger.info(`Input file: ${options.inputFile}`); - logger.info(`Output file: ${options.outputFile}`); + logger.info(`Output file: ${outputFile}`); logger.info(`Label set id: ${options.labelSetId}`); - logger.info(`Label set version: ${options.labelSetVersion}`); + logger.info(`Label set version: ${labelSetVersion}`); // Check file size and warn for very large files try { @@ -235,7 +283,7 @@ async function initializeConversion(options: ConvertCsvCommandOptions) { logger.warn(`Could not determine file size: ${error}`); } - // Open existing database if path is provided + // Open existing database if path is provided (for filtering existing labels) let existingDb: ENSRainbowDB | null = null; if (options.existingDbPath) { try { @@ -254,14 +302,9 @@ async function initializeConversion(options: ConvertCsvCommandOptions) { } const { RainbowRecordType, RainbowRecordCollectionType } = createRainbowProtobufRoot(); - const outputStream = setupWriteStream(options.outputFile); + const outputStream = setupWriteStream(outputFile); - writeHeader( - outputStream, - RainbowRecordCollectionType, - options.labelSetId, - options.labelSetVersion, - ); + writeHeader(outputStream, RainbowRecordCollectionType, options.labelSetId, labelSetVersion); logger.info("Reading and processing CSV file line by line with streaming..."); @@ -501,10 +544,15 @@ async function processCSVFile( * Main CSV conversion command with true streaming using fast-csv */ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Promise { - // Validate that existingDbPath is provided when labelSetVersion > 0 - if (options.labelSetVersion > 0 && !options.existingDbPath) { - throw new Error("existingDbPath must be specified if label set version is higher than 0"); - } + // Determine label set version from existing database or default to 0 + const labelSetVersion = await determineLabelSetVersion( + options.existingDbPath, + options.labelSetId, + ); + + // Generate output file name if not provided + const outputFile = + options.outputFile ?? 
generateOutputFileName(options.labelSetId, labelSetVersion); const stats: ConversionStats = { totalLines: 0, @@ -520,7 +568,11 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom let tempDedupDir: string | null = null; try { - const { RainbowRecordType, outputStream, existingDb: db } = await initializeConversion(options); + const { + RainbowRecordType, + outputStream, + existingDb: db, + } = await initializeConversion(options, labelSetVersion, outputFile); existingDb = db; // Create temporary deduplication database From 35cf39bd7d9fe5b1690a87c9c23cfef66d665ff4 Mon Sep 17 00:00:00 2001 From: djstrong Date: Wed, 7 Jan 2026 13:19:51 +0100 Subject: [PATCH 23/30] docs: update documentation to reflect removal of label set version requirement and clarify CSV conversion process --- .../src/commands/convert-csv-command.ts | 2 +- .../docs/ensrainbow/concepts/architecture.mdx | 11 ++-- .../ensrainbow/concepts/creating-files.mdx | 57 +++++++------------ .../ensrainbow/contributing/cli-reference.mdx | 3 +- .../docs/ensrainbow/contributing/index.mdx | 20 +++---- .../src/content/docs/ensrainbow/faq.mdx | 16 +----- 6 files changed, 43 insertions(+), 66 deletions(-) diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index e44eb2f9a..9d1f75497 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -333,7 +333,7 @@ function createRainbowRecord(row: string[]): RainbowRecord { } const maybeLabelHash = providedHash.startsWith("0x") ? providedHash : `0x${providedHash}`; try { - const labelHash = labelHashToBytes(maybeLabelHash as LabelHash); + const labelHash = labelHashToBytes(maybeLabelHash as LabelHash); // performs labelhash format validation return { labelHash: labelHash, label: label, diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/architecture.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/architecture.mdx index b578aca07..8fb49b75a 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/architecture.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/architecture.mdx @@ -10,7 +10,7 @@ import { LinkCard } from '@astrojs/starlight/components'; ENSRainbow consists of four primary layers working together to "heal" unknown labels: -1. **Data Generation & Conversion** – legacy `.sql.gz` rainbow tables are converted to the modern `.ensrainbow` format. +1. **Data Generation & Conversion** – CSV files are converted to the modern `.ensrainbow` format (SQL conversion is available only for migrating legacy ENS Subgraph data). 2. **Data Ingestion** – the `.ensrainbow` files are ingested into a LevelDB database using the `ingest-ensrainbow` CLI. 3. **HTTP API Service** – state in the database is exposed through a lightweight HTTP API. 4. **Client Integration** – applications call the API directly or via the TypeScript SDK. @@ -18,10 +18,13 @@ ENSRainbow consists of four primary layers working together to "heal" unknown la ```mermaid flowchart TD subgraph Data_Generation - SQL[".sql.gz files"] + CSV["CSV files"] + SQL[".sql.gz files
(legacy only)"] ENSRB[".ensrainbow files"] - SQL --> Convert["convert" command] - Convert --> ENSRB + CSV --> ConvertCSV["convert command"] + SQL --> ConvertSQL["convert-sql command
(legacy migration)"] + ConvertCSV --> ENSRB + ConvertSQL --> ENSRB end ENSRB --> Ingest["ingest-ensrainbow"] diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx index 7b8b6b2f3..4af35a0ee 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx @@ -64,7 +64,6 @@ pnpm run convert \ --input-file \ --output-file \ --label-set-id \ - --label-set-version \ [--progress-interval ] \ [--existing-db-path ] \ [--silent] @@ -74,7 +73,6 @@ pnpm run convert \ - `--input-file`: Path to the CSV file - `--label-set-id`: Identifier for the output `.ensrainbow` file that will be created (used in file naming and metadata) -- `--label-set-version`: Version number for the output `.ensrainbow` file that will be created (used in file naming and metadata) ### Optional Parameters @@ -103,7 +101,7 @@ vitalik,0xaf2caa1c2ca1d027f1ac823b529d0a67cd144264b2789fa2ea4d63a67c7103cc ens,0x5cee339e13375638553bdf5a6e36ba80fb9f6a4f0783680884d92b558aa471da ``` -The converter validates that provided labelhashes match the computed hash for each label. +The converter validates the format of provided labelhashes (does not verify they match the label). ### Label Filtering @@ -117,7 +115,6 @@ pnpm run convert \ --input-file new-labels.csv \ --output-file incremental_1.ensrainbow \ --label-set-id my-dataset \ - --label-set-version 1 \ --existing-db-path data-my-dataset ``` @@ -149,7 +146,6 @@ pnpm run convert \ --input-file test-labels.csv \ --output-file test-dataset_0.ensrainbow \ --label-set-id test-dataset \ - --label-set-version 0 ``` ### Example: Creating Discovery Dataset @@ -161,7 +157,6 @@ pnpm run convert \ --input-file empty.csv \ --output-file discovery-a_0.ensrainbow \ --label-set-id discovery-a \ - --label-set-version 0 ``` ### How It Works @@ -170,6 +165,9 @@ pnpm run convert \ 2. **Streams** CSV parsing using fast-csv for memory efficiency 3. **Validates** column count and data format 4. **Computes** or validates labelhashes as needed + - For single-column format: Computes labelhash using the `labelhash()` function + - For two-column format: Validates the format of the provided labelhash (does not verify it matches the label) + - Invalid labelhashes are rejected if they don't meet format requirements (66 characters including "0x" prefix, lowercase hex, valid hex format) 5. **Filters** existing labels if `--existing-db-path` is provided 6. **Filters** duplicate labels within the same CSV file 7. **Writes** .ensrainbow file as output @@ -215,22 +213,19 @@ pnpm run convert-sql \ --label-set-version 0 ``` -### Example: Converting Test Data - -```bash -# Convert ens-test-env data -pnpm run convert-sql \ - --input-file test/fixtures/ens_test_env_names.sql.gz \ - --output-file ens-test-env_0.ensrainbow \ - --label-set-id ens-test-env \ - --label-set-version 0 -``` ### How It Works 1. **Streams** the gzipped SQL file to avoid memory issues 2. **Parses** SQL COPY statements to extract label/labelhash pairs 3. 
**Validates** each record and skips invalid entries + - **Invalid line format**: Lines that don't contain exactly 2 tab-separated columns (labelHash and label) + - **Invalid labelHash format**: LabelHash values that: + - Don't have exactly 66 characters (must be "0x" prefix + 64 hex digits) + - Are not in lowercase (must be all lowercase hexadecimal) + - Don't start with "0x" prefix + - Contain invalid hexadecimal characters + - Invalid entries are safely skipped as they would be unreachable by the ENS Subgraph 4. **Writes** .ensrainbow file as output ## Common Workflows @@ -265,11 +260,10 @@ pnpm run serve --data-dir data-subgraph --port 3223 ```bash # 1. Convert test data -pnpm run convert-sql \ - --input-file test/fixtures/ens_test_env_names.sql.gz \ +pnpm run convert \ + --input-file test/fixtures/ens_test_env_names.csv \ --output-file ens-test-env_0.ensrainbow \ - --label-set-id ens-test-env \ - --label-set-version 0 + --label-set-id ens-test-env # 2. Ingest test data pnpm run ingest-ensrainbow \ @@ -292,8 +286,7 @@ mylabel3" > custom-labels.csv pnpm run convert \ --input-file custom-labels.csv \ --output-file custom_0.ensrainbow \ - --label-set-id custom \ - --label-set-version 0 + --label-set-id custom # 3. Ingest and serve pnpm run ingest-ensrainbow \ @@ -310,8 +303,7 @@ pnpm run serve --data-dir data-custom --port 3223 pnpm run convert \ --input-file initial-labels.csv \ --output-file my-dataset_0.ensrainbow \ - --label-set-id my-dataset \ - --label-set-version 0 + --label-set-id my-dataset # 2. Ingest initial data pnpm run ingest-ensrainbow \ @@ -323,7 +315,6 @@ pnpm run convert \ --input-file new-labels.csv \ --output-file my-dataset_1.ensrainbow \ --label-set-id my-dataset \ - --label-set-version 1 \ --existing-db-path data-my-dataset # 4. Ingest incremental update @@ -386,24 +377,21 @@ If you want to create, publish, and distribute your own `.ensrainbow` files, fol ### 1. Create Your Dataset -First, prepare your data in either CSV (recommended) or SQL format, then convert it using the appropriate method: +First, prepare your data in CSV format, then convert it using the `convert` command: ```bash -# For CSV data (recommended) pnpm run convert \ --input-file my-labels.csv \ --output-file my-dataset_0.ensrainbow \ - --label-set-id my-dataset \ - --label-set-version 0 + --label-set-id my-dataset -# For CSV data with filtering (if you have an existing database) +# to create an incremental update, you can use the `--existing-db-path` flag to filter out existing labels: pnpm run convert \ - --input-file my-labels.csv \ + --input-file my-labels2.csv \ --output-file my-dataset_1.ensrainbow \ --label-set-id my-dataset \ - --label-set-version 1 \ --existing-db-path data-my-dataset - +``` ### 2. 
Validate Your File @@ -617,8 +605,7 @@ NEW_VERSION="1" pnpm run convert \ --input-file updated-labels.csv \ --output-file ${LABEL_SET_ID}_${NEW_VERSION}.ensrainbow \ - --label-set-id ${LABEL_SET_ID} \ - --label-set-version ${NEW_VERSION} + --label-set-id ${LABEL_SET_ID} # Create prebuilt database pnpm run ingest-ensrainbow \ diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/contributing/cli-reference.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/contributing/cli-reference.mdx index 31b8c686a..58023497a 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/contributing/cli-reference.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/contributing/cli-reference.mdx @@ -8,7 +8,7 @@ keywords: [ensrainbow, cli] | Command | Purpose | Most useful flags | Example | |---------|---------|-------------------|---------| -| `convert` | Convert CSV files to `.ensrainbow` format. **This is the primary method for creating new .ensrainbow files.** | `--input-file`, `--output-file`, `--label-set-id`, `--label-set-version`, `--existing-db-path`, `--silent` | `pnpm run convert --input-file labels.csv --output-file my-dataset_0.ensrainbow --label-set-id my-dataset --label-set-version 0` | +| `convert` | Convert CSV files to `.ensrainbow` format. **This is the primary method for creating new .ensrainbow files.** | `--input-file`, `--output-file`, `--label-set-id`, `--existing-db-path`, `--silent` | `pnpm run convert --input-file labels.csv --output-file my-dataset_0.ensrainbow --label-set-id my-dataset` | | `convert-sql` | Convert legacy `.sql.gz` rainbow tables (ENS Subgraph data) to `.ensrainbow` format | `--input-file`, `--output-file`, `--label-set-id`, `--label-set-version` | `pnpm run convert-sql --input-file ens_names.sql.gz --output-file subgraph_0.ensrainbow --label-set-id subgraph --label-set-version 0` | | `ingest-ensrainbow` | Stream a `.ensrainbow` file into LevelDB | `--input-file`, `--data-dir` | `pnpm run ingest-ensrainbow --input-file my-dataset_0.ensrainbow --data-dir ./data` | | `validate` | Verify DB integrity | `--data-dir`, `--lite` | `pnpm run validate --lite` | @@ -27,7 +27,6 @@ pnpm run convert \ --input-file path/to/labels.csv \ --output-file path/to/output.ensrainbow \ --label-set-id your-label-set-id \ - --label-set-version 0 \ [--existing-db-path path/to/existing/database] \ [--silent] ``` diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx index 03d213258..984b5d683 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/contributing/index.mdx @@ -105,9 +105,9 @@ Starts the API server. The process will exit with: ## Using ENSRainbow with ens-test-env -The ens-test-env project provides a test environment for ENS development. It includes a small dataset of ENS names in the `ens_test_env_names.sql.gz` file that can be used with ENSRainbow for testing purposes. +The ens-test-env project provides a test environment for ENS development. It includes a small dataset of ENS names in the `ens_test_env_names.csv` file that can be used with ENSRainbow for testing purposes. 
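The automatic versioning documented in the `convert` and `ingest-ensrainbow` entries above is easiest to see end to end. The following is a sketch, assuming hypothetical file and directory names (`labels-batch1.csv`, `labels-batch2.csv`, `data-my-dataset`) and the default output naming `{label-set-id}_{label-set-version}.ensrainbow` implemented by `generateOutputFileName` in this patch series:

```bash
# First conversion: no --existing-db-path, so the label set version is 0
# and the default output name is my-dataset_0.ensrainbow.
pnpm run convert \
  --input-file labels-batch1.csv \
  --label-set-id my-dataset

# Ingest version 0 into a fresh database directory.
pnpm run ingest-ensrainbow \
  --input-file my-dataset_0.ensrainbow \
  --data-dir data-my-dataset

# Second conversion: --existing-db-path filters labels already present in
# the database and bumps the version to highest+1, producing
# my-dataset_1.ensrainbow.
pnpm run convert \
  --input-file labels-batch2.csv \
  --label-set-id my-dataset \
  --existing-db-path data-my-dataset
```

If `--label-set-id` does not match the label set already stored in the database, the command now fails with a label set ID mismatch error rather than writing an inconsistent file.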
-### Ingesting ens_test_env_names.sql.gz +### Ingesting ens_test_env_names.csv To ingest the test data into ENSRainbow: @@ -115,7 +115,7 @@ To ingest the test data into ENSRainbow: If you don't have a pre-converted `ens-test-env-0.ensrainbow` file: ```bash # Navigate to apps/ensrainbow or adjust paths accordingly - pnpm run convert-sql --input-file test/fixtures/ens_test_env_names.sql.gz --output-file ens-test-env-0.ensrainbow --label-set-id ens-test-env --label-set-version 0 + pnpm run convert --input-file test/fixtures/ens_test_env_names.csv --output-file ens-test-env-0.ensrainbow --label-set-id ens-test-env ``` This creates `ens-test-env-0.ensrainbow`. @@ -268,12 +268,12 @@ These steps are typically performed by project maintainers for releasing officia ### 1. Prepare `.ensrainbow` Files -This section covers the conversion of source data (like SQL dumps or empty files for initial datasets) into the `.ensrainbow` format. For detailed conversion instructions and examples, see the [Creating ENSRainbow Files](/ensrainbow/concepts/creating-files) guide. +This section covers the conversion of source data into the `.ensrainbow` format. For detailed conversion instructions and examples, see the [Creating ENSRainbow Files](/ensrainbow/concepts/creating-files) guide. -**For the `subgraph` Label Set (main dataset):** -This command converts a SQL dump file (`ens_names.sql.gz`) into an `.ensrainbow` file for version 0 of the `subgraph` Label Set. +**For the `subgraph` Label Set (legacy migration only):** +This command converts a SQL dump file (`ens_names.sql.gz`) from the legacy ENS Subgraph into an `.ensrainbow` file for version 0 of the `subgraph` Label Set. **Note:** SQL conversion is only for migrating legacy ENS Subgraph data. For all new label sets, use CSV conversion instead. ```bash -# Assuming ens_names.sql.gz contains the primary dataset +# Assuming ens_names.sql.gz contains the dataset time pnpm run convert-sql --input-file ens_names.sql.gz --output-file subgraph_0.ensrainbow --label-set-id subgraph --label-set-version 0 ``` @@ -282,13 +282,13 @@ This creates an empty `.ensrainbow` file for version 0 of the `discovery-a` Labe ```bash # Create empty CSV file for discovery dataset echo "" > empty.csv -time pnpm run convert --input-file empty.csv --output-file discovery-a_0.ensrainbow --label-set-id discovery-a --label-set-version 0 +time pnpm run convert --input-file empty.csv --output-file discovery-a_0.ensrainbow --label-set-id discovery-a ``` **For the `ens-test-env` Label Set (for testing):** -This converts a test dataset SQL dump into an `.ensrainbow` file for version 0 of the `ens-test-env` Label Set. +This converts a test dataset CSV file into an `.ensrainbow` file for version 0 of the `ens-test-env` Label Set. ```bash -time pnpm run convert-sql --input-file test/fixtures/ens_test_env_names.sql.gz --output-file ens-test-env_0.ensrainbow --label-set-id ens-test-env --label-set-version 0 +time pnpm run convert --input-file test/fixtures/ens_test_env_names.csv --output-file ens-test-env_0.ensrainbow --label-set-id ens-test-env ``` ### 2. Upload `.ensrainbow` Files to R2 Storage diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx index 5262fd4c1..fbf621e7c 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx @@ -22,26 +22,14 @@ To stay informed about new versions, monitor the [Available Label Sets](/ensrain ## How can I create my own .ensrainbow file? 
-ENSRainbow provides two methods for creating `.ensrainbow` files:
-
-**Method 1: CSV Conversion (Recommended)**
-
-The `convert` command is the **primary method** for creating new `.ensrainbow` files from CSV data:
+You can create your own `.ensrainbow` files from CSV data using the `convert` command:
 
 1. **Prepare your data** as a CSV file with labels (1 column) or labels and labelhashes (2 columns)
 2. **Run the convert command:**
    ```bash
-   pnpm run convert --input-file your_labels.csv --output-file custom.ensrainbow --label-set-id my-dataset --label-set-version 0
+   pnpm run convert --input-file your_labels.csv --output-file custom.ensrainbow --label-set-id my-dataset
    ```
 
-**Method 2: SQL Conversion (Legacy)**
-
-For converting legacy ENS Subgraph data from PostgreSQL dumps:
-
-```bash
-pnpm run convert-sql --input-file ens_names.sql.gz --output-file custom.ensrainbow --label-set-id subgraph --label-set-version 0
-```
-
 **Note:** You can also download existing `.ensrainbow` files using the download scripts.
 
 See the [CLI Reference](/ensrainbow/contributing/cli-reference/) for detailed command usage.
 
From 7125f230a0bb35c31e6f6d3d0e460c3433fc5f20 Mon Sep 17 00:00:00 2001
From: djstrong
Date: Wed, 7 Jan 2026 13:25:11 +0100
Subject: [PATCH 24/30] feat: rename convert command for SQL dumps

---
 apps/ensrainbow/src/cli.ts                                      | 2 +-
 .../src/commands/{convert-command.ts => convert-command-sql.ts} | 0
 .../src/content/docs/ensrainbow/concepts/creating-files.mdx     | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename apps/ensrainbow/src/commands/{convert-command.ts => convert-command-sql.ts} (100%)

diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts
index cb4c18b23..35732e4e8 100644
--- a/apps/ensrainbow/src/cli.ts
+++ b/apps/ensrainbow/src/cli.ts
@@ -12,7 +12,7 @@ import {
   type LabelSetVersion,
 } from "@ensnode/ensnode-sdk";
 
-import { convertCommand } from "@/commands/convert-command";
+import { convertCommand } from "@/commands/convert-command-sql";
 import { convertCsvCommand } from "@/commands/convert-csv-command";
 // import { ingestCommand } from "@/commands/ingest-command";
 import { ingestProtobufCommand } from "@/commands/ingest-protobuf-command";
diff --git a/apps/ensrainbow/src/commands/convert-command.ts b/apps/ensrainbow/src/commands/convert-command-sql.ts
similarity index 100%
rename from apps/ensrainbow/src/commands/convert-command.ts
rename to apps/ensrainbow/src/commands/convert-command-sql.ts
diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx
index 4af35a0ee..62b036eef 100644
--- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx
+++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx
@@ -78,7 +78,7 @@ pnpm run convert \
 
 - `--output-file`: Output file path (defaults to `rainbow-records-{label-set-id}_{label-set-version}.ensrainbow`)
 - `--progress-interval`: Progress logging frequency (default: 50000 records)
-- `--existing-db-path`: Path to existing ENSRainbow database to filter out existing labels from the generated ensrainbow file
+- `--existing-db-path`: Path to existing ENSRainbow database to filter out existing labels from the generated ensrainbow file and determine the next label set version
 - `--silent`: Disable progress bar (useful for scripts and automated workflows)
 
 ### CSV Format Support
From f7ca2448c9b6f9e7962f33ee927ce8ebb669af12 Mon 
Sep 17 00:00:00 2001 From: djstrong Date: Wed, 7 Jan 2026 13:38:38 +0100 Subject: [PATCH 25/30] refactor: update CSV conversion documentation --- apps/ensrainbow/src/commands/convert-csv-command.test.ts | 2 +- apps/ensrainbow/src/commands/convert-csv-command.ts | 3 +-- .../src/content/docs/ensrainbow/concepts/creating-files.mdx | 4 +++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index 7646f5a77..12015137f 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -513,7 +513,7 @@ describe("convert-csv-command", () => { expect(stats.isFile()).toBe(true); }); - it("should skip CSV header row if present", async () => { + it("should process all CSV rows including potential headers", async () => { const inputFile = join(tempDir, "with_header.csv"); const outputFile = join(tempDir, "output_header.ensrainbow"); const csvContent = diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index 9d1f75497..05c1c5e57 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -264,7 +264,7 @@ async function initializeConversion( labelSetVersion: number, outputFile: string, ) { - logger.info("Starting conversion from CSV to protobuf format..."); + logger.info("Starting conversion from CSV to .ensrainbow format..."); logger.info(`Input file: ${options.inputFile}`); logger.info(`Output file: ${outputFile}`); logger.info(`Label set id: ${options.labelSetId}`); @@ -428,7 +428,6 @@ async function processCSVFile( let lineNumber = 0; let processedRecords = 0; let lastLoggedLine = 0; - const startTime = Date.now(); let lastLogTime = Date.now(); const fileStream = createReadStream(inputFile, { encoding: "utf8" }); diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx index 62b036eef..8aec77266 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx @@ -83,7 +83,7 @@ pnpm run convert \ ### CSV Format Support -The CSV converter supports two formats: +The CSV converter supports two formats and expects CSV files **without a header row**. 
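The two CSV formats documented here correspond to the row handling in `createRainbowRecord`, shown elsewhere in this patch series. The following condensed TypeScript sketch restates that logic for reference; `rowToRecord` is an illustrative name, not part of the codebase, while `labelhash` (from viem) and `labelHashToBytes` (from `@ensnode/ensnode-sdk`) are the helpers the command already uses:

```typescript
import { labelhash } from "viem";
import { type LabelHash, labelHashToBytes } from "@ensnode/ensnode-sdk";

// Condensed sketch of how one parsed CSV row becomes a rainbow record.
function rowToRecord(row: string[]): {
  labelHash: ReturnType<typeof labelHashToBytes>;
  label: string;
} {
  const label = String(row[0]);

  if (row.length === 1) {
    // Single-column CSV: compute the labelhash from the label itself.
    return { labelHash: labelHashToBytes(labelhash(label)), label };
  }

  if (row.length === 2) {
    // Two-column CSV: the provided hash is only format-checked by
    // labelHashToBytes ("0x" prefix + 64 lowercase hex digits);
    // it is NOT verified to equal labelhash(label).
    const provided = String(row[1]).trim();
    if (provided === "") throw new Error("LabelHash cannot be empty");
    const maybeLabelHash = provided.startsWith("0x") ? provided : `0x${provided}`;
    return { labelHash: labelHashToBytes(maybeLabelHash as LabelHash), label };
  }

  throw new Error(`Expected 1 or 2 columns, got ${row.length}`);
}
```

Note the asymmetry: single-column input always yields a hash computed from the label, while two-column input is trusted after a format check only.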
#### Single Column Format (Label Only) ```csv @@ -458,6 +458,8 @@ Create documentation for your custom label set including: - Checksum: `https://example.com/my-dataset_0.tgz.sha256sum` ### Usage +``` + ```bash # Using with Docker docker run -d \ From 7ab51653c96fa2b8b4437c9ef48614e0e792ab8b Mon Sep 17 00:00:00 2001 From: djstrong Date: Wed, 7 Jan 2026 14:27:13 +0100 Subject: [PATCH 26/30] test: add error handling test for label set ID mismatch in CSV conversion --- .../src/commands/convert-csv-command.test.ts | 46 +++++++++++++++++++ .../src/commands/convert-csv-command.ts | 21 +++++---- 2 files changed, 58 insertions(+), 9 deletions(-) diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index 12015137f..706e22166 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -338,6 +338,52 @@ describe("convert-csv-command", () => { ).rejects.toThrow(/Database is not open/); }); + it("should throw error when label set ID mismatches existing database", async () => { + const inputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); + const outputFile = join(tempDir, "output_mismatch.ensrainbow"); + const dataDir = join(tempDir, "db_mismatch"); + + // First, create a database with one label set ID + const initialOutputFile = join(tempDir, "initial_mismatch.ensrainbow"); + await convertCsvCommand({ + inputFile, + outputFile: initialOutputFile, + labelSetId: "test-label-set-a" as LabelSetId, + silent: true, + }); + + // Ingest the initial file to create the database + const cli = createCLI({ exitProcess: false }); + await cli.parse([ + "ingest-ensrainbow", + "--input-file", + initialOutputFile, + "--data-dir", + dataDir, + ]); + + // Verify initial database was created + const db = await ENSRainbowDB.open(dataDir); + expect(await db.validate()).toBe(true); + const labelSet = await db.getLabelSet(); + expect(labelSet.labelSetId).toBe("test-label-set-a"); + await db.close(); + + // Now try to convert with a different label set ID and the existing database path + // This should throw an error about label set ID mismatch + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-label-set-b" as LabelSetId, + existingDbPath: dataDir, + silent: true, + }), + ).rejects.toThrow( + /Label set ID mismatch! Database label set id: test-label-set-a, provided label set id: test-label-set-b/, + ); + }); + it("should work through CLI with existing database path", async () => { const inputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); const outputFile = join(tempDir, "cli_output_with_db.ensrainbow"); diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index 05c1c5e57..77b800df8 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -206,8 +206,11 @@ async function checkLabelHashExists(db: ENSRainbowDB, labelHashBytes: Buffer): P const record = await db.getVersionedRainbowRecord(labelHashBytes); return record !== null; } catch (error) { - // If there's an error checking, assume it doesn't exist - return false; + const errorMessage = error instanceof Error ? 
error.message : String(error); + logger.error( + `Error while checking if labelhash exists in ENSRainbow database: ${errorMessage}`, + ); + throw error; } } @@ -564,7 +567,7 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom let existingDb: ENSRainbowDB | null = null; let dedupDb: DeduplicationDB | undefined; - let tempDedupDir: string | null = null; + let temporaryDedupDir: string | null = null; try { const { @@ -575,9 +578,9 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom existingDb = db; // Create temporary deduplication database - tempDedupDir = join(process.cwd(), "temp-dedup-" + Date.now()); - logger.info(`Creating temporary deduplication database at: ${tempDedupDir}`); - const tempDb = new ClassicLevel(tempDedupDir, { + temporaryDedupDir = join(process.cwd(), "temp-dedup-" + Date.now()); + logger.info(`Creating temporary deduplication database at: ${temporaryDedupDir}`); + const tempDb = new ClassicLevel(temporaryDedupDir, { keyEncoding: "utf8", valueEncoding: "utf8", createIfMissing: true, @@ -649,10 +652,10 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom } // Remove temporary deduplication database directory - if (tempDedupDir) { + if (temporaryDedupDir) { try { - rmSync(tempDedupDir, { recursive: true, force: true }); - logger.info(`Removed temporary deduplication database: ${tempDedupDir}`); + rmSync(temporaryDedupDir, { recursive: true, force: true }); + logger.info(`Removed temporary deduplication database: ${temporaryDedupDir}`); } catch (error) { logger.warn(`Failed to remove temporary deduplication database: ${error}`); } From 3967d4ce4713818014ce2720fe019ec6aea50985 Mon Sep 17 00:00:00 2001 From: djstrong Date: Wed, 7 Jan 2026 14:44:51 +0100 Subject: [PATCH 27/30] refactor: rename and enhance label set version retrieval function to return database connection --- .../src/commands/convert-csv-command.ts | 42 +++++++------------ 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index 77b800df8..ac981c537 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -215,14 +215,15 @@ async function checkLabelHashExists(db: ENSRainbowDB, labelHashBytes: Buffer): P } /** - * Determine the label set version based on existing database or default to 0 + * Get the label set version and open database connection if needed + * Returns both the version and the open database connection (if opened) to avoid redundant opens */ -async function determineLabelSetVersion( +async function getLabelSetVersionAndDatabase( existingDbPath: string | undefined, labelSetId: string, -): Promise { +): Promise<{ version: number; existingDb: ENSRainbowDB | null }> { if (!existingDbPath) { - return 0; + return { version: 0, existingDb: null }; } try { @@ -239,11 +240,11 @@ async function determineLabelSetVersion( } const nextVersion = labelSet.highestLabelSetVersion + 1; - await existingDb.close(); logger.info( `Determined next label set version: ${nextVersion} (current highest: ${labelSet.highestLabelSetVersion})`, ); - return nextVersion; + // Return the open database connection instead of closing it + return { version: nextVersion, existingDb }; } catch (error) { const errorMessage = error instanceof Error ? 
error.message : String(error); throw new Error( @@ -266,6 +267,7 @@ async function initializeConversion( options: ConvertCsvCommandOptions, labelSetVersion: number, outputFile: string, + existingDb: ENSRainbowDB | null, ) { logger.info("Starting conversion from CSV to .ensrainbow format..."); logger.info(`Input file: ${options.inputFile}`); @@ -286,22 +288,9 @@ async function initializeConversion( logger.warn(`Could not determine file size: ${error}`); } - // Open existing database if path is provided (for filtering existing labels) - let existingDb: ENSRainbowDB | null = null; - if (options.existingDbPath) { - try { - logger.info(`Opening existing database for filtering: ${options.existingDbPath}`); - existingDb = await ENSRainbowDB.open(options.existingDbPath); - logger.info("Successfully opened existing database for label filtering"); - } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error); - logger.error( - `Failed to open existing database at ${options.existingDbPath}: ${errorMessage}`, - ); - throw new Error( - `Cannot proceed without existing database. Failed to open database at ${options.existingDbPath}: ${errorMessage}`, - ); - } + // Log if using existing database for filtering + if (existingDb) { + logger.info("Using existing database connection for label filtering"); } const { RainbowRecordType, RainbowRecordCollectionType } = createRainbowProtobufRoot(); @@ -546,8 +535,9 @@ async function processCSVFile( * Main CSV conversion command with true streaming using fast-csv */ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Promise { - // Determine label set version from existing database or default to 0 - const labelSetVersion = await determineLabelSetVersion( + // Get label set version from existing database or default to 0 + // This also opens the database if needed, and we'll reuse that connection + const { version: labelSetVersion, existingDb: openedDb } = await getLabelSetVersionAndDatabase( options.existingDbPath, options.labelSetId, ); @@ -565,7 +555,7 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom startTime: new Date(), }; - let existingDb: ENSRainbowDB | null = null; + let existingDb: ENSRainbowDB | null = openedDb; let dedupDb: DeduplicationDB | undefined; let temporaryDedupDir: string | null = null; @@ -574,7 +564,7 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom RainbowRecordType, outputStream, existingDb: db, - } = await initializeConversion(options, labelSetVersion, outputFile); + } = await initializeConversion(options, labelSetVersion, outputFile, existingDb); existingDb = db; // Create temporary deduplication database From 0a555540411202e6a056f26aaafccc6ff537f280 Mon Sep 17 00:00:00 2001 From: djstrong Date: Thu, 8 Jan 2026 00:42:32 +0100 Subject: [PATCH 28/30] refactor: remove label set version requirement from CLI commands and update related tests --- apps/ensrainbow/src/cli.test.ts | 161 ++++++++++++++---- apps/ensrainbow/src/cli.ts | 19 +-- .../src/commands/convert-csv-command.test.ts | 2 +- .../src/commands/convert-csv-command.ts | 8 +- .../ensrainbow/concepts/creating-files.mdx | 4 +- 5 files changed, 137 insertions(+), 57 deletions(-) diff --git a/apps/ensrainbow/src/cli.test.ts b/apps/ensrainbow/src/cli.test.ts index 596b35663..dedf1b88a 100644 --- a/apps/ensrainbow/src/cli.test.ts +++ b/apps/ensrainbow/src/cli.test.ts @@ -107,7 +107,6 @@ describe("CLI", () => { const ensrainbowFile = join(TEST_FIXTURES_DIR, 
"test_ens_names_0.ensrainbow"); const ensrainbowOutputFile = join(tempDir, "test_ens_names_0.ensrainbow"); const labelSetId = "test-ens-names"; // Needed for convert - const labelSetVersion = 0; // Needed for convert expect(() => cli.parse([ @@ -117,7 +116,7 @@ describe("CLI", () => { "--output-file", ensrainbowOutputFile, ]), - ).toThrow(/Missing required arguments: label-set-id, label-set-version/); + ).toThrow(/Missing required argument: label-set-id/); // Successful convert with args const ingestCli = createCLI({ exitProcess: false }); @@ -129,8 +128,6 @@ describe("CLI", () => { ensrainbowOutputFile, "--label-set-id", labelSetId, - "--label-set-version", - labelSetVersion.toString(), ]); //command: pnpm convert-sql --input-file test/fixtures/test_ens_names.sql.gz --output-file test/fixtures/test_ens_names_0.ensrainbow --label-set-id test-ens-names --label-set-version 0 //verify that the file is created @@ -159,7 +156,6 @@ describe("CLI", () => { const sqlInputFile = join(TEST_FIXTURES_DIR, "ens_test_env_names.sql.gz"); const ensrainbowOutputFile = join(tempDir, "ens_test_env_0.ensrainbow"); const labelSetId = "ens-test-env"; // Needed for convert - const labelSetVersion = 0; // Needed for convert expect(() => cli.parse([ @@ -169,7 +165,7 @@ describe("CLI", () => { "--output-file", ensrainbowOutputFile, ]), - ).toThrow(/Missing required arguments: label-set-id, label-set-version/); + ).toThrow(/Missing required argument: label-set-id/); // Successful convert with args const ingestCli = createCLI({ exitProcess: false }); @@ -181,8 +177,6 @@ describe("CLI", () => { ensrainbowOutputFile, "--label-set-id", labelSetId, - "--label-set-version", - labelSetVersion.toString(), ]); //command: pnpm convert-sql --input-file test_ens_names.sql.gz --output-file test_ens_names_0.ensrainbow --label-set-id test-ens-names --label-set-version 0 //verify that the file is created @@ -207,7 +201,6 @@ describe("CLI", () => { const sqlInputFile = join(TEST_FIXTURES_DIR, "test_ens_names.sql.gz"); const ensrainbowOutputFile = join(tempDir, "test_ens_names_1.ensrainbow"); const labelSetId = "test-ens-names"; // Needed for convert - const labelSetVersion = 1; // Needed for convert expect(() => cli.parse([ @@ -217,20 +210,47 @@ describe("CLI", () => { "--output-file", ensrainbowOutputFile, ]), - ).toThrow(/Missing required arguments: label-set-id, label-set-version/); + ).toThrow(/Missing required argument: label-set-id/); const ingestCli2 = createCLI({ exitProcess: false }); - // Successful convert with args + // Successful convert with args (convert-sql always creates version 0) + // To test version 1, we need to use convert command with existing database + // But for this test, we'll create version 0 and then manually test the ingestion failure + const csvInputFile = join(TEST_FIXTURES_DIR, "test_labels_2col.csv"); + const tempDbDirForV1 = join(tempDir, "temp-db-for-v1"); + const version0FileForV1 = join(tempDir, "test_ens_names_0_for_v1.ensrainbow"); + + // Create version 0 file await ingestCli2.parse([ - "convert-sql", + "convert", "--input-file", - sqlInputFile, + csvInputFile, + "--output-file", + version0FileForV1, + "--label-set-id", + labelSetId, + ]); + + // Ingest version 0 to create database + await ingestCli2.parse([ + "ingest-ensrainbow", + "--input-file", + version0FileForV1, + "--data-dir", + tempDbDirForV1, + ]); + + // Create version 1 file using existing database + await ingestCli2.parse([ + "convert", + "--input-file", + csvInputFile, "--output-file", ensrainbowOutputFile, "--label-set-id", 
labelSetId, - "--label-set-version", - labelSetVersion.toString(), + "--existing-db-path", + tempDbDirForV1, ]); //verify it is created await expect(stat(ensrainbowOutputFile)).resolves.toBeDefined(); @@ -254,38 +274,99 @@ describe("CLI", () => { }); it("should ingest first file successfully but reject second file with label set version not being 1 higher than the current highest label set version", async () => { - // First, ingest a valid file with label set version 0 - const firstInputFile = join(TEST_FIXTURES_DIR, "test_ens_names_0.ensrainbow"); + // First, we'll create a version 0 file and then a version 2 file const secondInputFile = join(tempDir, "test_ens_names_2.ensrainbow"); // Create an ensrainbow file with label set version 2 - const sqlInputFile = join(TEST_FIXTURES_DIR, "test_ens_names.sql.gz"); + // To create version 2, we need to create version 0, ingest it, create version 1, ingest it, then create version 2 + const csvInputFile = join(TEST_FIXTURES_DIR, "test_labels_2col.csv"); const labelSetId = "test-ens-names"; - const labelSetVersion = 2; // Higher than 1 - // Successful convert with label set version 2 + // Create temporary directory for building up versions sequentially + const tempDbDir = join(tempDir, "temp-db"); + const version0File = join(tempDir, "test_ens_names_0_temp.ensrainbow"); + const version1File = join(tempDir, "test_ens_names_1_temp.ensrainbow"); + const convertCli = createCLI({ exitProcess: false }); + + // Step 1: Create version 0 file await convertCli.parse([ - "convert-sql", + "convert", "--input-file", - sqlInputFile, + csvInputFile, + "--output-file", + version0File, + "--label-set-id", + labelSetId, + ]); + + // Step 2: Ingest version 0 to create database (database now has version 0) + await convertCli.parse([ + "ingest-ensrainbow", + "--input-file", + version0File, + "--data-dir", + tempDbDir, + ]); + + // Step 3: Create version 1 file using existing database (will be version 1) + await convertCli.parse([ + "convert", + "--input-file", + csvInputFile, + "--output-file", + version1File, + "--label-set-id", + labelSetId, + "--existing-db-path", + tempDbDir, + ]); + + // Step 4: Ingest version 1 into the same database (database now has versions 0 and 1, highest is 1) + await convertCli.parse([ + "ingest-ensrainbow", + "--input-file", + version1File, + "--data-dir", + tempDbDir, + ]); + + // Step 5: Create version 2 file using existing database (will be version 2, since highest is 1) + await convertCli.parse([ + "convert", + "--input-file", + csvInputFile, "--output-file", secondInputFile, "--label-set-id", labelSetId, - "--label-set-version", - labelSetVersion.toString(), + "--existing-db-path", + tempDbDir, ]); // Verify the file with label set version 2 was created await expect(stat(secondInputFile)).resolves.toBeDefined(); + // Create a completely separate version 0 file for the final test + // Use a fresh CLI instance and ensure no existing-db-path is used + const finalTestCli = createCLI({ exitProcess: false }); + const finalTestVersion0File = join(tempDir, "final_test_v0.ensrainbow"); + await finalTestCli.parse([ + "convert", + "--input-file", + csvInputFile, + "--output-file", + finalTestVersion0File, + "--label-set-id", + labelSetId, + ]); + // First ingest succeeds with label set version 0 const ingestCli = createCLI({ exitProcess: false }); await ingestCli.parse([ "ingest-ensrainbow", "--input-file", - firstInputFile, + finalTestVersion0File, "--data-dir", testDataDir, ]); @@ -311,35 +392,45 @@ describe("CLI", () => { const thirdInputFile 
= join(tempDir, "different_label_set_id_1.ensrainbow"); // Create an ensrainbow file with different label set id - const sqlInputFile = join(TEST_FIXTURES_DIR, "test_ens_names.sql.gz"); + const csvInputFile = join(TEST_FIXTURES_DIR, "test_labels_2col.csv"); const labelSetId = "different-label-set-id"; // Different from test-ens-names - const labelSetVersion = 0; + + // Create temporary directory for version 0 database + const tempDbDir0 = join(tempDir, "temp-db-different-v0"); // Create second file with different label set id and label set version 0 const convertCli = createCLI({ exitProcess: false }); await convertCli.parse([ - "convert-sql", + "convert", "--input-file", - sqlInputFile, + csvInputFile, "--output-file", secondInputFile, "--label-set-id", labelSetId, - "--label-set-version", - labelSetVersion.toString(), ]); // Create third file with different label set id and label set version 1 + // First, ingest version 0 to create database await convertCli.parse([ - "convert-sql", + "ingest-ensrainbow", "--input-file", - sqlInputFile, + secondInputFile, + "--data-dir", + tempDbDir0, + ]); + + // Then create version 1 using existing database + await convertCli.parse([ + "convert", + "--input-file", + csvInputFile, "--output-file", thirdInputFile, "--label-set-id", labelSetId, - "--label-set-version", - "1", + "--existing-db-path", + tempDbDir0, ]); // Verify the file with different label set id was created diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index 35732e4e8..a3fb392bb 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -5,12 +5,7 @@ import type { ArgumentsCamelCase, Argv } from "yargs"; import { hideBin } from "yargs/helpers"; import yargs from "yargs/yargs"; -import { - buildLabelSetId, - buildLabelSetVersion, - type LabelSetId, - type LabelSetVersion, -} from "@ensnode/ensnode-sdk"; +import { buildLabelSetId, type LabelSetId } from "@ensnode/ensnode-sdk"; import { convertCommand } from "@/commands/convert-command-sql"; import { convertCsvCommand } from "@/commands/convert-csv-command"; @@ -59,7 +54,6 @@ interface ConvertArgs { "input-file": string; "output-file"?: string; "label-set-id": LabelSetId; - "label-set-version": LabelSetVersion; } interface ConvertCsvArgs { @@ -256,12 +250,6 @@ export function createCLI(options: CLIOptions = {}) { demandOption: true, }) .coerce("label-set-id", buildLabelSetId) - .option("label-set-version", { - type: "number", - description: "Label set version for the generated ensrainbow file", - demandOption: true, - }) - .coerce("label-set-version", buildLabelSetVersion) .option("output-file", { type: "string", description: "Path to where the resulting ensrainbow file will be output", @@ -269,13 +257,12 @@ export function createCLI(options: CLIOptions = {}) { }, async (argv: ArgumentsCamelCase) => { const outputFile = - argv["output-file"] ?? - join(process.cwd(), `${argv["label-set-id"]}_${argv["label-set-version"]}.ensrainbow`); + argv["output-file"] ?? 
join(process.cwd(), `${argv["label-set-id"]}_.ensrainbow`); await convertCommand({ inputFile: argv["input-file"], outputFile, labelSetId: argv["label-set-id"], - labelSetVersion: argv["label-set-version"], + labelSetVersion: 0, }); }, ) diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index 706e22166..42b1cdd47 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -107,7 +107,7 @@ describe("convert-csv-command", () => { outputFile, labelSetId: "test-csv-invalid" as LabelSetId, }), - ).rejects.toThrow(/Failed on line 1: Invalid labelHash/); + ).rejects.toThrow(/Failed on line 1: Expected 1 or 2 col/); }); it("should handle CSV with special characters, emojis, unicode, and quoted fields", async () => { diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index ac981c537..edeabfbba 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -316,7 +316,7 @@ function createRainbowRecord(row: string[]): RainbowRecord { labelHash: labelHashBytes, label: label, }; - } else { + } else if (row.length === 2) { // Two columns: validate labelhash format and use provided hash // Trim whitespace from hash (metadata), but preserve label as-is const providedHash = String(row[1]).trim(); @@ -327,13 +327,15 @@ function createRainbowRecord(row: string[]): RainbowRecord { try { const labelHash = labelHashToBytes(maybeLabelHash as LabelHash); // performs labelhash format validation return { - labelHash: labelHash, - label: label, + labelHash, + label, }; } catch (error) { const errorMessage = error instanceof Error ? 
error.message : String(error); throw new Error(`Invalid labelHash: ${errorMessage}`); } + } else { + throw new Error(`Expected 1 or 2 columns, but found ${row.length} columns`); } } diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx index 8aec77266..335a112b5 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx @@ -76,7 +76,7 @@ pnpm run convert \ ### Optional Parameters -- `--output-file`: Output file path (defaults to `rainbow-records-{label-set-id}_{label-set-version}.ensrainbow`) +- `--output-file`: Output file path (defaults to `{label-set-id}_{label-set-version}.ensrainbow`) - `--progress-interval`: Progress logging frequency (default: 50000 records) - `--existing-db-path`: Path to existing ENSRainbow database to filter out existing labels from the generated ensrainbow file and determine the next label set version - `--silent`: Disable progress bar (useful for scripts and automated workflows) @@ -196,7 +196,7 @@ pnpm run convert-sql \ ### Optional Parameters -- `--output-file`: Output file path (defaults to `rainbow-records-{label-set-id}_{label-set-version}.ensrainbow`) +- `--output-file`: Output file path (defaults to `{label-set-id}_{label-set-version}.ensrainbow`) ### Example: Converting Legacy ENS Subgraph Data From 653d200418f5b21cee4ae063874601d262152cca Mon Sep 17 00:00:00 2001 From: "kwrobel.eth" Date: Thu, 8 Jan 2026 00:55:01 +0100 Subject: [PATCH 29/30] Update apps/ensrainbow/src/cli.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- apps/ensrainbow/src/cli.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index a3fb392bb..32ab22b7b 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -257,7 +257,8 @@ export function createCLI(options: CLIOptions = {}) { }, async (argv: ArgumentsCamelCase) => { const outputFile = - argv["output-file"] ?? join(process.cwd(), `${argv["label-set-id"]}_.ensrainbow`); + argv["output-file"] ?? + join(process.cwd(), `${argv["label-set-id"]}_0.ensrainbow`); await convertCommand({ inputFile: argv["input-file"], outputFile, From af0c7f0842f3a0e9070dbd68ddb1ebed431f624b Mon Sep 17 00:00:00 2001 From: djstrong Date: Thu, 8 Jan 2026 00:56:44 +0100 Subject: [PATCH 30/30] lint --- apps/ensrainbow/src/cli.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index 32ab22b7b..445f41cbe 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -257,8 +257,7 @@ export function createCLI(options: CLIOptions = {}) { }, async (argv: ArgumentsCamelCase) => { const outputFile = - argv["output-file"] ?? - join(process.cwd(), `${argv["label-set-id"]}_0.ensrainbow`); + argv["output-file"] ?? join(process.cwd(), `${argv["label-set-id"]}_0.ensrainbow`); await convertCommand({ inputFile: argv["input-file"], outputFile,
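
Appendix (illustration only). The convert-csv change above branches on the CSV column count when building each rainbow record: one column means the labelhash is computed from the label, two columns means a provided hash is trimmed, validated, and decoded (the label itself is preserved as-is), and any other width is rejected with "Expected 1 or 2 columns". Below is a minimal TypeScript sketch of that branching. It assumes viem's labelhash and hexToBytes as stand-ins for the app's internal labelHashToBytes helper; createRainbowRecordSketch and its regex check are hypothetical simplifications, not the real implementation.

import { hexToBytes, labelhash } from "viem";

interface RainbowRecordSketch {
  labelHash: Uint8Array;
  label: string;
}

// Hypothetical stand-in for createRainbowRecord in convert-csv-command.ts.
function createRainbowRecordSketch(row: string[]): RainbowRecordSketch {
  // The label column is preserved exactly as given (no trimming).
  const label = String(row[0]);

  if (row.length === 1) {
    // One column: compute the labelhash from the label itself.
    return { labelHash: hexToBytes(labelhash(label)), label };
  }

  if (row.length === 2) {
    // Two columns: trim the provided hash (metadata), then validate and decode it.
    // This regex is a simplified stand-in for the real labelHashToBytes validation.
    const providedHash = String(row[1]).trim();
    if (!/^0x[0-9a-fA-F]{64}$/.test(providedHash)) {
      throw new Error(`Invalid labelHash: ${providedHash}`);
    }
    return { labelHash: hexToBytes(providedHash.toLowerCase() as `0x${string}`), label };
  }

  throw new Error(`Expected 1 or 2 columns, but found ${row.length} columns`);
}

In the real command the hash validation lives in labelHashToBytes, so a malformed hash surfaces as "Invalid labelHash: ..." while a row of the wrong width fails the column-count check first, which is why the updated test expectation matches /Expected 1 or 2 col/ instead of /Invalid labelHash/.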