From 7e6cec7d9714f0b117964b21a208168f299990b4 Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Tue, 11 Nov 2025 00:47:40 +0100 Subject: [PATCH 01/17] overhaul the javascript dict implementation --- src/dict.mjs | 1153 ++++++++++++++---------------------------- src/gleam/dict.gleam | 228 +++++---- src/gleam/list.gleam | 20 +- src/gleam_stdlib.mjs | 48 +- 4 files changed, 533 insertions(+), 916 deletions(-) diff --git a/src/dict.mjs b/src/dict.mjs index f39cd546..eaafa156 100644 --- a/src/dict.mjs +++ b/src/dict.mjs @@ -3,12 +3,13 @@ * These types can be checked using the typescript compiler with "checkjs" option. */ -import { isEqual } from "./gleam.mjs"; +import { isEqual, Result$Error, Result$Ok } from "./gleam.mjs"; +import { Option$Some, Option$None } from "./gleam/option.mjs"; + +// -- HASH -------------------------------------------------------------------- const referenceMap = /* @__PURE__ */ new WeakMap(); -const tempDataView = /* @__PURE__ */ new DataView( - /* @__PURE__ */ new ArrayBuffer(8), -); +const tempDataView = /* @__PURE__ */ new DataView(/* @__PURE__ */ new ArrayBuffer(8)); let referenceUID = 0; /** * hash the object by reference using a weak map and incrementing uid @@ -149,845 +150,459 @@ export function getHash(u) { } } -/** - * @template K,V - * @typedef {ArrayNode | IndexNode | CollisionNode} Node - */ -/** - * @template K,V - * @typedef {{ type: typeof ENTRY, k: K, v: V }} Entry - */ -/** - * @template K,V - * @typedef {{ type: typeof ARRAY_NODE, size: number, array: (undefined | Entry | Node)[] }} ArrayNode - */ -/** - * @template K,V - * @typedef {{ type: typeof INDEX_NODE, bitmap: number, array: (Entry | Node)[] }} IndexNode - */ -/** - * @template K,V - * @typedef {{ type: typeof COLLISION_NODE, hash: number, array: Entry[] }} CollisionNode - */ -/** - * @typedef {{ val: boolean }} Flag - */ -const SHIFT = 5; // number of bits you need to shift by to get the next bucket -const BUCKET_SIZE = 
Math.pow(2, SHIFT); -const MASK = BUCKET_SIZE - 1; // used to zero out all bits not in the bucket -const MAX_INDEX_NODE = BUCKET_SIZE / 2; // when does index node grow into array node -const MIN_ARRAY_NODE = BUCKET_SIZE / 4; // when does array node shrink to index node -const ENTRY = 0; -const ARRAY_NODE = 1; -const INDEX_NODE = 2; -const COLLISION_NODE = 3; - -/** @type {IndexNode} */ -const EMPTY = { - type: INDEX_NODE, - bitmap: 0, - array: [], -}; -/** - * Mask the hash to get only the bucket corresponding to shift - * @param {number} hash - * @param {number} shift - * @returns {number} - */ -function mask(hash, shift) { - return (hash >>> shift) & MASK; -} +// -- DICT -------------------------------------------------------------------- /** - * Set only the Nth bit where N is the masked hash - * @param {number} hash - * @param {number} shift - * @returns {number} + * An implementation of the CHAMP data structure, an optimised HAMT. + * + * See: M. J. Steindorfer, J.J. Vinju (2015). Optimizing Hash-Array Mapped Tries for + Fast and Lean Immutable JVM Collections. 
Available: https://michael.steindorfer.name/publications/oopsla15.pdf */ -function bitpos(hash, shift) { - return 1 << mask(hash, shift); -} - -/** - * Count the number of 1 bits in a number - * @param {number} x - * @returns {number} - */ -function bitcount(x) { - x -= (x >> 1) & 0x55555555; - x = (x & 0x33333333) + ((x >> 2) & 0x33333333); - x = (x + (x >> 4)) & 0x0f0f0f0f; - x += x >> 8; - x += x >> 16; - return x & 0x7f; +export default class Dict { + constructor(size, root) { + this.size = size; + this.root = root; + } } -/** - * Calculate the array index of an item in a bitmap index node - * @param {number} bitmap - * @param {number} bit - * @returns {number} - */ -function index(bitmap, bit) { - return bitcount(bitmap & (bit - 1)); -} +class Node { + #generation; -/** - * Efficiently copy an array and set one value at an index - * @template T - * @param {T[]} arr - * @param {number} at - * @param {T} val - * @returns {T[]} - */ -function cloneAndSet(arr, at, val) { - const len = arr.length; - const out = new Array(len); - for (let i = 0; i < len; ++i) { - out[i] = arr[i]; + constructor(generation, datamap, nodemap, data) { + // The generation value for transient copy tracking. + this.#generation = generation; + // A node is a high-arity (32 in practice) hybrid tree node. + // Hybrid means that it stores data directly as well as pointers to child nodes. + // + // Each node contains 2 bitmaps: + // - The datamap has a bit set if that slot in the node contains direct data + // - The nodemap has a bit set if that slot in the node contains another node. + // + // Both are exclusive to one another, so datamap & nodemap == 0. + // + // Every key/hash value directly correlates to a specific bit by using a trie + // suffix (least significant bits first) encoding. + // For example, if the last 5 bits of the hash are 01101, the bit to check for + // that value is the 13th bit.
+ this.datamap = datamap; + this.nodemap = nodemap; + // The slots themselves are stored in a single contiguous array that contains + // both direct k/v-pairs and child nodes. + // + // The direct children come first, followed by the child nodes in _reverse order_: + // + // 7654321 + // datamap: 1000100 + // nodemap: 10011 + // data: [key3, value3, key7, value7, child5, child2, child1] + // -------------------------> <--------------------- + // datamap nodemap + // + // Every `1` bit in the datamap corresponds to a pair of [key, value] entries, + // and every `1` bit in the nodemap corresponds to a child node entry. + // + // Children are stored in reverse order to avoid having to store or calculate an + // "offset" value to skip over the direct children. + this.data = data; } - out[at] = val; - return out; -} -/** - * Efficiently copy an array and insert one value at an index - * @template T - * @param {T[]} arr - * @param {number} at - * @param {T} val - * @returns {T[]} - */ -function spliceIn(arr, at, val) { - const len = arr.length; - const out = new Array(len + 1); - let i = 0; - let g = 0; - while (i < at) { - out[g++] = arr[i++]; + get generation() { + // hide generation so it's not enumerable - this makes dicts + // supported by the default equality and hash codes without custom implementations. + return this.#generation; } - out[g++] = val; - while (i < len) { - out[g++] = arr[i++]; + + set generation(value) { + this.#generation = value; } - return out; } +/// The power-of-2 branch factor for the dict. For example, a value of `5` indicates a 32-ary tree. +const bits = 5; +const mask = (1 << bits) - 1; + +/// This symbol is used internally to avoid constructing results. +const noElementMarker = Symbol(); + +// Some commonly used constants throughout the code.
+const emptyNode = /* @__PURE__ */ new Node(0, 0, 0, []); +const emptyDict = /* @__PURE__ */ new Dict(0, emptyNode); +const errorNil = /* @__PURE__ */ Result$Error(undefined); + /** - * Efficiently copy an array and remove one value at an index - * @template T - * @param {T[]} arr - * @param {number} at - * @returns {T[]} + * Copies a node and its data array if it's from another generation, making it safe + * to mutate the node. */ -function spliceOut(arr, at) { - const len = arr.length; - const out = new Array(len - 1); - let i = 0; - let g = 0; - while (i < at) { - out[g++] = arr[i++]; - } - ++i; - while (i < len) { - out[g++] = arr[i++]; +function copyNode(node, generation) { + if (node.generation === generation) { + return node; } - return out; + + const { datamap, nodemap, data } = node; + return new Node(generation, datamap, nodemap, data.slice(0)); } -/** - * Create a new node containing two entries - * @template K,V - * @param {number} shift - * @param {K} key1 - * @param {V} val1 - * @param {number} key2hash - * @param {K} key2 - * @param {V} val2 - * @returns {Node} - */ -function createNode(shift, key1, val1, key2hash, key2, val2) { - const key1hash = getHash(key1); - if (key1hash === key2hash) { - return { - type: COLLISION_NODE, - hash: key1hash, - array: [ - { type: ENTRY, k: key1, v: val1 }, - { type: ENTRY, k: key2, v: val2 }, - ], - }; - } - const addedLeaf = { val: false }; - return assoc( - assocIndex(EMPTY, shift, key1hash, key1, val1, addedLeaf), - shift, - key2hash, - key2, - val2, - addedLeaf, - ); +export function make() { + return emptyDict; } -/** - * @template T,K,V - * @callback AssocFunction - * @param {T} root - * @param {number} shift - * @param {number} hash - * @param {K} key - * @param {V} val - * @param {Flag} addedLeaf - * @returns {Node} - */ -/** - * Associate a node with a new entry, creating a new node - * @template T,K,V - * @type {AssocFunction,K,V>} - */ -function assoc(root, shift, hash, key, val, addedLeaf) { - switch 
(root.type) { - case ARRAY_NODE: - return assocArray(root, shift, hash, key, val, addedLeaf); - case INDEX_NODE: - return assocIndex(root, shift, hash, key, val, addedLeaf); - case COLLISION_NODE: - return assocCollision(root, shift, hash, key, val, addedLeaf); +export function from(iterable) { + let transient = toTransient(emptyDict); + for (const [key, value] of iterable) { + transient = put(key, value, transient); } + return fromTransient(transient); } -/** - * @template T,K,V - * @type {AssocFunction,K,V>} - */ -function assocArray(root, shift, hash, key, val, addedLeaf) { - const idx = mask(hash, shift); - const node = root.array[idx]; - // if the corresponding index is empty set the index to a newly created node - if (node === undefined) { - addedLeaf.val = true; - return { - type: ARRAY_NODE, - size: root.size + 1, - array: cloneAndSet(root.array, idx, { type: ENTRY, k: key, v: val }), - }; - } - if (node.type === ENTRY) { - // if keys are equal replace the entry - if (isEqual(key, node.k)) { - if (val === node.v) { - return root; - } - return { - type: ARRAY_NODE, - size: root.size, - array: cloneAndSet(root.array, idx, { - type: ENTRY, - k: key, - v: val, - }), - }; - } - // otherwise upgrade the entry to a node and insert - addedLeaf.val = true; - return { - type: ARRAY_NODE, - size: root.size, - array: cloneAndSet( - root.array, - idx, - createNode(shift + SHIFT, node.k, node.v, hash, key, val), - ), - }; - } - // otherwise call assoc on the child node - const n = assoc(node, shift + SHIFT, hash, key, val, addedLeaf); - // if the child node hasn't changed just return the old root - if (n === node) { - return root; - } - // otherwise set the index to the new node - return { - type: ARRAY_NODE, - size: root.size, - array: cloneAndSet(root.array, idx, n), - }; + +export function size(dict) { + return dict.size; } -/** - * @template T,K,V - * @type {AssocFunction,K,V>} - */ -function assocIndex(root, shift, hash, key, val, addedLeaf) { - const bit = 
bitpos(hash, shift); - const idx = index(root.bitmap, bit); - // if there is already a item at this hash index.. - if ((root.bitmap & bit) !== 0) { - // if there is a node at the index (not an entry), call assoc on the child node - const node = root.array[idx]; - if (node.type !== ENTRY) { - const n = assoc(node, shift + SHIFT, hash, key, val, addedLeaf); - if (n === node) { - return root; - } - return { - type: INDEX_NODE, - bitmap: root.bitmap, - array: cloneAndSet(root.array, idx, n), - }; - } - // otherwise there is an entry at the index - // if the keys are equal replace the entry with the updated value - const nodeKey = node.k; - if (isEqual(key, nodeKey)) { - if (val === node.v) { - return root; - } - return { - type: INDEX_NODE, - bitmap: root.bitmap, - array: cloneAndSet(root.array, idx, { - type: ENTRY, - k: key, - v: val, - }), - }; - } - // if the keys are not equal, replace the entry with a new child node - addedLeaf.val = true; - return { - type: INDEX_NODE, - bitmap: root.bitmap, - array: cloneAndSet( - root.array, - idx, - createNode(shift + SHIFT, nodeKey, node.v, hash, key, val), - ), - }; - } else { - // else there is currently no item at the hash index - const n = root.array.length; - // if the number of nodes is at the maximum, expand this node into an array node - if (n >= MAX_INDEX_NODE) { - // create a 32 length array for the new array node (one for each bit in the hash) - const nodes = new Array(32); - // create and insert a node for the new entry - const jdx = mask(hash, shift); - nodes[jdx] = assocIndex(EMPTY, shift + SHIFT, hash, key, val, addedLeaf); - let j = 0; - let bitmap = root.bitmap; - // place each item in the index node into the correct spot in the array node - // loop through all 32 bits / array positions - for (let i = 0; i < 32; i++) { - if ((bitmap & 1) !== 0) { - const node = root.array[j++]; - nodes[i] = node; - } - // shift the bitmap to process the next bit - bitmap = bitmap >>> 1; - } - return { - type: ARRAY_NODE, - 
size: n + 1, - array: nodes, - }; + +export function get(dict, key) { + return lookup(dict.root, key); +} + +function lookup(node, key) { + const hash = getHash(key); + + for (let shift = 0; shift < 32; shift += bits) { + const { data, datamap, nodemap } = node; + const bit = hashbit(hash, shift); + + if (datamap & bit) { + // we store this hash directly! + // + // this also means that there are no other values with the same + // hash prefix in this dict. + // + // We still need to check if the key matches, but if it does we know for + // sure that this is the correct value, and if it doesn't that we don't + // contain the value in question. + const dataidx = 2 * index(datamap, bit); + return isEqual(key, data[dataidx]) ? Result$Ok(data[dataidx + 1]) : errorNil; + } else if (nodemap & bit) { + // we found our hash inside the nodemap, so we can continue our search there. + node = data[data.length - 1 - index(nodemap, bit)]; } else { - // else there is still space in this index node - // simply insert a new entry at the hash index - const newArray = spliceIn(root.array, idx, { - type: ENTRY, - k: key, - v: val, - }); - addedLeaf.val = true; - return { - type: INDEX_NODE, - bitmap: root.bitmap | bit, - array: newArray, - }; + // if the hash bit is set in neither bitmap, we immediately know that + // this key cannot be inside this dict. + return errorNil; } } -} -/** - * @template T,K,V - * @type {AssocFunction,K,V>} - */ -function assocCollision(root, shift, hash, key, val, addedLeaf) { - // if there is a hash collision - if (hash === root.hash) { - const idx = collisionIndexOf(root, key); - // if this key already exists replace the entry with the new value - if (idx !== -1) { - const entry = root.array[idx]; - if (entry.v === val) { - return root; - } - return { - type: COLLISION_NODE, - hash: hash, - array: cloneAndSet(root.array, idx, { type: ENTRY, k: key, v: val }), - }; + + // our shift has exceeded 32 bits.
Everything that follows is + // implicitly an overflow node and only contains direct children. + const overflow = node.data; + for (let i = 0; i < overflow.length; i += 2) { + if (isEqual(key, overflow[i])) { + return Result$Ok(overflow[i + 1]); } - // otherwise insert the entry at the end of the array - const size = root.array.length; - addedLeaf.val = true; - return { - type: COLLISION_NODE, - hash: hash, - array: cloneAndSet(root.array, size, { type: ENTRY, k: key, v: val }), - }; } - // if there is no hash collision, upgrade to an index node - return assoc( - { - type: INDEX_NODE, - bitmap: bitpos(root.hash, shift), - array: [root], - }, - shift, - hash, - key, - val, - addedLeaf, - ); + + return errorNil; } + /** - * Find the index of a key in the collision node's array - * @template K,V - * @param {CollisionNode} root - * @param {K} key - * @returns {number} - */ -function collisionIndexOf(root, key) { - const size = root.array.length; - for (let i = 0; i < size; i++) { - if (isEqual(key, root.array[i].k)) { - return i; - } - } - return -1; + * We use "transient" values to allow for safer internal mutations of the data + * structure. This is an optimisation only. No mutable API is exposed to the user. + * + * Transients are to be treated as having a linear (single-use, think rust) type. + * A transient value becomes invalid as soon as it's passed to one of the functions. + * + * Internally, we track a "generation" value on each node. If the generation + * doesn't match the one for the current transient, we have to copy - the node + * could still be referenced by another dict instance! + * After that, no references other than the transient one exist, so it's safe + * to mutate in place.
+ */ +export function toTransient(dict) { + return { + generation: nextGeneration(dict), + root: dict.root, + size: dict.size, + dict: dict, + }; } + /** - * @template T,K,V - * @callback FindFunction - * @param {T} root - * @param {number} shift - * @param {number} hash - * @param {K} key - * @returns {undefined | Entry} - */ -/** - * Return the found entry or undefined if not present in the root - * @template K,V - * @type {FindFunction,K,V>} + * Consume a transient, producing a normal Dict again. */ -function find(root, shift, hash, key) { - switch (root.type) { - case ARRAY_NODE: - return findArray(root, shift, hash, key); - case INDEX_NODE: - return findIndex(root, shift, hash, key); - case COLLISION_NODE: - return findCollision(root, key); +export function fromTransient(transient) { + if (transient.root === transient.dict.root) { + return transient.dict; } + + return new Dict(transient.size, transient.root); } + /** + * Find and allocate the next generation id. + * * @template K,V - * @type {FindFunction,K,V>} + * @param {Dict} dict + * @returns {number} */ -function findArray(root, shift, hash, key) { - const idx = mask(hash, shift); - const node = root.array[idx]; - if (node === undefined) { - return undefined; +function nextGeneration(dict) { + const root = dict.root; + if (root.generation < Number.MAX_SAFE_INTEGER) { + return root.generation + 1; } - if (node.type !== ENTRY) { - return find(node, shift + SHIFT, hash, key); - } - if (isEqual(key, node.k)) { - return node; + + // we have reached MAX_SAFE_INTEGER generations - + // at this point, we have to walk the dictionary once to reset the counter + // on every node. This is safe since it's part of the contract for transient + // that only one of them exists at any given time. + // + const queue = [root]; + while (queue.length) { + // order doesn't matter, so we can use push/pop for faster array usage. 
+ const node = queue.pop(); + + // reset the generation to 0 + node.generation = 0; + + // queue all other referenced nodes + const nodeStart = 2 * popcount(node.datamap); + for (let i = nodeStart; i < node.data.length; ++i) { + queue.push(node.data[i]); + } } - return undefined; + + return 1; } -/** - * @template K,V - * @type {FindFunction,K,V>} + +/* + * Like `get`, but for transient values. Note that this function is not pure! */ -function findIndex(root, shift, hash, key) { - const bit = bitpos(hash, shift); - if ((root.bitmap & bit) === 0) { - return undefined; - } - const idx = index(root.bitmap, bit); - const node = root.array[idx]; - if (node.type !== ENTRY) { - return find(node, shift + SHIFT, hash, key); - } - if (isEqual(key, node.k)) { - return node; - } - return undefined; +export function query(transient, key) { + return lookup(transient.root, key); } + /** - * @template K,V - * @param {CollisionNode} root - * @param {K} key - * @returns {undefined | Entry} + * Consume a transient, writing a new key/value pair into the dictionary it + * represents. If the key already exists, it will be overwritten. + * + * Returns a new transient. */ -function findCollision(root, key) { - const idx = collisionIndexOf(root, key); - if (idx < 0) { - return undefined; - } - return root.array[idx]; +export function put(key, value, transient) { + transient.root = doUpsert(transient, transient.root, key, always(value), getHash(key), 0); + return transient; } + /** - * @template T,K,V - * @callback WithoutFunction - * @param {T} root - * @param {number} shift - * @param {number} hash - * @param {K} key - * @returns {undefined | Node} + * Consume a transient, removing a key if it exists. + * Returns a new transient. */ -/** - * Remove an entry from the root, returning the updated root. - * Returns undefined if the node should be removed from the parent. 
- * @template K,V - * @type {WithoutFunction,K,V>} - * */ -function without(root, shift, hash, key) { - switch (root.type) { - case ARRAY_NODE: - return withoutArray(root, shift, hash, key); - case INDEX_NODE: - return withoutIndex(root, shift, hash, key); - case COLLISION_NODE: - return withoutCollision(root, key); - } +export function remove(key, transient) { + return put(key, noElementMarker, transient); } -/** - * @template K,V - * @type {WithoutFunction,K,V>} - */ -function withoutArray(root, shift, hash, key) { - const idx = mask(hash, shift); - const node = root.array[idx]; - if (node === undefined) { - return root; // already empty - } - let n = undefined; - // if node is an entry and the keys are not equal there is nothing to remove - // if node is not an entry do a recursive call - if (node.type === ENTRY) { - if (!isEqual(node.k, key)) { - return root; // no changes - } - } else { - n = without(node, shift + SHIFT, hash, key); - if (n === node) { - return root; // no changes - } - } - // if the recursive call returned undefined the node should be removed - if (n === undefined) { - // if the number of child nodes is at the minimum, pack into an index node - if (root.size <= MIN_ARRAY_NODE) { - const arr = root.array; - const out = new Array(root.size - 1); - let i = 0; - let j = 0; - let bitmap = 0; - while (i < idx) { - const nv = arr[i]; - if (nv !== undefined) { - out[j] = nv; - bitmap |= 1 << i; - ++j; - } - ++i; - } - ++i; // skip copying the removed node - while (i < arr.length) { - const nv = arr[i]; - if (nv !== undefined) { - out[j] = nv; - bitmap |= 1 << i; - ++j; - } - ++i; - } - return { - type: INDEX_NODE, - bitmap: bitmap, - array: out, - }; - } - return { - type: ARRAY_NODE, - size: root.size - 1, - array: cloneAndSet(root.array, idx, n), - }; - } - return { - type: ARRAY_NODE, - size: root.size, - array: cloneAndSet(root.array, idx, n), - }; + +export function upsert(dict, key, fun) { + // we can use our noElementMarker value to skip 
traversing the dictionary twice. + const transient = toTransient(dict); + const wrapped = (value) => fun(value === noElementMarker ? Option$None() : Option$Some(value)); + transient.root = doUpsert(transient, transient.root, key, wrapped, getHash(key), 0); + return fromTransient(transient); } -/** - * @template K,V - * @type {WithoutFunction,K,V>} - */ -function withoutIndex(root, shift, hash, key) { - const bit = bitpos(hash, shift); - if ((root.bitmap & bit) === 0) { - return root; // already empty - } - const idx = index(root.bitmap, bit); - const node = root.array[idx]; - // if the item is not an entry - if (node.type !== ENTRY) { - const n = without(node, shift + SHIFT, hash, key); - if (n === node) { - return root; // no changes - } - // if not undefined, the child node still has items, so update it - if (n !== undefined) { - return { - type: INDEX_NODE, - bitmap: root.bitmap, - array: cloneAndSet(root.array, idx, n), - }; - } - // otherwise the child node should be removed - // if it was the only child node, remove this node from the parent - if (root.bitmap === bit) { - return undefined; + +export function map(dict, fun) { + // map can never modify the structure, so we can walk the dictionary directly, + // but still move to a new generation to make sure we get a new copy of every node. + const generation = nextGeneration(dict); + const root = copyNode(dict.root, generation); + const queue = [root]; + + while (queue.length) { + // order doesn't matter, so we can use push/pop for faster array usage. + const { data, datamap } = queue.pop(); + // every node contains popcount(datamap) direct entries + const edgesStart = 2 * popcount(datamap); + for (let i = 0; i < edgesStart; i += 2) { + // we copied the node while queueing it, so direct mutation here is safe. 
+ data[i + 1] = fun(data[i], data[i + 1]); } - // otherwise just remove the child node - return { - type: INDEX_NODE, - bitmap: root.bitmap ^ bit, - array: spliceOut(root.array, idx), - }; - } - // otherwise the item is an entry, remove it if the key matches - if (isEqual(key, node.k)) { - if (root.bitmap === bit) { - return undefined; + // the remaining entries are other nodes we can queue + for (let i = edgesStart; i < data.length; ++i) { + // copy the node first to make it safe to mutate + data[i] = copyNode(data[i], generation); + queue.push(data[i]); } - return { - type: INDEX_NODE, - bitmap: root.bitmap ^ bit, - array: spliceOut(root.array, idx), - }; - } - return root; -} -/** - * @template K,V - * @param {CollisionNode} root - * @param {K} key - * @returns {undefined | Node} - */ -function withoutCollision(root, key) { - const idx = collisionIndexOf(root, key); - // if the key not found, no changes - if (idx < 0) { - return root; } - // otherwise the entry was found, remove it - // if it was the only entry in this node, remove the whole node - if (root.array.length === 1) { - return undefined; - } - // otherwise just remove the entry - return { - type: COLLISION_NODE, - hash: root.hash, - array: spliceOut(root.array, idx), - }; + + return new Dict(dict.size, root); } -/** - * @template K,V - * @param {undefined | Node} root - * @param {(value:V,key:K)=>void} fn - * @returns {void} - */ -function forEach(root, fn) { - if (root === undefined) { - return; - } - const items = root.array; - const size = items.length; - for (let i = 0; i < size; i++) { - const item = items[i]; - if (item === undefined) { - continue; + +export function fold(dict, state, fun) { + const queue = [dict.root]; + + while (queue.length) { + // order doesn't matter, so we can use push/pop for faster array usage. 
+ const { data, datamap } = queue.pop(); + // every node contains popcount(datamap) direct entries + const edgesStart = 2 * popcount(datamap); + for (let i = 0; i < edgesStart; i += 2) { + state = fun(state, data[i], data[i + 1]); } - if (item.type === ENTRY) { - fn(item.v, item.k); - continue; + // the remaining entries are child nodes we can queue. + for (let i = edgesStart; i < data.length; ++i) { + queue.push(data[i]); } - forEach(item, fn); } + + return state; } /** - * Extra wrapper to keep track of Dict size and clean up the API - * @template K,V + * Main helper function for insert/upsert/remove. */ -export default class Dict { - /** - * @template V - * @param {Record} o - * @returns {Dict} - */ - static fromObject(o) { - const keys = Object.keys(o); - /** @type Dict */ - let m = Dict.new(); - for (let i = 0; i < keys.length; i++) { - const k = keys[i]; - m = m.set(k, o[k]); +function doUpsert(transient, node, key, fun, hash, shift) { + const { data, datamap, nodemap } = node; + + // 1. Overflow Node + // overflow nodes only contain key/value-pairs. we walk the data linearly trying to find a match. 
+ if (shift > 32) { + for (let i = 0; i < data.length; i += 2) { + if (isEqual(key, data[i])) { + return doUpdate(transient, node, fun, 0, i); + } } - return m; - } - /** - * @template K,V - * @param {Map} o - * @returns {Dict} - */ - static fromMap(o) { - /** @type Dict */ - let m = Dict.new(); - o.forEach((v, k) => { - m = m.set(k, v); - }); - return m; + return doInsert(transient, node, key, fun, 0, data.length); } - static new() { - return new Dict(undefined, 0); - } + const bit = hashbit(hash, shift); + const nodeidx = data.length - 1 - index(nodemap, bit); + const dataidx = 2 * index(datamap, bit); - /** - * @param {undefined | Node} root - * @param {number} size - */ - constructor(root, size) { - this.root = root; - this.size = size; - } - /** - * @template NotFound - * @param {K} key - * @param {NotFound} notFound - * @returns {NotFound | V} - */ - get(key, notFound) { - if (this.root === undefined) { - return notFound; - } - const found = find(this.root, 0, getHash(key), key); - if (found === undefined) { - return notFound; + // 2. Child Node + // We have to check first if there is already a child node we have to traverse to. + if ((nodemap & bit) !== 0) { + const oldChild = data[nodeidx]; + const newChild = doUpsert(transient, oldChild, key, fun, hash, shift + bits); + if (newChild === oldChild) { + return node; } - return found.v; - } - /** - * @param {K} key - * @param {V} val - * @returns {Dict} - */ - set(key, val) { - const addedLeaf = { val: false }; - const root = this.root === undefined ? EMPTY : this.root; - const newRoot = assoc(root, 0, getHash(key), key, val, addedLeaf); - if (newRoot === this.root) { - return this; - } - return new Dict(newRoot, addedLeaf.val ? 
this.size + 1 : this.size); - } - /** - * @param {K} key - * @returns {Dict} - */ - delete(key) { - if (this.root === undefined) { - return this; - } - const newRoot = without(this.root, 0, getHash(key), key); - if (newRoot === this.root) { - return this; - } - if (newRoot === undefined) { - return Dict.new(); + + // the node did change, so let's copy to incorporate that change. + node = copyNode(node, transient.generation); + if (newChild.nodemap !== 0 || newChild.data.length > 2) { + node.data[nodeidx] = newChild; + } else { + // this node only has a single data (k/v-pair) child. + // to restore the CHAMP invariant, we "pull" that pair up into ourselves. + // this ensures that every tree stays in its single optimal representation, + // and allows dicts to be structurally compared. + node.datamap |= bit; + node.nodemap &= ~bit; + // NOTE: the order here is important to avoid mutation bugs! + // Remove the old child node, and insert the data pair into ourselves. + node.data.splice(nodeidx, 1); + node.data.splice(dataidx, 0, newChild.data[0], newChild.data[1]); } - return new Dict(newRoot, this.size - 1); + + return node; } - /** - * @param {K} key - * @returns {boolean} - */ - has(key) { - if (this.root === undefined) { - return false; - } - return find(this.root, 0, getHash(key), key) !== undefined; + + // 3. New Data Node + // No child node and no data node exists yet, so we can potentially just insert a new value. + if ((datamap & bit) === 0) { + return doInsert(transient, node, key, fun, bit, dataidx); } - /** - * @returns {[K,V][]} - */ - entries() { - if (this.root === undefined) { - return []; - } - /** @type [K,V][] */ - const result = []; - this.forEach((v, k) => result.push([k, v])); - return result; + + // 4. Existing Data Node + // We have a match that we can update, or remove. 
+ if (isEqual(key, data[dataidx])) { + return doUpdate(transient, node, fun, bit, dataidx); } - /** - * - * @param {(val:V,key:K)=>void} fn - */ - forEach(fn) { - forEach(this.root, fn); + + // 5. Collision + // There is no child node, but a data node with the same hash, but with a different key. + // To resolve this, we push both nodes down one level. + let child = new Node(transient.generation, 0, 0, []); + child = doUpsert(transient, child, key, fun, hash, shift + bits); + if (!child.data.length) { + return node; } - hashCode() { - let h = 0; - this.forEach((v, k) => { - h = (h + hashMerge(getHash(v), getHash(k))) | 0; - }); - return h; + + const otherKey = data[dataidx]; + child = doUpsert(transient, child, otherKey, always(data[dataidx + 1]), getHash(otherKey), shift + bits); + // we inserted 2 elements, but implicitly deleted the one we pushed down from the datamap. + transient.size -= 1; + + node = copyNode(node, transient.generation); + node.datamap &= ~bit; + node.nodemap |= bit; + + // remove the old data pair, and insert the new child node. + // because we remove 2 elements first, our indices are off-by-one!
+ // When calculating the nodeidx, we measure with the length including those + // 2 extra elements, but missing the one we haven't inserted yet, so we have + // to correct for both of these with (1-2) = -1 + node.data.splice(dataidx, 2); + node.data.splice(nodeidx - 1, 0, child); + + return node; +} + +function doUpdate(transient, node, fun, bit, index) { + node = copyNode(node, transient.generation); + + const value = fun(node.data[index + 1]); + + if (value === noElementMarker) { + node.data.splice(index, 2); + node.datamap &= ~bit; + transient.size -= 1; + } else { + node.data[index + 1] = value; } - /** - * @param {unknown} o - * @returns {boolean} - */ - equals(o) { - if (!(o instanceof Dict) || this.size !== o.size) { - return false; - } - try { - this.forEach((v, k) => { - if (!isEqual(o.get(k, !v), v)) { - throw unequalDictSymbol; - } - }); - return true; - } catch (e) { - if (e === unequalDictSymbol) { - return false; - } + return node; +} - throw e; - } +function doInsert(transient, node, key, fun, bit, index) { + const value = fun(noElementMarker); + if (value === noElementMarker) { + return node; } + + node = copyNode(node, transient.generation); + + node.datamap |= bit; + node.data.splice(index, 0, key, value); + transient.size += 1; + + return node; } -// This is thrown internally in Dict.equals() so that it returns false as soon -// as a non-matching key is found -const unequalDictSymbol = /* @__PURE__ */ Symbol(); +function always(value) { + return (_) => value; +} + +/** + * How many `1` bits are set in a 32-bit integer. + */ +function popcount(n) { + n -= (n >>> 1) & 0x55555555; + n = (n & 0x33333333) + ((n >>> 2) & 0x33333333); + return (((n + (n >>> 4)) & 0x0f0f0f0f) * 0x01010101) >>> 24; +} + +/** + * Given a population bitmap and a bit selected from that map, returns + * how many less significant 1 bits there are. + * + * For example, index(10101, 100) returns 1, since there is a single less + * significant `1` bit. 
This translates to the 0-based "index" of that bit. + */ +function index(bitmap, bit) { + return popcount(bitmap & (bit - 1)); +} + +/** + * Extracts a single slice of the hash, and returns a bitmask for the resulting value. + * For example, if the slice returns 5, this function returns 100000 = 1 << 5. + */ +function hashbit(hash, shift) { + return 1 << ((hash >>> shift) & mask); +} diff --git a/src/gleam/dict.gleam b/src/gleam/dict.gleam index 29427278..0d8887f4 100644 --- a/src/gleam/dict.gleam +++ b/src/gleam/dict.gleam @@ -16,6 +16,25 @@ import gleam/option.{type Option} /// pub type Dict(key, value) +/// "TransientDict" is a mutable view on a dictionary used internally by the +/// javascript target. No mutable API is exposed to the user. +/// +/// Transients are to be treated as having a linear (single-use, think rust) type. +/// A transient value becomes invalid as soon as it's passed to one of the functions. +type TransientDict(key, value) + +/// Convert a normal Dict to a transient dict. +/// A transient dict is a mutable copy of the original. +@external(erlang, "gleam_stdlib", "identity") +@external(javascript, "../dict.mjs", "toTransient") +fn to_transient(dict: Dict(key, value)) -> TransientDict(key, value) + +/// Convert a transient dict back into a normal dict, freezing its contents. +/// Using the transient after this point is highly unsafe and leads to undefined behavior. +@external(erlang, "gleam_stdlib", "identity") +@external(javascript, "../dict.mjs", "fromTransient") +fn from_transient(transient: TransientDict(key, value)) -> Dict(key, value) + /// Determines the number of key-value pairs in the dict. /// This function runs in constant time and does not need to iterate the dict.
/// @@ -32,7 +51,7 @@ pub type Dict(key, value) /// ``` /// @external(erlang, "maps", "size") -@external(javascript, "../gleam_stdlib.mjs", "map_size") +@external(javascript, "../dict.mjs", "size") pub fn size(dict: Dict(k, v)) -> Int /// Determines whether or not the dict is empty. @@ -76,8 +95,10 @@ pub fn is_empty(dict: Dict(k, v)) -> Bool { /// ``` /// @external(erlang, "maps", "to_list") -@external(javascript, "../gleam_stdlib.mjs", "map_to_list") -pub fn to_list(dict: Dict(k, v)) -> List(#(k, v)) +pub fn to_list(dict: Dict(k, v)) -> List(#(k, v)) { + use acc, key, value <- fold(dict, from: []) + [#(key, value), ..acc] +} /// Converts a list of 2-element tuples `#(key, value)` to a dict. /// @@ -86,16 +107,16 @@ pub fn to_list(dict: Dict(k, v)) -> List(#(k, v)) /// @external(erlang, "maps", "from_list") pub fn from_list(list: List(#(k, v))) -> Dict(k, v) { - from_list_loop(list, new()) + from_list_loop(to_transient(new()), list) } fn from_list_loop( - over list: List(#(k, v)), - from initial: Dict(k, v), + transient: TransientDict(k, v), + list: List(#(k, v)), ) -> Dict(k, v) { case list { - [] -> initial - [#(key, value), ..rest] -> from_list_loop(rest, insert(initial, key, value)) + [] -> from_transient(transient) + [#(key, value), ..rest] -> from_list_loop(put(key, value, transient), rest) } } @@ -119,13 +140,16 @@ pub fn has_key(dict: Dict(k, v), key: k) -> Bool { @external(erlang, "maps", "is_key") fn do_has_key(key: k, dict: Dict(k, v)) -> Bool { - get(dict, key) != Error(Nil) + case get(dict, key) { + Ok(_) -> True + Error(_) -> False + } } /// Creates a fresh dict that contains no values. /// @external(erlang, "maps", "new") -@external(javascript, "../gleam_stdlib.mjs", "new_map") +@external(javascript, "../dict.mjs", "make") pub fn new() -> Dict(k, v) /// Fetches a value from a dict for a given key. 
@@ -146,7 +170,7 @@ pub fn new() -> Dict(k, v) /// ``` /// @external(erlang, "gleam_stdlib", "map_get") -@external(javascript, "../gleam_stdlib.mjs", "map_get") +@external(javascript, "../dict.mjs", "get") pub fn get(from: Dict(k, v), get: k) -> Result(v, Nil) /// Inserts a value into the dict with the given key. @@ -167,12 +191,12 @@ pub fn get(from: Dict(k, v), get: k) -> Result(v, Nil) /// ``` /// pub fn insert(into dict: Dict(k, v), for key: k, insert value: v) -> Dict(k, v) { - do_insert(key, value, dict) + to_transient(dict) |> put(key, value, _) |> from_transient } @external(erlang, "maps", "put") -@external(javascript, "../gleam_stdlib.mjs", "map_insert") -fn do_insert(key: k, value: v, dict: Dict(k, v)) -> Dict(k, v) +@external(javascript, "../dict.mjs", "put") +fn put(key: k, value: v, transient: TransientDict(k, v)) -> TransientDict(k, v) /// Updates all values in a given dict by calling a given function on each key /// and value. @@ -185,15 +209,13 @@ fn do_insert(key: k, value: v, dict: Dict(k, v)) -> Dict(k, v) /// // -> from_list([#(3, 9), #(2, 8)]) /// ``` /// +@external(javascript, "../dict.mjs", "map") pub fn map_values(in dict: Dict(k, v), with fun: fn(k, v) -> a) -> Dict(k, a) { do_map_values(fun, dict) } @external(erlang, "maps", "map") -fn do_map_values(f: fn(k, v) -> a, dict: Dict(k, v)) -> Dict(k, a) { - let f = fn(dict, k, v) { insert(dict, k, f(k, v)) } - fold(dict, from: new(), with: f) -} +fn do_map_values(f: fn(k, v) -> a, dict: Dict(k, v)) -> Dict(k, a) /// Gets a list of all keys in a given dict. 
/// @@ -210,21 +232,8 @@ fn do_map_values(f: fn(k, v) -> a, dict: Dict(k, v)) -> Dict(k, a) { /// @external(erlang, "maps", "keys") pub fn keys(dict: Dict(k, v)) -> List(k) { - do_keys_loop(to_list(dict), []) -} - -fn do_keys_loop(list: List(#(k, v)), acc: List(k)) -> List(k) { - case list { - [] -> reverse_and_concat(acc, []) - [#(key, _value), ..rest] -> do_keys_loop(rest, [key, ..acc]) - } -} - -fn reverse_and_concat(remaining: List(a), accumulator: List(a)) -> List(a) { - case remaining { - [] -> accumulator - [first, ..rest] -> reverse_and_concat(rest, [first, ..accumulator]) - } + use acc, key, _value <- fold(dict, []) + [key, ..acc] } /// Gets a list of all values in a given dict. @@ -242,15 +251,8 @@ fn reverse_and_concat(remaining: List(a), accumulator: List(a)) -> List(a) { /// @external(erlang, "maps", "values") pub fn values(dict: Dict(k, v)) -> List(v) { - let list_of_pairs = to_list(dict) - do_values_loop(list_of_pairs, []) -} - -fn do_values_loop(list: List(#(k, v)), acc: List(v)) -> List(v) { - case list { - [] -> reverse_and_concat(acc, []) - [#(_key, value), ..rest] -> do_values_loop(rest, [value, ..acc]) - } + use acc, _key, value <- fold(dict, []) + [value, ..acc] } /// Creates a new dict from a given dict, minus any entries that a given function @@ -279,14 +281,14 @@ pub fn filter( @external(erlang, "maps", "filter") fn do_filter(f: fn(k, v) -> Bool, dict: Dict(k, v)) -> Dict(k, v) { - let insert = fn(dict, k, v) { - case f(k, v) { - True -> insert(dict, k, v) - False -> dict + to_transient(new()) + |> fold(over: dict, with: fn(transient, key, value) { + case f(key, value) { + True -> put(key, value, transient) + False -> transient } - } - - fold(dict, from: new(), with: insert) + }) + |> from_transient } /// Creates a new dict from a given dict, only including any entries for which the @@ -312,23 +314,21 @@ pub fn take(from dict: Dict(k, v), keeping desired_keys: List(k)) -> Dict(k, v) @external(erlang, "maps", "with") fn do_take(desired_keys: 
List(k), dict: Dict(k, v)) -> Dict(k, v) { - do_take_loop(dict, desired_keys, new()) + do_take_loop(dict, desired_keys, to_transient(new())) } fn do_take_loop( dict: Dict(k, v), desired_keys: List(k), - acc: Dict(k, v), + acc: TransientDict(k, v), ) -> Dict(k, v) { - let insert = fn(taken, key) { - case get(dict, key) { - Ok(value) -> insert(taken, key, value) - Error(_) -> taken - } - } case desired_keys { - [] -> acc - [first, ..rest] -> do_take_loop(dict, rest, insert(acc, first)) + [] -> from_transient(acc) + [key, ..rest] -> + case get(dict, key) { + Ok(value) -> do_take_loop(dict, rest, put(key, value, acc)) + Error(_) -> do_take_loop(dict, rest, acc) + } } } @@ -348,20 +348,11 @@ fn do_take_loop( /// @external(erlang, "maps", "merge") pub fn merge(into dict: Dict(k, v), from new_entries: Dict(k, v)) -> Dict(k, v) { - new_entries - |> to_list - |> fold_inserts(dict) -} - -fn fold_inserts(new_entries: List(#(k, v)), dict: Dict(k, v)) -> Dict(k, v) { - case new_entries { - [] -> dict - [first, ..rest] -> fold_inserts(rest, insert_pair(dict, first)) - } -} - -fn insert_pair(dict: Dict(k, v), pair: #(k, v)) -> Dict(k, v) { - insert(dict, pair.0, pair.1) + to_transient(dict) + |> fold(over: new_entries, with: fn(transient, key, value) { + put(key, value, transient) + }) + |> from_transient } /// Creates a new dict from a given dict with all the same entries except for the @@ -380,12 +371,12 @@ fn insert_pair(dict: Dict(k, v), pair: #(k, v)) -> Dict(k, v) { /// ``` /// pub fn delete(from dict: Dict(k, v), delete key: k) -> Dict(k, v) { - do_delete(key, dict) + to_transient(dict) |> remove(key, _) |> from_transient } @external(erlang, "maps", "remove") -@external(javascript, "../gleam_stdlib.mjs", "map_remove") -fn do_delete(a: k, b: Dict(k, v)) -> Dict(k, v) +@external(javascript, "../dict.mjs", "remove") +fn remove(a: k, b: TransientDict(k, v)) -> TransientDict(k, v) /// Creates a new dict from a given dict with all the same entries except any with /// keys found 
in a given list. @@ -408,9 +399,21 @@ fn do_delete(a: k, b: Dict(k, v)) -> Dict(k, v) /// ``` /// pub fn drop(from dict: Dict(k, v), drop disallowed_keys: List(k)) -> Dict(k, v) { + do_drop(disallowed_keys, dict) +} + +@external(erlang, "maps", "without") +fn do_drop(disallowed_keys: List(k), dict: Dict(k, v)) -> Dict(k, v) { + drop_loop(to_transient(dict), disallowed_keys) +} + +fn drop_loop( + transient: TransientDict(k, v), + disallowed_keys: List(k), +) -> Dict(k, v) { case disallowed_keys { - [] -> dict - [first, ..rest] -> drop(delete(dict, first), rest) + [] -> from_transient(transient) + [key, ..rest] -> drop_loop(remove(key, transient), rest) } } @@ -437,6 +440,7 @@ pub fn drop(from dict: Dict(k, v), drop disallowed_keys: List(k)) -> Dict(k, v) /// // -> from_list([#("a", 0), #("b", 0)]) /// ``` /// +@external(javascript, "../dict.mjs", "upsert") pub fn upsert( in dict: Dict(k, v), update key: k, @@ -473,24 +477,18 @@ pub fn upsert( /// // -> "abc" /// ``` /// +@external(javascript, "../dict.mjs", "fold") pub fn fold( over dict: Dict(k, v), from initial: acc, with fun: fn(acc, k, v) -> acc, ) -> acc { - fold_loop(to_list(dict), initial, fun) + let fun = fn(key, value, acc) { fun(acc, key, value) } + do_fold(fun, initial, dict) } -fn fold_loop( - list: List(#(k, v)), - initial: acc, - fun: fn(acc, k, v) -> acc, -) -> acc { - case list { - [] -> initial - [#(k, v), ..rest] -> fold_loop(rest, fun(initial, k, v), fun) - } -} +@external(erlang, "maps", "fold") +fn do_fold(fun: fn(k, v, acc) -> acc, initial: acc, dict: Dict(k, v)) -> acc /// Calls a function for each key and value in a dict, discarding the return /// value. 
@@ -540,9 +538,53 @@ pub fn combine( other: Dict(k, v), with fun: fn(v, v) -> v, ) -> Dict(k, v) { - use acc, key, value <- fold(over: dict, from: other) - case get(acc, key) { - Ok(other_value) -> insert(acc, key, fun(value, other_value)) - Error(_) -> insert(acc, key, value) + do_combine(fn(_, l, r) { fun(l, r) }, dict, other) +} + +@external(erlang, "maps", "merge_with") +fn do_combine( + combine: fn(k, v, v) -> v, + left: Dict(k, v), + right: Dict(k, v), +) -> Dict(k, v) { + let #(big, small, combine) = case size(left) >= size(right) { + True -> #(left, right, combine) + False -> #(right, left, fn(k, l, r) { combine(k, r, l) }) + } + + to_transient(big) + |> fold(over: small, with: fn(transient, key, value) { + case query(transient, key) { + Ok(existing) -> put(key, combine(key, existing, value), transient) + Error(_) -> put(key, value, transient) + } + }) + |> from_transient +} + +@external(erlang, "gleam_stdlib", "map_get") +@external(javascript, "../dict.mjs", "query") +fn query(transient: TransientDict(k, v), key: k) -> Result(v, Nil) + +@internal +pub fn group(key: fn(v) -> k, list: List(v)) -> Dict(k, List(v)) { + group_loop(to_transient(new()), key, list) +} + +fn group_loop( + transient: TransientDict(k, List(v)), + to_key: fn(v) -> k, + list: List(v), +) -> Dict(k, List(v)) { + case list { + [] -> from_transient(transient) + [value, ..rest] -> { + let key = to_key(value) + case query(transient, key) { + Ok(existing) -> + group_loop(put(key, [value, ..existing], transient), to_key, rest) + Error(_) -> group_loop(put(key, [value], transient), to_key, rest) + } + } } } diff --git a/src/gleam/list.gleam b/src/gleam/list.gleam index 68362a43..5a172969 100644 --- a/src/gleam/list.gleam +++ b/src/gleam/list.gleam @@ -296,25 +296,7 @@ pub fn rest(list: List(a)) -> Result(List(a), Nil) { /// ``` /// pub fn group(list: List(v), by key: fn(v) -> k) -> Dict(k, List(v)) { - group_loop(list, key, dict.new()) -} - -fn group_loop( - list: List(v), - to_key: fn(v) -> 
k, - groups: Dict(k, List(v)), -) -> Dict(k, List(v)) { - case list { - [] -> groups - [first, ..rest] -> { - let key = to_key(first) - let groups = case dict.get(groups, key) { - Error(_) -> dict.insert(groups, key, [first]) - Ok(existing) -> dict.insert(groups, key, [first, ..existing]) - } - group_loop(rest, to_key, groups) - } - } + dict.group(key, list) } /// Returns a new list containing only the elements from the first list for diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index ebac45f4..490443fb 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -13,12 +13,11 @@ import { CustomType, } from "./gleam.mjs"; import { Some, None } from "./gleam/option.mjs"; -import Dict from "./dict.mjs"; +import { default as Dict, fold as dict_fold, get as dict_get, from as dict_from_iterable } from "./dict.mjs"; import { classify } from "./gleam/dynamic.mjs"; import { DecodeError } from "./gleam/dynamic/decode.mjs"; const Nil = undefined; -const NOT_FOUND = {}; export function identity(x) { return x; @@ -471,34 +470,6 @@ export function utf_codepoint_to_int(utf_codepoint) { return utf_codepoint.value; } -export function new_map() { - return Dict.new(); -} - -export function map_size(map) { - return map.size; -} - -export function map_to_list(map) { - return List.fromArray(map.entries()); -} - -export function map_remove(key, map) { - return map.delete(key); -} - -export function map_get(map, key) { - const value = map.get(key, NOT_FOUND); - if (value === NOT_FOUND) { - return new Error(Nil); - } - return new Ok(value); -} - -export function map_insert(key, value, map) { - return map.set(key, value); -} - function unsafe_percent_decode(string) { return decodeURIComponent(string || ""); } @@ -737,11 +708,13 @@ class Inspector { #dict(map) { let body = "dict.from_list(["; let first = true; - map.forEach((value, key) => { + + body = dict_fold(map, body, (body, key, value) => { if (!first) body = body + ", "; - body = body + "#(" + this.inspect(key) + ", " 
+ this.inspect(value) + ")"; first = false; + return body + "#(" + this.inspect(key) + ", " + this.inspect(value) + ")"; }); + return body + "])"; } @@ -947,7 +920,12 @@ export function list_to_array(list) { export function index(data, key) { // Dictionaries and dictionary-like objects can be indexed - if (data instanceof Dict || data instanceof WeakMap || data instanceof Map) { + if (data instanceof Dict) { + const result = dict_get(data, key); + return new Ok(result.isOk() ? new Some(result[0]) : new None()); + } + + if (data instanceof WeakMap || data instanceof Map) { const token = {}; const entry = data.get(key, token); if (entry === token) return new Ok(new None()); @@ -1007,7 +985,7 @@ export function dict(data) { return new Ok(data); } if (data instanceof Map || data instanceof WeakMap) { - return new Ok(Dict.fromMap(data)); + return new Ok(dict_from_iterable(data)); } if (data == null) { return new Error("Dict"); @@ -1017,7 +995,7 @@ export function dict(data) { } const proto = Object.getPrototypeOf(data); if (proto === Object.prototype || proto === null) { - return new Ok(Dict.fromObject(data)); + return new Ok(dict_from_iterable(Object.entries(data))); } return new Error("Dict"); } From 4010e9dc9e5e300c04847d4b086b7f47378de876 Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Tue, 11 Nov 2025 13:14:49 +0100 Subject: [PATCH 02/17] implement has_key as an external to avoid constructing a result --- src/dict.mjs | 18 ++++++++++++------ src/gleam/dict.gleam | 8 ++------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/dict.mjs b/src/dict.mjs index eaafa156..463d1485 100644 --- a/src/dict.mjs +++ b/src/dict.mjs @@ -259,7 +259,12 @@ export function size(dict) { } export function get(dict, key) { - return lookup(dict.root, key); + const result = lookup(dict.root, key); + return result !== noElementMarker ? 
Result$Ok(result) : errorNil; +} + +export function has(dict, key) { + return lookup(dict.root, key) !== noElementMarker; } function lookup(node, key) { @@ -279,14 +284,14 @@ function lookup(node, key) { // sure that this is the correct value, and if it doesn't that we don't // contain the value in question. const dataidx = 2 * index(datamap, bit); - return isEqual(key, data[dataidx]) ? Result$Ok(data[dataidx + 1]) : errorNil; + return isEqual(key, data[dataidx]) ? data[dataidx + 1] : noElementMarker; } else if (nodemap & bit) { // we found our hash inside the nodemap, so we can continue our search there. node = data[data.length - 1 - index(nodemap, bit)]; } else { // if the hash bit is not set in neither bitmaps, we immediately know that // this key cannot be inside this dict. - return errorNil; + return noElementMarker; } } @@ -295,11 +300,11 @@ function lookup(node, key) { const overflow = node.data; for (let i = 0; i < overflow.length; i += 2) { if (isEqual(key, overflow[i])) { - return Result$Ok(overflow[i + 1]); + return overflow[i + 1]; } } - return errorNil; + return noElementMarker; } /** @@ -375,7 +380,8 @@ function nextGeneration(dict) { * Like `get`, but for transient values. Note that this function is not pure! */ export function query(transient, key) { - return lookup(transient.root, key); + const result = lookup(transient.root, key); + return result !== noElementMarker ? 
Result$Ok(result) : errorNil; } /** diff --git a/src/gleam/dict.gleam b/src/gleam/dict.gleam index 0d8887f4..725194c3 100644 --- a/src/gleam/dict.gleam +++ b/src/gleam/dict.gleam @@ -134,17 +134,13 @@ fn from_list_loop( /// // -> False /// ``` /// +@external(javascript, "../dict.mjs", "has") pub fn has_key(dict: Dict(k, v), key: k) -> Bool { do_has_key(key, dict) } @external(erlang, "maps", "is_key") -fn do_has_key(key: k, dict: Dict(k, v)) -> Bool { - case get(dict, key) { - Ok(_) -> True - Error(_) -> False - } -} +fn do_has_key(key: k, dict: Dict(k, v)) -> Bool /// Creates a fresh dict that contains no values. /// From 98f163c20bc49dc0561a01980032298720339b57 Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Tue, 11 Nov 2025 15:14:44 +0100 Subject: [PATCH 03/17] query -> update_with --- src/dict.mjs | 14 ++++++-------- src/gleam/dict.gleam | 33 ++++++++++++++++----------------- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/src/dict.mjs b/src/dict.mjs index 463d1485..76902ded 100644 --- a/src/dict.mjs +++ b/src/dict.mjs @@ -376,14 +376,6 @@ function nextGeneration(dict) { return 1; } -/* - * Like `get`, but for transient values. Note that this function is not pure! - */ -export function query(transient, key) { - const result = lookup(transient.root, key); - return result !== noElementMarker ? Result$Ok(result) : errorNil; -} - /** * Consume a transient, writing a new key/value pair into the dictionary it * represents. If the key already exists, it will be overwritten. @@ -411,6 +403,12 @@ export function upsert(dict, key, fun) { return fromTransient(transient); } +export function update_with(key, fun, init, transient) { + const wrapped = (value) => (value === noElementMarker ? 
init : fun(value)); + transient.root = doUpsert(transient, transient.root, key, wrapped, getHash(key), 0); + return transient; +} + export function map(dict, fun) { // map can never modify the structure, so we can walk the dictionary directly, // but still move to a new generation to make sure we get a new copy of every node. diff --git a/src/gleam/dict.gleam b/src/gleam/dict.gleam index 725194c3..b3312de8 100644 --- a/src/gleam/dict.gleam +++ b/src/gleam/dict.gleam @@ -344,11 +344,7 @@ fn do_take_loop( /// @external(erlang, "maps", "merge") pub fn merge(into dict: Dict(k, v), from new_entries: Dict(k, v)) -> Dict(k, v) { - to_transient(dict) - |> fold(over: new_entries, with: fn(transient, key, value) { - put(key, value, transient) - }) - |> from_transient + combine(dict, new_entries, fn(_, new_entry) { new_entry }) } /// Creates a new dict from a given dict with all the same entries except for the @@ -550,17 +546,20 @@ fn do_combine( to_transient(big) |> fold(over: small, with: fn(transient, key, value) { - case query(transient, key) { - Ok(existing) -> put(key, combine(key, existing, value), transient) - Error(_) -> put(key, value, transient) - } + let update = fn(existing) { combine(key, existing, value) } + update_with(key, update, value, transient) }) |> from_transient } -@external(erlang, "gleam_stdlib", "map_get") -@external(javascript, "../dict.mjs", "query") -fn query(transient: TransientDict(k, v), key: k) -> Result(v, Nil) +@external(erlang, "maps", "update_with") +@external(javascript, "../dict.mjs", "update_with") +fn update_with( + key: k, + fun: fn(v) -> v, + init: v, + transient: TransientDict(k, v), +) -> TransientDict(k, v) @internal pub fn group(key: fn(v) -> k, list: List(v)) -> Dict(k, List(v)) { @@ -576,11 +575,11 @@ fn group_loop( [] -> from_transient(transient) [value, ..rest] -> { let key = to_key(value) - case query(transient, key) { - Ok(existing) -> - group_loop(put(key, [value, ..existing], transient), to_key, rest) - Error(_) -> 
group_loop(put(key, [value], transient), to_key, rest) - } + let update = fn(existing) { [value, ..existing] } + + transient + |> update_with(key, update, [value], _) + |> group_loop(to_key, rest) } } } From f438758758f3114befa2546eb3dce9bc450f0b8e Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Thu, 13 Nov 2025 12:58:34 +0100 Subject: [PATCH 04/17] use Math.imul instead of standard multiplication to make sure we don't upcast to a float by accident --- src/dict.mjs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/dict.mjs b/src/dict.mjs index 76902ded..f3cab72f 100644 --- a/src/dict.mjs +++ b/src/dict.mjs @@ -283,7 +283,7 @@ function lookup(node, key) { // We still need to check if the key matches, but if it does we know for // sure that this is the correct value, and if it doesn't that we don't // contain the value in question. - const dataidx = 2 * index(datamap, bit); + const dataidx = Math.imul(index(datamap, bit), 2); return isEqual(key, data[dataidx]) ? data[dataidx + 1] : noElementMarker; } else if (nodemap & bit) { // we found our hash inside the nodemap, so we can continue our search there. @@ -367,7 +367,7 @@ function nextGeneration(dict) { node.generation = 0; // queue all other referenced nodes - const nodeStart = 2 * popcount(node.datamap); + const nodeStart = Math.imul(popcount(node.datamap), 2); for (let i = nodeStart; i < node.data.length; ++i) { queue.push(node.data[i]); } @@ -420,7 +420,7 @@ export function map(dict, fun) { // order doesn't matter, so we can use push/pop for faster array usage. const { data, datamap } = queue.pop(); // every node contains popcount(datamap) direct entries - const edgesStart = 2 * popcount(datamap); + const edgesStart = Math.imul(popcount(datamap), 2); for (let i = 0; i < edgesStart; i += 2) { // we copied the node while queueing it, so direct mutation here is safe. 
data[i + 1] = fun(data[i], data[i + 1]); @@ -443,7 +443,7 @@ export function fold(dict, state, fun) { // order doesn't matter, so we can use push/pop for faster array usage. const { data, datamap } = queue.pop(); // every node contains popcount(datamap) direct entries - const edgesStart = 2 * popcount(datamap); + const edgesStart = Math.imul(popcount(datamap), 2); for (let i = 0; i < edgesStart; i += 2) { state = fun(state, data[i], data[i + 1]); } @@ -476,7 +476,7 @@ function doUpsert(transient, node, key, fun, hash, shift) { const bit = hashbit(hash, shift); const nodeidx = data.length - 1 - index(nodemap, bit); - const dataidx = 2 * index(datamap, bit); + const dataidx = Math.imul(index(datamap, bit), 2); // 2. Child Node // We have to check first if there is already a child node we have to traverse to. @@ -589,7 +589,7 @@ function always(value) { function popcount(n) { n -= (n >>> 1) & 0x55555555; n = (n & 0x33333333) + ((n >>> 2) & 0x33333333); - return (((n + (n >>> 4)) & 0x0f0f0f0f) * 0x01010101) >>> 24; + return Math.imul((n + (n >>> 4)) & 0x0f0f0f0f, 0x01010101) >>> 24; } /** From ef65bf22cdea8f7a77dc5dadbb00df681898be59 Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Thu, 13 Nov 2025 21:08:19 +0100 Subject: [PATCH 05/17] typos --- src/dict.mjs | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/dict.mjs b/src/dict.mjs index f3cab72f..037f95ce 100644 --- a/src/dict.mjs +++ b/src/dict.mjs @@ -176,7 +176,7 @@ class Node { // // Each node contains 2 bitmaps: // - The datamap has a bit set if that slot in the node contains direct data - // - The nodemap has a bit set if that slot in the node ctonains another node. + // - The nodemap has a bit set if that slot in the node contains another node. // // Both are exclusive to on another, so datamap & nodemap == 0. // @@ -217,7 +217,7 @@ class Node { } } -/// The power-of-2 branch factor for the dict. 
For example, a vlaue of `5` indicates a 32-ary tree. +/// The power-of-2 branching factor for the dict. For example, a value of `5` indicates a 32-ary tree. const bits = 5; const mask = (1 << bits) - 1; @@ -383,7 +383,9 @@ function nextGeneration(dict) { * Returns a new transient. */ export function put(key, value, transient) { - transient.root = doUpsert(transient, transient.root, key, always(value), getHash(key), 0); + const fun = always(value); + const hash = getHash(key); + transient.root = doUpsert(transient, transient.root, key, fun, hash, 0); return transient; } @@ -398,14 +400,17 @@ export function remove(key, transient) { export function upsert(dict, key, fun) { // we can use our noElementMarker value to skip traversing the dictionary twice. const transient = toTransient(dict); - const wrapped = (value) => fun(value === noElementMarker ? Option$None() : Option$Some(value)); - transient.root = doUpsert(transient, transient.root, key, wrapped, getHash(key), 0); + const wrapped = (value) => + fun(value === noElementMarker ? Option$None() : Option$Some(value)); + const hash = getHash(key); + transient.root = doUpsert(transient, transient.root, key, wrapped, hash, 0); return fromTransient(transient); } export function update_with(key, fun, init, transient) { const wrapped = (value) => (value === noElementMarker ? init : fun(value)); - transient.root = doUpsert(transient, transient.root, key, wrapped, getHash(key), 0); + const hash = getHash(key); + transient.root = doUpsert(transient, transient.root, key, wrapped, hash, 0); return transient; } @@ -497,7 +502,7 @@ function doUpsert(transient, node, key, fun, hash, shift) { // this ensures that every tree stays in its single optimal representation, // and allows dicts to be structurally compared. node.datamap |= bit; - node.nodemap &= ~bit; + node.nodemap ^= bit; // NOTE: the order here is important to avoid mutation bugs! // Remove the old child node, and insert the data pair into ourselves. 
node.data.splice(nodeidx, 1); @@ -529,12 +534,14 @@ function doUpsert(transient, node, key, fun, hash, shift) { } const otherKey = data[dataidx]; - child = doUpsert(transient, child, otherKey, always(data[dataidx + 1]), getHash(otherKey), shift + bits); + const childHash = getHash(otherKey); + const childFun = always(data[dataidx + 1]); + child = doUpsert(transient, child, otherKey, childFun, childHash, shift + bits); // we inserted 2 elements, but implicitely deleted the one we pushed down from the datamap. transient.size -= 1; node = copyNode(node, transient.generation); - node.datamap &= ~bit; + node.datamap ^= bit; node.nodemap |= bit; // remove the old data pair, and insert the new child node. @@ -555,7 +562,7 @@ function doUpdate(transient, node, fun, bit, index) { if (value === noElementMarker) { node.data.splice(index, 2); - node.datamap &= ~bit; + node.datamap ^= bit; transient.size -= 1; } else { node.data[index + 1] = value; From 8e2517f8267177f4edde0afc0be3db1a7ac81139 Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Fri, 14 Nov 2025 14:09:43 +0100 Subject: [PATCH 06/17] prefer a fast insert over fast upserts --- src/dict.mjs | 364 +++++++++++++++++++++++-------------------- src/gleam/dict.gleam | 7 +- 2 files changed, 199 insertions(+), 172 deletions(-) diff --git a/src/dict.mjs b/src/dict.mjs index 037f95ce..22854263 100644 --- a/src/dict.mjs +++ b/src/dict.mjs @@ -4,7 +4,6 @@ */ import { isEqual, Result$Error, Result$Ok } from "./gleam.mjs"; -import { Option$Some, Option$None } from "./gleam/option.mjs"; // -- HASH -------------------------------------------------------------------- @@ -165,12 +164,25 @@ export default class Dict { } } -class Node { - #generation; +/// The power-of-2 branching factor for the dict. For example, a value of `5` indicates a 32-ary tree. +const bits = 5; +const mask = (1 << bits) - 1; + +/// This symbol is used internally to avoid constructing results. 
+const noElementMarker = Symbol(); + +/// This symbol is used to store the "generation" on a node. +/// Using a symbol makes the property not enumerable, which means the generation +/// will be ignored during equality checks. +const generationKey = Symbol(); - constructor(generation, datamap, nodemap, data) { - // The generation value for transient copy tracking. - this.#generation = generation; +// Some commonly used constants throughout the code. +const emptyNode = /* @__PURE__ */ makeNode(0); +const emptyDict = /* @__PURE__ */ new Dict(0, emptyNode); +const errorNil = /* @__PURE__ */ Result$Error(undefined); + +function makeNode(generation) { + return { // A node is a high-arity (32 in practice) hybrid tree node. // Hybrid means that it stores data directly as well as pointers to child nodes. // @@ -184,8 +196,8 @@ class Node { // suffix (least significant bits first) encoding. // For example, if the last 5 bits of the hash are 1101, the bit to check for // that value is the 13th bit. - this.datamap = datamap; - this.nodemap = nodemap; + datamap: 0, + nodemap: 0, // The slots itself are stored in a single contiguous array that contains // both direct k/v-pairs and child nodes. // @@ -203,43 +215,29 @@ class Node { // // Children are stored in reverse order to avoid having to store or calculate an // "offset" value to skip over the direct children. - this.data = data; - } - - get generation() { - // hide generation so it's not enumerable - this makes dicts - // supported by the default equality and hash codes without custom implementations. - return this.#generation; - } - - set generation(value) { - this.#generation = value; - } + data: [], + // The generation is used to track which nodes need to be copied during transient updates. + [generationKey]: generation, + }; } -/// The power-of-2 branching factor for the dict. For example, a value of `5` indicates a 32-ary tree. 
-const bits = 5; -const mask = (1 << bits) - 1; - -/// This symbol is used internally to avoid constructing results. -const noElementMarker = Symbol(); - -// Some commonly used constants throughout the code. -const emptyNode = /* @__PURE__ */ new Node(0, 0, 0, []); -const emptyDict = /* @__PURE__ */ new Dict(0, emptyNode); -const errorNil = /* @__PURE__ */ Result$Error(undefined); - /** * Copies a node and its data array if it's from another generation, making it safe * to mutate the node. */ function copyNode(node, generation) { - if (node.generation === generation) { + if (node[generationKey] === generation) { return node; } const { datamap, nodemap, data } = node; - return new Node(generation, datamap, nodemap, data.slice(0)); + + return { + datamap, + nodemap, + data: data.slice(0), + [generationKey]: generation, + }; } export function make() { @@ -259,22 +257,23 @@ export function size(dict) { } export function get(dict, key) { - const result = lookup(dict.root, key); + const result = lookup(dict.root, key, getHash(key)); return result !== noElementMarker ? Result$Ok(result) : errorNil; } export function has(dict, key) { - return lookup(dict.root, key) !== noElementMarker; + return lookup(dict.root, key, getHash(key)) !== noElementMarker; } -function lookup(node, key) { - const hash = getHash(key); - +function lookup(node, key, hash) { for (let shift = 0; shift < 32; shift += bits) { const { data, datamap, nodemap } = node; const bit = hashbit(hash, shift); - if (datamap & bit) { + if (nodemap & bit) { + // we found our hash inside the nodemap, so we can continue our search there. + node = data[data.length - 1 - index(nodemap, bit)]; + } else if (datamap & bit) { // we store this hash directly! // // this also means that there are no other values with the same @@ -285,9 +284,6 @@ function lookup(node, key) { // contain the value in question. const dataidx = Math.imul(index(datamap, bit), 2); return isEqual(key, data[dataidx]) ? 
data[dataidx + 1] : noElementMarker; - } else if (nodemap & bit) { - // we found our hash inside the nodemap, so we can continue our search there. - node = data[data.length - 1 - index(nodemap, bit)]; } else { // if the hash bit is not set in neither bitmaps, we immediately know that // this key cannot be inside this dict. @@ -349,8 +345,8 @@ export function fromTransient(transient) { */ function nextGeneration(dict) { const root = dict.root; - if (root.generation < Number.MAX_SAFE_INTEGER) { - return root.generation + 1; + if (root[generationKey] < Number.MAX_SAFE_INTEGER) { + return root[generationKey] + 1; } // we have reached MAX_SAFE_INTEGER generations - @@ -364,7 +360,7 @@ function nextGeneration(dict) { const node = queue.pop(); // reset the generation to 0 - node.generation = 0; + node[generationKey] = 0; // queue all other referenced nodes const nodeStart = Math.imul(popcount(node.datamap), 2); @@ -376,6 +372,19 @@ function nextGeneration(dict) { return 1; } +/// Insert is the second-most performance-sensitive operation. +/// We use a global "transient" value here to avoid doing a memory allocation. +const globalTransient = /* @__PURE__ */ toTransient(emptyDict); + +export function insert(dict, key, value) { + globalTransient.generation = nextGeneration(dict); + globalTransient.size = dict.size; + + const root = doPut(globalTransient, dict.root, key, value, getHash(key), 0); + + return new Dict(globalTransient.size, root); +} + /** * Consume a transient, writing a new key/value pair into the dictionary it * represents. If the key already exists, it will be overwritten. @@ -383,88 +392,103 @@ function nextGeneration(dict) { * Returns a new transient. 
*/ export function put(key, value, transient) { - const fun = always(value); const hash = getHash(key); - transient.root = doUpsert(transient, transient.root, key, fun, hash, 0); + transient.root = doPut(transient, transient.root, key, value, hash, 0); return transient; } -/** - * Consume a transient, removing a key if it exists. - * Returns a new transient. - */ -export function remove(key, transient) { - return put(key, noElementMarker, transient); -} +function doPut(transient, node, key, value, hash, shift) { + node = copyNode(node, transient.generation); + const { data, datamap, nodemap } = node; -export function upsert(dict, key, fun) { - // we can use our noElementMarker value to skip traversing the dictionary twice. - const transient = toTransient(dict); - const wrapped = (value) => - fun(value === noElementMarker ? Option$None() : Option$Some(value)); - const hash = getHash(key); - transient.root = doUpsert(transient, transient.root, key, wrapped, hash, 0); - return fromTransient(transient); -} + // 1. Overflow Node + // overflow nodes only contain key/value-pairs. we walk the data linearly trying to find a match. + if (shift > 32) { + for (let i = 0; i < data.length; i += 2) { + if (isEqual(key, data[i])) { + data[i + 1] = value; + return node; + } + } -export function update_with(key, fun, init, transient) { - const wrapped = (value) => (value === noElementMarker ? init : fun(value)); - const hash = getHash(key); - transient.root = doUpsert(transient, transient.root, key, wrapped, hash, 0); - return transient; -} + data.push(key, value); + transient.size += 1; -export function map(dict, fun) { - // map can never modify the structure, so we can walk the dictionary directly, - // but still move to a new generation to make sure we get a new copy of every node. 
- const generation = nextGeneration(dict); - const root = copyNode(dict.root, generation); - const queue = [root]; + return node; + } - while (queue.length) { - // order doesn't matter, so we can use push/pop for faster array usage. - const { data, datamap } = queue.pop(); - // every node contains popcount(datamap) direct entries - const edgesStart = Math.imul(popcount(datamap), 2); - for (let i = 0; i < edgesStart; i += 2) { - // we copied the node while queueing it, so direct mutation here is safe. - data[i + 1] = fun(data[i], data[i + 1]); - } - // the remaining entries are other nodes we can queue - for (let i = edgesStart; i < data.length; ++i) { - // copy the node first to make it safe to mutate - data[i] = copyNode(data[i], generation); - queue.push(data[i]); - } + const bit = hashbit(hash, shift); + const nodeidx = data.length - 1 - index(nodemap, bit); + + // 2. Child Node + // We have to check first if there is already a child node we have to traverse to. + if ((nodemap & bit) !== 0) { + const child = data[nodeidx]; + data[nodeidx] = doPut(transient, child, key, value, hash, shift + bits); + return node; } - return new Dict(dict.size, root); -} + // 3. New Data Node + // No child node and no data node exists yet, so we can potentially just insert a new value. + const dataidx = Math.imul(index(datamap, bit), 2); + if ((datamap & bit) === 0) { + node.datamap |= bit; + data.splice(dataidx, 0, key, value); + transient.size += 1; -export function fold(dict, state, fun) { - const queue = [dict.root]; + return node; + } - while (queue.length) { - // order doesn't matter, so we can use push/pop for faster array usage. - const { data, datamap } = queue.pop(); - // every node contains popcount(datamap) direct entries - const edgesStart = Math.imul(popcount(datamap), 2); - for (let i = 0; i < edgesStart; i += 2) { - state = fun(state, data[i], data[i + 1]); - } - // the remaining entries are child nodes we can queue. 
- for (let i = edgesStart; i < data.length; ++i) { - queue.push(data[i]); - } + // 4. Existing Data Node + // We have a match that we can update, or remove. + if (isEqual(key, data[dataidx])) { + data[dataidx + 1] = value; + return node; } - return state; + // 5. Collision + // There is no child node, but a data node with the same hash, but with a different key. + // To resolve this, we push both nodes down one level. + let child = makeNode(transient.generation); + child = doPut(transient, child, key, value, hash, shift + bits); + + const otherKey = data[dataidx]; + child = doPut( + transient, + child, + otherKey, + data[dataidx + 1], + getHash(otherKey), + shift + bits, + ); + // we inserted 2 elements, but implicitely deleted the one we pushed down from the datamap. + transient.size -= 1; + + node.datamap ^= bit; + node.nodemap |= bit; + + // remove the old data pair, and insert the new child node. + // because we remove 2 elements first, our indices are off-by-one! + // When calculating the nodeidx, we measure with the length including those + // 2 extra elements, but missing the one we haven't inserted yet, so we have + // to correct for both of these with (1-2) = -1 + + data.splice(dataidx, 2); + data.splice(nodeidx - 1, 0, child); + + return node; } /** - * Main helper function for insert/upsert/remove. + * Consume a transient, removing a key if it exists. + * Returns a new transient. */ -function doUpsert(transient, node, key, fun, hash, shift) { +export function remove(key, transient) { + transient.root = doRemove(transient, transient.root, key, getHash(key), 0); + return transient; +} + +function doRemove(transient, node, key, hash, shift) { const { data, datamap, nodemap } = node; // 1. 
Overflow Node @@ -472,11 +496,14 @@ function doUpsert(transient, node, key, fun, hash, shift) { if (shift > 32) { for (let i = 0; i < data.length; i += 2) { if (isEqual(key, data[i])) { - return doUpdate(transient, node, fun, 0, i); + node = copyNode(node, transient.generation); + node.data.splice(i, 2); + transient.size -= 1; + break; } } - return doInsert(transient, node, key, fun, 0, data.length); + return node; } const bit = hashbit(hash, shift); @@ -487,7 +514,8 @@ function doUpsert(transient, node, key, fun, hash, shift) { // We have to check first if there is already a child node we have to traverse to. if ((nodemap & bit) !== 0) { const oldChild = data[nodeidx]; - const newChild = doUpsert(transient, oldChild, key, fun, hash, shift + bits); + const newChild = doRemove(transient, oldChild, key, hash, shift + bits); + // no child entry found, we don't have to update this path. if (newChild === oldChild) { return node; } @@ -512,82 +540,78 @@ function doUpsert(transient, node, key, fun, hash, shift) { return node; } - // 3. New Data Node - // No child node and no data node exists yet, so we can potentially just insert a new value. - if ((datamap & bit) === 0) { - return doInsert(transient, node, key, fun, bit, dataidx); - } - - // 4. Existing Data Node - // We have a match that we can update, or remove. - if (isEqual(key, data[dataidx])) { - return doUpdate(transient, node, fun, bit, dataidx); - } - - // 5. Collision - // There is no child node, but a data node with the same hash, but with a different key. - // To resolve this, we push both nodes down one level. - let child = new Node(transient.generation, 0, 0, []); - child = doUpsert(transient, child, key, fun, hash, shift + bits); - if (!child.data.length) { + // 3. 
Data Node + // There is no data entry here, or it is a prefix for a different key + if ((datamap & bit) === 0 || !isEqual(key, data[dataidx])) { return node; } - const otherKey = data[dataidx]; - const childHash = getHash(otherKey); - const childFun = always(data[dataidx + 1]); - child = doUpsert(transient, child, otherKey, childFun, childHash, shift + bits); - // we inserted 2 elements, but implicitely deleted the one we pushed down from the datamap. - transient.size -= 1; - + // we found a data entry that we can delete. node = copyNode(node, transient.generation); - node.datamap ^= bit; - node.nodemap |= bit; - - // remove the old data pair, and insert the new child node. - // because we remove 2 elements first, our indices are off-by-one! - // When calculating the nodeidx, we measure with the length including those - // 2 extra elements, but missing the one we haven't inserted yet, so we have - // to correct for both of these with (1-2) = -1 node.data.splice(dataidx, 2); - node.data.splice(nodeidx - 1, 0, child); + node.datamap ^= bit; + transient.size -= 1; return node; } -function doUpdate(transient, node, fun, bit, index) { - node = copyNode(node, transient.generation); - - const value = fun(node.data[index + 1]); +export function update_with(key, fun, value, transient) { + const hash = getHash(key); - if (value === noElementMarker) { - node.data.splice(index, 2); - node.datamap ^= bit; - transient.size -= 1; - } else { - node.data[index + 1] = value; + const existing = lookup(transient.root, key, hash); + if (existing !== noElementMarker) { + value = fun(existing); } - return node; + transient.root = doPut(transient, transient.root, key, value, hash, 0); + return transient; } -function doInsert(transient, node, key, fun, bit, index) { - const value = fun(noElementMarker); - if (value === noElementMarker) { - return node; +export function map(dict, fun) { + // map can never modify the structure, so we can walk the dictionary directly, + // but still move to a 
new generation to make sure we get a new copy of every node. + const generation = nextGeneration(dict); + const root = copyNode(dict.root, generation); + const queue = [root]; + + while (queue.length) { + // order doesn't matter, so we can use push/pop for faster array usage. + const { data, datamap } = queue.pop(); + // every node contains popcount(datamap) direct entries + const edgesStart = Math.imul(popcount(datamap), 2); + for (let i = 0; i < edgesStart; i += 2) { + // we copied the node while queueing it, so direct mutation here is safe. + data[i + 1] = fun(data[i], data[i + 1]); + } + // the remaining entries are other nodes we can queue + for (let i = edgesStart; i < data.length; ++i) { + // copy the node first to make it safe to mutate + data[i] = copyNode(data[i], generation); + queue.push(data[i]); + } } - node = copyNode(node, transient.generation); + return new Dict(dict.size, root); +} - node.datamap |= bit; - node.data.splice(index, 0, key, value); - transient.size += 1; +export function fold(dict, state, fun) { + const queue = [dict.root]; - return node; -} + while (queue.length) { + // order doesn't matter, so we can use push/pop for faster array usage. + const { data, datamap } = queue.pop(); + // every node contains popcount(datamap) direct entries + const edgesStart = Math.imul(popcount(datamap), 2); + for (let i = 0; i < edgesStart; i += 2) { + state = fun(state, data[i], data[i + 1]); + } + // the remaining entries are child nodes we can queue. 
+ for (let i = edgesStart; i < data.length; ++i) { + queue.push(data[i]); + } + } -function always(value) { - return (_) => value; + return state; } /** diff --git a/src/gleam/dict.gleam b/src/gleam/dict.gleam index b3312de8..da6401f6 100644 --- a/src/gleam/dict.gleam +++ b/src/gleam/dict.gleam @@ -186,10 +186,14 @@ pub fn get(from: Dict(k, v), get: k) -> Result(v, Nil) /// // -> from_list([#("a", 5)]) /// ``` /// +@external(javascript, "../dict.mjs", "insert") pub fn insert(into dict: Dict(k, v), for key: k, insert value: v) -> Dict(k, v) { - to_transient(dict) |> put(key, value, _) |> from_transient + do_insert(key, value, dict) } +@external(erlang, "maps", "put") +fn do_insert(key: k, value: v, dict: Dict(k, v)) -> Dict(k, v) + @external(erlang, "maps", "put") @external(javascript, "../dict.mjs", "put") fn put(key: k, value: v, transient: TransientDict(k, v)) -> TransientDict(k, v) @@ -432,7 +436,6 @@ fn drop_loop( /// // -> from_list([#("a", 0), #("b", 0)]) /// ``` /// -@external(javascript, "../dict.mjs", "upsert") pub fn upsert( in dict: Dict(k, v), update key: k, From e29d9bcf7a923a7882529399e87b139424e5bb4e Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Sat, 15 Nov 2025 13:15:19 +0100 Subject: [PATCH 07/17] skip computing the node index if it's not needed --- src/dict.mjs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/dict.mjs b/src/dict.mjs index 22854263..1ce19941 100644 --- a/src/dict.mjs +++ b/src/dict.mjs @@ -418,11 +418,11 @@ function doPut(transient, node, key, value, hash, shift) { } const bit = hashbit(hash, shift); - const nodeidx = data.length - 1 - index(nodemap, bit); // 2. Child Node // We have to check first if there is already a child node we have to traverse to. 
if ((nodemap & bit) !== 0) { + const nodeidx = data.length - 1 - index(nodemap, bit); const child = data[nodeidx]; data[nodeidx] = doPut(transient, child, key, value, hash, shift + bits); return node; @@ -473,6 +473,8 @@ function doPut(transient, node, key, value, hash, shift) { // 2 extra elements, but missing the one we haven't inserted yet, so we have // to correct for both of these with (1-2) = -1 + const nodeidx = data.length - 1 - index(nodemap, bit); + data.splice(dataidx, 2); data.splice(nodeidx - 1, 0, child); From 0d674cfa9348ea7bfb2fb2ad3a4f625186d2fd8c Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Sat, 15 Nov 2025 19:04:51 +0100 Subject: [PATCH 08/17] optimised copy functions for faster inserts --- src/dict.mjs | 221 +++++++++++++++++++++++++++++---------------------- 1 file changed, 128 insertions(+), 93 deletions(-) diff --git a/src/dict.mjs b/src/dict.mjs index 1ce19941..3a9b8b88 100644 --- a/src/dict.mjs +++ b/src/dict.mjs @@ -177,11 +177,14 @@ const noElementMarker = Symbol(); const generationKey = Symbol(); // Some commonly used constants throughout the code. -const emptyNode = /* @__PURE__ */ makeNode(0); +const emptyNode = /* @__PURE__ */ newNode(0); const emptyDict = /* @__PURE__ */ new Dict(0, emptyNode); const errorNil = /* @__PURE__ */ Result$Error(undefined); -function makeNode(generation) { +function makeNode(generation, datamap, nodemap, data) { + // The order of fields is important, as they define the order `isEqual` will + // compare our fields. Putting the bitmaps first means that equality can + // early-out if the bitmaps are not equal. return { // A node is a high-arity (32 in practice) hybrid tree node. // Hybrid means that it stores data directly as well as pointers to child nodes. @@ -196,8 +199,8 @@ function makeNode(generation) { // suffix (least significant bits first) encoding. // For example, if the last 5 bits of the hash are 1101, the bit to check for // that value is the 13th bit. 
- datamap: 0, - nodemap: 0, + datamap, + nodemap, // The slots itself are stored in a single contiguous array that contains // both direct k/v-pairs and child nodes. // @@ -215,12 +218,17 @@ function makeNode(generation) { // // Children are stored in reverse order to avoid having to store or calculate an // "offset" value to skip over the direct children. - data: [], + data, // The generation is used to track which nodes need to be copied during transient updates. + // Using a symbol here makes `isEqual` ignore this field. [generationKey]: generation, }; } +function newNode(generation) { + return makeNode(generation, 0, 0, []); +} + /** * Copies a node and its data array if it's from another generation, making it safe * to mutate the node. @@ -230,14 +238,50 @@ function copyNode(node, generation) { return node; } - const { datamap, nodemap, data } = node; + const newData = node.data.slice(0); + return makeNode(generation, node.datamap, node.nodemap, newData); +} - return { - datamap, - nodemap, - data: data.slice(0), - [generationKey]: generation, - }; +/** + * Copies a node if needed ands sets a new value. + */ +function copyAndSet(node, generation, idx, val) { + if (node.data[idx] === val) { + return node; + } + + // Using copyNode is faster than a specialised implementation. + node = copyNode(node, generation); + node.data[idx] = val; + return node; +} + +/** + * Copies a node if needed, and then inserts a new key-value pair. + */ +function copyAndInsertPair(node, generation, bit, idx, key, val) { + const data = node.data; + const length = data.length; + + // the fastest way to insert a pair is to always copy. 
+ const newData = new Array(length + 2); + + let readIndex = 0; + let writeIndex = 0; + + while (readIndex < idx) newData[writeIndex++] = data[readIndex++]; + newData[writeIndex++] = key; + newData[writeIndex++] = val; + while (readIndex < length) newData[writeIndex++] = data[readIndex++]; + + return makeNode(generation, node.datamap | bit, node.nodemap, newData); +} + +function copyAndRemovePair(node, generation, bit, idx) { + node = copyNode(node, generation); + node.datamap ^= bit; + node.data.splice(idx, 2); + return node; } export function make() { @@ -267,13 +311,13 @@ export function has(dict, key) { function lookup(node, key, hash) { for (let shift = 0; shift < 32; shift += bits) { - const { data, datamap, nodemap } = node; + const data = node.data; const bit = hashbit(hash, shift); - if (nodemap & bit) { + if (node.nodemap & bit) { // we found our hash inside the nodemap, so we can continue our search there. - node = data[data.length - 1 - index(nodemap, bit)]; - } else if (datamap & bit) { + node = data[data.length - 1 - index(node.nodemap, bit)]; + } else if (node.datamap & bit) { // we store this hash directly! // // this also means that there are no other values with the same @@ -282,7 +326,7 @@ function lookup(node, key, hash) { // We still need to check if the key matches, but if it does we know for // sure that this is the correct value, and if it doesn't that we don't // contain the value in question. - const dataidx = Math.imul(index(datamap, bit), 2); + const dataidx = Math.imul(index(node.datamap, bit), 2); return isEqual(key, data[dataidx]) ? 
data[dataidx + 1] : noElementMarker; } else { // if the hash bit is not set in neither bitmaps, we immediately know that @@ -381,6 +425,9 @@ export function insert(dict, key, value) { globalTransient.size = dict.size; const root = doPut(globalTransient, dict.root, key, value, getHash(key), 0); + if (root === dict.root) { + return dict; + } return new Dict(globalTransient.size, root); } @@ -398,89 +445,82 @@ export function put(key, value, transient) { } function doPut(transient, node, key, value, hash, shift) { - node = copyNode(node, transient.generation); - const { data, datamap, nodemap } = node; + const data = node.data; + const generation = transient.generation; // 1. Overflow Node // overflow nodes only contain key/value-pairs. we walk the data linearly trying to find a match. if (shift > 32) { for (let i = 0; i < data.length; i += 2) { if (isEqual(key, data[i])) { - data[i + 1] = value; - return node; + return copyAndSet(node, generation, i + 1, value); } } - data.push(key, value); transient.size += 1; - - return node; + return copyAndInsertPair(node, generation, 0, data.length, key, value); } const bit = hashbit(hash, shift); // 2. Child Node // We have to check first if there is already a child node we have to traverse to. - if ((nodemap & bit) !== 0) { - const nodeidx = data.length - 1 - index(nodemap, bit); + if (node.nodemap & bit) { + const nodeidx = data.length - 1 - index(node.nodemap, bit); const child = data[nodeidx]; - data[nodeidx] = doPut(transient, child, key, value, hash, shift + bits); - return node; + const newChild = doPut(transient, child, key, value, hash, shift + bits); + return copyAndSet(node, generation, nodeidx, newChild); } // 3. New Data Node // No child node and no data node exists yet, so we can potentially just insert a new value. 
- const dataidx = Math.imul(index(datamap, bit), 2); - if ((datamap & bit) === 0) { - node.datamap |= bit; - data.splice(dataidx, 0, key, value); + const dataidx = Math.imul(index(node.datamap, bit), 2); + if ((node.datamap & bit) === 0) { transient.size += 1; - - return node; + return copyAndInsertPair(node, generation, bit, dataidx, key, value); } // 4. Existing Data Node // We have a match that we can update, or remove. if (isEqual(key, data[dataidx])) { - data[dataidx + 1] = value; - return node; + return copyAndSet(node, generation, dataidx + 1, value); } // 5. Collision // There is no child node, but a data node with the same hash, but with a different key. // To resolve this, we push both nodes down one level. - let child = makeNode(transient.generation); - child = doPut(transient, child, key, value, hash, shift + bits); - const otherKey = data[dataidx]; - child = doPut( - transient, - child, - otherKey, - data[dataidx + 1], - getHash(otherKey), - shift + bits, - ); + const otherVal = data[dataidx + 1]; + const otherHash = getHash(otherKey); + const childShift = shift + bits; + + let child = emptyNode; + child = doPut(transient, child, key, value, hash, childShift); + child = doPut(transient, child, otherKey, otherVal, otherHash, childShift); + // we inserted 2 elements, but implicitely deleted the one we pushed down from the datamap. transient.size -= 1; - node.datamap ^= bit; - node.nodemap |= bit; - // remove the old data pair, and insert the new child node. - // because we remove 2 elements first, our indices are off-by-one! 
- // When calculating the nodeidx, we measure with the length including those - // 2 extra elements, but missing the one we haven't inserted yet, so we have - // to correct for both of these with (1-2) = -1 + const length = data.length; + const nodeidx = length - 1 - index(node.nodemap, bit); - const nodeidx = data.length - 1 - index(nodemap, bit); + // writing these loops in javascript instead of a combination of splices + // turns out to be faster. Copying always turned out to be faster. + const newData = new Array(length - 1); - data.splice(dataidx, 2); - data.splice(nodeidx - 1, 0, child); + let readIndex = 0; + let writeIndex = 0; - return node; -} + // [0..dataidx, skip 2 elements, ..nodeidx, newChild, ..rest] + while (readIndex < dataidx) newData[writeIndex++] = data[readIndex++]; + readIndex += 2; + while (readIndex <= nodeidx) newData[writeIndex++] = data[readIndex++]; + newData[writeIndex++] = child; + while (readIndex < length) newData[writeIndex++] = data[readIndex++]; + return makeNode(generation, node.datamap ^ bit, node.nodemap | bit, newData); +} /** * Consume a transient, removing a key if it exists. * Returns a new transient. @@ -491,17 +531,16 @@ export function remove(key, transient) { } function doRemove(transient, node, key, hash, shift) { - const { data, datamap, nodemap } = node; + const data = node.data; + const generation = transient.generation; // 1. Overflow Node // overflow nodes only contain key/value-pairs. we walk the data linearly trying to find a match. 
if (shift > 32) { for (let i = 0; i < data.length; i += 2) { if (isEqual(key, data[i])) { - node = copyNode(node, transient.generation); - node.data.splice(i, 2); transient.size -= 1; - break; + return copyAndRemovePair(node, generation, 0, i); } } @@ -509,52 +548,46 @@ function doRemove(transient, node, key, hash, shift) { } const bit = hashbit(hash, shift); - const nodeidx = data.length - 1 - index(nodemap, bit); - const dataidx = Math.imul(index(datamap, bit), 2); + const dataidx = Math.imul(index(node.datamap, bit), 2); // 2. Child Node // We have to check first if there is already a child node we have to traverse to. - if ((nodemap & bit) !== 0) { + if ((node.nodemap & bit) !== 0) { + const nodeidx = data.length - 1 - index(node.nodemap, bit); + const oldChild = data[nodeidx]; const newChild = doRemove(transient, oldChild, key, hash, shift + bits); - // no child entry found, we don't have to update this path. - if (newChild === oldChild) { - return node; - } // the node did change, so let's copy to incorporate that change. - node = copyNode(node, transient.generation); if (newChild.nodemap !== 0 || newChild.data.length > 2) { - node.data[nodeidx] = newChild; - } else { - // this node only has a single data (k/v-pair) child. - // to restore the CHAMP invariant, we "pull" that pair up into ourselves. - // this ensures that every tree stays in its single optimal representation, - // and allows dicts to be structurally compared. - node.datamap |= bit; - node.nodemap ^= bit; - // NOTE: the order here is important to avoid mutation bugs! - // Remove the old child node, and insert the data pair into ourselves. - node.data.splice(nodeidx, 1); - node.data.splice(dataidx, 0, newChild.data[0], newChild.data[1]); + return copyAndSet(node, generation, nodeidx, newChild); } + // when writing, it looks like since we delete first it's not too bad. + node = copyNode(node, generation); + // this node only has a single data (k/v-pair) child. 
+ // to restore the CHAMP invariant, we "pull" that pair up into ourselves. + // this ensures that every tree stays in its single optimal representation, + // and allows dicts to be structurally compared. + node.datamap |= bit; + node.nodemap ^= bit; + // NOTE: the order here is important to avoid mutation bugs! + // Remove the old child node, and insert the data pair into ourselves. + node.data.splice(nodeidx, 1); + node.data.splice(dataidx, 0, newChild.data[0], newChild.data[1]); + return node; } // 3. Data Node // There is no data entry here, or it is a prefix for a different key - if ((datamap & bit) === 0 || !isEqual(key, data[dataidx])) { + if ((node.datamap & bit) === 0 || !isEqual(key, data[dataidx])) { return node; } // we found a data entry that we can delete. - node = copyNode(node, transient.generation); - node.data.splice(dataidx, 2); - node.datamap ^= bit; transient.size -= 1; - - return node; + return copyAndRemovePair(node, generation, bit, dataidx); } export function update_with(key, fun, value, transient) { @@ -578,9 +611,10 @@ export function map(dict, fun) { while (queue.length) { // order doesn't matter, so we can use push/pop for faster array usage. - const { data, datamap } = queue.pop(); + const node = queue.pop(); + const data = node.data; // every node contains popcount(datamap) direct entries - const edgesStart = Math.imul(popcount(datamap), 2); + const edgesStart = Math.imul(popcount(node.datamap), 2); for (let i = 0; i < edgesStart; i += 2) { // we copied the node while queueing it, so direct mutation here is safe. data[i + 1] = fun(data[i], data[i + 1]); @@ -601,9 +635,10 @@ export function fold(dict, state, fun) { while (queue.length) { // order doesn't matter, so we can use push/pop for faster array usage. 
- const { data, datamap } = queue.pop(); + const node = queue.pop(); + const data = node.data; // every node contains popcount(datamap) direct entries - const edgesStart = Math.imul(popcount(datamap), 2); + const edgesStart = Math.imul(popcount(node.datamap), 2); for (let i = 0; i < edgesStart; i += 2) { state = fun(state, data[i], data[i + 1]); } From 5a872d3433a8094371b3467d78c82678cef2de9a Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Sun, 16 Nov 2025 15:53:42 +0100 Subject: [PATCH 09/17] optimised delete --- src/dict.mjs | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/dict.mjs b/src/dict.mjs index 3a9b8b88..564f15b4 100644 --- a/src/dict.mjs +++ b/src/dict.mjs @@ -279,8 +279,16 @@ function copyAndInsertPair(node, generation, bit, idx, key, val) { function copyAndRemovePair(node, generation, bit, idx) { node = copyNode(node, generation); + + const data = node.data; + const length = data.length; + for (let w = idx, r = idx + 2; r < length; ++r, ++w) { + data[w] = data[r]; + } + data.pop(); + data.pop(); + node.datamap ^= bit; - node.data.splice(idx, 2); return node; } @@ -563,20 +571,24 @@ function doRemove(transient, node, key, hash, shift) { return copyAndSet(node, generation, nodeidx, newChild); } - // when writing, it looks like since we delete first it's not too bad. - node = copyNode(node, generation); // this node only has a single data (k/v-pair) child. // to restore the CHAMP invariant, we "pull" that pair up into ourselves. // this ensures that every tree stays in its single optimal representation, // and allows dicts to be structurally compared. - node.datamap |= bit; - node.nodemap ^= bit; - // NOTE: the order here is important to avoid mutation bugs! - // Remove the old child node, and insert the data pair into ourselves. 
- node.data.splice(nodeidx, 1); - node.data.splice(dataidx, 0, newChild.data[0], newChild.data[1]); + const length = data.length; + const newData = new Array(length + 1); - return node; + let readIndex = 0; + let writeIndex = 0; + + while (readIndex < dataidx) newData[writeIndex++] = data[readIndex++]; + newData[writeIndex++] = newChild.data[0]; + newData[writeIndex++] = newChild.data[1]; + while (readIndex < nodeidx) newData[writeIndex++] = data[readIndex++]; + readIndex++; + while (readIndex < length) newData[writeIndex++] = data[readIndex++]; + + return makeNode(generation, node.datamap | bit, node.nodemap ^ bit, newData); } // 3. Data Node From 45dc8e62fb1acda3f63cf04ab89e73bc17278f90 Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Sun, 16 Nov 2025 17:24:42 +0100 Subject: [PATCH 10/17] update to gleam v1.13 --- .github/workflows/ci.yml | 8 ++++---- gleam.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5d18ec50..dcdc4113 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: - uses: erlef/setup-beam@v1 with: otp-version: ${{ matrix.erlang_version }} - gleam-version: "1.11.0" + gleam-version: "1.13.0" - run: gleam test --target erlang - run: gleam format --check src test @@ -44,7 +44,7 @@ jobs: - uses: erlef/setup-beam@v1 with: otp-version: "28" - gleam-version: "1.11.0" + gleam-version: "1.13.0" - uses: actions/setup-node@v4 with: node-version: ${{ matrix.node_version }} @@ -62,7 +62,7 @@ jobs: - uses: erlef/setup-beam@v1 with: otp-version: "28" - gleam-version: "1.11.0" + gleam-version: "1.13.0" - uses: oven-sh/setup-bun@v2 with: bun-version: ${{ matrix.bun_version }} @@ -80,7 +80,7 @@ jobs: - uses: erlef/setup-beam@v1 with: otp-version: "28" - gleam-version: "1.11.0" + gleam-version: "1.13.0" - uses: denoland/setup-deno@v1 with: deno-version: ${{ matrix.deno_version }} diff --git a/gleam.toml 
b/gleam.toml index 31eb0487..ef52224a 100644 --- a/gleam.toml +++ b/gleam.toml @@ -1,6 +1,6 @@ name = "gleam_stdlib" version = "0.67.1" -gleam = ">= 1.11.0" +gleam = ">= 1.13.0" licences = ["Apache-2.0"] description = "A standard library for the Gleam programming language" From 2d3daf4d9ceacd746c01535520fbacbeade3ddb6 Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Sun, 16 Nov 2025 17:26:26 +0100 Subject: [PATCH 11/17] gleam format --- test/gleeunit/should.gleam | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/gleeunit/should.gleam b/test/gleeunit/should.gleam index 11cd82b7..99cd16c4 100644 --- a/test/gleeunit/should.gleam +++ b/test/gleeunit/should.gleam @@ -8,11 +8,11 @@ pub fn equal(a: t, b: t) -> Nil { True -> Nil _ -> panic as string.concat([ - "\n", - string.inspect(a), - "\nshould equal\n", - string.inspect(b), - ]) + "\n", + string.inspect(a), + "\nshould equal\n", + string.inspect(b), + ]) } } @@ -21,11 +21,11 @@ pub fn not_equal(a: t, b: t) -> Nil { True -> Nil _ -> panic as string.concat([ - "\n", - string.inspect(a), - "\nshould not equal\n", - string.inspect(b), - ]) + "\n", + string.inspect(a), + "\nshould not equal\n", + string.inspect(b), + ]) } } From 1c7222319977504cea822cc98e42fed3a84cefa7 Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Thu, 20 Nov 2025 23:31:52 +0100 Subject: [PATCH 12/17] convert from `use` --- src/gleam/dict.gleam | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/gleam/dict.gleam b/src/gleam/dict.gleam index da6401f6..ab9817f7 100644 --- a/src/gleam/dict.gleam +++ b/src/gleam/dict.gleam @@ -96,8 +96,7 @@ pub fn is_empty(dict: Dict(k, v)) -> Bool { /// @external(erlang, "maps", "to_list") pub fn to_list(dict: Dict(k, v)) -> List(#(k, v)) { - use acc, key, value <- fold(dict, from: []) - [#(key, value), ..acc] + fold(dict, from: [], with: fn(acc, key, value) { [#(key, value), 
..acc] }) } /// Converts a list of 2-element tuples `#(key, value)` to a dict. @@ -232,8 +231,7 @@ fn do_map_values(f: fn(k, v) -> a, dict: Dict(k, v)) -> Dict(k, a) /// @external(erlang, "maps", "keys") pub fn keys(dict: Dict(k, v)) -> List(k) { - use acc, key, _value <- fold(dict, []) - [key, ..acc] + fold(dict, [], fn(acc, key, _value) { [key, ..acc] }) } /// Gets a list of all values in a given dict. @@ -251,8 +249,7 @@ pub fn keys(dict: Dict(k, v)) -> List(k) { /// @external(erlang, "maps", "values") pub fn values(dict: Dict(k, v)) -> List(v) { - use acc, _key, value <- fold(dict, []) - [value, ..acc] + fold(dict, [], fn(acc, _key, value) { [value, ..acc] }) } /// Creates a new dict from a given dict, minus any entries that a given function From 14dd3ab62adcf5fa91db946fa1758c9c07973d66 Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Fri, 21 Nov 2025 00:10:43 +0100 Subject: [PATCH 13/17] rename transient functions --- src/dict.mjs | 58 +++++++++++++++++++++++++------------------- src/gleam/dict.gleam | 31 +++++++++++++---------- 2 files changed, 51 insertions(+), 38 deletions(-) diff --git a/src/dict.mjs b/src/dict.mjs index 564f15b4..14d1b467 100644 --- a/src/dict.mjs +++ b/src/dict.mjs @@ -8,7 +8,9 @@ import { isEqual, Result$Error, Result$Ok } from "./gleam.mjs"; // -- HASH -------------------------------------------------------------------- const referenceMap = /* @__PURE__ */ new WeakMap(); -const tempDataView = /* @__PURE__ */ new DataView(/* @__PURE__ */ new ArrayBuffer(8)); +const tempDataView = /* @__PURE__ */ new DataView( + /* @__PURE__ */ new ArrayBuffer(8), +); let referenceUID = 0; /** * hash the object by reference using a weak map and incrementing uid @@ -299,7 +301,7 @@ export function make() { export function from(iterable) { let transient = toTransient(emptyDict); for (const [key, value] of iterable) { - transient = put(key, value, transient); + transient = transientInsert(key, value, transient); } 
return fromTransient(transient); } @@ -432,7 +434,7 @@ export function insert(dict, key, value) { globalTransient.generation = nextGeneration(dict); globalTransient.size = dict.size; - const root = doPut(globalTransient, dict.root, key, value, getHash(key), 0); + const root = doInsert(globalTransient, dict.root, key, value, getHash(key), 0); if (root === dict.root) { return dict; } @@ -446,13 +448,30 @@ export function insert(dict, key, value) { * * Returns a new transient. */ -export function put(key, value, transient) { +export function transientInsert(key, value, transient) { const hash = getHash(key); - transient.root = doPut(transient, transient.root, key, value, hash, 0); + transient.root = doInsert(transient, transient.root, key, value, hash, 0); return transient; } -function doPut(transient, node, key, value, hash, shift) { +/** + * Consume a transient, writing a new key/value pair if the key doesn't exist or updating + * the existing value with a function if it does. + * + * Returns a new transient. 
+ */ +export function transientUpdateWith(key, fun, value, transient) { + const hash = getHash(key); + + const existing = lookup(transient.root, key, hash); + if (existing !== noElementMarker) { + value = fun(existing); + } + transient.root = doInsert(transient, transient.root, key, value, hash, 0); + return transient; +} + +function doInsert(transient, node, key, value, hash, shift) { const data = node.data; const generation = transient.generation; @@ -476,7 +495,7 @@ function doPut(transient, node, key, value, hash, shift) { if (node.nodemap & bit) { const nodeidx = data.length - 1 - index(node.nodemap, bit); const child = data[nodeidx]; - const newChild = doPut(transient, child, key, value, hash, shift + bits); + const newChild = doInsert(transient, child, key, value, hash, shift + bits); return copyAndSet(node, generation, nodeidx, newChild); } @@ -503,8 +522,8 @@ function doPut(transient, node, key, value, hash, shift) { const childShift = shift + bits; let child = emptyNode; - child = doPut(transient, child, key, value, hash, childShift); - child = doPut(transient, child, otherKey, otherVal, otherHash, childShift); + child = doInsert(transient, child, key, value, hash, childShift); + child = doInsert(transient, child, otherKey, otherVal, otherHash, childShift); // we inserted 2 elements, but implicitely deleted the one we pushed down from the datamap. transient.size -= 1; @@ -529,16 +548,17 @@ function doPut(transient, node, key, value, hash, shift) { return makeNode(generation, node.datamap ^ bit, node.nodemap | bit, newData); } + /** * Consume a transient, removing a key if it exists. * Returns a new transient. 
*/ -export function remove(key, transient) { - transient.root = doRemove(transient, transient.root, key, getHash(key), 0); +export function transientDelete(key, transient) { + transient.root = doDelete(transient, transient.root, key, getHash(key), 0); return transient; } -function doRemove(transient, node, key, hash, shift) { +function doDelete(transient, node, key, hash, shift) { const data = node.data; const generation = transient.generation; @@ -564,7 +584,7 @@ function doRemove(transient, node, key, hash, shift) { const nodeidx = data.length - 1 - index(node.nodemap, bit); const oldChild = data[nodeidx]; - const newChild = doRemove(transient, oldChild, key, hash, shift + bits); + const newChild = doDelete(transient, oldChild, key, hash, shift + bits); // the node did change, so let's copy to incorporate that change. if (newChild.nodemap !== 0 || newChild.data.length > 2) { @@ -602,18 +622,6 @@ function doRemove(transient, node, key, hash, shift) { return copyAndRemovePair(node, generation, bit, dataidx); } -export function update_with(key, fun, value, transient) { - const hash = getHash(key); - - const existing = lookup(transient.root, key, hash); - if (existing !== noElementMarker) { - value = fun(existing); - } - - transient.root = doPut(transient, transient.root, key, value, hash, 0); - return transient; -} - export function map(dict, fun) { // map can never modify the structure, so we can walk the dictionary directly, // but still move to a new generation to make sure we get a new copy of every node. 
diff --git a/src/gleam/dict.gleam b/src/gleam/dict.gleam index ab9817f7..3fdb6e12 100644 --- a/src/gleam/dict.gleam +++ b/src/gleam/dict.gleam @@ -115,7 +115,8 @@ fn from_list_loop( ) -> Dict(k, v) { case list { [] -> from_transient(transient) - [#(key, value), ..rest] -> from_list_loop(put(key, value, transient), rest) + [#(key, value), ..rest] -> + from_list_loop(transient_insert(key, value, transient), rest) } } @@ -194,8 +195,12 @@ pub fn insert(into dict: Dict(k, v), for key: k, insert value: v) -> Dict(k, v) fn do_insert(key: k, value: v, dict: Dict(k, v)) -> Dict(k, v) @external(erlang, "maps", "put") -@external(javascript, "../dict.mjs", "put") -fn put(key: k, value: v, transient: TransientDict(k, v)) -> TransientDict(k, v) +@external(javascript, "../dict.mjs", "transientInsert") +fn transient_insert( + key: k, + value: v, + transient: TransientDict(k, v), +) -> TransientDict(k, v) /// Updates all values in a given dict by calling a given function on each key /// and value. @@ -281,7 +286,7 @@ fn do_filter(f: fn(k, v) -> Bool, dict: Dict(k, v)) -> Dict(k, v) { to_transient(new()) |> fold(over: dict, with: fn(transient, key, value) { case f(key, value) { - True -> put(key, value, transient) + True -> transient_insert(key, value, transient) False -> transient } }) @@ -323,7 +328,7 @@ fn do_take_loop( [] -> from_transient(acc) [key, ..rest] -> case get(dict, key) { - Ok(value) -> do_take_loop(dict, rest, put(key, value, acc)) + Ok(value) -> do_take_loop(dict, rest, transient_insert(key, value, acc)) Error(_) -> do_take_loop(dict, rest, acc) } } @@ -364,12 +369,12 @@ pub fn merge(into dict: Dict(k, v), from new_entries: Dict(k, v)) -> Dict(k, v) /// ``` /// pub fn delete(from dict: Dict(k, v), delete key: k) -> Dict(k, v) { - to_transient(dict) |> remove(key, _) |> from_transient + to_transient(dict) |> transient_delete(key, _) |> from_transient } @external(erlang, "maps", "remove") -@external(javascript, "../dict.mjs", "remove") -fn remove(a: k, b: 
TransientDict(k, v)) -> TransientDict(k, v) +@external(javascript, "../dict.mjs", "transientDelete") +fn transient_delete(a: k, b: TransientDict(k, v)) -> TransientDict(k, v) /// Creates a new dict from a given dict with all the same entries except any with /// keys found in a given list. @@ -406,7 +411,7 @@ fn drop_loop( ) -> Dict(k, v) { case disallowed_keys { [] -> from_transient(transient) - [key, ..rest] -> drop_loop(remove(key, transient), rest) + [key, ..rest] -> drop_loop(transient_delete(key, transient), rest) } } @@ -547,14 +552,14 @@ fn do_combine( to_transient(big) |> fold(over: small, with: fn(transient, key, value) { let update = fn(existing) { combine(key, existing, value) } - update_with(key, update, value, transient) + transient_update_with(key, update, value, transient) }) |> from_transient } @external(erlang, "maps", "update_with") -@external(javascript, "../dict.mjs", "update_with") -fn update_with( +@external(javascript, "../dict.mjs", "transientUpdateWith") +fn transient_update_with( key: k, fun: fn(v) -> v, init: v, @@ -578,7 +583,7 @@ fn group_loop( let update = fn(existing) { [value, ..existing] } transient - |> update_with(key, update, [value], _) + |> transient_update_with(key, update, [value], _) |> group_loop(to_key, rest) } } From de9fc8a8bff435a208dbae36213784fafb97c8a9 Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Fri, 21 Nov 2025 11:58:25 +0100 Subject: [PATCH 14/17] make it more clear that transient updates destroy the old value --- src/dict.mjs | 8 ++++---- src/gleam/dict.gleam | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/dict.mjs b/src/dict.mjs index 14d1b467..536b67d9 100644 --- a/src/dict.mjs +++ b/src/dict.mjs @@ -301,7 +301,7 @@ export function make() { export function from(iterable) { let transient = toTransient(emptyDict); for (const [key, value] of iterable) { - transient = transientInsert(key, value, transient); + transient = 
destructiveTransientInsert(key, value, transient); } return fromTransient(transient); } @@ -448,7 +448,7 @@ export function insert(dict, key, value) { * * Returns a new transient. */ -export function transientInsert(key, value, transient) { +export function destructiveTransientInsert(key, value, transient) { const hash = getHash(key); transient.root = doInsert(transient, transient.root, key, value, hash, 0); return transient; @@ -460,7 +460,7 @@ export function transientInsert(key, value, transient) { * * Returns a new transient. */ -export function transientUpdateWith(key, fun, value, transient) { +export function destructiveTransientUpdateWith(key, fun, value, transient) { const hash = getHash(key); const existing = lookup(transient.root, key, hash); @@ -553,7 +553,7 @@ function doInsert(transient, node, key, value, hash, shift) { * Consume a transient, removing a key if it exists. * Returns a new transient. */ -export function transientDelete(key, transient) { +export function destructiveTransientDelete(key, transient) { transient.root = doDelete(transient, transient.root, key, getHash(key), 0); return transient; } diff --git a/src/gleam/dict.gleam b/src/gleam/dict.gleam index 3fdb6e12..556246ce 100644 --- a/src/gleam/dict.gleam +++ b/src/gleam/dict.gleam @@ -195,7 +195,7 @@ pub fn insert(into dict: Dict(k, v), for key: k, insert value: v) -> Dict(k, v) fn do_insert(key: k, value: v, dict: Dict(k, v)) -> Dict(k, v) @external(erlang, "maps", "put") -@external(javascript, "../dict.mjs", "transientInsert") +@external(javascript, "../dict.mjs", "destructiveTransientInsert") fn transient_insert( key: k, value: v, @@ -373,7 +373,7 @@ pub fn delete(from dict: Dict(k, v), delete key: k) -> Dict(k, v) { } @external(erlang, "maps", "remove") -@external(javascript, "../dict.mjs", "transientDelete") +@external(javascript, "../dict.mjs", "destructiveTransientDelete") fn transient_delete(a: k, b: TransientDict(k, v)) -> TransientDict(k, v) /// Creates a new dict from a given 
dict with all the same entries except any with @@ -558,7 +558,7 @@ fn do_combine( } @external(erlang, "maps", "update_with") -@external(javascript, "../dict.mjs", "transientUpdateWith") +@external(javascript, "../dict.mjs", "destructiveTransientUpdateWith") fn transient_update_with( key: k, fun: fn(v) -> v, From 1746b8e8b1c6a1522ee2cf0ded69c855332823ba Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Fri, 21 Nov 2025 11:59:12 +0100 Subject: [PATCH 15/17] rename doPut/doRemove helpers, remove potentially broken oldChild references --- src/dict.mjs | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/src/dict.mjs b/src/dict.mjs index 536b67d9..0e314423 100644 --- a/src/dict.mjs +++ b/src/dict.mjs @@ -434,7 +434,8 @@ export function insert(dict, key, value) { globalTransient.generation = nextGeneration(dict); globalTransient.size = dict.size; - const root = doInsert(globalTransient, dict.root, key, value, getHash(key), 0); + const hash = getHash(key); + const root = insertIntoNode(globalTransient, dict.root, key, value, hash, 0); if (root === dict.root) { return dict; } @@ -450,7 +451,7 @@ export function insert(dict, key, value) { */ export function destructiveTransientInsert(key, value, transient) { const hash = getHash(key); - transient.root = doInsert(transient, transient.root, key, value, hash, 0); + transient.root = insertIntoNode(transient, transient.root, key, value, hash, 0); return transient; } @@ -467,11 +468,11 @@ export function destructiveTransientUpdateWith(key, fun, value, transient) { if (existing !== noElementMarker) { value = fun(existing); } - transient.root = doInsert(transient, transient.root, key, value, hash, 0); + transient.root = insertIntoNode(transient, transient.root, key, value, hash, 0); return transient; } -function doInsert(transient, node, key, value, hash, shift) { +function insertIntoNode(transient, node, key, value, hash, shift) { const
data = node.data; const generation = transient.generation; @@ -494,9 +495,10 @@ function doInsert(transient, node, key, value, hash, shift) { // We have to check first if there is already a child node we have to traverse to. if (node.nodemap & bit) { const nodeidx = data.length - 1 - index(node.nodemap, bit); - const child = data[nodeidx]; - const newChild = doInsert(transient, child, key, value, hash, shift + bits); - return copyAndSet(node, generation, nodeidx, newChild); + + let child = data[nodeidx]; + child = insertIntoNode(transient, child, key, value, hash, shift + bits); + return copyAndSet(node, generation, nodeidx, child); } // 3. New Data Node @@ -516,14 +518,15 @@ function doInsert(transient, node, key, value, hash, shift) { // 5. Collision // There is no child node, but a data node with the same hash, but with a different key. // To resolve this, we push both nodes down one level. - const otherKey = data[dataidx]; - const otherVal = data[dataidx + 1]; - const otherHash = getHash(otherKey); const childShift = shift + bits; let child = emptyNode; - child = doInsert(transient, child, key, value, hash, childShift); - child = doInsert(transient, child, otherKey, otherVal, otherHash, childShift); + child = insertIntoNode(transient, emptyNode, key, value, hash, childShift); + + const key2 = data[dataidx]; + const value2 = data[dataidx + 1]; + const hash2 = getHash(key2); + child = insertIntoNode(transient, child, key2, value2, hash2, childShift); // we inserted 2 elements, but implicitely deleted the one we pushed down from the datamap. transient.size -= 1; @@ -554,11 +557,12 @@ function doInsert(transient, node, key, value, hash, shift) { * Returns a new transient. 
*/ export function destructiveTransientDelete(key, transient) { - transient.root = doDelete(transient, transient.root, key, getHash(key), 0); + const hash = getHash(key); + transient.root = deleteFromNode(transient, transient.root, key, hash, 0); return transient; } -function doDelete(transient, node, key, hash, shift) { +function deleteFromNode(transient, node, key, hash, shift) { const data = node.data; const generation = transient.generation; @@ -583,12 +587,12 @@ function doDelete(transient, node, key, hash, shift) { if ((node.nodemap & bit) !== 0) { const nodeidx = data.length - 1 - index(node.nodemap, bit); - const oldChild = data[nodeidx]; - const newChild = doDelete(transient, oldChild, key, hash, shift + bits); + let child = data[nodeidx]; + child = deleteFromNode(transient, child, key, hash, shift + bits); // the node did change, so let's copy to incorporate that change. - if (newChild.nodemap !== 0 || newChild.data.length > 2) { - return copyAndSet(node, generation, nodeidx, newChild); + if (child.nodemap !== 0 || child.data.length > 2) { + return copyAndSet(node, generation, nodeidx, child); } // this node only has a single data (k/v-pair) child. 
@@ -602,8 +606,8 @@ function doDelete(transient, node, key, hash, shift) { let writeIndex = 0; while (readIndex < dataidx) newData[writeIndex++] = data[readIndex++]; - newData[writeIndex++] = newChild.data[0]; - newData[writeIndex++] = newChild.data[1]; + newData[writeIndex++] = child.data[0]; + newData[writeIndex++] = child.data[1]; while (readIndex < nodeidx) newData[writeIndex++] = data[readIndex++]; readIndex++; while (readIndex < length) newData[writeIndex++] = data[readIndex++]; From baea2ff5066279c051df29d7eafd0c0e388fb56a Mon Sep 17 00:00:00 2001 From: yoshie <802632-arkandos@users.noreply.gitlab.com> Date: Fri, 28 Nov 2025 15:13:59 +0100 Subject: [PATCH 16/17] add random operations / property tests --- test/gleam/dict_test.gleam | 199 +++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) diff --git a/test/gleam/dict_test.gleam b/test/gleam/dict_test.gleam index a225b0d0..03b5ad90 100644 --- a/test/gleam/dict_test.gleam +++ b/test/gleam/dict_test.gleam @@ -391,3 +391,202 @@ pub fn combine_with_no_overlapping_keys_test() { assert dict.combine(map1, map2, fn(one, _) { one }) == dict.from_list([#("a", 1), #("b", 2), #("c", 3), #("d", 4)]) } + +// Enums without fields all hash to 0 due to how the hash function works - +// we use this fact here to produce and test collisions. +// +// Object.keys() returns [] for variants without fields, so the hash always +// stays on it's initial value. 
+type CollidingKey { + CollidingKey1 + CollidingKey2 +} + +pub fn hash_collision_overflow_test() { + let d = + dict.new() |> dict.insert(CollidingKey1, 1) |> dict.insert(CollidingKey2, 2) + + assert dict.size(d) == 2 + assert dict.get(d, CollidingKey1) == Ok(1) + assert dict.get(d, CollidingKey2) == Ok(2) + + let d = dict.delete(d, CollidingKey1) + + assert dict.size(d) == 1 + assert dict.get(d, CollidingKey1) == Error(Nil) + assert dict.get(d, CollidingKey2) == Ok(2) +} + +fn test_random_operations( + initial_seed: Int, + num_ops: Int, + key_space: Int, + initial: dict.Dict(Int, Int), +) -> Nil { + test_random_operations_loop( + initial_seed, + prng(initial_seed), + num_ops, + key_space, + dict.to_list(initial), + initial, + ) +} + +fn test_random_operations_loop( + initial_seed: Int, + seed: Int, + remaining: Int, + key_space: Int, + proplist: List(#(Int, Int)), + dict: dict.Dict(Int, Int), +) -> Nil { + case remaining > 0 { + False -> { + assert_dict_matches_proplist(dict, proplist, initial_seed) + } + True -> { + let seed = prng(seed) + let op_choice = seed % 2 + let seed = prng(seed) + let key = seed % key_space + + case op_choice { + // Insert + 0 -> { + let new_proplist = list.key_set(proplist, key, key * 2) + let new_dict = dict.insert(dict, key, key * 2) + test_random_operations_loop( + initial_seed, + seed, + remaining - 1, + key_space, + new_proplist, + new_dict, + ) + } + // Delete + _ -> { + let new_proplist = case list.key_pop(proplist, key) { + Ok(#(_, remaining)) -> remaining + Error(Nil) -> proplist + } + let new_dict = dict.delete(dict, key) + test_random_operations_loop( + initial_seed, + seed, + remaining - 1, + key_space, + new_proplist, + new_dict, + ) + } + } + } + } +} + +fn run_many_random_tests( + count count: Int, + ops_per_test ops_per_test: Int, + key_space key_space: Int, + initial dict: dict.Dict(Int, Int), +) -> Nil { + case count { + 0 -> Nil + _ -> { + let start_seed = int.random(0x7fffffff) + test_random_operations(start_seed, 
ops_per_test, key_space, dict) + run_many_random_tests( + count: count - 1, + ops_per_test: ops_per_test, + key_space: key_space, + initial: dict, + ) + } + } +} + +pub fn random_operations_small_test() { + run_many_random_tests( + count: 100, + ops_per_test: 50, + key_space: 32, + initial: dict.new(), + ) +} + +pub fn random_operations_medium_test() { + run_many_random_tests( + count: 100, + ops_per_test: 50, + key_space: 200, + initial: range_dict(50), + ) +} + +pub fn random_operations_large_test() { + run_many_random_tests( + count: 100, + ops_per_test: 1000, + key_space: 2000, + initial: range_dict(1000), + ) +} + +fn range_dict(size) { + list.range(1, size) + |> list.map(fn(x) { #(x, x) }) + |> dict.from_list +} + +fn prng(state: Int) -> Int { + { state * 48_271 } % 0x7FFFFFFF +} + +fn assert_dict_matches_proplist( + d: dict.Dict(k, v), + proplist: List(#(k, v)), + seed: Int, +) -> Nil { + case dict.size(d) == list.length(proplist) { + True -> Nil + False -> + panic as { + "Size mismatch with seed " + <> int.to_string(seed) + <> ": dict.size=" + <> int.to_string(dict.size(d)) + <> " proplist.size=" + <> int.to_string(list.length(proplist)) + } + } + + list.each(proplist, fn(pair) { + let #(key, value) = pair + let result = dict.get(d, key) + + case result == Ok(value) { + True -> Nil + False -> + panic as { + "Get mismatch with seed " + <> int.to_string(seed) + <> ": key=" + <> string.inspect(key) + <> ", value=" + <> string.inspect(value) + <> ", dict.get=" + <> string.inspect(result) + } + } + }) + + case d == dict.from_list(proplist) { + True -> Nil + False -> + panic as { + "Structural equality failed with seed " <> int.to_string(seed) + } + } +} From f99d4da3838b45ca7f3838e45b787f7038e2cc00 Mon Sep 17 00:00:00 2001 From: yoshie <84042103+yoshi-monster@users.noreply.github.com> Date: Fri, 5 Dec 2025 14:23:06 +0100 Subject: [PATCH 17/17] re-introduce Dict.prototype.forEach. 
--- src/dict.mjs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/dict.mjs b/src/dict.mjs index 0e314423..b61fcd39 100644 --- a/src/dict.mjs +++ b/src/dict.mjs @@ -164,6 +164,13 @@ export default class Dict { this.size = size; this.root = root; } + + /** + * @deprecated Use the `fold` function instead. + */ + forEach(f) { + fold(this, null, (_, k, v) => f(v, k)); + } } /// The power-of-2 branching factor for the dict. For example, a value of `5` indicates a 32-ary tree.