From 39c932d3539c8b9bac130e430c859c5e47f83ef0 Mon Sep 17 00:00:00 2001 From: Ian Bicking Date: Fri, 31 May 2024 13:03:37 -0500 Subject: [PATCH 1/2] Apply the patches from #465 to fix indentation See https://github.com/ohmjs/ohm/issues/465#issuecomment-1977985628 This changes memoization some so it can consume dedents --- packages/ohm-js/src/MatchState.js | 24 ++++++++++++++++++++---- packages/ohm-js/src/pexprs-eval.js | 6 +++--- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/packages/ohm-js/src/MatchState.js b/packages/ohm-js/src/MatchState.js index afc676b0..34212b09 100644 --- a/packages/ohm-js/src/MatchState.js +++ b/packages/ohm-js/src/MatchState.js @@ -1,3 +1,4 @@ +import { StringBuffer } from './common.js'; import {InputStream} from './InputStream.js'; import {MatchResult} from './MatchResult.js'; import {PosInfo} from './PosInfo.js'; @@ -234,10 +235,25 @@ export class MatchState { const actuals = app ? app.args : []; expr = expr.substituteParams(actuals); } - return ( - this.getMemoizedTraceEntry(pos, expr) || - new Trace(this.input, pos, this.inputStream.pos, expr, succeeded, bindings, this.trace) - ); + const memoTrace = this.getMemoizedTraceEntry(pos, expr) + let indentsLessThanPos = 0 + let indentsLessThanInputPos = 0 + if (!memoTrace) { + const input = new StringBuffer() + for (let i = 0; i < this.input.length; i++) { + if (this.inputStream._indentationAt(i) !== 0) { + for (let numPlaces = this.inputStream._indentationAt(i); numPlaces !== 0; numPlaces += (numPlaces < 0 ? 1 : -1)) { + input.append(numPlaces < 0 ? "\u21e6" : "\u21e8") + if (i < pos) indentsLessThanPos += 1; + if (i < this.inputStream.pos) indentsLessThanInputPos += 1; + } + } + input.append(this.inputStream.source[i]) + } + return new Trace(input.contents(), pos + indentsLessThanPos, this.inputStream.pos + indentsLessThanInputPos, expr, succeeded, bindings, this.trace) + } else { + return memoTrace; + } } isTracing() { diff --git a/packages/ohm-js/src/pexprs-eval.js b/packages/ohm-js/src/pexprs-eval.js index 32f74727..dfe8dda8 100644 --- a/packages/ohm-js/src/pexprs-eval.js +++ b/packages/ohm-js/src/pexprs-eval.js @@ -232,7 +232,7 @@ pexprs.Apply.prototype.handleCycle = function(state) { const memoKey = this.toMemoKey(); let memoRec = posInfo.memo[memoKey]; - if (currentLeftRecursion && currentLeftRecursion.headApplication.toMemoKey() === memoKey) { + if (memoRec && currentLeftRecursion && currentLeftRecursion.headApplication.toMemoKey() === memoKey) { // We already know about this left recursion, but it's possible there are "involved // applications" that we don't already know about, so... memoRec.updateInvolvedApplicationMemoKeys(); @@ -275,7 +275,7 @@ pexprs.Apply.prototype.reallyEval = function(state) { let memoRec; if (state.doNotMemoize) { - state.doNotMemoize = false; + // state.doNotMemoize = false; } else if (isHeadOfLeftRecursion) { value = this.growSeedResult(body, state, origPos, currentLR, value); origPosInfo.endLeftRecursion(); @@ -307,7 +307,7 @@ pexprs.Apply.prototype.reallyEval = function(state) { // Record trace information in the memo table, so that it is available if the memoized result // is used later. - if (state.isTracing() && memoRec) { + if (state.isTracing() && memoRec && !state.doNotMemoize) { const entry = state.getTraceEntry(origPos, this, succeeded, succeeded ? [value] : []); if (isHeadOfLeftRecursion) { common.assert(entry.terminatingLREntry != null || !succeeded); From 454746e09fbff9b240859b462a5a2fb04ef55878 Mon Sep 17 00:00:00 2001 From: Ian Bicking Date: Mon, 3 Jun 2024 14:16:19 -0500 Subject: [PATCH 2/2] When parsing whitespace-sensitive grammar, do not treat an empty line as an indent or dedent This is a case like: if foo: bar() baz() --- packages/ohm-js/src/findIndentation.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/ohm-js/src/findIndentation.js b/packages/ohm-js/src/findIndentation.js index 32a7befb..668827ea 100644 --- a/packages/ohm-js/src/findIndentation.js +++ b/packages/ohm-js/src/findIndentation.js @@ -19,7 +19,9 @@ export function findIndentation(input) { const indentPos = pos + indentSize; - if (indentSize > prevSize) { + if (!line.trim()) { + // Empty line, cannot be an indent or dedent. + } else if (indentSize > prevSize) { // Indent -- always only 1. stack.push(indentSize); result[indentPos] = 1;