Skip to content

Commit 8424f65

Browse files
authored
Merge pull request microsoft#262215 from mjbvz/disciplinary-moth
Don't break up inline math during response streaming
2 parents b860d80 + 42c26cd commit 8424f65

File tree

4 files changed

+126
-105
lines changed

4 files changed

+126
-105
lines changed

src/vs/workbench/contrib/chat/common/chatWordCounter.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
* Licensed under the MIT License. See License.txt in the project root for license information.
44
*--------------------------------------------------------------------------------------------*/
55

6+
import * as markedKatexExtension from '../../markdown/common/markedKatexExtension.js';
7+
68
export interface IWordCountResult {
79
value: string;
810
returnedWordCount: number;
@@ -43,11 +45,14 @@ const linkPattern =
4345
export function getNWords(str: string, numWordsToCount: number): IWordCountResult {
4446
// This regex matches each word and skips over whitespace and separators. A word is:
4547
// A markdown link
48+
// Inline math
4649
// One chinese character
4750
// One or more + - =, handled so that code like "a=1+2-3" is broken up better
4851
// One or more characters that aren't whitespace or any of the above
4952
const backtick = '`';
50-
const allWordMatches = Array.from(str.matchAll(new RegExp(linkPattern + r`|\p{sc=Han}|=+|\++|-+|[^\s\|\p{sc=Han}|=|\+|\-|${backtick}]+`, 'gu')));
53+
54+
const wordRegExp = new RegExp('(?:' + linkPattern + ')|(?:' + markedKatexExtension.mathInlineRegExp.source + r`)|\p{sc=Han}|=+|\++|-+|[^\s\|\p{sc=Han}|=|\+|\-|${backtick}]+`, 'gu');
55+
const allWordMatches = Array.from(str.matchAll(wordRegExp));
5156

5257
const targetWords = allWordMatches.slice(0, numWordsToCount);
5358

src/vs/workbench/contrib/chat/test/common/chatWordCounter.test.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,14 @@ suite('ChatWordCounter', () => {
102102

103103
cases.forEach(([str, nWords, result]) => doTest(str, nWords, result));
104104
});
105-
});
106105

106+
test(`Inline math shouldn't be broken up`, () => {
107+
const cases: [string, number, string][] = [
108+
['a $x + y$ b', 3, 'a $x + y$ b'],
109+
['a $\\frac{1}{2} + \\sqrt{x^2 + y^2}$ b', 3, 'a $\\frac{1}{2} + \\sqrt{x^2 + y^2}$ b'],
110+
];
111+
112+
cases.forEach(([str, nWords, result]) => doTest(str, nWords, result));
113+
});
114+
});
107115
});

src/vs/workbench/contrib/markdown/browser/markedKatexSupport.ts

Lines changed: 1 addition & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { MarkdownSanitizerConfig } from '../../../../base/browser/markdownRender
99
import { CodeWindow } from '../../../../base/browser/window.js';
1010
import { Lazy } from '../../../../base/common/lazy.js';
1111
import type * as marked from '../../../../base/common/marked/marked.js';
12+
import { MarkedKatexExtension } from '../common/markedKatexExtension.js';
1213

1314
export class MarkedKatexSupport {
1415

@@ -160,109 +161,6 @@ export class MarkedKatexSupport {
160161
}
161162
}
162163

163-
164-
export namespace MarkedKatexExtension {
165-
type KatexOptions = import('katex').KatexOptions;
166-
167-
// From https://github.com/UziTech/marked-katex-extension/blob/main/src/index.js
168-
// From https://github.com/UziTech/marked-katex-extension/blob/main/src/index.js
169-
export interface MarkedKatexOptions extends KatexOptions { }
170-
171-
const inlineRule = /^(\${1,2})(?!\$)((?:\\.|[^\\\n])*?(?:\\.|[^\\\n\$]))\1(?=[\s?!\.,:'\uff1f\uff01\u3002\uff0c\uff1a']|$)/;
172-
const inlineRuleNonStandard = /^(?<![a-zA-Z0-9])(\${1,2})(?!\$)((?:\\.|[^\\\n])*?(?:\\.|[^\\\n\$]))\1(?![a-zA-Z0-9])/; // Non-standard, but ensure opening $ is not preceded and closing $ is not followed by word/number characters
173-
174-
const blockRule = /^(\${1,2})\n((?:\\[^]|[^\\])+?)\n\1(?:\n|$)/;
175-
176-
export function extension(katex: typeof import('katex').default, options: MarkedKatexOptions = {}): marked.MarkedExtension {
177-
return {
178-
extensions: [
179-
inlineKatex(options, createRenderer(katex, options, false)),
180-
blockKatex(options, createRenderer(katex, options, true)),
181-
],
182-
};
183-
}
184-
185-
function createRenderer(katex: typeof import('katex').default, options: MarkedKatexOptions, isBlock: boolean): marked.RendererExtensionFunction {
186-
return (token: marked.Tokens.Generic) => {
187-
let out: string;
188-
try {
189-
out = katex.renderToString(token.text, {
190-
...options,
191-
throwOnError: true,
192-
displayMode: token.displayMode,
193-
});
194-
} catch {
195-
// On failure, just use the original text including the wrapping $ or $$
196-
out = token.raw;
197-
}
198-
return out + (isBlock ? '\n' : '');
199-
};
200-
}
201-
202-
function inlineKatex(options: MarkedKatexOptions, renderer: marked.RendererExtensionFunction): marked.TokenizerAndRendererExtension {
203-
const nonStandard = true;
204-
const ruleReg = nonStandard ? inlineRuleNonStandard : inlineRule;
205-
return {
206-
name: 'inlineKatex',
207-
level: 'inline',
208-
start(src: string) {
209-
let index;
210-
let indexSrc = src;
211-
212-
while (indexSrc) {
213-
index = indexSrc.indexOf('$');
214-
if (index === -1) {
215-
return;
216-
}
217-
218-
const possibleKatex = indexSrc.substring(index);
219-
if (possibleKatex.match(ruleReg)) {
220-
return index;
221-
}
222-
223-
indexSrc = indexSrc.substring(index + 1).replace(/^\$+/, '');
224-
}
225-
return;
226-
},
227-
tokenizer(src: string, tokens: marked.Token[]) {
228-
const match = src.match(ruleReg);
229-
if (match) {
230-
return {
231-
type: 'inlineKatex',
232-
raw: match[0],
233-
text: match[2].trim(),
234-
displayMode: match[1].length === 2,
235-
};
236-
}
237-
return;
238-
},
239-
renderer,
240-
};
241-
}
242-
243-
function blockKatex(options: MarkedKatexOptions, renderer: marked.RendererExtensionFunction): marked.TokenizerAndRendererExtension {
244-
return {
245-
name: 'blockKatex',
246-
level: 'block',
247-
start(src: string) {
248-
return src.match(new RegExp(blockRule.source, 'm'))?.index;
249-
},
250-
tokenizer(src: string, tokens: marked.Token[]) {
251-
const match = src.match(blockRule);
252-
if (match) {
253-
return {
254-
type: 'blockKatex',
255-
raw: match[0],
256-
text: match[2].trim(),
257-
displayMode: match[1].length === 2,
258-
};
259-
}
260-
return;
261-
},
262-
renderer,
263-
};
264-
}
265-
}
266164
const trustedMathMlTags = Object.freeze([
267165
'semantics',
268166
'annotation',
src/vs/workbench/contrib/markdown/common/markedKatexExtension.ts

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License. See License.txt in the project root for license information.
4+
*--------------------------------------------------------------------------------------------*/
5+
import type * as marked from '../../../../base/common/marked/marked.js';
6+
7+
/**
 * Matches inline math delimited by `$...$` or `$$...$$` anywhere in a string.
 *
 * Non-standard, but ensures the opening `$` is not preceded and the closing `$` is not
 * followed by word/number characters.
 *
 * Capture groups: `dollars` (also group 1) is the delimiter, group 2 is the math source
 * between the delimiters.
 */
export const mathInlineRegExp = /(?<![a-zA-Z0-9])(?<dollars>\${1,2})(?!\$)((?:\\.|[^\\\n])*?(?:\\.|[^\\\n\$]))\k<dollars>(?![a-zA-Z0-9])/;

/**
 * `mathInlineRegExp` anchored to the start of the input, for use by the inline tokenizer.
 *
 * NOTE(review): when this is matched against a substring that starts at the `$`, the leading
 * lookbehind has no preceding character to inspect and therefore always passes — confirm this is intended.
 */
const inlineRule = new RegExp('^' + mathInlineRegExp.source);

/**
 * Marked extension that tokenizes `$`-delimited math and renders it with KaTeX.
 */
export namespace MarkedKatexExtension {
	type KatexOptions = import('katex').KatexOptions;

	// From https://github.com/UziTech/marked-katex-extension/blob/main/src/index.js
	export interface MarkedKatexOptions extends KatexOptions { }

	/** Block math: a line holding only `$` or `$$`, the math body, then the same delimiter on its own line. */
	const blockRule = /^(\${1,2})\n((?:\\[^]|[^\\])+?)\n\1(?:\n|$)/;

	/** Multiline variant of `blockRule` so `^` can match at any line start when searching for a block. */
	const blockStartRule = new RegExp(blockRule.source, 'm');

	/**
	 * Creates the marked extension containing the inline and block math tokenizers/renderers.
	 *
	 * @param katex The KaTeX module used to render math to a string.
	 * @param options KaTeX options forwarded to every `renderToString` call.
	 * @returns A marked extension object with the `inlineKatex` and `blockKatex` extensions.
	 */
	export function extension(katex: typeof import('katex').default, options: MarkedKatexOptions = {}): marked.MarkedExtension {
		return {
			extensions: [
				inlineKatex(options, createRenderer(katex, options, false)),
				blockKatex(options, createRenderer(katex, options, true)),
			],
		};
	}

	/**
	 * Builds a renderer that turns a math token into KaTeX output.
	 *
	 * @param katex The KaTeX module used for rendering.
	 * @param options KaTeX options; `throwOnError` and `displayMode` are always overridden.
	 * @param isBlock When true, a trailing newline is appended to the rendered output.
	 */
	function createRenderer(katex: typeof import('katex').default, options: MarkedKatexOptions, isBlock: boolean): marked.RendererExtensionFunction {
		return (token: marked.Tokens.Generic) => {
			let out: string;
			try {
				out = katex.renderToString(token.text, {
					...options,
					// Force KaTeX to throw so that the fallback below is used for invalid math
					throwOnError: true,
					displayMode: token.displayMode,
				});
			} catch {
				// On failure, just use the original text including the wrapping $ or $$
				out = token.raw;
			}
			return out + (isBlock ? '\n' : '');
		};
	}

	/**
	 * Creates the inline-level math extension (`$x$` / `$$x$$` inside a line of text).
	 */
	function inlineKatex(options: MarkedKatexOptions, renderer: marked.RendererExtensionFunction): marked.TokenizerAndRendererExtension {
		const ruleReg = inlineRule;
		return {
			name: 'inlineKatex',
			level: 'inline',
			/**
			 * Returns the index in `src` of the first `$` that begins inline math,
			 * or `undefined` if there is none.
			 */
			start(src: string) {
				// Number of characters of `src` already skipped. Needed so the returned index
				// is relative to `src` rather than to the shrinking `remaining` string.
				let consumed = 0;
				let remaining = src;

				while (remaining) {
					const index = remaining.indexOf('$');
					if (index === -1) {
						return;
					}

					const possibleKatex = remaining.substring(index);
					if (possibleKatex.match(ruleReg)) {
						return consumed + index;
					}

					// Skip the rejected `$` together with any run of `$` directly following it
					const afterDollar = remaining.substring(index + 1);
					const rest = afterDollar.replace(/^\$+/, '');
					consumed += index + 1 + (afterDollar.length - rest.length);
					remaining = rest;
				}
				return;
			},
			/**
			 * Produces an `inlineKatex` token when `src` begins with inline math.
			 */
			tokenizer(src: string, tokens: marked.Token[]) {
				const match = src.match(ruleReg);
				if (match) {
					return {
						type: 'inlineKatex',
						raw: match[0],
						text: match[2].trim(),
						// `$$...$$` requests display mode, `$...$` inline mode
						displayMode: match[1].length === 2,
					};
				}
				return;
			},
			renderer,
		};
	}

	/**
	 * Creates the block-level math extension (delimiters on their own lines).
	 */
	function blockKatex(options: MarkedKatexOptions, renderer: marked.RendererExtensionFunction): marked.TokenizerAndRendererExtension {
		return {
			name: 'blockKatex',
			level: 'block',
			/**
			 * Returns the index in `src` of the first line that starts a math block,
			 * or `undefined` if there is none.
			 */
			start(src: string) {
				return src.match(blockStartRule)?.index;
			},
			/**
			 * Produces a `blockKatex` token when `src` begins with a math block.
			 */
			tokenizer(src: string, tokens: marked.Token[]) {
				const match = src.match(blockRule);
				if (match) {
					return {
						type: 'blockKatex',
						raw: match[0],
						text: match[2].trim(),
						displayMode: match[1].length === 2,
					};
				}
				return;
			},
			renderer,
		};
	}
}

0 commit comments

Comments
 (0)