Skip to content

Commit ea655f4

Browse files
fix: Add citation repairing. Also remove revision as a tool param.
1 parent 1109969 commit ea655f4

File tree

4 files changed

+132
-16
lines changed

4 files changed

+132
-16
lines changed

packages/web/src/features/chat/agent.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,12 @@ When you have sufficient context, output your answer as a structured markdown re
207207
- Code patterns (e.g., "using \`file:\${suggestionQuery}\` pattern" → must include \`${fileReferenceToString({ fileName: 'search.ts', range: { startLine: 10, endLine: 15 } })}\`)
208208
- Any code snippet or line you're explaining
209209
- Class names, method calls, imports, etc.
210+
- Some examples of both correct and incorrect code references:
211+
- Correct: @file:{path/to/file.ts}
212+
- Correct: @file:{path/to/file.ts:10-15}
213+
- Incorrect: @file{path/to/file.ts} (missing colon)
214+
- Incorrect: @file:path/to/file.ts (missing curly braces)
215+
- Incorrect: @file:{path/to/file.ts:10-25,30-35} (multiple ranges not supported)
210216
- Be clear and very concise. Use bullet points where appropriate
211217
- Do NOT explain code without providing the exact location reference. Every code mention requires a corresponding \`${FILE_REFERENCE_PREFIX}\` reference
212218
- If you cannot provide a code reference for something you're discussing, do not mention that specific code element

packages/web/src/features/chat/tools.ts

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,15 @@ export const findSymbolReferencesTool = tool({
1414
inputSchema: z.object({
1515
symbol: z.string().describe("The symbol to find references to"),
1616
language: z.string().describe("The programming language of the symbol"),
17-
revision: z.string().describe("The revision to search for the symbol in"),
1817
}),
19-
execute: async ({ symbol, language, revision }) => {
18+
execute: async ({ symbol, language }) => {
19+
// @todo: make revision configurable.
20+
const revision = "HEAD";
21+
2022
const response = await findSearchBasedSymbolReferences({
2123
symbolName: symbol,
2224
language,
23-
revisionName: revision,
25+
revisionName: "HEAD",
2426
// @todo(mt): handle multi-tenancy.
2527
}, SINGLE_TENANT_ORG_DOMAIN);
2628

@@ -50,9 +52,11 @@ export const findSymbolDefinitionsTool = tool({
5052
inputSchema: z.object({
5153
symbol: z.string().describe("The symbol to find definitions of"),
5254
language: z.string().describe("The programming language of the symbol"),
53-
revision: z.string().describe("The revision to search for the symbol in"),
5455
}),
55-
execute: async ({ symbol, language, revision }) => {
56+
execute: async ({ symbol, language }) => {
57+
// @todo: make revision configurable.
58+
const revision = "HEAD";
59+
5660
const response = await findSearchBasedSymbolDefinitions({
5761
symbolName: symbol,
5862
language,
@@ -86,9 +90,11 @@ export const readFilesTool = tool({
8690
inputSchema: z.object({
8791
paths: z.array(z.string()).describe("The paths to the files to read"),
8892
repository: z.string().describe("The repository to read the files from"),
89-
revision: z.string().describe("The revision to read the files from"),
9093
}),
91-
execute: async ({ paths, repository, revision }) => {
94+
execute: async ({ paths, repository }) => {
95+
// @todo: make revision configurable.
96+
const revision = "HEAD";
97+
9298
const responses = await Promise.all(paths.map(async (path) => {
9399
return getFileSource({
94100
fileName: path,

packages/web/src/features/chat/utils.test.ts

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { expect, test, vi } from 'vitest'
2-
import { fileReferenceToString, getAnswerPartFromAssistantMessage, groupMessageIntoSteps } from './utils'
2+
import { fileReferenceToString, getAnswerPartFromAssistantMessage, groupMessageIntoSteps, repairCitations } from './utils'
33
import { FILE_REFERENCE_REGEX, ANSWER_TAG } from './constants';
44
import { SBChatMessage, SBChatMessagePart } from './types';
55

@@ -238,3 +238,88 @@ test('getAnswerPartFromAssistantMessage returns undefined when streaming and no
238238

239239
expect(result).toBeUndefined();
240240
});
241+
242+
test('repairCitations fixes missing colon after @file', () => {
243+
const input = 'See the function in @file{auth.ts} for details.';
244+
const expected = 'See the function in @file:{auth.ts} for details.';
245+
expect(repairCitations(input)).toBe(expected);
246+
});
247+
248+
test('repairCitations fixes missing colon with range', () => {
249+
const input = 'Check @file{config.ts:15-20} for the configuration.';
250+
const expected = 'Check @file:{config.ts:15-20} for the configuration.';
251+
expect(repairCitations(input)).toBe(expected);
252+
});
253+
254+
test('repairCitations fixes missing braces around filename', () => {
255+
const input = 'The logic is in @file:utils.js and handles validation.';
256+
const expected = 'The logic is in @file:{utils.js} and handles validation.';
257+
expect(repairCitations(input)).toBe(expected);
258+
});
259+
260+
test('repairCitations fixes missing braces with path', () => {
261+
const input = 'Look at @file:src/components/Button.tsx for the component.';
262+
const expected = 'Look at @file:{src/components/Button.tsx} for the component.';
263+
expect(repairCitations(input)).toBe(expected);
264+
});
265+
266+
test('repairCitations removes multiple ranges keeping only first', () => {
267+
const input = 'See @file:{service.ts:10-15,20-25,30-35} for implementation.';
268+
const expected = 'See @file:{service.ts:10-15} for implementation.';
269+
expect(repairCitations(input)).toBe(expected);
270+
});
271+
272+
test('repairCitations fixes malformed triple number ranges', () => {
273+
const input = 'Check @file:{handler.ts:5-10-15} for the logic.';
274+
const expected = 'Check @file:{handler.ts:5-10} for the logic.';
275+
expect(repairCitations(input)).toBe(expected);
276+
});
277+
278+
test('repairCitations handles multiple citations in same text', () => {
279+
const input = 'See @file{auth.ts} and @file:config.js for setup details.';
280+
const expected = 'See @file:{auth.ts} and @file:{config.js} for setup details.';
281+
expect(repairCitations(input)).toBe(expected);
282+
});
283+
284+
test('repairCitations leaves correctly formatted citations unchanged', () => {
285+
const input = 'The function @file:{utils.ts:42-50} handles validation correctly.';
286+
expect(repairCitations(input)).toBe(input);
287+
});
288+
289+
test('repairCitations handles edge cases with spaces and punctuation', () => {
290+
const input = 'Functions like @file:helper.ts, @file{main.js}, and @file:{app.ts:1-5,10-15} work.';
291+
const expected = 'Functions like @file:{helper.ts}, @file:{main.js}, and @file:{app.ts:1-5} work.';
292+
expect(repairCitations(input)).toBe(expected);
293+
});
294+
295+
test('repairCitations returns empty string unchanged', () => {
296+
expect(repairCitations('')).toBe('');
297+
});
298+
299+
test('repairCitations returns text without citations unchanged', () => {
300+
const input = 'This is just regular text without any file references.';
301+
expect(repairCitations(input)).toBe(input);
302+
});
303+
304+
test('repairCitations handles complex file paths correctly', () => {
305+
const input = 'Check @file:src/components/ui/Button/index.tsx for implementation.';
306+
const expected = 'Check @file:{src/components/ui/Button/index.tsx} for implementation.';
307+
expect(repairCitations(input)).toBe(expected);
308+
});
309+
310+
test('repairCitations handles files with numbers and special characters', () => {
311+
const input = 'See @file{utils-v2.0.1.ts} and @file:config_2024.json for setup.';
312+
const expected = 'See @file:{utils-v2.0.1.ts} and @file:{config_2024.json} for setup.';
313+
expect(repairCitations(input)).toBe(expected);
314+
});
315+
316+
test('repairCitations handles citation at end of sentence', () => {
317+
const input = 'The implementation is in @file:helper.ts.';
318+
const expected = 'The implementation is in @file:{helper.ts}.';
319+
expect(repairCitations(input)).toBe(expected);
320+
});
321+
322+
test('repairCitations preserves already correct citations with ranges', () => {
323+
const input = 'The function @file:{utils.ts:10-20} and variable @file:{config.js:5} work correctly.';
324+
expect(repairCitations(input)).toBe(input);
325+
});

packages/web/src/features/chat/utils.ts

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ export const createFileReference = ({ fileName, startLine, endLine }: { fileName
243243
export const convertLLMOutputToPortableMarkdown = (text: string): string => {
244244
return text.replace(FILE_REFERENCE_REGEX, (_, fileName, startLine, endLine) => {
245245
const displayName = fileName.split('/').pop() || fileName;
246-
246+
247247
let linkText = displayName;
248248
if (startLine) {
249249
if (endLine && startLine !== endLine) {
@@ -252,7 +252,7 @@ export const convertLLMOutputToPortableMarkdown = (text: string): string => {
252252
linkText += `:${startLine}`;
253253
}
254254
}
255-
255+
256256
return `[${linkText}](${fileName})`;
257257
});
258258
}
@@ -265,10 +265,10 @@ export const groupMessageIntoSteps = (parts: SBChatMessagePart[]) => {
265265

266266
const steps: SBChatMessagePart[][] = [];
267267
let currentStep: SBChatMessagePart[] = [];
268-
268+
269269
for (let i = 0; i < parts.length; i++) {
270270
const part = parts[i];
271-
271+
272272
if (part.type === 'step-start') {
273273
if (currentStep.length > 0) {
274274
steps.push([...currentStep]);
@@ -278,28 +278,47 @@ export const groupMessageIntoSteps = (parts: SBChatMessagePart[]) => {
278278
currentStep.push(part);
279279
}
280280
}
281-
281+
282282
if (currentStep.length > 0) {
283283
steps.push(currentStep);
284284
}
285-
285+
286286
return steps;
287287
}
288288

// LLMs do not always follow the citation format they are instructed to use.
// This repairs the most common mistakes so that a citation ends up in one of
// the supported shapes: @file:{path}, @file:{path:line} or @file:{path:start-end}.
//
// The passes run in order and each one relies on the previous one's output:
//   1. add a missing colon             @file{a.ts}          -> @file:{a.ts}
//   2. add missing curly braces        @file:a.ts           -> @file:{a.ts}
//   3. keep only the first line range  @file:{a.ts:1-2,5-9} -> @file:{a.ts:1-2}
//
// Text without citations, and citations that are already well formed, are
// returned unchanged.
export const repairCitations = (text: string): string => {
    return text
        // Fix missing colon: @file{...} -> @file:{...}
        .replace(/@file\{([^}]+)\}/g, '@file:{$1}')
        // Fix missing braces: @file:filename -> @file:{filename}
        // The path runs up to the next whitespace / end of input, minus any
        // trailing sentence or closing punctuation (e.g. `.`, `,`, `)`, a
        // backtick). The terminator is matched with a lookahead so it is left
        // in place rather than being captured into the path.
        .replace(/@file:([^\s{]\S*?)(?=[)\]"'`,;:!?.]*(?:\s|$))/g, '@file:{$1}')
        // Fix unsupported range lists and malformed ranges: keep the first
        // `line` or `start-end` and drop whatever digits, commas, dashes and
        // whitespace follow it (e.g. `10-15, 20-25`, `10,20-25`, `5-10-15`).
        .replace(/@file:\{([^:}]+):(\d+(?:-\d+)?)[\d,\s-]*\}/g, '@file:{$1:$2}');
};
301+
289302
// Attempts to find the part of the assistant's message
290303
// that contains the answer.
291304
export const getAnswerPartFromAssistantMessage = (message: SBChatMessage, isStreaming: boolean): TextUIPart | undefined => {
292305
const lastTextPart = message.parts
293306
.findLast((part) => part.type === 'text')
294307

295308
if (lastTextPart?.text.startsWith(ANSWER_TAG)) {
296-
return lastTextPart;
309+
return {
310+
...lastTextPart,
311+
text: repairCitations(lastTextPart.text),
312+
};
297313
}
298314

299315
// If the agent did not include the answer tag, then fallback to using the last text part.
300316
// Only do this when we are no longer streaming since the agent may still be thinking.
301317
if (!isStreaming && lastTextPart) {
302-
return lastTextPart;
318+
return {
319+
...lastTextPart,
320+
text: repairCitations(lastTextPart.text),
321+
};
303322
}
304323

305324
return undefined;

0 commit comments

Comments
 (0)