Skip to content

Commit 56ee5d6

Browse files
authored
Merge pull request #4232 from Kilo-Org/mark/adaptive-debounce-delay
feat(ghost): implement adaptive debounce delay for autocomplete
2 parents 63780cd + 4cf6fc6 commit 56ee5d6

File tree

2 files changed

+216
-2
lines changed

2 files changed

+216
-2
lines changed

src/services/ghost/classic-auto-complete/GhostInlineCompletionProvider.ts

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,21 @@ import { ClineProvider } from "../../../core/webview/ClineProvider"
2929
import * as telemetry from "./AutocompleteTelemetry"
3030

3131
const MAX_SUGGESTIONS_HISTORY = 20

/**
 * Initial debounce delay in milliseconds.
 * This value is used as the starting debounce delay before enough latency samples
 * are collected. Once more than LATENCY_SAMPLE_SIZE samples have been collected,
 * the debounce delay is dynamically adjusted to the average of recent request
 * latencies.
 */
const INITIAL_DEBOUNCE_DELAY_MS = 300

/**
 * Number of latency samples to collect before using adaptive debounce delay.
 * Once more than this many samples have been recorded (i.e. once the rolling
 * window first overflows), the debounce delay becomes the average of the stored
 * latencies, updated after each subsequent request.
 */
const LATENCY_SAMPLE_SIZE = 10
3347

3448
export type { CostTrackingCallback, GhostPrompt, MatchingSuggestionResult, LLMRetrievalResult }
3549

@@ -118,6 +132,8 @@ export class GhostInlineCompletionProvider implements vscode.InlineCompletionIte
118132
private isFirstCall: boolean = true
119133
private ignoreController?: Promise<RooIgnoreController>
120134
private acceptedCommand: vscode.Disposable | null = null
135+
// Adaptive debounce delay; starts at INITIAL_DEBOUNCE_DELAY_MS and is
// re-derived from the rolling latency window by recordLatency() once the
// window holds more than LATENCY_SAMPLE_SIZE samples.
private debounceDelayMs: number = INITIAL_DEBOUNCE_DELAY_MS
// Rolling window of the most recent request latencies in milliseconds,
// capped at LATENCY_SAMPLE_SIZE entries (oldest evicted first).
private latencyHistory: number[] = []
121137

122138
constructor(
123139
context: vscode.ExtensionContext,
@@ -240,6 +256,28 @@ export class GhostInlineCompletionProvider implements vscode.InlineCompletionIte
240256
}
241257
}
242258

259+
/**
260+
* Records a latency measurement and updates the adaptive debounce delay.
261+
* Maintains a rolling window of the last LATENCY_SAMPLE_SIZE latencies.
262+
* Once enough samples are collected, the debounce delay is set to the
263+
* average of all stored latencies.
264+
*
265+
* @param latencyMs - The latency of the most recent request in milliseconds
266+
*/
267+
public recordLatency(latencyMs: number): void {
268+
// Add the new latency to the history
269+
this.latencyHistory.push(latencyMs)
270+
271+
// Remove oldest if we exceed the sample size
272+
if (this.latencyHistory.length > LATENCY_SAMPLE_SIZE) {
273+
this.latencyHistory.shift()
274+
275+
// Once we have enough samples, update the debounce delay to the average
276+
const sum = this.latencyHistory.reduce((acc, val) => acc + val, 0)
277+
this.debounceDelayMs = Math.round(sum / this.latencyHistory.length)
278+
}
279+
}
280+
243281
public dispose(): void {
244282
if (this.debounceTimer !== null) {
245283
clearTimeout(this.debounceTimer)
@@ -432,7 +470,7 @@ export class GhostInlineCompletionProvider implements vscode.InlineCompletionIte
432470
// Remove this request from pending when done
433471
this.removePendingRequest(pendingRequest)
434472
resolve()
435-
}, DEBOUNCE_DELAY_MS)
473+
}, this.debounceDelayMs)
436474
})
437475

438476
// Complete the pending request object
@@ -482,6 +520,9 @@ export class GhostInlineCompletionProvider implements vscode.InlineCompletionIte
482520
telemetryContext,
483521
)
484522

523+
// Record latency for adaptive debounce delay
524+
this.recordLatency(latencyMs)
525+
485526
this.costTrackingCallback(result.cost, result.inputTokens, result.outputTokens)
486527

487528
// Always update suggestions, even if text is empty (for caching)

src/services/ghost/classic-auto-complete/__tests__/GhostInlineCompletionProvider.test.ts

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1954,6 +1954,179 @@ describe("GhostInlineCompletionProvider", () => {
19541954
})
19551955
})
19561956

1957+
// Covers the adaptive debounce behavior: the delay starts at the initial
// 300ms constant and switches to the rolling average of request latencies
// only after MORE than LATENCY_SAMPLE_SIZE (10) samples have been recorded.
describe("adaptive debounce delay", () => {
	it("should start with initial debounce delay of 300ms", async () => {
		let callCount = 0
		vi.mocked(mockModel.generateResponse).mockImplementation(async () => {
			callCount++
			return {
				cost: 0.01,
				inputTokens: 100,
				outputTokens: 50,
				cacheWriteTokens: 0,
				cacheReadTokens: 0,
			}
		})

		// First call - executes immediately (leading edge)
		await provider.provideInlineCompletionItems(mockDocument, mockPosition, mockContext, mockToken)
		expect(callCount).toBe(1)

		// Second call - should be debounced with initial 300ms delay
		const mockDocument2 = new MockTextDocument(vscode.Uri.file("/test2.ts"), "const a = 1\nconst b = 2")
		const mockPosition2 = new vscode.Position(0, 11)
		const promise2 = provider.provideInlineCompletionItems(mockDocument2, mockPosition2, mockContext, mockToken)

		// Should not have called yet (debounced)
		expect(callCount).toBe(1)

		// Advance 200ms - should still be waiting
		await vi.advanceTimersByTimeAsync(200)
		expect(callCount).toBe(1)

		// Advance remaining 100ms to complete the 300ms debounce
		await vi.advanceTimersByTimeAsync(100)
		await promise2
		expect(callCount).toBe(2)
	})

	it("should record latency and not update debounce delay until 10 samples collected", () => {
		// Record 9 latencies - should not update debounce delay yet
		for (let i = 0; i < 9; i++) {
			provider.recordLatency(100 + i * 10) // 100, 110, 120, ..., 180
		}

		// Access private field via any cast for testing
		const providerAny = provider as any
		expect(providerAny.latencyHistory.length).toBe(9)
		expect(providerAny.debounceDelayMs).toBe(300) // Still initial value
	})

	it("should update debounce delay to average after exceeding 10 samples", () => {
		// Record 10 latencies of 200ms each - debounce delay not updated yet
		for (let i = 0; i < 10; i++) {
			provider.recordLatency(200)
		}

		// Access private field via any cast for testing
		const providerAny = provider as any
		expect(providerAny.latencyHistory.length).toBe(10)
		expect(providerAny.debounceDelayMs).toBe(300) // Still initial value (not updated until > 10)

		// Record 11th latency - now debounce delay is updated
		provider.recordLatency(200)
		expect(providerAny.latencyHistory.length).toBe(10) // Still 10 (oldest removed)
		expect(providerAny.debounceDelayMs).toBe(200) // Now updated to average
	})

	it("should maintain rolling window of 10 latencies", () => {
		// Record 15 latencies
		for (let i = 0; i < 15; i++) {
			provider.recordLatency(100 + i * 10) // 100, 110, 120, ..., 240
		}

		// Access private field via any cast for testing
		const providerAny = provider as any
		expect(providerAny.latencyHistory.length).toBe(10) // Only last 10 kept

		// Last 10 values should be 150, 160, 170, 180, 190, 200, 210, 220, 230, 240
		// Average = (150+160+170+180+190+200+210+220+230+240) / 10 = 195
		expect(providerAny.debounceDelayMs).toBe(195)
	})

	it("should update debounce delay on each new latency after exceeding 10 samples", () => {
		// Record 11 latencies of 200ms each (need > 10 to trigger update)
		for (let i = 0; i < 11; i++) {
			provider.recordLatency(200)
		}

		const providerAny = provider as any
		expect(providerAny.debounceDelayMs).toBe(200)

		// Add one more latency of 300ms
		// New average = (200*9 + 300) / 10 = 210
		provider.recordLatency(300)
		expect(providerAny.debounceDelayMs).toBe(210)

		// Add another latency of 400ms
		// New average = (200*8 + 300 + 400) / 10 = 230
		provider.recordLatency(400)
		expect(providerAny.debounceDelayMs).toBe(230)
	})

	it("should use adaptive debounce delay after collecting enough samples", async () => {
		let callCount = 0
		vi.mocked(mockModel.generateResponse).mockImplementation(async () => {
			callCount++
			return {
				cost: 0.01,
				inputTokens: 100,
				outputTokens: 50,
				cacheWriteTokens: 0,
				cacheReadTokens: 0,
			}
		})

		// Record 11 latencies of 150ms each to set debounce delay to 150ms
		// (need > 10 to trigger update)
		for (let i = 0; i < 11; i++) {
			provider.recordLatency(150)
		}

		const providerAny = provider as any
		expect(providerAny.debounceDelayMs).toBe(150)

		// First call - executes immediately (leading edge)
		await provider.provideInlineCompletionItems(mockDocument, mockPosition, mockContext, mockToken)
		expect(callCount).toBe(1)

		// Second call - should be debounced with adaptive 150ms delay
		const mockDocument2 = new MockTextDocument(vscode.Uri.file("/test2.ts"), "const a = 1\nconst b = 2")
		const mockPosition2 = new vscode.Position(0, 11)
		const promise2 = provider.provideInlineCompletionItems(mockDocument2, mockPosition2, mockContext, mockToken)

		// Should not have called yet (debounced)
		expect(callCount).toBe(1)

		// Advance 100ms - should still be waiting (150ms debounce)
		await vi.advanceTimersByTimeAsync(100)
		expect(callCount).toBe(1)

		// Advance remaining 50ms to complete the 150ms debounce
		await vi.advanceTimersByTimeAsync(50)
		await promise2
		expect(callCount).toBe(2)
	})

	it("should record latency from LLM requests", async () => {
		// Mock the model to simulate a delay
		vi.mocked(mockModel.generateResponse).mockImplementation(async (_sys, _user, onChunk) => {
			// Simulate some processing time
			if (onChunk) {
				onChunk({ type: "text", text: "<COMPLETION>" })
				onChunk({ type: "text", text: "console.log('test');" })
				onChunk({ type: "text", text: "</COMPLETION>" })
			}
			return {
				cost: 0.01,
				inputTokens: 100,
				outputTokens: 50,
				cacheWriteTokens: 0,
				cacheReadTokens: 0,
			}
		})

		const providerAny = provider as any
		expect(providerAny.latencyHistory.length).toBe(0)

		// Make a request that will record latency
		await provider.provideInlineCompletionItems(mockDocument, mockPosition, mockContext, mockToken)

		// Latency should have been recorded
		expect(providerAny.latencyHistory.length).toBe(1)
	})
})
2129+
19572130
describe("telemetry tracking", () => {
19582131
beforeEach(() => {
19592132
vi.mocked(telemetry.captureAcceptSuggestion).mockClear()

0 commit comments

Comments
 (0)