Skip to content

Commit 622cf2e

Browse files
authored
feat: add retry information display with exponential backoff in task logs (#533)
# Enhanced Retry Information Display in Edge Worker Logs This PR adds improved retry information to the Edge Worker logs when a task fails but has retries remaining. The changes include: - Added `baseDelay` parameter to the task context to support exponential backoff calculations - Enhanced the `FancyFormatter` to display retry information with a visual indicator (↻) showing: - Current retry attempt number - Maximum retries allowed - Calculated delay before the next retry attempt - Updated the `SimpleFormatter` to include retry information in key-value format for structured logging - Implemented exponential backoff calculation: `baseDelay * 2^(attempt-1)` - Added comprehensive unit tests for both formatters to verify retry information display These changes make it easier to understand retry behavior when tasks fail, providing clear visibility into when a task will be retried and with what delay.
1 parent fb10d8b commit 622cf2e

File tree

4 files changed

+219
-1
lines changed

4 files changed

+219
-1
lines changed

pkgs/edge-worker/src/flow/StepTaskExecutor.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ export class StepTaskExecutor<TFlow extends AnyFlow, TContext extends StepTaskHa
6161
const stepDef = this.flow.getStepDefinition(this.stepTask.step_slug);
6262
// maxAttempts includes the initial attempt, so maxRetries = maxAttempts - 1
6363
const maxRetries = stepDef?.options?.maxAttempts ? stepDef.options.maxAttempts - 1 : undefined;
64+
// baseDelay from step options for retry delay calculation
65+
const baseDelay = stepDef?.options?.baseDelay;
6466

6567
return {
6668
flowSlug: this.stepTask.flow_slug,
@@ -73,6 +75,7 @@ export class StepTaskExecutor<TFlow extends AnyFlow, TContext extends StepTaskHa
7375
queueName: this.workerIdentity.queueName,
7476
retryAttempt,
7577
maxRetries,
78+
baseDelay,
7679
};
7780
}
7881

pkgs/edge-worker/src/platform/logging.ts

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,22 @@ class FancyFormatter {
108108
let result = `${prefix} ${icon} ${path}`;
109109
if (ids) result += ` ${ids}`;
110110
result += `\n${prefix} ${errorMsg}`;
111+
112+
// Show retry info if there are retries remaining
113+
// retryAttempt = read_ct from pgmq (1-indexed: read_ct=1 is first attempt, read_ct=2 is first retry)
114+
// maxRetries = maxAttempts - 1 (e.g., maxAttempts=4 means 3 retries allowed)
115+
// Show retry pending when: retryAttempt <= maxRetries (still have retries left)
116+
if (ctx.retryAttempt !== undefined && ctx.maxRetries !== undefined && ctx.retryAttempt <= ctx.maxRetries) {
117+
const retryIcon = colorize('↻', ANSI.yellow, this.colorsEnabled);
118+
// Exponential backoff: baseDelay * 2^(attempt-1)
119+
// Example with baseDelay=5: attempt 1 -> 5s, attempt 2 -> 10s, attempt 3 -> 20s
120+
// Default 5s matches pgflow SQL default in create_flow/add_step
121+
const baseDelay = ctx.baseDelay ?? 5;
122+
const delay = baseDelay * Math.pow(2, ctx.retryAttempt - 1);
123+
const retryInfo = colorize(`retry ${ctx.retryAttempt}/${ctx.maxRetries} in ${delay}s`, ANSI.yellow, this.colorsEnabled);
124+
result += `\n${prefix} ${retryIcon} ${retryInfo}`;
125+
}
126+
111127
return result;
112128
}
113129

@@ -186,7 +202,17 @@ class SimpleFormatter {
186202

187203
taskFailed(ctx: TaskLogContext, error: Error): string {
188204
// Phase 3b: worker=X queue=Y flow=Z step=W format
189-
return `[VERBOSE] worker=${ctx.workerName} queue=${ctx.queueName} flow=${ctx.flowSlug} step=${ctx.stepSlug} status=failed error="${error.message}" run_id=${ctx.runId} msg_id=${ctx.msgId} worker_id=${ctx.workerId}`;
205+
let result = `[VERBOSE] worker=${ctx.workerName} queue=${ctx.queueName} flow=${ctx.flowSlug} step=${ctx.stepSlug} status=failed error="${error.message}" run_id=${ctx.runId} msg_id=${ctx.msgId} worker_id=${ctx.workerId}`;
206+
207+
// Add retry info if there are retries remaining (see FancyFormatter for detailed comments)
208+
// Default 5s matches pgflow SQL default in create_flow/add_step
209+
if (ctx.retryAttempt !== undefined && ctx.maxRetries !== undefined && ctx.retryAttempt <= ctx.maxRetries) {
210+
const baseDelay = ctx.baseDelay ?? 5;
211+
const delay = baseDelay * Math.pow(2, ctx.retryAttempt - 1);
212+
result += ` retry=${ctx.retryAttempt}/${ctx.maxRetries} retry_delay_s=${delay}`;
213+
}
214+
215+
return result;
190216
}
191217

192218
polling(): string {

pkgs/edge-worker/src/platform/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ export interface TaskLogContext {
1313
queueName: string;
1414
retryAttempt?: number;
1515
maxRetries?: number;
16+
/** Base delay in seconds for retry calculation (from step options) */
17+
baseDelay?: number;
1618
}
1719

1820
/**

pkgs/edge-worker/tests/unit/platform/formatters.test.ts

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ interface TaskLogContext {
1717
queueName: string;
1818
retryAttempt?: number;
1919
maxRetries?: number;
20+
baseDelay?: number;
2021
}
2122

2223
interface StartupContext {
@@ -615,3 +616,189 @@ Deno.test('FancyFormatter - taskFailed at DEBUG level includes identifiers', ()
615616
restore();
616617
}
617618
});
619+
620+
// ============================================================
621+
// Phase 5: Retry Information Display Tests
622+
// ============================================================
623+
624+
Deno.test('FancyFormatter - taskFailed shows retry pending info when retries remaining', () => {
625+
const consoleSpy = spy(console, 'log');
626+
627+
try {
628+
const factory = createLoggingFactory({
629+
SUPABASE_ANON_KEY: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6ImFub24iLCJleHAiOjE5ODM4MTI5OTZ9.CRXP1A7WOeoJeXxjNni43kdQwgnWNReilDMblYTn_I0',
630+
});
631+
const logger = factory.createLogger('test');
632+
633+
const ctx: TaskLogContext = {
634+
flowSlug: 'greetUser',
635+
stepSlug: 'sendEmail',
636+
msgId: '1044',
637+
runId: 'run-123',
638+
workerId: 'worker-1',
639+
workerName: 'greet-user-worker',
640+
queueName: 'orders',
641+
retryAttempt: 1, // First attempt (read_ct=1)
642+
maxRetries: 3, // 3 retries allowed
643+
baseDelay: 5, // 5 second base delay
644+
};
645+
646+
const error = new Error('Request failed');
647+
logger.taskFailed(ctx, error);
648+
649+
assertSpyCalls(consoleSpy, 1);
650+
const output = consoleSpy.calls[0].args[0] as string;
651+
652+
// Should contain retry icon and info
653+
assertStringIncludes(output, '↻');
654+
assertStringIncludes(output, 'retry 1/3');
655+
assertStringIncludes(output, '5s'); // baseDelay * 2^0 = 5s for first attempt
656+
} finally {
657+
restore();
658+
}
659+
});
660+
661+
Deno.test('FancyFormatter - taskFailed calculates exponential backoff delay correctly', () => {
662+
const consoleSpy = spy(console, 'log');
663+
664+
try {
665+
const factory = createLoggingFactory({
666+
SUPABASE_ANON_KEY: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6ImFub24iLCJleHAiOjE5ODM4MTI5OTZ9.CRXP1A7WOeoJeXxjNni43kdQwgnWNReilDMblYTn_I0',
667+
});
668+
const logger = factory.createLogger('test');
669+
670+
const ctx: TaskLogContext = {
671+
flowSlug: 'greetUser',
672+
stepSlug: 'sendEmail',
673+
msgId: '1044',
674+
runId: 'run-123',
675+
workerId: 'worker-1',
676+
workerName: 'greet-user-worker',
677+
queueName: 'orders',
678+
retryAttempt: 2, // Second attempt (first retry)
679+
maxRetries: 3,
680+
baseDelay: 5, // 5 second base delay
681+
};
682+
683+
const error = new Error('Request failed');
684+
logger.taskFailed(ctx, error);
685+
686+
assertSpyCalls(consoleSpy, 1);
687+
const output = consoleSpy.calls[0].args[0] as string;
688+
689+
// Should show retry 2/3 with 10s delay (5 * 2^1 = 10)
690+
assertStringIncludes(output, 'retry 2/3');
691+
assertStringIncludes(output, '10s');
692+
} finally {
693+
restore();
694+
}
695+
});
696+
697+
Deno.test('FancyFormatter - taskFailed does NOT show retry info when no retries remaining', () => {
698+
const consoleSpy = spy(console, 'log');
699+
700+
try {
701+
const factory = createLoggingFactory({
702+
SUPABASE_ANON_KEY: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6ImFub24iLCJleHAiOjE5ODM4MTI5OTZ9.CRXP1A7WOeoJeXxjNni43kdQwgnWNReilDMblYTn_I0',
703+
});
704+
const logger = factory.createLogger('test');
705+
706+
const ctx: TaskLogContext = {
707+
flowSlug: 'greetUser',
708+
stepSlug: 'sendEmail',
709+
msgId: '1044',
710+
runId: 'run-123',
711+
workerId: 'worker-1',
712+
workerName: 'greet-user-worker',
713+
queueName: 'orders',
714+
retryAttempt: 4, // 4th attempt, exceeds maxRetries
715+
maxRetries: 3, // Only 3 retries allowed
716+
baseDelay: 5,
717+
};
718+
719+
const error = new Error('Request failed');
720+
logger.taskFailed(ctx, error);
721+
722+
assertSpyCalls(consoleSpy, 1);
723+
const output = consoleSpy.calls[0].args[0] as string;
724+
725+
// Should NOT contain retry icon when no retries remaining
726+
assertEquals(output.includes('↻'), false);
727+
} finally {
728+
restore();
729+
}
730+
});
731+
732+
Deno.test('SimpleFormatter - taskFailed shows retry info in key=value format', () => {
733+
const consoleSpy = spy(console, 'log');
734+
735+
try {
736+
const factory = createLoggingFactory({
737+
SUPABASE_ANON_KEY: 'some-production-key',
738+
EDGE_WORKER_LOG_LEVEL: 'verbose',
739+
});
740+
const logger = factory.createLogger('test');
741+
742+
const ctx: TaskLogContext = {
743+
flowSlug: 'greetUser',
744+
stepSlug: 'sendEmail',
745+
msgId: '1044',
746+
runId: 'run-123',
747+
workerId: 'worker-1',
748+
workerName: 'greet-user-worker',
749+
queueName: 'orders',
750+
retryAttempt: 1,
751+
maxRetries: 3,
752+
baseDelay: 5,
753+
};
754+
755+
const error = new Error('Request failed');
756+
logger.taskFailed(ctx, error);
757+
758+
assertSpyCalls(consoleSpy, 1);
759+
const output = consoleSpy.calls[0].args[0] as string;
760+
761+
// Should contain retry info in key=value format
762+
assertStringIncludes(output, 'retry=1/3');
763+
assertStringIncludes(output, 'retry_delay_s=5');
764+
} finally {
765+
restore();
766+
}
767+
});
768+
769+
Deno.test('SimpleFormatter - taskFailed does NOT show retry info when no retries remaining', () => {
770+
const consoleSpy = spy(console, 'log');
771+
772+
try {
773+
const factory = createLoggingFactory({
774+
SUPABASE_ANON_KEY: 'some-production-key',
775+
EDGE_WORKER_LOG_LEVEL: 'verbose',
776+
});
777+
const logger = factory.createLogger('test');
778+
779+
const ctx: TaskLogContext = {
780+
flowSlug: 'greetUser',
781+
stepSlug: 'sendEmail',
782+
msgId: '1044',
783+
runId: 'run-123',
784+
workerId: 'worker-1',
785+
workerName: 'greet-user-worker',
786+
queueName: 'orders',
787+
retryAttempt: 4, // Exceeds maxRetries
788+
maxRetries: 3,
789+
baseDelay: 5,
790+
};
791+
792+
const error = new Error('Request failed');
793+
logger.taskFailed(ctx, error);
794+
795+
assertSpyCalls(consoleSpy, 1);
796+
const output = consoleSpy.calls[0].args[0] as string;
797+
798+
// Should NOT contain retry info when no retries remaining
799+
assertEquals(output.includes('retry='), false);
800+
assertEquals(output.includes('retry_delay_s='), false);
801+
} finally {
802+
restore();
803+
}
804+
});

0 commit comments

Comments
 (0)