Testing Recipes
Common testing patterns — tool call assertions, error handling, streaming, and multi-agent tests.
Test Tool Call Order
it('should search then process', async () => {
const mock = createMockLLM({
responses: [
{ toolCalls: [{ name: 'search', input: { q: 'test' } }] },
{ toolCalls: [{ name: 'process', input: { data: 'found' } }] },
{ text: 'Done processing.' },
],
});
const harness = createTestHarness({ agent: config, llm: mock });
const result = await harness.run({ task: 'Search and process' });
const calls = result.toolCalls();
expect(calls[0].name).toBe('search');
expect(calls[1].name).toBe('process');
});Test Error Recovery
it('should handle tool errors gracefully', async () => {
const failingTool = defineTool({
name: 'flaky-api',
description: 'Sometimes fails',
input: z.object({}),
output: z.object({ data: z.string() }),
execute: async () => { throw new Error('API down'); },
retry: { maxAttempts: 1 },
});
const mock = createMockLLM({
responses: [
{ toolCalls: [{ name: 'flaky-api', input: {} }] },
{ text: 'The API is currently unavailable.' },
],
});
const harness = createTestHarness({
agent: { ...config, tools: [failingTool] },
llm: mock,
});
const result = await harness.run({ task: 'Fetch data' });
expect(result.status).toBe('completed');
expect(result.toolCalls('flaky-api')[0].error).toBeDefined();
});Test Budget Limits
it('should stop when budget exceeded', async () => {
const mock = createMockLLM({
responses: Array(20).fill({ text: 'thinking...' }),
defaultUsage: { inputTokens: 10_000, outputTokens: 5_000 },
});
const harness = createTestHarness({
agent: { ...config, budget: { maxTokens: 20_000 } },
llm: mock,
});
const result = await harness.run({ task: 'Expensive task' });
expect(result.status).toBe('failed');
});Test with StepDebugger
it('should create order before charging', async () => {
const dbg = await harness.startDebug({ task: 'Process payment' });
const step1 = await dbg.next();
expect(step1.step.toolCall?.name).toBe('create-order');
const step2 = await dbg.next();
expect(step2.step.toolCall?.name).toBe('charge-payment');
const result = await dbg.finish();
expect(result.status).toBe('completed');
});Next Steps
- MockLLM — scripted responses
- TestHarness — test runner