Example: Testing an Agent
Write a complete test suite for an AgentForge agent using MockLLM and TestHarness.
Overview
This example shows how to test the support triage agent with deterministic, reproducible tests.
Full Test Suite
import { describe, it, expect } from 'vitest';
import {
createMockLLM, createTestHarness, defineTool, defineAgent,
} from '@ahzan-agentforge/core';
import { z } from 'zod';
// Tool and agent config (without LLM — we'll inject MockLLM)
// Schemas for the classify-ticket tool, pulled out so the contract reads on its own.
const classifyTicketInput = z.object({
  ticketId: z.string(),
  content: z.string(),
});
const classifyTicketOutput = z.object({
  category: z.enum(['billing', 'technical', 'general']),
  priority: z.enum(['low', 'medium', 'high']),
});

/**
 * Stub classification tool used by the tests in this example.
 * It always answers "technical" / "medium", whatever ticket is passed in,
 * so assertions never depend on real classification logic.
 */
const classifyTool = defineTool({
  name: 'classify-ticket',
  description: 'Classify a support ticket',
  input: classifyTicketInput,
  output: classifyTicketOutput,
  // The arguments are destructured to mirror the input schema; the stub does not read them.
  async execute({ ticketId, content }) {
    return { category: 'technical', priority: 'medium' };
  },
});
// Tools exposed to the agent under test.
const triageTools = [classifyTool];

/**
 * Agent definition shared by the tests in this example.
 * No LLM is configured here: each test hands its own mock to createTestHarness.
 */
const agentConfig = {
  name: 'support-triage',
  description: 'Triages support tickets',
  tools: triageTools,
  systemPrompt: 'Classify and route support tickets.',
  maxSteps: 5,
};
describe('support-triage agent', () => {
it('should classify a technical ticket', async () => {
  // Two scripted responses: a classify-ticket tool call, then a plain-text reply.
  const scriptedResponses = [
    {
      toolCalls: [
        {
          name: 'classify-ticket',
          input: { ticketId: '123', content: '500 error on checkout' },
        },
      ],
    },
    { text: 'Ticket #123 classified as technical, medium priority.' },
  ];
  const llm = createMockLLM({ responses: scriptedResponses });
  const harness = createTestHarness({ agent: agentConfig, llm });

  const outcome = await harness.run({ task: 'Triage ticket #123: 500 error on checkout' });

  // The run finishes, the tool is invoked exactly once, and no error is recorded.
  expect(outcome.status).toBe('completed');
  expect(outcome.toolCalls('classify-ticket')).toHaveLength(1);
  expect(outcome.hasError()).toBe(false);
});
it('should complete within budget', async () => {
  // Single source of truth for the budget and for the assertion bound below.
  const tokenBudget = 10_000;

  const scriptedResponses = [
    { toolCalls: [{ name: 'classify-ticket', input: { ticketId: '1', content: 'test' } }] },
    { text: 'Done.' },
  ];
  // NOTE(review): defaultUsage presumably applies to each scripted response —
  // confirm against the MockLLM reference.
  const usagePerResponse = { inputTokens: 500, outputTokens: 200 };
  const llm = createMockLLM({
    responses: scriptedResponses,
    defaultUsage: usagePerResponse,
  });

  const budgetedAgent = { ...agentConfig, budget: { maxTokens: tokenBudget } };
  const harness = createTestHarness({ agent: budgetedAgent, llm });

  const outcome = await harness.run({ task: 'Triage ticket' });

  expect(outcome.status).toBe('completed');
  expect(outcome.trace.summary.totalTokens).toBeLessThan(tokenBudget);
});
it('should handle tool errors gracefully', async () => {
  // Same name and input schema as the regular tool, but every call throws.
  const brokenClassifier = defineTool({
    name: 'classify-ticket',
    description: 'Classify a support ticket',
    input: z.object({ ticketId: z.string(), content: z.string() }),
    output: z.object({ category: z.string(), priority: z.string() }),
    async execute() {
      throw new Error('Database unavailable');
    },
    // NOTE(review): presumably caps the tool at a single attempt so the failure
    // surfaces immediately — confirm against the defineTool reference.
    retry: { maxAttempts: 1 },
  });

  // The mock requests the tool, then replies with a plain-text apology.
  const llm = createMockLLM({
    responses: [
      { toolCalls: [{ name: 'classify-ticket', input: { ticketId: '1', content: 'test' } }] },
      { text: 'Unable to classify ticket due to a system error.' },
    ],
  });

  const agentWithBrokenTool = { ...agentConfig, tools: [brokenClassifier] };
  const harness = createTestHarness({ agent: agentWithBrokenTool, llm });

  const outcome = await harness.run({ task: 'Triage ticket' });

  // The run is still expected to complete, with a 'tool' error recorded.
  expect(outcome.status).toBe('completed');
  expect(outcome.hasError('tool')).toBe(true);
});
it('should step through execution', async () => {
  const scriptedResponses = [
    { toolCalls: [{ name: 'classify-ticket', input: { ticketId: '1', content: 'test' } }] },
    { text: 'Classified.' },
  ];
  const llm = createMockLLM({ responses: scriptedResponses });
  const harness = createTestHarness({ agent: agentConfig, llm });

  const stepper = await harness.startDebug({ task: 'Triage ticket' });

  // First step: the LLM decides to call classify-ticket, so the run is not done yet.
  const firstStep = await stepper.next();
  expect(firstStep.done).toBe(false);

  // Second step: the LLM returns its final text, which ends the run.
  const secondStep = await stepper.next();
  expect(secondStep.done).toBe(true);

  const outcome = await stepper.finish();
  expect(outcome.status).toBe('completed');
});
});

Key Points
- Use `createMockLLM()` for deterministic responses
- Use `createTestHarness()` for rich result assertions
- `toolCalls()` and `hasError()` are methods
- `MockLLM` responses use a `text` field (not `content`)
- `StepDebugger` lets you verify execution order
Next Steps
- MockLLM — mock LLM reference
- TestHarness — harness reference
- Testing Recipes — more patterns