Example: Testing an Agent

Write a complete test suite for an AgentForge agent using MockLLM and TestHarness.

Overview

This example shows how to test the support triage agent with deterministic, reproducible tests.

Full Test Suite

import { describe, it, expect } from 'vitest';
import {
  createMockLLM, createTestHarness, defineTool, defineAgent,
} from '@ahzan-agentforge/core';
import { z } from 'zod';

// Tool and agent config (without an LLM — each test injects its own MockLLM)
// Input/output schemas for the classify-ticket tool, named separately so the
// tool definition below reads top-down.
const classifyTicketInput = z.object({ ticketId: z.string(), content: z.string() });
const classifyTicketOutput = z.object({
  category: z.enum(['billing', 'technical', 'general']),
  priority: z.enum(['low', 'medium', 'high']),
});

// Stub classifier: returns the same classification for every ticket, so the
// tests below never depend on the ticket text.
const classifyTool = defineTool({
  name: 'classify-ticket',
  description: 'Classify a support ticket',
  input: classifyTicketInput,
  output: classifyTicketOutput,
  // The arguments are destructured to show the input shape; this stub does not read them.
  async execute({ ticketId, content }) {
    return { category: 'technical', priority: 'medium' };
  },
});

// Shared agent definition reused by every test below. It deliberately has no
// `llm` entry: each test passes its own MockLLM to createTestHarness, and
// individual tests spread this object to override fields such as `tools` or
// to add a `budget`.
const agentConfig = {
  name: 'support-triage',
  description: 'Triages support tickets',
  tools: [classifyTool],
  systemPrompt: 'Classify and route support tickets.',
  maxSteps: 5,
};

describe('support-triage agent', () => {
  // Happy path: the scripted LLM first requests one classify-ticket call and
  // then answers with the final text.
  it('should classify a technical ticket', async () => {
    const classifyCall = {
      name: 'classify-ticket',
      input: { ticketId: '123', content: '500 error on checkout' },
    };
    const llm = createMockLLM({
      responses: [
        { toolCalls: [classifyCall] },
        { text: 'Ticket #123 classified as technical, medium priority.' },
      ],
    });

    const testHarness = createTestHarness({ agent: agentConfig, llm });
    const outcome = await testHarness.run({
      task: 'Triage ticket #123: 500 error on checkout',
    });

    // toolCalls() and hasError() are methods on the run result, not properties.
    expect(outcome.status).toBe('completed');
    expect(outcome.toolCalls('classify-ticket')).toHaveLength(1);
    expect(outcome.hasError()).toBe(false);
  });

  // Budget check: runs the agent with a 10,000-token budget and asserts that
  // the total recorded in the trace summary stays below it.
  it('should complete within budget', async () => {
    const tokenBudget = 10_000;
    const scriptedResponses = [
      { toolCalls: [{ name: 'classify-ticket', input: { ticketId: '1', content: 'test' } }] },
      { text: 'Done.' },
    ];
    const llm = createMockLLM({
      responses: scriptedResponses,
      defaultUsage: { inputTokens: 500, outputTokens: 200 },
    });

    // Same agent as the other tests, with a token budget layered on top.
    const budgetedAgent = { ...agentConfig, budget: { maxTokens: tokenBudget } };
    const testHarness = createTestHarness({ agent: budgetedAgent, llm });
    const outcome = await testHarness.run({ task: 'Triage ticket' });

    expect(outcome.status).toBe('completed');
    expect(outcome.trace.summary.totalTokens).toBeLessThan(tokenBudget);
  });

  // Failure path: swaps in a classify-ticket tool whose execute always throws,
  // then asserts the run still completes while reporting a tool error.
  it('should handle tool errors gracefully', async () => {
    const brokenClassifier = defineTool({
      name: 'classify-ticket',
      description: 'Classify a support ticket',
      input: z.object({ ticketId: z.string(), content: z.string() }),
      output: z.object({ category: z.string(), priority: z.string() }),
      async execute() {
        throw new Error('Database unavailable');
      },
      // NOTE(review): presumably limits the tool to a single attempt (no retries) — confirm.
      retry: { maxAttempts: 1 },
    });

    const scriptedResponses = [
      { toolCalls: [{ name: 'classify-ticket', input: { ticketId: '1', content: 'test' } }] },
      { text: 'Unable to classify ticket due to a system error.' },
    ];
    const llm = createMockLLM({ responses: scriptedResponses });

    // Replace the shared config's tool list with the failing tool only.
    const agentWithBrokenTool = { ...agentConfig, tools: [brokenClassifier] };
    const testHarness = createTestHarness({ agent: agentWithBrokenTool, llm });
    const outcome = await testHarness.run({ task: 'Triage ticket' });

    expect(outcome.status).toBe('completed');
    expect(outcome.hasError('tool')).toBe(true);
  });

  // Step debugger: advances the run one step at a time and checks the `done`
  // flag after each step before collecting the final result.
  it('should step through execution', async () => {
    const scriptedResponses = [
      { toolCalls: [{ name: 'classify-ticket', input: { ticketId: '1', content: 'test' } }] },
      { text: 'Classified.' },
    ];
    const llm = createMockLLM({ responses: scriptedResponses });

    const testHarness = createTestHarness({ agent: agentConfig, llm });
    const stepper = await testHarness.startDebug({ task: 'Triage ticket' });

    // First step: the LLM asks for classify-ticket, so the run is not finished.
    const firstStep = await stepper.next();
    expect(firstStep.done).toBe(false);

    // Second step: the LLM returns its final text, so the run is finished.
    const secondStep = await stepper.next();
    expect(secondStep.done).toBe(true);

    const outcome = await stepper.finish();
    expect(outcome.status).toBe('completed');
  });
});

Key Points

Use createMockLLM() for deterministic responses
Use createTestHarness() for rich result assertions
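toolCalls() and hasError() are methods on the run result — call them (e.g. result.toolCalls('classify-ticket'), result.hasError('tool')) rather than reading them as properties
MockLLM responses use a text field (not content) for the model's text output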
StepDebugger lets you verify execution order

Next Steps

MockLLM — mock LLM reference
TestHarness — harness reference
Testing Recipes — more patterns

Example: Testing an Agent

Overview

Full Test Suite

Key Points

Next Steps

On this page