AgentForge

TestHarness

Run agents in test mode and make rich assertions about tool calls, errors, messages, and steps.

createTestHarness

import { createTestHarness, createMockLLM } from '@ahzan-agentforge/core';

// Build a harness around the agent under test. `llm` is optional; when
// supplied, it overrides the agent's own LLM.
const harness = createTestHarness({
  agent: myAgentConfig,  // AgentConfig or Agent instance
  llm: createMockLLM({ responses: [...] }),  // `[...]` is a placeholder: supply the mock's responses here
});

TestHarnessConfig

/** Options object passed to `createTestHarness`. */
interface TestHarnessConfig {
  /** The agent under test: either an `AgentConfig` or an `Agent` instance. */
  agent: AgentConfig | Agent;
  /** Optional; when omitted, the agent's own LLM is presumably used (confirm against the implementation). */
  llm?: LLM;  // Override the agent's LLM
}

Running Tests

// `run` is asynchronous; the awaited value is used as a TestResult in the sections below.
const result = await harness.run({ task: 'Do something' });

TestResult

TestResult extends RunResult with test-specific helpers — three methods (`toolCalls`, `stepAt`, `hasError`) and a `messages` property:

/**
 * Result of a harness run: everything on `RunResult` (e.g. `status`) plus
 * helpers for inspecting tool calls, steps, errors, and messages.
 */
interface TestResult extends RunResult {
  /**
   * Returns the recorded tool calls. With no argument, all calls are returned;
   * with `toolName`, only the calls to that tool.
   */
  toolCalls(toolName?: string): ToolCallRecord[];  // METHOD, not property
  /**
   * Returns the step at `index` (zero-based: `stepAt(0)` is the first step).
   * May return `undefined` — presumably when no step exists at that index.
   */
  stepAt(index: number): StepRecord | undefined;
  /**
   * Reports whether the run recorded an error. With `source`, the check is
   * presumably restricted to errors from that source (e.g. `'tool'`) — confirm
   * against the `ErrorSource` definition.
   */
  hasError(source?: ErrorSource): boolean;          // METHOD, not property
  /** Messages from the run; a plain property, unlike the methods above. */
  messages: Message[];
}

Assertions

// NOTE: `expect` is not imported in this snippet; the matchers used here
// (`toHaveLength`, `toContainEqual`, `expect.objectContaining`) follow the
// Jest-style API, so run this inside a Jest-compatible test runner.
const result = await harness.run({ task: 'Find and process data' });

// Check status
expect(result.status).toBe('completed');

// Check tool calls (method, not property!)
// No argument returns every call; a tool name narrows to that tool's calls.
expect(result.toolCalls()).toHaveLength(2);
expect(result.toolCalls('search')).toHaveLength(1);
expect(result.toolCalls('process')).toHaveLength(1);

// Check for errors
// No argument checks for any error; an ErrorSource such as 'tool' narrows the check.
expect(result.hasError()).toBe(false);
expect(result.hasError('tool')).toBe(false);

// Check specific steps
// stepAt can return undefined, hence the optional chaining below.
const firstStep = result.stepAt(0);
expect(firstStep?.type).toBe('llm');

// Check messages
// Passes if at least one message has role 'assistant'; other fields are ignored.
expect(result.messages).toContainEqual(
  expect.objectContaining({ role: 'assistant' })
);

ToolCallRecord

/** One recorded tool invocation, as returned by `TestResult.toolCalls()`. */
interface ToolCallRecord {
  /** Tool name — presumably the value `toolCalls(toolName)` matches against. */
  name: string;
  /** Input passed to the tool. Typed `unknown`: narrow before inspecting. */
  input: unknown;
  /** Output of the tool. Typed `unknown`: narrow before inspecting. */
  output: unknown;
  /** Error text; optional — presumably set only when the call failed. */
  error?: string;
  /** Retry count. NOTE(review): retries only, or total attempts? Not shown here — confirm. */
  retries: number;
  /** Duration of the call. NOTE(review): units are not stated — presumably milliseconds; confirm. */
  duration: number;
}

Next Steps