TestHarness
Run agents in test mode with rich assertions — tool calls, errors, messages, and steps.
createTestHarness
import { createTestHarness, createMockLLM } from '@ahzan-agentforge/core';
const harness = createTestHarness({
agent: myAgentConfig, // AgentConfig or Agent instance
llm: createMockLLM({ responses: [...] }),
});
TestHarnessConfig
interface TestHarnessConfig {
agent: AgentConfig | Agent;
llm?: LLM; // Override the agent's LLM
}
Running Tests
const result = await harness.run({ task: 'Do something' });
TestResult
TestResult extends RunResult with test-specific helpers. Note that toolCalls and hasError are methods, while messages is a property:
interface TestResult extends RunResult {
toolCalls(toolName?: string): ToolCallRecord[]; // METHOD, not property
stepAt(index: number): StepRecord | undefined;
hasError(source?: ErrorSource): boolean; // METHOD, not property
messages: Message[];
}
Assertions
const result = await harness.run({ task: 'Find and process data' });
// Check status
expect(result.status).toBe('completed');
// Check tool calls (method, not property!)
expect(result.toolCalls()).toHaveLength(2);
expect(result.toolCalls('search')).toHaveLength(1);
expect(result.toolCalls('process')).toHaveLength(1);
// Check for errors
expect(result.hasError()).toBe(false);
expect(result.hasError('tool')).toBe(false);
// Check specific steps
const firstStep = result.stepAt(0);
expect(firstStep?.type).toBe('llm');
// Check messages
expect(result.messages).toContainEqual(
expect.objectContaining({ role: 'assistant' })
);
ToolCallRecord
interface ToolCallRecord {
name: string;
input: unknown;
output: unknown;
error?: string;
retries: number;
duration: number;
}
Next Steps
- StepDebugger — step-by-step debugging
- MockLLM — scripted responses