Intelligence
Harness
Dispatch complex tasks to coding agents — Claude Code, Codex, Gemini CLI, and OpenCode
Dispatch complex tasks to coding agents that can read files, write code, run commands, and return structured results. Unlike app.ai(), which makes a single LLM call, the harness spawns a multi-turn agent with full tool access — Claude Code, Codex, Gemini CLI, or OpenCode — that can navigate codebases, run tests, and produce verified output under cost controls.
from pydantic import BaseModel
from agentfield import Agent, HarnessConfig
class MigrationPlan(BaseModel):
    # Output schema handed to the harness via `schema=`; the agent's result
    # is validated against these fields.
    sql_statements: list[str]  # ordered DDL/DML
    rollback_steps: list[str]  # how to undo each change
    risk_assessment: str  # safety analysis
# Structured output from a coding agent — reads files, writes SQL, validates
result = await app.harness(
f"Analyze the database schema and generate a migration plan: {description}",
schema=MigrationPlan, # validated typed output, not free text
max_budget_usd=1.00, # hard cost cap — agent stops if exceeded
)
# Full observability — cost, turns, duration, session replay
print(f"${result.cost_usd:.3f}") # $0.042
print(f"{result.num_turns} turns") # 8 turns
print(f"{result.duration_ms}ms") # 12400ms
# Swap providers per-call — Claude Code, Codex, Gemini, OpenCode
result = await app.harness(
"Generate a test suite for the payment module.",
provider="codex", # OpenAI Codex for this task
model="o4-mini",
max_turns=40,
)
# Different providers for different strengths
refactor = await app.harness(
"Refactor auth to use JWT. Run tests after.",
provider="gemini", # Gemini CLI for broad refactors
model="gemini-2.5-pro",
max_budget_usd=2.00,
)
# Robust error handling — typed failure modes, not just true/false
if result.is_error:
match result.failure_type:
case "timeout": log.warning(f"Timed out after {result.duration_ms}ms")
case "crash": log.error(f"Agent crashed: {result.error_message}")
case "schema": log.warning("Output didn't match schema after retries")
case "api_error": log.error("Transient API error")
else:
plan = result.parsed # MigrationPlan, fully validatedagent.reasoner('planDbMigration', async (ctx) => {
// Coding agent reads schema, writes SQL, returns structured output
const migrationPrompt =
  `Analyze the DB schema and generate a migration plan: ${ctx.input.description}`;
const result = await agent.harness(migrationPrompt, {
  schema: MigrationPlanSchema, // validated output, not free text
  maxBudgetUsd: 1.00, // hard cost cap
});

// Full observability on every call
console.log(`Cost: $${result.costUsd?.toFixed(3)}`);
console.log(`Turns: ${result.numTurns}`);
console.log(`Duration: ${result.durationMs}ms`);

// Swap providers per-call — Codex for test generation
const tests = await agent.harness(
  'Generate a comprehensive test suite for the payment module.',
  { provider: 'codex', model: 'o4-mini', maxTurns: 40 },
);

// Error handling: report a failed migration run before returning
if (result.isError) {
  console.error(`Harness failed: ${result.errorMessage}`);
}

return { plan: result.parsed, cost: result.costUsd };
});
// Coding agent reads files, writes SQL, returns validated struct
// NOTE: error returns are discarded with `_` throughout this snippet.
var plan MigrationPlan
schema, _ := harness.StructToJSONSchema(plan)

planOpts := harness.Options{MaxBudgetUSD: 1.00} // hard cost cap
result, _ := app.Harness(ctx,
	"Analyze the database schema and generate a migration plan: add user roles",
	schema, &plan, // structured output
	planOpts,
)

// Full observability
fmt.Printf("Turns: %d | Duration: %dms\n", result.NumTurns, result.DurationMS)

// Swap providers per-call
testOpts := harness.Options{
	Provider: "claude-code", // override provider for this task
	Model:    "sonnet",
	MaxTurns: 40,
}
testResult, _ := app.Harness(ctx,
	"Generate a test suite for the payment module.",
	nil, nil, // no schema
	testOpts,
)

// Typed failure handling for the migration-plan run
if result.IsError {
	switch result.FailureType {
	case harness.FailureTimeout:
		log.Printf("Timed out after %dms", result.DurationMS)
	case harness.FailureCrash:
		log.Printf("Agent crashed: %s", result.ErrorMessage)
	case harness.FailureSchema:
		log.Printf("Schema validation failed after retries")
	}
}
What just happened
The harness example did more than call a model: it launched a coding agent with tool access, enforced turn and budget limits, and returned structured output with execution metrics. That is the key difference between the harness and a single app.ai() call.
{
"provider": "codex",
"max_budget_usd": 1.0,
"num_turns": 8,
"duration_ms": 12400,
"parsed_output": "validated_against_schema"
}