BuildIntelligence
Harness
Harness orchestration for Claude Code, Codex, Gemini CLI, and OpenCode
Harness orchestration dispatches complex work to Claude Code, Codex, Gemini CLI, or OpenCode with full tool access and structured results. Unlike app.ai(), which makes a single LLM call, app.harness() runs a multi-turn harness that can navigate codebases, run tests, and produce verified output with cost controls.
from pydantic import BaseModel
from agentfield import Agent, HarnessConfig
# Typed result model; passed to app.harness() as `schema=` so the output
# is validated against these fields.
class MigrationPlan(BaseModel):
    sql_statements: list[str]  # ordered DDL/DML
    rollback_steps: list[str]  # how to undo each change
    risk_assessment: str  # safety analysis
# Structured output from a harness -- reads files, writes SQL, validates
result = await app.harness(
f"Analyze the database schema and generate a migration plan: {description}",
schema=MigrationPlan, # validated typed output, not free text
max_budget_usd=1.00, # hard cost cap — agent stops if exceeded
)
# Full observability — cost, turns, duration, session replay
print(f"${result.cost_usd:.3f}") # $0.042
print(f"{result.num_turns} turns") # 8 turns
print(f"{result.duration_ms}ms") # 12400ms
# Swap providers per-call — Claude Code, Codex, Gemini, OpenCode
result = await app.harness(
"Generate a test suite for the payment module.",
provider="codex", # OpenAI Codex for this task
model="o4-mini",
max_turns=40,
)
# Different providers for different strengths
refactor = await app.harness(
"Refactor auth to use JWT. Run tests after.",
provider="gemini", # Gemini CLI for broad refactors
model="gemini-2.5-pro",
max_budget_usd=2.00,
)
# Robust error handling — typed failure modes, not just true/false
if result.is_error:
match result.failure_type:
case "timeout": log.warning(f"Timed out after {result.duration_ms}ms")
case "crash": log.error(f"Agent crashed: {result.error_message}")
case "schema": log.warning("Output didn't match schema after retries")
case "api_error": log.error("Transient API error")
else:
    plan = result.parsed  # MigrationPlan, fully validated
agent.reasoner('planDbMigration', async (ctx) => {
// Coding agent reads schema, writes SQL, returns structured output
const migrationPrompt =
  `Analyze the DB schema and generate a migration plan: ${ctx.input.description}`;
const result = await agent.harness(migrationPrompt, {
  schema: MigrationPlanSchema, // validated output, not free text
  maxBudgetUsd: 1.00, // hard cost cap
});

// Full observability on every call
console.log(`Cost: $${result.costUsd?.toFixed(3)}`);
console.log(`Turns: ${result.numTurns}`);
console.log(`Duration: ${result.durationMs}ms`);

// Swap providers per-call — Codex for test generation
const tests = await agent.harness(
  'Generate a comprehensive test suite for the payment module.',
  { provider: 'codex', model: 'o4-mini', maxTurns: 40 }
);

// Error handling: log the failure message reported on the result
if (result.isError) {
  console.error(`Harness failed: ${result.errorMessage}`);
}

return { plan: result.parsed, cost: result.costUsd };
});
// Coding agent reads files, writes SQL, returns validated struct
var plan MigrationPlan
schema, _ := harness.StructToJSONSchema(plan)
planOpts := harness.Options{MaxBudgetUSD: 1.00} // hard cost cap
result, _ := app.Harness(ctx,
	"Analyze the database schema and generate a migration plan: add user roles",
	schema, &plan, // structured output
	planOpts,
)

// Full observability
fmt.Printf("Turns: %d | Duration: %dms\n", result.NumTurns, result.DurationMS)
// Swap providers per-call
testResult, _ := app.Harness(ctx,
"Generate a test suite for the payment module.",
nil, nil, // no schema
harness.Options{
Provider: "claude-code", // override provider for this task
Model: "sonnet",
MaxTurns: 40,
},
)
// Typed failure handling
if result.IsError {
switch result.FailureType {
case harness.FailureTimeout:
	log.Printf("Timed out after %dms", result.DurationMS)
case harness.FailureCrash:
	log.Printf("Agent crashed: %s", result.ErrorMessage)
case harness.FailureSchema:
	log.Printf("Schema validation failed after retries")
}
}
What just happened
The harness example did not just call a model. It launched a tool-using harness, enforced turn and budget limits, and returned structured output with execution metrics. That is the main distinction this page needs to make visible immediately.
{
"provider": "codex",
"max_budget_usd": 1.0,
"num_turns": 8,
"duration_ms": 12400,
"parsed_output": "validated_against_schema"
}