Skip to content
AgentField
Intelligence

Harness

Dispatch complex tasks to coding agents — Claude Code, Codex, Gemini CLI, and OpenCode

Any coding agent, one API, structured output

Dispatch complex tasks to coding agents that can read files, write code, run commands, and return structured results. Unlike app.ai(), which makes a single LLM call, the harness spawns a multi-turn agent with full tool access — Claude Code, Codex, Gemini CLI, or OpenCode — that can navigate codebases, run tests, and produce verified output with cost controls.

from pydantic import BaseModel
from agentfield import Agent, HarnessConfig

class MigrationPlan(BaseModel):
    """Structured result the coding agent is asked to produce.

    Attributes:
        sql_statements: Ordered DDL/DML statements.
        rollback_steps: How to undo each change.
        risk_assessment: Safety analysis.
    """

    sql_statements: list[str]
    rollback_steps: list[str]
    risk_assessment: str

# Structured output from a coding agent — reads files, writes SQL, validates
result = await app.harness(
    f"Analyze the database schema and generate a migration plan: {description}",
    schema=MigrationPlan,       # validated typed output, not free text
    max_budget_usd=1.00,        # hard cost cap — agent stops if exceeded
)

# Full observability — cost, turns, duration, session replay
print(f"${result.cost_usd:.3f}")        # $0.042
print(f"{result.num_turns} turns")      # 8 turns
print(f"{result.duration_ms}ms")        # 12400ms

# Swap providers per-call — Claude Code, Codex, Gemini, OpenCode
result = await app.harness(
    "Generate a test suite for the payment module.",
    provider="codex",               # OpenAI Codex for this task
    model="o4-mini",
    max_turns=40,
)

# Different providers for different strengths
refactor = await app.harness(
    "Refactor auth to use JWT. Run tests after.",
    provider="gemini",              # Gemini CLI for broad refactors
    model="gemini-2.5-pro",
    max_budget_usd=2.00,
)

# Robust error handling — typed failure modes, not just true/false
if result.is_error:
    match result.failure_type:
        case "timeout":  log.warning(f"Timed out after {result.duration_ms}ms")
        case "crash":    log.error(f"Agent crashed: {result.error_message}")
        case "schema":   log.warning("Output didn't match schema after retries")
        case "api_error": log.error("Transient API error")
else:
    plan = result.parsed              # MigrationPlan, fully validated
// Reasoner that asks a coding agent for a structured DB migration plan and
// returns the validated plan together with what the run cost.
agent.reasoner('planDbMigration', async (ctx) => {
  // Coding agent reads schema, writes SQL, returns structured output
  const result = await agent.harness(
    `Analyze the DB schema and generate a migration plan: ${ctx.input.description}`,
    {
      schema: MigrationPlanSchema,  // validated output, not free text
      maxBudgetUsd: 1.00,           // hard cost cap
    }
  );

  // Full observability on every call (logged for failed runs too)
  console.log(`Cost: $${result.costUsd?.toFixed(3)}`);
  console.log(`Turns: ${result.numTurns}`);
  console.log(`Duration: ${result.durationMs}ms`);

  // Error handling — stop before any further work so a failed run is never
  // returned to the caller as if it had produced a plan.
  if (result.isError) {
    console.error(`Harness failed: ${result.errorMessage}`);
    throw new Error(`Harness failed: ${result.errorMessage}`);
  }

  // Swap providers per-call — Codex for test generation
  const tests = await agent.harness(
    'Generate a comprehensive test suite for the payment module.',
    { provider: 'codex', model: 'o4-mini', maxTurns: 40 }
  );

  return { plan: result.parsed, cost: result.costUsd };
});
// Coding agent reads files, writes SQL, returns validated struct
var plan MigrationPlan
schema, err := harness.StructToJSONSchema(plan)
if err != nil {
    log.Fatalf("Building JSON schema failed: %v", err)
}
result, err := app.Harness(ctx,
    "Analyze the database schema and generate a migration plan: add user roles",
    schema, &plan,                        // structured output
    harness.Options{MaxBudgetUSD: 1.00},  // hard cost cap
)
if err != nil {
    // Do not touch result when the call itself returned an error.
    log.Fatalf("Harness call failed: %v", err)
}

// Full observability
fmt.Printf("Turns: %d | Duration: %dms\n",
    result.NumTurns, result.DurationMS)

// Swap providers per-call
testResult, err := app.Harness(ctx,
    "Generate a test suite for the payment module.",
    nil, nil,                             // no schema
    harness.Options{
        Provider: "claude-code",          // override provider for this task
        Model:    "sonnet",
        MaxTurns: 40,
    },
)
if err != nil {
    log.Fatalf("Harness call failed: %v", err)
}
// Inspect the second run as well; an unused variable does not compile in Go.
if testResult.IsError {
    log.Printf("Test generation failed: %s", testResult.ErrorMessage)
}

// Typed failure handling
if result.IsError {
    switch result.FailureType {
    case harness.FailureTimeout:
        log.Printf("Timed out after %dms", result.DurationMS)
    case harness.FailureCrash:
        log.Printf("Agent crashed: %s", result.ErrorMessage)
    case harness.FailureSchema:
        log.Printf("Schema validation failed after retries")
    default:
        // Catch-all so a failure type not listed above is never silent.
        log.Printf("Harness failed: %s", result.ErrorMessage)
    }
}

What just happened

The harness example did not just call a model. It launched a coding agent with tool access, enforced turn and budget limits, and returned structured output with execution metrics. That is the key distinction between the harness and a single model call.

{
  "provider": "codex",
  "max_budget_usd": 1.0,
  "num_turns": 8,
  "duration_ms": 12400,
  "parsed_output": "validated_against_schema"
}