Lesson 13 of 46 ~25 min
Course progress
0%

Building Effort-Aware Applications

Design applications that automatically select the right thinking effort level based on task classification — maximizing quality while controlling costs.

The best way to use adaptive thinking is not to manage it manually for every request, but to build systems that automatically classify tasks and set the appropriate effort level.

Task Classification System

from dataclasses import dataclass
from enum import Enum

class EffortLevel(Enum):
    """Thinking-effort tiers that a task can be routed to.

    Each member's value is a lowercase string that callers read via
    ``.value``.
    """
    # NOTE(review): members appear to be listed from least to most effort;
    # the enum itself defines no ordering -- confirm before comparing tiers.
    NONE = "none"
    QUICK = "quick"
    STANDARD = "standard"
    DEEP = "deep"
    MAXIMUM = "maximum"

@dataclass
class TaskClassification:
    """Outcome of classifying a task: effort tier, token limit and rationale."""
    # Effort tier selected for the task.
    effort: EffortLevel
    # Token limit paired with the effort tier; AdaptiveClient.query passes it
    # as ``max_tokens`` on the request.
    max_tokens: int
    # Human-readable explanation of why this classification was chosen.
    reasoning: str

def classify_task(task_type: str, input_size: int,
                  criticality: str = "normal") -> TaskClassification:
    """Classify a task and determine the optimal effort level.

    Args:
        task_type: Task category key such as "code_review". Types missing
            from the routing table fall back to STANDARD effort with a
            4096-token limit.
        input_size: Size of the input in tokens.
        criticality: "critical" forces at least DEEP effort; any other
            value uses the normal task-type routing.

    Returns:
        A TaskClassification holding the effort level, the token limit and
        a short explanation of the decision.
    """

    # Task-specific routing
    task_map = {
        "formatting":       (EffortLevel.NONE, 1024),
        "classification":   (EffortLevel.QUICK, 512),
        "code_completion":  (EffortLevel.STANDARD, 4096),
        "code_review":      (EffortLevel.DEEP, 8192),
        "bug_diagnosis":    (EffortLevel.DEEP, 8192),
        "refactoring":      (EffortLevel.STANDARD, 8192),
        "security_audit":   (EffortLevel.MAXIMUM, 16384),
        "architecture":     (EffortLevel.MAXIMUM, 16384),
        "documentation":    (EffortLevel.STANDARD, 4096),
        "research":         (EffortLevel.DEEP, 8192),
    }

    # Critical tasks always get deep or maximum thinking
    if criticality == "critical":
        routed = task_map.get(task_type)
        # Criticality must never lower the effort: a task type that the
        # routing table already sends to MAXIMUM (e.g. "security_audit")
        # keeps MAXIMUM instead of being downgraded to DEEP.
        needs_maximum = (
            task_type in ("security_review", "architecture_decision")
            or (routed is not None and routed[0] is EffortLevel.MAXIMUM)
        )
        if needs_maximum:
            return TaskClassification(
                effort=EffortLevel.MAXIMUM,
                max_tokens=16384,
                reasoning="Critical task requiring exhaustive analysis"
            )
        return TaskClassification(
            effort=EffortLevel.DEEP,
            max_tokens=8192,
            reasoning="Critical task requiring thorough analysis"
        )

    effort, max_tokens = task_map.get(
        task_type, (EffortLevel.STANDARD, 4096)
    )

    # Adjust for input size
    if input_size > 100_000 and effort.value in ("none", "quick"):
        effort = EffortLevel.STANDARD  # Large inputs need more processing

    return TaskClassification(
        effort=effort,
        max_tokens=max_tokens,
        reasoning=f"Task '{task_type}' with {input_size} tokens input"
    )

Effort-Aware API Client

class AdaptiveClient:
    """Claude client that automatically manages thinking effort.

    Every query is classified with classify_task(); the resulting effort
    level and token limit are then applied to the Messages API request.
    """

    # Translates EffortLevel values into the effort names accepted by the
    # Messages API (`output_config.effort`). EffortLevel.NONE is absent on
    # purpose: it disables thinking instead of selecting an effort.
    # NOTE(review): "max" is documented for Opus 4.6 only -- confirm before
    # pointing this client at another model.
    _API_EFFORT = {
        "quick": "low",
        "standard": "medium",
        "deep": "high",
        "maximum": "max",
    }

    def __init__(self, model: str = "claude-opus-4-6-20260205"):
        """Create the API client and cost tracker.

        Args:
            model: Model id used for token counting and for requests.
                NOTE(review): confirm the default id against the published
                model list.
        """
        self.client = Anthropic()
        self.cost_tracker = CostTracker()
        self.model = model

    def query(self, prompt: str, task_type: str,
              criticality: str = "normal") -> str:
        """Send a query with automatically calibrated effort.

        Args:
            prompt: User message text.
            task_type: Task category forwarded to classify_task().
            criticality: Forwarded to classify_task(); "critical" raises
                the effort level.

        Returns:
            The text of the first text block in the response.

        Raises:
            RuntimeError: If the response contains no text block.
        """
        messages = [{"role": "user", "content": prompt}]

        # Token counting is a Messages API call; its result object exposes
        # the count as `input_tokens`.
        token_count = self.client.messages.count_tokens(
            model=self.model,
            messages=messages,
        )
        classification = classify_task(
            task_type, token_count.input_tokens, criticality
        )

        request = {
            "model": self.model,
            "max_tokens": classification.max_tokens,
            "messages": messages,
        }
        if classification.effort == EffortLevel.NONE:
            request["thinking"] = {"type": "disabled"}
        else:
            effort_name = classification.effort.value
            request["thinking"] = {"type": "adaptive"}
            # Effort is a separate output setting, not a field of `thinking`.
            request["output_config"] = {
                "effort": self._API_EFFORT.get(effort_name, effort_name)
            }

        response = self.client.messages.create(**request)

        # Track cost
        self.cost_tracker.record(response.usage, classification)

        # Extract text response; thinking blocks precede it and are skipped.
        for block in response.content:
            if block.type == "text":
                return block.text

        # Fail loudly instead of leaking a bare StopIteration to the caller.
        raise RuntimeError(
            "Response contained no text block "
            f"(stop_reason={getattr(response, 'stop_reason', None)!r})"
        )

# Usage
client = AdaptiveClient()

# Automatically uses NONE effort
formatted = client.query("Format this JSON: {...}", task_type="formatting")

# Source text to audit (placeholder -- substitute the real code under review).
code = "def check(password): return password == 'hunter2'"

# Critical security audit: classify_task routes critical tasks to its
# deepest effort tiers (DEEP or MAXIMUM).
audit = client.query(code, task_type="security_audit", criticality="critical")

Measuring Thinking Quality

Not all thinking is productive. Track how output quality and cost vary with effort level, so you can see which levels deliver the most quality per dollar:

@dataclass
class QualityMetric:
    """One measured request: effort level used, quality achieved and cost."""
    # Label of the effort level; analyze_effort_roi groups metrics by it.
    effort_level: str
    # Presumably the thinking tokens consumed by the request -- not read by
    # the code shown here; confirm against the producer.
    thinking_tokens: int
    output_quality: float  # 0.0-1.0, rated by human or automated check
    # Cost of the request; analyze_effort_roi prints it with a "$" prefix.
    cost: float
    # Request latency; milliseconds per the field name (not read here).
    latency_ms: int

def analyze_effort_roi(metrics: list[QualityMetric]):
    """Print average quality, average cost and quality-per-dollar per effort level.

    Metrics are grouped by ``effort_level`` in order of first appearance and
    one summary line is printed per group. A group whose average cost is not
    positive reports a quality-per-dollar of 0.
    """
    grouped = {}
    for metric in metrics:
        if metric.effort_level not in grouped:
            grouped[metric.effort_level] = []
        grouped[metric.effort_level].append(metric)

    for level in grouped:
        bucket = grouped[level]
        count = len(bucket)
        avg_quality = sum(item.output_quality for item in bucket) / count
        avg_cost = sum(item.cost for item in bucket) / count

        # Guard the division: free (or zero-cost) groups get a ratio of 0.
        if avg_cost > 0:
            quality_per_dollar = avg_quality / avg_cost
        else:
            quality_per_dollar = 0

        summary = (f"{level:10} | Quality: {avg_quality:.2f} | "
                   f"Cost: ${avg_cost:.4f} | Q/$: {quality_per_dollar:.1f}")
        print(summary)

In the next lesson, you will learn to optimize thinking token budgets for maximum cost efficiency.