The best way to use adaptive thinking is not to manage it manually for every request, but to build systems that automatically classify tasks and set the appropriate effort level.
Task Classification System
from dataclasses import dataclass
from enum import Enum
class EffortLevel(Enum):
    """Named thinking-effort tiers that a task can be routed to.

    Each member's value is a plain lowercase string; ``AdaptiveClient.query``
    forwards that string as the ``effort`` field of the thinking config for
    every tier except ``NONE``.
    """

    NONE = "none"
    QUICK = "quick"
    STANDARD = "standard"
    DEEP = "deep"
    MAXIMUM = "maximum"
@dataclass
class TaskClassification:
    """Result of routing one task: effort tier, output limit, and rationale."""

    # Effort tier chosen for the task.
    effort: EffortLevel
    # Output token limit; the client passes it as ``max_tokens`` on the request.
    max_tokens: int
    # Short human-readable explanation of why this tier was chosen.
    reasoning: str
def classify_task(task_type: str, input_size: int,
                  criticality: str = "normal") -> TaskClassification:
    """Classify a task and determine the optimal effort level.

    Args:
        task_type: Routing key such as ``"code_review"``. Keys that are not in
            the routing table fall back to STANDARD effort with a 4096-token
            limit.
        input_size: Size of the input in tokens.
        criticality: ``"critical"`` forces DEEP or MAXIMUM effort; any other
            value uses the normal per-task routing.

    Returns:
        A ``TaskClassification`` holding the chosen effort, the output token
        limit, and a short explanation.
    """
    # Task-specific routing
    task_map = {
        "formatting": (EffortLevel.NONE, 1024),
        "classification": (EffortLevel.QUICK, 512),
        "code_completion": (EffortLevel.STANDARD, 4096),
        "code_review": (EffortLevel.DEEP, 8192),
        "bug_diagnosis": (EffortLevel.DEEP, 8192),
        "refactoring": (EffortLevel.STANDARD, 8192),
        "security_audit": (EffortLevel.MAXIMUM, 16384),
        "architecture": (EffortLevel.MAXIMUM, 16384),
        "documentation": (EffortLevel.STANDARD, 4096),
        "research": (EffortLevel.DEEP, 8192),
    }
    routed = task_map.get(task_type)

    # Critical tasks always get deep or maximum thinking
    if criticality == "critical":
        # A task that the table routes to MAXIMUM when non-critical must not
        # be downgraded to DEEP merely because it was flagged critical, so the
        # table is consulted in addition to the two critical-only type names.
        wants_maximum = (
            task_type in ("security_review", "architecture_decision")
            or (routed is not None and routed[0] is EffortLevel.MAXIMUM)
        )
        if wants_maximum:
            return TaskClassification(
                effort=EffortLevel.MAXIMUM,
                max_tokens=16384,
                reasoning="Critical task requiring exhaustive analysis"
            )
        return TaskClassification(
            effort=EffortLevel.DEEP,
            max_tokens=8192,
            reasoning="Critical task requiring thorough analysis"
        )

    if routed is None:
        routed = (EffortLevel.STANDARD, 4096)
    effort, max_tokens = routed

    # Adjust for input size; only the effort is raised, the token limit keeps
    # its routed value.
    if input_size > 100_000 and effort in (EffortLevel.NONE, EffortLevel.QUICK):
        effort = EffortLevel.STANDARD  # Large inputs need more processing

    return TaskClassification(
        effort=effort,
        max_tokens=max_tokens,
        reasoning=f"Task '{task_type}' with {input_size} tokens input"
    )
Effort-Aware API Client
class AdaptiveClient:
    """Claude client that automatically manages thinking effort.

    Every query is classified with ``classify_task``; the resulting effort
    tier and token limit are applied to the request, and the response usage
    is handed to the cost tracker.
    """

    def __init__(self):
        # NOTE(review): Anthropic and CostTracker are not defined or imported
        # in the visible part of this file; they must be in scope.
        self.client = Anthropic()
        self.cost_tracker = CostTracker()

    @staticmethod
    def _first_text(content) -> str:
        """Return the text of the first ``text`` block in *content*, or ``""``."""
        return next(
            (block.text for block in content if block.type == "text"),
            "",
        )

    def query(self, prompt: str, task_type: str,
              criticality: str = "normal") -> str:
        """Send a query with automatically calibrated effort.

        Args:
            prompt: User message text.
            task_type: Routing key forwarded to ``classify_task``.
            criticality: Forwarded to ``classify_task``; defaults to
                ``"normal"``.

        Returns:
            The text of the first ``text`` content block in the response, or
            an empty string when the response contains no such block.
        """
        # NOTE(review): confirm the installed SDK still exposes
        # ``client.count_tokens(text)`` and that it returns an int.
        input_tokens = self.client.count_tokens(prompt)
        classification = classify_task(task_type, input_tokens, criticality)

        # NOTE(review): the thinking payload shape (``"none"`` type, ``effort``
        # key and its values) and the model id below should be verified
        # against the current Messages API reference.
        thinking_config = (
            {"type": "none"} if classification.effort == EffortLevel.NONE
            else {"type": "adaptive", "effort": classification.effort.value}
        )
        response = self.client.messages.create(
            model="claude-opus-4-6-20260205",
            max_tokens=classification.max_tokens,
            thinking=thinking_config,
            messages=[{"role": "user", "content": prompt}]
        )

        # Track cost
        self.cost_tracker.record(response.usage, classification)

        # Extract text response. A response without any text block yields ""
        # instead of leaking a bare StopIteration to the caller.
        return self._first_text(response.content)
# Usage
client = AdaptiveClient()
# "formatting" is routed to NONE effort by classify_task's task map
formatted = client.query("Format this JSON: {...}", task_type="formatting")
# Critical security audit: effort is decided by classify_task's critical-task branch.
# NOTE(review): `code` is not defined in this snippet; presumably it holds the
# source text to audit.
audit = client.query(code, task_type="security_audit", criticality="critical")
Measuring Thinking Quality
Not all thinking is productive. Record quality, cost, and latency for each request, then compare average quality per dollar across effort levels:
@dataclass
class QualityMetric:
    """One observation pairing an effort level with its quality and cost."""

    # Effort level label; analyze_effort_roi groups observations by this value.
    effort_level: str
    # Number of thinking tokens recorded for the observation.
    thinking_tokens: int
    # 0.0-1.0, rated by human or automated check
    output_quality: float
    # Cost of the request; analyze_effort_roi prints it with a "$" prefix.
    cost: float
    # Latency in milliseconds, per the field name.
    latency_ms: int
def analyze_effort_roi(metrics: list[QualityMetric]):
    """Analyze which effort levels provide the best quality per dollar.

    Observations are grouped by ``effort_level`` in first-seen order, and one
    summary line per group is printed with the average quality, the average
    cost, and their ratio. Nothing is returned.
    """
    grouped = {}
    for metric in metrics:
        key = metric.effort_level
        if key not in grouped:
            grouped[key] = []
        grouped[key].append(metric)

    for level in grouped:
        samples = grouped[level]
        count = len(samples)
        avg_quality = sum(sample.output_quality for sample in samples) / count
        avg_cost = sum(sample.cost for sample in samples) / count
        # A group whose average cost is not positive reports 0 rather than
        # dividing by it.
        if avg_cost > 0:
            quality_per_dollar = avg_quality / avg_cost
        else:
            quality_per_dollar = 0
        print(f"{level:10} | Quality: {avg_quality:.2f} | "
              f"Cost: ${avg_cost:.4f} | Q/$: {quality_per_dollar:.1f}")
In the next lesson, you will learn to optimize thinking token budgets for maximum cost efficiency.