Lesson 25 of 46 ~25 min
Course progress
0%

Thinking Budget Optimization

Optimize thinking token budgets — set cost caps, implement dynamic adjustment, and build monitoring for thinking token usage.

Thinking tokens are billed at the same rate as input tokens ($5/1M for Opus 4.6). Without budget controls, a single “maximum” effort request can consume 200K thinking tokens — costing $1.00 just for the thinking phase.

Setting Budget Caps

# Cap the thinking phase of this request at 30K tokens.
# NOTE(review): budget_tokens (30000) is larger than max_tokens (4096) —
# confirm against the API reference that this combination is accepted.
thinking_config = {
    "type": "adaptive",
    "effort": "deep",
    "budget_tokens": 30000,
}

response = client.messages.create(
    model="claude-opus-4-6-20260205",
    max_tokens=4096,
    thinking=thinking_config,
    messages=[{"role": "user", "content": prompt}],
)

Budget Templates by Task

# Per-task thinking presets: each task name maps to the "effort" level and
# the "budget_tokens" cap to request for that kind of work.
THINKING_BUDGETS = {
    # Quick tasks: little or no thinking
    "formatting": dict(effort="none", budget_tokens=0),
    "classification": dict(effort="quick", budget_tokens=2000),
    "simple_qa": dict(effort="quick", budget_tokens=1500),

    # Standard tasks: a moderate amount of thinking
    "code_generation": dict(effort="standard", budget_tokens=8000),
    "code_review": dict(effort="standard", budget_tokens=10000),
    "documentation": dict(effort="standard", budget_tokens=5000),
    "debugging": dict(effort="standard", budget_tokens=15000),

    # Deep tasks: substantial thinking
    "architecture_review": dict(effort="deep", budget_tokens=40000),
    "bug_root_cause": dict(effort="deep", budget_tokens=30000),
    "data_migration_plan": dict(effort="deep", budget_tokens=35000),

    # Maximum tasks: exhaustive thinking
    "security_audit": dict(effort="maximum", budget_tokens=100000),
    "novel_algorithm": dict(effort="maximum", budget_tokens=150000),
    "compliance_review": dict(effort="maximum", budget_tokens=80000),
}

Dynamic Budget Adjustment

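Start with a conservative budget, then adjust it after each response: double it (up to a maximum) when the model used more than 90% of the budget, and halve it (down to a 2,000-token floor) when the model used less than 30%: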

class DynamicBudget:
    """Thinking-token budget that is resized after every response.

    The budget doubles (capped at ``max_budget``) when a response used more
    than 90% of it, and halves (floored at ``min_budget``) when a response
    used less than 30% of it.

    Attributes:
        budget: Budget, in tokens, that the next request should use.
        max_budget: Upper bound the budget is never raised above.
        min_budget: Lower bound the budget is never reduced below.
        history: One dict per ``adjust`` call, see ``adjust`` for the keys.
    """

    # Rough characters-per-token ratio, used only when the response does not
    # report a thinking-token count. This is a heuristic, not an exact
    # conversion; tune it for your content if needed.
    CHARS_PER_TOKEN = 4

    def __init__(self, initial_budget: int = 10000, max_budget: int = 100000,
                 min_budget: int = 2000):
        """Create a budget tracker.

        Args:
            initial_budget: Budget, in tokens, for the first request.
            max_budget: Ceiling for automatic increases.
            min_budget: Floor for automatic decreases.
        """
        self.budget = initial_budget
        self.max_budget = max_budget
        self.min_budget = min_budget
        self.history: list[dict] = []

    def get_config(self) -> dict:
        """Return the ``thinking`` request parameter for the current budget."""
        return {
            "type": "adaptive",
            "budget_tokens": self.budget
        }

    def _thinking_tokens_used(self, response) -> int:
        """Return the number of thinking tokens a response consumed.

        Uses ``response.usage.thinking_tokens`` when it is a positive integer.
        Otherwise the count is estimated from the length of the thinking text,
        because the budget is expressed in tokens, not characters.
        """
        usage = getattr(response, "usage", None)
        reported = getattr(usage, "thinking_tokens", None)
        if isinstance(reported, int) and reported > 0:
            return reported

        # `or ""` guards against a thinking block whose text is None.
        thinking_chars = sum(
            len(getattr(block, "thinking", None) or "")
            for block in response.content
            if block.type == "thinking"
        )
        return thinking_chars // self.CHARS_PER_TOKEN

    def adjust(self, response) -> None:
        """Resize the budget based on how much of it ``response`` used.

        Appends one entry to ``history`` with these keys:
            budget: Budget that was in effect for this response.
            used: Thinking tokens used (reported or estimated).
            ratio: ``used / budget`` (0 when the budget is 0).
            next_budget: Budget after this adjustment.

        Args:
            response: A message response exposing ``content`` (an iterable of
                blocks with a ``type`` attribute) and, optionally, ``usage``.
        """
        thinking_used = self._thinking_tokens_used(response)

        # Remember the budget the request actually ran with, so the history
        # entry pairs the ratio with the budget it was computed against.
        budget_in_effect = self.budget
        usage_ratio = (
            thinking_used / budget_in_effect if budget_in_effect > 0 else 0
        )

        # If the model used >90% of the budget, it might need more.
        if usage_ratio > 0.9 and self.budget < self.max_budget:
            self.budget = min(self.budget * 2, self.max_budget)
            print(f"⬆️ Budget increased to {self.budget:,} tokens")
        elif usage_ratio < 0.3 and self.budget > self.min_budget:
            self.budget = max(self.budget // 2, self.min_budget)
            print(f"⬇️ Budget decreased to {self.budget:,} tokens")

        self.history.append({
            "budget": budget_in_effect,
            "used": thinking_used,
            "ratio": usage_ratio,
            "next_budget": self.budget,
        })

Cost Monitoring Dashboard

class ThinkingCostMonitor:
    """Accumulate thinking-token usage and cost, and print threshold alerts.

    The counters only grow; call ``reset()`` at the start of each day (or
    whatever period you are tracking) to start again from zero.

    Attributes:
        price_per_million: USD charged per one million thinking tokens.
        warn_threshold: Accumulated cost (USD) above which a warning prints.
        alert_threshold: Accumulated cost (USD) above which an alert prints.
        daily_thinking_tokens: Thinking tokens recorded since the last reset.
        daily_thinking_cost: Cost in USD recorded since the last reset.
    """

    def __init__(self, price_per_million: float = 5.0,
                 warn_threshold: float = 20.0,
                 alert_threshold: float = 50.0):
        """Create a monitor with zeroed counters.

        Args:
            price_per_million: USD per one million thinking tokens.
            warn_threshold: Cost in USD that triggers the warning message.
            alert_threshold: Cost in USD that triggers the alert message.
        """
        self.price_per_million = price_per_million
        self.warn_threshold = warn_threshold
        self.alert_threshold = alert_threshold
        self.daily_thinking_tokens = 0
        self.daily_thinking_cost = 0.0

    def reset(self) -> None:
        """Zero the accumulated token and cost counters."""
        self.daily_thinking_tokens = 0
        self.daily_thinking_cost = 0.0

    def record(self, response) -> None:
        """Add one response's thinking-token usage to the running totals.

        Reads ``response.usage.thinking_tokens``. A missing ``usage``, a
        missing field, or a ``None`` value all count as zero tokens.

        Args:
            response: A message response, optionally exposing ``usage``.
        """
        usage = getattr(response, "usage", None)
        # `or 0` covers a field that is present but set to None.
        thinking_tokens = getattr(usage, "thinking_tokens", 0) or 0
        cost = thinking_tokens * self.price_per_million / 1_000_000

        self.daily_thinking_tokens += thinking_tokens
        self.daily_thinking_cost += cost

        # Check the higher threshold first so only one message is printed.
        if self.daily_thinking_cost > self.alert_threshold:
            print(f"🚨 Daily thinking cost: ${self.daily_thinking_cost:.2f}")
        elif self.daily_thinking_cost > self.warn_threshold:
            print(f"⚠️ Daily thinking cost: ${self.daily_thinking_cost:.2f}")

    def report(self) -> str:
        """Return a two-line summary of the accumulated tokens and cost."""
        return (f"Thinking tokens today: {self.daily_thinking_tokens:,}\n"
                f"Thinking cost today: ${self.daily_thinking_cost:.2f}")

In the next lesson, we explore when adaptive thinking fails and how to handle those cases.