Migrating a production system from one model version to another is not a find-and-replace on the model string. Opus 4.6 has breaking API changes, different behavior characteristics, and new capabilities that require careful validation. This lesson provides a complete migration playbook.
Breaking Changes: 4.5 → 4.6
| Change | Opus 4.5 | Opus 4.6 | Migration Impact |
|---|---|---|---|
| Model string | claude-opus-4-5-20250101 | claude-opus-4-6-20260205 | Low — config change |
| Thinking mode | thinking: {type: "enabled"} | thinking: {type: "adaptive"} | Medium — code change |
| Assistant prefilling | Supported | Removed | High — refactor needed |
| Context window | 200K | 1M (beta) | Low — backward compatible |
| Output format | Same | New thinking block types | Medium — parser update |
| Pricing | Different | $5/$25 standard | Low — budget update |
Phase 1: Feature Flags
Deploy the ability to switch models without a code deploy:
from enum import Enum
class ModelVersion(Enum):
OPUS_45 = "claude-opus-4-5-20250101"
OPUS_46 = "claude-opus-4-6-20260205"
class FeatureFlags:
"""Feature flag system for model migration."""
def __init__(self):
self._flags: dict[str, dict] = {
"opus_46_enabled": {
"enabled": False,
"rollout_percentage": 0,
"user_allowlist": set(),
"task_allowlist": set(),
}
}
def is_opus_46_enabled(self, user_id: str = "",
task_type: str = "") -> bool:
"""Check if Opus 4.6 is enabled for this request."""
flag = self._flags["opus_46_enabled"]
if not flag["enabled"]:
return False
# Check user allowlist first
if user_id in flag["user_allowlist"]:
return True
# Check task allowlist
if task_type in flag["task_allowlist"]:
return True
# Percentage-based rollout
if flag["rollout_percentage"] >= 100:
return True
import hashlib
hash_val = int(
hashlib.md5(user_id.encode()).hexdigest(), 16
) % 100
return hash_val < flag["rollout_percentage"]
def set_rollout_percentage(self, percentage: int):
"""Set the percentage of traffic routed to Opus 4.6."""
self._flags["opus_46_enabled"]["rollout_percentage"] = max(
0, min(100, percentage)
)
def enable_for_user(self, user_id: str):
self._flags["opus_46_enabled"]["user_allowlist"].add(user_id)
def enable_for_task(self, task_type: str):
self._flags["opus_46_enabled"]["task_allowlist"].add(task_type)
def set_enabled(self, enabled: bool):
self._flags["opus_46_enabled"]["enabled"] = enabled
Phase 2: API Compatibility Layer
Translate between the two API formats:
class MigrationCompatLayer:
"""Translate API calls between Opus 4.5 and 4.6 formats."""
def translate_thinking_config(self, config: dict,
target: ModelVersion) -> dict:
"""Translate thinking configuration between versions."""
if target == ModelVersion.OPUS_46:
# 4.5 → 4.6
if config.get("type") == "enabled":
return {"type": "adaptive"}
if config.get("type") == "disabled":
return {"type": "none"}
return config # Already 4.6 format
else:
# 4.6 → 4.5
if config.get("type") == "adaptive":
return {"type": "enabled", "budget_tokens": 100000}
if config.get("type") == "none":
return {"type": "disabled"}
return config
def handle_prefilling_removal(self, messages: list[dict],
target: ModelVersion) -> list[dict]:
"""Handle the removal of assistant prefilling in 4.6."""
if target != ModelVersion.OPUS_46:
return messages
# Check if last message is an assistant prefill
if (messages and messages[-1].get("role") == "assistant"
and not messages[-1].get("_is_response")):
# Move the prefill into the user prompt as an instruction
prefill = messages[-1]["content"]
messages = messages[:-1]
if messages and messages[-1].get("role") == "user":
messages[-1] = {
**messages[-1],
"content": (
f"{messages[-1]['content']}\n\n"
f"Begin your response with: {prefill}"
)
}
return messages
def translate_request(self, kwargs: dict,
target: ModelVersion) -> dict:
"""Translate a full API request to the target version."""
translated = {**kwargs}
translated["model"] = target.value
# Translate thinking config
if "thinking" in translated:
translated["thinking"] = self.translate_thinking_config(
translated["thinking"], target
)
# Handle prefilling
if "messages" in translated:
translated["messages"] = self.handle_prefilling_removal(
translated["messages"], target
)
return translated
Phase 3: A/B Testing
Run both models in parallel and compare results:
import concurrent.futures
import json
class ABTester:
"""A/B test Opus 4.5 vs 4.6 responses."""
def __init__(self):
from anthropic import Anthropic
self.client = Anthropic()
self.compat = MigrationCompatLayer()
self.results: list[dict] = []
def test_request(self, kwargs: dict) -> dict:
"""Send the same request to both models and compare."""
kwargs_45 = self.compat.translate_request(
kwargs, ModelVersion.OPUS_45
)
kwargs_46 = self.compat.translate_request(
kwargs, ModelVersion.OPUS_46
)
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
future_45 = executor.submit(
self.client.messages.create, **kwargs_45
)
future_46 = executor.submit(
self.client.messages.create, **kwargs_46
)
response_45 = future_45.result()
response_46 = future_46.result()
# Compare responses
text_45 = next(
b.text for b in response_45.content if b.type == "text"
)
text_46 = next(
b.text for b in response_46.content if b.type == "text"
)
comparison = {
"input_preview": str(kwargs.get("messages", []))[:200],
"opus_45": {
"text_length": len(text_45),
"input_tokens": response_45.usage.input_tokens,
"output_tokens": response_45.usage.output_tokens,
"text_preview": text_45[:500],
},
"opus_46": {
"text_length": len(text_46),
"input_tokens": response_46.usage.input_tokens,
"output_tokens": response_46.usage.output_tokens,
"text_preview": text_46[:500],
},
"length_difference": len(text_46) - len(text_45),
"cost_difference": self._cost_diff(response_45, response_46),
}
self.results.append(comparison)
return comparison
def _cost_diff(self, r45, r46) -> float:
cost_45 = (r45.usage.input_tokens * 5 + r45.usage.output_tokens * 25) / 1_000_000
cost_46 = (r46.usage.input_tokens * 5 + r46.usage.output_tokens * 25) / 1_000_000
return round(cost_46 - cost_45, 6)
def summary(self) -> dict:
"""Summarize A/B test results."""
if not self.results:
return {"error": "No test results"}
return {
"total_tests": len(self.results),
"avg_length_diff": sum(
r["length_difference"] for r in self.results
) / len(self.results),
"avg_cost_diff": sum(
r["cost_difference"] for r in self.results
) / len(self.results),
"cost_higher_46": sum(
1 for r in self.results if r["cost_difference"] > 0
),
"longer_response_46": sum(
1 for r in self.results if r["length_difference"] > 0
),
}
Phase 4: Gradual Rollout
class GradualRollout:
"""Manage gradual migration from 4.5 to 4.6."""
ROLLOUT_STAGES = [
{"percentage": 0, "name": "internal_testing", "duration_days": 3},
{"percentage": 5, "name": "canary", "duration_days": 2},
{"percentage": 25, "name": "early_adopters", "duration_days": 3},
{"percentage": 50, "name": "half_traffic", "duration_days": 5},
{"percentage": 100, "name": "full_rollout", "duration_days": 0},
]
def __init__(self, flags: FeatureFlags, metrics_collector):
self.flags = flags
self.metrics = metrics_collector
self.current_stage = 0
def advance(self) -> dict:
"""Advance to the next rollout stage."""
if self.current_stage >= len(self.ROLLOUT_STAGES) - 1:
return {"status": "fully_rolled_out"}
# Check health before advancing
health = self._check_health()
if not health["healthy"]:
return {
"status": "blocked",
"reason": health["issues"],
"recommendation": "Fix issues before advancing",
}
self.current_stage += 1
stage = self.ROLLOUT_STAGES[self.current_stage]
self.flags.set_rollout_percentage(stage["percentage"])
self.flags.set_enabled(True)
return {
"status": "advanced",
"stage": stage["name"],
"percentage": stage["percentage"],
"hold_for_days": stage["duration_days"],
}
def rollback(self) -> dict:
"""Rollback to previous stage."""
if self.current_stage <= 0:
return {"status": "already_at_baseline"}
self.current_stage -= 1
stage = self.ROLLOUT_STAGES[self.current_stage]
self.flags.set_rollout_percentage(stage["percentage"])
if stage["percentage"] == 0:
self.flags.set_enabled(False)
return {
"status": "rolled_back",
"stage": stage["name"],
"percentage": stage["percentage"],
}
def emergency_rollback(self) -> dict:
"""Immediately rollback to 0% (Opus 4.5 only)."""
self.current_stage = 0
self.flags.set_rollout_percentage(0)
self.flags.set_enabled(False)
return {
"status": "emergency_rollback_complete",
"model": "claude-opus-4-5-20250101",
"percentage": 0,
}
def _check_health(self) -> dict:
"""Check if current rollout stage is healthy."""
error_rate = self.metrics.error_rate()
fallback_rate = self.metrics.fallback_rate()
issues = []
if error_rate > 0.05:
issues.append(f"Error rate too high: {error_rate:.1%}")
if fallback_rate > 0.10:
issues.append(f"Fallback rate too high: {fallback_rate:.1%}")
return {
"healthy": len(issues) == 0,
"issues": issues,
"error_rate": error_rate,
"fallback_rate": fallback_rate,
}
Phase 5: Rollback Procedures
class RollbackProcedure:
"""Documented rollback procedure for migration failures."""
RUNBOOK = """
ROLLBACK RUNBOOK: Opus 4.6 → 4.5
==================================
Trigger conditions (any one):
- Error rate > 5% for 5+ minutes
- Latency p95 > 60s for 10+ minutes
- Customer-reported quality issues > 3 in 1 hour
- Cost spike > 200% of baseline
Steps:
1. Set feature flag rollout to 0%
2. Verify all traffic is routing to 4.5
3. Monitor error rate for 5 minutes
4. Notify engineering team via #llm-ops Slack channel
5. Create incident report
Post-rollback:
- Analyze failure root cause
- Update A/B test suite with failing cases
- Plan re-migration with fixes
"""
@staticmethod
def execute(rollout: GradualRollout) -> dict:
"""Execute rollback procedure."""
# Step 1: Emergency rollback
result = rollout.emergency_rollback()
# Step 2: Verify
# (In production, this would check live metrics)
return {
"rollback_executed": True,
"result": result,
"next_steps": [
"Monitor error rates for 5 minutes",
"Notify team in #llm-ops",
"Create incident report",
"Schedule post-mortem",
]
}
Migration Checklist
MIGRATION_CHECKLIST = [
# Pre-migration
("Pre", "Baseline metrics recorded for Opus 4.5"),
("Pre", "A/B test suite covers all critical use cases"),
("Pre", "Feature flags deployed and tested"),
("Pre", "Rollback procedure documented and rehearsed"),
("Pre", "Team trained on new API changes"),
("Pre", "Budget updated for Opus 4.6 pricing"),
# Migration
("During", "Internal testing at 0% public traffic"),
("During", "Canary at 5% for 48+ hours"),
("During", "Early adopters at 25% for 72+ hours"),
("During", "Half traffic at 50% for 5+ days"),
("During", "Full rollout at 100%"),
# Post-migration
("Post", "Remove Opus 4.5 code paths after 30 days"),
("Post", "Update documentation and runbooks"),
("Post", "Archive migration feature flags"),
("Post", "Final cost comparison report"),
("Post", "Update monitoring baselines"),
]
This concludes the course. You now have the knowledge to build, deploy, monitor, and maintain production-grade AI systems powered by Claude Opus 4.6 — from the first API call through enterprise-scale deployment.