Opus 4.6 is available through three cloud providers in addition to the direct Anthropic API. The four options differ in pricing, latency characteristics, compliance certifications, and how they integrate with your existing infrastructure.
Provider Comparison
| Feature | Anthropic API | AWS Bedrock | Google Vertex AI | Microsoft Foundry |
|---|---|---|---|---|
| Pricing (input/output) | $5 / $25 per 1M tokens | Markup varies | Markup varies | Markup varies |
| Billing | Separate account | AWS bill | GCP bill | Azure bill |
| Data residency | US | Multi-region | Multi-region | Multi-region |
| SOC 2 | ✅ | ✅ | ✅ | ✅ |
| HIPAA BAA | ✅ | ✅ | ✅ | ✅ |
| VPC/Private Link | ❌ | ✅ | ✅ | ✅ |
| 1M context | ✅ | ✅ (beta) | ✅ (beta) | ✅ |
| M365 integration | ❌ | ❌ | ❌ | ✅ Native |
| Fine-tuning | ❌ | ❌ | ❌ | ❌ |
AWS Bedrock Deployment
Configuration
import boto3
import json
class BedrockOpusClient:
    """Opus 4.6 client for AWS Bedrock.

    Wraps a boto3 ``bedrock-runtime`` client and offers a blocking
    ``query`` and a generator-based ``query_streaming``. Both send the
    same JSON request body, built once in ``_build_body``.
    """

    MODEL_ID = "anthropic.claude-opus-4-6-20260205-v1:0"
    # NOTE(review): AWS's Anthropic Messages documentation lists
    # "bedrock-2023-05-31" as the required value -- confirm this version
    # string against the current Bedrock docs before relying on it.
    ANTHROPIC_VERSION = "bedrock-2024-10-22"

    def __init__(self, region: str = "us-east-1"):
        """Create the underlying ``bedrock-runtime`` client in *region*."""
        self.client = boto3.client(
            "bedrock-runtime",
            region_name=region,
        )

    def _build_body(self, prompt: str, system: str, max_tokens: int) -> str:
        """Return the JSON request body shared by both query methods.

        The ``system`` key is only included when *system* is non-empty.
        """
        body = {
            "anthropic_version": self.ANTHROPIC_VERSION,
            "max_tokens": max_tokens,
            "messages": [{"role": "user", "content": prompt}],
            "thinking": {"type": "adaptive"},
        }
        if system:
            body["system"] = system
        return json.dumps(body)

    def query(self, prompt: str, system: str = "",
              max_tokens: int = 4096) -> str:
        """Send *prompt* and return the text of the first text block.

        Args:
            prompt: User message content.
            system: Optional system prompt; omitted from the request if empty.
            max_tokens: Value sent as ``max_tokens`` in the request body.

        Returns:
            The ``text`` of the first content block whose type is ``"text"``.

        Raises:
            ValueError: If the response contains no text block.
        """
        response = self.client.invoke_model(
            modelId=self.MODEL_ID,
            contentType="application/json",
            accept="application/json",
            body=self._build_body(prompt, system, max_tokens),
        )
        result = json.loads(response["body"].read())
        # Non-text blocks (for example thinking output) are skipped.
        for block in result.get("content", []):
            if block.get("type") == "text":
                return block["text"]
        # An explicit error instead of a bare StopIteration leaking out of
        # next(); the stop reason is included to help diagnose why.
        raise ValueError(
            "Bedrock response contained no text block "
            f"(stop_reason={result.get('stop_reason')!r})"
        )

    def query_streaming(self, prompt: str, system: str = "",
                        max_tokens: int = 4096):
        """Stream responses from Bedrock.

        This is a generator: the request is only sent once iteration
        starts. It yields the ``text`` of every ``content_block_delta``
        event whose delta type is ``text_delta``; all other events are
        ignored.
        """
        response = self.client.invoke_model_with_response_stream(
            modelId=self.MODEL_ID,
            contentType="application/json",
            accept="application/json",  # same header the blocking call sends
            body=self._build_body(prompt, system, max_tokens),
        )
        for event in response["body"]:
            chunk = json.loads(event["chunk"]["bytes"])
            if chunk.get("type") != "content_block_delta":
                continue
            delta = chunk.get("delta", {})
            if delta.get("type") == "text_delta":
                yield delta["text"]
Bedrock-Specific Features
# Cross-region inference for higher throughput
# NOTE(review): this line only creates a runtime client in one region.
# Per AWS documentation, cross-region inference is selected by passing an
# inference profile ID as the modelId -- confirm and adjust as needed.
client = boto3.client("bedrock-runtime", region_name="us-east-1")

# Enable model invocation logging for audit
logging_client = boto3.client("bedrock", region_name="us-east-1")
logging_client.put_model_invocation_logging_configuration(
    loggingConfig={
        "cloudWatchConfig": {
            "logGroupName": "/aws/bedrock/opus-46-invocations",
            # IAM role ARNs have the form
            # arn:aws:iam::<account-id>:role/<role-name>; the account ID
            # below is a placeholder -- replace it with your own.
            "roleArn": "arn:aws:iam::123456789012:role/BedrockLogging",
        },
        "textDataDeliveryEnabled": True,
    }
)
Google Vertex AI Deployment
Configuration
from anthropic import AnthropicVertex
class VertexOpusClient:
    """Opus 4.6 client for Google Vertex AI.

    Wraps an ``AnthropicVertex`` client; every method sends a single user
    message and returns the text of the first text block in the reply.
    """

    # Single source of truth for the model ID used by every request.
    MODEL_ID = "claude-opus-4-6@20260205"

    def __init__(self, project_id: str, region: str = "us-east5"):
        """Create the ``AnthropicVertex`` client for *project_id* in *region*."""
        self.client = AnthropicVertex(
            project_id=project_id,
            region=region,
        )

    @staticmethod
    def _first_text(response) -> str:
        """Return the text of the first ``"text"`` block in *response*.

        Raises:
            ValueError: If no content block has type ``"text"``.
        """
        for block in response.content:
            if block.type == "text":
                return block.text
        # An explicit error instead of a bare StopIteration from next().
        raise ValueError("Vertex AI response contained no text block")

    def query(self, prompt: str, system: str = "",
              max_tokens: int = 4096) -> str:
        """Send *prompt* and return the first text block of the reply.

        Args:
            prompt: User message content.
            system: Optional system prompt; omitted from the call if empty.
            max_tokens: Passed through as ``max_tokens``.

        Raises:
            ValueError: If the response contains no text block.
        """
        kwargs = {
            "model": self.MODEL_ID,
            "max_tokens": max_tokens,
            "thinking": {"type": "adaptive"},
            "messages": [{"role": "user", "content": prompt}],
        }
        if system:
            kwargs["system"] = system
        response = self.client.messages.create(**kwargs)
        return self._first_text(response)

    def query_with_grounding(self, prompt: str,
                             max_tokens: int = 4096) -> str:
        """Use Vertex AI grounding with Google Search.

        Args:
            prompt: User message content.
            max_tokens: Passed through as ``max_tokens`` (default 4096).

        Raises:
            ValueError: If the response contains no text block.
        """
        response = self.client.messages.create(
            model=self.MODEL_ID,
            max_tokens=max_tokens,
            thinking={"type": "adaptive"},
            messages=[{"role": "user", "content": prompt}],
            # NOTE(review): the Anthropic Messages API documents only
            # `user_id` under `metadata`; confirm that this grounding key
            # is accepted by the Vertex endpoint before relying on it.
            metadata={
                "google_search_grounding": {"enabled": True}
            },
        )
        return self._first_text(response)
Vertex-Specific Features
# Vertex AI supports batch prediction for cost savings
from google.cloud import aiplatform
def submit_batch_job(prompts: list[str], project_id: str,
                     location: str = "us-east5"):
    """Submit a batch prediction job for bulk processing.

    Args:
        prompts: Prompts to process; each becomes ``{"prompt": <text>}``.
        project_id: Google Cloud project used for ``aiplatform.init`` and
            in the model resource name.
        location: Region used both for ``aiplatform.init`` and in the model
            resource name (previously hard-coded as ``"us-east5"``).

    Returns:
        The object returned by ``aiplatform.BatchPredictionJob.create``.

    Raises:
        ValueError: If *prompts* is empty.
    """
    # Fail fast instead of submitting a job with nothing to process.
    if not prompts:
        raise ValueError("prompts must contain at least one prompt")

    aiplatform.init(project=project_id, location=location)
    model_name = (
        f"projects/{project_id}/locations/{location}/"
        f"publishers/anthropic/models/claude-opus-4-6@20260205"
    )
    # NOTE(review): confirm that BatchPredictionJob.create accepts an inline
    # `instances` argument; the SDK reference describes file/table sources
    # (e.g. gcs_source / bigquery_source) for batch input.
    batch_job = aiplatform.BatchPredictionJob.create(
        job_display_name="opus-46-batch",
        model_name=model_name,
        instances=[{"prompt": p} for p in prompts],
        machine_type="n1-standard-4",
    )
    return batch_job
Microsoft Foundry Deployment
Configuration
from anthropic import Anthropic
class FoundryOpusClient:
    """Opus 4.6 client for Microsoft Foundry.

    Wraps an ``Anthropic`` SDK client pointed at a Foundry resource URL;
    every method sends a single user message and returns the text of the
    first text block in the reply.
    """

    # Single source of truth for the model ID used by every request.
    MODEL_ID = "claude-opus-4-6-20260205"

    def __init__(self, resource_name: str, deployment_name: str):
        """Build the client for the given Foundry resource and deployment.

        No credential is passed explicitly here.
        NOTE(review): presumably the SDK picks up an API key / Azure AD
        token from the environment -- confirm how auth is supplied. Also
        confirm the URL shape: the SDK appends its own request path to
        ``base_url``.
        """
        self.client = Anthropic(
            base_url=f"https://{resource_name}.services.ai.azure.com/"
                     f"anthropic/deployments/{deployment_name}",
            # Uses Azure AD token or API key
        )

    @staticmethod
    def _first_text(response) -> str:
        """Return the text of the first ``"text"`` block in *response*.

        Raises:
            ValueError: If no content block has type ``"text"``.
        """
        for block in response.content:
            if block.type == "text":
                return block.text
        # An explicit error instead of a bare StopIteration from next().
        raise ValueError("Foundry response contained no text block")

    def query(self, prompt: str, system: str = "",
              max_tokens: int = 4096) -> str:
        """Send *prompt* and return the first text block of the reply.

        Args:
            prompt: User message content.
            system: Optional system prompt; omitted from the call if empty.
            max_tokens: Passed through as ``max_tokens``.

        Raises:
            ValueError: If the response contains no text block.
        """
        kwargs = {
            "model": self.MODEL_ID,
            "max_tokens": max_tokens,
            "thinking": {"type": "adaptive"},
            "messages": [{"role": "user", "content": prompt}],
        }
        if system:
            kwargs["system"] = system
        response = self.client.messages.create(**kwargs)
        return self._first_text(response)

    def query_with_m365(self, prompt: str,
                        excel_file: str = None) -> str:
        """Query with Microsoft 365 document access.

        Args:
            prompt: User message content.
            excel_file: Optional file reference; when given it is sent as
                the only entry of ``metadata["m365_context"]["files"]``
                with read permission.

        Raises:
            ValueError: If the response contains no text block.
        """
        kwargs = {
            "model": self.MODEL_ID,
            "max_tokens": 8192,
            # NOTE(review): every other request in this file sends only
            # {"type": "adaptive"}; confirm "effort": "deep" is a supported
            # setting for this endpoint.
            "thinking": {"type": "adaptive", "effort": "deep"},
            "messages": [{"role": "user", "content": prompt}],
        }
        if excel_file:
            # NOTE(review): the Anthropic Messages API documents only
            # `user_id` under `metadata`; confirm Foundry accepts this key.
            kwargs["metadata"] = {
                "m365_context": {
                    "files": [excel_file],
                    "permissions": ["read"],
                }
            }
        response = self.client.messages.create(**kwargs)
        return self._first_text(response)
Multi-Provider Client with Failover
class MultiProviderClient:
    """Opus 4.6 client with automatic failover across providers.

    Providers are registered by name; each must expose
    ``query(prompt, **kwargs)``. ``query`` tries the primary first, then
    the names in ``failover_order``, then any other registered provider.
    """

    def __init__(self, primary: str = "anthropic"):
        """Start with no providers; *primary* is the name tried first."""
        self.providers = {}
        self.primary = primary
        self.failover_order = ["anthropic", "bedrock", "vertex", "foundry"]

    def register_provider(self, name: str, client):
        """Register (or replace) the client used for provider *name*."""
        self.providers[name] = client

    def _attempt_order(self) -> list[str]:
        """Return registered provider names in the order they are tried.

        Primary first, then ``failover_order``, then any registered
        provider not mentioned in either (in registration order), so that
        custom-named providers are not silently ignored. Duplicates are
        dropped while keeping first-seen order.
        """
        candidates = [self.primary, *self.failover_order, *self.providers]
        return [
            name for name in dict.fromkeys(candidates)
            if name in self.providers
        ]

    def query(self, prompt: str, **kwargs) -> dict:
        """Query with automatic failover.

        Args:
            prompt: Passed as the first argument to each provider's
                ``query`` method.
            **kwargs: Forwarded unchanged to the provider's ``query``.

        Returns:
            A dict with ``result`` (the provider's return value),
            ``provider`` (the name that answered) and ``failover``
            (True when the answering provider is not the primary).

        Raises:
            RuntimeError: If no provider is registered, or if every
                registered provider raised. In the latter case the last
                provider's exception is chained as ``__cause__``.
        """
        order = self._attempt_order()
        if not order:
            raise RuntimeError(
                "No providers registered; call register_provider() first"
            )

        errors = []
        last_error = None
        for provider_name in order:
            client = self.providers[provider_name]
            try:
                result = client.query(prompt, **kwargs)
            except Exception as e:
                # Deliberately broad: any provider failure should trigger
                # failover; the details are kept for the final error.
                errors.append({"provider": provider_name, "error": str(e)})
                last_error = e
            else:
                return {
                    "result": result,
                    "provider": provider_name,
                    "failover": provider_name != self.primary,
                }
        raise RuntimeError(
            f"All providers failed: {errors}"
        ) from last_error
# Usage
class AnthropicOpusClient:
    """Adapter that gives the direct Anthropic SDK client a ``query`` method.

    ``MultiProviderClient.query`` calls ``client.query(prompt, **kwargs)``
    on every registered provider. A raw ``Anthropic()`` object has no such
    method, so registering it directly makes the primary fail on every
    request and silently fall over to the next provider.
    """

    def __init__(self, client):
        self.client = client

    def query(self, prompt: str, system: str = "",
              max_tokens: int = 4096) -> str:
        """Send *prompt* and return the text of the first text block."""
        kwargs = {
            # Same model ID string the Foundry client in this file uses.
            # NOTE(review): confirm it is the right ID for the direct API.
            "model": "claude-opus-4-6-20260205",
            "max_tokens": max_tokens,
            "thinking": {"type": "adaptive"},
            "messages": [{"role": "user", "content": prompt}],
        }
        if system:
            kwargs["system"] = system
        response = self.client.messages.create(**kwargs)
        for block in response.content:
            if block.type == "text":
                return block.text
        raise ValueError("Anthropic response contained no text block")


multi = MultiProviderClient(primary="anthropic")
multi.register_provider("anthropic", AnthropicOpusClient(Anthropic()))
multi.register_provider("bedrock", BedrockOpusClient("us-east-1"))
multi.register_provider("vertex", VertexOpusClient("my-project"))
result = multi.query("Analyze this code for security issues...",
                     system="You are a security auditor.")
print(f"Answered by: {result['provider']}")
print(f"Failover used: {result['failover']}")
Provider Selection Decision Tree
START
│
├─ Already on AWS? ──────────────────────▶ Use Bedrock
│ (unified billing, VPC, IAM)
│
├─ Already on GCP? ──────────────────────▶ Use Vertex AI
│ (unified billing, VPC-SC)
│
├─ Already on Azure? ────────────────────▶ Use Foundry
│ (unified billing, M365 integration)
│
├─ Need M365 integration? ───────────────▶ Use Foundry
│ (native Excel/PPT/Word access)
│
├─ Need lowest latency? ─────────────────▶ Use Anthropic API
│ (direct, no intermediary)
│
├─ Need VPC / private networking? ───────▶ Use cloud provider
│ (Bedrock, Vertex, or Foundry)
│
└─ Just getting started? ────────────────▶ Use Anthropic API
(simplest setup, pay-as-you-go)
This concludes the Enterprise Integration module. In the next module, you will learn how to secure your Opus 4.6 deployment — data classification, audit trails, and compliance frameworks.