Lesson 41 of 46 ~25 min
Course progress
0%

Cloud Provider Deployment

Deploy Opus 4.6 via AWS Bedrock, Google Vertex AI, and Microsoft Foundry — configuration, pricing differences, and provider selection criteria.

Opus 4.6 is available through three cloud providers beyond the direct Anthropic API. Each has different pricing, latency characteristics, compliance certifications, and integration points with your existing infrastructure.

Provider Comparison

| Feature | Anthropic API | AWS Bedrock | Google Vertex AI | Microsoft Foundry |
| --- | --- | --- | --- | --- |
| Pricing | $5/$25 per 1M tokens | Markup varies | Markup varies | Markup varies |
| Billing | Separate account | AWS bill | GCP bill | Azure bill |
| Data residency | US | Multi-region | Multi-region | Multi-region |
SOC 2
HIPAA BAA
VPC/Private Link
1M context✅ (beta)✅ (beta)
M365 integration✅ Native
Fine-tuning

AWS Bedrock Deployment

Configuration

import boto3
import json

class BedrockOpusClient:
    """Opus 4.6 client for AWS Bedrock.

    Sends Anthropic Messages-format request bodies through the
    ``bedrock-runtime`` ``invoke_model`` and
    ``invoke_model_with_response_stream`` operations.
    """

    MODEL_ID = "anthropic.claude-opus-4-6-20260205-v1:0"
    # Bedrock's Anthropic Messages request format documents this fixed
    # value for ``anthropic_version``.
    ANTHROPIC_VERSION = "bedrock-2023-05-31"

    def __init__(self, region: str = "us-east-1"):
        """Create the ``bedrock-runtime`` client for ``region``."""
        self.client = boto3.client(
            "bedrock-runtime",
            region_name=region
        )

    def _build_body(self, prompt: str, system: str, max_tokens: int) -> str:
        """Return the JSON request body shared by both query methods."""
        body = {
            "anthropic_version": self.ANTHROPIC_VERSION,
            "max_tokens": max_tokens,
            "messages": [{"role": "user", "content": prompt}],
            "thinking": {"type": "adaptive"},
        }
        # Only send a system prompt when the caller supplied one.
        if system:
            body["system"] = system
        return json.dumps(body)

    def query(self, prompt: str, system: str = "",
              max_tokens: int = 4096) -> str:
        """Send ``prompt`` and return the text of the first text block.

        Args:
            prompt: User message content.
            system: Optional system prompt; omitted from the request when
                empty.
            max_tokens: Value sent as ``max_tokens``.

        Returns:
            The ``text`` of the first content block whose type is ``text``.

        Raises:
            ValueError: If the response contains no text content block.
        """
        response = self.client.invoke_model(
            modelId=self.MODEL_ID,
            contentType="application/json",
            accept="application/json",
            body=self._build_body(prompt, system, max_tokens),
        )

        result = json.loads(response["body"].read())
        # Other block types (e.g. thinking) may precede the text block, so
        # scan rather than index; an explicit error replaces the
        # StopIteration a bare next() would leak to the caller.
        for block in result.get("content", []):
            if block.get("type") == "text":
                return block["text"]
        raise ValueError(
            "Bedrock response contained no text content block "
            f"(stop_reason={result.get('stop_reason')!r})"
        )

    def query_streaming(self, prompt: str, system: str = "",
                        max_tokens: int = 4096):
        """Stream responses from Bedrock.

        Yields the ``text`` of each ``text_delta`` found in
        ``content_block_delta`` events; every other event type is skipped.

        Raises:
            RuntimeError: If a stream event carries no ``chunk`` payload.
        """
        response = self.client.invoke_model_with_response_stream(
            modelId=self.MODEL_ID,
            contentType="application/json",
            accept="application/json",
            body=self._build_body(prompt, system, max_tokens),
        )

        for event in response["body"]:
            payload = event.get("chunk")
            if payload is None:
                # Name the unexpected event instead of failing with a bare
                # KeyError on the missing "chunk" key.
                raise RuntimeError(
                    f"Bedrock stream returned a non-chunk event: "
                    f"{sorted(event)}"
                )
            chunk = json.loads(payload["bytes"])
            if chunk.get("type") != "content_block_delta":
                continue
            delta = chunk.get("delta", {})
            if delta.get("type") == "text_delta":
                yield delta["text"]

Bedrock-Specific Features

# Cross-region inference for higher throughput
# NOTE(review): this only creates a runtime client in us-east-1. Cross-region
# inference is presumably selected by passing an inference profile ID as
# ``modelId`` on each call — confirm the profile ID for this model.
client = boto3.client("bedrock-runtime", region_name="us-east-1")

# Enable model invocation logging for audit
logging_client = boto3.client("bedrock", region_name="us-east-1")
logging_client.put_model_invocation_logging_configuration(
    loggingConfig={
        "cloudWatchConfig": {
            "logGroupName": "/aws/bedrock/opus-46-invocations",
            # IAM role ARNs carry the 12-digit account ID between the two
            # colons; 123456789012 is a placeholder — use your own account.
            "roleArn": "arn:aws:iam::123456789012:role/BedrockLogging",
        },
        "textDataDeliveryEnabled": True,
    }
)

Google Vertex AI Deployment

Configuration

from anthropic import AnthropicVertex

class VertexOpusClient:
    """Opus 4.6 client for Google Vertex AI."""

    # Single source of truth for the model ID used by every request.
    MODEL_ID = "claude-opus-4-6@20260205"

    def __init__(self, project_id: str, region: str = "us-east5"):
        """Create the ``AnthropicVertex`` client for the project/region."""
        self.client = AnthropicVertex(
            project_id=project_id,
            region=region
        )

    @staticmethod
    def _first_text(response) -> str:
        """Return the text of the first ``text`` block in ``response``.

        Raises:
            ValueError: If no content block has type ``text``. An explicit
                error replaces the StopIteration a bare next() would leak.
        """
        for block in response.content:
            if block.type == "text":
                return block.text
        raise ValueError("Vertex AI response contained no text content block")

    def query(self, prompt: str, system: str = "",
              max_tokens: int = 4096) -> str:
        """Send ``prompt`` and return the first text block of the reply.

        Args:
            prompt: User message content.
            system: Optional system prompt; omitted from the request when
                empty.
            max_tokens: Value sent as ``max_tokens``.

        Raises:
            ValueError: If the response contains no text content block.
        """
        kwargs = {
            "model": self.MODEL_ID,
            "max_tokens": max_tokens,
            "thinking": {"type": "adaptive"},
            "messages": [{"role": "user", "content": prompt}],
        }
        if system:
            kwargs["system"] = system

        return self._first_text(self.client.messages.create(**kwargs))

    def query_with_grounding(self, prompt: str) -> str:
        """Use Vertex AI grounding with Google Search.

        Sends ``prompt`` with a ``google_search_grounding`` entry in the
        request ``metadata`` and returns the first text block of the reply.

        NOTE(review): the Messages API reference documents only ``user_id``
        under ``metadata`` — confirm this grounding key is accepted and
        honoured by Vertex AI before relying on it.

        Raises:
            ValueError: If the response contains no text content block.
        """
        response = self.client.messages.create(
            model=self.MODEL_ID,
            max_tokens=4096,
            thinking={"type": "adaptive"},
            messages=[{"role": "user", "content": prompt}],
            metadata={
                "google_search_grounding": {"enabled": True}
            }
        )

        return self._first_text(response)

Vertex-Specific Features

# Vertex AI supports batch prediction for cost savings
from google.cloud import aiplatform

def submit_batch_job(prompts: list[str], project_id: str):
    """Create a Vertex AI batch prediction job covering ``prompts``.

    Initialises the ``aiplatform`` SDK for ``project_id`` in ``us-east5``,
    then requests a ``BatchPredictionJob`` against the Anthropic publisher
    model with one ``{"prompt": ...}`` instance per entry in ``prompts``.

    Returns:
        The object returned by ``aiplatform.BatchPredictionJob.create``.

    NOTE(review): confirm that ``BatchPredictionJob.create`` accepts
    ``instances=`` and ``machine_type=`` for a publisher model — the SDK
    reference describes Cloud Storage / BigQuery sources for batch input.
    """
    location = "us-east5"
    aiplatform.init(project=project_id, location=location)

    # Fully-qualified resource name of the publisher model.
    publisher_model = (
        f"projects/{project_id}/locations/us-east5/"
        f"publishers/anthropic/models/claude-opus-4-6@20260205"
    )
    payload = [{"prompt": text} for text in prompts]

    return aiplatform.BatchPredictionJob.create(
        job_display_name="opus-46-batch",
        model_name=publisher_model,
        instances=payload,
        machine_type="n1-standard-4",
    )

Microsoft Foundry Deployment

Configuration

from anthropic import Anthropic

class FoundryOpusClient:
    """Opus 4.6 client for Microsoft Foundry."""

    # Single source of truth for the model ID used by every request.
    MODEL_ID = "claude-opus-4-6-20260205"

    def __init__(self, resource_name: str, deployment_name: str):
        """Point the Anthropic SDK client at a Foundry deployment URL.

        NOTE(review): no credential is passed here, so the SDK falls back to
        its own defaults — confirm how the Foundry API key or Entra ID token
        is meant to be supplied, and that this URL shape matches the
        deployment's endpoint.
        """
        self.client = Anthropic(
            base_url=f"https://{resource_name}.services.ai.azure.com/"
                     f"anthropic/deployments/{deployment_name}",
            # Uses Azure AD token or API key
        )

    @staticmethod
    def _first_text(response) -> str:
        """Return the text of the first ``text`` block in ``response``.

        Raises:
            ValueError: If no content block has type ``text``. An explicit
                error replaces the StopIteration a bare next() would leak.
        """
        for block in response.content:
            if block.type == "text":
                return block.text
        raise ValueError("Foundry response contained no text content block")

    def query(self, prompt: str, system: str = "",
              max_tokens: int = 4096) -> str:
        """Send ``prompt`` and return the first text block of the reply.

        Args:
            prompt: User message content.
            system: Optional system prompt; omitted from the request when
                empty.
            max_tokens: Value sent as ``max_tokens``.

        Raises:
            ValueError: If the response contains no text content block.
        """
        kwargs = {
            "model": self.MODEL_ID,
            "max_tokens": max_tokens,
            "thinking": {"type": "adaptive"},
            "messages": [{"role": "user", "content": prompt}],
        }
        if system:
            kwargs["system"] = system

        return self._first_text(self.client.messages.create(**kwargs))

    def query_with_m365(self, prompt: str,
                        excel_file: "str | None" = None) -> str:
        """Query with Microsoft 365 document access.

        Args:
            prompt: User message content.
            excel_file: Optional file reference; when given it is sent in
                the request ``metadata`` under ``m365_context`` with
                ``read`` permission.

        NOTE(review): ``effort`` is sent inside ``thinking`` with the value
        ``"deep"``, and ``m365_context`` is sent under ``metadata``; the
        Messages API reference documents neither — confirm both against the
        Foundry documentation.

        Raises:
            ValueError: If the response contains no text content block.
        """
        kwargs = {
            "model": self.MODEL_ID,
            "max_tokens": 8192,
            "thinking": {"type": "adaptive", "effort": "deep"},
            "messages": [{"role": "user", "content": prompt}],
        }
        if excel_file:
            kwargs["metadata"] = {
                "m365_context": {
                    "files": [excel_file],
                    "permissions": ["read"]
                }
            }

        return self._first_text(self.client.messages.create(**kwargs))

Multi-Provider Client with Failover

class MultiProviderClient:
    """Opus 4.6 client with automatic failover across providers.

    Providers are objects exposing ``query(prompt, **kwargs)``. They are
    tried in this order: the primary, then ``failover_order``, then any
    other registered provider in registration order.
    """

    def __init__(self, primary: str = "anthropic"):
        """Start with no providers; ``primary`` names the one tried first."""
        self.providers = {}
        self.primary = primary
        self.failover_order = ["anthropic", "bedrock", "vertex", "foundry"]

    def register_provider(self, name: str, client):
        """Register ``client`` under ``name``, replacing any existing entry."""
        self.providers[name] = client

    def _attempt_order(self) -> list:
        """Return registered provider names in the order they are tried."""
        # Appending self.providers means a provider registered under a name
        # missing from failover_order is still attempted (last) instead of
        # being silently ignored; dict.fromkeys de-duplicates in order.
        candidates = [self.primary, *self.failover_order, *self.providers]
        return [
            name for name in dict.fromkeys(candidates)
            if name in self.providers
        ]

    def query(self, prompt: str, **kwargs) -> dict:
        """Query with automatic failover.

        Args:
            prompt: Passed as the first argument to each provider's
                ``query``.
            **kwargs: Forwarded unchanged to each provider's ``query``.

        Returns:
            A dict with ``result`` (the provider's return value),
            ``provider`` (the name that answered) and ``failover`` (True
            when the answering provider is not the primary).

        Raises:
            RuntimeError: If no provider is registered, or if every
                registered provider raised; in the latter case the last
                provider exception is chained as the cause.
        """
        order = self._attempt_order()
        if not order:
            raise RuntimeError(
                "No providers registered; call register_provider() first"
            )

        errors = []
        last_error = None

        for provider_name in order:
            try:
                client = self.providers[provider_name]
                result = client.query(prompt, **kwargs)
            except Exception as exc:
                # Failover boundary: any provider failure moves on to the
                # next provider; the details are kept for the final error.
                errors.append({"provider": provider_name, "error": str(exc)})
                last_error = exc
                continue

            return {
                "result": result,
                "provider": provider_name,
                "failover": provider_name != self.primary,
            }

        raise RuntimeError(
            f"All providers failed: {errors}"
        ) from last_error

# Usage
class AnthropicOpusClient:
    """Adapter giving the direct Anthropic SDK client a ``query`` method.

    ``MultiProviderClient`` calls ``query(prompt, **kwargs)`` on every
    registered provider; a bare ``Anthropic()`` object has no such method,
    so registering it directly would fail on every call and always trigger
    failover.
    """

    def __init__(self):
        self.client = Anthropic()

    def query(self, prompt: str, system: str = "",
              max_tokens: int = 4096) -> str:
        """Send ``prompt`` and return the first text block of the reply."""
        kwargs = {
            "model": "claude-opus-4-6-20260205",
            "max_tokens": max_tokens,
            "thinking": {"type": "adaptive"},
            "messages": [{"role": "user", "content": prompt}],
        }
        if system:
            kwargs["system"] = system

        response = self.client.messages.create(**kwargs)
        for block in response.content:
            if block.type == "text":
                return block.text
        raise ValueError("Anthropic response contained no text content block")


multi = MultiProviderClient(primary="anthropic")
multi.register_provider("anthropic", AnthropicOpusClient())
multi.register_provider("bedrock", BedrockOpusClient("us-east-1"))
multi.register_provider("vertex", VertexOpusClient("my-project"))

result = multi.query("Analyze this code for security issues...",
                     system="You are a security auditor.")
print(f"Answered by: {result['provider']}")
print(f"Failover used: {result['failover']}")

Provider Selection Decision Tree

START

  ├─ Already on AWS? ──────────────────────▶ Use Bedrock
  │    (unified billing, VPC, IAM)

  ├─ Already on GCP? ──────────────────────▶ Use Vertex AI
  │    (unified billing, VPC-SC)

  ├─ Already on Azure? ────────────────────▶ Use Foundry
  │    (unified billing, M365 integration)

  ├─ Need M365 integration? ───────────────▶ Use Foundry
  │    (native Excel/PPT/Word access)

  ├─ Need lowest latency? ─────────────────▶ Use Anthropic API
  │    (direct, no intermediary)

  ├─ Need VPC / private networking? ───────▶ Use cloud provider
  │    (Bedrock, Vertex, or Foundry)

  └─ Just getting started? ────────────────▶ Use Anthropic API
       (simplest setup, pay-as-you-go)

This concludes the Enterprise Integration module. In the next module, you will learn how to secure your Opus 4.6 deployment — data classification, audit trails, and compliance frameworks.