Lesson 36 of 46 ~25 min
Course progress
0%

Compliance Frameworks for LLM Deployment

Navigate GDPR, SOC 2, and HIPAA compliance when deploying Claude — data residency, processor agreements, risk assessments, and practical controls.

Deploying an LLM in a regulated environment requires mapping AI-specific risks to established compliance frameworks. This lesson covers the practical controls you need to implement — not legal advice, but engineering decisions that satisfy auditors.

GDPR Compliance

The General Data Protection Regulation applies when you process personal data of EU residents, regardless of where your company is located.

Key GDPR Requirements for LLM Deployment

| GDPR Article | Requirement | LLM Implementation |
| --- | --- | --- |
| Art. 5 | Data minimization | Send only necessary data — sanitize before API calls |
| Art. 6 | Lawful basis | Document why you need to process this data via LLM |
| Art. 13/14 | Transparency | Inform users that AI processes their data |
| Art. 17 | Right to erasure | Ability to delete all LLM interaction logs for a user |
| Art. 22 | Automated decisions | Human review for decisions with legal/significant effects |
| Art. 28 | Processor agreement | DPA with Anthropic (or cloud provider) |
| Art. 35 | Impact assessment | DPIA for high-risk AI processing |

GDPR-Compliant Architecture

class GDPRCompliantClient:
    """LLM client with GDPR compliance controls.

    Every request is gated on recorded consent for a stated purpose, scrubbed
    of e-mail addresses and phone numbers before it is sent, and written to
    the audit logger together with retention metadata.

    The scrubbing is pattern-based and only covers e-mail addresses and phone
    numbers; other personal data (names, addresses, IDs) passes through
    unchanged and must be handled upstream.
    """

    # Model used when the caller does not pass ``model=`` in ``**kwargs``.
    DEFAULT_MODEL = "claude-opus-4-6-20260205"
    # The Messages API requires ``max_tokens``; used when the caller omits it.
    DEFAULT_MAX_TOKENS = 4096

    # (regex, placeholder) pairs applied in order by ``_minimize_data``.
    # The country-code prefix of the phone pattern is optional so that plain
    # national numbers such as "555-123-4567" or "(555) 123-4567" are caught.
    _PII_RULES = (
        (r'[\w.+-]+@[\w-]+\.[\w.]+', '[email]'),
        (r'(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
         '[phone]'),
    )

    def __init__(self, user_id: str, consent_manager, audit_logger):
        """Bind the client to one user.

        Args:
            user_id: Identifier used for consent checks and audit entries.
            consent_manager: Object exposing ``has_consent(user_id, purpose)``.
            audit_logger: Object exposing ``log_request(...)`` and
                ``delete_by_user(user_id)``.
        """
        # Imported lazily so the class can be defined without the SDK present.
        from anthropic import Anthropic
        self.client = Anthropic()
        self.user_id = user_id
        self.consent = consent_manager
        self.logger = audit_logger

    def process(self, messages: list[dict], purpose: str,
                **kwargs) -> dict:
        """Process a request with GDPR compliance checks.

        Args:
            messages: Chat messages to send; they are scrubbed first and the
                caller's list is not modified.
            purpose: Processing purpose that consent is checked against and
                that is recorded in the audit entry.
            **kwargs: Extra arguments forwarded to ``messages.create``.
                ``model`` and ``max_tokens`` fall back to the class defaults
                when not supplied.

        Returns:
            Whatever ``self.client.messages.create`` returns, unchanged.

        Raises:
            ComplianceError: If the consent manager reports no consent for
                ``purpose``.
        """
        # 1. Verify lawful basis
        if not self.consent.has_consent(self.user_id, purpose):
            raise ComplianceError(
                f"No consent for purpose '{purpose}' from user {self.user_id}"
            )

        # 2. Data minimization — strip unnecessary fields
        minimized = self._minimize_data(messages)

        # 3. Process with audit trail. Caller-supplied kwargs win over the
        #    defaults, so passing ``model=`` no longer raises a duplicate
        #    keyword error and ``max_tokens`` is always present.
        request = {
            "model": self.DEFAULT_MODEL,
            "max_tokens": self.DEFAULT_MAX_TOKENS,
            **kwargs,
        }
        response = self.client.messages.create(messages=minimized, **request)

        # 4. Log with retention metadata (only the scrubbed messages are
        #    logged, never the raw input)
        self.logger.log_request(
            user_id=self.user_id,
            messages=minimized,
            response=response,
            metadata={
                "gdpr_purpose": purpose,
                "retention_days": 90,
                "erasure_eligible": True,
            }
        )

        return response

    def _minimize_data(self, messages: list[dict]) -> list[dict]:
        """Return copies of ``messages`` with e-mails and phone numbers masked.

        String content is scrubbed directly. List content (content blocks) is
        scrubbed block by block: any dict block with a string ``"text"`` field
        gets that field scrubbed; every other block is passed through as-is.
        Messages without a ``"content"`` key get ``"content": ""``.
        """
        import re

        def scrub(text: str) -> str:
            for pattern, placeholder in self._PII_RULES:
                text = re.sub(pattern, placeholder, text)
            return text

        minimized = []
        for msg in messages:
            content = msg.get("content", "")
            if isinstance(content, str):
                content = scrub(content)
            elif isinstance(content, list):
                content = [
                    {**block, "text": scrub(block["text"])}
                    if isinstance(block, dict)
                    and isinstance(block.get("text"), str)
                    else block
                    for block in content
                ]
            # Build a new dict so the caller's message is never mutated.
            minimized.append({**msg, "content": content})
        return minimized

    def erase_user_data(self, user_id: str):
        """Right to erasure — delete all LLM interaction logs.

        Deletes by the ``user_id`` argument, not by ``self.user_id``.
        """
        self.logger.delete_by_user(user_id)

class ComplianceError(Exception):
    """Raised when a request is blocked by a compliance control."""

SOC 2 Compliance

SOC 2 audits examine your controls around security, availability, processing integrity, confidentiality, and privacy. LLM deployments touch all five trust service criteria.

SOC 2 Control Mapping

# SOC 2 common-criteria IDs mapped to the criterion they cover and the
# LLM-specific controls that address it. Each row below is
# (control id, criterion, controls); the comprehension turns the rows into
# the {"criteria": ..., "llm_controls": [...]} mapping keyed by control id.
SOC2_CONTROLS = {
    control_id: {"criteria": criterion, "llm_controls": controls}
    for control_id, criterion, controls in (
        (
            "CC6.1",
            "Logical and physical access controls",
            [
                "API key rotation every 90 days",
                "Role-based access to different model tiers",
                "IP allowlisting for API access",
                "MFA for admin access to LLM configuration",
            ],
        ),
        (
            "CC6.6",
            "System boundary controls",
            [
                "VPC/Private Link for cloud provider connections",
                "Data sanitization pipeline before API calls",
                "Network segmentation between LLM and data stores",
            ],
        ),
        (
            "CC7.2",
            "Monitoring system components",
            [
                "Audit logging of all API interactions",
                "Anomaly detection on token usage and costs",
                "Alerting on error rate spikes",
                "Monthly access reviews for API keys",
            ],
        ),
        (
            "CC8.1",
            "Change management",
            [
                "Model version pinning (not 'latest')",
                "Prompt change tracking in version control",
                "A/B testing for model upgrades",
                "Rollback procedures documented",
            ],
        ),
    )
}

SOC 2 Evidence Collection

class SOC2EvidenceCollector:
    """Collect audit evidence for SOC 2 compliance.

    Reads entries from an audit logger and turns them into plain-dict
    reports that can be handed to an auditor.
    """

    # Fields copied from each change record into the change log, in order.
    _CHANGE_FIELDS = ("date", "type", "description", "approved_by", "tested")

    def __init__(self, audit_logger):
        """Store the audit logger; it must expose ``query(start, end)``."""
        self.logger = audit_logger

    def generate_access_review(self, period_start: str,
                                period_end: str) -> dict:
        """Generate access review evidence.

        Args:
            period_start: Start of the review period, passed to the logger.
            period_end: End of the review period, passed to the logger.

        Returns:
            A dict with the period label, request count, the distinct users
            and models seen (sorted, so the report is reproducible), the
            number of entries that carry no ``user_id``, and the fraction of
            entries whose ``status`` is ``"error"`` (0 for an empty period).
        """
        # Materialize once: the result is measured and scanned several times,
        # which would fail or silently under-count on a one-shot iterator.
        entries = list(self.logger.query(period_start, period_end))
        total = len(entries)

        # Entries without the field must not show up as a user/model "None".
        users = {e.get("user_id") for e in entries} - {None}
        models = {e.get("model") for e in entries} - {None}
        unattributed = sum(1 for e in entries if e.get("user_id") is None)
        errors = sum(1 for e in entries if e.get("status") == "error")

        return {
            "period": f"{period_start} to {period_end}",
            "total_requests": total,
            "unique_users": len(users),
            # key=str keeps the sort well-defined if identifiers mix types.
            "users": sorted(users, key=str),
            "models_accessed": sorted(models, key=str),
            # Surfaced separately so unattributed traffic is not hidden.
            "unattributed_requests": unattributed,
            "error_rate": errors / total if total else 0.0,
        }

    def generate_change_log(self, prompt_versions: list[dict]) -> dict:
        """Document prompt and configuration changes.

        Args:
            prompt_versions: Change records; each must contain the keys
                ``date``, ``type``, ``description``, ``approved_by`` and
                ``tested``. Any other keys are dropped.

        Returns:
            ``{"changes": [...], "total_changes": n}`` with one trimmed
            record per input record, in input order.

        Raises:
            KeyError: If a record lacks one of the required keys.
        """
        return {
            "changes": [
                {field: version[field] for field in self._CHANGE_FIELDS}
                for version in prompt_versions
            ],
            "total_changes": len(prompt_versions),
        }

HIPAA Compliance

If your LLM processes Protected Health Information (PHI), HIPAA applies. The key requirement: PHI must never be sent to the model without a Business Associate Agreement (BAA) and appropriate safeguards.

HIPAA Architecture

┌─────────────┐    ┌──────────────┐    ┌─────────────────┐
│  EHR/EMR    │───▶│  PHI Strip   │───▶│  Opus 4.6 API   │
│  System     │    │  Layer       │    │  (BAA in place) │
│             │◀───│              │◀───│                 │
└─────────────┘    └─────┬────────┘    └─────────────────┘
                         │
                    ┌────┴────┐
                    │  Audit  │
                    │  Log    │
                    └─────────┘

HIPAA Safeguards Implementation

class HIPAACompliantProcessor:
    """Process clinical data with HIPAA safeguards.

    Without a BAA the note is scrubbed before it is sent; in every case the
    model's text output is scanned and blocked if it still looks like it
    contains PHI.

    The scrubbing is regex-based and covers only identifiers with a
    recognisable shape (SSNs, MRNs, e-mail addresses, URLs, IP addresses,
    dates, phone/fax numbers, ages over 89). It does NOT remove names,
    addresses or other free-text identifiers, so on its own it does not
    achieve full Safe Harbor de-identification.
    """

    # HIPAA Safe Harbor de-identification: 18 identifiers to remove.
    # Informational list; the scrubbing code below does not read it.
    SAFE_HARBOR_IDENTIFIERS = [
        "names", "geographic_data", "dates", "phone_numbers",
        "fax_numbers", "email_addresses", "ssn", "mrn",
        "health_plan_numbers", "account_numbers", "certificate_numbers",
        "vehicle_identifiers", "device_identifiers", "urls",
        "ip_addresses", "biometric_identifiers", "photos",
        "any_other_unique_identifier"
    ]

    # Month names and their common abbreviations, for written-out dates.
    _MONTHS = (
        r'(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?'
        r'|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?'
        r'|Nov(?:ember)?|Dec(?:ember)?)'
    )

    # (regex, placeholder) pairs, matched case-insensitively. The single
    # table drives both scrubbing and detection so the two cannot drift
    # apart. Order matters: more specific shapes (SSN, MRN, dates) are
    # replaced before the looser phone pattern runs.
    _PHI_RULES = (
        (r'\b\d{3}-\d{2}-\d{4}\b', '[SSN]'),
        # Separator after "MRN" is optional so "MRN12345" is caught too.
        (r'\bMRN\s*[:#]?\s*\d+\b', 'MRN: [REDACTED]'),
        (r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+', '[EMAIL]'),
        (r'\b(?:https?://|www\.)\S+', '[URL]'),
        (r'\b(?:\d{1,3}\.){3}\d{1,3}\b', '[IP]'),
        # ISO dates (2024-01-05), numeric dates (1/5/2024), written dates
        # (January 5, 2024).
        (r'\b\d{4}-\d{1,2}-\d{1,2}\b', '[DATE]'),
        (r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b', '[DATE]'),
        (r'\b' + _MONTHS + r'\.?\s+\d{1,2}(?:st|nd|rd|th)?,?\s+\d{4}\b',
         '[DATE]'),
        (r'(?<!\d)(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
         r'(?!\d)', '[PHONE]'),
        # Ages of 90 and above, including plural and hyphenated forms
        # ("92 years old", "92 yrs", "92-year-old", "92 y.o.").
        (r'\b(?:9\d|[1-9]\d{2,})[\s-]*(?:years?|yrs?|y\.?o\.?)'
         r'(?:[\s-]*old)?\b', '[AGE>89]'),
    )

    def __init__(self, has_baa: bool = False):
        """Create the processor.

        Args:
            has_baa: Whether a Business Associate Agreement is in place.
                When False, notes are scrubbed before being sent.
        """
        # Imported lazily so the class can be defined without the SDK present.
        from anthropic import Anthropic
        self.client = Anthropic()
        self.has_baa = has_baa

    def process_clinical_note(self, note: str,
                              task: str = "summarize") -> str:
        """Process a clinical note with HIPAA safeguards.

        Args:
            note: Clinical note text.
            task: Instruction placed in front of the note in the prompt.

        Returns:
            The concatenated text blocks of the model response.

        Raises:
            ComplianceError: If the output matches any PHI pattern.
            RuntimeError: If the response contains no text block.
        """
        if not self.has_baa:
            # Without BAA, must fully de-identify
            note = self._deidentify(note)

        response = self.client.messages.create(
            model="claude-opus-4-6-20260205",
            max_tokens=4096,
            # NOTE(review): confirm that this ``thinking`` payload (the
            # "effort" key and the value "deep") matches the current
            # Messages API schema — TODO verify against the API reference.
            thinking={"type": "adaptive", "effort": "deep"},
            system="""You are a clinical documentation assistant.
Never include patient identifiers in your output.
If the input contains identifiers, ignore them.""",
            messages=[{
                "role": "user",
                "content": f"Task: {task}\n\nClinical note:\n{note}"
            }]
        )

        # Collect every text block so the PHI check sees the whole answer,
        # and fail with a clear error instead of a stray StopIteration when
        # the response has no text at all.
        text_blocks = [
            block.text for block in response.content if block.type == "text"
        ]
        if not text_blocks:
            raise RuntimeError("Model response contained no text content")
        output = "".join(text_blocks)

        # Verify output does not contain PHI
        if self._contains_phi(output):
            raise ComplianceError(
                "Model output contains potential PHI — blocked"
            )

        return output

    def _deidentify(self, text: str) -> str:
        """Replace every pattern-detectable identifier with a placeholder.

        Applies the class-level PHI rules in order. Identifiers without a
        recognisable shape (names, addresses, ...) are left untouched.
        """
        import re
        deidentified = text
        for pattern, placeholder in self._PHI_RULES:
            deidentified = re.sub(
                pattern, placeholder, deidentified, flags=re.IGNORECASE
            )
        return deidentified

    def _contains_phi(self, text: str) -> bool:
        """Return True if ``text`` matches any of the class-level PHI rules."""
        import re
        return any(
            re.search(pattern, text, re.IGNORECASE)
            for pattern, _ in self._PHI_RULES
        )

Compliance Checklist

Before going to production, verify every item:

# Pre-production checklist: each keyword is a control area and each list
# holds the items to verify for that area. Built with dict(...) keyword
# arguments; the resulting mapping keeps the areas in the order written.
COMPLIANCE_CHECKLIST = dict(
    data_protection=[
        "Data classification policy documented",
        "Sanitization pipeline tested with real data patterns",
        "Encryption at rest for stored interactions",
        "Encryption in transit (TLS 1.3) verified",
    ],
    access_control=[
        "API keys stored in secrets manager",
        "Role-based access to model tiers",
        "Key rotation schedule configured",
        "Access reviews scheduled quarterly",
    ],
    audit_logging=[
        "All API interactions logged",
        "Retention policy configured",
        "Log integrity protection (append-only or signed)",
        "Search and analysis capability verified",
    ],
    incident_response=[
        "Incident response plan includes AI-specific scenarios",
        "Data breach notification procedures updated for AI data",
        "Rollback procedures tested",
        "Contact information for Anthropic support documented",
    ],
)

In the next lesson, you will examine Opus 4.6’s safety properties — alignment testing results, misalignment rates, and how to build appropriate trust in AI outputs.