Lesson 29 of 46 ~20 min
Course progress
0%

Advanced Compaction Strategies

Master advanced compaction techniques — custom preservation rules, priority markers, measuring information loss, and fallback patterns when compaction degrades quality.

Default compaction works for simple conversations. Production systems need more control — you need to tell the compaction engine what matters, detect when information is being lost, and gracefully handle degradation.

Custom Preservation Rules

Mark specific messages or content as high-priority to resist compression:

def build_message_with_priority(content: str, priority: str = "normal") -> dict:
    """Build a user message, tagging it when it should resist compaction.

    A priority of ``"critical"`` or ``"high"`` prepends a
    ``[COMPACTION_PRIORITY: ...]`` marker line to the content. Any other
    priority value returns the content unchanged.
    """
    # (priority value accepted from the caller, label written into the marker)
    tagged_levels = (("critical", "CRITICAL"), ("high", "HIGH"))

    body = content
    for level, label in tagged_levels:
        if priority == level:
            body = f"[COMPACTION_PRIORITY: {label}]\n{content}"
            break

    return {"role": "user", "content": body}

# Usage: tag the hard requirements as critical; the routine review request
# relies on the default ("normal") priority and is sent untagged.
messages = [
    build_message_with_priority(
        content=(
            "Project requirements: The system must handle 10K concurrent users "
            "with p99 latency under 200ms. All data must be encrypted at rest "
            "and in transit."
        ),
        priority="critical",
    ),
    build_message_with_priority("Can you review this database schema?"),
]

Structured Context Injection

Instead of relying on compaction to preserve everything, proactively manage what stays in context:

from dataclasses import dataclass, field

@dataclass
class ManagedContext:
    """State the application tracks itself instead of leaving it to compaction.

    Holds requirements, decisions, open issues and per-file code snapshots,
    and renders them as markdown sections via ``to_system_context``.
    """

    decisions: list[str] = field(default_factory=list)
    requirements: list[str] = field(default_factory=list)
    code_state: dict[str, str] = field(default_factory=dict)
    open_issues: list[str] = field(default_factory=list)

    def to_system_context(self) -> str:
        """Render every non-empty collection as markdown.

        Sections appear in a fixed order (requirements, decisions, open
        issues, code state) separated by blank lines. Returns an empty
        string when nothing is tracked.
        """
        bullet_sections = (
            ("## Requirements\n", self.requirements),
            ("## Decisions Made\n", self.decisions),
            ("## Open Issues\n", self.open_issues),
        )
        rendered = [
            heading + "\n".join(f"- {entry}" for entry in entries)
            for heading, entries in bullet_sections
            if entries
        ]

        if self.code_state:
            # One fenced block per tracked file, each under its own heading.
            file_blocks = "".join(
                f"\n### {path}\n```\n{source}\n```\n"
                for path, source in self.code_state.items()
            )
            rendered.append("## Current Code State\n" + file_blocks)

        return "\n\n".join(rendered)

    def add_decision(self, decision: str):
        """Append a decision to the end of the decisions list."""
        self.decisions.append(decision)

    def resolve_issue(self, issue: str):
        """Drop every open issue equal to ``issue`` (no-op if absent)."""
        still_open = [
            open_issue for open_issue in self.open_issues if open_issue != issue
        ]
        self.open_issues = still_open


class CompactionAwareSession:
    """Chat session that re-sends managed context in the system prompt each turn.

    The rendered ``ManagedContext`` is appended to the base system prompt on
    every call, so that state travels outside the message history that is
    handed to the API together with the compaction settings.

    NOTE(review): ``Anthropic`` is not imported in the visible part of this
    file — confirm ``from anthropic import Anthropic`` is in scope.
    """

    def __init__(self, base_system_prompt: str):
        """Create the API client and start with empty context and history."""
        self.client = Anthropic()
        self.base_prompt = base_system_prompt
        self.context = ManagedContext()
        self.messages: list[dict] = []

    def send(self, message: str) -> str:
        """Send one user turn and return the first text block of the reply.

        Args:
            message: User text for this turn.

        Returns:
            The text of the first ``"text"`` content block in the response.

        Raises:
            StopIteration: If the response contains no text block.
            Exception: Whatever the client raises is propagated unchanged.

        On any failure the user turn appended for this call is removed again,
        so retrying ``send`` does not stack a second user message on top of
        an unanswered one.
        """
        turn_start = len(self.messages)
        self.messages.append({"role": "user", "content": message})

        try:
            # Only add the separator when there is managed context to show;
            # an empty context would otherwise leave dangling blank lines.
            managed = self.context.to_system_context()
            system = (
                f"{self.base_prompt}\n\n{managed}" if managed else self.base_prompt
            )

            response = self.client.messages.create(
                model="claude-opus-4-6-20260205",
                max_tokens=8192,
                system=system,
                messages=self.messages,
                # NOTE(review): confirm the API/SDK version in use accepts a
                # "compaction" block under `metadata`.
                metadata={
                    "compaction": {
                        "enabled": True,
                        "trigger_tokens": 150_000,
                        "preserve_recent": 10,
                    }
                },
            )

            text = next(b.text for b in response.content if b.type == "text")
        except Exception:
            # Roll back this turn so the history is left as it was before the call.
            del self.messages[turn_start:]
            raise

        self.messages.append({"role": "assistant", "content": text})
        return text

Measuring Information Loss

Compaction is inherently lossy. Measure how much you are losing:

import hashlib

class CompactionLossTracker:
    """Track information loss across compaction events."""

    def __init__(self):
        """Start with no recorded checkpoints."""
        self.checkpoints: list[dict] = []

    def checkpoint(self, messages: list[dict], label: str):
        """Record size statistics and a short content hash for ``messages``.

        Appends a dict with the label, message count, total content
        characters, and the first 16 hex digits of the SHA-256 of
        ``str(messages)`` to ``self.checkpoints``.
        """
        content_hash = hashlib.sha256(
            str(messages).encode()
        ).hexdigest()[:16]

        self.checkpoints.append({
            "label": label,
            "message_count": len(messages),
            "total_chars": self._total_chars(messages),
            "hash": content_hash,
        })

    def measure_loss(self, original_messages: list[dict],
                     compacted_messages: list[dict]) -> dict:
        """Compare pre/post compaction message lists.

        Returns:
            A dict with four ratios:

            * ``char_reduction`` / ``message_reduction`` – fraction of
              content characters / messages removed (``0.0`` when the
              original has none, since nothing could have been lost).
            * ``code_blocks_preserved`` / ``urls_preserved`` – fraction of
              fenced code blocks / URLs still present (``1.0`` when the
              original has none).
        """
        original_chars = self._total_chars(original_messages)
        compacted_chars = self._total_chars(compacted_messages)
        original_count = len(original_messages)

        # Count preserved entities (code blocks, URLs)
        original_code = self._count_code_blocks(original_messages)
        compacted_code = self._count_code_blocks(compacted_messages)

        original_urls = self._count_urls(original_messages)
        compacted_urls = self._count_urls(compacted_messages)

        return {
            # Guard the denominators the same way the entity ratios do, so an
            # empty or content-free original does not raise ZeroDivisionError.
            "char_reduction": (1 - (compacted_chars / original_chars)
                               if original_chars > 0 else 0.0),
            "message_reduction": (1 - (len(compacted_messages) / original_count)
                                  if original_count > 0 else 0.0),
            "code_blocks_preserved": (compacted_code / original_code
                                      if original_code > 0 else 1.0),
            "urls_preserved": (compacted_urls / original_urls
                               if original_urls > 0 else 1.0),
        }

    def _total_chars(self, messages: list[dict]) -> int:
        """Sum the length of each message's stringified ``content``."""
        return sum(len(str(m.get("content", ""))) for m in messages)

    def _joined_content(self, messages: list[dict]) -> str:
        """Join every message's stringified ``content`` with single spaces."""
        return " ".join(str(m.get("content", "")) for m in messages)

    def _count_code_blocks(self, messages: list[dict]) -> int:
        """Count fenced code blocks as pairs of triple-backtick fences."""
        import re
        # An unmatched trailing fence is ignored by the integer division.
        return len(re.findall(r'```', self._joined_content(messages))) // 2

    def _count_urls(self, messages: list[dict]) -> int:
        """Count http(s) URLs (scheme followed by non-whitespace)."""
        import re
        return len(re.findall(r'https?://\S+', self._joined_content(messages)))

Fallback Patterns for Degraded Compaction

When compaction quality drops, you need automatic fallbacks:

class CompactionWithFallback:
    """Session that switches strategy as recorded compression ratios fall.

    Each turn picks one of three paths from the average of the last three
    recorded compression ratios: a normal compacted request, a request with
    extra pinned context, or a restart seeded with a model-written summary.

    NOTE(review): ``Anthropic`` is not imported in the visible part of this
    file — confirm ``from anthropic import Anthropic`` is in scope.
    """

    # Single place to change the model used by every request in this class.
    MODEL = "claude-opus-4-6-20260205"

    def __init__(self):
        """Create the API client and start with empty history and context."""
        self.client = Anthropic()
        self.messages: list[dict] = []
        self.context = ManagedContext()
        self.compression_history: list[float] = []

    def send(self, message: str, system: str) -> str:
        """Send one user turn using the currently selected strategy.

        Args:
            message: User text for this turn.
            system: System prompt to use (strategies may extend it).

        Returns:
            The text of the first ``"text"`` block in the reply.

        Raises:
            StopIteration: If a response contains no text block.
            Exception: Whatever the client raises is propagated unchanged.

        On any failure the user turn appended for this call is removed again,
        so a retry does not stack a second user message on an unanswered one.
        """
        turn_start = len(self.messages)
        self.messages.append({"role": "user", "content": message})

        try:
            strategy = self._select_strategy()
            if strategy == "restart":
                return self._restart_conversation(system, message)
            if strategy == "aggressive_pinning":
                return self._send_with_aggressive_pinning(system)
            return self._send_with_compaction(system)
        except Exception:
            # The restart path replaces self.messages only after it succeeds,
            # so here the list is still the old history plus this turn.
            del self.messages[turn_start:]
            raise

    def _select_strategy(self) -> str:
        """Pick a strategy from the mean of the last three recorded ratios.

        Fewer than three samples always yields ``"normal"``.

        NOTE(review): the thresholds assume a lower ``compression_ratio``
        means more was squeezed out — confirm against the API's definition.
        """
        if len(self.compression_history) < 3:
            return "normal"

        recent_ratios = self.compression_history[-3:]
        avg_ratio = sum(recent_ratios) / len(recent_ratios)

        if avg_ratio < 0.15:
            return "restart"
        elif avg_ratio < 0.30:
            return "aggressive_pinning"
        return "normal"

    @staticmethod
    def _first_text(response) -> str:
        """Return the first text block's text; StopIteration if there is none."""
        return next(b.text for b in response.content if b.type == "text")

    def _send_with_compaction(self, system: str) -> str:
        """Send the current history with compaction settings attached."""
        response = self.client.messages.create(
            model=self.MODEL,
            max_tokens=8192,
            system=system,
            messages=self.messages,
            # NOTE(review): confirm the API/SDK version in use accepts a
            # "compaction" block under `metadata`.
            metadata={
                "compaction": {
                    "enabled": True,
                    "trigger_tokens": 150_000,
                    "preserve_recent": 10,
                }
            },
        )

        # Extract the reply first so a response without text does not leave
        # a ratio recorded for a turn that is then rolled back.
        text = self._first_text(response)

        if hasattr(response, 'compaction_info') and response.compaction_info.triggered:
            self.compression_history.append(
                response.compaction_info.compression_ratio
            )

        self.messages.append({"role": "assistant", "content": text})
        return text

    def _send_with_aggressive_pinning(self, system: str) -> str:
        """Pin more context when compression is getting too aggressive."""
        # The pending user turn is already in self.messages, so round up to
        # get a 1-based turn number (plain `len // 2` reports 0 on turn one).
        turn_number = (len(self.messages) + 1) // 2

        enriched_system = f"""{system}

{self.context.to_system_context()}

## Conversation Summary (pinned due to high compression)
This is turn {turn_number} of an extended conversation.
Previous compaction has been aggressive — refer to pinned context above
for critical details."""

        return self._send_with_compaction(enriched_system)

    def _restart_conversation(self, system: str, latest_message: str) -> str:
        """Restart with a fresh context, seeded with managed state.

        First asks the model to summarise the existing history, then answers
        ``latest_message`` in a new conversation whose system prompt carries
        that summary and the managed context. History and compression
        samples are replaced only after both requests have succeeded.
        """
        summary_prompt = (
            "Summarize the key decisions, open issues, and current state "
            "of our conversation in a structured format."
        )
        latest_turn = {"role": "user", "content": latest_message}

        # send() has already appended the pending user turn. Leave it out of
        # the summary request so the model is asked only to summarise, not
        # to answer the pending question in the same breath.
        history = self.messages
        if history and history[-1] == latest_turn:
            history = history[:-1]
        summary_request = [*history, {"role": "user", "content": summary_prompt}]

        summary_response = self.client.messages.create(
            model=self.MODEL,
            max_tokens=4096,
            system=system,
            messages=summary_request,
        )
        summary = self._first_text(summary_response)

        restart_system = f"""{system}

## Previous Session Summary
{summary}

{self.context.to_system_context()}"""

        response = self.client.messages.create(
            model=self.MODEL,
            max_tokens=8192,
            system=restart_system,
            messages=[latest_turn],
        )
        text = self._first_text(response)

        # Commit the clean context only now that the restart has succeeded.
        self.messages = [latest_turn, {"role": "assistant", "content": text}]
        self.compression_history = []
        return text

Compaction Strategy Decision Matrix

| Conversation Length | Content Type | Recommended Strategy |
| --- | --- | --- |
| < 50K tokens | Any | No compaction needed |
| 50K–200K | Mostly discussion | Standard compaction, preserve 10 turns |
| 50K–200K | Code-heavy | Higher preserve_recent (15-20), pin code artifacts |
| 200K–500K | Mixed | Aggressive pinning + managed context |
| 500K+ | Any | Restart with summary, seed with managed context |
| Indefinite | Support/chat | Compaction + external transcript archive |

The critical insight: compaction is not a “set and forget” feature. It is one tool in a context management strategy that includes pinning, explicit state management, health monitoring, and graceful restart. The best systems combine all four.