Default compaction works for simple conversations. Production systems need more control — you need to tell the compaction engine what matters, detect when information is being lost, and gracefully handle degradation.
Custom Preservation Rules
Mark specific messages or content as high-priority to resist compression:
def build_message_with_priority(content: str, priority: str = "normal") -> dict:
    """Build a user message, optionally prefixed with a compaction priority tag.

    Args:
        content: The message text.
        priority: ``"critical"`` or ``"high"`` prepends the matching
            ``[COMPACTION_PRIORITY: ...]`` line to the content; any other
            value leaves the content untouched.

    Returns:
        A ``{"role": "user", "content": ...}`` message dict.
    """
    # Only these two levels produce a tag; everything else is sent as-is.
    tag = None
    if priority == "critical":
        tag = "CRITICAL"
    elif priority == "high":
        tag = "HIGH"

    payload = content if tag is None else f"[COMPACTION_PRIORITY: {tag}]\n{content}"
    return {"role": "user", "content": payload}
# Usage: each (text, priority) pair below becomes one user message.
messages = [
    build_message_with_priority(text, priority=level)
    for text, level in (
        (
            "Project requirements: The system must handle 10K concurrent users "
            "with p99 latency under 200ms. All data must be encrypted at rest "
            "and in transit.",
            "critical",
        ),
        ("Can you review this database schema?", "normal"),
    )
]
Structured Context Injection
Instead of relying on compaction to preserve everything, proactively manage what stays in context:
from dataclasses import dataclass, field
@dataclass
class ManagedContext:
    """Conversation state tracked by the application rather than by compaction.

    The four collections are rendered into one markdown string by
    ``to_system_context`` so the caller can place it in a system prompt.
    """

    decisions: list[str] = field(default_factory=list)
    requirements: list[str] = field(default_factory=list)
    code_state: dict[str, str] = field(default_factory=dict)
    open_issues: list[str] = field(default_factory=list)

    def to_system_context(self) -> str:
        """Render the non-empty collections as markdown sections.

        Sections appear in the order Requirements, Decisions Made, Open
        Issues, Current Code State and are separated by a blank line. Empty
        collections are skipped; with nothing to render the result is ``""``.
        """
        bullet_sections = (
            ("## Requirements\n", self.requirements),
            ("## Decisions Made\n", self.decisions),
            ("## Open Issues\n", self.open_issues),
        )
        rendered = [
            heading + "\n".join(f"- {entry}" for entry in entries)
            for heading, entries in bullet_sections
            if entries
        ]

        if self.code_state:
            # One fenced block per file, in dict insertion order.
            file_blocks = "".join(
                f"\n### {path}\n```\n{source}\n```\n"
                for path, source in self.code_state.items()
            )
            rendered.append("## Current Code State\n" + file_blocks)

        return "\n\n".join(rendered)

    def add_decision(self, decision: str):
        """Record ``decision`` at the end of the decisions list."""
        self.decisions.append(decision)

    def resolve_issue(self, issue: str):
        """Drop every open issue equal to ``issue`` (no-op if absent)."""
        self.open_issues = list(
            filter(lambda pending: pending != issue, self.open_issues)
        )
class CompactionAwareSession:
    """Session that combines compaction with explicit context management.

    Each request sends the base system prompt followed by the rendered
    ``ManagedContext``, so the state tracked there is re-sent on every turn
    instead of depending on what compaction chooses to keep.
    """

    def __init__(self, base_system_prompt: str):
        """Create a session with an empty history and empty managed context."""
        self.client = Anthropic()
        self.base_prompt = base_system_prompt
        self.context = ManagedContext()
        self.messages: list[dict] = []

    def send(self, message: str) -> str:
        """Send one user turn and return the assistant's text reply.

        The user turn and the reply are appended to ``self.messages`` only if
        the request succeeds. If the request fails, the pending user turn is
        removed again so a retry does not send it twice.

        Args:
            message: The user's message text.

        Returns:
            The text of the first ``"text"`` content block in the response.

        Raises:
            RuntimeError: If the response contains no text block.
            Exception: Whatever the client raises is propagated unchanged.
        """
        pending = {"role": "user", "content": message}
        self.messages.append(pending)
        try:
            # Inject managed context into the system prompt.
            system = f"{self.base_prompt}\n{self.context.to_system_context()}"
            # NOTE(review): compaction settings are passed through `metadata`
            # here; confirm the installed SDK/API version accepts this payload.
            response = self.client.messages.create(
                model="claude-opus-4-6-20260205",
                max_tokens=8192,
                system=system,
                messages=self.messages,
                metadata={
                    "compaction": {
                        "enabled": True,
                        "trigger_tokens": 150_000,
                        "preserve_recent": 10,
                    }
                },
            )
            # A default avoids leaking a bare StopIteration to the caller.
            text = next(
                (b.text for b in response.content if b.type == "text"), None
            )
            if text is None:
                raise RuntimeError("Response contained no text block")
        except Exception:
            # Roll back the unanswered user turn so history stays alternating.
            if self.messages and self.messages[-1] is pending:
                self.messages.pop()
            raise

        self.messages.append({"role": "assistant", "content": text})
        return text
Measuring Information Loss
Compaction is inherently lossy. Measure how much you are losing:
import hashlib
class CompactionLossTracker:
    """Track information loss across compaction events.

    Loss is estimated with cheap textual proxies: character counts, message
    counts, fenced code blocks and URLs found in each message's ``content``.
    """

    def __init__(self):
        """Start with no recorded checkpoints."""
        # One summary dict per checkpoint() call, in call order.
        self.checkpoints: list[dict] = []

    def checkpoint(self, messages: list[dict], label: str) -> None:
        """Save a snapshot of conversation state before compaction.

        Appends a dict with the label, the message count, the total content
        length in characters, and the first 16 hex digits of the SHA-256 of
        ``str(messages)``.
        """
        content_hash = hashlib.sha256(str(messages).encode()).hexdigest()[:16]
        self.checkpoints.append({
            "label": label,
            "message_count": len(messages),
            "total_chars": self._total_chars(messages),
            "hash": content_hash,
        })

    def measure_loss(self, original_messages: list[dict],
                     compacted_messages: list[dict]) -> dict:
        """Compare pre/post compaction to measure information loss.

        Args:
            original_messages: Messages before compaction.
            compacted_messages: Messages after compaction.

        Returns:
            A dict with ``char_reduction`` and ``message_reduction``
            (``1 - compacted / original``) and ``code_blocks_preserved`` and
            ``urls_preserved`` (``compacted / original``). When the original
            side of a ratio is zero there is nothing to lose, so reductions
            are reported as ``0.0`` and preservation as ``1.0`` instead of
            raising ``ZeroDivisionError``.
        """
        return {
            "char_reduction": self._reduction(
                self._total_chars(compacted_messages),
                self._total_chars(original_messages),
            ),
            "message_reduction": self._reduction(
                len(compacted_messages), len(original_messages)
            ),
            # Count preserved entities (code blocks, URLs).
            "code_blocks_preserved": self._preserved(
                self._count_code_blocks(compacted_messages),
                self._count_code_blocks(original_messages),
            ),
            "urls_preserved": self._preserved(
                self._count_urls(compacted_messages),
                self._count_urls(original_messages),
            ),
        }

    @staticmethod
    def _total_chars(messages: list[dict]) -> int:
        """Sum the stringified ``content`` length over all messages."""
        return sum(len(str(m.get("content", ""))) for m in messages)

    @staticmethod
    def _joined_content(messages: list[dict]) -> str:
        """Join every message's stringified ``content`` with single spaces."""
        return " ".join(str(m.get("content", "")) for m in messages)

    @staticmethod
    def _reduction(after: int, before: int) -> float:
        """Return ``1 - after / before``, or ``0.0`` when ``before`` is zero."""
        return 1 - (after / before) if before > 0 else 0.0

    @staticmethod
    def _preserved(after: int, before: int) -> float:
        """Return ``after / before``, or ``1.0`` when ``before`` is zero."""
        return after / before if before > 0 else 1.0

    def _count_code_blocks(self, messages: list[dict]) -> int:
        """Count fenced code blocks as pairs of triple-backtick markers."""
        import re
        return len(re.findall(r'```', self._joined_content(messages))) // 2

    def _count_urls(self, messages: list[dict]) -> int:
        """Count ``http://`` / ``https://`` URLs in the message contents."""
        import re
        return len(re.findall(r'https?://\S+', self._joined_content(messages)))
Fallback Patterns for Degraded Compaction
When compaction quality drops, you need automatic fallbacks:
class CompactionWithFallback:
    """Compaction system with automatic degradation handling.

    The session records the compression ratio reported for each triggered
    compaction. The average of the most recent ratios selects one of three
    strategies: normal compaction, compaction with extra pinned context, or
    a restart from a model-written summary. Lower averages are treated as
    more aggressive compaction.
    """

    _MODEL = "claude-opus-4-6-20260205"
    # Number of most recent compaction events averaged by _select_strategy.
    _HISTORY_WINDOW = 3
    # Average ratio below which the conversation is restarted / pinned.
    _RESTART_BELOW = 0.15
    _PINNING_BELOW = 0.30

    def __init__(self):
        """Create a session with empty history, context and ratio log."""
        self.client = Anthropic()
        self.messages: list[dict] = []
        self.context = ManagedContext()
        self.compression_history: list[float] = []

    def send(self, message: str, system: str) -> str:
        """Send one user turn using the currently selected strategy.

        If the request fails, the pending user turn is removed from
        ``self.messages`` again so a retry does not send it twice.

        Args:
            message: The user's message text.
            system: The system prompt for this request.

        Returns:
            The assistant's text reply.

        Raises:
            RuntimeError: If a response contains no text block.
            Exception: Whatever the client raises is propagated unchanged.
        """
        pending = {"role": "user", "content": message}
        self.messages.append(pending)
        try:
            strategy = self._select_strategy()
            if strategy == "restart":
                return self._restart_conversation(system, message)
            if strategy == "aggressive_pinning":
                return self._send_with_aggressive_pinning(system)
            return self._send_with_compaction(system)
        except Exception:
            # Roll back the unanswered user turn so history stays alternating.
            if self.messages and self.messages[-1] is pending:
                self.messages.pop()
            raise

    def _select_strategy(self) -> str:
        """Pick ``"normal"``, ``"aggressive_pinning"`` or ``"restart"``.

        Until at least three compaction events have been recorded the
        strategy is always ``"normal"``.
        """
        if len(self.compression_history) < self._HISTORY_WINDOW:
            return "normal"
        recent_ratios = self.compression_history[-self._HISTORY_WINDOW:]
        avg_ratio = sum(recent_ratios) / len(recent_ratios)
        if avg_ratio < self._RESTART_BELOW:
            return "restart"
        if avg_ratio < self._PINNING_BELOW:
            return "aggressive_pinning"
        return "normal"

    @staticmethod
    def _first_text(response) -> str:
        """Return the first text block's text, or raise ``RuntimeError``."""
        # A default avoids leaking a bare StopIteration to the caller.
        text = next((b.text for b in response.content if b.type == "text"), None)
        if text is None:
            raise RuntimeError("Response contained no text block")
        return text

    def _send_with_compaction(self, system: str) -> str:
        """Send the current history with compaction enabled.

        Records the reported compression ratio when the response says
        compaction was triggered, then appends the reply to the history.
        """
        # NOTE(review): compaction settings are passed through `metadata` and
        # results are read from `response.compaction_info`; confirm both
        # against the SDK/API version in use.
        response = self.client.messages.create(
            model=self._MODEL,
            max_tokens=8192,
            system=system,
            messages=self.messages,
            metadata={
                "compaction": {
                    "enabled": True,
                    "trigger_tokens": 150_000,
                    "preserve_recent": 10,
                }
            },
        )
        info = getattr(response, "compaction_info", None)
        if info is not None and info.triggered:
            self.compression_history.append(info.compression_ratio)
        text = self._first_text(response)
        self.messages.append({"role": "assistant", "content": text})
        return text

    def _send_with_aggressive_pinning(self, system: str) -> str:
        """Pin more context when compression is getting too aggressive."""
        enriched_system = (
            f"{system}\n"
            f"{self.context.to_system_context()}\n"
            "## Conversation Summary (pinned due to high compression)\n"
            f"This is turn {len(self.messages) // 2} of an extended conversation.\n"
            "Previous compaction has been aggressive — refer to pinned context above\n"
            "for critical details."
        )
        return self._send_with_compaction(enriched_system)

    def _restart_conversation(self, system: str, latest_message: str) -> str:
        """Restart with a fresh context, seeded with managed state.

        First asks the model to summarize the conversation so far, then
        answers ``latest_message`` in a new history whose system prompt
        carries that summary and the managed context. ``self.messages`` and
        ``self.compression_history`` are replaced only after both requests
        succeed, so a failure leaves the existing state intact.
        """
        pending = {"role": "user", "content": latest_message}
        history = list(self.messages)
        # send() has already appended the latest user turn. Leave it out of
        # the summary request so the model summarizes the prior conversation
        # instead of seeing two consecutive user turns.
        if history and history[-1] == pending:
            history.pop()

        summary_prompt = (
            "Summarize the key decisions, open issues, and current state "
            "of our conversation in a structured format."
        )
        summary_response = self.client.messages.create(
            model=self._MODEL,
            max_tokens=4096,
            system=system,
            messages=[*history, {"role": "user", "content": summary_prompt}],
        )
        summary = self._first_text(summary_response)

        restart_system = (
            f"{system}\n"
            "## Previous Session Summary\n"
            f"{summary}\n"
            f"{self.context.to_system_context()}"
        )
        fresh_messages = [pending]
        response = self.client.messages.create(
            model=self._MODEL,
            max_tokens=8192,
            system=restart_system,
            messages=fresh_messages,
        )
        text = self._first_text(response)
        fresh_messages.append({"role": "assistant", "content": text})

        # Commit the clean context only now that both calls have succeeded.
        self.messages = fresh_messages
        self.compression_history = []
        return text
Compaction Strategy Decision Matrix
| Conversation Length | Content Type | Recommended Strategy |
|---|---|---|
| < 50K tokens | Any | No compaction needed |
| 50K–200K tokens | Mostly discussion | Standard compaction, preserve 10 turns |
| 50K–200K tokens | Code-heavy | Higher `preserve_recent` (15–20), pin code artifacts |
| 200K–500K tokens | Mixed | Aggressive pinning + managed context |
| 500K+ tokens | Any | Restart with summary, seed with managed context |
| Indefinite | Support/chat | Compaction + external transcript archive |
The critical insight: compaction is not a “set and forget” feature. It is one tool in a context management strategy that also includes pinning, explicit state management, health monitoring, and graceful restart. The best systems combine compaction with all four.