A “normal” conversation with Opus 4.6 dies at 1M tokens. With compaction, that limit becomes theoretical — you can sustain conversations for hours, days, or indefinitely. The key is designing your system so compaction works with your workflow instead of silently degrading it.
Architecture of an Infinite Conversation
┌─────────────────────────────────────────────┐
│ Infinite Conversation │
├─────────────────────────────────────────────┤
│ Active Window [Last 10 turns — full] │
│ Compacted Zone [Turns 11-N — summarized] │
│ Archive [Full transcript — stored] │
│ Pinned Context [Never compacted] │
└─────────────────────────────────────────────┘
The trick is separating what the model sees (active window + compacted zone + pinned context) from what you store (the complete transcript in an external database).
Support Chatbot with Infinite History
import json
from datetime import datetime
from anthropic import Anthropic
class InfiniteSupportBot:
    """Support chatbot that maintains context across unlimited interactions.

    ``messages`` is the history sent to the model on every turn. Each
    completed exchange is also written to ``db`` so the full transcript is
    archived outside the conversation.
    """

    SYSTEM_PROMPT = """You are a senior support engineer. You have access to
the full conversation history through compaction. When referencing earlier
parts of the conversation, note that details may be summarized.
Always:
- Reference ticket numbers and case details by name
- Track unresolved issues across the conversation
- Summarize your understanding of the problem before proposing solutions"""

    def __init__(self, customer_id: str, db):
        """Create a bot for one customer and restore any stored history.

        Args:
            customer_id: Key used for every ``db`` read and write.
            db: Storage object providing ``get_conversation(customer_id)``
                and ``save_message(customer_id, role, text)``.
        """
        self.client = Anthropic()
        self.customer_id = customer_id
        self.db = db
        self.messages: list[dict] = []
        self._load_history()

    def _load_history(self):
        """Load previous conversation history from the database."""
        history = self.db.get_conversation(self.customer_id)
        if history:
            # Copy so later appends never mutate the object handed back by
            # the storage layer.
            self.messages = list(history)

    def respond(self, user_message: str) -> str:
        """Send one user turn and return the assistant's text reply.

        The exchange is added to ``messages`` and archived only after the
        request succeeds. If it fails, the pending user turn is removed so a
        retry does not send two consecutive user messages.

        Args:
            user_message: The customer's message for this turn.

        Returns:
            The concatenated text of every ``text`` block in the response.

        Raises:
            ValueError: If the response contains no text content.
        """
        self.messages.append({"role": "user", "content": user_message})
        try:
            response = self.client.messages.create(
                model="claude-opus-4-6-20260205",
                max_tokens=4096,
                system=self.SYSTEM_PROMPT,
                messages=self.messages,
                # NOTE(review): confirm that `metadata` is where the API
                # expects compaction settings, and that these field names
                # are current, before relying on this in production.
                metadata={
                    "compaction": {
                        "enabled": True,
                        "trigger_tokens": 100_000,
                        "preserve_recent": 15,
                    }
                },
            )
            # A response may carry several text blocks alongside non-text
            # ones; keep all of the text rather than only the first block.
            assistant_text = "".join(
                block.text for block in response.content if block.type == "text"
            )
            if not assistant_text:
                raise ValueError("response contained no text content")
        except Exception:
            # Undo the optimistic append so the history stays well-formed.
            self.messages.pop()
            raise

        self.messages.append({"role": "assistant", "content": assistant_text})
        # Archive full transcript to database
        self.db.save_message(self.customer_id, "user", user_message)
        self.db.save_message(self.customer_id, "assistant", assistant_text)
        return assistant_text
Research Assistant with Session Continuity
Research sessions can span days. The assistant must remember which papers have been reviewed, which hypotheses have been proposed, and which leads have been abandoned:
class ResearchAssistant:
    """Research assistant that maintains context across multi-day sessions.

    Pinned context is appended to the system prompt on every request, so it
    travels outside the message history that compaction operates on.
    """

    SYSTEM_PROMPT = """You are a research assistant helping with academic
research. You maintain a running knowledge base of:
- Papers reviewed and their key findings
- Hypotheses under investigation
- Methodology decisions and rationale
- Open questions and next steps
When compaction has summarized earlier parts of our conversation, rely on
the summary but flag when you're uncertain about specific details."""

    def __init__(self, project_name: str):
        """Start an empty session for ``project_name``."""
        self.client = Anthropic()
        self.project_name = project_name
        self.messages: list[dict] = []
        self.pinned_context: list[str] = []

    def pin(self, context: str):
        """Pin critical context that must never be compacted."""
        self.pinned_context.append(context)

    def _build_system_prompt(self) -> str:
        """Return the base prompt, plus the pinned section when any exists."""
        if not self.pinned_context:
            return self.SYSTEM_PROMPT
        pinned = "\n\n---\n\n".join(self.pinned_context)
        return (
            f"{self.SYSTEM_PROMPT}\n"
            "## Pinned Research Context (never summarize these)\n"
            f"{pinned}"
        )

    def query(self, question: str) -> str:
        """Ask one question and return the assistant's text answer.

        The exchange is added to ``messages`` only after the request
        succeeds. If it fails, the pending user turn is removed so a retry
        does not send two consecutive user messages.

        Args:
            question: The user's question for this turn.

        Returns:
            The concatenated text of every ``text`` block in the response.

        Raises:
            ValueError: If the response contains no text content.
        """
        self.messages.append({"role": "user", "content": question})
        try:
            response = self.client.messages.create(
                model="claude-opus-4-6-20260205",
                max_tokens=8192,
                # NOTE(review): confirm the placement and allowed values of
                # `effort` against the current thinking documentation.
                thinking={"type": "adaptive", "effort": "deep"},
                system=self._build_system_prompt(),
                messages=self.messages,
                # NOTE(review): confirm that `metadata` is where the API
                # expects compaction settings and that these fields exist.
                metadata={
                    "compaction": {
                        "enabled": True,
                        "trigger_tokens": 200_000,
                        "preserve_recent": 20,
                    }
                },
            )
            # With thinking enabled the response holds non-text blocks too;
            # keep all of the text rather than only the first text block.
            assistant_text = "".join(
                block.text for block in response.content if block.type == "text"
            )
            if not assistant_text:
                raise ValueError("response contained no text content")
        except Exception:
            # Undo the optimistic append so the history stays well-formed.
            self.messages.pop()
            raise

        self.messages.append({"role": "assistant", "content": assistant_text})
        return assistant_text
# Usage
assistant = ResearchAssistant("quantum-error-correction")

# Pinned entries are appended to the system prompt on every query, so they
# stay outside the message history.
assistant.pin(
    context="Research question: Can surface codes achieve "
    "fault-tolerant thresholds below 0.5% physical error rate?"
)
assistant.pin(
    context="Key constraint: All simulations must use the "
    "rotated surface code with minimum distance d=5."
)

# Day 1: literature review.
assistant.query(
    question="Summarize the top 5 papers on surface code "
    "thresholds published in 2025."
)

# Day 2: pick up from the previous day's discussion.
assistant.query(
    question="Based on yesterday's literature review, which papers "
    "suggest the most promising approaches for sub-0.5% thresholds?"
)
Long-Running Development Session
Development sessions are the hardest case for compaction because code context is dense and hard to compress. The strategy is to extract code artifacts from the discussion and pin them separately:
import re
class DevSession:
    """Long-running development session with code-aware compaction.

    Code blocks found in assistant replies are stored per filename in
    ``code_artifacts`` and re-injected through the system prompt on every
    request, so the latest code does not depend on the message history.
    """

    SYSTEM_PROMPT = """You are a principal engineer working on a codebase.
Track all code changes, architecture decisions, and TODOs across the session.
When referencing compacted history, note which files were modified and why."""

    # Matches a fenced block with a language tag whose first line is a
    # "# <filename>" comment. Compiled once instead of on every call.
    _CODE_BLOCK_RE = re.compile(r'```(\w+)\s*\n# (\S+)\n(.*?)```', re.DOTALL)

    def __init__(self):
        """Start a session with no history and no stored code."""
        self.client = Anthropic()
        self.messages: list[dict] = []
        self.code_artifacts: dict[str, str] = {}  # filename -> latest code

    def _extract_code_blocks(self, text: str) -> dict[str, str]:
        """Extract named code blocks from assistant responses.

        Args:
            text: Assistant reply to scan.

        Returns:
            Mapping of filename to stripped code. When a filename appears
            more than once, the last occurrence wins.
        """
        return {
            match.group(2): match.group(3).strip()
            for match in self._CODE_BLOCK_RE.finditer(text)
        }

    def _build_system_prompt(self) -> str:
        """Return the base prompt followed by the current code state, if any."""
        if not self.code_artifacts:
            return self.SYSTEM_PROMPT
        sections = "".join(
            f"### {filename}\n```\n{code}\n```\n\n"
            for filename, code in self.code_artifacts.items()
        )
        return self.SYSTEM_PROMPT + "\n\n## Current Code State\n\n" + sections

    def send(self, message: str) -> str:
        """Send one user turn and return the assistant's text reply.

        The exchange is added to ``messages`` only after the request
        succeeds. If it fails, the pending user turn is removed so a retry
        does not send two consecutive user messages.

        Args:
            message: The user's message for this turn.

        Returns:
            The concatenated text of every ``text`` block in the response.

        Raises:
            ValueError: If the response contains no text content.
        """
        self.messages.append({"role": "user", "content": message})
        try:
            response = self.client.messages.create(
                model="claude-opus-4-6-20260205",
                max_tokens=8192,
                system=self._build_system_prompt(),
                messages=self.messages,
                # NOTE(review): confirm that `metadata` is where the API
                # expects compaction settings and that these fields exist.
                metadata={
                    "compaction": {
                        "enabled": True,
                        "trigger_tokens": 180_000,
                        "preserve_recent": 12,
                    }
                },
            )
            # Keep all text blocks; a code listing may be split across them.
            assistant_text = "".join(
                block.text for block in response.content if block.type == "text"
            )
            if not assistant_text:
                raise ValueError("response contained no text content")
        except Exception:
            # Undo the optimistic append so the history stays well-formed.
            self.messages.pop()
            raise

        # Extract and store code artifacts
        self.code_artifacts.update(self._extract_code_blocks(assistant_text))
        self.messages.append({"role": "assistant", "content": assistant_text})
        return assistant_text
Monitoring Conversation Health
As conversations get very long, you need to monitor whether compaction is degrading quality:
class ConversationHealthMonitor:
    """Track conversation health metrics over time.

    Each recorded turn becomes one dict in ``metrics``. The two checks,
    ``detect_degradation`` and ``should_restart``, read only that list.
    """

    # Number of most recent turns examined by ``detect_degradation``.
    WINDOW = 10
    # Drop in mean satisfaction that counts as degradation.
    SATISFACTION_DROP = 0.2
    # ``should_restart`` fires when the latest compression ratio is below this.
    MIN_COMPRESSION_RATIO = 0.2

    def __init__(self):
        """Start with an empty metric history."""
        self.metrics: list[dict] = []

    def record(self, turn: int, response, user_satisfaction: "float | None" = None):
        """Append the metrics for one turn.

        Args:
            turn: Turn number to store with the metric.
            response: Object exposing ``usage.input_tokens`` and
                ``usage.output_tokens``; an optional ``compaction_info``
                attribute with ``triggered`` and ``compression_ratio`` is
                read when present and not ``None``.
            user_satisfaction: Optional score for this turn; stored under
                ``"satisfaction"`` only when provided.
        """
        usage = response.usage
        metric = {
            "turn": turn,
            "timestamp": datetime.now().isoformat(),
            "input_tokens": usage.input_tokens,
            "output_tokens": usage.output_tokens,
            "total_tokens": usage.input_tokens + usage.output_tokens,
        }

        # getattr + None check: the attribute may be missing or set to None.
        info = getattr(response, "compaction_info", None)
        if info is not None:
            metric["compacted"] = info.triggered
            metric["compression_ratio"] = info.compression_ratio
        else:
            metric["compacted"] = False

        if user_satisfaction is not None:
            metric["satisfaction"] = user_satisfaction
        self.metrics.append(metric)

    def detect_degradation(self) -> bool:
        """Detect if compaction is causing quality degradation.

        Looks at the last ``WINDOW`` turns and splits them at the first turn
        flagged as compacted. That turn and everything after it count as
        post-compaction. Degradation is reported when mean satisfaction
        before the split exceeds the mean after it by more than
        ``SATISFACTION_DROP``.

        Returns:
            ``True`` on a detected drop. ``False`` when there are fewer than
            ``WINDOW`` turns, no compaction in the window, or no satisfaction
            scores on either side of the split.
        """
        recent = self.metrics[-self.WINDOW:]
        if len(recent) < self.WINDOW:
            return False

        split = next(
            (i for i, m in enumerate(recent) if m.get("compacted")), None
        )
        if split is None:
            return False

        # Split by position, not by the per-turn flag: turns that follow a
        # compaction event are served from compacted context even though
        # their own flag may be False.
        before = [m["satisfaction"] for m in recent[:split] if "satisfaction" in m]
        after = [m["satisfaction"] for m in recent[split:] if "satisfaction" in m]
        if not before or not after:
            return False

        avg_before = sum(before) / len(before)
        avg_after = sum(after) / len(after)
        return avg_before - avg_after > self.SATISFACTION_DROP

    def should_restart(self) -> bool:
        """Recommend conversation restart if quality is degrading.

        Returns:
            ``True`` when the latest turn's compression ratio is below
            ``MIN_COMPRESSION_RATIO`` or ``detect_degradation`` reports a
            drop; ``False`` when no turns have been recorded.
        """
        if not self.metrics:
            return False

        # A low ratio is taken to mean very little is being preserved
        # (presumably compacted size / original size — TODO confirm).
        # The stored value may be None, so guard before comparing.
        ratio = self.metrics[-1].get("compression_ratio")
        if ratio is not None and ratio < self.MIN_COMPRESSION_RATIO:
            return True
        return self.detect_degradation()
Key Design Principles
- Always archive the full transcript externally. Compaction is lossy. If you need the original conversation for audit, legal, or debugging purposes, store it in a database before compaction runs.
- Pin critical context in the system prompt. System prompts are never compacted. Use them for information that must survive indefinitely — project requirements, user preferences, key decisions.
- Extract and maintain code artifacts separately. Do not rely on compaction to preserve code — extract it into a structured store and inject it via the system prompt.
- Monitor and restart when needed. Even with compaction, conversations eventually degrade. Build health checks and restart mechanisms.
In the next lesson, you will learn advanced compaction strategies — custom preservation rules, priority markers, and fallback patterns.