Learn Claude Code
Back to Compact

Skills → Compact

s05 (187 LOC) → s06 (205 LOC)

LOC Delta

+18 lines

New Tools

1

compact
New Classes

0

New Functions

3

estimate_tokens, micro_compact, auto_compact

Skills

Load on Demand

187 LOC

5 tools: bash, read_file, write_file, edit_file, load_skill

planning

Compact

Three-Layer Compression

205 LOC

5 tools: bash, read_file, write_file, edit_file, compact

memory

Source Code Diff

s05 (s05_skill_loading.py) -> s06 (s06_context_compact.py)
11#!/usr/bin/env python3
2-# Harness: on-demand knowledge -- domain expertise, loaded when the model asks.
2+# Harness: compression -- clean memory for infinite sessions.
33"""
4-s05_skill_loading.py - Skills
4+s06_context_compact.py - Compact
55
6-Two-layer skill injection that avoids bloating the system prompt:
6+Three-layer compression pipeline so the agent can work forever:
77
8- Layer 1 (cheap): skill names in system prompt (~100 tokens/skill)
9- Layer 2 (on demand): full skill body in tool_result
8+ Every turn:
9+ +------------------+
10+ | Tool call result |
11+ +------------------+
12+ |
13+ v
14+ [Layer 1: micro_compact] (silent, every turn)
15+ Replace tool_result content older than last 3
16+ with "[Previous: used {tool_name}]"
17+ |
18+ v
19+ [Check: tokens > 50000?]
20+ | |
21+ no yes
22+ | |
23+ v v
24+ continue [Layer 2: auto_compact]
25+ Save full transcript to .transcripts/
26+ Ask LLM to summarize conversation.
27+ Replace all messages with [summary].
28+ |
29+ v
30+ [Layer 3: compact tool]
31+ Model calls compact -> immediate summarization.
32+ Same as auto, triggered manually.
1033
11- skills/
12- pdf/
13- SKILL.md <-- frontmatter (name, description) + body
14- code-review/
15- SKILL.md
16-
17- System prompt:
18- +--------------------------------------+
19- | You are a coding agent. |
20- | Skills available: |
21- | - pdf: Process PDF files... | <-- Layer 1: metadata only
22- | - code-review: Review code... |
23- +--------------------------------------+
24-
25- When model calls load_skill("pdf"):
26- +--------------------------------------+
27- | tool_result: |
28- | <skill> |
29- | Full PDF processing instructions | <-- Layer 2: full body
30- | Step 1: ... |
31- | Step 2: ... |
32- | </skill> |
33- +--------------------------------------+
34-
35-Key insight: "Don't put everything in the system prompt. Load on demand."
34+Key insight: "The agent can forget strategically and keep working forever."
3635"""
3736
37+import json
3838import os
39-import re
4039import subprocess
40+import time
4141from pathlib import Path
4242
4343from anthropic import Anthropic
4444from dotenv import load_dotenv
4545
4646load_dotenv(override=True)
4747
4848if os.getenv("ANTHROPIC_BASE_URL"):
4949 os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
5050
5151WORKDIR = Path.cwd()
5252client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL"))
5353MODEL = os.environ["MODEL_ID"]
54-SKILLS_DIR = WORKDIR / "skills"
5554
55+SYSTEM = f"You are a coding agent at {WORKDIR}. Use tools to solve tasks."
5656
57-# -- SkillLoader: scan skills/<name>/SKILL.md with YAML frontmatter --
58-class SkillLoader:
59- def __init__(self, skills_dir: Path):
60- self.skills_dir = skills_dir
61- self.skills = {}
62- self._load_all()
57+THRESHOLD = 50000
58+TRANSCRIPT_DIR = WORKDIR / ".transcripts"
59+KEEP_RECENT = 3
6360
64- def _load_all(self):
65- if not self.skills_dir.exists():
66- return
67- for f in sorted(self.skills_dir.rglob("SKILL.md")):
68- text = f.read_text()
69- meta, body = self._parse_frontmatter(text)
70- name = meta.get("name", f.parent.name)
71- self.skills[name] = {"meta": meta, "body": body, "path": str(f)}
7261
73- def _parse_frontmatter(self, text: str) -> tuple:
74- """Parse YAML frontmatter between --- delimiters."""
75- match = re.match(r"^---\n(.*?)\n---\n(.*)", text, re.DOTALL)
76- if not match:
77- return {}, text
78- meta = {}
79- for line in match.group(1).strip().splitlines():
80- if ":" in line:
81- key, val = line.split(":", 1)
82- meta[key.strip()] = val.strip()
83- return meta, match.group(2).strip()
62+def estimate_tokens(messages: list) -> int:
63+ """Rough token count: ~4 chars per token."""
64+ return len(str(messages)) // 4
8465
85- def get_descriptions(self) -> str:
86- """Layer 1: short descriptions for the system prompt."""
87- if not self.skills:
88- return "(no skills available)"
89- lines = []
90- for name, skill in self.skills.items():
91- desc = skill["meta"].get("description", "No description")
92- tags = skill["meta"].get("tags", "")
93- line = f" - {name}: {desc}"
94- if tags:
95- line += f" [{tags}]"
96- lines.append(line)
97- return "\n".join(lines)
9866
99- def get_content(self, name: str) -> str:
100- """Layer 2: full skill body returned in tool_result."""
101- skill = self.skills.get(name)
102- if not skill:
103- return f"Error: Unknown skill '{name}'. Available: {', '.join(self.skills.keys())}"
104- return f"<skill name=\"{name}\">\n{skill['body']}\n</skill>"
67+# -- Layer 1: micro_compact - replace old tool results with placeholders --
68+def micro_compact(messages: list) -> list:
69+ # Collect (msg_index, part_index, tool_result_dict) for all tool_result entries
70+ tool_results = []
71+ for msg_idx, msg in enumerate(messages):
72+ if msg["role"] == "user" and isinstance(msg.get("content"), list):
73+ for part_idx, part in enumerate(msg["content"]):
74+ if isinstance(part, dict) and part.get("type") == "tool_result":
75+ tool_results.append((msg_idx, part_idx, part))
76+ if len(tool_results) <= KEEP_RECENT:
77+ return messages
78+ # Find tool_name for each result by matching tool_use_id in prior assistant messages
79+ tool_name_map = {}
80+ for msg in messages:
81+ if msg["role"] == "assistant":
82+ content = msg.get("content", [])
83+ if isinstance(content, list):
84+ for block in content:
85+ if hasattr(block, "type") and block.type == "tool_use":
86+ tool_name_map[block.id] = block.name
87+ # Clear old results (keep last KEEP_RECENT)
88+ to_clear = tool_results[:-KEEP_RECENT]
89+ for _, _, result in to_clear:
90+ if isinstance(result.get("content"), str) and len(result["content"]) > 100:
91+ tool_id = result.get("tool_use_id", "")
92+ tool_name = tool_name_map.get(tool_id, "unknown")
93+ result["content"] = f"[Previous: used {tool_name}]"
94+ return messages
10595
10696
107-SKILL_LOADER = SkillLoader(SKILLS_DIR)
97+# -- Layer 2: auto_compact - save transcript, summarize, replace messages --
98+def auto_compact(messages: list) -> list:
99+ # Save full transcript to disk
100+ TRANSCRIPT_DIR.mkdir(exist_ok=True)
101+ transcript_path = TRANSCRIPT_DIR / f"transcript_{int(time.time())}.jsonl"
102+ with open(transcript_path, "w") as f:
103+ for msg in messages:
104+ f.write(json.dumps(msg, default=str) + "\n")
105+ print(f"[transcript saved: {transcript_path}]")
106+ # Ask LLM to summarize
107+ conversation_text = json.dumps(messages, default=str)[:80000]
108+ response = client.messages.create(
109+ model=MODEL,
110+ messages=[{"role": "user", "content":
111+ "Summarize this conversation for continuity. Include: "
112+ "1) What was accomplished, 2) Current state, 3) Key decisions made. "
113+ "Be concise but preserve critical details.\n\n" + conversation_text}],
114+ max_tokens=2000,
115+ )
116+ summary = response.content[0].text
117+ # Replace all messages with compressed summary
118+ return [
119+ {"role": "user", "content": f"[Conversation compressed. Transcript: {transcript_path}]\n\n{summary}"},
120+ {"role": "assistant", "content": "Understood. I have the context from the summary. Continuing."},
121+ ]
108122
109-# Layer 1: skill metadata injected into system prompt
110-SYSTEM = f"""You are a coding agent at {WORKDIR}.
111-Use load_skill to access specialized knowledge before tackling unfamiliar topics.
112123
113-Skills available:
114-{SKILL_LOADER.get_descriptions()}"""
115-
116-
117124# -- Tool implementations --
118125def safe_path(p: str) -> Path:
119126 path = (WORKDIR / p).resolve()
120127 if not path.is_relative_to(WORKDIR):
121128 raise ValueError(f"Path escapes workspace: {p}")
122129 return path
123130
124131def run_bash(command: str) -> str:
125132 dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"]
126133 if any(d in command for d in dangerous):
127134 return "Error: Dangerous command blocked"
128135 try:
129136 r = subprocess.run(command, shell=True, cwd=WORKDIR,
130137 capture_output=True, text=True, timeout=120)
131138 out = (r.stdout + r.stderr).strip()
132139 return out[:50000] if out else "(no output)"
133140 except subprocess.TimeoutExpired:
134141 return "Error: Timeout (120s)"
135142
136143def run_read(path: str, limit: int = None) -> str:
137144 try:
138145 lines = safe_path(path).read_text().splitlines()
139146 if limit and limit < len(lines):
140147 lines = lines[:limit] + [f"... ({len(lines) - limit} more)"]
141148 return "\n".join(lines)[:50000]
142149 except Exception as e:
143150 return f"Error: {e}"
144151
145152def run_write(path: str, content: str) -> str:
146153 try:
147154 fp = safe_path(path)
148155 fp.parent.mkdir(parents=True, exist_ok=True)
149156 fp.write_text(content)
150157 return f"Wrote {len(content)} bytes"
151158 except Exception as e:
152159 return f"Error: {e}"
153160
154161def run_edit(path: str, old_text: str, new_text: str) -> str:
155162 try:
156163 fp = safe_path(path)
157164 content = fp.read_text()
158165 if old_text not in content:
159166 return f"Error: Text not found in {path}"
160167 fp.write_text(content.replace(old_text, new_text, 1))
161168 return f"Edited {path}"
162169 except Exception as e:
163170 return f"Error: {e}"
164171
165172
166173TOOL_HANDLERS = {
167174 "bash": lambda **kw: run_bash(kw["command"]),
168175 "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")),
169176 "write_file": lambda **kw: run_write(kw["path"], kw["content"]),
170177 "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]),
171- "load_skill": lambda **kw: SKILL_LOADER.get_content(kw["name"]),
178+ "compact": lambda **kw: "Manual compression requested.",
172179}
173180
174181TOOLS = [
175182 {"name": "bash", "description": "Run a shell command.",
176183 "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
177184 {"name": "read_file", "description": "Read file contents.",
178185 "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}},
179186 {"name": "write_file", "description": "Write content to file.",
180187 "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
181188 {"name": "edit_file", "description": "Replace exact text in file.",
182189 "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
183- {"name": "load_skill", "description": "Load specialized knowledge by name.",
184- "input_schema": {"type": "object", "properties": {"name": {"type": "string", "description": "Skill name to load"}}, "required": ["name"]}},
190+ {"name": "compact", "description": "Trigger manual conversation compression.",
191+ "input_schema": {"type": "object", "properties": {"focus": {"type": "string", "description": "What to preserve in the summary"}}}},
185192]
186193
187194
188195def agent_loop(messages: list):
189196 while True:
197+ # Layer 1: micro_compact before each LLM call
198+ micro_compact(messages)
199+ # Layer 2: auto_compact if token estimate exceeds threshold
200+ if estimate_tokens(messages) > THRESHOLD:
201+ print("[auto_compact triggered]")
202+ messages[:] = auto_compact(messages)
190203 response = client.messages.create(
191204 model=MODEL, system=SYSTEM, messages=messages,
192205 tools=TOOLS, max_tokens=8000,
193206 )
194207 messages.append({"role": "assistant", "content": response.content})
195208 if response.stop_reason != "tool_use":
196209 return
197210 results = []
211+ manual_compact = False
198212 for block in response.content:
199213 if block.type == "tool_use":
200- handler = TOOL_HANDLERS.get(block.name)
201- try:
202- output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
203- except Exception as e:
204- output = f"Error: {e}"
214+ if block.name == "compact":
215+ manual_compact = True
216+ output = "Compressing..."
217+ else:
218+ handler = TOOL_HANDLERS.get(block.name)
219+ try:
220+ output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
221+ except Exception as e:
222+ output = f"Error: {e}"
205223 print(f"> {block.name}: {str(output)[:200]}")
206224 results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)})
207225 messages.append({"role": "user", "content": results})
226+ # Layer 3: manual compact triggered by the compact tool
227+ if manual_compact:
228+ print("[manual compact]")
229+ messages[:] = auto_compact(messages)
208230
209231
210232if __name__ == "__main__":
211233 history = []
212234 while True:
213235 try:
214- query = input("\033[36ms05 >> \033[0m")
236+ query = input("\033[36ms06 >> \033[0m")
215237 except (EOFError, KeyboardInterrupt):
216238 break
217239 if query.strip().lower() in ("q", "exit", ""):
218240 break
219241 history.append({"role": "user", "content": query})
220242 agent_loop(history)
221243 response_content = history[-1]["content"]
222244 if isinstance(response_content, list):
223245 for block in response_content:
224246 if hasattr(block, "text"):
225247 print(block.text)
226248 print()