Back to Subagents
TodoWrite → Subagents
s03 (176 LOC) → s04 (151 LOC)
LOC Delta
-25 lines
New Tools
1
task
New Classes
0
New Functions
1
run_subagent
TodoWrite
Plan Before You Act
176 LOC
5 tools: bash, read_file, write_file, edit_file, todo
planning
Subagents
Clean Context Per Subtask
151 LOC
5 tools: bash, read_file, write_file, edit_file, task
planning
Source Code Diff
s03 (s03_todo_write.py) -> s04 (s04_subagent.py)
| 1 | 1 | #!/usr/bin/env python3 | |
| 2 | - | # Harness: planning -- keeping the model on course without scripting the route. | |
| 2 | + | # Harness: context isolation -- protecting the model's clarity of thought. | |
| 3 | 3 | """ | |
| 4 | - | s03_todo_write.py - TodoWrite | |
| 4 | + | s04_subagent.py - Subagents | |
| 5 | 5 | ||
| 6 | - | The model tracks its own progress via a TodoManager. A nag reminder | |
| 7 | - | forces it to keep updating when it forgets. | |
| 6 | + | Spawn a child agent with fresh messages=[]. The child works in its own | |
| 7 | + | context, sharing the filesystem, then returns only a summary to the parent. | |
| 8 | 8 | ||
| 9 | - | +----------+ +-------+ +---------+ | |
| 10 | - | | User | ---> | LLM | ---> | Tools | | |
| 11 | - | | prompt | | | | + todo | | |
| 12 | - | +----------+ +---+---+ +----+----+ | |
| 13 | - | ^ | | |
| 14 | - | | tool_result | | |
| 15 | - | +---------------+ | |
| 16 | - | | | |
| 17 | - | +-----------+-----------+ | |
| 18 | - | | TodoManager state | | |
| 19 | - | | [ ] task A | | |
| 20 | - | | [>] task B <- doing | | |
| 21 | - | | [x] task C | | |
| 22 | - | +-----------------------+ | |
| 23 | - | | | |
| 24 | - | if rounds_since_todo >= 3: | |
| 25 | - | inject <reminder> | |
| 9 | + | Parent agent Subagent | |
| 10 | + | +------------------+ +------------------+ | |
| 11 | + | | messages=[...] | | messages=[] | <-- fresh | |
| 12 | + | | | dispatch | | | |
| 13 | + | | tool: task | ---------->| while tool_use: | | |
| 14 | + | | prompt="..." | | call tools | | |
| 15 | + | | description="" | | append results | | |
| 16 | + | | | summary | | | |
| 17 | + | | result = "..." | <--------- | return last text | | |
| 18 | + | +------------------+ +------------------+ | |
| 19 | + | | | |
| 20 | + | Parent context stays clean. | |
| 21 | + | Subagent context is discarded. | |
| 26 | 22 | ||
| 27 | - | Key insight: "The agent can track its own progress -- and I can see it." | |
| 23 | + | Key insight: "Process isolation gives context isolation for free." | |
| 28 | 24 | """ | |
| 29 | 25 | ||
| 30 | 26 | import os | |
| 31 | 27 | import subprocess | |
| 32 | 28 | from pathlib import Path | |
| 33 | 29 | ||
| 34 | 30 | from anthropic import Anthropic | |
| 35 | 31 | from dotenv import load_dotenv | |
| 36 | 32 | ||
| 37 | 33 | load_dotenv(override=True) | |
| 38 | 34 | ||
| 39 | 35 | if os.getenv("ANTHROPIC_BASE_URL"): | |
| 40 | 36 | os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) | |
| 41 | 37 | ||
| 42 | 38 | WORKDIR = Path.cwd() | |
| 43 | 39 | client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) | |
| 44 | 40 | MODEL = os.environ["MODEL_ID"] | |
| 45 | 41 | ||
| 46 | - | SYSTEM = f"""You are a coding agent at {WORKDIR}. | |
| 47 | - | Use the todo tool to plan multi-step tasks. Mark in_progress before starting, completed when done. | |
| 48 | - | Prefer tools over prose.""" | |
| 42 | + | SYSTEM = f"You are a coding agent at {WORKDIR}. Use the task tool to delegate exploration or subtasks." | |
| 43 | + | SUBAGENT_SYSTEM = f"You are a coding subagent at {WORKDIR}. Complete the given task, then summarize your findings." | |
| 49 | 44 | ||
| 50 | 45 | ||
| 51 | - | # -- TodoManager: structured state the LLM writes to -- | |
| 52 | - | class TodoManager: | |
| 53 | - | def __init__(self): | |
| 54 | - | self.items = [] | |
| 55 | - | ||
| 56 | - | def update(self, items: list) -> str: | |
| 57 | - | if len(items) > 20: | |
| 58 | - | raise ValueError("Max 20 todos allowed") | |
| 59 | - | validated = [] | |
| 60 | - | in_progress_count = 0 | |
| 61 | - | for i, item in enumerate(items): | |
| 62 | - | text = str(item.get("text", "")).strip() | |
| 63 | - | status = str(item.get("status", "pending")).lower() | |
| 64 | - | item_id = str(item.get("id", str(i + 1))) | |
| 65 | - | if not text: | |
| 66 | - | raise ValueError(f"Item {item_id}: text required") | |
| 67 | - | if status not in ("pending", "in_progress", "completed"): | |
| 68 | - | raise ValueError(f"Item {item_id}: invalid status '{status}'") | |
| 69 | - | if status == "in_progress": | |
| 70 | - | in_progress_count += 1 | |
| 71 | - | validated.append({"id": item_id, "text": text, "status": status}) | |
| 72 | - | if in_progress_count > 1: | |
| 73 | - | raise ValueError("Only one task can be in_progress at a time") | |
| 74 | - | self.items = validated | |
| 75 | - | return self.render() | |
| 76 | - | ||
| 77 | - | def render(self) -> str: | |
| 78 | - | if not self.items: | |
| 79 | - | return "No todos." | |
| 80 | - | lines = [] | |
| 81 | - | for item in self.items: | |
| 82 | - | marker = {"pending": "[ ]", "in_progress": "[>]", "completed": "[x]"}[item["status"]] | |
| 83 | - | lines.append(f"{marker} #{item['id']}: {item['text']}") | |
| 84 | - | done = sum(1 for t in self.items if t["status"] == "completed") | |
| 85 | - | lines.append(f"\n({done}/{len(self.items)} completed)") | |
| 86 | - | return "\n".join(lines) | |
| 87 | - | ||
| 88 | - | ||
| 89 | - | TODO = TodoManager() | |
| 90 | - | ||
| 91 | - | ||
| 92 | - | # -- Tool implementations -- | |
| 46 | + | # -- Tool implementations shared by parent and child -- | |
| 93 | 47 | def safe_path(p: str) -> Path: | |
| 94 | 48 | path = (WORKDIR / p).resolve() | |
| 95 | 49 | if not path.is_relative_to(WORKDIR): | |
| 96 | 50 | raise ValueError(f"Path escapes workspace: {p}") | |
| 97 | 51 | return path | |
| 98 | 52 | ||
| 99 | 53 | def run_bash(command: str) -> str: | |
| 100 | 54 | dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"] | |
| 101 | 55 | if any(d in command for d in dangerous): | |
| 102 | 56 | return "Error: Dangerous command blocked" | |
| 103 | 57 | try: | |
| 104 | 58 | r = subprocess.run(command, shell=True, cwd=WORKDIR, | |
| 105 | 59 | capture_output=True, text=True, timeout=120) | |
| 106 | 60 | out = (r.stdout + r.stderr).strip() | |
| 107 | 61 | return out[:50000] if out else "(no output)" | |
| 108 | 62 | except subprocess.TimeoutExpired: | |
| 109 | 63 | return "Error: Timeout (120s)" | |
| 110 | 64 | ||
| 111 | 65 | def run_read(path: str, limit: int = None) -> str: | |
| 112 | 66 | try: | |
| 113 | 67 | lines = safe_path(path).read_text().splitlines() | |
| 114 | 68 | if limit and limit < len(lines): | |
| 115 | 69 | lines = lines[:limit] + [f"... ({len(lines) - limit} more)"] | |
| 116 | 70 | return "\n".join(lines)[:50000] | |
| 117 | 71 | except Exception as e: | |
| 118 | 72 | return f"Error: {e}" | |
| 119 | 73 | ||
| 120 | 74 | def run_write(path: str, content: str) -> str: | |
| 121 | 75 | try: | |
| 122 | 76 | fp = safe_path(path) | |
| 123 | 77 | fp.parent.mkdir(parents=True, exist_ok=True) | |
| 124 | 78 | fp.write_text(content) | |
| 125 | 79 | return f"Wrote {len(content)} bytes" | |
| 126 | 80 | except Exception as e: | |
| 127 | 81 | return f"Error: {e}" | |
| 128 | 82 | ||
| 129 | 83 | def run_edit(path: str, old_text: str, new_text: str) -> str: | |
| 130 | 84 | try: | |
| 131 | 85 | fp = safe_path(path) | |
| 132 | 86 | content = fp.read_text() | |
| 133 | 87 | if old_text not in content: | |
| 134 | 88 | return f"Error: Text not found in {path}" | |
| 135 | 89 | fp.write_text(content.replace(old_text, new_text, 1)) | |
| 136 | 90 | return f"Edited {path}" | |
| 137 | 91 | except Exception as e: | |
| 138 | 92 | return f"Error: {e}" | |
| 139 | 93 | ||
| 140 | 94 | ||
| 141 | 95 | TOOL_HANDLERS = { | |
| 142 | 96 | "bash": lambda **kw: run_bash(kw["command"]), | |
| 143 | 97 | "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), | |
| 144 | 98 | "write_file": lambda **kw: run_write(kw["path"], kw["content"]), | |
| 145 | 99 | "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), | |
| 146 | - | "todo": lambda **kw: TODO.update(kw["items"]), | |
| 147 | 100 | } | |
| 148 | 101 | ||
| 149 | - | TOOLS = [ | |
| 102 | + | # Child gets all base tools except task (no recursive spawning) | |
| 103 | + | CHILD_TOOLS = [ | |
| 150 | 104 | {"name": "bash", "description": "Run a shell command.", | |
| 151 | 105 | "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, | |
| 152 | 106 | {"name": "read_file", "description": "Read file contents.", | |
| 153 | 107 | "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}}, | |
| 154 | 108 | {"name": "write_file", "description": "Write content to file.", | |
| 155 | 109 | "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}}, | |
| 156 | 110 | {"name": "edit_file", "description": "Replace exact text in file.", | |
| 157 | 111 | "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}}, | |
| 158 | - | {"name": "todo", "description": "Update task list. Track progress on multi-step tasks.", | |
| 159 | - | "input_schema": {"type": "object", "properties": {"items": {"type": "array", "items": {"type": "object", "properties": {"id": {"type": "string"}, "text": {"type": "string"}, "status": {"type": "string", "enum": ["pending", "in_progress", "completed"]}}, "required": ["id", "text", "status"]}}}, "required": ["items"]}}, | |
| 160 | 112 | ] | |
| 161 | 113 | ||
| 162 | 114 | ||
| 163 | - | # -- Agent loop with nag reminder injection -- | |
| 115 | + | # -- Subagent: fresh context, filtered tools, summary-only return -- | |
| 116 | + | def run_subagent(prompt: str) -> str: | |
| 117 | + | sub_messages = [{"role": "user", "content": prompt}] # fresh context | |
| 118 | + | for _ in range(30): # safety limit | |
| 119 | + | response = client.messages.create( | |
| 120 | + | model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages, | |
| 121 | + | tools=CHILD_TOOLS, max_tokens=8000, | |
| 122 | + | ) | |
| 123 | + | sub_messages.append({"role": "assistant", "content": response.content}) | |
| 124 | + | if response.stop_reason != "tool_use": | |
| 125 | + | break | |
| 126 | + | results = [] | |
| 127 | + | for block in response.content: | |
| 128 | + | if block.type == "tool_use": | |
| 129 | + | handler = TOOL_HANDLERS.get(block.name) | |
| 130 | + | output = handler(**block.input) if handler else f"Unknown tool: {block.name}" | |
| 131 | + | results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)[:50000]}) | |
| 132 | + | sub_messages.append({"role": "user", "content": results}) | |
| 133 | + | # Only the final text returns to the parent -- child context is discarded | |
| 134 | + | return "".join(b.text for b in response.content if hasattr(b, "text")) or "(no summary)" | |
| 135 | + | ||
| 136 | + | ||
| 137 | + | # -- Parent tools: base tools + task dispatcher -- | |
| 138 | + | PARENT_TOOLS = CHILD_TOOLS + [ | |
| 139 | + | {"name": "task", "description": "Spawn a subagent with fresh context. It shares the filesystem but not conversation history.", | |
| 140 | + | "input_schema": {"type": "object", "properties": {"prompt": {"type": "string"}, "description": {"type": "string", "description": "Short description of the task"}}, "required": ["prompt"]}}, | |
| 141 | + | ] | |
| 142 | + | ||
| 143 | + | ||
| 164 | 144 | def agent_loop(messages: list): | |
| 165 | - | rounds_since_todo = 0 | |
| 166 | 145 | while True: | |
| 167 | - | # Nag reminder is injected below, alongside tool results | |
| 168 | 146 | response = client.messages.create( | |
| 169 | 147 | model=MODEL, system=SYSTEM, messages=messages, | |
| 170 | - | tools=TOOLS, max_tokens=8000, | |
| 148 | + | tools=PARENT_TOOLS, max_tokens=8000, | |
| 171 | 149 | ) | |
| 172 | 150 | messages.append({"role": "assistant", "content": response.content}) | |
| 173 | 151 | if response.stop_reason != "tool_use": | |
| 174 | 152 | return | |
| 175 | 153 | results = [] | |
| 176 | - | used_todo = False | |
| 177 | 154 | for block in response.content: | |
| 178 | 155 | if block.type == "tool_use": | |
| 179 | - | handler = TOOL_HANDLERS.get(block.name) | |
| 180 | - | try: | |
| 156 | + | if block.name == "task": | |
| 157 | + | desc = block.input.get("description", "subtask") | |
| 158 | + | print(f"> task ({desc}): {block.input['prompt'][:80]}") | |
| 159 | + | output = run_subagent(block.input["prompt"]) | |
| 160 | + | else: | |
| 161 | + | handler = TOOL_HANDLERS.get(block.name) | |
| 181 | 162 | output = handler(**block.input) if handler else f"Unknown tool: {block.name}" | |
| 182 | - | except Exception as e: | |
| 183 | - | output = f"Error: {e}" | |
| 184 | - | print(f"> {block.name}: {str(output)[:200]}") | |
| 163 | + | print(f" {str(output)[:200]}") | |
| 185 | 164 | results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)}) | |
| 186 | - | if block.name == "todo": | |
| 187 | - | used_todo = True | |
| 188 | - | rounds_since_todo = 0 if used_todo else rounds_since_todo + 1 | |
| 189 | - | if rounds_since_todo >= 3: | |
| 190 | - | results.insert(0, {"type": "text", "text": "<reminder>Update your todos.</reminder>"}) | |
| 191 | 165 | messages.append({"role": "user", "content": results}) | |
| 192 | 166 | ||
| 193 | 167 | ||
| 194 | 168 | if __name__ == "__main__": | |
| 195 | 169 | history = [] | |
| 196 | 170 | while True: | |
| 197 | 171 | try: | |
| 198 | - | query = input("\033[36ms03 >> \033[0m") | |
| 172 | + | query = input("\033[36ms04 >> \033[0m") | |
| 199 | 173 | except (EOFError, KeyboardInterrupt): | |
| 200 | 174 | break | |
| 201 | 175 | if query.strip().lower() in ("q", "exit", ""): | |
| 202 | 176 | break | |
| 203 | 177 | history.append({"role": "user", "content": query}) | |
| 204 | 178 | agent_loop(history) | |
| 205 | 179 | response_content = history[-1]["content"] | |
| 206 | 180 | if isinstance(response_content, list): | |
| 207 | 181 | for block in response_content: | |
| 208 | 182 | if hasattr(block, "text"): | |
| 209 | 183 | print(block.text) | |
| 210 | 184 | print() |