Back to TodoWrite
Tools → TodoWrite
s02 (120 LOC) → s03 (176 LOC)
LOC Delta
+56 lines
New Tools
1
todo
New Classes
1
TodoManager
New Functions
0
Tools
One Handler Per Tool
120 LOC
4 tools: bash, read_file, write_file, edit_file
tools
TodoWrite
Plan Before You Act
176 LOC
5 tools: bash, read_file, write_file, edit_file, todo
planning
Source Code Diff
s02 (s02_tool_use.py) -> s03 (s03_todo_write.py)
| 1 | 1 | #!/usr/bin/env python3 | |
| 2 | - | # Harness: tool dispatch -- expanding what the model can reach. | |
| 2 | + | # Harness: planning -- keeping the model on course without scripting the route. | |
| 3 | 3 | """ | |
| 4 | - | s02_tool_use.py - Tools | |
| 4 | + | s03_todo_write.py - TodoWrite | |
| 5 | 5 | ||
| 6 | - | The agent loop from s01 didn't change. We just added tools to the array | |
| 7 | - | and a dispatch map to route calls. | |
| 6 | + | The model tracks its own progress via a TodoManager. A nag reminder | |
| 7 | + | forces it to keep updating when it forgets. | |
| 8 | 8 | ||
| 9 | - | +----------+ +-------+ +------------------+ | |
| 10 | - | | User | ---> | LLM | ---> | Tool Dispatch | | |
| 11 | - | | prompt | | | | { | | |
| 12 | - | +----------+ +---+---+ | bash: run_bash | | |
| 13 | - | ^ | read: run_read | | |
| 14 | - | | | write: run_wr | | |
| 15 | - | +----------+ edit: run_edit | | |
| 16 | - | tool_result| } | | |
| 17 | - | +------------------+ | |
| 9 | + | +----------+ +-------+ +---------+ | |
| 10 | + | | User | ---> | LLM | ---> | Tools | | |
| 11 | + | | prompt | | | | + todo | | |
| 12 | + | +----------+ +---+---+ +----+----+ | |
| 13 | + | ^ | | |
| 14 | + | | tool_result | | |
| 15 | + | +---------------+ | |
| 16 | + | | | |
| 17 | + | +-----------+-----------+ | |
| 18 | + | | TodoManager state | | |
| 19 | + | | [ ] task A | | |
| 20 | + | | [>] task B <- doing | | |
| 21 | + | | [x] task C | | |
| 22 | + | +-----------------------+ | |
| 23 | + | | | |
| 24 | + | if rounds_since_todo >= 3: | |
| 25 | + | inject <reminder> | |
| 18 | 26 | ||
| 19 | - | Key insight: "The loop didn't change at all. I just added tools." | |
| 27 | + | Key insight: "The agent can track its own progress -- and I can see it." | |
| 20 | 28 | """ | |
| 21 | 29 | ||
| 22 | 30 | import os | |
| 23 | 31 | import subprocess | |
| 24 | 32 | from pathlib import Path | |
| 25 | 33 | ||
| 26 | 34 | from anthropic import Anthropic | |
| 27 | 35 | from dotenv import load_dotenv | |
| 28 | 36 | ||
| 29 | 37 | load_dotenv(override=True) | |
| 30 | 38 | ||
| 31 | 39 | if os.getenv("ANTHROPIC_BASE_URL"): | |
| 32 | 40 | os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) | |
| 33 | 41 | ||
| 34 | 42 | WORKDIR = Path.cwd() | |
| 35 | 43 | client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) | |
| 36 | 44 | MODEL = os.environ["MODEL_ID"] | |
| 37 | 45 | ||
| 38 | - | SYSTEM = f"You are a coding agent at {WORKDIR}. Use tools to solve tasks. Act, don't explain." | |
| 46 | + | SYSTEM = f"""You are a coding agent at {WORKDIR}. | |
| 47 | + | Use the todo tool to plan multi-step tasks. Mark in_progress before starting, completed when done. | |
| 48 | + | Prefer tools over prose.""" | |
| 39 | 49 | ||
| 40 | 50 | ||
| 51 | + | # -- TodoManager: structured state the LLM writes to -- | |
| 52 | + | class TodoManager: | |
| 53 | + | def __init__(self): | |
| 54 | + | self.items = [] | |
| 55 | + | ||
| 56 | + | def update(self, items: list) -> str: | |
| 57 | + | if len(items) > 20: | |
| 58 | + | raise ValueError("Max 20 todos allowed") | |
| 59 | + | validated = [] | |
| 60 | + | in_progress_count = 0 | |
| 61 | + | for i, item in enumerate(items): | |
| 62 | + | text = str(item.get("text", "")).strip() | |
| 63 | + | status = str(item.get("status", "pending")).lower() | |
| 64 | + | item_id = str(item.get("id", str(i + 1))) | |
| 65 | + | if not text: | |
| 66 | + | raise ValueError(f"Item {item_id}: text required") | |
| 67 | + | if status not in ("pending", "in_progress", "completed"): | |
| 68 | + | raise ValueError(f"Item {item_id}: invalid status '{status}'") | |
| 69 | + | if status == "in_progress": | |
| 70 | + | in_progress_count += 1 | |
| 71 | + | validated.append({"id": item_id, "text": text, "status": status}) | |
| 72 | + | if in_progress_count > 1: | |
| 73 | + | raise ValueError("Only one task can be in_progress at a time") | |
| 74 | + | self.items = validated | |
| 75 | + | return self.render() | |
| 76 | + | ||
| 77 | + | def render(self) -> str: | |
| 78 | + | if not self.items: | |
| 79 | + | return "No todos." | |
| 80 | + | lines = [] | |
| 81 | + | for item in self.items: | |
| 82 | + | marker = {"pending": "[ ]", "in_progress": "[>]", "completed": "[x]"}[item["status"]] | |
| 83 | + | lines.append(f"{marker} #{item['id']}: {item['text']}") | |
| 84 | + | done = sum(1 for t in self.items if t["status"] == "completed") | |
| 85 | + | lines.append(f"\n({done}/{len(self.items)} completed)") | |
| 86 | + | return "\n".join(lines) | |
| 87 | + | ||
| 88 | + | ||
| 89 | + | TODO = TodoManager() | |
| 90 | + | ||
| 91 | + | ||
| 92 | + | # -- Tool implementations -- | |
| 41 | 93 | def safe_path(p: str) -> Path: | |
| 42 | 94 | path = (WORKDIR / p).resolve() | |
| 43 | 95 | if not path.is_relative_to(WORKDIR): | |
| 44 | 96 | raise ValueError(f"Path escapes workspace: {p}") | |
| 45 | 97 | return path | |
| 46 | 98 | ||
| 47 | - | ||
| 48 | 99 | def run_bash(command: str) -> str: | |
| 49 | 100 | dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"] | |
| 50 | 101 | if any(d in command for d in dangerous): | |
| 51 | 102 | return "Error: Dangerous command blocked" | |
| 52 | 103 | try: | |
| 53 | 104 | r = subprocess.run(command, shell=True, cwd=WORKDIR, | |
| 54 | 105 | capture_output=True, text=True, timeout=120) | |
| 55 | 106 | out = (r.stdout + r.stderr).strip() | |
| 56 | 107 | return out[:50000] if out else "(no output)" | |
| 57 | 108 | except subprocess.TimeoutExpired: | |
| 58 | 109 | return "Error: Timeout (120s)" | |
| 59 | 110 | ||
| 60 | - | ||
| 61 | 111 | def run_read(path: str, limit: int = None) -> str: | |
| 62 | 112 | try: | |
| 63 | - | text = safe_path(path).read_text() | |
| 64 | - | lines = text.splitlines() | |
| 113 | + | lines = safe_path(path).read_text().splitlines() | |
| 65 | 114 | if limit and limit < len(lines): | |
| 66 | - | lines = lines[:limit] + [f"... ({len(lines) - limit} more lines)"] | |
| 115 | + | lines = lines[:limit] + [f"... ({len(lines) - limit} more)"] | |
| 67 | 116 | return "\n".join(lines)[:50000] | |
| 68 | 117 | except Exception as e: | |
| 69 | 118 | return f"Error: {e}" | |
| 70 | 119 | ||
| 71 | - | ||
| 72 | 120 | def run_write(path: str, content: str) -> str: | |
| 73 | 121 | try: | |
| 74 | 122 | fp = safe_path(path) | |
| 75 | 123 | fp.parent.mkdir(parents=True, exist_ok=True) | |
| 76 | 124 | fp.write_text(content) | |
| 77 | - | return f"Wrote {len(content)} bytes to {path}" | |
| 125 | + | return f"Wrote {len(content)} bytes" | |
| 78 | 126 | except Exception as e: | |
| 79 | 127 | return f"Error: {e}" | |
| 80 | 128 | ||
| 81 | - | ||
| 82 | 129 | def run_edit(path: str, old_text: str, new_text: str) -> str: | |
| 83 | 130 | try: | |
| 84 | 131 | fp = safe_path(path) | |
| 85 | 132 | content = fp.read_text() | |
| 86 | 133 | if old_text not in content: | |
| 87 | 134 | return f"Error: Text not found in {path}" | |
| 88 | 135 | fp.write_text(content.replace(old_text, new_text, 1)) | |
| 89 | 136 | return f"Edited {path}" | |
| 90 | 137 | except Exception as e: | |
| 91 | 138 | return f"Error: {e}" | |
| 92 | 139 | ||
| 93 | 140 | ||
| 94 | - | # -- The dispatch map: {tool_name: handler} -- | |
| 95 | 141 | TOOL_HANDLERS = { | |
| 96 | 142 | "bash": lambda **kw: run_bash(kw["command"]), | |
| 97 | 143 | "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), | |
| 98 | 144 | "write_file": lambda **kw: run_write(kw["path"], kw["content"]), | |
| 99 | 145 | "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), | |
| 146 | + | "todo": lambda **kw: TODO.update(kw["items"]), | |
| 100 | 147 | } | |
| 101 | 148 | ||
| 102 | 149 | TOOLS = [ | |
| 103 | 150 | {"name": "bash", "description": "Run a shell command.", | |
| 104 | 151 | "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, | |
| 105 | 152 | {"name": "read_file", "description": "Read file contents.", | |
| 106 | 153 | "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}}, | |
| 107 | 154 | {"name": "write_file", "description": "Write content to file.", | |
| 108 | 155 | "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}}, | |
| 109 | 156 | {"name": "edit_file", "description": "Replace exact text in file.", | |
| 110 | 157 | "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}}, | |
| 158 | + | {"name": "todo", "description": "Update task list. Track progress on multi-step tasks.", | |
| 159 | + | "input_schema": {"type": "object", "properties": {"items": {"type": "array", "items": {"type": "object", "properties": {"id": {"type": "string"}, "text": {"type": "string"}, "status": {"type": "string", "enum": ["pending", "in_progress", "completed"]}}, "required": ["id", "text", "status"]}}}, "required": ["items"]}}, | |
| 111 | 160 | ] | |
| 112 | 161 | ||
| 113 | 162 | ||
| 163 | + | # -- Agent loop with nag reminder injection -- | |
| 114 | 164 | def agent_loop(messages: list): | |
| 165 | + | rounds_since_todo = 0 | |
| 115 | 166 | while True: | |
| 167 | + | # Nag reminder is injected below, alongside tool results | |
| 116 | 168 | response = client.messages.create( | |
| 117 | 169 | model=MODEL, system=SYSTEM, messages=messages, | |
| 118 | 170 | tools=TOOLS, max_tokens=8000, | |
| 119 | 171 | ) | |
| 120 | 172 | messages.append({"role": "assistant", "content": response.content}) | |
| 121 | 173 | if response.stop_reason != "tool_use": | |
| 122 | 174 | return | |
| 123 | 175 | results = [] | |
| 176 | + | used_todo = False | |
| 124 | 177 | for block in response.content: | |
| 125 | 178 | if block.type == "tool_use": | |
| 126 | 179 | handler = TOOL_HANDLERS.get(block.name) | |
| 127 | - | output = handler(**block.input) if handler else f"Unknown tool: {block.name}" | |
| 128 | - | print(f"> {block.name}: {output[:200]}") | |
| 129 | - | results.append({"type": "tool_result", "tool_use_id": block.id, "content": output}) | |
| 180 | + | try: | |
| 181 | + | output = handler(**block.input) if handler else f"Unknown tool: {block.name}" | |
| 182 | + | except Exception as e: | |
| 183 | + | output = f"Error: {e}" | |
| 184 | + | print(f"> {block.name}: {str(output)[:200]}") | |
| 185 | + | results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)}) | |
| 186 | + | if block.name == "todo": | |
| 187 | + | used_todo = True | |
| 188 | + | rounds_since_todo = 0 if used_todo else rounds_since_todo + 1 | |
| 189 | + | if rounds_since_todo >= 3: | |
| 190 | + | results.insert(0, {"type": "text", "text": "<reminder>Update your todos.</reminder>"}) | |
| 130 | 191 | messages.append({"role": "user", "content": results}) | |
| 131 | 192 | ||
| 132 | 193 | ||
| 133 | 194 | if __name__ == "__main__": | |
| 134 | 195 | history = [] | |
| 135 | 196 | while True: | |
| 136 | 197 | try: | |
| 137 | - | query = input("\033[36ms02 >> \033[0m") | |
| 198 | + | query = input("\033[36ms03 >> \033[0m") | |
| 138 | 199 | except (EOFError, KeyboardInterrupt): | |
| 139 | 200 | break | |
| 140 | 201 | if query.strip().lower() in ("q", "exit", ""): | |
| 141 | 202 | break | |
| 142 | 203 | history.append({"role": "user", "content": query}) | |
| 143 | 204 | agent_loop(history) | |
| 144 | 205 | response_content = history[-1]["content"] | |
| 145 | 206 | if isinstance(response_content, list): | |
| 146 | 207 | for block in response_content: | |
| 147 | 208 | if hasattr(block, "text"): | |
| 148 | 209 | print(block.text) | |
| 149 | 210 | print() |