diff --git a/docs/diagrams/architecture_overview.drawio b/docs/diagrams/architecture_overview.drawio
new file mode 100644
index 0000000..0997fa9
--- /dev/null
+++ b/docs/diagrams/architecture_overview.drawio
@@ -0,0 +1,279 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/diagrams/architecture_overview.md b/docs/diagrams/architecture_overview.md
new file mode 100644
index 0000000..343df10
--- /dev/null
+++ b/docs/diagrams/architecture_overview.md
@@ -0,0 +1,458 @@
+# KnowCode — System Architecture
+
+> Textual narration of [`architecture_overview.drawio`](architecture_overview.drawio).
+> Every component, relationship, and label in the draw.io file is described here in full.
+
+---
+
+## Overview
+
+KnowCode is a code intelligence system that parses a codebase into a semantic knowledge graph, indexes it with hybrid BM25 + vector search, and exposes that intelligence through four distinct interfaces: a CLI, a REST API, an MCP server, and an Agent Gateway. The system is structured into five horizontal layers plus a separately deployable Agent Gateway microservice.
+
+---
+
+## Layer 0 — User Interfaces
+
+All user-facing entry points sit in this layer. Every interface ultimately delegates to the Service Layer beneath it.
+
+### CLI (`cli.py`, click framework)
+
+The command-line interface exposes eleven commands:
+
+| Command | Purpose |
+|---|---|
+| `analyze` | Scan a directory, build knowledge graph, and auto-build semantic index |
+| `index` | (Re)build the semantic index from an existing graph |
+| `query` | Lexical query: callers, callees, dependencies, or search |
+| `context` | Generate a task-aware context bundle for an entity |
+| `semantic-search` | Natural-language search over embeddings |
+| `export` | Export the knowledge graph as Markdown documentation |
+| `stats` | Print entity and relationship counts |
+| `server` | Start the FastAPI REST server (optionally with `--watch`) |
+| `history` | Show git commit history or entity change history |
+| `ask` | Answer a question using the LLM Agent |
+| `mcp-server` | Start the MCP server over STDIO |
+
+### FastAPI REST API (`:8000`, uvicorn)
+
+Eleven endpoints grouped by rate-limit tier:
+
+**Standard (60 req/min):**
+- `GET /api/v1/health` — liveness check
+- `GET /api/v1/stats` — entity/relationship counts
+- `GET /api/v1/search?q=` — lexical entity search
+- `GET /api/v1/context?target=&task_type=` — context bundle for a named entity
+- `GET /api/v1/entities/{entity_id}` — raw entity detail
+- `GET /api/v1/callers/{entity_id}` — direct callers
+- `GET /api/v1/callees/{entity_id}` — direct callees
+- `POST /api/v1/context/query` — semantic query with retrieval orchestration
+- `POST /api/v1/reload` — reload KnowledgeStore from disk
+
+**Expensive (10 req/min):**
+- `GET /api/v1/trace_calls/{entity_id}?direction=&depth=` — multi-hop BFS traversal
+- `GET /api/v1/impact/{entity_id}?max_depth=` — transitive impact analysis
+
+### MCP Server (STDIO, JSON-RPC 2.0)
+
+Used by Claude Desktop and compatible IDEs. Exposes four tools:
+
+1. `search_codebase(query, limit=10)`
+2. `get_entity_context(entity_id, task_type, max_tokens)`
+3. `trace_calls(entity_id, direction, depth)`
+4. `retrieve_context_for_query(query, task_type, max_tokens, limit_entities, expand_deps, verbosity)`
+
+### Agent Gateway (FastAPI `:8081`)
+
+A separately deployable microservice (in `apps/agent-gateway/`) that proxies to the KnowCode REST API and wraps it in an LLM-driven tool-use loop. Its own endpoints:
+
+- `GET /health` — gateway liveness
+- `GET /ready` — checks KnowCode + LiteLLM connectivity
+- `GET /api/v1/config` — current gateway configuration
+- `GET /api/v1/tools` — list of available tools (from OpenAPI translation)
+- `POST /api/v1/chat` — submit a message; returns answer + tool execution records
+
+### API Rate Limiter (`rate_limit.py`, slowapi, IP-keyed)
+
+Attached to the FastAPI app as middleware. Two tiers:
+- **Standard:** 60 requests/minute — all endpoints except trace and impact
+- **Expensive:** 10 requests/minute — `trace_calls`, `impact`
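+
+A minimal sketch of how a two-tier slowapi setup like this is typically wired (decorator placement and handler registration in `rate_limit.py` may differ):
+
+```python
+from fastapi import FastAPI, Request
+from slowapi import Limiter, _rate_limit_exceeded_handler
+from slowapi.errors import RateLimitExceeded
+from slowapi.util import get_remote_address
+
+limiter = Limiter(key_func=get_remote_address)   # IP-keyed
+app = FastAPI()
+app.state.limiter = limiter
+app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
+
+@app.get("/api/v1/search")
+@limiter.limit("60/minute")                      # standard tier
+async def search(request: Request, q: str):
+    ...
+
+@app.get("/api/v1/impact/{entity_id}")
+@limiter.limit("10/minute")                      # expensive tier
+async def impact(request: Request, entity_id: str, max_depth: int | None = None):
+    ...
+```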
+
+---
+
+## Layer 1 — Service Layer
+
+### `KnowCodeService` (`service.py`)
+
+The single central orchestrator. All interfaces call this class. Key public methods:
+
+| Method | What it does |
+|---|---|
+| `analyze(dir, output, temporal, coverage)` | Builds knowledge graph via `GraphBuilder` → saves JSON → auto-calls `_build_index()`. Returns stats dict. |
+| `ensure_store()` / `ensure_index()` | Build store or index only if not already present on disk |
+| `get_indexer()` | Lazy-init `Indexer(embedding_provider)`, optionally load existing index |
+| `get_search_engine()` | Lazy-init `SearchEngine(chunk_repo, embedding_provider, HybridIndex, store)` |
+| `retrieve_context_for_query(query, max_tokens, task_type, limit_entities, expand_deps, verbosity)` | Delegates to `RetrievalOrchestrator` |
+| `search(pattern)` | Lexical entity search on `KnowledgeStore` |
+| `get_context(target, max_tokens, task_type)` | Single-entity context bundle via `ContextSynthesizer` |
+| `get_callers(id)` / `get_callees(id)` | Graph traversal shortcuts |
+| `get_entity_details(id)` | Raw entity dict |
+| `get_stats()` | Entity/relationship/chunk/vector counts |
+| `reload()` | Clears in-memory `_store`, re-reads from disk on next access |
+
+The `store` property is lazy: it loads `KnowledgeStore` from disk on first access and caches it as `_store`.
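+
+In sketch form (the stub `KnowledgeStore` stands in for the real class; `service.py` itself differs in detail):
+
+```python
+import json
+from pathlib import Path
+from typing import Optional
+
+class KnowledgeStore:
+    """Stub standing in for storage/knowledge_store.py."""
+    def __init__(self, data: dict):
+        self.data = data
+
+    @classmethod
+    def load(cls, path: Path) -> "KnowledgeStore":
+        return cls(json.loads(path.read_text()))
+
+class KnowCodeService:
+    def __init__(self, store_path: Path):
+        self.store_path = store_path
+        self._store: Optional[KnowledgeStore] = None   # filled on first access
+
+    @property
+    def store(self) -> KnowledgeStore:
+        if self._store is None:                        # lazy load + cache
+            self._store = KnowledgeStore.load(self.store_path)
+        return self._store
+
+    def reload(self) -> None:
+        self._store = None                             # next .store access re-reads disk
+```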
+
+---
+
+## Layer 2 — Core Processing Pipelines
+
+### Indexing Pipeline
+
+Three components form a linear chain: **GraphBuilder → Chunker → Indexer**.
+
+**`GraphBuilder` (`graph_builder.py`)**
+
+- `build_from_directory(root_dir, additional_ignores, analyze_temporal, coverage_path)` — orchestrates the full scan:
+ 1. Calls `Scanner.scan(root_dir)` to discover files (applying `.gitignore` via pathspec)
+ 2. For each `FileInfo`, calls `_parse_file()` which selects the correct parser by language
+ 3. Accumulates `ParseResult` objects via `_merge_result()`
+ 4. After all files: calls `_resolve_references()` to wire cross-file `CALLS`, `IMPORTS`, `INHERITS` relationships
+ 5. Optionally runs `TemporalAnalyzer` (git history) and `CoverageProcessor` (Cobertura XML)
+- Exposes: `get_entity()`, `get_entities_by_kind()`, `search_entities()`, `stats()`
+
+**`Chunker` (`chunker.py`)**
+
+- `process_parse_result(result)` — splits each entity into overlapping `CodeChunk` objects:
+ - Module header chunks (file-level docstring + metadata)
+ - Import block chunk
+ - Per-entity chunks (signature + docstring + body)
+ - Each chunk carries BM25 tokenized `tokens[]` list
+
+**`Indexer` (`indexer.py`)**
+
+- `index_directory(directory)` — runs its own internal scan+parse+chunk+embed pipeline end-to-end
+- `index_file(file_path)` — incremental re-index of a single file (used by `BackgroundIndexer`)
+- `save(index_path)` — writes `chunks.json`, `vectors.index`, `vectors.json` under `knowcode_index/`
+- `load(index_path)` — restores from disk
+
+> **Note:** `KnowCodeService.analyze()` calls `GraphBuilder` for the knowledge graph, then separately calls `_build_index()` which creates a new `Indexer` that scans again. Both pipelines run during `knowcode analyze`.
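+
+The standalone pipeline can also be driven directly with the calls named above; a sketch (the import paths are assumptions about the package layout, not verified module names):
+
+```python
+# Hypothetical import paths -- adjust to the real package layout.
+from knowcode.config import AppConfig
+from knowcode.llm.embedding import create_embedding_provider
+from knowcode.indexing.indexer import Indexer
+
+config = AppConfig.load()
+provider = create_embedding_provider(config)
+
+indexer = Indexer(provider)
+indexer.index_directory("./src")         # internal scan -> parse -> chunk -> embed
+indexer.save("./src/knowcode_index")     # chunks.json, vectors.index, vectors.json
+```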
+
+### Retrieval Pipeline
+
+Five components: **QueryClassifier → HybridIndex → SearchEngine → Reranker → expand_dependencies**.
+
+**`QueryClassifier` (`query_classifier.py`)**
+
+- `classify_query(query)` → `(TaskType, confidence: float)`
+- Uses regex pattern matching with weighted scoring across five task types: `EXPLAIN`, `DEBUG`, `EXTEND`, `REVIEW`, `LOCATE`
+- Returns `GENERAL` with confidence 0.0 when no patterns match
+- Also provides `get_prompt_template(task_type)` — task-specific LLM system prompt strings
+
+**`HybridIndex` (`hybrid_index.py`)**
+
+- `search(query_text, query_vector, limit)` → `list[(CodeChunk, score)]`
+- Combines:
+ - BM25 lexical search on `ChunkRepository` token lists
+ - FAISS dense similarity search on `VectorStore` (cosine via `IndexFlatIP` with normalized vectors)
+ - Merges and normalizes scores from both retrieval modes
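+
+The exact merge formula is not spelled out here; a reasonable reading of "merges and normalizes scores" is min-max normalization per mode followed by summation, sketched below:
+
+```python
+def merge_scores(bm25: dict, dense: dict) -> list[tuple[str, float]]:
+    """Combine two {chunk_id: score} dicts into one normalized ranking.
+
+    Sketch only -- hybrid_index.py may weight the two modes differently.
+    """
+    def normalize(scores: dict) -> dict:
+        if not scores:
+            return {}
+        lo, hi = min(scores.values()), max(scores.values())
+        span = (hi - lo) or 1.0
+        return {k: (v - lo) / span for k, v in scores.items()}
+
+    bm25_n, dense_n = normalize(bm25), normalize(dense)
+    merged = {cid: bm25_n.get(cid, 0.0) + dense_n.get(cid, 0.0)
+              for cid in set(bm25_n) | set(dense_n)}
+    return sorted(merged.items(), key=lambda kv: kv[1], reverse=True)
+```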
+
+**`SearchEngine` (`search_engine.py`)**
+
+- `search_scored(query, limit, expand_deps)` → `list[ScoredChunk]` — the full pipeline:
+ 1. `embedding_provider.embed_single(query)` → query vector
+ 2. `hybrid_index.search(query, query_vector, limit×2)`
+ 3. `reranker.rerank(query, results, top_k=limit)`
+ 4. `expand_dependencies()` for each top result
+- `search(query, limit, expand_deps)` → `list[CodeChunk]` (strips scores)
+- `ScoredChunk` carries `{chunk, score, source: "retrieved"|"dependency"}`
+
+**`Reranker` (`reranker.py`)**
+
+- `rerank(query, chunks, top_k)` → `list[(CodeChunk, score)]`
+- **Primary path:** VoyageAI cross-encoder (`rerank-2.5` model via `voyage_client.rerank()`)
+- **Fallback path** (if VoyageAI unavailable): signal-based scoring:
+ - `boost_documented`: ×1.2 if chunk has docstring
+ - `boost_recent`: ×1.1 if last-modified within 7 days
+ - Query-in-content: ×1.5 if query string appears in chunk text
+ - Exact kind match: ×2.0
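+
+A sketch of the fallback scorer using the multipliers listed above (field names on `chunk` are illustrative, not the real `CodeChunk` attributes; the "exact kind match" test is one interpretation):
+
+```python
+from datetime import datetime, timedelta
+
+def fallback_score(chunk: dict, query: str, base_score: float) -> float:
+    score = base_score
+    if chunk.get("docstring"):
+        score *= 1.2                                            # boost_documented
+    modified = chunk.get("last_modified")
+    if modified and datetime.now() - modified < timedelta(days=7):
+        score *= 1.1                                            # boost_recent
+    if query.lower() in chunk.get("content", "").lower():
+        score *= 1.5                                            # query appears in content
+    kind = chunk.get("kind", "")
+    if kind and kind.lower() in query.lower():                  # exact kind match
+        score *= 2.0
+    return score
+```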
+
+**`expand_dependencies` (`completeness.py`)**
+
+- Takes a `CodeChunk` and expands to include its callees (up to `max_depth=1`)
+- Uses `chunk_repo.get_by_entity()` + `knowledge_store.get_callees()`
+- Marks expanded chunks with `source="dependency"`
+
+### `RetrievalOrchestrator` (`retrieval/orchestrator.py`)
+
+Coordinates the full end-to-end retrieval flow:
+
+1. Validate store + index exist
+2. `classify_query()` → resolve task type (override if caller specified one)
+3. `get_search_engine()` → validate index compatibility (embedding dimension + model)
+4. `engine.search_scored()` → semantic retrieval (falls back to lexical on any exception)
+5. For each selected entity: `get_context()` → `ContextSynthesizer`
+6. Assemble `context_text`, compute average `sufficiency_score`
+7. Filter response fields based on `verbosity`:
+ - `minimal` → `{context_text, sufficiency_score, total_tokens, reduction_summary}`
+ - `standard` → + `query, task_type, task_confidence, retrieval_mode, max_tokens, truncated`
+ - `verbose` → + `evidence[]`
+ - `diagnostic` → full dict with all fields and `errors[]`
+
+---
+
+## Layer 2b — LLM Agent
+
+### `ContextSynthesizer` (`analysis/context_synthesizer.py`)
+
+Generates token-budget-aware context bundles for individual entities.
+
+- `synthesize(entity_id, summarize)` — default synthesis: header + docstring + signature + source_code + parent + callers + callees + children (in priority order, stopping at token budget)
+- `synthesize_with_task(entity_id, task_type, summarize)` — task-prioritized synthesis using `TASK_TEMPLATES`:
+
+| TaskType | Priority order | Boosts |
+|---|---|---|
+| `DEBUG` | source_code, callers, callees, signature, docstring | source_code ×2.0, callers ×1.5 |
+| `EXTEND` | signature, docstring, children, parent, source_code | signature ×1.5, children ×1.3 |
+| `REVIEW` | source_code, callers, callees, signature | callers ×1.5, callees ×1.5 |
+| `EXPLAIN` | docstring, signature, source_code, callees, parent | docstring ×1.5, callees ×1.3 |
+| `LOCATE` | signature, docstring, parent | none |
+| `GENERAL` | docstring, signature, source_code, parent, callers, callees | none |
+
+- `_calculate_sufficiency(task_type, content_included, entity, text)` → `float 0.0–1.0`
+ - Weighted sum over priority sections (weight = 1/(rank+1))
+ - Bonus: +0.2 if source_code included; +0.1 if long docstring present
+ - Penalty: ×0.5 if total context < 100 chars
+- Returns `ContextBundle {target_entity, context_text, included_entities, total_tokens, truncated, task_type, sufficiency_score}`
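+
+A sketch of that scoring rule (the normalization that keeps the result in 0.0–1.0 is an assumption; only the 1/(rank+1) weights, bonuses, and penalty are stated above):
+
+```python
+def calculate_sufficiency(priority: list[str], included: set[str],
+                          context_text: str, has_long_docstring: bool) -> float:
+    total = sum(1.0 / (rank + 1) for rank in range(len(priority)))
+    covered = sum(1.0 / (rank + 1)
+                  for rank, section in enumerate(priority) if section in included)
+    score = covered / total if total else 0.0
+    if "source_code" in included:
+        score += 0.2                          # bonus: source code present
+    if has_long_docstring:
+        score += 0.1                          # bonus: long docstring present
+    if len(context_text) < 100:
+        score *= 0.5                          # penalty: very short context
+    return min(score, 1.0)
+```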
+
+### `Agent` (`llm/agent.py`)
+
+Answers codebase questions using configured LLM providers.
+
+- `answer(query)` — always invokes an LLM:
+ 1. `service.retrieve_context_for_query(query)` → context bundle
+ 2. `get_prompt_template(task_type)` → system instructions
+ 3. Iterate configured models with RPM/RPD rate-limit check:
+ - Google: `client.models.generate_content(model, prompt)`
+ - OpenAI-compatible: `client.chat.completions.create(model, messages)` (with `HTTP-Referer` header for OpenRouter)
+ - On `ResourceExhausted` or error: try next model
+ 4. `rate_limiter.record_usage(model.name)` → `~/.knowcode/usage_stats.json`
+
+- `smart_answer(query, force_llm=False)` — local-first:
+ 1. Retrieve context and check `sufficiency_score ≥ config.sufficiency_threshold` (default 0.8)
+ 2. If sufficient: `_format_local_answer()` — returns context-only answer (zero LLM tokens)
+ 3. If insufficient or `force_llm=True`: delegates to `answer()`
+ 4. Returns `{answer, source: "local"|"llm", task_type, sufficiency_score, context, llm_tokens_saved}`
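+
+The local-first branch, as a sketch (collaborators are passed in explicitly; attribute and key names follow the description above but are not verified):
+
+```python
+def smart_answer(service, agent, threshold: float, query: str,
+                 force_llm: bool = False) -> dict:
+    bundle = service.retrieve_context_for_query(query)
+    score = bundle["sufficiency_score"]
+    if score >= threshold and not force_llm:
+        # Context alone is good enough: answer locally, spend zero LLM tokens.
+        return {"answer": bundle["context_text"], "source": "local",
+                "task_type": bundle["task_type"], "sufficiency_score": score}
+    # Insufficient context (or forced): fall through to the LLM path.
+    return {"answer": agent.answer(query), "source": "llm",
+            "task_type": bundle["task_type"], "sufficiency_score": score}
+```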
+
+---
+
+## Layer 3 — Storage Layer
+
+### `KnowledgeStore` (`storage/knowledge_store.py`)
+
+- In-memory semantic graph: `entities: dict[str, Entity]` + `relationships: list[Relationship]`
+- Persistence: `knowcode_knowledge.json` (schema v2)
+- Core factory: `from_graph_builder(builder)` — transfers parsed data into the store
+- Persistence: `save(path)` / `load(path)` / `_migrate_schema()` (handles v1→v2 upgrade)
+- Graph queries: `search()`, `get_entity()`, `get_callers()`, `get_callees()`, `get_children()`, `get_parent()`, `get_dependencies()`, `get_dependents()`, `trace_calls()`, `get_impact()`, `list_by_kind()`
+
+### `VectorStore` (`storage/vector_store.py`)
+
+- Wraps FAISS `IndexFlatIP` with L2-normalized embeddings (equivalent to cosine similarity)
+- Default embedding dimension: 1024 (voyage-code-3)
+- Persistence: `knowcode_index/vectors.index` (FAISS binary) + `knowcode_index/vectors.json` (metadata)
+- API: `add(chunks, embeddings)`, `search(query_vector, k)`, `save()`, `load()`, `clear()`, `_validate_and_migrate_metadata()`
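+
+The FAISS usage reduces to the following pattern (a sketch; the real `VectorStore` adds metadata tracking and schema migration on top):
+
+```python
+import faiss
+import numpy as np
+
+DIM = 1024                                        # voyage-code-3 embedding size
+
+index = faiss.IndexFlatIP(DIM)                    # inner-product index
+
+vectors = np.random.rand(8, DIM).astype("float32")
+faiss.normalize_L2(vectors)                       # normalized IP == cosine similarity
+index.add(vectors)
+
+query = np.random.rand(1, DIM).astype("float32")
+faiss.normalize_L2(query)
+scores, ids = index.search(query, 3)              # top-3 nearest chunks
+
+faiss.write_index(index, "vectors.index")         # the binary file kept in knowcode_index/
+```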
+
+### `ChunkRepository` (`storage/chunk_repository.py`)
+
+- `InMemoryChunkRepository` implementation
+- Stores `CodeChunk` objects indexed by `chunk_id` and `entity_id`
+- Persistence: `knowcode_index/chunks.json`
+- API: `add(chunk)`, `get(chunk_id)`, `get_by_entity(entity_id)`, `search_by_tokens(tokens)` (BM25 candidate lookup), `clear()`
+
+---
+
+## Layer 4 — Infrastructure / Plugins
+
+### Parsers (`parsers/`)
+
+Language-specific parser implementations, all extending `TreeSitterParser` (base class):
+
+| Parser | Language |
+|---|---|
+| `PythonParser` | Python |
+| `JavaScriptParser` | JavaScript |
+| `TypeScriptParser` | TypeScript |
+| `JavaParser` | Java |
+| `RustParser` | Rust |
+| `VueParser` | Vue SFCs |
+| `MarkdownParser` | Markdown (docs) |
+| `YAMLParser` | YAML configs |
+
+Each implements `_extract_entities()` and returns `ParseResult {entities[], relationships[], errors[]}`. The base class handles Tree-sitter `parse_file()`, `_get_text()`, `_get_location()`, `_create_entity()`.
+
+### EmbeddingProviders (`llm/embedding.py`)
+
+Abstract base `EmbeddingProvider` with `embed(texts[])` and `embed_single(text)` methods.
+
+- `VoyageAIEmbeddingProvider` — uses `voyage-code-3` (dim=1024), distinguishes `input_type=document` (indexing) vs `input_type=query` (search)
+- `OpenAIEmbeddingProvider` — supports `text-embedding-3-small` (1536-dim) and `text-embedding-3-large` (3072-dim)
+- `create_embedding_provider(app_config)` factory: tries each configured embedding model in order, checks API key availability, falls back to VoyageAI default
+
+### LLM Clients (`llm/agent.py`)
+
+- `_create_google_client(api_key)` → `google.genai.Client`
+- `_create_openai_client(api_key, base_url)` → `openai.OpenAI` (with optional base_url override for OpenRouter/Mistral)
+- Model failover order defined in `AppConfig.models` (loaded from `aimodels.yaml`)
+
+### Scanner (`indexing/scanner.py`)
+
+- `scan(root_dir)` → `list[FileInfo]` — discovers all non-ignored files
+- `_load_gitignore()` — reads `.gitignore` via pathspec
+- `_should_ignore(path)` — applies gitignore rules + extension filter
+- `FileInfo`: `{path, size, modified, language}` — language auto-detected from extension
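+
+A sketch of that filter chain with `pathspec` (the extension set is illustrative):
+
+```python
+from pathlib import Path
+import pathspec
+
+SUPPORTED_EXTENSIONS = {".py", ".js", ".ts", ".java", ".rs", ".vue", ".md", ".yaml", ".yml"}
+
+def scan(root: Path) -> list[Path]:
+    """Discover non-ignored, supported files under root."""
+    gitignore = root / ".gitignore"
+    lines = gitignore.read_text().splitlines() if gitignore.exists() else []
+    spec = pathspec.PathSpec.from_lines("gitwildmatch", lines)
+
+    files = []
+    for path in root.rglob("*"):
+        if not path.is_file():
+            continue
+        if path.suffix not in SUPPORTED_EXTENSIONS:
+            continue                                     # extension filter
+        if spec.match_file(path.relative_to(root).as_posix()):
+            continue                                     # gitignored
+        files.append(path)
+    return files
+```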
+
+### FileMonitor + BackgroundIndexer
+
+**`FileMonitor` (`indexing/monitor.py`)**
+- Wraps watchdog `Observer`
+- `IndexingHandler.on_modified(event)` + `on_created(event)` → `_handle_change(path)` → extension filter → `bg_indexer.queue_file(path)`
+- `start()` / `stop()`
+
+**`BackgroundIndexer` (`indexing/background_indexer.py`)**
+- Daemon thread + `queue.Queue`
+- `queue_file(path)` — enqueues a file path for re-indexing
+- `_worker()` — blocking dequeue loop, calls `indexer.index_file(path)` for each entry
+- `start()` / `stop()`
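+
+The watchdog-plus-queue wiring, in sketch form (`index_file` is a stand-in for `Indexer.index_file`):
+
+```python
+import queue
+import threading
+from watchdog.events import FileSystemEventHandler
+from watchdog.observers import Observer
+
+class IndexingHandler(FileSystemEventHandler):
+    def __init__(self, work_queue: queue.Queue, extensions: set[str]):
+        self.work_queue = work_queue
+        self.extensions = extensions
+
+    def on_modified(self, event):
+        self._handle_change(event.src_path)
+
+    def on_created(self, event):
+        self._handle_change(event.src_path)
+
+    def _handle_change(self, path: str):
+        if any(path.endswith(ext) for ext in self.extensions):
+            self.work_queue.put(path)             # hand off to the background worker
+
+def worker(work_queue: queue.Queue, index_file) -> None:
+    while True:                                   # daemon loop: blocking dequeue
+        path = work_queue.get()
+        index_file(path)                          # incremental re-index of one file
+        work_queue.task_done()
+
+work_queue: queue.Queue = queue.Queue()
+threading.Thread(target=worker, args=(work_queue, print), daemon=True).start()
+
+observer = Observer()
+observer.schedule(IndexingHandler(work_queue, {".py"}), path=".", recursive=True)
+observer.start()                                  # a real service would block / join here
+```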
+
+### TemporalAnalyzer + CoverageProcessor
+
+**`TemporalAnalyzer` (`analysis/temporal.py`)**
+- `analyze_history(limit=100)` — uses GitPython to parse commit log
+- Creates `COMMIT` and `AUTHOR` entities with `AUTHORED`, `MODIFIED`, `CHANGED_BY` relationships
+- Stores `insertions`, `deletions` metadata on `MODIFIED` relationships
+
+**`CoverageProcessor` (`analysis/signals.py`)**
+- `process_cobertura(xml_path)` — parses Cobertura XML coverage report
+- Creates `COVERAGE_REPORT` entity and `COVERS` relationships linking the report to covered modules
+
+### Config (`config.py`)
+
+- `AppConfig.load()` — priority: explicit path → `./aimodels.yaml` → `~/.aimodels.yaml` → defaults
+- `ModelConfig {name, provider, api_key_env, rpm_free_tier_limit=10, rpd_free_tier_limit=1000}`
+- Defaults: NL models = `[gemini-2.0-flash-lite, gemini-1.5-flash, gemini-1.5-pro]`; embedding = `voyage-code-3`; `sufficiency_threshold = 0.8`
+
+---
+
+## Agent Gateway (Separate Microservice)
+
+Located in `apps/agent-gateway/`. Can be moved to an independent repository without code changes.
+
+### `GatewaySettings` (`settings.py`)
+
+Frozen dataclass loaded from environment variables via `from_env()`:
+
+| Setting | Default |
+|---|---|
+| `knowcode_api_base_url` | `http://127.0.0.1:8000` |
+| `litellm_base_url` | `http://127.0.0.1:4000` |
+| `litellm_api_key` | `sk-local-proxy` |
+| `default_model` | `gemini/gemini-3-flash-preview` |
+| `max_tool_rounds` | `4` |
+| `tool_timeout_seconds` | `30.0` |
+| `openapi_cache_ttl_seconds` | `300` |
+| `allowed_tool_names` | `{query_context, search, get_context, trace_calls}` |
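+
+A sketch of the settings object (environment-variable names are assumed to be the upper-cased field names; `allowed_tool_names` is omitted for brevity):
+
+```python
+import os
+from dataclasses import dataclass, fields
+
+@dataclass(frozen=True)
+class GatewaySettings:
+    knowcode_api_base_url: str = "http://127.0.0.1:8000"
+    litellm_base_url: str = "http://127.0.0.1:4000"
+    litellm_api_key: str = "sk-local-proxy"
+    default_model: str = "gemini/gemini-3-flash-preview"
+    max_tool_rounds: int = 4
+    tool_timeout_seconds: float = 30.0
+    openapi_cache_ttl_seconds: int = 300
+
+    @classmethod
+    def from_env(cls) -> "GatewaySettings":
+        kwargs = {}
+        for f in fields(cls):
+            raw = os.environ.get(f.name.upper())   # e.g. MAX_TOOL_ROUNDS (assumed naming)
+            if raw is not None:
+                kwargs[f.name] = f.type(raw)       # cast via the annotated builtin type
+        return cls(**kwargs)
+```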
+
+### `AgentOrchestrator` (`orchestrator.py`)
+
+- `run(ChatRequest)` → `ChatResponse` — the main agentic loop:
+ 1. `_pick_tool_names(request)` → `select_tool_names(message)` (keyword heuristics)
+ 2. Fetch tool schemas from `OpenAPIToolRegistry`
+ 3. Loop ≤ `max_tool_rounds`:
+ - `LiteLLMClient.create_chat_completion(messages, tools)`
+ - `_first_choice(response)` extracts `tool_call`
+ - `_execute_tool_call(tool_call, timeout)` → `KnowCodeClient.execute_tool()`
+ - Append tool result to messages, record `ToolExecutionRecord`
+ 4. Build and return `ChatResponse`
+- `list_tools()` → available tool names
+- `readiness()` → checks KnowCode + LiteLLM health
+
+### `ToolSelector` (`tool_selector.py`)
+
+- `select_tool_names(message)` — keyword heuristics on the user message text
+- Returns a subset of `allowed_tool_names` based on detected intent
+
+### `LiteLLMClient` (`litellm_client.py`)
+
+- `create_chat_completion(messages, tools, model, temperature)` → sends to LiteLLM proxy `:4000`
+- `check_health()` — pings LiteLLM
+- `_extract_response_cost(response)` — extracts cost metadata
+
+### `KnowCodeClient` (`knowcode_client.py`)
+
+- `execute_tool(tool_name, args)` — dispatches to KnowCode REST API:
+ - `query_context` → `POST /api/v1/context/query`
+ - `search` → `GET /api/v1/search?q=...`
+ - `get_context` → `GET /api/v1/context?target=...`
+ - `trace_calls` → `GET /api/v1/trace_calls/{entity_id}?direction=...&depth=...`
+- `check_health()` — pings KnowCode `/api/v1/health`
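+
+In sketch form, assuming an `httpx` client (the HTTP library actually used by the gateway is not stated here):
+
+```python
+import httpx
+
+class KnowCodeClient:
+    """Sketch of the tool-name to REST-endpoint dispatch described above."""
+
+    def __init__(self, base_url: str, timeout: float = 30.0):
+        self.http = httpx.Client(base_url=base_url, timeout=timeout)
+
+    def execute_tool(self, tool_name: str, args: dict) -> dict:
+        if tool_name == "query_context":
+            resp = self.http.post("/api/v1/context/query", json=args)
+        elif tool_name == "search":
+            resp = self.http.get("/api/v1/search", params={"q": args["query"]})
+        elif tool_name == "get_context":
+            resp = self.http.get("/api/v1/context", params=args)
+        elif tool_name == "trace_calls":
+            resp = self.http.get(
+                f"/api/v1/trace_calls/{args['entity_id']}",
+                params={k: v for k, v in args.items() if k != "entity_id"},
+            )
+        else:
+            raise ValueError(f"unknown tool: {tool_name}")
+        resp.raise_for_status()
+        return resp.json()
+```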
+
+### `OpenAPIToolRegistry` + `OpenAPIToolTranslator` (`openapi_tools.py`)
+
+- `fetch_openapi_spec(url)` → fetches `/openapi.json` from KnowCode
+- `OpenAPIToolTranslator` converts OpenAPI operation objects into OpenAI-compatible tool schema dicts
+- Results cached for `openapi_cache_ttl_seconds = 300` seconds
+
+### LiteLLM Proxy (`:4000`)
+
+- Configured via `litellm.config.yaml`
+- Accepts OpenAI-compatible requests and proxies to configured upstream LLMs (Google Gemini, others)
+- Manages rate-limit passthrough
+
+---
+
+## Key Data Models
+
+### `Entity`
+```
+id: "file_path::qualified_name"
+kind: EntityKind (MODULE|CLASS|FUNCTION|METHOD|VARIABLE|DOCUMENT|SECTION|CONFIG_KEY|COMMIT|AUTHOR|TEST_RUN|COVERAGE_REPORT)
+name, qualified_name, location: Location{file_path, line_start, line_end, column_start, column_end}
+docstring, signature, source_code, metadata: dict
+```
+
+### `Relationship`
+```
+source_id, target_id, kind: RelationshipKind, metadata: dict
+RelationshipKind: CALLS|IMPORTS|CONTAINS|INHERITS|IMPLEMENTS|USES_TYPE|REFERENCES
+ CHANGED_BY|AUTHORED|MODIFIED (temporal)
+ COVERS|EXECUTED_BY (runtime)
+```
+
+### `CodeChunk`
+```
+id: "entity_id::chunk_index"
+entity_id, content, tokens: list[str], embedding: list[float] | None, metadata: dict
+```
+
+### `EmbeddingConfig` (default)
+```
+provider: "voyageai", model_name: "voyage-code-3", dimension: 1024, batch_size: 100, normalize: True
+```
+
+---
+
+## Cross-Layer Arrows Summary
+
+| From | To | Nature |
+|---|---|---|
+| CLI / REST API / MCP | KnowCodeService | synchronous call |
+| Agent Gateway | KnowCode REST API | HTTP (dashed) |
+| KnowCodeService | GraphBuilder / SearchEngine / RetrievalOrchestrator / ContextSynthesizer / Agent | delegation |
+| Indexer | KnowledgeStore / VectorStore / ChunkRepository | writes |
+| SearchEngine | VectorStore / ChunkRepository | reads (dashed) |
+| ContextSynthesizer | KnowledgeStore | reads (dashed) |
+| GraphBuilder | Scanner / Parsers | uses |
+| Indexer | EmbeddingProviders | uses |
+| Agent | LLM Clients | calls |
+| Reranker | EmbeddingProviders (VoyageAI) | uses (dashed) |
+| REST API | API Rate Limiter | uses (dashed) |
+| REST API | FileMonitor | triggers (dashed, watch mode) |
diff --git a/docs/diagrams/seq_agent_gateway.drawio b/docs/diagrams/seq_agent_gateway.drawio
new file mode 100644
index 0000000..bdd8a8c
--- /dev/null
+++ b/docs/diagrams/seq_agent_gateway.drawio
@@ -0,0 +1,94 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/diagrams/seq_agent_gateway.md b/docs/diagrams/seq_agent_gateway.md
new file mode 100644
index 0000000..6435530
--- /dev/null
+++ b/docs/diagrams/seq_agent_gateway.md
@@ -0,0 +1,289 @@
+# Sequence Diagram — Agent Gateway Workflow
+
+> Textual narration of [`seq_agent_gateway.drawio`](seq_agent_gateway.drawio).
+> Every participant, message, and note in the draw.io file is described here in full.
+
+**Located in:** `apps/agent-gateway/`
+**Startup:** `local_up.sh`
+**Request entry:** `POST /api/v1/chat`
+**Smoke test:** `scripts/smoke_e2e.py`
+
+---
+
+## Participants
+
+| Participant | File | Role |
+|---|---|---|
+| User / IDE | — | Sends chat requests to the Gateway |
+| Gateway FastAPI `:8081` | `app.py` | HTTP server — validates requests, delegates to orchestrator |
+| AgentOrchestrator | `orchestrator.py` | Agentic tool-use loop (max 4 rounds) |
+| ToolSelector | `tool_selector.py` | Keyword heuristics — selects tool subset from user message |
+| OpenAPIToolRegistry | `openapi_tools.py` | Caches OpenAI-compatible tool schemas derived from KnowCode OpenAPI spec |
+| LiteLLMClient | `litellm_client.py` | Sends chat completion requests to LiteLLM proxy |
+| LiteLLM Proxy `:4000` | external | Normalizes to upstream LLMs (Gemini, Mistral, …) |
+| KnowCodeClient | `knowcode_client.py` | Dispatches tool calls to KnowCode REST API |
+| KnowCode REST API `:8000` | `src/knowcode/api/api.py` | The main KnowCode service API |
+
+---
+
+## Startup — `local_up.sh`
+
+### Step 1 — Start dependencies
+
+```
+local_up.sh:
+ → start KnowCode REST API on :8000
+ → start LiteLLM proxy on :4000
+```
+
+### Step 2 — Load settings
+
+```
+Gateway: GatewaySettings.from_env()
+```
+
+Settings loaded (frozen dataclass, all from environment variables):
+
+| Setting | Default |
+|---|---|
+| `knowcode_api_base_url` | — (required) |
+| `litellm_base_url` | — (required) |
+| `default_model` | — (required) |
+| `max_tool_rounds` | `4` |
+| `tool_timeout_seconds` | `30` |
+| `openapi_cache_ttl_seconds` | `300` |
+
+### Step 3 — Fetch OpenAPI spec
+
+```
+Gateway → KnowCode REST API:
+ GET {knowcode_api_base_url}/openapi.json
+```
+
+```
+KnowCode REST API → OpenAPIToolRegistry: OpenAPI spec JSON
+```
+
+### Step 4 — Translate to tool schemas
+
+```
+OpenAPIToolRegistry:
+ OpenAPIToolTranslator.translate(openapi_spec)
+ → OpenAI-compatible tool schema list (cached for 300 s)
+```
+
+### Step 5 — Gateway ready
+
+```
+Gateway: listening on :8081
+```
+
+---
+
+## Agentic Request — `POST /api/v1/chat`
+
+### Step 6 — Receive chat request
+
+```
+User / IDE → Gateway:
+ POST /api/v1/chat
+ ChatRequest{
+ message,
+ conversation[],
+ model,
+ tags,
+ tool_names,
+ temperature
+ }
+```
+
+### Step 7 — Delegate to orchestrator
+
+```
+Gateway → AgentOrchestrator: orchestrator.run(chat_request)
+```
+
+### Step 8 — Select tools
+
+```
+AgentOrchestrator → ToolSelector:
+ _pick_tool_names(request) → select_tool_names(message)
+```
+
+Keyword heuristics (not ML):
+
+| Keyword pattern | Tool selected |
+|---|---|
+| `explain`, `what is`, `describe` | `get_context` |
+| `find`, `search`, `where` | `search` |
+| `trace`, `who calls`, `callers` | `trace_calls` |
+| (default) | all four tools |
+
+Returns: subset of `{query_context, search, get_context, trace_calls}`.
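+
+A sketch of those heuristics (the exact keyword lists live in `tool_selector.py` and may differ):
+
+```python
+def select_tool_names(message: str) -> set[str]:
+    text = message.lower()
+    selected = set()
+    if any(kw in text for kw in ("explain", "what is", "describe")):
+        selected.add("get_context")
+    if any(kw in text for kw in ("find", "search", "where")):
+        selected.add("search")
+    if any(kw in text for kw in ("trace", "who calls", "callers")):
+        selected.add("trace_calls")
+    # Default: no keyword matched, offer every allowed tool.
+    return selected or {"query_context", "search", "get_context", "trace_calls"}
+```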
+
+### Step 9 — Fetch tool schemas
+
+```
+AgentOrchestrator → OpenAPIToolRegistry:
+ get tool schemas for selected tools
+```
+
+Returns: list of OpenAI-compatible tool schema dicts.
+
+---
+
+## Tool-Use Loop — up to `max_tool_rounds=4` iterations
+
+### Step 10 — LLM completion with tools
+
+```
+AgentOrchestrator → LiteLLMClient:
+ litellm_client.create_chat_completion(
+ messages, tools=tool_schemas, model, temperature
+ )
+```
+
+### Step 11 — Forward to LiteLLM proxy
+
+```
+LiteLLMClient → LiteLLM Proxy:
+ POST http://litellm_base_url/chat/completions
+```
+
+### Step 12 — Upstream LLM call
+
+```
+LiteLLM Proxy: proxy → upstream LLM (Gemini / Mistral / …)
+```
+
+### Step 13 — Receive completion response
+
+```
+LiteLLM Proxy → LiteLLMClient:
+ ChatCompletion{
+ choices[0].finish_reason,
+ choices[0].message.tool_calls[]
+ }
+```
+
+### Step 14 — Extract tool call
+
+```
+LiteLLMClient → AgentOrchestrator:
+ _first_choice(response) → tool_call{id, name, arguments}
+```
+
+---
+
+### [if `finish_reason == "tool_calls"`] — Execute tool call (timeout = 30 s)
+
+### Step 15 — Dispatch to KnowCodeClient
+
+```
+AgentOrchestrator → KnowCodeClient:
+ _execute_tool_call(tool_call) → knowcode_client.execute_tool(name, args)
+```
+
+### Step 16 — KnowCodeClient dispatches to REST API
+
+KnowCodeClient maps tool names to REST endpoints:
+
+| Tool name | HTTP call |
+|---|---|
+| `query_context` | `POST /api/v1/context/query {query, limit, task_type}` |
+| `search` | `GET /api/v1/search?q=...` |
+| `get_context` | `GET /api/v1/context?target=...&task_type=...` |
+| `trace_calls` | `GET /api/v1/trace_calls/{entity_id}?direction=...&depth=...` |
+
+### Step 17 — API result returned
+
+```
+KnowCode REST API → KnowCodeClient: result JSON
+```
+
+### Step 18 — Record execution
+
+```
+KnowCodeClient → AgentOrchestrator:
+ ToolExecutionRecord{
+ tool_name,
+ tool_call_id,
+ arguments,
+ success,
+ latency_ms
+ }
+```
+
+### Step 19 — Append result and continue loop
+
+```
+AgentOrchestrator:
+ append tool_result to messages[]
+ → continue loop
+```
+
+---
+
+**Loop exits when:** `finish_reason == "stop"` OR `max_tool_rounds` reached.
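+
+The loop shape, as a sketch over OpenAI-style response dicts (`llm` and `execute_tool` are stand-ins for `LiteLLMClient.create_chat_completion` and `KnowCodeClient.execute_tool`):
+
+```python
+import json
+
+def run_tool_loop(llm, execute_tool, messages: list[dict],
+                  tools: list[dict], max_tool_rounds: int = 4) -> dict:
+    records = []
+    response = {"choices": [{"finish_reason": "stop", "message": {"content": ""}}]}
+    for _ in range(max_tool_rounds):
+        response = llm(messages=messages, tools=tools)      # one chat completion round
+        choice = response["choices"][0]
+        if choice["finish_reason"] != "tool_calls":
+            break                                           # model produced a final answer
+        messages.append(choice["message"])                  # assistant turn with tool_calls
+        for call in choice["message"]["tool_calls"]:
+            args = json.loads(call["function"]["arguments"])  # OpenAI-style JSON string
+            result = execute_tool(call["function"]["name"], args)
+            records.append({"tool_name": call["function"]["name"],
+                            "tool_call_id": call["id"], "success": True})
+            messages.append({"role": "tool",
+                             "tool_call_id": call["id"],
+                             "content": str(result)})
+    answer = response["choices"][0]["message"].get("content") or ""
+    return {"answer": answer, "tool_executions": records}
+```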
+
+---
+
+## Final Response
+
+### Step 20 — Build ChatResponse
+
+```
+AgentOrchestrator → Gateway:
+ ChatResponse{
+ answer,
+ model,
+ usage{},
+ response_cost,
+ finish_reason,
+ selected_tools[],
+ tool_executions[]
+ }
+```
+
+### Step 21 — Return to caller
+
+```
+Gateway → User / IDE: ChatResponse
+```
+
+---
+
+## Smoke E2E — `scripts/smoke_e2e.py`
+
+Used in CI post-deploy validation or run manually.
+
+### Step 22 — Health check
+
+```
+smoke_e2e.py → Gateway: GET /health
+smoke_e2e.py: assert response == {status: "ok"}
+```
+
+### Step 23 — Tools check
+
+```
+smoke_e2e.py → Gateway: GET /api/v1/tools
+smoke_e2e.py: assert ≥ 1 tool available
+```
+
+### Step 24 — Chat round-trip
+
+```
+smoke_e2e.py → Gateway:
+ POST /api/v1/chat
+ {message: "Use query_context and get_context to find search logic..."}
+```
+
+### Step 25 — Validate response
+
+```
+smoke_e2e.py:
+ assert answer != ''
+ assert len(tool_executions) ≥ SmokeConfig.min_tool_calls
+ [optional: filter by specific tool_name]
+```
diff --git a/docs/diagrams/seq_file_watch.drawio b/docs/diagrams/seq_file_watch.drawio
new file mode 100644
index 0000000..eefa9b7
--- /dev/null
+++ b/docs/diagrams/seq_file_watch.drawio
@@ -0,0 +1,83 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/diagrams/seq_file_watch.md b/docs/diagrams/seq_file_watch.md
new file mode 100644
index 0000000..e191577
--- /dev/null
+++ b/docs/diagrams/seq_file_watch.md
@@ -0,0 +1,242 @@
+# Sequence Diagram — File Watch / Hot-Reload Workflow
+
+> Textual narration of [`seq_file_watch.drawio`](seq_file_watch.drawio).
+> Every participant, message, and note in the draw.io file is described here in full.
+
+**Triggered by:** `knowcode server --watch`
+**Effect:** Every file save triggers an incremental re-index of only that file — no full re-scan needed.
+
+---
+
+## Participants
+
+| Participant | File | Role |
+|---|---|---|
+| Developer | — | Saves source files in the project |
+| CLI | `cli/cli.py` | Parses `server --watch` flag, starts service |
+| KnowCodeService | `service.py` | Wires together indexer, monitor, and FastAPI app |
+| FileMonitor | `indexing/monitor.py` | watchdog `Observer` — watches filesystem for events |
+| IndexingHandler | `indexing/monitor.py` | watchdog event handler — filters and enqueues paths |
+| BackgroundIndexer | `indexing/background_indexer.py` | Daemon thread with `Queue` — dequeues and re-indexes |
+| Indexer | `indexing/indexer.py` | Parses, chunks, embeds a single file |
+| EmbeddingProvider | `llm/embedding.py` | VoyageAI / OpenAI embeddings API |
+| KnowledgeStore + VectorStore + ChunkRepo | `storage/` | In-memory stores updated atomically per file |
+
+---
+
+## Startup
+
+### Step 1 — Launch with `--watch`
+
+```
+Developer → CLI: knowcode server --watch
+```
+
+### Step 2 — Initialize service
+
+```
+CLI → KnowCodeService: KnowCodeService(store_path, strict_config=True)
+```
+
+### Step 3 — Load indexer
+
+```
+KnowCodeService:
+ service.get_indexer()
+ → Indexer(embedding_provider)
+ + load(knowcode_index/) [if existing index found on disk]
+```
+
+### Step 4 — Start BackgroundIndexer
+
+```
+KnowCodeService → BackgroundIndexer:
+ BackgroundIndexer(indexer).start()
+ → daemon thread started
+ + Queue() initialized
+```
+
+### Step 5 — Start FileMonitor
+
+```
+KnowCodeService → FileMonitor:
+ FileMonitor(watch_root, bg_indexer).start()
+ → watchdog Observer.start() [uses inotify / FSEvents / kqueue per OS]
+```
+
+### Step 6 — Server ready
+
+```
+KnowCodeService: FastAPI + Uvicorn listening on :8000
+```
+
+---
+
+## File Change Event
+
+Triggered by OS filesystem notifications forwarded through watchdog.
+
+### Step 7 — Developer saves a file
+
+```
+Developer → FileMonitor: save src/foo.py (write to filesystem)
+```
+
+### Step 8 — Watchdog fires event
+
+```
+FileMonitor → IndexingHandler:
+ watchdog OS event → IndexingHandler.on_modified(FileModifiedEvent)
+```
+
+> `on_created` follows the same path: `IndexingHandler.on_created → _handle_change(path)`
+
+### Step 9 — Dispatch to handler
+
+```
+IndexingHandler: _handle_change(event.src_path)
+```
+
+### Step 10 — Filter
+
+```
+IndexingHandler:
+ filter: file extension in SUPPORTED_EXTENSIONS + not gitignored
+ [path is silently dropped if filter fails]
+```
+
+### Step 11 — Enqueue
+
+```
+IndexingHandler → BackgroundIndexer:
+ bg_indexer.queue_file(file_path) → Queue.put(file_path)
+```
+
+---
+
+## Background Re-Indexing — `_worker` daemon thread
+
+### Step 12 — Dequeue
+
+```
+BackgroundIndexer: Queue.get(file_path) [blocking dequeue]
+```
+
+### Step 13 — Invoke incremental indexer
+
+```
+BackgroundIndexer → Indexer: indexer.index_file(file_path)
+```
+
+### Step 14 — Parse file
+
+```
+Indexer:
+ parse file with appropriate language parser (Tree-sitter)
+ → ParseResult{entities[], relationships[]}
+```
+
+### Step 15 — Chunk entities
+
+```
+Indexer:
+ Chunker.process_parse_result()
+ → CodeChunks[] {id, entity_id, content, tokens[], metadata}
+ (module header chunk + import block chunk + entity chunks with BM25 tokens)
+```
+
+### Step 16 — Embed chunks
+
+```
+Indexer → EmbeddingProvider:
+ embedding_provider.embed(chunk_texts[])
+ → VoyageAI / OpenAI Embeddings API call
+```
+
+```
+EmbeddingProvider → Indexer:
+ vectors (list[list[float]], L2-normalized)
+```
+
+### Step 17 — Update ChunkRepository
+
+```
+Indexer → ChunkRepository:
+ remove old chunks for entity
+ add new chunks
+```
+
+### Step 18 — Update VectorStore
+
+```
+Indexer → VectorStore:
+ remove old vectors for entity
+ add new vectors
+ → rebuild FAISS IndexFlatIP
+```
+
+### Step 19 — Update KnowledgeStore
+
+```
+Indexer → KnowledgeStore:
+ update entities + relationships for the changed file
+```
+
+### Step 20 — Persist to disk
+
+```
+Indexer:
+ indexer.save(index_path)
+ atomic write:
+ → chunks.json (all CodeChunk objects)
+ → vectors.index (FAISS binary index)
+ → vectors.json (metadata: schema version, dimension, model name)
+```
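+
+Whether `indexer.save()` uses exactly this pattern is not stated; the standard way to get the atomic write described above is a temp-file-plus-rename, sketched here:
+
+```python
+import json
+import os
+import tempfile
+
+def atomic_write_json(path: str, payload: dict) -> None:
+    """Write to a temp file, then rename over the target, so readers never
+    see a half-written chunks.json / vectors.json."""
+    directory = os.path.dirname(path) or "."
+    fd, tmp_path = tempfile.mkstemp(dir=directory, suffix=".tmp")
+    with os.fdopen(fd, "w") as fh:
+        json.dump(payload, fh)
+    os.replace(tmp_path, path)       # atomic rename on POSIX and Windows
+```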
+
+### Step 21 — Re-index complete
+
+```
+BackgroundIndexer: ✓ re-index complete
+ next API request sees fresh data (no server restart needed)
+```
+
+---
+
+## Manual Reload — `POST /api/v1/reload`
+
+This is a separate mechanism that clears the in-memory **knowledge graph** cache (not the semantic index).
+
+### Step 22 — POST reload
+
+```
+Developer → KnowCodeService: POST /api/v1/reload
+```
+
+### Step 23 — Clear cache
+
+```
+KnowCodeService:
+ service.reload() → _store = None [clears in-memory KnowledgeStore cache]
+```
+
+### Step 24 — Lazy reload on next access
+
+```
+KnowCodeService → KnowledgeStore:
+ next access to service.store
+ → KnowledgeStore.load(store_path) reads knowcode_knowledge.json from disk
+```
+
+```
+KnowCodeService → Developer: {status: "reloaded"}
+```
+
+---
+
+## Contrast: Incremental vs Full Reload
+
+| Mechanism | Scope | Triggered by |
+|---|---|---|
+| `FileMonitor → BackgroundIndexer` (steps 7–21) | **Incremental**: re-indexes only the single changed file; updates `ChunkRepo`, `VectorStore`, and `KnowledgeStore` in memory | File save detected by watchdog |
+| `POST /api/v1/reload` (steps 22–24) | **Cache clear only**: discards in-memory `KnowledgeStore`; reloads from `knowcode_knowledge.json` | Manual API call |
+| `knowcode analyze` (separate command) | **Full rebuild**: GraphBuilder re-scans all files, rebuilds knowledge graph, then Indexer re-scans for semantic index | CLI command |
diff --git a/docs/diagrams/seq_indexing.drawio b/docs/diagrams/seq_indexing.drawio
new file mode 100644
index 0000000..d8333d6
--- /dev/null
+++ b/docs/diagrams/seq_indexing.drawio
@@ -0,0 +1,188 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/diagrams/seq_indexing.md b/docs/diagrams/seq_indexing.md
new file mode 100644
index 0000000..5ff55e1
--- /dev/null
+++ b/docs/diagrams/seq_indexing.md
@@ -0,0 +1,240 @@
+# Sequence Diagram — Indexing / Analysis Workflow
+
+> Textual narration of [`seq_indexing.drawio`](seq_indexing.drawio).
+> Every participant, message, and note in the draw.io file is described here in full.
+
+**Triggered by:** `knowcode analyze`
+**Side effect:** automatically builds the semantic index (no separate `knowcode index` call needed after analyze)
+
+---
+
+## Participants
+
+| Participant | File | Role |
+|---|---|---|
+| User / CI | — | Invokes `knowcode analyze` |
+| CLI | `cli/cli.py` | Parses arguments, calls service |
+| KnowCodeService | `service.py` | Central orchestrator |
+| GraphBuilder | `indexing/graph_builder.py` | Parses codebase into entity/relationship graph |
+| Scanner | `indexing/scanner.py` | File discovery with gitignore filtering |
+| Parser (one per language) | `parsers/` | Language-specific AST extraction |
+| KnowledgeStore | `storage/knowledge_store.py` | In-memory graph + JSON persistence |
+| Indexer | `indexing/indexer.py` | Full scan→chunk→embed pipeline |
+| Chunker | `indexing/chunker.py` | Splits entities into BM25-tokenized code chunks |
+| EmbeddingProvider | `llm/embedding.py` | Converts text to dense vectors |
+| VectorStore + ChunkRepo | `storage/vector_store.py`, `storage/chunk_repository.py` | Persists vectors (FAISS) and chunks (JSON) |
+
+---
+
+## Phase 1 — Knowledge Graph Construction
+
+### Step 1 — User invokes analyze
+
+```
+User → CLI: knowcode analyze ./src [--temporal] [--coverage=report.xml]
+```
+
+Optional flags:
+- `--temporal` — enables git history analysis
+- `--coverage=` — enables Cobertura XML coverage ingestion
+
+### Step 2 — CLI delegates to service
+
+```
+CLI → KnowCodeService: service.analyze(directory, output, ignore, temporal, coverage)
+```
+
+`output` defaults to the same directory as `directory`, producing `knowcode_knowledge.json` in place.
+
+### Step 3 — GraphBuilder instantiated and scan begins
+
+```
+KnowCodeService → GraphBuilder: GraphBuilder()
+ builder.build_from_directory(root_dir, additional_ignores, analyze_temporal, coverage_path)
+```
+
+`build_from_directory` is the top-level entry point for the knowledge graph pipeline.
+
+### Step 4 — Scanner discovers files
+
+```
+GraphBuilder → Scanner: Scanner.scan(root_dir)
+Scanner returns: list[FileInfo] {path, size, modified, language}
+```
+
+The scanner:
+- Loads `.gitignore` rules via `pathspec`
+- Applies `_should_ignore(path)` filter (extension list + gitignore patterns)
+- Returns one `FileInfo` per qualifying file, with language auto-detected from extension
+
+### Step 5 — [Loop] Parse each file
+
+For each `FileInfo` in the discovered list:
+
+```
+GraphBuilder → Parser: _parse_file(file_info) → select parser by language
+Parser: parse_file(file_path, source) → AST traversal
+Parser returns: ParseResult {entities[], relationships[], errors[]}
+```
+
+Language-specific parsers (Python, JavaScript, TypeScript, Java, Rust, Vue, Markdown, YAML) extend `TreeSitterParser`. Each parser:
+- Parses source with Tree-sitter
+- Extracts entities (functions, classes, methods, variables, modules)
+- Records intra-file relationships (CALLS, IMPORTS, CONTAINS, INHERITS)
+
+```
+GraphBuilder: _merge_result(parse_result) → accumulate entities + relationships into internal collections
+```
+
+### Step 6 — End of file loop
+
+### Step 7 — Resolve cross-file references
+
+```
+GraphBuilder: _resolve_references()
+```
+
+After all files are parsed, GraphBuilder resolves cross-file relationships:
+- CALLS edges: function calls resolved by qualified name across modules
+- IMPORTS edges: import statements linked to the imported module entity
+- INHERITS edges: class inheritance resolved by name lookup
+
+### Step 8 — Optional temporal analysis
+
+```
+GraphBuilder: [if --temporal] TemporalAnalyzer.analyze_history(limit=100)
+```
+
+- Uses GitPython to read commit log
+- Creates `COMMIT` and `AUTHOR` entities
+- Creates `AUTHOR→AUTHORED→COMMIT`, `COMMIT→MODIFIED→MODULE`, `MODULE→CHANGED_BY→COMMIT` relationships
+- Stores `insertions`, `deletions` as relationship metadata
+
+### Step 9 — Optional coverage analysis
+
+```
+GraphBuilder: [if --coverage] CoverageProcessor.process_cobertura(xml_path)
+```
+
+- Parses Cobertura XML report
+- Creates `COVERAGE_REPORT` entity
+- Creates `COVERAGE_REPORT→COVERS→MODULE` relationships with `line_rate` metadata
+
+### Step 10 — Build and save KnowledgeStore
+
+```
+KnowCodeService → KnowledgeStore: KnowledgeStore.from_graph_builder(builder)
+KnowledgeStore: store.save(output_path) → writes knowcode_knowledge.json (schema v2)
+KnowledgeStore returns to KnowCodeService: KnowledgeStore instance (cached as service._store)
+```
+
+The JSON file structure:
+```json
+{
+ "schema_version": 2,
+ "version": "1.0",
+ "metadata": {"stats": {…}, "errors": []},
+ "entities": {"entity_id": {…Entity…}},
+ "relationships": [{…Relationship…}]
+}
+```
+
+---
+
+## Phase 2 — Semantic Index Build
+
+Called automatically by `service.analyze()` immediately after saving the knowledge store. Can also be called independently via `knowcode index`.
+
+### Step 11 — Build index invoked
+
+```
+KnowCodeService: service._build_index(directory, index_path)
+```
+
+`index_path` defaults to `knowcode_index/` inside the analyzed directory.
+
+### Step 12 — Create embedding provider
+
+```
+KnowCodeService: create_embedding_provider(app_config)
+```
+
+Factory logic:
+1. Try each model in `app_config.embedding_models` in order
+2. Check API key is set in environment
+3. Return `VoyageAIEmbeddingProvider(voyage-code-3, dim=1024)` (default) or `OpenAIEmbeddingProvider`
+
+### Step 13 — Indexer runs full scan
+
+```
+KnowCodeService → Indexer: Indexer(embedding_provider)
+ indexer.index_directory(directory)
+```
+
+The Indexer **runs its own internal scan + parse + chunk pipeline** (independent of the GraphBuilder scan above). This means files are scanned twice during `knowcode analyze` — once for the knowledge graph and once for the semantic index.
+
+Internally, `index_directory` uses Scanner + GraphBuilder to re-parse, then hands results to Chunker.
+
+### Step 14 — Chunker produces code chunks
+
+```
+Indexer → Chunker: Chunker.process_parse_result(result)
+Chunker returns: CodeChunk[] {id, entity_id, content, tokens[], metadata}
+```
+
+For each parsed entity, the Chunker produces:
+- A **module header chunk**: file path, docstring, top-level summary
+- An **import block chunk**: all import statements concatenated
+- **Entity chunks** (overlapping if the entity is large): signature + docstring + body, with configurable `max_chunk_size=1000` and `overlap=100` tokens
+
+Each chunk carries BM25-tokenized `tokens[]` for lexical search.
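+
+A sketch of the overlapping split with the sizes quoted above (the real Chunker works on source text and metadata, not bare token lists):
+
+```python
+def split_with_overlap(tokens: list[str], max_chunk_size: int = 1000,
+                       overlap: int = 100) -> list[list[str]]:
+    """Split a long entity into windows that overlap by `overlap` tokens."""
+    if len(tokens) <= max_chunk_size:
+        return [tokens]
+    step = max(1, max_chunk_size - overlap)
+    chunks, start = [], 0
+    while start < len(tokens):
+        chunks.append(tokens[start:start + max_chunk_size])
+        start += step
+    return chunks
+```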
+
+### Step 15 — [Loop] Embed chunks in batches
+
+```
+Indexer → EmbeddingProvider: EmbeddingProvider.embed(texts[]) → VoyageAI / OpenAI API call
+EmbeddingProvider returns: list[list[float]] (dim=1024, L2-normalized)
+```
+
+Batching: `batch_size=100` chunks per API call. Embeddings are L2-normalized to enable cosine similarity via FAISS `IndexFlatIP`.
+
+### Step 16 — Store chunks and vectors
+
+```
+Indexer → ChunkRepository: ChunkRepository.add(chunks)
+Indexer → VectorStore: VectorStore.add(chunks, embeddings)
+VectorStore: builds FAISS IndexFlatIP (inner product on normalized = cosine)
+```
+
+### Step 17 — Persist index to disk
+
+```
+Indexer: indexer.save(index_path)
+ → chunks.json (all CodeChunk objects)
+ → vectors.index (FAISS binary index)
+ → vectors.json (metadata: schema version, embedding dimension, model name)
+```
+
+### Step 18 — Return stats to CLI
+
+```
+Indexer returns to KnowCodeService: indexed_chunks count
+KnowCodeService returns to CLI: stats dict {entities, relationships, indexed_chunks, index_path, [index_error]}
+CLI → User: print summary (entity counts, relationship types, index size)
+```
+
+If `_build_index()` raises an exception (e.g., missing API key), `index_error` is included in stats but the overall `analyze` command still succeeds (knowledge graph was saved).
+
+---
+
+## Optional: File Watch Mode
+
+When `knowcode server --watch` is running:
+
+- `FileMonitor` (watchdog `Observer`) watches the project directory
+- On file save: `IndexingHandler.on_modified()` or `on_created()` → `_handle_change(path)` → extension filter → `bg_indexer.queue_file(path)`
+- `BackgroundIndexer._worker()` (daemon thread): dequeues paths, calls `indexer.index_file(path)`
+- `index_file(path)` re-runs steps 14–17 for the single changed file only (incremental, not full re-scan)
+- After re-index: the next API request automatically sees fresh data (no server restart needed)
+
+`POST /api/v1/reload` clears the in-memory `KnowledgeStore` cache; on next access it re-reads `knowcode_knowledge.json` from disk.
diff --git a/docs/diagrams/seq_mcp.drawio b/docs/diagrams/seq_mcp.drawio
new file mode 100644
index 0000000..484f042
--- /dev/null
+++ b/docs/diagrams/seq_mcp.drawio
@@ -0,0 +1,82 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/diagrams/seq_mcp.md b/docs/diagrams/seq_mcp.md
new file mode 100644
index 0000000..0859756
--- /dev/null
+++ b/docs/diagrams/seq_mcp.md
@@ -0,0 +1,257 @@
+# Sequence Diagram — MCP Server Workflow
+
+> Textual narration of [`seq_mcp.drawio`](seq_mcp.drawio).
+> Every participant, message, and note in the draw.io file is described here in full.
+
+**Triggered by:** `knowcode mcp-server`
+**Transport:** STDIO / JSON-RPC 2.0
+**Clients:** Claude Desktop, VS Code, JetBrains, any MCP-compatible IDE
+
+---
+
+## Participants
+
+| Participant | File | Role |
+|---|---|---|
+| IDE / Claude Desktop | — | MCP client — sends `tools/call` JSON-RPC requests |
+| KnowCodeMCPServer | `mcp/server.py` | MCP server — routes tool calls, formats results |
+| KnowCodeService | `service.py` | Central orchestrator — performs all actual work |
+| KnowledgeStore | `storage/knowledge_store.py` | In-memory knowledge graph (entity/relationship data) |
+| ContextSynthesizer | `analysis/context_synthesizer.py` | Builds task-prioritized context bundles |
+| RetrievalOrchestrator | `retrieval/orchestrator.py` | Full hybrid retrieval pipeline (Tool 4 only) |
+
+---
+
+## Startup
+
+### Step 1 — Launch MCP server
+
+```
+User → KnowCodeMCPServer: knowcode mcp-server
+```
+
+### Step 2 — Start async runtime
+
+```
+KnowCodeMCPServer: run_server() → asyncio.run(run_server_async())
+```
+
+### Step 3 — Open STDIO transport
+
+```
+KnowCodeMCPServer: stdio_server(KnowCodeMCPServer)
+ → STDIO transport (stdin/stdout pipes)
+```
+
+### Step 4 — MCP initialize handshake
+
+```
+IDE / Claude Desktop → KnowCodeMCPServer:
+ MCP initialize (JSON-RPC 2.0)
+```
+
+### Step 5 — Advertise tools
+
+```
+KnowCodeMCPServer → IDE / Claude Desktop:
+ tools/list response → 4 tools with full JSON schemas
+```
+
+### Step 6 — Lazy service initialization
+
+```
+KnowCodeMCPServer → KnowCodeService:
+ KnowCodeService(store_path, strict_config=False)
+ [initialized on the first tool call, not at startup]
+```
+
+---
+
+## Tool 1 — `search_codebase`
+
+**Signature:** `search_codebase(query: str, limit: int = 10)`
+
+### Invocation
+
+```
+IDE → KnowCodeMCPServer:
+ tools/call {name: "search_codebase", arguments: {query, limit}}
+```
+
+### Execution
+
+```
+KnowCodeMCPServer → KnowCodeService: service.search(query)
+KnowCodeService → KnowledgeStore: knowledge_store.search(query)
+```
+
+`knowledge_store.search()` uses substring and token matching on entity `name` and `qualified_name` fields.
+
+### Response
+
+```
+KnowledgeStore → IDE:
+ [{id, name, qualified_name, kind, file_path, line_start}] top limit results
+```
+
+---
+
+## Tool 2 — `get_entity_context`
+
+**Signature:** `get_entity_context(entity_id: str, task_type: str = "general", max_tokens: int = 2000)`
+
+### Invocation
+
+```
+IDE → KnowCodeMCPServer:
+ tools/call {name: "get_entity_context", arguments: {entity_id, task_type, max_tokens}}
+```
+
+### Execution
+
+```
+KnowCodeMCPServer → KnowCodeService:
+ service.get_context(entity_id, task_type, max_tokens)
+
+KnowCodeService → KnowledgeStore:
+ entity = store.get_entity(entity_id) [fallback to store.search() if not found by ID]
+
+KnowCodeService → ContextSynthesizer:
+ synthesizer.synthesize_with_task(entity_id, task_type)
+ → applies TASK_TEMPLATES priority order + per-section boost multipliers
+```
+
+ContextSynthesizer fetches related nodes from KnowledgeStore:
+- `parent` entity
+- `callers[]` (entities that call this one)
+- `callees[]` (entities this one calls)
+- `children[]` (nested entities)
+
+```
+ContextSynthesizer:
+ _calculate_sufficiency(task_type, content_included, entity, text) → float 0.0–1.0
+```
+
+### Response
+
+```
+ContextSynthesizer → IDE:
+ {entity_id, qualified_name, context_text, total_tokens, sufficiency_score, task_type}
+```
+
+---
+
+## Tool 3 — `trace_calls`
+
+**Signature:** `trace_calls(entity_id: str, direction: str = "callees", depth: int = 1)`
+
+Valid direction values: `callers` | `callees`. Valid depth range: 1–5.
+
+### Invocation
+
+```
+IDE → KnowCodeMCPServer:
+ tools/call {name: "trace_calls", arguments: {entity_id, direction, depth}}
+```
+
+### Execution
+
+```
+KnowCodeMCPServer → KnowCodeService:
+ service.store.trace_calls(entity_id, direction, depth, max_results=50)
+```
+
+```
+KnowledgeStore:
+ BFS traversal on relationship graph
+ (CALLS / IMPORTED_BY edges, up to `depth` levels, max_results=50 nodes)
+```
+
+### Response
+
+```
+KnowledgeStore → IDE:
+ [{id, name, qualified_name, kind, file_path, line_start, call_depth}]
+```
+
+---
+
+## Tool 4 — `retrieve_context_for_query`
+
+**Signature:**
+```
+retrieve_context_for_query(
+ query: str,
+ task_type: str = "auto",
+ max_tokens: int = 6000,
+ limit_entities: int = 3,
+ expand_deps: bool = True,
+ verbosity: str = "minimal"
+)
+```
+
+### Invocation
+
+```
+IDE → KnowCodeMCPServer:
+ tools/call {
+ name: "retrieve_context_for_query",
+ arguments: {query, task_type, max_tokens, limit_entities, expand_deps, verbosity}
+ }
+```
+
+### Execution
+
+```
+KnowCodeMCPServer → KnowCodeService:
+ service.retrieve_context_for_query(…)
+
+KnowCodeService → RetrievalOrchestrator:
+ full hybrid pipeline:
+ classify → embed → BM25+FAISS → rerank → expand_dependencies → synthesize
+```
+
+> This is the same pipeline described in `seq_query_retrieval.drawio` — steps 4 through 14 apply in full.
+
+```
+RetrievalOrchestrator → KnowCodeMCPServer:
+ {context_text, sufficiency_score, total_tokens,
+ [+ query, task_type, retrieval_mode, evidence[] per verbosity level]}
+```
+
+### Result formatting
+
+```
+KnowCodeMCPServer:
+ format_result()
+ → MCP content block {type: "text", text: json.dumps(result)}
+
+KnowCodeMCPServer → IDE:
+ tools/call response
+```
+
+---
+
+## Error Handling
+
+All tool handler exceptions are caught at the server level. On error the server returns:
+
+```json
+{
+ "isError": true,
+ "content": [{"type": "text", "text": ""}]
+}
+```
+
+No unhandled exception propagates through the STDIO transport.
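+
+A sketch of that guard (the actual handler wiring in `mcp/server.py` differs, but the shape of the success and error payloads follows the description above):
+
+```python
+import json
+
+def call_tool_safely(handler, arguments: dict) -> dict:
+    try:
+        result = handler(**arguments)
+        # Success: same shape as format_result(), a single text content block.
+        return {"content": [{"type": "text", "text": json.dumps(result)}]}
+    except Exception as exc:
+        # Failure: flagged with isError so the client sees a tool error, not a crash.
+        return {"isError": True,
+                "content": [{"type": "text", "text": f"Error: {exc}"}]}
+```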
+
+---
+
+## Tool Summary
+
+| Tool | Arguments | Internal call | Returns |
+|---|---|---|---|
+| `search_codebase` | `query`, `limit=10` | `knowledge_store.search()` — substring + token match | `[{id, name, qualified_name, kind, file_path, line_start}]` top limit |
+| `get_entity_context` | `entity_id`, `task_type=general`, `max_tokens=2000` | `synthesize_with_task()` + `_calculate_sufficiency()` | `{entity_id, qualified_name, context_text, total_tokens, sufficiency_score, task_type}` |
+| `trace_calls` | `entity_id`, `direction=callees`, `depth=1` | BFS on relationship graph (max\_results=50) | `[{id, name, qualified_name, kind, file_path, line_start, call_depth}]` |
+| `retrieve_context_for_query` | `query`, `task_type=auto`, `max_tokens=6000`, `limit_entities=3`, `expand_deps=true`, `verbosity=minimal` | Full hybrid pipeline (steps 4–14 of seq\_query\_retrieval) | `{context_text, sufficiency_score, total_tokens, …per verbosity}` |
diff --git a/docs/diagrams/seq_query_retrieval.drawio b/docs/diagrams/seq_query_retrieval.drawio
new file mode 100644
index 0000000..3a302a6
--- /dev/null
+++ b/docs/diagrams/seq_query_retrieval.drawio
@@ -0,0 +1,132 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/diagrams/seq_query_retrieval.md b/docs/diagrams/seq_query_retrieval.md
new file mode 100644
index 0000000..90b6914
--- /dev/null
+++ b/docs/diagrams/seq_query_retrieval.md
@@ -0,0 +1,273 @@
+# Sequence Diagram — Query / Retrieval Workflow
+
+> Textual narration of [`seq_query_retrieval.drawio`](seq_query_retrieval.drawio).
+> Every participant, message, and note in the draw.io file is described here in full.
+
+**Triggered by:** `knowcode context` · `knowcode ask` · REST `POST /api/v1/context/query` · MCP `retrieve_context_for_query`
+
+---
+
+## Participants
+
+| Participant | File | Role |
+|---|---|---|
+| User / Agent | — | Issues query or question |
+| CLI / REST / MCP | `cli/cli.py`, `api/api.py`, `mcp/server.py` | Entry point — routes to KnowCodeService |
+| KnowCodeService | `service.py` | Central orchestrator |
+| RetrievalOrchestrator | `retrieval/orchestrator.py` | Validates, classifies, retrieves, synthesizes |
+| QueryClassifier | `llm/query_classifier.py` | Detects task type via regex pattern matching |
+| SearchEngine | `retrieval/search_engine.py` | Embeds query, calls HybridIndex, reranks |
+| HybridIndex | `retrieval/hybrid_index.py` | Merges BM25 (lexical) + FAISS (dense) results |
+| Reranker | `retrieval/reranker.py` | Cross-encoder reranking (VoyageAI primary, signal fallback) |
+| expand\_dependencies | `retrieval/completeness.py` | Expands callee context for top-ranked chunks |
+| ContextSynthesizer | `analysis/context_synthesizer.py` | Builds ContextBundle; computes sufficiency score |
+| Agent / LLM (ask cmd) | `llm/agent.py` | Generates natural language answer (Alt B only) |
+
+---
+
+## Step 1 — User invokes query entry point
+
+```
+User → CLI/REST/MCP: query / question / entity_id
+```
+
+The caller uses one of four entry points:
+- `knowcode context <target>` — CLI, returns structured context
+- `knowcode ask <question>` — CLI, returns an LLM-generated answer
+- `POST /api/v1/context/query` — REST API (`QueryRequest`)
+- `retrieve_context_for_query` — MCP tool call
+
+## Step 2 — Entry point calls service
+
+```
+CLI/REST/MCP → KnowCodeService:
+ service.retrieve_context_for_query(
+ query, max_tokens=6000, task_type,
+ limit_entities=3, expand_deps, verbosity
+ )
+```
+
+## Step 3 — Service delegates to orchestrator
+
+```
+KnowCodeService → RetrievalOrchestrator:
+ orchestrator.retrieve_context_for_query(…)
+```
+
+## Step 4 — Validate preconditions
+
+```
+RetrievalOrchestrator: _assert_store_exists() + _assert_index_exists()
+```
+
+Raises HTTP 412 if the knowledge store or semantic index has not been built yet.
+
+## Step 5 — Classify query
+
+```
+RetrievalOrchestrator → QueryClassifier: classify_query(query)
+```
+
+The classifier uses five sets of weighted regex patterns (one per `TaskType`):
+- `EXPLAIN`, `DEBUG`, `EXTEND`, `REVIEW`, `LOCATE` (with `GENERAL` returned when nothing matches)
+
+Returns: `(TaskType, confidence)`.
+
+`resolved_task_type = task_type override (if caller supplied) OR detected task_type`
+
+## Step 6 — Lazy-init search engine
+
+```
+RetrievalOrchestrator:
+ service.get_search_engine()
+ → HybridIndex(chunk_repository, vector_store) [created once, cached]
+```
+
+## Step 7 — Validate index compatibility
+
+```
+RetrievalOrchestrator:
+ _validate_index_compatibility(index_path)
+ → checks embedding dimension + model name match
+ → raises on mismatch
+```
+
+## Step 8 — Search: retrieve scored chunks
+
+```
+RetrievalOrchestrator → SearchEngine:
+ engine.search_scored(query, limit=max(10, limit_entities×5), expand_deps)
+```
+
+### Step 9 — Embed query
+
+```
+SearchEngine:
+ embedding_provider.embed_single(query) → query_vector (dim=1024)
+```
+
+### Step 10 — Hybrid search
+
+```
+SearchEngine → HybridIndex: hybrid_index.search(query, query_vec, limit=limit×2)
+```
+
+Internally HybridIndex executes three sub-steps:
+
+- **10a** — BM25 search on `ChunkRepository` token lists (lexical)
+- **10b** — FAISS similarity search on `VectorStore` (`IndexFlatIP`, cosine similarity via L2-normalized inner product)
+- **10c** — Merge + normalize scores → `list[(CodeChunk, score)]`
+
+Returns: top `limit×2` candidates back to SearchEngine.
+
+### Step 11 — Rerank
+
+```
+SearchEngine → Reranker: reranker.rerank(query, results, top_k=limit)
+```
+
+- **Primary**: VoyageAI `rerank-2.5` cross-encoder
+- **Fallback** (if VoyageAI unavailable): signal-based scoring:
+ - `boost_documented × 1.2`
+ - `boost_recent × 1.1`
+ - query text found in content: `× 1.5`
+ - exact entity kind match: `× 2.0`
+
+Returns: `list[(CodeChunk, score)]` top\_k reranked.
+
+### Step 12 — Expand dependencies
+
+```
+SearchEngine → expand_dependencies(chunk, chunk_repo, store, max_depth=1)
+```
+
+For each top-ranked chunk (when `expand_deps=True`):
+- `chunk_repo.get_by_entity(entity_id)` — fetch all chunks for the entity
+- `store.get_callees(entity_id)` — walk CALLS relationships one level deep
+
+Returns: `list[ScoredChunk]` with `source` field: `retrieved` (original result) or `dependency` (callee).
+
+SearchEngine returns `List[ScoredChunk]` to RetrievalOrchestrator.
+
+---
+
+> **Note — Semantic fallback**: If semantic retrieval raises an exception,
+> RetrievalOrchestrator falls back to lexical search:
+> `store.search(query)` + keyword expansion.
+
+---
+
+## Step 13 — [Loop] Synthesize context per entity
+
+For each selected `entity_id` (top `limit_entities` unique entities from the evidence list):
+
+```
+RetrievalOrchestrator → ContextSynthesizer:
+ service.get_context(
+ entity_id, task_type,
+ per_entity_max_tokens,
+ summarize=(verbosity == 'minimal')
+ )
+```
+
+Internally:
+
+- **13a** — `synthesize_with_task(entity_id, task_type)` — applies `TASK_TEMPLATES` priority order and per-section boost multipliers for the resolved task type
+- **13b** — `_calculate_sufficiency(task_type, content_included, entity, text)` → float `0.0–1.0`
+
+Returns:
+```
+{
+ context_text,
+ total_tokens,
+ truncated,
+ included_entities,
+ task_type,
+ sufficiency_score
+}
+```
+
+## Step 14 — Assemble final response
+
+```
+RetrievalOrchestrator:
+ context_text = '\n---\n'.join(context_parts)
+ sufficiency = avg(sufficiency_scores)
+ apply verbosity filter
+```
+
+### Verbosity filter
+
+| Level | Fields returned |
+|---|---|
+| `minimal` | `context_text`, `sufficiency_score`, `total_tokens`, `reduction_summary` |
+| `standard` | + `query`, `task_type`, `task_confidence`, `retrieval_mode`, `max_tokens`, `truncated` |
+| `verbose` | + `evidence[]` (`rank`, `chunk_id`, `entity_id`, `score`, `source`) |
+| `diagnostic` | full dict — all fields + `errors[]` |
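+
+The filter is cumulative; a sketch:
+
+```python
+VERBOSITY_FIELDS = {
+    "minimal": {"context_text", "sufficiency_score", "total_tokens", "reduction_summary"},
+    "standard": {"query", "task_type", "task_confidence", "retrieval_mode",
+                 "max_tokens", "truncated"},
+    "verbose": {"evidence"},
+    # "diagnostic" returns the full dict, errors[] included.
+}
+
+def apply_verbosity(full: dict, verbosity: str) -> dict:
+    if verbosity == "diagnostic":
+        return full
+    allowed = set()
+    for level in ("minimal", "standard", "verbose"):
+        allowed |= VERBOSITY_FIELDS[level]
+        if level == verbosity:
+            break
+    return {k: v for k, v in full.items() if k in allowed}
+```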
+
+---
+
+## Alt A — Return context to caller
+
+**Applies to:** `CLI context` · `REST /api/v1/context/query` · `MCP retrieve_context_for_query`
+
+```
+Step 15a:
+ KnowCodeService → CLI/REST/MCP: QueryResponse / ContextResponse
+ CLI/REST/MCP → User: structured context dict
+```
+
+---
+
+## Alt B — Ask command: pass to Agent / LLM
+
+**Applies to:** `CLI ask`
+
+### Step 15b — Invoke Agent
+
+```
+CLI → Agent: agent.answer(query) OR agent.smart_answer(query, force_llm)
+```
+
+### smart\_answer sufficiency check
+
+```
+Agent: check sufficiency_score ≥ threshold (default 0.8, from AppConfig)
+```
+
+- **If sufficient**: `_format_local_answer()` — returns context-only answer; no LLM tokens consumed.
+- **If insufficient or `force_llm=True`**: proceed to LLM call below.
+
+### Step 16 — Build prompt
+
+```
+Agent:
+ get_prompt_template(task_type) + context_text + question
+```
+
+### Step 17 — LLM failover loop
+
+```
+[ loop ] for each model in config.models order (RPM + RPD rate-limit check per model)
+```
+
+- **17a** — Google Gemini: `client.models.generate_content(model, prompt)`
+- **17b** — OpenAI-compatible (OpenRouter / Mistral): `client.chat.completions.create(model, messages)`
+- **17c** — `rate_limiter.record_usage(model.name)` → `~/.knowcode/usage_stats.json`
+- **17d** — On `ResourceExhausted` or other error → try next model in list
+
+```
+[ end loop ]
+```
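+
+In sketch form (the rate limiter and client helpers are stand-ins; the per-provider call shapes are the ones listed in 17a/17b above):
+
+```python
+def answer_with_failover(models: list, build_client, prompt: str,
+                         rate_limiter) -> str:
+    """Try each configured model in order, skip models over their RPM/RPD
+    budget, record usage on success, and fall through to the next on error."""
+    last_error = None
+    for model in models:
+        if not rate_limiter.allow(model.name):        # RPM + RPD check (assumed helper)
+            continue
+        try:
+            client = build_client(model)
+            text = client.complete(prompt)             # provider-specific call
+            rate_limiter.record_usage(model.name)      # ~/.knowcode/usage_stats.json
+            return text
+        except Exception as exc:                       # e.g. ResourceExhausted
+            last_error = exc
+            continue                                   # try the next model
+    raise RuntimeError(f"all models failed: {last_error}")
+```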
+
+### Step 18 — Return answer
+
+```
+Agent → CLI: answer text
+```
+
+### Step 19 — CLI returns to User
+
+```
+CLI → User: {answer, source=llm|local, task_type, sufficiency_score}
+```