"""Test nocmem with real conversation data from NOC's SQLite database. Extracts conversation turns, ingests them, then tests recall with realistic queries that a user would actually ask. """ import sys import time import sqlite3 import requests BASE = "http://127.0.0.1:9820" DB_PATH = "/data/src/noc/noc.db" PASS = 0 FAIL = 0 def test(name, fn): global PASS, FAIL try: fn() print(f" ✓ {name}") PASS += 1 except AssertionError as e: print(f" ✗ {name}: {e}") FAIL += 1 except Exception as e: print(f" ✗ {name}: EXCEPTION {e}") FAIL += 1 # ── step 1: extract conversation turns from SQLite ────────────────── def extract_turns(): """Extract (user_msg, assistant_msg) pairs from the database.""" conn = sqlite3.connect(DB_PATH) rows = conn.execute( "SELECT role, content FROM messages ORDER BY id" ).fetchall() conn.close() turns = [] i = 0 while i < len(rows) - 1: role, content = rows[i] # skip non-user messages, agent outputs, very short messages if role != "user" or len(content) < 5 or content.startswith("[Agent ") or content.startswith("[用户上传") or content.startswith("[语音消息]"): i += 1 continue # find the next assistant reply j = i + 1 while j < len(rows) and rows[j][0] != "assistant": j += 1 if j < len(rows): assistant_content = rows[j][1] if len(assistant_content) > 10 and "" not in assistant_content: turns.append((content, assistant_content)) i = j + 1 return turns # ── step 2: ingest all turns ─────────────────────────────────────── def ingest_turns(turns): """Ingest conversation turns via /ingest endpoint.""" total_stored = 0 for user_msg, assistant_msg in turns: r = requests.post(f"{BASE}/ingest", json={ "user_msg": user_msg, "assistant_msg": assistant_msg, }) if r.status_code == 200: total_stored += r.json().get("stored", 0) return total_stored # ── step 3: also store some key facts directly ───────────────────── def store_key_facts(): """Store critical facts that heuristic extraction might miss.""" facts = [ {"cue": "bot的名字叫什么", "target": "bot的名字叫小乖，是Fam给取的", "importance": 0.9}, {"cue": "有哪些工具可以用", "target": "工具有: fam_todo(飞书待办), send_file(发文件), spawn_agent/agent_status/kill_agent(子代理管理), run_shell, run_python, update_memory, update_inner_state, gen_voice", "importance": 0.8}, {"cue": "vLLM在5090上的性能", "target": "RTX 5090上vLLM跑gemma模型只有4.8 tok/s，需要切换到awq_marlin量化来提升速度", "importance": 0.8}, {"cue": "repo-vis项目是什么", "target": "repo-vis是一个用Rust后端+Three.js前端的3D代码库可视化工具，目标支持Linux内核级别的大型仓库和Pico VR", "importance": 0.8}, {"cue": "repo-vis的性能瓶颈", "target": "Linux内核79K文件量级下，SQLite 1GB上限和O(n)全量反序列化是瓶颈，需要n-ary tree按需合并优化", "importance": 0.9}, {"cue": "明天的待办事项", "target": "最紧迫的是emblem scanner的AI Chat和KB部分（最高优先级），然后是曲面二维码识读优化信息收集", "importance": 0.7}, {"cue": "后端切换到了什么", "target": "NOC后端从原来的方案切换到了vLLM，速度变快了", "importance": 0.7}, {"cue": "home目录下有多少log文件", "target": "home目录及子目录下共有960个.log文件", "importance": 0.5}, ] stored = 0 for f in facts: r = requests.post(f"{BASE}/store", json=f) if r.status_code == 200: stored += 1 return stored # ── step 4: recall tests with realistic queries ──────────────────── def test_recall_bot_name(): r = requests.post(f"{BASE}/recall", json={"text": "你叫什么名字"}) data = r.json() assert data["count"] > 0, "should recall something" assert "小乖" in data["memories"], f"should mention 小乖, got: {data['memories'][:200]}" def test_recall_tools(): r = requests.post(f"{BASE}/recall", json={"text": "有什么工具可以用"}) data = r.json() assert data["count"] > 0 m = data["memories"].lower() assert "tool" in m or "工具" in m or "spawn" in m or "fam_todo" in m, f"should mention tools, got: {data['memories'][:200]}" def test_recall_vllm(): r = requests.post(f"{BASE}/recall", json={"text": "vllm性能怎么样"}) data = r.json() assert data["count"] > 0 assert "4.8" in data["memories"] or "5090" in data["memories"] or "tok" in data["memories"], \ f"should mention vLLM stats, got: {data['memories'][:200]}" def test_recall_repovis(): r = requests.post(f"{BASE}/recall", json={"text": "repo-vis项目"}) data = r.json() assert data["count"] > 0 m = data["memories"] assert "Rust" in m or "Three" in m or "3D" in m or "可视化" in m, \ f"should mention repo-vis tech, got: {m[:200]}" def test_recall_performance_bottleneck(): r = requests.post(f"{BASE}/recall", json={"text": "Linux内核代码仓库跑不动"}) data = r.json() assert data["count"] > 0 m = data["memories"] assert "SQLite" in m or "79K" in m or "瓶颈" in m or "n-ary" in m or "内核" in m, \ f"should mention bottleneck, got: {m[:200]}" def test_recall_todo(): r = requests.post(f"{BASE}/recall", json={"text": "待办事项有哪些"}) data = r.json() assert data["count"] > 0 m = data["memories"] assert "emblem" in m.lower() or "todo" in m.lower() or "待办" in m or "scanner" in m.lower(), \ f"should mention todos, got: {m[:200]}" def test_recall_vr(): r = requests.post(f"{BASE}/recall", json={"text": "VR支持"}) data = r.json() assert data["count"] > 0 m = data["memories"] assert "Pico" in m or "VR" in m or "repo-vis" in m.lower(), \ f"should mention VR, got: {m[:200]}" def test_recall_chinese_natural(): """Test with natural Chinese conversational query.""" r = requests.post(f"{BASE}/recall", json={"text": "之前聊过什么技术话题"}) data = r.json() assert data["count"] > 0, "should recall some technical topics" def test_recall_cross_topic(): """Query that spans multiple memories — should return diverse results.""" r = requests.post(f"{BASE}/recall", json={ "text": "项目进度和优化", "top_k": 5, }) data = r.json() assert data["count"] >= 2, f"should recall multiple memories, got {data['count']}" def test_recall_log_files(): r = requests.post(f"{BASE}/recall", json={"text": "日志文件有多少"}) data = r.json() assert data["count"] > 0 assert "960" in data["memories"] or "log" in data["memories"].lower(), \ f"should mention log files, got: {data['memories'][:200]}" # ── step 5: multi-hop chain test ────────────────────────────────── def test_multihop_chain(): """Test if Hebbian chaining connects related memories. repo-vis → performance bottleneck → n-ary tree optimization """ r = requests.post(f"{BASE}/recall", json={ "text": "repo-vis", "top_k": 3, "hops": 3, }) data = r.json() assert data["count"] > 0 # print chain for inspection print(f" chain: {data['memories'][:300]}") # ── step 6: latency with real data ───────────────────────────────── def test_latency_with_data(): """Recall latency after loading real data.""" times = [] for q in ["工具", "vllm", "项目", "待办", "性能"]: r = requests.post(f"{BASE}/recall", json={"text": q}) times.append(r.json()["latency_ms"]) avg = sum(times) / len(times) print(f" avg latency: {avg:.1f}ms (max: {max(times):.1f}ms)") assert avg < 50, f"average latency {avg:.1f}ms too high" # ── main ──────────────────────────────────────────────────────────── def main(): global PASS, FAIL print("nocmem real-data test") print(f"server: {BASE}") print(f"database: {DB_PATH}\n") # check server try: requests.get(f"{BASE}/stats", timeout=3).raise_for_status() except Exception: print("ERROR: server not reachable") sys.exit(1) # extract print("── extract ──") turns = extract_turns() print(f" extracted {len(turns)} conversation turns") # ingest print("\n── ingest (heuristic, no LLM) ──") t0 = time.monotonic() ingested = ingest_turns(turns) elapsed = time.monotonic() - t0 print(f" ingested {ingested} memories from {len(turns)} turns ({elapsed:.1f}s)") # store key facts print("\n── store key facts ──") stored = store_key_facts() print(f" stored {stored} key facts") # stats r = requests.get(f"{BASE}/stats") stats = r.json() print(f"\n── memory stats ──") print(f" memories: {stats['num_memories']}") print(f" cue entries: {stats['num_cue_entries']} (aug ratio: {stats['augmentation_ratio']:.1f}x)") print(f" W norm: {stats['w_norm']:.1f}") # recall tests print(f"\n── recall accuracy (natural language queries) ──") test("bot的名字", test_recall_bot_name) test("可用工具", test_recall_tools) test("vLLM性能", test_recall_vllm) test("repo-vis项目", test_recall_repovis) test("性能瓶颈", test_recall_performance_bottleneck) test("待办事项", test_recall_todo) test("VR支持", test_recall_vr) test("log文件数量", test_recall_log_files) test("自然中文查询", test_recall_chinese_natural) test("跨主题召回", test_recall_cross_topic) print(f"\n── multi-hop chain ──") test("repo-vis联想链", test_multihop_chain) print(f"\n── latency ──") test("平均延迟 < 50ms", test_latency_with_data) print(f"\n{'='*50}") total = PASS + FAIL print(f"PASS: {PASS}/{total} FAIL: {FAIL}/{total}") if FAIL: sys.exit(1) else: print("All tests passed!") if __name__ == "__main__": main()