nocmem: qa-style extraction prompt, multi-cue variants, claude history importer
- Switch extraction prompt to qa-style (80% recall vs 60% baseline)
- Semicolon-separated cues in extraction become paraphrase variants
- Add import_claude.py to bulk-import Claude Code conversation history
- Fix LLM model name in systemd service, add logging basicConfig
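
The multi-cue mechanic in a nutshell (illustrative sketch, not part of the diff; the cue string is hypothetical): a semicolon-separated CUE from the extractor is split into a primary cue plus paraphrase variants before storage.

    cue = "yoyo是谁; gitea bot名字; 新bot叫什么"   # hypothetical extractor output
    parts = [p.strip() for p in cue.split(";") if p.strip()]
    primary_cue, inline_variants = parts[0], parts[1:]  # primary + variants
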
217
mem/benchmarks/prompt_experiment.py
Normal file
@@ -0,0 +1,217 @@
"""Test different extraction prompts for ingest quality.

Takes a few real conversation turns, tries different prompts,
compares what gets extracted, then tests recall against target queries.
"""

import re

from openai import OpenAI
from sentence_transformers import SentenceTransformer

from nuonuo.hippocampus import HippocampalMemory

DEVICE = "cuda"
LLM = OpenAI(base_url="http://100.84.7.49:8000/v1", api_key="unused")
MODEL = "gemma4-31b"
encoder = SentenceTransformer("all-MiniLM-L6-v2", device=DEVICE)

# real conversation turns from Claude Code history
TURNS = [
    ("装好了tailscale。你帮忙生成吧,用新的bot名叫yoyo",
     "好,Tailscale IP `100.116.213.25`,yoyo 用户创建成功。继续搞 token 和部署。"),
    ("能来一个org共享的gitea ci cd runner 不,就跑在这台机器上就行。",
     "搞起来!装 act_runner,注册到 euphon org,以后每个仓库直接加 `.gitea/workflows/` 就能跑 CI 了。"),
    ("能不能给heradev一个bridge的nic,secondary的,这样就有192.168.1.0/24的地址可以用了",
     "饭团,你是要给 heradev 这个 KVM 虚拟机加一个桥接网卡对吧?这样它就能直接拿到物理网段 192.168.1.0/24 的 IP。"),
    ("i7 bot也不好,直接叫@ms吧",
     "好,@ms 改好了。现在来研究全局 webhook 的问题——之前系统级 webhook 没触发,让我深入看看。"),
    ("AI/ML 工具克隆 全部删掉吧,emblemcv,visint也删掉",
     "好嘞,这一波删掉能省 ~62G。动手了。"),
]

# queries we expect to recall
TARGET_QUERIES = [
    ("yoyo是什么", ["yoyo"]),
    ("act runner怎么装的", ["act_runner", "runner"]),
    ("heradev网络怎么配的", ["heradev", "桥接", "bridge", "192.168"]),
    ("gitea bot叫什么", ["ms", "@ms", "麻薯"]),
    ("清理了哪些项目", ["emblemcv", "visint", "62G", "删"]),
]

# different extraction prompts to test
PROMPTS = {
    "baseline": """From this conversation turn, extract key facts worth remembering for future conversations.
For each fact, provide a "cue" (what would trigger recalling this) and a "target" (the fact itself).
Rate importance 0-1 (1 = critical fact, 0 = trivial).

User: {user}
Assistant: {assistant}

Output format (one per line):
CUE: <trigger phrase> | TARGET: <fact> | IMPORTANCE: <0-1>

Only extract genuinely useful facts. If nothing worth remembering, output NONE.""",

    "entity_focused": """从这段对话中提取值得记住的事实。重点关注:
- 名称、代号、别名(谁叫什么)
- 配置、参数、端口、地址
- 做了什么操作、改了什么
- 决策和原因

每条事实用以下格式输出(每行一条):
CUE: <用什么问题能想起这件事> | TARGET: <事实本身,要具体> | IMPORTANCE: <0-1>

User: {user}
Assistant: {assistant}

如果没有值得记住的,输出 NONE。""",

    "multi_cue": """从这段对话中提取值得长期记住的事实。

要求:
1. 每条事实提供 2-3 个不同的触发短语(cue),用分号分隔
2. target 要具体、独立可理解(不依赖上下文)
3. 包含所有出现的名称、代号、配置值

格式(每行一条):
CUE: <触发短语1>; <触发短语2>; <触发短语3> | TARGET: <具体事实> | IMPORTANCE: <0-1>

User: {user}
Assistant: {assistant}

没有值得记住的则输出 NONE。""",

    "qa_style": """你是一个记忆提取器。把这段对话变成若干个"问答对"——未来有人问这个问题时,能直接给出答案。

要求:
- 问题要自然,像人真的会这么问
- 答案要具体完整,包含关键细节(名称、数字、地址等)
- 同一个事实可以从不同角度提问

格式(每行一条):
CUE: <自然的提问方式> | TARGET: <完整的回答> | IMPORTANCE: <0-1>

User: {user}
Assistant: {assistant}

没有值得记住的则输出 NONE。""",
}
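
# All prompts share the same output line format; a conforming multi_cue line
# would look like this (example values are hypothetical):
#   CUE: yoyo是谁; gitea bot名字; 新bot叫什么 | TARGET: gitea bot 名叫 yoyo | IMPORTANCE: 0.8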

def extract_with_prompt(prompt_template, user_msg, asst_msg):
    """Run one prompt template over a single turn and parse the LLM output
    into cue/target/importance dicts."""
    prompt = prompt_template.format(user=user_msg, assistant=asst_msg)
    try:
        resp = LLM.chat.completions.create(
            model=MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3, max_tokens=512,
        )
        result = resp.choices[0].message.content
    except Exception:
        return []

    memories = []
    for line in result.strip().split("\n"):
        if line.strip() == "NONE":
            break
        m = re.match(r"CUE:\s*(.+?)\s*\|\s*TARGET:\s*(.+?)\s*\|\s*IMPORTANCE:\s*([\d.]+)", line)
        if m:
            memories.append({
                "cue": m.group(1).strip(),
                "target": m.group(2).strip(),
                "importance": float(m.group(3)),
            })
    return memories

def emb(text):
    # one string in, one normalized embedding tensor out (on DEVICE)
    return encoder.encode([text], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE)[0]

def test_recall(memories_list, queries):
    """Build a memory from extracted memories and test recall."""
    hip = HippocampalMemory(embed_dim=384, beta=32.0, hopfield_top_k=10, device=DEVICE)

    for mem in memories_list:
        cue_text = mem["cue"]
        target_text = mem["target"]
        cue_emb = emb(cue_text)
        target_emb = emb(target_text)

        # handle multi-cue (semicolon separated): first part becomes the
        # primary cue, the rest are stored as variants
        variants = []
        if ";" in cue_text:
            parts = [p.strip() for p in cue_text.split(";") if p.strip()]
            if len(parts) > 1:
                cue_emb = emb(parts[0])
                variants = [emb(p) for p in parts[1:]]

        hip.store(cue_emb, target_emb, cue_variants=variants if variants else None,
                  metadata={"cue": cue_text, "target": target_text})

    # a query hits if any expected keyword shows up in the top-3 recalled targets
    hits = 0
    for query, keywords in queries:
        qe = emb(query)
        results = hip.recall(qe, top_k=3)
        recalled_text = " ".join(r.metadata["target"] for r in results)
        hit = any(kw.lower() in recalled_text.lower() for kw in keywords)
        if hit:
            hits += 1

    return hits, len(queries)


def main():
    print("extraction prompt experiment\n")
    print(f"turns: {len(TURNS)}, queries: {len(TARGET_QUERIES)}\n")

    for name, template in PROMPTS.items():
        print(f"{'='*60}")
        print(f" prompt: {name}")
        print(f"{'='*60}")

        all_memories = []
        for user_msg, asst_msg in TURNS:
            mems = extract_with_prompt(template, user_msg, asst_msg)
            all_memories.extend(mems)
            for m in mems:
                print(f"  [{m['importance']:.1f}] CUE: {m['cue'][:50]}")
                print(f"        TGT: {m['target'][:60]}")

        print(f"\n  extracted: {len(all_memories)} memories")

        hits, total = test_recall(all_memories, TARGET_QUERIES)
        print(f"  recall: {hits}/{total} ({hits/total*100:.0f}%)")

        # show per-query results: rebuild the same store, then recall top-1
        hip = HippocampalMemory(embed_dim=384, beta=32.0, hopfield_top_k=10, device=DEVICE)
        for mem in all_memories:
            cue_text = mem["cue"]
            cue_emb = emb(cue_text.split(";")[0].strip() if ";" in cue_text else cue_text)
            target_emb = emb(mem["target"])
            variants = []
            if ";" in cue_text:
                parts = [p.strip() for p in cue_text.split(";") if p.strip()]
                variants = [emb(p) for p in parts[1:]] if len(parts) > 1 else []
            hip.store(cue_emb, target_emb, cue_variants=variants or None,
                      metadata={"cue": cue_text, "target": mem["target"]})

        for query, keywords in TARGET_QUERIES:
            qe = emb(query)
            results = hip.recall(qe, top_k=1)
            if results:
                target = results[0].metadata["target"][:60]
                hit = any(kw.lower() in results[0].metadata["target"].lower() for kw in keywords)
                mark = "✓" if hit else "✗"
                print(f"  {mark} {query:20s} → {target}")
            else:
                print(f"  ✗ {query:20s} → (empty)")

        print()


if __name__ == "__main__":
    main()
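
To run the experiment, the invocation would presumably mirror import_claude.py's uv usage (path relative to mem/ is an assumption):

    uv run python benchmarks/prompt_experiment.py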
178
mem/import_claude.py
Normal file
@@ -0,0 +1,178 @@
"""Import Claude Code conversation history into nocmem.

Scans ~/.claude/projects/ for JSONL conversation files,
extracts user-assistant turn pairs, and ingests them via the /ingest API.

Usage:
    uv run python import_claude.py [--dry-run] [--limit N]
"""
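
# Shape of the JSONL records extract_turns() below expects (inferred from the
# parsing logic; field values are hypothetical):
#   {"type": "user", "message": {"content": "plain text"}}
#   {"type": "assistant", "message": {"content": [{"type": "text", "text": "..."}]}}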

import argparse
import json
import os
import sys
import time
from pathlib import Path

import requests

BASE = os.environ.get("NOCMEM_ENDPOINT", "http://127.0.0.1:9820")
CLAUDE_DIR = Path.home() / ".claude" / "projects"

def extract_turns(jsonl_path: Path) -> list[tuple[str, str]]:
    """Extract (user_msg, assistant_msg) pairs from a JSONL conversation."""
    messages = []  # (role, text)

    with open(jsonl_path) as f:
        for line in f:
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue

            msg_type = obj.get("type")
            if msg_type not in ("user", "assistant"):
                continue

            msg = obj.get("message", {})
            content = msg.get("content", "")

            # extract text: content is either a plain string or a list of parts
            if isinstance(content, str):
                text = content.strip()
            elif isinstance(content, list):
                parts = []
                for part in content:
                    if isinstance(part, dict) and part.get("type") == "text":
                        parts.append(part["text"])
                text = "\n".join(parts).strip()
            else:
                continue

            if not text or len(text) < 10:
                continue

            # skip tool-heavy assistant responses (mostly noise)
            if msg_type == "assistant" and text.count("```") > 10:
                continue

            messages.append((msg_type, text))  # msg_type is already the role

    # pair up user-assistant turns
    turns = []
    i = 0
    while i < len(messages) - 1:
        if messages[i][0] == "user":
            # find the next assistant message
            j = i + 1
            while j < len(messages) and messages[j][0] != "assistant":
                j += 1
            if j < len(messages):
                user_text = messages[i][1][:500]  # truncate long messages
                asst_text = messages[j][1][:500]
                turns.append((user_text, asst_text))
            i = j + 1
        else:
            i += 1

    return turns

def ingest_turn(user_msg: str, assistant_msg: str) -> int:
    """Send a turn to nocmem /ingest, return number of memories stored."""
    try:
        r = requests.post(
            f"{BASE}/ingest",
            json={"user_msg": user_msg, "assistant_msg": assistant_msg},
            timeout=120,
        )
        if r.status_code == 200:
            return r.json().get("stored", 0)
    except Exception as e:
        print(f"  error: {e}", file=sys.stderr)
    return 0


def main():
    parser = argparse.ArgumentParser(description="Import Claude Code history into nocmem")
    parser.add_argument("--dry-run", action="store_true", help="just show what would be imported")
    parser.add_argument("--limit", type=int, default=0, help="max turns to ingest (0=all)")
    parser.add_argument("--project", type=str, default="", help="filter by project dir name substring")
    args = parser.parse_args()

    # find all conversation files
    conversations = []
    for project_dir in sorted(CLAUDE_DIR.iterdir()):
        if not project_dir.is_dir():
            continue
        if args.project and args.project not in project_dir.name:
            continue
        for jsonl in sorted(project_dir.glob("*.jsonl")):
            if "subagents" in str(jsonl):
                continue
            conversations.append((project_dir.name, jsonl))

    print(f"found {len(conversations)} conversations in {CLAUDE_DIR}")
    if args.project:
        print(f"  filtered by: {args.project}")

    # extract all turns
    all_turns = []
    for project_name, jsonl_path in conversations:
        turns = extract_turns(jsonl_path)
        if turns:
            all_turns.extend([(project_name, u, a) for u, a in turns])

    print(f"extracted {len(all_turns)} turns total\n")

    if args.limit:
        all_turns = all_turns[:args.limit]

    if args.dry_run:
        for project, user_msg, asst_msg in all_turns[:20]:
            print(f"  [{project[:30]}]")
            print(f"    U: {user_msg[:80]}")
            print(f"    A: {asst_msg[:80]}")
            print()
        if len(all_turns) > 20:
            print(f"  ... and {len(all_turns) - 20} more")
        return

    # check server
    try:
        r = requests.get(f"{BASE}/stats", timeout=3)
        r.raise_for_status()
        before = r.json()["num_memories"]
        print(f"nocmem: {before} memories before import\n")
    except Exception:
        print(f"ERROR: nocmem not reachable at {BASE}")
        sys.exit(1)

    # ingest
    total_stored = 0
    t0 = time.monotonic()
    for i, (project, user_msg, asst_msg) in enumerate(all_turns):
        stored = ingest_turn(user_msg, asst_msg)
        total_stored += stored
        if (i + 1) % 10 == 0:
            elapsed = time.monotonic() - t0
            rate = (i + 1) / elapsed
            eta = (len(all_turns) - i - 1) / rate if rate > 0 else 0
            print(f"  [{i+1}/{len(all_turns)}] stored={total_stored} ({rate:.1f} turns/s, ETA {eta:.0f}s)")

    elapsed = time.monotonic() - t0

    # final stats
    r = requests.get(f"{BASE}/stats")
    after = r.json()["num_memories"]

    print(f"\n{'='*50}")
    print(f"imported {total_stored} memories from {len(all_turns)} turns")
    print(f"nocmem: {before} → {after} memories")
    print(f"time: {elapsed:.1f}s")


if __name__ == "__main__":
    main()
@@ -5,12 +5,12 @@ After=network.target
 [Service]
 Type=simple
 WorkingDirectory=/data/src/noc/mem
-ExecStart=/home/fam/.local/bin/uv run uvicorn server:app --host 0.0.0.0 --port 9820
+ExecStart=/home/fam/.local/bin/uv run uvicorn server:app --host 0.0.0.0 --port 9820 --log-level info
 Restart=on-failure
 RestartSec=5

 Environment=NOCMEM_LLM_ENDPOINT=http://100.84.7.49:8000/v1
-Environment=NOCMEM_LLM_MODEL=QuantTrio/gemma-4-31B-it-AWQ
+Environment=NOCMEM_LLM_MODEL=gemma4-31b
 Environment=NOCMEM_LLM_API_KEY=unused
 Environment=NOCMEM_DATA_DIR=/data/src/noc/mem/data
 Environment=NOCMEM_DEVICE=cuda
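
With --log-level info plus the basicConfig below, request and extraction logs land in the journal; something like the following would tail them (the unit name is an assumption):

    journalctl -u nocmem.service -f   # unit name assumed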
@@ -21,6 +21,7 @@ from openai import OpenAI
 from nuonuo.hippocampus import HippocampalMemory

+logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("nocmem")

 # ── config ──────────────────────────────────────────────────────────
@@ -212,12 +213,16 @@ class ExtractedMemory:
 def _extract_memories_llm(user_msg: str, assistant_msg: str) -> list[ExtractedMemory]:
     prompt = (
-        "From this conversation turn, extract key facts worth remembering for future conversations.\n"
-        "For each fact, provide a \"cue\" (what would trigger recalling this) and a \"target\" (the fact itself).\n"
-        "Rate importance 0-1 (1 = critical fact, 0 = trivial).\n\n"
+        '你是一个记忆提取器。把这段对话变成若干个"问答对"——未来有人问这个问题时,能直接给出答案。\n\n'
+        "要求:\n"
+        "- 问题要自然,像人真的会这么问\n"
+        "- 答案要具体完整,包含关键细节(名称、数字、地址等)\n"
+        "- 同一个事实可以从不同角度提问\n"
+        "- 每条 CUE 提供 2-3 个不同的触发短语,用分号分隔\n\n"
+        "格式(每行一条):\n"
+        "CUE: <提问方式1>; <提问方式2>; <提问方式3> | TARGET: <完整的回答> | IMPORTANCE: <0-1>\n\n"
         f"User: {user_msg}\nAssistant: {assistant_msg}\n\n"
-        "Output format (one per line):\nCUE: <trigger phrase> | TARGET: <fact> | IMPORTANCE: <0-1>\n\n"
-        "Only extract genuinely useful facts. If nothing worth remembering, output NONE."
+        "没有值得记住的则输出 NONE。"
     )
     try:
         resp = llm_client.chat.completions.create(
@@ -330,19 +335,28 @@ def _extract_and_store(user_msg: str, assistant_msg: str) -> int:
         if mem.importance < 0.3:
             continue

-        cue_emb = embed(mem.cue)
+        # split semicolon-separated cues into primary + variants
+        cue_parts = [p.strip() for p in mem.cue.split(";") if p.strip()]
+        primary_cue = cue_parts[0] if cue_parts else mem.cue
+        inline_variants = cue_parts[1:] if len(cue_parts) > 1 else []
+
+        cue_emb = embed(primary_cue)
         target_emb = embed(mem.target)

-        if llm_client:
-            paraphrases = _generate_paraphrases_llm(mem.cue, n=3)
-        else:
-            paraphrases = _generate_paraphrases_heuristic(mem.cue, n=3)
+        # inline variants from semicolon cues (already in the extraction)
+        variant_embs = embed_batch(inline_variants) if inline_variants else []

-        variant_embs = embed_batch(paraphrases) if paraphrases else []
+        # additionally generate paraphrases if no inline variants
+        if not inline_variants:
+            if llm_client:
+                paraphrases = _generate_paraphrases_llm(primary_cue, n=3)
+            else:
+                paraphrases = _generate_paraphrases_heuristic(primary_cue, n=3)
+            variant_embs = embed_batch(paraphrases) if paraphrases else []

         hippocampus.store(
             cue_emb, target_emb,
-            cue_variants=variant_embs,
+            cue_variants=variant_embs if variant_embs else None,
             metadata={"cue": mem.cue, "target": mem.target, "importance": mem.importance},
             timestamp=time.time(),
         )