nocmem: QA-style extraction prompt, multi-cue variants, Claude history importer

- Switch extraction prompt to QA-style (80% recall vs 60% baseline)
- Semicolon-separated cues in extraction become paraphrase variants
- Add import_claude.py to bulk-import Claude Code conversation history
- Fix LLM model name in systemd service, add logging basicConfig
This commit is contained in:
Fam Zheng
2026-04-11 22:57:17 +01:00
parent daed6c9d37
commit 35cafbd4ca
4 changed files with 423 additions and 14 deletions

View File

@@ -21,6 +21,7 @@ from openai import OpenAI
from nuonuo.hippocampus import HippocampalMemory
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("nocmem")
# ── config ──────────────────────────────────────────────────────────
@@ -212,12 +213,16 @@ class ExtractedMemory:
def _extract_memories_llm(user_msg: str, assistant_msg: str) -> list[ExtractedMemory]:
prompt = (
"From this conversation turn, extract key facts worth remembering for future conversations.\n"
"For each fact, provide a \"cue\" (what would trigger recalling this) and a \"target\" (the fact itself).\n"
"Rate importance 0-1 (1 = critical fact, 0 = trivial).\n\n"
'你是一个记忆提取器。把这段对话变成若干个"问答对"——未来有人问这个问题时,能直接给出答案。\n\n'
"要求:\n"
"- 问题要自然,像人真的会这么问\n"
"- 答案要具体完整,包含关键细节(名称、数字、地址等)\n"
"- 同一个事实可以从不同角度提问\n"
"- 每条 CUE 提供 2-3 个不同的触发短语,用分号分隔\n\n"
"格式(每行一条):\n"
"CUE: <提问方式1>; <提问方式2>; <提问方式3> | TARGET: <完整的回答> | IMPORTANCE: <0-1>\n\n"
f"User: {user_msg}\nAssistant: {assistant_msg}\n\n"
"Output format (one per line):\nCUE: <trigger phrase> | TARGET: <fact> | IMPORTANCE: <0-1>\n\n"
"Only extract genuinely useful facts. If nothing worth remembering, output NONE."
"没有值得记住的则输出 NONE。"
)
try:
resp = llm_client.chat.completions.create(
@@ -330,19 +335,28 @@ def _extract_and_store(user_msg: str, assistant_msg: str) -> int:
if mem.importance < 0.3:
continue
cue_emb = embed(mem.cue)
# split semicolon-separated cues into primary + variants
cue_parts = [p.strip() for p in mem.cue.split(";") if p.strip()]
primary_cue = cue_parts[0] if cue_parts else mem.cue
inline_variants = cue_parts[1:] if len(cue_parts) > 1 else []
cue_emb = embed(primary_cue)
target_emb = embed(mem.target)
if llm_client:
paraphrases = _generate_paraphrases_llm(mem.cue, n=3)
else:
paraphrases = _generate_paraphrases_heuristic(mem.cue, n=3)
# inline variants from semicolon cues (already in the extraction)
variant_embs = embed_batch(inline_variants) if inline_variants else []
variant_embs = embed_batch(paraphrases) if paraphrases else []
# additionally generate paraphrases if no inline variants
if not inline_variants:
if llm_client:
paraphrases = _generate_paraphrases_llm(primary_cue, n=3)
else:
paraphrases = _generate_paraphrases_heuristic(primary_cue, n=3)
variant_embs = embed_batch(paraphrases) if paraphrases else []
hippocampus.store(
cue_emb, target_emb,
cue_variants=variant_embs,
cue_variants=variant_embs if variant_embs else None,
metadata={"cue": mem.cue, "target": mem.target, "importance": mem.importance},
timestamp=time.time(),
)