From 4eeebdd9351dcfdfd6d51fb8f9ad5c434188d633 Mon Sep 17 00:00:00 2001 From: bielie Date: Sun, 1 Feb 2026 15:07:06 +0800 Subject: [PATCH] Improve story summary logging --- modules/story-summary/generate/prompt.js | 118 +- modules/story-summary/story-summary.css | 27 + modules/story-summary/story-summary.js | 27 + modules/story-summary/vector/recall.js | 1127 +++++++++--------- modules/story-summary/vector/state-recall.js | 14 +- 5 files changed, 650 insertions(+), 663 deletions(-) diff --git a/modules/story-summary/generate/prompt.js b/modules/story-summary/generate/prompt.js index 2c86e6d..d539aab 100644 --- a/modules/story-summary/generate/prompt.js +++ b/modules/story-summary/generate/prompt.js @@ -36,7 +36,7 @@ const RECENT_ORPHAN_MAX = 5000; // [待整理] 独立预算 const TOTAL_BUDGET_MAX = 15000; // 总预算(用于日志显示) const L3_MAX = 2000; const ARCS_MAX = 1500; -const TOP_N_STAR = 5; // 相似度前N条加⭐ +const TOP_N_STAR = 5; // 匹配度前N条加⭐ // ───────────────────────────────────────────────────────────────────────────── // 工具函数 @@ -179,84 +179,64 @@ function formatInjectionLog(stats, details, recentOrphanStats = null) { const pct = (n, d) => (d > 0 ? Math.round((n / d) * 100) : 0); const lines = [ - "", - "╔══════════════════════════════════════════════════════════════╗", - "║ Prompt 装配报告 ║", - "╠══════════════════════════════════════════════════════════════╣", - `║ 总预算: ${stats.budget.max} tokens`, - `║ 已使用: ${stats.budget.used} tokens (${pct(stats.budget.used, stats.budget.max)}%)`, - `║ 剩余: ${stats.budget.max - stats.budget.used} tokens`, - "╚══════════════════════════════════════════════════════════════╝", - "", + '', + '\u250c' + '\u2500'.repeat(61) + '\u2510', + '\u2502 \u3010\u88c5\u914d\u7edf\u8ba1\u3011 \u2502', + '\u2514' + '\u2500'.repeat(61) + '\u2518', + ` \u603b\u9884\u7b97: ${stats.budget.max} tokens | \u5df2\u4f7f\u7528: ${stats.budget.used} tokens (${pct(stats.budget.used, stats.budget.max)}%)`, + '', ]; - // 世界状态 - lines.push("┌─────────────────────────────────────────────────────────────┐"); - lines.push("│ [1] 世界约束 (上限 2000) │"); - lines.push("└─────────────────────────────────────────────────────────────┘"); - lines.push(` 注入: ${stats.world.count} 条 | ${stats.world.tokens} tokens`); - lines.push(""); + // [1] World constraints + lines.push(' [1] \u4e16\u754c\u7ea6\u675f (\u4e0a\u9650 2000)'); + lines.push(` \u9009\u5165: ${stats.world.count} \u6761 | \u6d88\u8017: ${stats.world.tokens} tokens`); + lines.push(''); - // 核心经历 + 过往背景 - lines.push("┌─────────────────────────────────────────────────────────────┐"); - lines.push("│ [2] 核心经历 + 过往背景(含证据) │"); - lines.push("└─────────────────────────────────────────────────────────────┘"); - lines.push(` 选入: ${stats.events.selected} 条 | 事件本体: ${stats.events.tokens} tokens`); - lines.push(` 挂载证据: ${stats.evidence.attached} 条 | 证据: ${stats.evidence.tokens} tokens`); - lines.push(` 核心: ${details.directCount || 0} | 过往: ${details.similarCount || 0}`); - if (details.eventList?.length) { - lines.push(" ────────────────────────────────────────"); - details.eventList.slice(0, 20).forEach((ev, i) => { - const type = ev.isDirect ? "核心" : "过往"; - const hasE = ev.hasEvidence ? " +E" : ""; - const title = (ev.title || "(无标题)").slice(0, 32); - lines.push(` ${String(i + 1).padStart(2)}. [${type}${hasE}] ${title} (${ev.tokens}tok)`); - }); - if (details.eventList.length > 20) lines.push(` ... 还有 ${details.eventList.length - 20} 条`); - } - lines.push(""); + // [2] Core + background events + lines.push(' [2] \u6838\u5fc3\u7ecf\u5386 + \u8fc7\u5f80\u80cc\u666f'); + lines.push(` \u4e8b\u4ef6: ${stats.events.selected} \u6761 | \u6d88\u8017: ${stats.events.tokens} tokens`); - // 远期片段 - lines.push("┌─────────────────────────────────────────────────────────────┐"); - lines.push("│ [3] 远期片段(已总结范围) │"); - lines.push("└─────────────────────────────────────────────────────────────┘"); - lines.push(` 注入: ${stats.orphans.injected} 条 | ${stats.orphans.tokens} tokens`); - lines.push(""); + const l0EvidenceCount = details.eventList?.filter(e => e.hasL0Evidence)?.length || 0; + const l1EvidenceCount = (stats.evidence.attached || 0) - l0EvidenceCount; + lines.push(` \u8bc1\u636e: ${stats.evidence.attached} \u6761 (L0: ${l0EvidenceCount}, L1: ${l1EvidenceCount}) | \u6d88\u8017: ${stats.evidence.tokens} tokens`); + lines.push(` \u6838\u5fc3: ${details.directCount || 0} \u6761 | \u8fc7\u5f80: ${details.similarCount || 0} \u6761`); + lines.push(''); - // 待整理 - lines.push("┌─────────────────────────────────────────────────────────────┐"); - lines.push("│ [4] 待整理(未总结范围,独立预算 5000) │"); - lines.push("└─────────────────────────────────────────────────────────────┘"); - lines.push(` 注入: ${recentOrphanStats?.injected || 0} 条 | ${recentOrphanStats?.tokens || 0} tokens`); - lines.push(` 楼层范围: ${recentOrphanStats?.floorRange || "N/A"}`); - lines.push(""); + // [3] Long-term chunks + const l0OrphanCount = stats.orphans.l0Count || 0; + const l1OrphanCount = (stats.orphans.injected || 0) - l0OrphanCount; + lines.push(' [3] \u8fdc\u671f\u7247\u6bb5 (\u5df2\u603b\u7ed3\u8303\u56f4)'); + lines.push(` \u9009\u5165: ${stats.orphans.injected} \u6761 (L0: ${l0OrphanCount}, L1: ${l1OrphanCount}) | \u6d88\u8017: ${stats.orphans.tokens} tokens`); + lines.push(''); - lines.push("┌─────────────────────────────────────────────────────────────┐"); - lines.push("│ [5] 人物弧光(上限 1500) │"); - lines.push("└─────────────────────────────────────────────────────────────┘"); - lines.push(` 注入: ${stats.arcs.count} 条 | ${stats.arcs.tokens} tokens`); - lines.push(""); + // [4] Recent orphans + lines.push(' [4] \u5f85\u6574\u7406 (\u72ec\u7acb\u9884\u7b97 5000)'); + lines.push(` \u9009\u5165: ${recentOrphanStats?.injected || 0} \u6761 | \u6d88\u8017: ${recentOrphanStats?.tokens || 0} tokens`); + lines.push(` \u697c\u5c42: ${recentOrphanStats?.floorRange || 'N/A'}`); + lines.push(''); - // 预算条形图 - lines.push("┌─────────────────────────────────────────────────────────────┐"); - lines.push("│ 【预算分布】 │"); - lines.push("└─────────────────────────────────────────────────────────────┘"); + // [5] Arcs + lines.push(' [5] \u4eba\u7269\u5f27\u5149 (\u4e0a\u9650 1500)'); + lines.push(` \u9009\u5165: ${stats.arcs.count} \u6761 | \u6d88\u8017: ${stats.arcs.tokens} tokens`); + lines.push(''); + + // Budget bar + lines.push(' \u3010\u9884\u7b97\u5206\u5e03\u3011'); const total = stats.budget.max; const bar = (tokens, label) => { - const width = Math.round((tokens / total) * 40); - const pctStr = pct(tokens, total) + "%"; - return ` ${label.padEnd(6)} ${"█".repeat(width).padEnd(40)} ${String(tokens).padStart(5)} (${pctStr})`; + const width = Math.round((tokens / total) * 30); + const pctStr = pct(tokens, total) + '%'; + return ` ${label.padEnd(6)} ${'\u2588'.repeat(width).padEnd(30)} ${String(tokens).padStart(5)} (${pctStr})`; }; - lines.push(bar(stats.world.tokens, "约束")); - lines.push(bar(stats.events.tokens, "经历")); - lines.push(bar(stats.evidence.tokens, "证据")); - lines.push(bar(stats.orphans.tokens, "远期")); - lines.push(bar(recentOrphanStats?.tokens || 0, "待整理")); - lines.push(bar(stats.arcs.tokens, "弧光")); - lines.push(bar(stats.budget.max - stats.budget.used, "剩余")); - lines.push(""); + lines.push(bar(stats.world.tokens, '\u7ea6\u675f')); + lines.push(bar(stats.events.tokens + stats.evidence.tokens, '\u7ecf\u5386')); + lines.push(bar(stats.orphans.tokens, '\u8fdc\u671f')); + lines.push(bar(recentOrphanStats?.tokens || 0, '\u5f85\u6574\u7406')); + lines.push(bar(stats.arcs.tokens, '\u5f27\u5149')); + lines.push(bar(stats.budget.max - stats.budget.used, '\u5269\u4f59')); + lines.push(''); - return lines.join("\n"); + return lines.join('\n'); } // 重写事件文本里的序号前缀:把 “{idx}. ” 或 “{idx}.【...】” 的 idx 替换 function renumberEventText(text, newIndex) { @@ -468,7 +448,7 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities return lines.join("\n"); } - // 候选按相似度从高到低(保证高分优先拥有证据) + // 候选按匹配度从高到低(保证高分优先拥有证据) const candidates = [...recalledEvents].sort((a, b) => (b.similarity || 0) - (a.similarity || 0)); const selectedDirect = []; // { event, text, tokens, chunk, hasEvidence } diff --git a/modules/story-summary/story-summary.css b/modules/story-summary/story-summary.css index 375c39c..7946367 100644 --- a/modules/story-summary/story-summary.css +++ b/modules/story-summary/story-summary.css @@ -2677,3 +2677,30 @@ h1 span { font-size: .8125rem; line-height: 1.8; } + +/* 调试日志区域手机适配 */ +@media (max-width: 768px) { + #recall-log-content { + font-size: 10px; + padding: 8px; + overflow-x: hidden; /* 禁止横向滚动 */ + word-break: break-all; /* 强制换行 */ + white-space: pre-wrap; /* 保留换行但允许自动换行 */ + } + + .debug-log-viewer { + font-size: 10px; + overflow-x: hidden; + word-break: break-all; + white-space: pre-wrap; + } +} + +@media (max-width: 480px) { + #recall-log-content, + .debug-log-viewer { + font-size: 9px; + line-height: 1.4; + padding: 6px; + } +} diff --git a/modules/story-summary/story-summary.js b/modules/story-summary/story-summary.js index c9a343a..09120e2 100644 --- a/modules/story-summary/story-summary.js +++ b/modules/story-summary/story-summary.js @@ -109,6 +109,26 @@ let vectorAbortController = null; let lastSentUserMessage = null; let lastSentTimestamp = 0; +function captureUserInput() { + const text = $("#send_textarea").val(); + if (text?.trim()) { + lastSentUserMessage = text.trim(); + lastSentTimestamp = Date.now(); + } +} + +function onSendPointerdown(e) { + if (e.target?.closest?.("#send_but")) { + captureUserInput(); + } +} + +function onSendKeydown(e) { + if (e.key === "Enter" && !e.shiftKey && e.target?.closest?.("#send_textarea")) { + captureUserInput(); + } +} + let hideApplyTimer = null; const HIDE_APPLY_DEBOUNCE_MS = 250; @@ -1483,6 +1503,10 @@ function registerEvents() { eventSource.on(event_types.USER_MESSAGE_RENDERED, (data) => setTimeout(() => handleMessageRendered(data), 50)); eventSource.on(event_types.CHARACTER_MESSAGE_RENDERED, (data) => setTimeout(() => handleMessageRendered(data), 50)); + // 用户输入捕获(原生捕获阶段) + document.addEventListener("pointerdown", onSendPointerdown, true); + document.addEventListener("keydown", onSendKeydown, true); + // 注入链路 eventSource.on(event_types.GENERATION_STARTED, handleGenerationStarted); eventSource.on(event_types.GENERATION_STOPPED, clearExtensionPrompt); @@ -1497,6 +1521,9 @@ function unregisterEvents() { hideOverlay(); clearExtensionPrompt(); + + document.removeEventListener("pointerdown", onSendPointerdown, true); + document.removeEventListener("keydown", onSendKeydown, true); } // ═══════════════════════════════════════════════════════════════════════════ diff --git a/modules/story-summary/vector/recall.js b/modules/story-summary/vector/recall.js index b1a2aab..234dd9f 100644 --- a/modules/story-summary/vector/recall.js +++ b/modules/story-summary/vector/recall.js @@ -1,12 +1,12 @@ -// Story Summary - Recall Engine -// L1 chunk + L2 event 召回 -// - 全量向量打分 -// - 指数衰减加权 Query Embedding -// - 实体/参与者加分 -// - MMR 去重 -// - floor 稀疏去重 - -import { getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from './chunk-store.js'; +// Story Summary - Recall Engine +// L1 chunk + L2 event 召回 +// - 全量向量打分 +// - 指数衰减加权 Query Embedding +// - 实体/参与者加分 +// - MMR 去重 +// - floor 稀疏去重 + +import { getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from './chunk-store.js'; import { embed, getEngineFingerprint } from './embedder.js'; import { xbLog } from '../../../core/debug-core.js'; import { getContext } from '../../../../../../extensions.js'; @@ -18,29 +18,29 @@ import { stateToVirtualChunks, mergeAndSparsify, } from './state-recall.js'; - -const MODULE_ID = 'recall'; - -const CONFIG = { - QUERY_MSG_COUNT: 5, - QUERY_DECAY_BETA: 0.7, - QUERY_MAX_CHARS: 600, - QUERY_CONTEXT_CHARS: 240, - - // 因果链 - CAUSAL_CHAIN_MAX_DEPTH: 10, // 放宽跳数,让图自然终止 - CAUSAL_INJECT_MAX: 30, // 放宽上限,由 prompt token 预算最终控制 - - CANDIDATE_CHUNKS: 200, - CANDIDATE_EVENTS: 150, - - MAX_CHUNKS: 40, + +const MODULE_ID = 'recall'; + +const CONFIG = { + QUERY_MSG_COUNT: 5, + QUERY_DECAY_BETA: 0.7, + QUERY_MAX_CHARS: 600, + QUERY_CONTEXT_CHARS: 240, + + // 因果链 + CAUSAL_CHAIN_MAX_DEPTH: 10, // 放宽跳数,让图自然终止 + CAUSAL_INJECT_MAX: 30, // 放宽上限,由 prompt token 预算最终控制 + + CANDIDATE_CHUNKS: 200, + CANDIDATE_EVENTS: 150, + + MAX_CHUNKS: 40, MAX_EVENTS: 120, - - MIN_SIMILARITY_CHUNK: 0.6, - MIN_SIMILARITY_EVENT: 0.65, - MMR_LAMBDA: 0.72, - + + MIN_SIMILARITY_CHUNK: 0.6, + MIN_SIMILARITY_EVENT: 0.65, + MMR_LAMBDA: 0.72, + BONUS_PARTICIPANT_HIT: 0.08, BONUS_TEXT_HIT: 0.05, BONUS_WORLD_TOPIC_HIT: 0.06, @@ -51,101 +51,101 @@ const CONFIG = { FLOOR_LIMIT: 1, }; - -// ═══════════════════════════════════════════════════════════════════════════ -// 工具函数 -// ═══════════════════════════════════════════════════════════════════════════ - -function cosineSimilarity(a, b) { - if (!a?.length || !b?.length || a.length !== b.length) return 0; - let dot = 0, nA = 0, nB = 0; - for (let i = 0; i < a.length; i++) { - dot += a[i] * b[i]; - nA += a[i] * a[i]; - nB += b[i] * b[i]; - } - return nA && nB ? dot / (Math.sqrt(nA) * Math.sqrt(nB)) : 0; -} - -function normalizeVec(v) { - let s = 0; - for (let i = 0; i < v.length; i++) s += v[i] * v[i]; - s = Math.sqrt(s) || 1; - return v.map(x => x / s); -} - -// ═══════════════════════════════════════════════════════════════════════════ -// 因果链追溯(Graph-augmented retrieval) -// - 从已召回事件出发,沿 causedBy 向上追溯祖先事件 -// - 记录边:chainFrom = 哪个召回事件需要它 -// - 不在这里决定“是否额外注入”,只负责遍历与结构化结果 -// ═══════════════════════════════════════════════════════════════════════════ - -function buildEventIndex(allEvents) { - const map = new Map(); - for (const e of allEvents || []) { - if (e?.id) map.set(e.id, e); - } - return map; -} - -/** - * @returns {Map} - */ -function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MAX_DEPTH) { - const out = new Map(); - const idRe = /^evt-\d+$/; - - function visit(parentId, depth, chainFrom) { - if (depth > maxDepth) return; - if (!idRe.test(parentId)) return; - - const ev = eventIndex.get(parentId); - if (!ev) return; - - // 如果同一个祖先被多个召回事件引用:保留更“近”的深度或追加来源 - const existed = out.get(parentId); - if (!existed) { - out.set(parentId, { event: ev, depth, chainFrom: [chainFrom] }); - } else { - if (depth < existed.depth) existed.depth = depth; - if (!existed.chainFrom.includes(chainFrom)) existed.chainFrom.push(chainFrom); - } - - for (const next of (ev.causedBy || [])) { - visit(String(next || '').trim(), depth + 1, chainFrom); - } - } - - for (const r of recalledEvents || []) { - const rid = r?.event?.id; - if (!rid) continue; - for (const cid of (r.event?.causedBy || [])) { - visit(String(cid || '').trim(), 1, rid); - } - } - - return out; -} - -/** - * 因果事件排序:引用数 > 深度 > 编号 - */ -function sortCausalEvents(causalArray) { - return causalArray.sort((a, b) => { - // 1. 被多条召回链引用的优先 - const refDiff = b.chainFrom.length - a.chainFrom.length; - if (refDiff !== 0) return refDiff; - - // 2. 深度浅的优先 - const depthDiff = a.depth - b.depth; - if (depthDiff !== 0) return depthDiff; - - // 3. 事件编号排序 - return String(a.event.id).localeCompare(String(b.event.id)); - }); -} - + +// ═══════════════════════════════════════════════════════════════════════════ +// 工具函数 +// ═══════════════════════════════════════════════════════════════════════════ + +function cosineSimilarity(a, b) { + if (!a?.length || !b?.length || a.length !== b.length) return 0; + let dot = 0, nA = 0, nB = 0; + for (let i = 0; i < a.length; i++) { + dot += a[i] * b[i]; + nA += a[i] * a[i]; + nB += b[i] * b[i]; + } + return nA && nB ? dot / (Math.sqrt(nA) * Math.sqrt(nB)) : 0; +} + +function normalizeVec(v) { + let s = 0; + for (let i = 0; i < v.length; i++) s += v[i] * v[i]; + s = Math.sqrt(s) || 1; + return v.map(x => x / s); +} + +// ═══════════════════════════════════════════════════════════════════════════ +// 因果链追溯(Graph-augmented retrieval) +// - 从已召回事件出发,沿 causedBy 向上追溯祖先事件 +// - 记录边:chainFrom = 哪个召回事件需要它 +// - 不在这里决定“是否额外注入”,只负责遍历与结构化结果 +// ═══════════════════════════════════════════════════════════════════════════ + +function buildEventIndex(allEvents) { + const map = new Map(); + for (const e of allEvents || []) { + if (e?.id) map.set(e.id, e); + } + return map; +} + +/** + * @returns {Map} + */ +function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MAX_DEPTH) { + const out = new Map(); + const idRe = /^evt-\d+$/; + + function visit(parentId, depth, chainFrom) { + if (depth > maxDepth) return; + if (!idRe.test(parentId)) return; + + const ev = eventIndex.get(parentId); + if (!ev) return; + + // 如果同一个祖先被多个召回事件引用:保留更“近”的深度或追加来源 + const existed = out.get(parentId); + if (!existed) { + out.set(parentId, { event: ev, depth, chainFrom: [chainFrom] }); + } else { + if (depth < existed.depth) existed.depth = depth; + if (!existed.chainFrom.includes(chainFrom)) existed.chainFrom.push(chainFrom); + } + + for (const next of (ev.causedBy || [])) { + visit(String(next || '').trim(), depth + 1, chainFrom); + } + } + + for (const r of recalledEvents || []) { + const rid = r?.event?.id; + if (!rid) continue; + for (const cid of (r.event?.causedBy || [])) { + visit(String(cid || '').trim(), 1, rid); + } + } + + return out; +} + +/** + * 因果事件排序:引用数 > 深度 > 编号 + */ +function sortCausalEvents(causalArray) { + return causalArray.sort((a, b) => { + // 1. 被多条召回链引用的优先 + const refDiff = b.chainFrom.length - a.chainFrom.length; + if (refDiff !== 0) return refDiff; + + // 2. 深度浅的优先 + const depthDiff = a.depth - b.depth; + if (depthDiff !== 0) return depthDiff; + + // 3. 事件编号排序 + return String(a.event.id).localeCompare(String(b.event.id)); + }); +} + function normalize(s) { return String(s || '').normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim(); } @@ -165,21 +165,23 @@ function cleanForRecall(text) { // 2. 移除 TTS 标记(硬编码) return filterText(text).replace(/\[tts:[^\]]*\]/gi, '').trim(); } - -function buildExpDecayWeights(n, beta) { - const last = n - 1; - const w = Array.from({ length: n }, (_, i) => Math.exp(beta * (i - last))); - const sum = w.reduce((a, b) => a + b, 0) || 1; - return w.map(x => x / sum); -} - -// ═══════════════════════════════════════════════════════════════════════════ -// Query 构建 -// ═══════════════════════════════════════════════════════════════════════════ - + +function buildExpDecayWeights(n, beta) { + const last = n - 1; + const w = Array.from({ length: n }, (_, i) => Math.exp(beta * (i - last))); + const sum = w.reduce((a, b) => a + b, 0) || 1; + return w.map(x => x / sum); +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Query 构建 +// ═══════════════════════════════════════════════════════════════════════════ + function buildQuerySegments(chat, count, excludeLastAi, pendingUserMessage = null) { if (!chat?.length) return []; + const { name1 } = getContext(); + let messages = chat; if (excludeLastAi && messages.length > 0 && !messages[messages.length - 1]?.is_user) { messages = messages.slice(0, -1); @@ -193,152 +195,152 @@ function buildQuerySegments(chat, count, excludeLastAi, pendingUserMessage = nul // 避免重复(如果 chat 已包含该消息则不追加) if (lastMsgText !== pendingText) { - messages = [...messages, { is_user: true, name: "用户", mes: pendingUserMessage }]; + messages = [...messages, { is_user: true, name: name1 || "用户", mes: pendingUserMessage }]; } } return messages.slice(-count).map((m, idx, arr) => { - const speaker = m.name || (m.is_user ? '用户' : '角色'); + const speaker = m.name || (m.is_user ? (name1 || "用户") : "角色"); const clean = cleanForRecall(m.mes); if (!clean) return ''; - const limit = idx === arr.length - 1 ? CONFIG.QUERY_MAX_CHARS : CONFIG.QUERY_CONTEXT_CHARS; - return `${speaker}: ${clean.slice(0, limit)}`; - }).filter(Boolean); -} - -async function embedWeightedQuery(segments, vectorConfig) { - if (!segments?.length) return null; - - const weights = buildExpDecayWeights(segments.length, CONFIG.QUERY_DECAY_BETA); - const vecs = await embed(segments, vectorConfig); - const dims = vecs?.[0]?.length || 0; - if (!dims) return null; - - const out = new Array(dims).fill(0); - for (let i = 0; i < vecs.length; i++) { - for (let j = 0; j < dims; j++) out[j] += (vecs[i][j] || 0) * weights[i]; - } - - return { vector: normalizeVec(out), weights }; -} - -// ═══════════════════════════════════════════════════════════════════════════ -// 实体抽取 -// ═══════════════════════════════════════════════════════════════════════════ - -function buildEntityLexicon(store, allEvents) { - const { name1 } = getContext(); - const userName = normalize(name1); - const set = new Set(); - - for (const e of allEvents || []) { - for (const p of e.participants || []) { - const s = normalize(p); - if (s) set.add(s); - } - } - - const json = store?.json || {}; - - for (const m of json.characters?.main || []) { - const s = normalize(typeof m === 'string' ? m : m?.name); - if (s) set.add(s); - } - - for (const a of json.arcs || []) { - const s = normalize(a?.name); - if (s) set.add(s); - } - - for (const w of json.world || []) { - const t = normalize(w?.topic); - if (t && !t.includes('::')) set.add(t); - } - - for (const r of json.characters?.relationships || []) { - const from = normalize(r?.from); - const to = normalize(r?.to); - if (from) set.add(from); - if (to) set.add(to); - } - - const stop = new Set([userName, '我', '你', '他', '她', '它', '用户', '角色', 'assistant'].map(normalize).filter(Boolean)); - - return Array.from(set) - .filter(s => s.length >= 2 && !stop.has(s) && !/^[\s\p{P}\p{S}]+$/u.test(s) && !/<[^>]+>/.test(s)) - .slice(0, 5000); -} - -function extractEntities(text, lexicon) { - const t = normalize(text); - if (!t || !lexicon?.length) return []; - - const sorted = [...lexicon].sort((a, b) => b.length - a.length); - const hits = []; - for (const e of sorted) { - if (t.includes(e)) hits.push(e); - if (hits.length >= 20) break; - } - return hits; -} - -// ═══════════════════════════════════════════════════════════════════════════ -// MMR -// ═══════════════════════════════════════════════════════════════════════════ - -function mmrSelect(candidates, k, lambda, getVector, getScore) { - const selected = []; - const ids = new Set(); - - while (selected.length < k && candidates.length) { - let best = null, bestScore = -Infinity; - - for (const c of candidates) { - if (ids.has(c._id)) continue; - - const rel = getScore(c); - let div = 0; - - if (selected.length) { - const vC = getVector(c); - if (vC?.length) { - for (const s of selected) { - const sim = cosineSimilarity(vC, getVector(s)); - if (sim > div) div = sim; - } - } - } - - const score = lambda * rel - (1 - lambda) * div; - if (score > bestScore) { - bestScore = score; - best = c; - } - } - - if (!best) break; - selected.push(best); - ids.add(best._id); - } - - return selected; -} - -// ═══════════════════════════════════════════════════════════════════════════ -// L1 Chunks 检索 -// ═══════════════════════════════════════════════════════════════════════════ - + const limit = idx === arr.length - 1 ? CONFIG.QUERY_MAX_CHARS : CONFIG.QUERY_CONTEXT_CHARS; + return `${speaker}: ${clean.slice(0, limit)}`; + }).filter(Boolean); +} + +async function embedWeightedQuery(segments, vectorConfig) { + if (!segments?.length) return null; + + const weights = buildExpDecayWeights(segments.length, CONFIG.QUERY_DECAY_BETA); + const vecs = await embed(segments, vectorConfig); + const dims = vecs?.[0]?.length || 0; + if (!dims) return null; + + const out = new Array(dims).fill(0); + for (let i = 0; i < vecs.length; i++) { + for (let j = 0; j < dims; j++) out[j] += (vecs[i][j] || 0) * weights[i]; + } + + return { vector: normalizeVec(out), weights }; +} + +// ═══════════════════════════════════════════════════════════════════════════ +// 实体抽取 +// ═══════════════════════════════════════════════════════════════════════════ + +function buildEntityLexicon(store, allEvents) { + const { name1 } = getContext(); + const userName = normalize(name1); + const set = new Set(); + + for (const e of allEvents || []) { + for (const p of e.participants || []) { + const s = normalize(p); + if (s) set.add(s); + } + } + + const json = store?.json || {}; + + for (const m of json.characters?.main || []) { + const s = normalize(typeof m === 'string' ? m : m?.name); + if (s) set.add(s); + } + + for (const a of json.arcs || []) { + const s = normalize(a?.name); + if (s) set.add(s); + } + + for (const w of json.world || []) { + const t = normalize(w?.topic); + if (t && !t.includes('::')) set.add(t); + } + + for (const r of json.characters?.relationships || []) { + const from = normalize(r?.from); + const to = normalize(r?.to); + if (from) set.add(from); + if (to) set.add(to); + } + + const stop = new Set([userName, '我', '你', '他', '她', '它', '用户', '角色', 'assistant'].map(normalize).filter(Boolean)); + + return Array.from(set) + .filter(s => s.length >= 2 && !stop.has(s) && !/^[\s\p{P}\p{S}]+$/u.test(s) && !/<[^>]+>/.test(s)) + .slice(0, 5000); +} + +function extractEntities(text, lexicon) { + const t = normalize(text); + if (!t || !lexicon?.length) return []; + + const sorted = [...lexicon].sort((a, b) => b.length - a.length); + const hits = []; + for (const e of sorted) { + if (t.includes(e)) hits.push(e); + if (hits.length >= 20) break; + } + return hits; +} + +// ═══════════════════════════════════════════════════════════════════════════ +// MMR +// ═══════════════════════════════════════════════════════════════════════════ + +function mmrSelect(candidates, k, lambda, getVector, getScore) { + const selected = []; + const ids = new Set(); + + while (selected.length < k && candidates.length) { + let best = null, bestScore = -Infinity; + + for (const c of candidates) { + if (ids.has(c._id)) continue; + + const rel = getScore(c); + let div = 0; + + if (selected.length) { + const vC = getVector(c); + if (vC?.length) { + for (const s of selected) { + const sim = cosineSimilarity(vC, getVector(s)); + if (sim > div) div = sim; + } + } + } + + const score = lambda * rel - (1 - lambda) * div; + if (score > bestScore) { + bestScore = score; + best = c; + } + } + + if (!best) break; + selected.push(best); + ids.add(best._id); + } + + return selected; +} + +// ═══════════════════════════════════════════════════════════════════════════ +// L1 Chunks 检索 +// ═══════════════════════════════════════════════════════════════════════════ + async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map()) { - const { chatId } = getContext(); - if (!chatId || !queryVector?.length) return []; - - const meta = await getMeta(chatId); - const fp = getEngineFingerprint(vectorConfig); - if (meta.fingerprint && meta.fingerprint !== fp) return []; - - const chunkVectors = await getAllChunkVectors(chatId); - if (!chunkVectors.length) return []; - + const { chatId } = getContext(); + if (!chatId || !queryVector?.length) return []; + + const meta = await getMeta(chatId); + const fp = getEngineFingerprint(vectorConfig); + if (meta.fingerprint && meta.fingerprint !== fp) return []; + + const chunkVectors = await getAllChunkVectors(chatId); + if (!chunkVectors.length) return []; + const scored = chunkVectors.map(cv => { const match = String(cv.chunkId).match(/c-(\d+)-(\d+)/); const floor = match ? parseInt(match[1], 10) : 0; @@ -375,19 +377,20 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map()) .filter(s => s.similarity >= CONFIG.MIN_SIMILARITY_CHUNK) .sort((a, b) => b.similarity - a.similarity) .slice(0, CONFIG.CANDIDATE_CHUNKS); - - // 动态 K:质量不够就少拿 - const dynamicK = Math.min(CONFIG.MAX_CHUNKS, candidates.length); - - const selected = mmrSelect( - candidates, - dynamicK, - CONFIG.MMR_LAMBDA, - c => c.vector, - c => c.similarity - ); - - // floor 稀疏去重:每个楼层只保留该楼层相似度最高的那条 + + // 动态 K:质量不够就少拿 + const dynamicK = Math.min(CONFIG.MAX_CHUNKS, candidates.length); + + const selected = mmrSelect( + candidates, + dynamicK, + CONFIG.MMR_LAMBDA, + c => c.vector, + c => c.similarity + ); + + +// floor 稀疏去重:每个楼层只保留该楼层匹配度最高的那条 const bestByFloor = new Map(); for (const s of selected) { const prev = bestByFloor.get(s.floor); @@ -396,21 +399,21 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map()) } } - // 最终结果按相似度降序 + // 最终结果按匹配度降序 const sparse = Array.from(bestByFloor.values()).sort((a, b) => b.similarity - a.similarity); - const floors = [...new Set(sparse.map(c => c.floor))]; - const chunks = await getChunksByFloors(chatId, floors); - const chunkMap = new Map(chunks.map(c => [c.chunkId, c])); - + const floors = [...new Set(sparse.map(c => c.floor))]; + const chunks = await getChunksByFloors(chatId, floors); + const chunkMap = new Map(chunks.map(c => [c.chunkId, c])); + const results = sparse.map(item => { const chunk = chunkMap.get(item.chunkId); if (!chunk) return null; return { chunkId: item.chunkId, floor: item.floor, - chunkIdx: item.chunkIdx, - speaker: chunk.speaker, + chunkIdx: item.chunkIdx, + speaker: chunk.speaker, isUser: chunk.isUser, text: chunk.text, similarity: item.similarity, @@ -424,11 +427,11 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map()) return results; } - -// ═══════════════════════════════════════════════════════════════════════════ -// L2 Events 检索 -// ═══════════════════════════════════════════════════════════════════════════ - + +// ═══════════════════════════════════════════════════════════════════════════ +// L2 Events 检索 +// ═══════════════════════════════════════════════════════════════════════════ + async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities, l0FloorBonus = new Map()) { const { chatId, name1 } = getContext(); if (!chatId || !queryVector?.length) { @@ -442,37 +445,41 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn const eventVectors = await getAllEventVectors(chatId); const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector])); if (!vectorMap.size) return []; - - const userName = normalize(name1); - const querySet = new Set((queryEntities || []).map(normalize)); - - // 只取硬约束类的 world topic - const worldTopics = (store?.json?.world || []) - .filter(w => ['inventory', 'rule', 'knowledge'].includes(String(w.category).toLowerCase())) - .map(w => normalize(w.topic)) - .filter(Boolean); - - const scored = (allEvents || []).map((event, idx) => { - const v = vectorMap.get(event.id); - const sim = v ? cosineSimilarity(queryVector, v) : 0; - - let bonus = 0; - const reasons = []; - - // participants 命中 - const participants = (event.participants || []).map(normalize).filter(Boolean); - if (participants.some(p => p !== userName && querySet.has(p))) { - bonus += CONFIG.BONUS_PARTICIPANT_HIT; - reasons.push('participant'); - } - - // text 命中 - const text = normalize(`${event.title || ''} ${event.summary || ''}`); - if ((queryEntities || []).some(e => text.includes(normalize(e)))) { - bonus += CONFIG.BONUS_TEXT_HIT; - reasons.push('text'); - } - + + const userName = normalize(name1); + const queryNormList = (queryEntities || []).map(normalize).filter(Boolean); + const querySet = new Set(queryNormList); + + // 只取硬约束类的 world topic + const worldTopics = (store?.json?.world || []) + .filter(w => ['inventory', 'rule', 'knowledge'].includes(String(w.category).toLowerCase())) + .map(w => normalize(w.topic)) + .filter(Boolean); + + const scored = (allEvents || []).map((event, idx) => { + const v = vectorMap.get(event.id); + const sim = v ? cosineSimilarity(queryVector, v) : 0; + + let bonus = 0; + const reasons = []; + + // participants 命中 + const participants = (event.participants || []).map(normalize).filter(Boolean); + const hitCount = participants.filter(p => p !== userName && querySet.has(p)).length; + const hasParticipantHit = hitCount > 0; + if (hasParticipantHit) { + bonus += CONFIG.BONUS_PARTICIPANT_HIT * Math.log2(hitCount + 1); + reasons.push(hitCount > 1 ? `participant×${hitCount}` : 'participant'); + } + + // text 命中 + const text = normalize(`${event.title || ''} ${event.summary || ''}`); + const textHitCount = queryNormList.filter(e => text.includes(e)).length; + if (textHitCount > 0) { + bonus += CONFIG.BONUS_TEXT_HIT * Math.log2(textHitCount + 1); + reasons.push(textHitCount > 1 ? `text×${textHitCount}` : 'text'); + } + // world topic 命中 if (worldTopics.some(topic => querySet.has(topic) && text.includes(topic))) { bonus += CONFIG.BONUS_WORLD_TOPIC_HIT; @@ -490,20 +497,20 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn } } } - - return { - _id: event.id, - _idx: idx, - event, - similarity: sim, - bonus, - finalScore: sim + bonus, - reasons, - isDirect: reasons.includes('participant'), - vector: v, - }; - }); - + + return { + _id: event.id, + _idx: idx, + event, + similarity: sim, + bonus, + finalScore: sim + bonus, + reasons, + isDirect: hasParticipantHit, + vector: v, + }; + }); + // ★ 记录过滤前的分布(用 finalScore,与显示一致) const preFilterDistribution = { total: scored.length, @@ -521,205 +528,151 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn .filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT) .sort((a, b) => b.finalScore - a.finalScore) .slice(0, CONFIG.CANDIDATE_EVENTS); - - // 动态 K:质量不够就少拿 - const dynamicK = Math.min(CONFIG.MAX_EVENTS, candidates.length); - - const selected = mmrSelect( - candidates, - dynamicK, - CONFIG.MMR_LAMBDA, - c => c.vector, - c => c.finalScore - ); - - return selected - .sort((a, b) => b.finalScore - a.finalScore) - .map(s => ({ - event: s.event, - similarity: s.finalScore, - _recallType: s.isDirect ? 'DIRECT' : 'SIMILAR', - _recallReason: s.reasons.length ? s.reasons.join('+') : '相似', - _preFilterDistribution: preFilterDistribution, - })); -} - -// ═══════════════════════════════════════════════════════════════════════════ -// 日志:因果树格式化 -// ═══════════════════════════════════════════════════════════════════════════ - -function formatCausalTree(causalEvents, recalledEvents) { - if (!causalEvents?.length) return ''; - - const lines = [ - '', - '┌─────────────────────────────────────────────────────────────┐', - '│ 【因果链追溯】 │', - '└─────────────────────────────────────────────────────────────┘', - ]; - - // 按 chainFrom 分组展示 - const bySource = new Map(); - for (const c of causalEvents) { - for (const src of c.chainFrom || []) { - if (!bySource.has(src)) bySource.set(src, []); - bySource.get(src).push(c); - } - } - - for (const [sourceId, ancestors] of bySource) { - const sourceEvent = recalledEvents.find(e => e.event?.id === sourceId); - const sourceTitle = sourceEvent?.event?.title || sourceId; - lines.push(` ${sourceId} "${sourceTitle}" 的前因链:`); - - // 按深度排序 - ancestors.sort((a, b) => a.depth - b.depth); - - for (const c of ancestors) { - const indent = ' ' + ' '.repeat(c.depth - 1); - const ev = c.event; - const title = ev.title || '(无标题)'; - const refs = c.chainFrom.length > 1 ? ` [被${c.chainFrom.length}条链引用]` : ''; - lines.push(`${indent}└─ [depth=${c.depth}] ${ev.id} "${title}"${refs}`); - } - } - - lines.push(''); - return lines.join('\n'); -} - -// ═══════════════════════════════════════════════════════════════════════════ -// 日志:主报告 -// ═══════════════════════════════════════════════════════════════════════════ - -function formatRecallLog({ elapsed, segments, weights, chunkResults, eventResults, allEvents, queryEntities, causalEvents = [], chunkPreFilterStats = null, l0Results = [] }) { - const lines = [ - '╔══════════════════════════════════════════════════════════════╗', - '║ 记忆召回报告 ║', - '╠══════════════════════════════════════════════════════════════╣', - `║ 耗时: ${elapsed}ms`, - '╚══════════════════════════════════════════════════════════════╝', - '', - '┌─────────────────────────────────────────────────────────────┐', - '│ 【查询构建】最近 5 条消息,指数衰减加权 (β=0.7) │', - '│ 权重越高 = 对召回方向影响越大 │', - '└─────────────────────────────────────────────────────────────┘', - ]; - - // 按权重从高到低排序显示 - const segmentsSorted = segments.map((s, i) => ({ - idx: i + 1, - weight: weights?.[i] ?? 0, - text: s, - })).sort((a, b) => b.weight - a.weight); - - segmentsSorted.forEach((s, rank) => { - const bar = '█'.repeat(Math.round(s.weight * 20)); - const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text; - const marker = rank === 0 ? ' ◀ 主导' : ''; - lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`); - }); - - lines.push(''); - lines.push('┌─────────────────────────────────────────────────────────────┐'); - lines.push('│ 【提取实体】用于判断"亲身经历"(DIRECT) │'); - lines.push('└─────────────────────────────────────────────────────────────┘'); - lines.push(` ${queryEntities?.length ? queryEntities.join('、') : '(无)'}`); + + // 动态 K:质量不够就少拿 + const dynamicK = Math.min(CONFIG.MAX_EVENTS, candidates.length); + + const selected = mmrSelect( + candidates, + dynamicK, + CONFIG.MMR_LAMBDA, + c => c.vector, + c => c.finalScore + ); + + return selected + .sort((a, b) => b.finalScore - a.finalScore) + .map(s => ({ + event: s.event, + similarity: s.finalScore, + _recallType: s.isDirect ? 'DIRECT' : 'SIMILAR', + _recallReason: s.reasons.length ? s.reasons.join('+') : '相似', + _preFilterDistribution: preFilterDistribution, + })); +} + +// ═══════════════════════════════════════════════════════════════════════════ +// 日志:因果树格式化 +// ═══════════════════════════════════════════════════════════════════════════ + + +// ═══════════════════════════════════════════════════════════════════════════ +// 日志:主报告 +// ═══════════════════════════════════════════════════════════════════════════ + +function formatRecallLog({ + elapsed, + segments, + weights, + chunkResults, + eventResults, + allEvents, + queryEntities, + causalEvents = [], + chunkPreFilterStats = null, + l0Results = [], + l0PreFilterStats = null, +}) { + const lines = [ + '\u2554' + '\u2550'.repeat(62) + '\u2557', + '\u2551 \u8bb0\u5fc6\u53ec\u56de\u62a5\u544a \u2551', + '\u2560' + '\u2550'.repeat(62) + '\u2563', + `\u2551 \u8017\u65f6: ${elapsed}ms`, + '\u255a' + '\u2550'.repeat(62) + '\u255d', + '', + '\u250c' + '\u2500'.repeat(61) + '\u2510', + '\u2502 \u3010\u67e5\u8be2\u6784\u5efa\u3011\u6700\u8fd1 5 \u6761\u6d88\u606f\uff0c\u6307\u6570\u8870\u51cf\u52a0\u6743 (\u03b2=0.7) \u2502', + '\u2514' + '\u2500'.repeat(61) + '\u2518', + ]; + + // Keep query previews only (the only place to keep raw text) + const segmentsSorted = segments.map((s, i) => ({ + idx: i + 1, + weight: weights?.[i] ?? 0, + text: s, + })).sort((a, b) => b.weight - a.weight); + + segmentsSorted.forEach((s, rank) => { + const bar = '\u2588'.repeat(Math.round(s.weight * 20)); + const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text; + const marker = rank === 0 ? ' \u25c0 \u4e3b\u5bfc' : ''; + lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`); + }); lines.push(''); - lines.push('┌─────────────────────────────────────────────────────────────┐'); - lines.push('│ 【L0 语义锚点】状态变更加权信号 │'); - lines.push('└─────────────────────────────────────────────────────────────┘'); + lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510'); + lines.push('\u2502 \u3010\u63d0\u53d6\u5b9e\u4f53\u3011 \u2502'); + lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518'); + lines.push(` ${queryEntities?.length ? queryEntities.join('\u3001') : '(\u65e0)'}`); + // Recall stats (numbers only) + lines.push(''); + lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510'); + lines.push('\u2502 \u3010\u53ec\u56de\u7edf\u8ba1\u3011 \u2502'); + lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518'); + + // L0 + const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b); + lines.push(' L0 \u8bed\u4e49\u951a\u70b9:'); if (l0Results.length) { - const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b); - lines.push(` 召回: ${l0Results.length} 条`); - lines.push(` 影响楼层: ${l0Floors.join(', ')}(L1/L2 候选在这些楼层获得 +${CONFIG.L0_FLOOR_BONUS_FACTOR} 加分)`); - lines.push(''); - - l0Results.slice(0, 10).forEach((r, i) => { - lines.push(` ${String(i + 1).padStart(2)}. #${r.floor} ${r.atom.semantic.slice(0, 50)}${r.atom.semantic.length > 50 ? '...' : ''}`); - lines.push(` 相似度: ${r.similarity.toFixed(3)}`); - }); - - if (l0Results.length > 10) { - lines.push(` ... 还有 ${l0Results.length - 10} 条`); - } + const l0Dist = { + '0.8+': l0Results.filter(r => r.similarity >= 0.8).length, + '0.7-0.8': l0Results.filter(r => r.similarity >= 0.7 && r.similarity < 0.8).length, + '0.6-0.7': l0Results.filter(r => r.similarity >= 0.6 && r.similarity < 0.7).length, + '0.55-0.6': l0Results.filter(r => r.similarity >= 0.55 && r.similarity < 0.6).length, + }; + lines.push(` \u9009\u5165: ${l0Results.length} \u6761 | \u5f71\u54cd\u697c\u5c42: ${l0Floors.join(', ')} (+${CONFIG.L0_FLOOR_BONUS_FACTOR} \u52a0\u6743)`); + lines.push(` \u5339\u914d\u5ea6: 0.8+: ${l0Dist['0.8+']} | 0.7-0.8: ${l0Dist['0.7-0.8']} | 0.6-0.7: ${l0Dist['0.6-0.7']} | 0.55-0.6: ${l0Dist['0.55-0.6']}`); } else { - lines.push(' 召回: 0 条(无 L0 数据或未启用)'); + lines.push(' (\u65e0\u6570\u636e)'); } + // L1 lines.push(''); - lines.push('┌─────────────────────────────────────────────────────────────┐'); - lines.push('│ 【L1 原文片段】 │'); - lines.push('└─────────────────────────────────────────────────────────────┘'); - + lines.push(' L1 \u539f\u6587\u7247\u6bb5:'); if (chunkPreFilterStats) { const dist = chunkPreFilterStats.distribution || {}; - lines.push(` 过滤前: ${chunkPreFilterStats.total} 条`); - lines.push(' 相似度分布:'); - lines.push(` 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0}`); - lines.push(` 0.55-0.6: ${dist['0.55-0.6'] || 0} | <0.55: ${dist['<0.55'] || 0}`); - lines.push(` 通过阈值(>=${chunkPreFilterStats.threshold}): ${chunkPreFilterStats.passThreshold} 条`); - lines.push(` MMR+Floor去重后: ${chunkResults.length} 条`); + lines.push(` \u5168\u91cf: ${chunkPreFilterStats.total} \u6761 | \u901a\u8fc7\u9608\u503c(\u2265${chunkPreFilterStats.threshold}): ${chunkPreFilterStats.passThreshold} \u6761 | \u6700\u7ec8: ${chunkResults.length} \u6761`); + lines.push(` \u5339\u914d\u5ea6: 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0} | <0.6: ${(dist['0.55-0.6'] || 0) + (dist['<0.55'] || 0)}`); + + const floorCounts = new Map(); + chunkResults.forEach(c => floorCounts.set(c.floor, (floorCounts.get(c.floor) || 0) + 1)); + const floorStats = `\u8986\u76d6 ${floorCounts.size} \u4e2a\u697c\u5c42`; + lines.push(` ${floorStats}`); } else { - lines.push(` 召回: ${chunkResults.length} 条`); + lines.push(` \u9009\u5165: ${chunkResults.length} \u6761`); } - chunkResults.slice(0, 15).forEach((c, i) => { - const preview = c.text.length > 50 ? c.text.slice(0, 50) + '...' : c.text; - lines.push(` ${String(i + 1).padStart(2)}. #${String(c.floor).padStart(3)} [${c.speaker}] ${preview}`); - lines.push(` 相似度: ${c.similarity.toFixed(3)}`); - }); - - if (chunkResults.length > 15) { - lines.push(` ... 还有 ${chunkResults.length - 15} 条`); - } - - lines.push(''); - lines.push('┌─────────────────────────────────────────────────────────────┐'); - lines.push('│ 【L2 事件记忆】 │'); - lines.push('│ DIRECT=亲身经历 SIMILAR=相关背景 │'); - lines.push('└─────────────────────────────────────────────────────────────┘'); - - eventResults.forEach((e, i) => { - const type = e._recallType === 'DIRECT' ? '★ DIRECT ' : ' SIMILAR'; - const title = e.event.title || '(无标题)'; - lines.push(` ${String(i + 1).padStart(2)}. ${type} ${title}`); - lines.push(` 相似度: ${e.similarity.toFixed(3)} | 原因: ${e._recallReason}`); - }); - - // 统计 - const directCount = eventResults.filter(e => e._recallType === 'DIRECT').length; - const similarCount = eventResults.filter(e => e._recallType === 'SIMILAR').length; - const preFilterDist = eventResults[0]?._preFilterDistribution || {}; - - lines.push(''); - lines.push('┌─────────────────────────────────────────────────────────────┐'); - lines.push('│ 【统计】 │'); - lines.push('└─────────────────────────────────────────────────────────────┘'); - lines.push(` L1 片段: ${chunkResults.length} 条`); - lines.push(` L2 事件: ${eventResults.length} / ${allEvents.length} 条 (DIRECT: ${directCount}, SIMILAR: ${similarCount})`); + // L2 + const preFilterDist = eventResults[0]?._preFilterDistribution || {}; + const directCount = eventResults.filter(e => e._recallType === 'DIRECT').length; + const similarCount = eventResults.filter(e => e._recallType === 'SIMILAR').length; + + lines.push(''); + lines.push(' L2 \u4e8b\u4ef6\u8bb0\u5fc6:'); + lines.push(` \u603b\u4e8b\u4ef6: ${allEvents.length} \u6761 | \u901a\u8fc7\u9608\u503c(\u2265${preFilterDist.threshold || 0.65}): ${preFilterDist.passThreshold || 0} \u6761 | \u6700\u7ec8: ${eventResults.length} \u6761`); if (preFilterDist.total) { - lines.push(` L2 过滤前分布(${preFilterDist.total} 条,含bonus):`); - lines.push(` 0.85+: ${preFilterDist['0.85+'] || 0} | 0.7-0.85: ${preFilterDist['0.7-0.85'] || 0} | 0.6-0.7: ${preFilterDist['0.6-0.7'] || 0}`); - lines.push(` 0.5-0.6: ${preFilterDist['0.5-0.6'] || 0} | <0.5: ${preFilterDist['<0.5'] || 0}`); - lines.push(` 通过阈值(>=${preFilterDist.threshold || 0.6}): ${preFilterDist.passThreshold || 0} 条`); + lines.push(` \u5339\u914d\u5ea6: 0.85+: ${preFilterDist['0.85+'] || 0} | 0.7-0.85: ${preFilterDist['0.7-0.85'] || 0} | 0.6-0.7: ${preFilterDist['0.6-0.7'] || 0} | <0.6: ${(preFilterDist['0.5-0.6'] || 0) + (preFilterDist['<0.5'] || 0)}`); } - lines.push(` 实体命中: ${queryEntities?.length || 0} 个`); - if (causalEvents.length) lines.push(` 因果链追溯: ${causalEvents.length} 条`); - lines.push(''); - - // 追加因果树详情 - lines.push(formatCausalTree(causalEvents, eventResults)); - - return lines.join('\n'); -} - -// ═══════════════════════════════════════════════════════════════════════════ -// 主入口 -// ═══════════════════════════════════════════════════════════════════════════ - + lines.push(` \u7c7b\u578b: DIRECT ${directCount} \u6761 | SIMILAR ${similarCount} \u6761`); + + // Causal chains + if (causalEvents.length) { + const maxRefs = Math.max(...causalEvents.map(c => c.chainFrom?.length || 0)); + const maxDepth = Math.max(...causalEvents.map(c => c.depth || 0)); + lines.push(''); + lines.push(' \u56e0\u679c\u94fe\u8ffd\u6eaf:'); + lines.push(` \u8ffd\u6eaf: ${causalEvents.length} \u6761 | \u6700\u5927\u88ab\u5f15: ${maxRefs} \u6b21 | \u6700\u5927\u6df1\u5ea6: ${maxDepth}`); + } + + lines.push(''); + return lines.join('\n'); +} + +// ═══════════════════════════════════════════════════════════════════════════ +// 主入口 +// ═══════════════════════════════════════════════════════════════════════════ + export async function recallMemory(queryText, allEvents, vectorConfig, options = {}) { const T0 = performance.now(); const { chat } = getContext(); @@ -731,23 +684,23 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options = } const segments = buildQuerySegments(chat, CONFIG.QUERY_MSG_COUNT, !!options.excludeLastAi, pendingUserMessage); - - let queryVector, weights; - try { - const result = await embedWeightedQuery(segments, vectorConfig); - queryVector = result?.vector; - weights = result?.weights; - } catch (e) { - xbLog.error(MODULE_ID, '查询向量生成失败', e); - return { events: [], chunks: [], elapsed: Math.round(performance.now() - T0), logText: 'Query embedding failed.' }; - } - - if (!queryVector?.length) { - return { events: [], chunks: [], elapsed: Math.round(performance.now() - T0), logText: 'Empty query vector.' }; - } - + + let queryVector, weights; + try { + const result = await embedWeightedQuery(segments, vectorConfig); + queryVector = result?.vector; + weights = result?.weights; + } catch (e) { + xbLog.error(MODULE_ID, '查询向量生成失败', e); + return { events: [], chunks: [], elapsed: Math.round(performance.now() - T0), logText: 'Query embedding failed.' }; + } + + if (!queryVector?.length) { + return { events: [], chunks: [], elapsed: Math.round(performance.now() - T0), logText: 'Empty query vector.' }; + } + const lexicon = buildEntityLexicon(store, allEvents); - const queryEntities = extractEntities([queryText, ...segments].join('\n'), lexicon); + const queryEntities = extractEntities(segments.join('\n'), lexicon); // ════════════════════════════════════════════════════════════════════════ // L0 召回 @@ -776,32 +729,32 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options = // ════════════════════════════════════════════════════════════════════════ const mergedChunks = mergeAndSparsify(l0VirtualChunks, chunkResults, CONFIG.FLOOR_MAX_CHUNKS); - // ───────────────────────────────────────────────────────────────────── - // 因果链追溯:从 eventResults 出发找祖先事件 - // 注意:是否“额外注入”要去重(如果祖先事件本来已召回,就不额外注入) - // ───────────────────────────────────────────────────────────────────── - const eventIndex = buildEventIndex(allEvents); - const causalMap = traceCausalAncestors(eventResults, eventIndex); - - const recalledIdSet = new Set(eventResults.map(x => x?.event?.id).filter(Boolean)); - const causalEvents = Array.from(causalMap.values()) - .filter(x => x?.event?.id && !recalledIdSet.has(x.event.id)) - .map(x => ({ - event: x.event, - similarity: 0, - _recallType: 'CAUSAL', - _recallReason: `因果链(${x.chainFrom.join(',')})`, - _causalDepth: x.depth, - _chainFrom: x.chainFrom, - chainFrom: x.chainFrom, - depth: x.depth, - })); - - // 排序:引用数 > 深度 > 编号,然后截断 - sortCausalEvents(causalEvents); - const causalEventsTruncated = causalEvents.slice(0, CONFIG.CAUSAL_INJECT_MAX); - - const elapsed = Math.round(performance.now() - T0); + // ───────────────────────────────────────────────────────────────────── + // 因果链追溯:从 eventResults 出发找祖先事件 + // 注意:是否“额外注入”要去重(如果祖先事件本来已召回,就不额外注入) + // ───────────────────────────────────────────────────────────────────── + const eventIndex = buildEventIndex(allEvents); + const causalMap = traceCausalAncestors(eventResults, eventIndex); + + const recalledIdSet = new Set(eventResults.map(x => x?.event?.id).filter(Boolean)); + const causalEvents = Array.from(causalMap.values()) + .filter(x => x?.event?.id && !recalledIdSet.has(x.event.id)) + .map(x => ({ + event: x.event, + similarity: 0, + _recallType: 'CAUSAL', + _recallReason: `因果链(${x.chainFrom.join(',')})`, + _causalDepth: x.depth, + _chainFrom: x.chainFrom, + chainFrom: x.chainFrom, + depth: x.depth, + })); + + // 排序:引用数 > 深度 > 编号,然后截断 + sortCausalEvents(causalEvents); + const causalEventsTruncated = causalEvents.slice(0, CONFIG.CAUSAL_INJECT_MAX); + + const elapsed = Math.round(performance.now() - T0); const logText = formatRecallLog({ elapsed, queryText, @@ -823,18 +776,18 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options = return { events: eventResults, causalEvents: causalEventsTruncated, chunks: mergedChunks, elapsed, logText, queryEntities, l0Results }; } - -export function buildQueryText(chat, count = 2, excludeLastAi = false) { - if (!chat?.length) return ''; - - let messages = chat; - if (excludeLastAi && messages.length > 0 && !messages[messages.length - 1]?.is_user) { - messages = messages.slice(0, -1); - } - - return messages.slice(-count).map(m => { + +export function buildQueryText(chat, count = 2, excludeLastAi = false) { + if (!chat?.length) return ''; + + let messages = chat; + if (excludeLastAi && messages.length > 0 && !messages[messages.length - 1]?.is_user) { + messages = messages.slice(0, -1); + } + + return messages.slice(-count).map(m => { const text = cleanForRecall(m.mes); - const speaker = m.name || (m.is_user ? '用户' : '角色'); - return `${speaker}: ${text.slice(0, 500)}`; - }).filter(Boolean).join('\n'); -} + const speaker = m.name || (m.is_user ? '用户' : '角色'); + return `${speaker}: ${text.slice(0, 500)}`; + }).filter(Boolean).join('\n'); +} diff --git a/modules/story-summary/vector/state-recall.js b/modules/story-summary/vector/state-recall.js index 28caa95..cef2143 100644 --- a/modules/story-summary/vector/state-recall.js +++ b/modules/story-summary/vector/state-recall.js @@ -59,7 +59,7 @@ export async function searchStateAtoms(queryVector, vectorConfig) { const atoms = getStateAtoms(); const atomMap = new Map(atoms.map(a => [a.atomId, a])); - // 计算相似度 + // 计算匹配度 const scored = stateVectors .map(sv => { const atom = atomMap.get(sv.atomId); @@ -92,8 +92,8 @@ export function buildL0FloorBonus(l0Results, bonusFactor = 0.10) { const floorBonus = new Map(); for (const r of l0Results || []) { - // 每个楼层只加一次,取最高相似度对应的 bonus - // 简化处理:统一加 bonusFactor,不区分相似度高低 + // 每个楼层只加一次,取最高匹配度对应的 bonus + // 简化处理:统一加 bonusFactor,不区分匹配度高低 if (!floorBonus.has(r.floor)) { floorBonus.set(r.floor, bonusFactor); } @@ -132,13 +132,13 @@ export function stateToVirtualChunks(l0Results) { /** * 合并 L0 和 L1 chunks,每楼层最多保留 limit 条 - * @param {Array} l0Chunks - 虚拟 chunks(已按相似度排序) - * @param {Array} l1Chunks - 真实 chunks(已按相似度排序) + * @param {Array} l0Chunks - 虚拟 chunks(已按匹配度排序) + * @param {Array} l1Chunks - 真实 chunks(已按匹配度排序) * @param {number} limit - 每楼层上限 * @returns {Array} 合并后的 chunks */ export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) { - // 合并并按相似度排序 + // 合并并按匹配度排序 const all = [...(l0Chunks || []), ...(l1Chunks || [])] .sort((a, b) => b.similarity - a.similarity); @@ -153,7 +153,7 @@ export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) { } } - // 扁平化并保持相似度排序 + // 扁平化并保持匹配度排序 return Array.from(byFloor.values()) .flat() .sort((a, b) => b.similarity - a.similarity);