Adjust story summary recall thresholds

This commit is contained in:
2026-02-01 16:26:29 +08:00
parent 0ac347968e
commit cf0fc88a24
6 changed files with 298 additions and 234 deletions

View File

@@ -6,6 +6,7 @@ import { chat_metadata } from "../../../../../../../script.js";
import { EXT_ID } from "../../../core/constants.js"; import { EXT_ID } from "../../../core/constants.js";
import { xbLog } from "../../../core/debug-core.js"; import { xbLog } from "../../../core/debug-core.js";
import { clearEventVectors, deleteEventVectorsByIds } from "../vector/chunk-store.js"; import { clearEventVectors, deleteEventVectorsByIds } from "../vector/chunk-store.js";
import { clearEventTextIndex } from '../vector/text-search.js';
const MODULE_ID = 'summaryStore'; const MODULE_ID = 'summaryStore';
@@ -278,6 +279,8 @@ export async function clearSummaryData(chatId) {
await clearEventVectors(chatId); await clearEventVectors(chatId);
} }
clearEventTextIndex();
xbLog.info(MODULE_ID, '总结数据已清空'); xbLog.info(MODULE_ID, '总结数据已清空');
} }

View File

@@ -36,7 +36,7 @@ const RECENT_ORPHAN_MAX = 5000; // [待整理] 独立预算
const TOTAL_BUDGET_MAX = 15000; // 总预算(用于日志显示) const TOTAL_BUDGET_MAX = 15000; // 总预算(用于日志显示)
const L3_MAX = 2000; const L3_MAX = 2000;
const ARCS_MAX = 1500; const ARCS_MAX = 1500;
const TOP_N_STAR = 5; // 匹配度前N条加⭐ const TOP_N_STAR = 5; // 相似度前N条加⭐
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// 工具函数 // 工具函数
@@ -180,64 +180,66 @@ function formatInjectionLog(stats, details, recentOrphanStats = null) {
const lines = [ const lines = [
'', '',
'\u250c' + '\u2500'.repeat(61) + '\u2510', '┌─────────────────────────────────────────────────────────────┐',
'\u2502 \u3010\u88c5\u914d\u7edf\u8ba1\u3011 \u2502', '│ 【装配统计】 ',
'\u2514' + '\u2500'.repeat(61) + '\u2518', '└─────────────────────────────────────────────────────────────┘',
` \u603b\u9884\u7b97: ${stats.budget.max} tokens | \u5df2\u4f7f\u7528: ${stats.budget.used} tokens (${pct(stats.budget.used, stats.budget.max)}%)`, ` 总预算: ${stats.budget.max} tokens | 已使用: ${stats.budget.used} tokens (${pct(stats.budget.used, stats.budget.max)}%)`,
'', '',
]; ];
// [1] World constraints // [1] 世界约束
lines.push(' [1] \u4e16\u754c\u7ea6\u675f (\u4e0a\u9650 2000)'); lines.push(` [1] 世界约束 (上限 2000)`);
lines.push(` \u9009\u5165: ${stats.world.count} \u6761 | \u6d88\u8017: ${stats.world.tokens} tokens`); lines.push(` 选入: ${stats.world.count} 条 | 消耗: ${stats.world.tokens} tokens`);
lines.push(''); lines.push('');
// [2] Core + background events // [2] 核心经历 + 过往背景
lines.push(' [2] \u6838\u5fc3\u7ecf\u5386 + \u8fc7\u5f80\u80cc\u666f'); lines.push(` [2] 核心经历 + 过往背景`);
lines.push(` \u4e8b\u4ef6: ${stats.events.selected} \u6761 | \u6d88\u8017: ${stats.events.tokens} tokens`); lines.push(` 事件: ${stats.events.selected} 条 | 消耗: ${stats.events.tokens} tokens`);
// 证据统计(区分 L0 和 L1
const l0EvidenceCount = details.eventList?.filter(e => e.hasL0Evidence)?.length || 0; const l0EvidenceCount = details.eventList?.filter(e => e.hasL0Evidence)?.length || 0;
const l1EvidenceCount = (stats.evidence.attached || 0) - l0EvidenceCount; const l1EvidenceCount = (stats.evidence.attached || 0) - l0EvidenceCount;
lines.push(` \u8bc1\u636e: ${stats.evidence.attached} \u6761 (L0: ${l0EvidenceCount}, L1: ${l1EvidenceCount}) | \u6d88\u8017: ${stats.evidence.tokens} tokens`); lines.push(` 证据: ${stats.evidence.attached} (L0: ${l0EvidenceCount}, L1: ${l1EvidenceCount}) | 消耗: ${stats.evidence.tokens} tokens`);
lines.push(` \u6838\u5fc3: ${details.directCount || 0} \u6761 | \u8fc7\u5f80: ${details.similarCount || 0} \u6761`); lines.push(` 核心: ${details.directCount || 0} 条 | 过往: ${details.similarCount || 0} `);
lines.push(''); lines.push('');
// [3] Long-term chunks // [3] 远期片段
const l0OrphanCount = stats.orphans.l0Count || 0; const l0OrphanCount = stats.orphans.l0Count || 0;
const l1OrphanCount = (stats.orphans.injected || 0) - l0OrphanCount; const l1OrphanCount = (stats.orphans.injected || 0) - l0OrphanCount;
lines.push(' [3] \u8fdc\u671f\u7247\u6bb5 (\u5df2\u603b\u7ed3\u8303\u56f4)'); lines.push(` [3] 远期片段 (已总结范围)`);
lines.push(` \u9009\u5165: ${stats.orphans.injected} \u6761 (L0: ${l0OrphanCount}, L1: ${l1OrphanCount}) | \u6d88\u8017: ${stats.orphans.tokens} tokens`); lines.push(` 选入: ${stats.orphans.injected} (L0: ${l0OrphanCount}, L1: ${l1OrphanCount}) | 消耗: ${stats.orphans.tokens} tokens`);
lines.push(''); lines.push('');
// [4] Recent orphans // [4] 待整理
lines.push(' [4] \u5f85\u6574\u7406 (\u72ec\u7acb\u9884\u7b97 5000)'); lines.push(` [4] 待整理 (独立预算 5000)`);
lines.push(` \u9009\u5165: ${recentOrphanStats?.injected || 0} \u6761 | \u6d88\u8017: ${recentOrphanStats?.tokens || 0} tokens`); lines.push(` 选入: ${recentOrphanStats?.injected || 0} 条 | 消耗: ${recentOrphanStats?.tokens || 0} tokens`);
lines.push(` \u697c\u5c42: ${recentOrphanStats?.floorRange || 'N/A'}`); lines.push(` 楼层: ${recentOrphanStats?.floorRange || 'N/A'}`);
lines.push(''); lines.push('');
// [5] Arcs // [5] 人物弧光
lines.push(' [5] \u4eba\u7269\u5f27\u5149 (\u4e0a\u9650 1500)'); lines.push(` [5] 人物弧光 (上限 1500)`);
lines.push(` \u9009\u5165: ${stats.arcs.count} \u6761 | \u6d88\u8017: ${stats.arcs.tokens} tokens`); lines.push(` 选入: ${stats.arcs.count} 条 | 消耗: ${stats.arcs.tokens} tokens`);
lines.push(''); lines.push('');
// Budget bar // 预算条形图
lines.push(' \u3010\u9884\u7b97\u5206\u5e03\u3011'); lines.push(' 【预算分布】');
const total = stats.budget.max; const total = stats.budget.max;
const bar = (tokens, label) => { const bar = (tokens, label) => {
const width = Math.round((tokens / total) * 30); const width = Math.round((tokens / total) * 30);
const pctStr = pct(tokens, total) + '%'; const pctStr = pct(tokens, total) + '%';
return ` ${label.padEnd(6)} ${'\u2588'.repeat(width).padEnd(30)} ${String(tokens).padStart(5)} (${pctStr})`; return ` ${label.padEnd(6)} ${''.repeat(width).padEnd(30)} ${String(tokens).padStart(5)} (${pctStr})`;
}; };
lines.push(bar(stats.world.tokens, '\u7ea6\u675f')); lines.push(bar(stats.world.tokens, '约束'));
lines.push(bar(stats.events.tokens + stats.evidence.tokens, '\u7ecf\u5386')); lines.push(bar(stats.events.tokens + stats.evidence.tokens, '经历'));
lines.push(bar(stats.orphans.tokens, '\u8fdc\u671f')); lines.push(bar(stats.orphans.tokens, '远期'));
lines.push(bar(recentOrphanStats?.tokens || 0, '\u5f85\u6574\u7406')); lines.push(bar(recentOrphanStats?.tokens || 0, '待整理'));
lines.push(bar(stats.arcs.tokens, '\u5f27\u5149')); lines.push(bar(stats.arcs.tokens, '弧光'));
lines.push(bar(stats.budget.max - stats.budget.used, '\u5269\u4f59')); lines.push(bar(stats.budget.max - stats.budget.used, '剩余'));
lines.push(''); lines.push('');
return lines.join('\n'); return lines.join('\n');
} }
// 重写事件文本里的序号前缀:把 “{idx}. ” 或 “{idx}.【...】” 的 idx 替换 // 重写事件文本里的序号前缀:把 “{idx}. ” 或 “{idx}.【...】” 的 idx 替换
function renumberEventText(text, newIndex) { function renumberEventText(text, newIndex) {
const s = String(text || ""); const s = String(text || "");
@@ -448,7 +450,7 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
return lines.join("\n"); return lines.join("\n");
} }
// 候选按匹配度从高到低(保证高分优先拥有证据) // 候选按相似度从高到低(保证高分优先拥有证据)
const candidates = [...recalledEvents].sort((a, b) => (b.similarity || 0) - (a.similarity || 0)); const candidates = [...recalledEvents].sort((a, b) => (b.similarity || 0) - (a.similarity || 0));
const selectedDirect = []; // { event, text, tokens, chunk, hasEvidence } const selectedDirect = []; // { event, text, tokens, chunk, hasEvidence }

View File

@@ -2677,30 +2677,3 @@ h1 span {
font-size: .8125rem; font-size: .8125rem;
line-height: 1.8; line-height: 1.8;
} }
/* Mobile adaptation for the debug log area (viewports up to 768px wide) */
@media (max-width: 768px) {
#recall-log-content {
font-size: 10px;
padding: 8px;
overflow-x: hidden; /* disable horizontal scrolling */
word-break: break-all; /* force line breaks, even inside long tokens */
white-space: pre-wrap; /* keep existing line breaks but allow automatic wrapping */
}
/* Same wrapping rules as #recall-log-content above, without the padding override */
.debug-log-viewer {
font-size: 10px;
overflow-x: hidden;
word-break: break-all;
white-space: pre-wrap;
}
}
/* Viewports up to 480px wide: smaller font, tighter line height and padding for both log containers */
@media (max-width: 480px) {
#recall-log-content,
.debug-log-viewer {
font-size: 9px;
line-height: 1.4;
padding: 6px;
}
}

View File

@@ -2,8 +2,9 @@
// L1 chunk + L2 event 召回 // L1 chunk + L2 event 召回
// - 全量向量打分 // - 全量向量打分
// - 指数衰减加权 Query Embedding // - 指数衰减加权 Query Embedding
// - 实体/参与者加分 // - L0 floor 加权
// - MMR 去重 // - RRF 混合检索(向量 + 文本)
// - MMR 去重(融合后执行)
// - floor 稀疏去重 // - floor 稀疏去重
import { getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from './chunk-store.js'; import { getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from './chunk-store.js';
@@ -18,6 +19,7 @@ import {
stateToVirtualChunks, stateToVirtualChunks,
mergeAndSparsify, mergeAndSparsify,
} from './state-recall.js'; } from './state-recall.js';
import { ensureEventTextIndex, searchEventsByText } from './text-search.js';
const MODULE_ID = 'recall'; const MODULE_ID = 'recall';
@@ -27,9 +29,8 @@ const CONFIG = {
QUERY_MAX_CHARS: 600, QUERY_MAX_CHARS: 600,
QUERY_CONTEXT_CHARS: 240, QUERY_CONTEXT_CHARS: 240,
// 因果链 CAUSAL_CHAIN_MAX_DEPTH: 10,
CAUSAL_CHAIN_MAX_DEPTH: 10, // 放宽跳数,让图自然终止 CAUSAL_INJECT_MAX: 30,
CAUSAL_INJECT_MAX: 30, // 放宽上限,由 prompt token 预算最终控制
CANDIDATE_CHUNKS: 200, CANDIDATE_CHUNKS: 200,
CANDIDATE_EVENTS: 150, CANDIDATE_EVENTS: 150,
@@ -38,18 +39,16 @@ const CONFIG = {
MAX_EVENTS: 120, MAX_EVENTS: 120,
MIN_SIMILARITY_CHUNK: 0.6, MIN_SIMILARITY_CHUNK: 0.6,
MIN_SIMILARITY_CHUNK_RECENT: 0.5,
MIN_SIMILARITY_EVENT: 0.65, MIN_SIMILARITY_EVENT: 0.65,
MMR_LAMBDA: 0.72, MMR_LAMBDA: 0.72,
BONUS_PARTICIPANT_HIT: 0.08,
BONUS_TEXT_HIT: 0.05,
BONUS_WORLD_TOPIC_HIT: 0.06,
// L0 配置
L0_FLOOR_BONUS_FACTOR: 0.10, L0_FLOOR_BONUS_FACTOR: 0.10,
FLOOR_MAX_CHUNKS: 2, FLOOR_MAX_CHUNKS: 2,
FLOOR_LIMIT: 1, FLOOR_LIMIT: 1,
RRF_K: 60,
TEXT_SEARCH_LIMIT: 80,
}; };
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
@@ -75,10 +74,53 @@ function normalizeVec(v) {
} }
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// 因果链追溯Graph-augmented retrieval // RRF 融合
// - 从已召回事件出发,沿 causedBy 向上追溯祖先事件 // ═══════════════════════════════════════════════════════════════════════════
// - 记录边chainFrom = 哪个召回事件需要它
/**
 * Merge a vector-ranked list and a text-ranked list of events using
 * Reciprocal Rank Fusion: each list contributes `1 / (k + rank)` to an
 * event's fused score, where rank is 1-based.
 *
 * @param {Array<{event?: {id: string}, vector?: number[]}>} vectorRanked
 *   Vector hits, best first; the array position is used as the rank.
 * @param {Array<{id: string, textRank: number}>} textRanked
 *   Text hits carrying their own rank in `textRank`.
 * @param {Map<string, object>} eventById
 *   Lookup used to attach the full event; ids missing from it are dropped.
 * @param {number} [k=CONFIG.RRF_K] RRF smoothing constant.
 * @returns {Array<object>} Entries `{ id, rrf, vRank, tRank, type, vector?, event }`
 *   where `type` is 'HYBRID' (both lists), 'VECTOR' or 'TEXT', sorted by
 *   descending `rrf`, then HYBRID > VECTOR > TEXT, then vector rank, then text rank.
 */
function fuseEventsByRRF(vectorRanked, textRanked, eventById, k = CONFIG.RRF_K) {
    const fusedById = new Map();

    // Fetch the accumulator for an id, creating it on first sight.
    const entryFor = (id) => {
        let entry = fusedById.get(id);
        if (!entry) {
            entry = { id, rrf: 0, vRank: Infinity, tRank: Infinity, type: 'TEXT' };
            fusedById.set(id, entry);
        }
        return entry;
    };

    (vectorRanked ?? []).forEach((hit, index) => {
        const id = hit?.event?.id;
        if (!id) return;
        const entry = entryFor(id);
        // A ranker contributes one term per document: keep the first (best)
        // rank and ignore repeats instead of adding the score twice.
        if (entry.vRank !== Infinity) return;
        entry.vRank = index + 1;
        entry.rrf += 1 / (k + entry.vRank);
        entry.type = entry.tRank !== Infinity ? 'HYBRID' : 'VECTOR';
        entry.vector = hit.vector;
    });

    (textRanked ?? []).forEach((hit) => {
        const id = hit?.id;
        const rank = hit?.textRank;
        // A missing id or non-numeric rank would turn rrf into NaN and make
        // the comparator below return NaN, so such hits are skipped.
        if (!id || !Number.isFinite(rank)) return;
        const entry = entryFor(id);
        if (entry.tRank !== Infinity) return;
        entry.tRank = rank;
        entry.rrf += 1 / (k + rank);
        entry.type = entry.vRank !== Infinity ? 'HYBRID' : 'TEXT';
    });

    const typePriority = { HYBRID: 0, VECTOR: 1, TEXT: 2 };

    return Array.from(fusedById.values())
        .map((entry) => ({ ...entry, event: eventById.get(entry.id) }))
        .filter((entry) => entry.event)
        .sort((a, b) => {
            if (b.rrf !== a.rrf) return b.rrf - a.rrf;
            if (typePriority[a.type] !== typePriority[b.type]) {
                return typePriority[a.type] - typePriority[b.type];
            }
            if (a.vRank !== b.vRank) return a.vRank - b.vRank;
            // Equality check first: Infinity - Infinity would be NaN.
            return a.tRank === b.tRank ? 0 : a.tRank - b.tRank;
        });
}
// ═══════════════════════════════════════════════════════════════════════════
// 因果链追溯
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
function buildEventIndex(allEvents) { function buildEventIndex(allEvents) {
@@ -89,9 +131,6 @@ function buildEventIndex(allEvents) {
return map; return map;
} }
/**
* @returns {Map<string, {event, depth, chainFrom}>}
*/
function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MAX_DEPTH) { function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MAX_DEPTH) {
const out = new Map(); const out = new Map();
const idRe = /^evt-\d+$/; const idRe = /^evt-\d+$/;
@@ -103,7 +142,6 @@ function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUS
const ev = eventIndex.get(parentId); const ev = eventIndex.get(parentId);
if (!ev) return; if (!ev) return;
// 如果同一个祖先被多个召回事件引用:保留更“近”的深度或追加来源
const existed = out.get(parentId); const existed = out.get(parentId);
if (!existed) { if (!existed) {
out.set(parentId, { event: ev, depth, chainFrom: [chainFrom] }); out.set(parentId, { event: ev, depth, chainFrom: [chainFrom] });
@@ -128,20 +166,12 @@ function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUS
return out; return out;
} }
/**
* 因果事件排序:引用数 > 深度 > 编号
*/
/**
 * Order causal-ancestor entries for injection. The array is sorted in place
 * and the same array is returned.
 *
 * Priority:
 *   1. entries referenced by more recall chains (`chainFrom.length`) first;
 *   2. then shallower `depth` first;
 *   3. then by event id, comparing digit runs numerically so that
 *      `evt-2` sorts before `evt-10`.
 *
 * @param {Array<{event: {id: string}, depth?: number, chainFrom?: string[]}>} causalArray
 * @returns {Array<object>} `causalArray` itself, sorted.
 */
function sortCausalEvents(causalArray) {
    return causalArray.sort((a, b) => {
        // A missing chainFrom/depth counts as 0 rather than throwing or yielding NaN.
        const refDiff = (b.chainFrom?.length || 0) - (a.chainFrom?.length || 0);
        if (refDiff !== 0) return refDiff;

        const depthDiff = (a.depth || 0) - (b.depth || 0);
        if (depthDiff !== 0) return depthDiff;

        // Plain localeCompare orders "evt-10" before "evt-2"; numeric collation
        // gives the intended event-number order.
        return String(a.event.id).localeCompare(String(b.event.id), undefined, { numeric: true });
    });
}
@@ -150,7 +180,6 @@ function normalize(s) {
return String(s || '').normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim(); return String(s || '').normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim();
} }
// 从 summary 解析楼层范围:(#321-322) 或 (#321)
function parseFloorRange(summary) { function parseFloorRange(summary) {
if (!summary) return null; if (!summary) return null;
const match = String(summary).match(/\(#(\d+)(?:-(\d+))?\)/); const match = String(summary).match(/\(#(\d+)(?:-(\d+))?\)/);
@@ -161,8 +190,6 @@ function parseFloorRange(summary) {
} }
/**
 * Prepare message text for recall queries.
 *
 * The text is first passed through `filterText` (defined outside this block;
 * presumably the user-configurable filter rules — confirm against its
 * definition), then every `[tts:...]` marker is removed case-insensitively
 * and surrounding whitespace is trimmed.
 *
 * @param {string} text Raw message text.
 * @returns {string} Filtered text without TTS markers.
 */
function cleanForRecall(text) {
    const ttsMarker = /\[tts:[^\]]*\]/gi;
    const filtered = filterText(text);
    const withoutTts = filtered.replace(ttsMarker, '');
    return withoutTts.trim();
}
@@ -187,13 +214,11 @@ function buildQuerySegments(chat, count, excludeLastAi, pendingUserMessage = nul
messages = messages.slice(0, -1); messages = messages.slice(0, -1);
} }
// ★ 如果有待处理的用户消息且 chat 中最后一条不是它,追加虚拟消息
if (pendingUserMessage) { if (pendingUserMessage) {
const lastMsg = messages[messages.length - 1]; const lastMsg = messages[messages.length - 1];
const lastMsgText = lastMsg?.mes?.trim() || ""; const lastMsgText = lastMsg?.mes?.trim() || "";
const pendingText = pendingUserMessage.trim(); const pendingText = pendingUserMessage.trim();
// 避免重复(如果 chat 已包含该消息则不追加)
if (lastMsgText !== pendingText) { if (lastMsgText !== pendingText) {
messages = [...messages, { is_user: true, name: name1 || "用户", mes: pendingUserMessage }]; messages = [...messages, { is_user: true, name: name1 || "用户", mes: pendingUserMessage }];
} }
@@ -330,7 +355,7 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) {
// L1 Chunks 检索 // L1 Chunks 检索
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map()) { async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(), lastSummarizedFloor = -1) {
const { chatId } = getContext(); const { chatId } = getContext();
if (!chatId || !queryVector?.length) return []; if (!chatId || !queryVector?.length) return [];
@@ -359,11 +384,21 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
}; };
}); });
// Pre-filter stats for logging const candidates = scored
.filter(s => {
const threshold = s.floor > lastSummarizedFloor
? CONFIG.MIN_SIMILARITY_CHUNK_RECENT
: CONFIG.MIN_SIMILARITY_CHUNK;
return s.similarity >= threshold;
})
.sort((a, b) => b.similarity - a.similarity)
.slice(0, CONFIG.CANDIDATE_CHUNKS);
const preFilterStats = { const preFilterStats = {
total: scored.length, total: scored.length,
passThreshold: scored.filter(s => s.similarity >= CONFIG.MIN_SIMILARITY_CHUNK).length, passThreshold: candidates.length,
threshold: CONFIG.MIN_SIMILARITY_CHUNK, thresholdRemote: CONFIG.MIN_SIMILARITY_CHUNK,
thresholdRecent: CONFIG.MIN_SIMILARITY_CHUNK_RECENT,
distribution: { distribution: {
'0.8+': scored.filter(s => s.similarity >= 0.8).length, '0.8+': scored.filter(s => s.similarity >= 0.8).length,
'0.7-0.8': scored.filter(s => s.similarity >= 0.7 && s.similarity < 0.8).length, '0.7-0.8': scored.filter(s => s.similarity >= 0.7 && s.similarity < 0.8).length,
@@ -373,12 +408,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
}, },
}; };
const candidates = scored
.filter(s => s.similarity >= CONFIG.MIN_SIMILARITY_CHUNK)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, CONFIG.CANDIDATE_CHUNKS);
// 动态 K质量不够就少拿
const dynamicK = Math.min(CONFIG.MAX_CHUNKS, candidates.length); const dynamicK = Math.min(CONFIG.MAX_CHUNKS, candidates.length);
const selected = mmrSelect( const selected = mmrSelect(
@@ -389,8 +418,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
c => c.similarity c => c.similarity
); );
// floor 稀疏去重:每个楼层只保留该楼层匹配度最高的那条
const bestByFloor = new Map(); const bestByFloor = new Map();
for (const s of selected) { for (const s of selected) {
const prev = bestByFloor.get(s.floor); const prev = bestByFloor.get(s.floor);
@@ -399,7 +426,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
} }
} }
// 最终结果按匹配度降序
const sparse = Array.from(bestByFloor.values()).sort((a, b) => b.similarity - a.similarity); const sparse = Array.from(bestByFloor.values()).sort((a, b) => b.similarity - a.similarity);
const floors = [...new Set(sparse.map(c => c.floor))]; const floors = [...new Set(sparse.map(c => c.floor))];
@@ -420,7 +446,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
}; };
}).filter(Boolean); }).filter(Boolean);
// Attach stats for logging
if (results.length > 0) { if (results.length > 0) {
results._preFilterStats = preFilterStats; results._preFilterStats = preFilterStats;
} }
@@ -429,14 +454,12 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
} }
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// L2 Events 检索 // L2 Events 检索RRF 混合 + MMR 后置)
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities, l0FloorBonus = new Map()) { async function searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntities, l0FloorBonus = new Map()) {
const { chatId, name1 } = getContext(); const { chatId } = getContext();
if (!chatId || !queryVector?.length) { if (!chatId || !queryVector?.length) return [];
return [];
}
const meta = await getMeta(chatId); const meta = await getMeta(chatId);
const fp = getEngineFingerprint(vectorConfig); const fp = getEngineFingerprint(vectorConfig);
@@ -446,53 +469,29 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector])); const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
if (!vectorMap.size) return []; if (!vectorMap.size) return [];
const userName = normalize(name1); // 构建/更新文本索引
const queryNormList = (queryEntities || []).map(normalize).filter(Boolean); const revision = `${chatId}:${store?.updatedAt || 0}:${allEvents.length}`;
const querySet = new Set(queryNormList); ensureEventTextIndex(allEvents, revision);
// 只取硬约束类的 world topic // 文本路检索
const worldTopics = (store?.json?.world || []) const textRanked = searchEventsByText(queryTextForSearch, CONFIG.TEXT_SEARCH_LIMIT);
.filter(w => ['inventory', 'rule', 'knowledge'].includes(String(w.category).toLowerCase()))
.map(w => normalize(w.topic)) // ═══════════════════════════════════════════════════════════════════════
.filter(Boolean); // 向量路检索(只保留 L0 加权)
// ═══════════════════════════════════════════════════════════════════════
const scored = (allEvents || []).map((event, idx) => { const scored = (allEvents || []).map((event, idx) => {
const v = vectorMap.get(event.id); const v = vectorMap.get(event.id);
const sim = v ? cosineSimilarity(queryVector, v) : 0; const sim = v ? cosineSimilarity(queryVector, v) : 0;
let bonus = 0; let bonus = 0;
const reasons = [];
// participants 命中 // L0 加权
const participants = (event.participants || []).map(normalize).filter(Boolean);
const hitCount = participants.filter(p => p !== userName && querySet.has(p)).length;
const hasParticipantHit = hitCount > 0;
if (hasParticipantHit) {
bonus += CONFIG.BONUS_PARTICIPANT_HIT * Math.log2(hitCount + 1);
reasons.push(hitCount > 1 ? `participant×${hitCount}` : 'participant');
}
// text 命中
const text = normalize(`${event.title || ''} ${event.summary || ''}`);
const textHitCount = queryNormList.filter(e => text.includes(e)).length;
if (textHitCount > 0) {
bonus += CONFIG.BONUS_TEXT_HIT * Math.log2(textHitCount + 1);
reasons.push(textHitCount > 1 ? `text×${textHitCount}` : 'text');
}
// world topic 命中
if (worldTopics.some(topic => querySet.has(topic) && text.includes(topic))) {
bonus += CONFIG.BONUS_WORLD_TOPIC_HIT;
reasons.push('world');
}
// L0 加权:事件覆盖楼层范围命中
const range = parseFloorRange(event.summary); const range = parseFloorRange(event.summary);
if (range) { if (range) {
for (let f = range.start; f <= range.end; f++) { for (let f = range.start; f <= range.end; f++) {
if (l0FloorBonus.has(f)) { if (l0FloorBonus.has(f)) {
bonus += l0FloorBonus.get(f); bonus += l0FloorBonus.get(f);
reasons.push('L0');
break; break;
} }
} }
@@ -503,15 +502,11 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
_idx: idx, _idx: idx,
event, event,
similarity: sim, similarity: sim,
bonus,
finalScore: sim + bonus, finalScore: sim + bonus,
reasons,
isDirect: hasParticipantHit,
vector: v, vector: v,
}; };
}); });
// ★ 记录过滤前的分布(用 finalScore与显示一致
const preFilterDistribution = { const preFilterDistribution = {
total: scored.length, total: scored.length,
'0.85+': scored.filter(s => s.finalScore >= 0.85).length, '0.85+': scored.filter(s => s.finalScore >= 0.85).length,
@@ -523,41 +518,66 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
threshold: CONFIG.MIN_SIMILARITY_EVENT, threshold: CONFIG.MIN_SIMILARITY_EVENT,
}; };
// ★ 过滤改成用 finalScore包含 bonus // 向量路:纯相似度排序(不在这里做 MMR
const candidates = scored const candidates = scored
.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT) .filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
.sort((a, b) => b.finalScore - a.finalScore) .sort((a, b) => b.finalScore - a.finalScore)
.slice(0, CONFIG.CANDIDATE_EVENTS); .slice(0, CONFIG.CANDIDATE_EVENTS);
// 动态 K质量不够就少拿 const vectorRanked = candidates.map(s => ({
const dynamicK = Math.min(CONFIG.MAX_EVENTS, candidates.length); event: s.event,
similarity: s.finalScore,
vector: s.vector,
}));
const selected = mmrSelect( // RRF 融合
candidates, const eventById = new Map(allEvents.map(e => [e.id, e]));
dynamicK, const fused = fuseEventsByRRF(vectorRanked, textRanked, eventById);
// 向量非空时过滤纯 TEXT
const hasVector = vectorRanked.length > 0;
const filtered = hasVector ? fused.filter(x => x.type !== 'TEXT') : fused;
// MMR 放在融合后:对最终候选集去重
const mmrInput = filtered.slice(0, CONFIG.CANDIDATE_EVENTS).map(x => ({
...x,
_id: x.id,
}));
const mmrOutput = mmrSelect(
mmrInput,
CONFIG.MAX_EVENTS,
CONFIG.MMR_LAMBDA, CONFIG.MMR_LAMBDA,
c => c.vector, c => c.vector || null,
c => c.finalScore c => c.rrf
); );
return selected // 构造结果
.sort((a, b) => b.finalScore - a.finalScore) const results = mmrOutput.map(x => ({
.map(s => ({ event: x.event,
event: s.event, similarity: x.rrf,
similarity: s.finalScore, _recallType: x.type === 'HYBRID' ? 'DIRECT' : 'SIMILAR',
_recallType: s.isDirect ? 'DIRECT' : 'SIMILAR', _recallReason: x.type,
_recallReason: s.reasons.length ? s.reasons.join('+') : '相似', _rrfDetail: { vRank: x.vRank, tRank: x.tRank, rrf: x.rrf },
_preFilterDistribution: preFilterDistribution, }));
}));
// 统计信息附加到第一条结果
if (results.length > 0) {
results[0]._preFilterDistribution = preFilterDistribution;
results[0]._rrfStats = {
vectorCount: vectorRanked.length,
textCount: textRanked.length,
hybridCount: fused.filter(x => x.type === 'HYBRID').length,
vectorOnlyCount: fused.filter(x => x.type === 'VECTOR').length,
textOnlyFiltered: fused.filter(x => x.type === 'TEXT').length,
};
}
return results;
} }
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// 日志:因果树格式化 // 日志
// ═══════════════════════════════════════════════════════════════════════════
// ═══════════════════════════════════════════════════════════════════════════
// 日志:主报告
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
function formatRecallLog({ function formatRecallLog({
@@ -571,21 +591,19 @@ function formatRecallLog({
causalEvents = [], causalEvents = [],
chunkPreFilterStats = null, chunkPreFilterStats = null,
l0Results = [], l0Results = [],
l0PreFilterStats = null,
}) { }) {
const lines = [ const lines = [
'\u2554' + '\u2550'.repeat(62) + '\u2557', '\u2554' + '\u2550'.repeat(62) + '\u2557',
'\u2551 \u8bb0\u5fc6\u53ec\u56de\u62a5\u544a \u2551', '\u2551 记忆召回报告 \u2551',
'\u2560' + '\u2550'.repeat(62) + '\u2563', '\u2560' + '\u2550'.repeat(62) + '\u2563',
`\u2551 \u8017\u65f6: ${elapsed}ms`, `\u2551 耗时: ${elapsed}ms`,
'\u255a' + '\u2550'.repeat(62) + '\u255d', '\u255a' + '\u2550'.repeat(62) + '\u255d',
'', '',
'\u250c' + '\u2500'.repeat(61) + '\u2510', '\u250c' + '\u2500'.repeat(61) + '\u2510',
'\u2502 \u3010\u67e5\u8be2\u6784\u5efa\u3011\u6700\u8fd1 5 \u6761\u6d88\u606f\uff0c\u6307\u6570\u8870\u51cf\u52a0\u6743 (\u03b2=0.7) \u2502', '\u2502 【查询构建】最近 5 条消息,指数衰减加权 (β=0.7) \u2502',
'\u2514' + '\u2500'.repeat(61) + '\u2518', '\u2514' + '\u2500'.repeat(61) + '\u2518',
]; ];
// Keep query previews only (the only place to keep raw text)
const segmentsSorted = segments.map((s, i) => ({ const segmentsSorted = segments.map((s, i) => ({
idx: i + 1, idx: i + 1,
weight: weights?.[i] ?? 0, weight: weights?.[i] ?? 0,
@@ -595,74 +613,57 @@ function formatRecallLog({
segmentsSorted.forEach((s, rank) => { segmentsSorted.forEach((s, rank) => {
const bar = '\u2588'.repeat(Math.round(s.weight * 20)); const bar = '\u2588'.repeat(Math.round(s.weight * 20));
const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text; const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text;
const marker = rank === 0 ? ' \u25c0 \u4e3b\u5bfc' : ''; const marker = rank === 0 ? ' ◀ 主导' : '';
lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`); lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
}); });
lines.push(''); lines.push('');
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510'); lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
lines.push('\u2502 \u3010\u63d0\u53d6\u5b9e\u4f53\u3011 \u2502'); lines.push('\u2502 【提取实体】 \u2502');
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518'); lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
lines.push(` ${queryEntities?.length ? queryEntities.join('\u3001') : '(\u65e0)'}`); lines.push(` ${queryEntities?.length ? queryEntities.join('') : '()'}`);
// Recall stats (numbers only)
lines.push(''); lines.push('');
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510'); lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
lines.push('\u2502 \u3010\u53ec\u56de\u7edf\u8ba1\u3011 \u2502'); lines.push('\u2502 【召回统计】 \u2502');
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518'); lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
// L0 // L0
const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b); const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b);
lines.push(' L0 \u8bed\u4e49\u951a\u70b9:'); lines.push(' L0 语义锚点:');
if (l0Results.length) { if (l0Results.length) {
const l0Dist = { lines.push(` 选入: ${l0Results.length} 条 | 影响楼层: ${l0Floors.join(', ')} (+${CONFIG.L0_FLOOR_BONUS_FACTOR} 加权)`);
'0.8+': l0Results.filter(r => r.similarity >= 0.8).length,
'0.7-0.8': l0Results.filter(r => r.similarity >= 0.7 && r.similarity < 0.8).length,
'0.6-0.7': l0Results.filter(r => r.similarity >= 0.6 && r.similarity < 0.7).length,
'0.55-0.6': l0Results.filter(r => r.similarity >= 0.55 && r.similarity < 0.6).length,
};
lines.push(` \u9009\u5165: ${l0Results.length} \u6761 | \u5f71\u54cd\u697c\u5c42: ${l0Floors.join(', ')} (+${CONFIG.L0_FLOOR_BONUS_FACTOR} \u52a0\u6743)`);
lines.push(` \u5339\u914d\u5ea6: 0.8+: ${l0Dist['0.8+']} | 0.7-0.8: ${l0Dist['0.7-0.8']} | 0.6-0.7: ${l0Dist['0.6-0.7']} | 0.55-0.6: ${l0Dist['0.55-0.6']}`);
} else { } else {
lines.push(' (\u65e0\u6570\u636e)'); lines.push(' (无数据)');
} }
// L1 // L1
lines.push(''); lines.push('');
lines.push(' L1 \u539f\u6587\u7247\u6bb5:'); lines.push(' L1 原文片段:');
if (chunkPreFilterStats) { if (chunkPreFilterStats) {
const dist = chunkPreFilterStats.distribution || {}; const dist = chunkPreFilterStats.distribution || {};
lines.push(` \u5168\u91cf: ${chunkPreFilterStats.total} \u6761 | \u901a\u8fc7\u9608\u503c(\u2265${chunkPreFilterStats.threshold}): ${chunkPreFilterStats.passThreshold} \u6761 | \u6700\u7ec8: ${chunkResults.length} \u6761`); lines.push(` \u5168\u91cf: ${chunkPreFilterStats.total} \u6761 | \u901a\u8fc7\u9608\u503c(\u8fdc\u671f\u2265${chunkPreFilterStats.thresholdRemote}, \u5f85\u6574\u7406\u2265${chunkPreFilterStats.thresholdRecent}): ${chunkPreFilterStats.passThreshold} \u6761 | \u6700\u7ec8: ${chunkResults.length} \u6761`);
lines.push(` \u5339\u914d\u5ea6: 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0} | <0.6: ${(dist['0.55-0.6'] || 0) + (dist['<0.55'] || 0)}`); lines.push(` 匹配度: 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0}`);
const floorCounts = new Map();
chunkResults.forEach(c => floorCounts.set(c.floor, (floorCounts.get(c.floor) || 0) + 1));
const floorStats = `\u8986\u76d6 ${floorCounts.size} \u4e2a\u697c\u5c42`;
lines.push(` ${floorStats}`);
} else { } else {
lines.push(` \u9009\u5165: ${chunkResults.length} \u6761`); lines.push(` 选入: ${chunkResults.length} `);
} }
// L2 // L2
const preFilterDist = eventResults[0]?._preFilterDistribution || {}; const rrfStats = eventResults[0]?._rrfStats || {};
const directCount = eventResults.filter(e => e._recallType === 'DIRECT').length;
const similarCount = eventResults.filter(e => e._recallType === 'SIMILAR').length;
lines.push(''); lines.push('');
lines.push(' L2 \u4e8b\u4ef6\u8bb0\u5fc6:'); lines.push(' L2 事件记忆 (RRF 混合检索):');
lines.push(` \u603b\u4e8b\u4ef6: ${allEvents.length} \u6761 | \u901a\u8fc7\u9608\u503c(\u2265${preFilterDist.threshold || 0.65}): ${preFilterDist.passThreshold || 0} \u6761 | \u6700\u7ec8: ${eventResults.length} \u6761`); lines.push(` 总事件: ${allEvents.length} 条 | 最终: ${eventResults.length} `);
if (preFilterDist.total) { lines.push(` 向量路: ${rrfStats.vectorCount || 0} 条 | 文本路: ${rrfStats.textCount || 0}`);
lines.push(` \u5339\u914d\u5ea6: 0.85+: ${preFilterDist['0.85+'] || 0} | 0.7-0.85: ${preFilterDist['0.7-0.85'] || 0} | 0.6-0.7: ${preFilterDist['0.6-0.7'] || 0} | <0.6: ${(preFilterDist['0.5-0.6'] || 0) + (preFilterDist['<0.5'] || 0)}`); lines.push(` HYBRID: ${rrfStats.hybridCount || 0} 条 | 纯 VECTOR: ${rrfStats.vectorOnlyCount || 0} 条 | 纯 TEXT (已过滤): ${rrfStats.textOnlyFiltered || 0}`);
}
lines.push(` \u7c7b\u578b: DIRECT ${directCount} \u6761 | SIMILAR ${similarCount} \u6761`);
// Causal chains // Causal
if (causalEvents.length) { if (causalEvents.length) {
const maxRefs = Math.max(...causalEvents.map(c => c.chainFrom?.length || 0)); const maxRefs = Math.max(...causalEvents.map(c => c.chainFrom?.length || 0));
const maxDepth = Math.max(...causalEvents.map(c => c.depth || 0)); const maxDepth = Math.max(...causalEvents.map(c => c.depth || 0));
lines.push(''); lines.push('');
lines.push(' \u56e0\u679c\u94fe\u8ffd\u6eaf:'); lines.push(' 因果链追溯:');
lines.push(` \u8ffd\u6eaf: ${causalEvents.length} \u6761 | \u6700\u5927\u88ab\u5f15: ${maxRefs} \u6b21 | \u6700\u5927\u6df1\u5ea6: ${maxDepth}`); lines.push(` 追溯: ${causalEvents.length} 条 | 最大被引: ${maxRefs} 次 | 最大深度: ${maxDepth}`);
} }
lines.push(''); lines.push('');
@@ -677,6 +678,7 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
const T0 = performance.now(); const T0 = performance.now();
const { chat } = getContext(); const { chat } = getContext();
const store = getSummaryStore(); const store = getSummaryStore();
const lastSummarizedFloor = store?.lastSummarizedMesId ?? -1;
const { pendingUserMessage = null } = options; const { pendingUserMessage = null } = options;
if (!allEvents?.length) { if (!allEvents?.length) {
@@ -702,9 +704,15 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
const lexicon = buildEntityLexicon(store, allEvents); const lexicon = buildEntityLexicon(store, allEvents);
const queryEntities = extractEntities(segments.join('\n'), lexicon); const queryEntities = extractEntities(segments.join('\n'), lexicon);
// ════════════════════════════════════════════════════════════════════════ // 构建文本查询串:最后一条消息 + 实体 + 关键词
const lastSeg = segments[segments.length - 1] || '';
const queryTextForSearch = [
lastSeg,
...queryEntities,
...(store?.json?.keywords || []).slice(0, 5).map(k => k.text),
].join(' ');
// L0 召回 // L0 召回
// ════════════════════════════════════════════════════════════════════════
let l0Results = []; let l0Results = [];
let l0FloorBonus = new Map(); let l0FloorBonus = new Map();
let l0VirtualChunks = []; let l0VirtualChunks = [];
@@ -718,21 +726,15 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
} }
const [chunkResults, eventResults] = await Promise.all([ const [chunkResults, eventResults] = await Promise.all([
searchChunks(queryVector, vectorConfig, l0FloorBonus), searchChunks(queryVector, vectorConfig, l0FloorBonus, lastSummarizedFloor),
searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities, l0FloorBonus), searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntities, l0FloorBonus),
]); ]);
const chunkPreFilterStats = chunkResults._preFilterStats || null; const chunkPreFilterStats = chunkResults._preFilterStats || null;
// ════════════════════════════════════════════════════════════════════════
// 合并 L0 虚拟 chunks 到 L1
// ════════════════════════════════════════════════════════════════════════
const mergedChunks = mergeAndSparsify(l0VirtualChunks, chunkResults, CONFIG.FLOOR_MAX_CHUNKS); const mergedChunks = mergeAndSparsify(l0VirtualChunks, chunkResults, CONFIG.FLOOR_MAX_CHUNKS);
// ───────────────────────────────────────────────────────────────────── // 因果链追溯
// 因果链追溯:从 eventResults 出发找祖先事件
// 注意:是否“额外注入”要去重(如果祖先事件本来已召回,就不额外注入)
// ─────────────────────────────────────────────────────────────────────
const eventIndex = buildEventIndex(allEvents); const eventIndex = buildEventIndex(allEvents);
const causalMap = traceCausalAncestors(eventResults, eventIndex); const causalMap = traceCausalAncestors(eventResults, eventIndex);
@@ -750,7 +752,6 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
depth: x.depth, depth: x.depth,
})); }));
// 排序:引用数 > 深度 > 编号,然后截断
sortCausalEvents(causalEvents); sortCausalEvents(causalEvents);
const causalEventsTruncated = causalEvents.slice(0, CONFIG.CAUSAL_INJECT_MAX); const causalEventsTruncated = causalEvents.slice(0, CONFIG.CAUSAL_INJECT_MAX);

View File

@@ -59,7 +59,7 @@ export async function searchStateAtoms(queryVector, vectorConfig) {
const atoms = getStateAtoms(); const atoms = getStateAtoms();
const atomMap = new Map(atoms.map(a => [a.atomId, a])); const atomMap = new Map(atoms.map(a => [a.atomId, a]));
// 计算匹配 // 计算相似
const scored = stateVectors const scored = stateVectors
.map(sv => { .map(sv => {
const atom = atomMap.get(sv.atomId); const atom = atomMap.get(sv.atomId);
@@ -92,8 +92,8 @@ export function buildL0FloorBonus(l0Results, bonusFactor = 0.10) {
const floorBonus = new Map(); const floorBonus = new Map();
for (const r of l0Results || []) { for (const r of l0Results || []) {
// 每个楼层只加一次,取最高匹配度对应的 bonus // 每个楼层只加一次,取最高相似度对应的 bonus
// 简化处理:统一加 bonusFactor不区分匹配度高低 // 简化处理:统一加 bonusFactor不区分相似度高低
if (!floorBonus.has(r.floor)) { if (!floorBonus.has(r.floor)) {
floorBonus.set(r.floor, bonusFactor); floorBonus.set(r.floor, bonusFactor);
} }
@@ -132,13 +132,13 @@ export function stateToVirtualChunks(l0Results) {
/** /**
* 合并 L0 和 L1 chunks每楼层最多保留 limit 条 * 合并 L0 和 L1 chunks每楼层最多保留 limit 条
* @param {Array} l0Chunks - 虚拟 chunks已按匹配度排序) * @param {Array} l0Chunks - 虚拟 chunks已按相似度排序)
* @param {Array} l1Chunks - 真实 chunks已按匹配度排序) * @param {Array} l1Chunks - 真实 chunks已按相似度排序)
* @param {number} limit - 每楼层上限 * @param {number} limit - 每楼层上限
* @returns {Array} 合并后的 chunks * @returns {Array} 合并后的 chunks
*/ */
export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) { export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) {
// 合并并按匹配度排序 // 合并并按相似度排序
const all = [...(l0Chunks || []), ...(l1Chunks || [])] const all = [...(l0Chunks || []), ...(l1Chunks || [])]
.sort((a, b) => b.similarity - a.similarity); .sort((a, b) => b.similarity - a.similarity);
@@ -153,7 +153,7 @@ export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) {
} }
} }
// 扁平化并保持匹配度排序 // 扁平化并保持相似度排序
return Array.from(byFloor.values()) return Array.from(byFloor.values())
.flat() .flat()
.sort((a, b) => b.similarity - a.similarity); .sort((a, b) => b.similarity - a.similarity);

View File

@@ -0,0 +1,85 @@
// ═══════════════════════════════════════════════════════════════════════════
// Text Search - L2 event text retrieval (MiniSearch)
// Complements vector retrieval; the two result lists are fused via RRF
// ═══════════════════════════════════════════════════════════════════════════
import MiniSearch from '../../../libs/minisearch.mjs';
// Current MiniSearch instance over events; null when no index is built
// (never built, cleared, built from an empty event list, or the build failed).
let idx = null;
// Revision value supplied to the last successful build; ensureEventTextIndex
// compares against it to skip rebuilding an index that is already current.
let lastRevision = null;
/**
 * Split text into search tokens: every character in the U+4E00–U+9FFF range
 * becomes its own token, and each run of ASCII letters/digits becomes one
 * token. All other characters act as separators and are dropped.
 *
 * @param {*} text - Value to tokenize; falsy values are treated as ''.
 * @returns {string[]} Tokens in order of appearance (empty array if none).
 */
function tokenize(text) {
    const input = String(text || '');
    const matched = input.match(/[\u4e00-\u9fff]|[a-zA-Z0-9]+/g);
    // String#match returns null (not an empty array) when nothing matches.
    return matched === null ? [] : matched;
}
/**
 * Remove a trailing floor marker such as "(#12)" or "(#12-15)" from a summary,
 * then trim surrounding whitespace. Markers that are not at the very end of
 * the string are left untouched.
 *
 * @param {*} s - Summary text; falsy values are treated as ''.
 * @returns {string} The summary without its trailing floor marker.
 */
function stripFloorTag(s) {
    const raw = String(s || '');
    const withoutTag = raw.replace(/\s*\(#\d+(?:-\d+)?\)\s*$/, '');
    return withoutTag.trim();
}
/**
 * Build or refresh the module-level event text index.
 *
 * - An empty/missing `events` list clears the index.
 * - If an index exists and `revision` is strictly equal to the revision of the
 *   last successful build, nothing is done.
 * - Otherwise a new MiniSearch index over `title`, `summary` (with the trailing
 *   floor marker stripped) and `participants` is built.
 *
 * Entries that are null/undefined, have no `id`, or repeat an `id` already seen
 * are skipped (the first occurrence wins). MiniSearch rejects such documents by
 * throwing, which would otherwise discard the whole index because of a single
 * bad event and make every later call retry the same failing build.
 *
 * On any build error the error is logged and the index is left cleared.
 *
 * @param {Array<Object>} events - Events to index; each is read for `id`,
 *   `title`, `summary` and `participants`.
 * @param {*} revision - Opaque change marker, compared with `===`.
 * @returns {void}
 */
export function ensureEventTextIndex(events, revision) {
    if (!events?.length) {
        idx = null;
        lastRevision = null;
        return;
    }
    if (idx && revision === lastRevision) return;

    try {
        const seenIds = new Set();
        const docs = [];
        for (const event of events) {
            const id = event?.id;
            // Skip entries MiniSearch would throw on: no ID field or a duplicate ID.
            if (id == null || seenIds.has(id)) continue;
            seenIds.add(id);

            // Tolerate a plain string (or missing value) where a list is expected.
            const { participants } = event;
            let participantText = '';
            if (Array.isArray(participants)) {
                participantText = participants.join(' ');
            } else if (typeof participants === 'string') {
                participantText = participants;
            }

            docs.push({
                id,
                title: event.title || '',
                summary: stripFloorTag(event.summary),
                participants: participantText,
            });
        }

        // Populate a local instance first so `idx` only ever points at a
        // fully built index.
        const nextIndex = new MiniSearch({
            fields: ['title', 'summary', 'participants'],
            storeFields: ['id'],
            tokenize,
        });
        nextIndex.addAll(docs);

        idx = nextIndex;
        lastRevision = revision;
    } catch (e) {
        console.error('[text-search] Index build failed:', e);
        idx = null;
        lastRevision = null;
    }
}
/**
 * Run a text query against the event index.
 *
 * Searches with per-field boosts (title 2, participants 1.5, summary 1),
 * fuzzy matching (0.2) and prefix matching, then keeps at most `limit` of the
 * results in the order the index returned them.
 *
 * The cap is applied here with `slice` rather than passed as a `limit` search
 * option: `limit` is not a documented MiniSearch search option, so relying on
 * it would let the full match set through.
 *
 * @param {string} queryText - Query string; a non-string or blank value yields [].
 * @param {number} [limit=80] - Maximum number of hits to return.
 * @returns {Array<{id: *, textRank: number}>} Hits with their 1-based rank;
 *   empty when there is no index, the query is blank, or the search throws.
 */
export function searchEventsByText(queryText, limit = 80) {
    if (!idx || typeof queryText !== 'string' || !queryText.trim()) return [];

    try {
        const res = idx.search(queryText, {
            boost: { title: 2, participants: 1.5, summary: 1 },
            fuzzy: 0.2,
            prefix: true,
        });

        // Enforce the cap ourselves; a negative limit is treated as 0.
        return res
            .slice(0, Math.max(0, limit))
            .map((r, i) => ({ id: r.id, textRank: i + 1 }));
    } catch (e) {
        console.error('[text-search] Search failed:', e);
        return [];
    }
}
/**
 * Drop the event text index and forget the revision it was built from, so the
 * next ensureEventTextIndex call with events rebuilds it.
 *
 * @returns {void}
 */
export function clearEventTextIndex() {
    lastRevision = null;
    idx = null;
}