Adjust story summary recall thresholds

This commit is contained in:
2026-02-01 16:26:29 +08:00
parent 0ac347968e
commit cf0fc88a24
6 changed files with 298 additions and 234 deletions

View File

@@ -2,8 +2,9 @@
// L1 chunk + L2 event 召回
// - 全量向量打分
// - 指数衰减加权 Query Embedding
// - 实体/参与者加分
// - MMR 去重
// - L0 floor 加权
// - RRF 混合检索(向量 + 文本)
// - MMR 去重(融合后执行)
// - floor 稀疏去重
import { getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from './chunk-store.js';
@@ -18,6 +19,7 @@ import {
stateToVirtualChunks,
mergeAndSparsify,
} from './state-recall.js';
import { ensureEventTextIndex, searchEventsByText } from './text-search.js';
const MODULE_ID = 'recall';
@@ -27,9 +29,8 @@ const CONFIG = {
QUERY_MAX_CHARS: 600,
QUERY_CONTEXT_CHARS: 240,
// 因果链
CAUSAL_CHAIN_MAX_DEPTH: 10, // 放宽跳数,让图自然终止
CAUSAL_INJECT_MAX: 30, // 放宽上限,由 prompt token 预算最终控制
CAUSAL_CHAIN_MAX_DEPTH: 10,
CAUSAL_INJECT_MAX: 30,
CANDIDATE_CHUNKS: 200,
CANDIDATE_EVENTS: 150,
@@ -38,18 +39,16 @@ const CONFIG = {
MAX_EVENTS: 120,
MIN_SIMILARITY_CHUNK: 0.6,
MIN_SIMILARITY_CHUNK_RECENT: 0.5,
MIN_SIMILARITY_EVENT: 0.65,
MMR_LAMBDA: 0.72,
BONUS_PARTICIPANT_HIT: 0.08,
BONUS_TEXT_HIT: 0.05,
BONUS_WORLD_TOPIC_HIT: 0.06,
// L0 配置
L0_FLOOR_BONUS_FACTOR: 0.10,
FLOOR_MAX_CHUNKS: 2,
FLOOR_LIMIT: 1,
RRF_K: 60,
TEXT_SEARCH_LIMIT: 80,
};
// ═══════════════════════════════════════════════════════════════════════════
@@ -75,10 +74,53 @@ function normalizeVec(v) {
}
// ═══════════════════════════════════════════════════════════════════════════
// 因果链追溯Graph-augmented retrieval
// - 从已召回事件出发,沿 causedBy 向上追溯祖先事件
// - 记录边chainFrom = 哪个召回事件需要它
// - 不在这里决定“是否额外注入”,只负责遍历与结构化结果
// RRF 融合
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Merge a vector-ranked list and a text-ranked list with Reciprocal Rank Fusion.
 *
 * Every list an id appears in contributes 1 / (k + rank) to its score, with
 * 1-based ranks. An id seen in both lists is tagged 'HYBRID', otherwise
 * 'VECTOR' or 'TEXT'. Ids that cannot be resolved through `eventById` are dropped.
 *
 * @param {Array<{event?: {id?: string}, vector?: *}>} vectorRanked - vector hits, best first;
 *   entries without a truthy `event.id` are skipped but still consume a rank position.
 * @param {Array<{id: string, textRank: number}>} textRanked - text hits carrying their own rank.
 * @param {Map<string, object>} eventById - lookup used to attach the event object.
 * @param {number} [k=CONFIG.RRF_K] - RRF smoothing constant.
 * @returns {Array<object>} fused entries ({ id, rrf, vRank, tRank, type, vector?, event }),
 *   ordered by rrf descending, then HYBRID < VECTOR < TEXT, then vRank, then tRank.
 */
function fuseEventsByRRF(vectorRanked, textRanked, eventById, k = CONFIG.RRF_K) {
    const TYPE_ORDER = { HYBRID: 0, VECTOR: 1, TEXT: 2 };
    const fusedById = new Map();

    // Fetch the accumulator for an id, creating it on first sight.
    const entryFor = (id) => {
        let entry = fusedById.get(id);
        if (entry === undefined) {
            entry = { id, rrf: 0, vRank: Infinity, tRank: Infinity, type: 'TEXT' };
            fusedById.set(id, entry);
        }
        return entry;
    };

    vectorRanked.forEach(({ event, vector }, position) => {
        const id = event?.id;
        if (!id) return;
        const rank = position + 1;
        const entry = entryFor(id);
        entry.vRank = rank;
        entry.rrf += 1 / (k + rank);
        entry.type = entry.tRank !== Infinity ? 'HYBRID' : 'VECTOR';
        entry.vector = vector;
    });

    textRanked.forEach(({ id, textRank }) => {
        const entry = entryFor(id);
        entry.tRank = textRank;
        entry.rrf += 1 / (k + textRank);
        entry.type = entry.vRank !== Infinity ? 'HYBRID' : 'TEXT';
    });

    const byFusedPriority = (a, b) => {
        if (a.rrf !== b.rrf) return b.rrf - a.rrf;
        const typeDelta = TYPE_ORDER[a.type] - TYPE_ORDER[b.type];
        if (typeDelta !== 0) return typeDelta;
        // Compare with !== first: subtracting two Infinity ranks would yield NaN.
        if (a.vRank !== b.vRank) return a.vRank - b.vRank;
        return a.tRank - b.tRank;
    };

    const fused = [];
    for (const entry of fusedById.values()) {
        const event = eventById.get(entry.id);
        if (event) fused.push({ ...entry, event });
    }
    return fused.sort(byFusedPriority);
}
// ═══════════════════════════════════════════════════════════════════════════
// 因果链追溯
// ═══════════════════════════════════════════════════════════════════════════
function buildEventIndex(allEvents) {
@@ -89,9 +131,6 @@ function buildEventIndex(allEvents) {
return map;
}
/**
* @returns {Map<string, {event, depth, chainFrom}>}
*/
function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MAX_DEPTH) {
const out = new Map();
const idRe = /^evt-\d+$/;
@@ -103,7 +142,6 @@ function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUS
const ev = eventIndex.get(parentId);
if (!ev) return;
// 如果同一个祖先被多个召回事件引用:保留更“近”的深度或追加来源
const existed = out.get(parentId);
if (!existed) {
out.set(parentId, { event: ev, depth, chainFrom: [chainFrom] });
@@ -128,20 +166,12 @@ function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUS
return out;
}
/**
 * Order causal ancestor events in place and return the same array.
 *
 * Priority: more referencing recall chains first, then shallower depth,
 * then event id compared as a string.
 *
 * @param {Array<{event: {id: *}, depth: number, chainFrom: Array}>} causalArray
 * @returns {Array} the same array instance, now sorted.
 */
function sortCausalEvents(causalArray) {
    // Numeric criteria, checked in order; the first non-zero difference decides.
    const numericCriteria = [
        (left, right) => right.chainFrom.length - left.chainFrom.length,
        (left, right) => left.depth - right.depth,
    ];

    const compare = (left, right) => {
        for (const criterion of numericCriteria) {
            const delta = criterion(left, right);
            if (delta !== 0) return delta;
        }
        // Final tie-break: event id.
        return String(left.event.id).localeCompare(String(right.event.id));
    };

    causalArray.sort(compare);
    return causalArray;
}
@@ -150,7 +180,6 @@ function normalize(s) {
return String(s || '').normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim();
}
// 从 summary 解析楼层范围:(#321-322) 或 (#321)
function parseFloorRange(summary) {
if (!summary) return null;
const match = String(summary).match(/\(#(\d+)(?:-(\d+))?\)/);
@@ -161,8 +190,6 @@ function parseFloorRange(summary) {
}
/**
 * Prepare message text for recall.
 *
 * Runs the text through `filterText` (defined elsewhere; per the original
 * comment it applies the user's custom filter rules), then removes hard-coded
 * `[tts:...]` markers and trims surrounding whitespace.
 *
 * @param {*} text - raw message text, passed straight to `filterText`.
 * @returns {string} cleaned text.
 */
function cleanForRecall(text) {
    const filtered = filterText(text);
    const withoutTtsMarkers = filtered.replace(/\[tts:[^\]]*\]/gi, '');
    return withoutTtsMarkers.trim();
}
@@ -187,13 +214,11 @@ function buildQuerySegments(chat, count, excludeLastAi, pendingUserMessage = nul
messages = messages.slice(0, -1);
}
// ★ 如果有待处理的用户消息且 chat 中最后一条不是它,追加虚拟消息
if (pendingUserMessage) {
const lastMsg = messages[messages.length - 1];
const lastMsgText = lastMsg?.mes?.trim() || "";
const pendingText = pendingUserMessage.trim();
// 避免重复(如果 chat 已包含该消息则不追加)
if (lastMsgText !== pendingText) {
messages = [...messages, { is_user: true, name: name1 || "用户", mes: pendingUserMessage }];
}
@@ -330,7 +355,7 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) {
// L1 Chunks 检索
// ═══════════════════════════════════════════════════════════════════════════
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map()) {
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(), lastSummarizedFloor = -1) {
const { chatId } = getContext();
if (!chatId || !queryVector?.length) return [];
@@ -359,11 +384,21 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
};
});
// Pre-filter stats for logging
const candidates = scored
.filter(s => {
const threshold = s.floor > lastSummarizedFloor
? CONFIG.MIN_SIMILARITY_CHUNK_RECENT
: CONFIG.MIN_SIMILARITY_CHUNK;
return s.similarity >= threshold;
})
.sort((a, b) => b.similarity - a.similarity)
.slice(0, CONFIG.CANDIDATE_CHUNKS);
const preFilterStats = {
total: scored.length,
passThreshold: scored.filter(s => s.similarity >= CONFIG.MIN_SIMILARITY_CHUNK).length,
threshold: CONFIG.MIN_SIMILARITY_CHUNK,
passThreshold: candidates.length,
thresholdRemote: CONFIG.MIN_SIMILARITY_CHUNK,
thresholdRecent: CONFIG.MIN_SIMILARITY_CHUNK_RECENT,
distribution: {
'0.8+': scored.filter(s => s.similarity >= 0.8).length,
'0.7-0.8': scored.filter(s => s.similarity >= 0.7 && s.similarity < 0.8).length,
@@ -373,12 +408,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
},
};
const candidates = scored
.filter(s => s.similarity >= CONFIG.MIN_SIMILARITY_CHUNK)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, CONFIG.CANDIDATE_CHUNKS);
// 动态 K质量不够就少拿
const dynamicK = Math.min(CONFIG.MAX_CHUNKS, candidates.length);
const selected = mmrSelect(
@@ -389,8 +418,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
c => c.similarity
);
// floor 稀疏去重:每个楼层只保留该楼层匹配度最高的那条
const bestByFloor = new Map();
for (const s of selected) {
const prev = bestByFloor.get(s.floor);
@@ -399,7 +426,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
}
}
// 最终结果按匹配度降序
const sparse = Array.from(bestByFloor.values()).sort((a, b) => b.similarity - a.similarity);
const floors = [...new Set(sparse.map(c => c.floor))];
@@ -420,7 +446,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
};
}).filter(Boolean);
// Attach stats for logging
if (results.length > 0) {
results._preFilterStats = preFilterStats;
}
@@ -429,14 +454,12 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
}
// ═══════════════════════════════════════════════════════════════════════════
// L2 Events 检索
// L2 Events 检索RRF 混合 + MMR 后置)
// ═══════════════════════════════════════════════════════════════════════════
async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities, l0FloorBonus = new Map()) {
const { chatId, name1 } = getContext();
if (!chatId || !queryVector?.length) {
return [];
}
async function searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntities, l0FloorBonus = new Map()) {
const { chatId } = getContext();
if (!chatId || !queryVector?.length) return [];
const meta = await getMeta(chatId);
const fp = getEngineFingerprint(vectorConfig);
@@ -446,53 +469,29 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
if (!vectorMap.size) return [];
const userName = normalize(name1);
const queryNormList = (queryEntities || []).map(normalize).filter(Boolean);
const querySet = new Set(queryNormList);
// 构建/更新文本索引
const revision = `${chatId}:${store?.updatedAt || 0}:${allEvents.length}`;
ensureEventTextIndex(allEvents, revision);
// 只取硬约束类的 world topic
const worldTopics = (store?.json?.world || [])
.filter(w => ['inventory', 'rule', 'knowledge'].includes(String(w.category).toLowerCase()))
.map(w => normalize(w.topic))
.filter(Boolean);
// 文本路检索
const textRanked = searchEventsByText(queryTextForSearch, CONFIG.TEXT_SEARCH_LIMIT);
// ═══════════════════════════════════════════════════════════════════════
// 向量路检索(只保留 L0 加权)
// ═══════════════════════════════════════════════════════════════════════
const scored = (allEvents || []).map((event, idx) => {
const v = vectorMap.get(event.id);
const sim = v ? cosineSimilarity(queryVector, v) : 0;
let bonus = 0;
const reasons = [];
// participants 命中
const participants = (event.participants || []).map(normalize).filter(Boolean);
const hitCount = participants.filter(p => p !== userName && querySet.has(p)).length;
const hasParticipantHit = hitCount > 0;
if (hasParticipantHit) {
bonus += CONFIG.BONUS_PARTICIPANT_HIT * Math.log2(hitCount + 1);
reasons.push(hitCount > 1 ? `participant×${hitCount}` : 'participant');
}
// text 命中
const text = normalize(`${event.title || ''} ${event.summary || ''}`);
const textHitCount = queryNormList.filter(e => text.includes(e)).length;
if (textHitCount > 0) {
bonus += CONFIG.BONUS_TEXT_HIT * Math.log2(textHitCount + 1);
reasons.push(textHitCount > 1 ? `text×${textHitCount}` : 'text');
}
// world topic 命中
if (worldTopics.some(topic => querySet.has(topic) && text.includes(topic))) {
bonus += CONFIG.BONUS_WORLD_TOPIC_HIT;
reasons.push('world');
}
// L0 加权:事件覆盖楼层范围命中
// L0 加权
const range = parseFloorRange(event.summary);
if (range) {
for (let f = range.start; f <= range.end; f++) {
if (l0FloorBonus.has(f)) {
bonus += l0FloorBonus.get(f);
reasons.push('L0');
break;
}
}
@@ -503,15 +502,11 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
_idx: idx,
event,
similarity: sim,
bonus,
finalScore: sim + bonus,
reasons,
isDirect: hasParticipantHit,
vector: v,
};
});
// ★ 记录过滤前的分布(用 finalScore与显示一致
const preFilterDistribution = {
total: scored.length,
'0.85+': scored.filter(s => s.finalScore >= 0.85).length,
@@ -523,41 +518,66 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
threshold: CONFIG.MIN_SIMILARITY_EVENT,
};
// ★ 过滤改成用 finalScore包含 bonus
// 向量路:纯相似度排序(不在这里做 MMR
const candidates = scored
.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
.sort((a, b) => b.finalScore - a.finalScore)
.slice(0, CONFIG.CANDIDATE_EVENTS);
// 动态 K质量不够就少拿
const dynamicK = Math.min(CONFIG.MAX_EVENTS, candidates.length);
const vectorRanked = candidates.map(s => ({
event: s.event,
similarity: s.finalScore,
vector: s.vector,
}));
const selected = mmrSelect(
candidates,
dynamicK,
// RRF 融合
const eventById = new Map(allEvents.map(e => [e.id, e]));
const fused = fuseEventsByRRF(vectorRanked, textRanked, eventById);
// 向量非空时过滤纯 TEXT
const hasVector = vectorRanked.length > 0;
const filtered = hasVector ? fused.filter(x => x.type !== 'TEXT') : fused;
// MMR 放在融合后:对最终候选集去重
const mmrInput = filtered.slice(0, CONFIG.CANDIDATE_EVENTS).map(x => ({
...x,
_id: x.id,
}));
const mmrOutput = mmrSelect(
mmrInput,
CONFIG.MAX_EVENTS,
CONFIG.MMR_LAMBDA,
c => c.vector,
c => c.finalScore
c => c.vector || null,
c => c.rrf
);
return selected
.sort((a, b) => b.finalScore - a.finalScore)
.map(s => ({
event: s.event,
similarity: s.finalScore,
_recallType: s.isDirect ? 'DIRECT' : 'SIMILAR',
_recallReason: s.reasons.length ? s.reasons.join('+') : '相似',
_preFilterDistribution: preFilterDistribution,
}));
// 构造结果
const results = mmrOutput.map(x => ({
event: x.event,
similarity: x.rrf,
_recallType: x.type === 'HYBRID' ? 'DIRECT' : 'SIMILAR',
_recallReason: x.type,
_rrfDetail: { vRank: x.vRank, tRank: x.tRank, rrf: x.rrf },
}));
// 统计信息附加到第一条结果
if (results.length > 0) {
results[0]._preFilterDistribution = preFilterDistribution;
results[0]._rrfStats = {
vectorCount: vectorRanked.length,
textCount: textRanked.length,
hybridCount: fused.filter(x => x.type === 'HYBRID').length,
vectorOnlyCount: fused.filter(x => x.type === 'VECTOR').length,
textOnlyFiltered: fused.filter(x => x.type === 'TEXT').length,
};
}
return results;
}
// ═══════════════════════════════════════════════════════════════════════════
// 日志:因果树格式化
// ═══════════════════════════════════════════════════════════════════════════
// ═══════════════════════════════════════════════════════════════════════════
// 日志:主报告
// 日志
// ═══════════════════════════════════════════════════════════════════════════
function formatRecallLog({
@@ -571,21 +591,19 @@ function formatRecallLog({
causalEvents = [],
chunkPreFilterStats = null,
l0Results = [],
l0PreFilterStats = null,
}) {
const lines = [
'\u2554' + '\u2550'.repeat(62) + '\u2557',
'\u2551 \u8bb0\u5fc6\u53ec\u56de\u62a5\u544a \u2551',
'\u2551 记忆召回报告 \u2551',
'\u2560' + '\u2550'.repeat(62) + '\u2563',
`\u2551 \u8017\u65f6: ${elapsed}ms`,
`\u2551 耗时: ${elapsed}ms`,
'\u255a' + '\u2550'.repeat(62) + '\u255d',
'',
'\u250c' + '\u2500'.repeat(61) + '\u2510',
'\u2502 \u3010\u67e5\u8be2\u6784\u5efa\u3011\u6700\u8fd1 5 \u6761\u6d88\u606f\uff0c\u6307\u6570\u8870\u51cf\u52a0\u6743 (\u03b2=0.7) \u2502',
'\u2502 【查询构建】最近 5 条消息,指数衰减加权 (β=0.7) \u2502',
'\u2514' + '\u2500'.repeat(61) + '\u2518',
];
// Keep query previews only (the only place to keep raw text)
const segmentsSorted = segments.map((s, i) => ({
idx: i + 1,
weight: weights?.[i] ?? 0,
@@ -595,74 +613,57 @@ function formatRecallLog({
segmentsSorted.forEach((s, rank) => {
const bar = '\u2588'.repeat(Math.round(s.weight * 20));
const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text;
const marker = rank === 0 ? ' \u25c0 \u4e3b\u5bfc' : '';
const marker = rank === 0 ? ' ◀ 主导' : '';
lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
});
lines.push('');
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
lines.push('\u2502 \u3010\u63d0\u53d6\u5b9e\u4f53\u3011 \u2502');
lines.push('\u2502 【提取实体】 \u2502');
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
lines.push(` ${queryEntities?.length ? queryEntities.join('\u3001') : '(\u65e0)'}`);
lines.push(` ${queryEntities?.length ? queryEntities.join('') : '()'}`);
// Recall stats (numbers only)
lines.push('');
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
lines.push('\u2502 \u3010\u53ec\u56de\u7edf\u8ba1\u3011 \u2502');
lines.push('\u2502 【召回统计】 \u2502');
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
// L0
const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b);
lines.push(' L0 \u8bed\u4e49\u951a\u70b9:');
lines.push(' L0 语义锚点:');
if (l0Results.length) {
const l0Dist = {
'0.8+': l0Results.filter(r => r.similarity >= 0.8).length,
'0.7-0.8': l0Results.filter(r => r.similarity >= 0.7 && r.similarity < 0.8).length,
'0.6-0.7': l0Results.filter(r => r.similarity >= 0.6 && r.similarity < 0.7).length,
'0.55-0.6': l0Results.filter(r => r.similarity >= 0.55 && r.similarity < 0.6).length,
};
lines.push(` \u9009\u5165: ${l0Results.length} \u6761 | \u5f71\u54cd\u697c\u5c42: ${l0Floors.join(', ')} (+${CONFIG.L0_FLOOR_BONUS_FACTOR} \u52a0\u6743)`);
lines.push(` \u5339\u914d\u5ea6: 0.8+: ${l0Dist['0.8+']} | 0.7-0.8: ${l0Dist['0.7-0.8']} | 0.6-0.7: ${l0Dist['0.6-0.7']} | 0.55-0.6: ${l0Dist['0.55-0.6']}`);
lines.push(` 选入: ${l0Results.length} 条 | 影响楼层: ${l0Floors.join(', ')} (+${CONFIG.L0_FLOOR_BONUS_FACTOR} 加权)`);
} else {
lines.push(' (\u65e0\u6570\u636e)');
lines.push(' (无数据)');
}
// L1
lines.push('');
lines.push(' L1 \u539f\u6587\u7247\u6bb5:');
lines.push(' L1 原文片段:');
if (chunkPreFilterStats) {
const dist = chunkPreFilterStats.distribution || {};
lines.push(` \u5168\u91cf: ${chunkPreFilterStats.total} \u6761 | \u901a\u8fc7\u9608\u503c(\u2265${chunkPreFilterStats.threshold}): ${chunkPreFilterStats.passThreshold} \u6761 | \u6700\u7ec8: ${chunkResults.length} \u6761`);
lines.push(` \u5339\u914d\u5ea6: 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0} | <0.6: ${(dist['0.55-0.6'] || 0) + (dist['<0.55'] || 0)}`);
const floorCounts = new Map();
chunkResults.forEach(c => floorCounts.set(c.floor, (floorCounts.get(c.floor) || 0) + 1));
const floorStats = `\u8986\u76d6 ${floorCounts.size} \u4e2a\u697c\u5c42`;
lines.push(` ${floorStats}`);
lines.push(` \u5168\u91cf: ${chunkPreFilterStats.total} \u6761 | \u901a\u8fc7\u9608\u503c(\u8fdc\u671f\u2265${chunkPreFilterStats.thresholdRemote}, \u5f85\u6574\u7406\u2265${chunkPreFilterStats.thresholdRecent}): ${chunkPreFilterStats.passThreshold} \u6761 | \u6700\u7ec8: ${chunkResults.length} \u6761`);
lines.push(` 匹配度: 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0}`);
} else {
lines.push(` \u9009\u5165: ${chunkResults.length} \u6761`);
lines.push(` 选入: ${chunkResults.length} `);
}
// L2
const preFilterDist = eventResults[0]?._preFilterDistribution || {};
const directCount = eventResults.filter(e => e._recallType === 'DIRECT').length;
const similarCount = eventResults.filter(e => e._recallType === 'SIMILAR').length;
const rrfStats = eventResults[0]?._rrfStats || {};
lines.push('');
lines.push(' L2 \u4e8b\u4ef6\u8bb0\u5fc6:');
lines.push(` \u603b\u4e8b\u4ef6: ${allEvents.length} \u6761 | \u901a\u8fc7\u9608\u503c(\u2265${preFilterDist.threshold || 0.65}): ${preFilterDist.passThreshold || 0} \u6761 | \u6700\u7ec8: ${eventResults.length} \u6761`);
if (preFilterDist.total) {
lines.push(` \u5339\u914d\u5ea6: 0.85+: ${preFilterDist['0.85+'] || 0} | 0.7-0.85: ${preFilterDist['0.7-0.85'] || 0} | 0.6-0.7: ${preFilterDist['0.6-0.7'] || 0} | <0.6: ${(preFilterDist['0.5-0.6'] || 0) + (preFilterDist['<0.5'] || 0)}`);
}
lines.push(` \u7c7b\u578b: DIRECT ${directCount} \u6761 | SIMILAR ${similarCount} \u6761`);
lines.push(' L2 事件记忆 (RRF 混合检索):');
lines.push(` 总事件: ${allEvents.length} 条 | 最终: ${eventResults.length} `);
lines.push(` 向量路: ${rrfStats.vectorCount || 0} 条 | 文本路: ${rrfStats.textCount || 0}`);
lines.push(` HYBRID: ${rrfStats.hybridCount || 0} 条 | 纯 VECTOR: ${rrfStats.vectorOnlyCount || 0} 条 | 纯 TEXT (已过滤): ${rrfStats.textOnlyFiltered || 0}`);
// Causal chains
// Causal
if (causalEvents.length) {
const maxRefs = Math.max(...causalEvents.map(c => c.chainFrom?.length || 0));
const maxDepth = Math.max(...causalEvents.map(c => c.depth || 0));
lines.push('');
lines.push(' \u56e0\u679c\u94fe\u8ffd\u6eaf:');
lines.push(` \u8ffd\u6eaf: ${causalEvents.length} \u6761 | \u6700\u5927\u88ab\u5f15: ${maxRefs} \u6b21 | \u6700\u5927\u6df1\u5ea6: ${maxDepth}`);
lines.push(' 因果链追溯:');
lines.push(` 追溯: ${causalEvents.length} 条 | 最大被引: ${maxRefs} 次 | 最大深度: ${maxDepth}`);
}
lines.push('');
@@ -677,6 +678,7 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
const T0 = performance.now();
const { chat } = getContext();
const store = getSummaryStore();
const lastSummarizedFloor = store?.lastSummarizedMesId ?? -1;
const { pendingUserMessage = null } = options;
if (!allEvents?.length) {
@@ -702,9 +704,15 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
const lexicon = buildEntityLexicon(store, allEvents);
const queryEntities = extractEntities(segments.join('\n'), lexicon);
// ════════════════════════════════════════════════════════════════════════
// 构建文本查询串:最后一条消息 + 实体 + 关键词
const lastSeg = segments[segments.length - 1] || '';
const queryTextForSearch = [
lastSeg,
...queryEntities,
...(store?.json?.keywords || []).slice(0, 5).map(k => k.text),
].join(' ');
// L0 召回
// ════════════════════════════════════════════════════════════════════════
let l0Results = [];
let l0FloorBonus = new Map();
let l0VirtualChunks = [];
@@ -718,21 +726,15 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
}
const [chunkResults, eventResults] = await Promise.all([
searchChunks(queryVector, vectorConfig, l0FloorBonus),
searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities, l0FloorBonus),
searchChunks(queryVector, vectorConfig, l0FloorBonus, lastSummarizedFloor),
searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntities, l0FloorBonus),
]);
const chunkPreFilterStats = chunkResults._preFilterStats || null;
// ════════════════════════════════════════════════════════════════════════
// 合并 L0 虚拟 chunks 到 L1
// ════════════════════════════════════════════════════════════════════════
const mergedChunks = mergeAndSparsify(l0VirtualChunks, chunkResults, CONFIG.FLOOR_MAX_CHUNKS);
// ─────────────────────────────────────────────────────────────────────
// 因果链追溯:从 eventResults 出发找祖先事件
// 注意:是否“额外注入”要去重(如果祖先事件本来已召回,就不额外注入)
// ─────────────────────────────────────────────────────────────────────
// 因果链追溯
const eventIndex = buildEventIndex(allEvents);
const causalMap = traceCausalAncestors(eventResults, eventIndex);
@@ -750,7 +752,6 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
depth: x.depth,
}));
// 排序:引用数 > 深度 > 编号,然后截断
sortCausalEvents(causalEvents);
const causalEventsTruncated = causalEvents.slice(0, CONFIG.CAUSAL_INJECT_MAX);

View File

@@ -59,7 +59,7 @@ export async function searchStateAtoms(queryVector, vectorConfig) {
const atoms = getStateAtoms();
const atomMap = new Map(atoms.map(a => [a.atomId, a]));
// 计算匹配
// 计算相似
const scored = stateVectors
.map(sv => {
const atom = atomMap.get(sv.atomId);
@@ -92,8 +92,8 @@ export function buildL0FloorBonus(l0Results, bonusFactor = 0.10) {
const floorBonus = new Map();
for (const r of l0Results || []) {
// 每个楼层只加一次,取最高匹配度对应的 bonus
// 简化处理:统一加 bonusFactor不区分匹配度高低
// 每个楼层只加一次,取最高相似度对应的 bonus
// 简化处理:统一加 bonusFactor不区分相似度高低
if (!floorBonus.has(r.floor)) {
floorBonus.set(r.floor, bonusFactor);
}
@@ -132,13 +132,13 @@ export function stateToVirtualChunks(l0Results) {
/**
* 合并 L0 和 L1 chunks每楼层最多保留 limit 条
* @param {Array} l0Chunks - 虚拟 chunks已按匹配度排序)
* @param {Array} l1Chunks - 真实 chunks已按匹配度排序)
* @param {Array} l0Chunks - 虚拟 chunks已按相似度排序)
* @param {Array} l1Chunks - 真实 chunks已按相似度排序)
* @param {number} limit - 每楼层上限
* @returns {Array} 合并后的 chunks
*/
export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) {
// 合并并按匹配度排序
// 合并并按相似度排序
const all = [...(l0Chunks || []), ...(l1Chunks || [])]
.sort((a, b) => b.similarity - a.similarity);
@@ -153,7 +153,7 @@ export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) {
}
}
// 扁平化并保持匹配度排序
// 扁平化并保持相似度排序
return Array.from(byFloor.values())
.flat()
.sort((a, b) => b.similarity - a.similarity);

View File

@@ -0,0 +1,85 @@
// ═══════════════════════════════════════════════════════════════════════════
// Text Search - L2 事件文本检索(MiniSearch)
// 与向量检索互补,通过 RRF 融合
// ═══════════════════════════════════════════════════════════════════════════
import MiniSearch from '../../../libs/minisearch.mjs';
// Current MiniSearch index over events; null until ensureEventTextIndex builds one,
// and reset to null on empty input, build failure, or clearEventTextIndex().
let idx = null;
// Revision value recorded at the last successful index build; compared against
// the caller-supplied revision to decide whether a rebuild is needed.
let lastRevision = null;
/**
 * Split text into search tokens: each character in U+4E00–U+9FFF becomes its
 * own token, and each run of ASCII letters/digits becomes one token.
 * Everything else acts as a separator.
 *
 * @param {*} text - value to tokenize; falsy values are treated as ''.
 * @returns {string[]} tokens in order of appearance (empty array when none).
 */
function tokenize(text) {
    const source = String(text || '');
    const tokenPattern = /[\u4e00-\u9fff]|[a-zA-Z0-9]+/g;
    return Array.from(source.matchAll(tokenPattern), (match) => match[0]);
}
/**
 * Remove a trailing floor marker such as "(#321)" or "(#321-322)" from a
 * summary, together with the whitespace around it, and trim the result.
 * Markers that are not at the very end are left untouched.
 *
 * @param {*} s - summary text; falsy values are treated as ''.
 * @returns {string} summary without the trailing floor marker.
 */
function stripFloorTag(s) {
    const summary = String(s || '');
    const trailingFloorTag = /\s*\(#\d+(?:-\d+)?\)\s*$/;
    const withoutTag = summary.replace(trailingFloorTag, '');
    return withoutTag.trim();
}
/**
 * Build the event text index, or keep the current one when it is still valid.
 *
 * - An empty or missing event list drops the index and the recorded revision.
 * - When an index exists and `revision` strictly equals the one recorded at
 *   the last successful build, nothing happens.
 * - Otherwise a new MiniSearch index is created over title, summary (with the
 *   trailing floor tag stripped) and space-joined participants.
 * - A build error is logged and leaves the module without an index.
 *
 * @param {Array<object>} events - events to index (id, title, summary, participants are read).
 * @param {*} revision - opaque value identifying the current state of `events`.
 * @returns {void}
 */
export function ensureEventTextIndex(events, revision) {
    const hasEvents = Boolean(events?.length);
    if (!hasEvents) {
        lastRevision = null;
        idx = null;
        return;
    }

    const upToDate = idx && revision === lastRevision;
    if (upToDate) return;

    // Shape of one indexed document.
    const toDocument = (event) => ({
        id: event.id,
        title: event.title || '',
        summary: stripFloorTag(event.summary),
        participants: (event.participants || []).join(' '),
    });

    try {
        idx = new MiniSearch({
            fields: ['title', 'summary', 'participants'],
            storeFields: ['id'],
            tokenize,
        });
        idx.addAll(events.map(toDocument));
        lastRevision = revision;
    } catch (e) {
        console.error('[text-search] Index build failed:', e);
        idx = null;
        lastRevision = null;
    }
}
/**
 * Search the event text index and return ranked event ids.
 *
 * Returns an empty array when no index has been built, when `queryText` is
 * not a string or is blank, or when the search itself throws (the error is
 * logged).
 *
 * The result cap is applied here with `slice`. The original passed `limit`
 * inside the MiniSearch search options, but MiniSearch's documented search
 * options do not appear to include `limit`, so the cap was likely ignored.
 * NOTE(review): confirm against the vendored minisearch.mjs.
 *
 * @param {string} queryText - free-text query.
 * @param {number} [limit=80] - maximum number of hits to return; a non-finite
 *   or negative value disables the cap.
 * @returns {Array<{id: *, textRank: number}>} hits in the order MiniSearch
 *   returned them, with 1-based `textRank`.
 */
export function searchEventsByText(queryText, limit = 80) {
    if (!idx || typeof queryText !== 'string' || !queryText.trim()) return [];

    try {
        const hits = idx.search(queryText, {
            boost: { title: 2, participants: 1.5, summary: 1 },
            fuzzy: 0.2,
            prefix: true,
        });
        const shouldCap = Number.isFinite(limit) && limit >= 0;
        const capped = shouldCap ? hits.slice(0, limit) : hits;
        return capped.map((hit, i) => ({ id: hit.id, textRank: i + 1 }));
    } catch (e) {
        console.error('[text-search] Search failed:', e);
        return [];
    }
}
/**
 * Discard the event text index and forget the recorded revision, so the next
 * ensureEventTextIndex call with events rebuilds from scratch.
 *
 * @returns {void}
 */
export function clearEventTextIndex() {
    lastRevision = null;
    idx = null;
}