Adjust story summary recall thresholds
This commit is contained in:
@@ -6,6 +6,7 @@ import { chat_metadata } from "../../../../../../../script.js";
|
|||||||
import { EXT_ID } from "../../../core/constants.js";
|
import { EXT_ID } from "../../../core/constants.js";
|
||||||
import { xbLog } from "../../../core/debug-core.js";
|
import { xbLog } from "../../../core/debug-core.js";
|
||||||
import { clearEventVectors, deleteEventVectorsByIds } from "../vector/chunk-store.js";
|
import { clearEventVectors, deleteEventVectorsByIds } from "../vector/chunk-store.js";
|
||||||
|
import { clearEventTextIndex } from '../vector/text-search.js';
|
||||||
|
|
||||||
const MODULE_ID = 'summaryStore';
|
const MODULE_ID = 'summaryStore';
|
||||||
|
|
||||||
@@ -278,6 +279,8 @@ export async function clearSummaryData(chatId) {
|
|||||||
await clearEventVectors(chatId);
|
await clearEventVectors(chatId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
clearEventTextIndex();
|
||||||
|
|
||||||
xbLog.info(MODULE_ID, '总结数据已清空');
|
xbLog.info(MODULE_ID, '总结数据已清空');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ const RECENT_ORPHAN_MAX = 5000; // [待整理] 独立预算
|
|||||||
const TOTAL_BUDGET_MAX = 15000; // 总预算(用于日志显示)
|
const TOTAL_BUDGET_MAX = 15000; // 总预算(用于日志显示)
|
||||||
const L3_MAX = 2000;
|
const L3_MAX = 2000;
|
||||||
const ARCS_MAX = 1500;
|
const ARCS_MAX = 1500;
|
||||||
const TOP_N_STAR = 5; // 匹配度前N条加⭐
|
const TOP_N_STAR = 5; // 相似度前N条加⭐
|
||||||
|
|
||||||
// ─────────────────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
// 工具函数
|
// 工具函数
|
||||||
@@ -180,64 +180,66 @@ function formatInjectionLog(stats, details, recentOrphanStats = null) {
|
|||||||
|
|
||||||
const lines = [
|
const lines = [
|
||||||
'',
|
'',
|
||||||
'\u250c' + '\u2500'.repeat(61) + '\u2510',
|
'┌─────────────────────────────────────────────────────────────┐',
|
||||||
'\u2502 \u3010\u88c5\u914d\u7edf\u8ba1\u3011 \u2502',
|
'│ 【装配统计】 │',
|
||||||
'\u2514' + '\u2500'.repeat(61) + '\u2518',
|
'└─────────────────────────────────────────────────────────────┘',
|
||||||
` \u603b\u9884\u7b97: ${stats.budget.max} tokens | \u5df2\u4f7f\u7528: ${stats.budget.used} tokens (${pct(stats.budget.used, stats.budget.max)}%)`,
|
` 总预算: ${stats.budget.max} tokens | 已使用: ${stats.budget.used} tokens (${pct(stats.budget.used, stats.budget.max)}%)`,
|
||||||
'',
|
'',
|
||||||
];
|
];
|
||||||
|
|
||||||
// [1] World constraints
|
// [1] 世界约束
|
||||||
lines.push(' [1] \u4e16\u754c\u7ea6\u675f (\u4e0a\u9650 2000)');
|
lines.push(` [1] 世界约束 (上限 2000)`);
|
||||||
lines.push(` \u9009\u5165: ${stats.world.count} \u6761 | \u6d88\u8017: ${stats.world.tokens} tokens`);
|
lines.push(` 选入: ${stats.world.count} 条 | 消耗: ${stats.world.tokens} tokens`);
|
||||||
lines.push('');
|
lines.push('');
|
||||||
|
|
||||||
// [2] Core + background events
|
// [2] 核心经历 + 过往背景
|
||||||
lines.push(' [2] \u6838\u5fc3\u7ecf\u5386 + \u8fc7\u5f80\u80cc\u666f');
|
lines.push(` [2] 核心经历 + 过往背景`);
|
||||||
lines.push(` \u4e8b\u4ef6: ${stats.events.selected} \u6761 | \u6d88\u8017: ${stats.events.tokens} tokens`);
|
lines.push(` 事件: ${stats.events.selected} 条 | 消耗: ${stats.events.tokens} tokens`);
|
||||||
|
|
||||||
|
// 证据统计(区分 L0 和 L1)
|
||||||
const l0EvidenceCount = details.eventList?.filter(e => e.hasL0Evidence)?.length || 0;
|
const l0EvidenceCount = details.eventList?.filter(e => e.hasL0Evidence)?.length || 0;
|
||||||
const l1EvidenceCount = (stats.evidence.attached || 0) - l0EvidenceCount;
|
const l1EvidenceCount = (stats.evidence.attached || 0) - l0EvidenceCount;
|
||||||
lines.push(` \u8bc1\u636e: ${stats.evidence.attached} \u6761 (L0: ${l0EvidenceCount}, L1: ${l1EvidenceCount}) | \u6d88\u8017: ${stats.evidence.tokens} tokens`);
|
lines.push(` 证据: ${stats.evidence.attached} 条 (L0: ${l0EvidenceCount}, L1: ${l1EvidenceCount}) | 消耗: ${stats.evidence.tokens} tokens`);
|
||||||
lines.push(` \u6838\u5fc3: ${details.directCount || 0} \u6761 | \u8fc7\u5f80: ${details.similarCount || 0} \u6761`);
|
lines.push(` 核心: ${details.directCount || 0} 条 | 过往: ${details.similarCount || 0} 条`);
|
||||||
lines.push('');
|
lines.push('');
|
||||||
|
|
||||||
// [3] Long-term chunks
|
// [3] 远期片段
|
||||||
const l0OrphanCount = stats.orphans.l0Count || 0;
|
const l0OrphanCount = stats.orphans.l0Count || 0;
|
||||||
const l1OrphanCount = (stats.orphans.injected || 0) - l0OrphanCount;
|
const l1OrphanCount = (stats.orphans.injected || 0) - l0OrphanCount;
|
||||||
lines.push(' [3] \u8fdc\u671f\u7247\u6bb5 (\u5df2\u603b\u7ed3\u8303\u56f4)');
|
lines.push(` [3] 远期片段 (已总结范围)`);
|
||||||
lines.push(` \u9009\u5165: ${stats.orphans.injected} \u6761 (L0: ${l0OrphanCount}, L1: ${l1OrphanCount}) | \u6d88\u8017: ${stats.orphans.tokens} tokens`);
|
lines.push(` 选入: ${stats.orphans.injected} 条 (L0: ${l0OrphanCount}, L1: ${l1OrphanCount}) | 消耗: ${stats.orphans.tokens} tokens`);
|
||||||
lines.push('');
|
lines.push('');
|
||||||
|
|
||||||
// [4] Recent orphans
|
// [4] 待整理
|
||||||
lines.push(' [4] \u5f85\u6574\u7406 (\u72ec\u7acb\u9884\u7b97 5000)');
|
lines.push(` [4] 待整理 (独立预算 5000)`);
|
||||||
lines.push(` \u9009\u5165: ${recentOrphanStats?.injected || 0} \u6761 | \u6d88\u8017: ${recentOrphanStats?.tokens || 0} tokens`);
|
lines.push(` 选入: ${recentOrphanStats?.injected || 0} 条 | 消耗: ${recentOrphanStats?.tokens || 0} tokens`);
|
||||||
lines.push(` \u697c\u5c42: ${recentOrphanStats?.floorRange || 'N/A'}`);
|
lines.push(` 楼层: ${recentOrphanStats?.floorRange || 'N/A'}`);
|
||||||
lines.push('');
|
lines.push('');
|
||||||
|
|
||||||
// [5] Arcs
|
// [5] 人物弧光
|
||||||
lines.push(' [5] \u4eba\u7269\u5f27\u5149 (\u4e0a\u9650 1500)');
|
lines.push(` [5] 人物弧光 (上限 1500)`);
|
||||||
lines.push(` \u9009\u5165: ${stats.arcs.count} \u6761 | \u6d88\u8017: ${stats.arcs.tokens} tokens`);
|
lines.push(` 选入: ${stats.arcs.count} 条 | 消耗: ${stats.arcs.tokens} tokens`);
|
||||||
lines.push('');
|
lines.push('');
|
||||||
|
|
||||||
// Budget bar
|
// 预算条形图
|
||||||
lines.push(' \u3010\u9884\u7b97\u5206\u5e03\u3011');
|
lines.push(' 【预算分布】');
|
||||||
const total = stats.budget.max;
|
const total = stats.budget.max;
|
||||||
const bar = (tokens, label) => {
|
const bar = (tokens, label) => {
|
||||||
const width = Math.round((tokens / total) * 30);
|
const width = Math.round((tokens / total) * 30);
|
||||||
const pctStr = pct(tokens, total) + '%';
|
const pctStr = pct(tokens, total) + '%';
|
||||||
return ` ${label.padEnd(6)} ${'\u2588'.repeat(width).padEnd(30)} ${String(tokens).padStart(5)} (${pctStr})`;
|
return ` ${label.padEnd(6)} ${'█'.repeat(width).padEnd(30)} ${String(tokens).padStart(5)} (${pctStr})`;
|
||||||
};
|
};
|
||||||
lines.push(bar(stats.world.tokens, '\u7ea6\u675f'));
|
lines.push(bar(stats.world.tokens, '约束'));
|
||||||
lines.push(bar(stats.events.tokens + stats.evidence.tokens, '\u7ecf\u5386'));
|
lines.push(bar(stats.events.tokens + stats.evidence.tokens, '经历'));
|
||||||
lines.push(bar(stats.orphans.tokens, '\u8fdc\u671f'));
|
lines.push(bar(stats.orphans.tokens, '远期'));
|
||||||
lines.push(bar(recentOrphanStats?.tokens || 0, '\u5f85\u6574\u7406'));
|
lines.push(bar(recentOrphanStats?.tokens || 0, '待整理'));
|
||||||
lines.push(bar(stats.arcs.tokens, '\u5f27\u5149'));
|
lines.push(bar(stats.arcs.tokens, '弧光'));
|
||||||
lines.push(bar(stats.budget.max - stats.budget.used, '\u5269\u4f59'));
|
lines.push(bar(stats.budget.max - stats.budget.used, '剩余'));
|
||||||
lines.push('');
|
lines.push('');
|
||||||
|
|
||||||
return lines.join('\n');
|
return lines.join('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
// 重写事件文本里的序号前缀:把 “{idx}. ” 或 “{idx}.【...】” 的 idx 替换
|
// 重写事件文本里的序号前缀:把 “{idx}. ” 或 “{idx}.【...】” 的 idx 替换
|
||||||
function renumberEventText(text, newIndex) {
|
function renumberEventText(text, newIndex) {
|
||||||
const s = String(text || "");
|
const s = String(text || "");
|
||||||
@@ -448,7 +450,7 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
|
|||||||
return lines.join("\n");
|
return lines.join("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// 候选按匹配度从高到低(保证高分优先拥有证据)
|
// 候选按相似度从高到低(保证高分优先拥有证据)
|
||||||
const candidates = [...recalledEvents].sort((a, b) => (b.similarity || 0) - (a.similarity || 0));
|
const candidates = [...recalledEvents].sort((a, b) => (b.similarity || 0) - (a.similarity || 0));
|
||||||
|
|
||||||
const selectedDirect = []; // { event, text, tokens, chunk, hasEvidence }
|
const selectedDirect = []; // { event, text, tokens, chunk, hasEvidence }
|
||||||
|
|||||||
@@ -2677,30 +2677,3 @@ h1 span {
|
|||||||
font-size: .8125rem;
|
font-size: .8125rem;
|
||||||
line-height: 1.8;
|
line-height: 1.8;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 调试日志区域手机适配 */
|
|
||||||
@media (max-width: 768px) {
|
|
||||||
#recall-log-content {
|
|
||||||
font-size: 10px;
|
|
||||||
padding: 8px;
|
|
||||||
overflow-x: hidden; /* 禁止横向滚动 */
|
|
||||||
word-break: break-all; /* 强制换行 */
|
|
||||||
white-space: pre-wrap; /* 保留换行但允许自动换行 */
|
|
||||||
}
|
|
||||||
|
|
||||||
.debug-log-viewer {
|
|
||||||
font-size: 10px;
|
|
||||||
overflow-x: hidden;
|
|
||||||
word-break: break-all;
|
|
||||||
white-space: pre-wrap;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@media (max-width: 480px) {
|
|
||||||
#recall-log-content,
|
|
||||||
.debug-log-viewer {
|
|
||||||
font-size: 9px;
|
|
||||||
line-height: 1.4;
|
|
||||||
padding: 6px;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -2,8 +2,9 @@
|
|||||||
// L1 chunk + L2 event 召回
|
// L1 chunk + L2 event 召回
|
||||||
// - 全量向量打分
|
// - 全量向量打分
|
||||||
// - 指数衰减加权 Query Embedding
|
// - 指数衰减加权 Query Embedding
|
||||||
// - 实体/参与者加分
|
// - L0 floor 加权
|
||||||
// - MMR 去重
|
// - RRF 混合检索(向量 + 文本)
|
||||||
|
// - MMR 去重(融合后执行)
|
||||||
// - floor 稀疏去重
|
// - floor 稀疏去重
|
||||||
|
|
||||||
import { getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from './chunk-store.js';
|
import { getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from './chunk-store.js';
|
||||||
@@ -18,6 +19,7 @@ import {
|
|||||||
stateToVirtualChunks,
|
stateToVirtualChunks,
|
||||||
mergeAndSparsify,
|
mergeAndSparsify,
|
||||||
} from './state-recall.js';
|
} from './state-recall.js';
|
||||||
|
import { ensureEventTextIndex, searchEventsByText } from './text-search.js';
|
||||||
|
|
||||||
const MODULE_ID = 'recall';
|
const MODULE_ID = 'recall';
|
||||||
|
|
||||||
@@ -27,9 +29,8 @@ const CONFIG = {
|
|||||||
QUERY_MAX_CHARS: 600,
|
QUERY_MAX_CHARS: 600,
|
||||||
QUERY_CONTEXT_CHARS: 240,
|
QUERY_CONTEXT_CHARS: 240,
|
||||||
|
|
||||||
// 因果链
|
CAUSAL_CHAIN_MAX_DEPTH: 10,
|
||||||
CAUSAL_CHAIN_MAX_DEPTH: 10, // 放宽跳数,让图自然终止
|
CAUSAL_INJECT_MAX: 30,
|
||||||
CAUSAL_INJECT_MAX: 30, // 放宽上限,由 prompt token 预算最终控制
|
|
||||||
|
|
||||||
CANDIDATE_CHUNKS: 200,
|
CANDIDATE_CHUNKS: 200,
|
||||||
CANDIDATE_EVENTS: 150,
|
CANDIDATE_EVENTS: 150,
|
||||||
@@ -38,18 +39,16 @@ const CONFIG = {
|
|||||||
MAX_EVENTS: 120,
|
MAX_EVENTS: 120,
|
||||||
|
|
||||||
MIN_SIMILARITY_CHUNK: 0.6,
|
MIN_SIMILARITY_CHUNK: 0.6,
|
||||||
|
MIN_SIMILARITY_CHUNK_RECENT: 0.5,
|
||||||
MIN_SIMILARITY_EVENT: 0.65,
|
MIN_SIMILARITY_EVENT: 0.65,
|
||||||
MMR_LAMBDA: 0.72,
|
MMR_LAMBDA: 0.72,
|
||||||
|
|
||||||
BONUS_PARTICIPANT_HIT: 0.08,
|
|
||||||
BONUS_TEXT_HIT: 0.05,
|
|
||||||
BONUS_WORLD_TOPIC_HIT: 0.06,
|
|
||||||
|
|
||||||
// L0 配置
|
|
||||||
L0_FLOOR_BONUS_FACTOR: 0.10,
|
L0_FLOOR_BONUS_FACTOR: 0.10,
|
||||||
FLOOR_MAX_CHUNKS: 2,
|
FLOOR_MAX_CHUNKS: 2,
|
||||||
|
|
||||||
FLOOR_LIMIT: 1,
|
FLOOR_LIMIT: 1,
|
||||||
|
|
||||||
|
RRF_K: 60,
|
||||||
|
TEXT_SEARCH_LIMIT: 80,
|
||||||
};
|
};
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
@@ -75,10 +74,53 @@ function normalizeVec(v) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// 因果链追溯(Graph-augmented retrieval)
|
// RRF 融合
|
||||||
// - 从已召回事件出发,沿 causedBy 向上追溯祖先事件
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// - 记录边:chainFrom = 哪个召回事件需要它
|
|
||||||
// - 不在这里决定“是否额外注入”,只负责遍历与结构化结果
|
/**
 * Fuse a vector-ranked list and a text-ranked list with Reciprocal Rank Fusion.
 *
 * Each list contributes `1 / (k + rank)` to an event's fused score, where
 * `rank` is 1-based. An event present in both lists is tagged 'HYBRID',
 * otherwise 'VECTOR' or 'TEXT'.
 *
 * @param {Array<{event: {id: string}, vector?: number[]}>} vectorRanked
 *   Vector hits, best first; the rank is the position in this array.
 *   `null`/`undefined` is treated as an empty list.
 * @param {Array<{id: string, textRank?: number}>} textRanked
 *   Text hits. `textRank` is used as the 1-based rank; when it is missing or
 *   not a positive finite number the position in this array is used instead.
 *   `null`/`undefined` is treated as an empty list.
 * @param {Map<string, object>} eventById
 *   Lookup used to attach the event object; ids that are not in the map are
 *   dropped from the result.
 * @param {number} [k=CONFIG.RRF_K] RRF smoothing constant.
 * @returns {Array<{id: string, rrf: number, vRank: number, tRank: number,
 *   type: 'HYBRID'|'VECTOR'|'TEXT', vector?: number[], event: object}>}
 *   Sorted by fused score (desc), then type (HYBRID, VECTOR, TEXT),
 *   then vector rank, then text rank.
 */
function fuseEventsByRRF(vectorRanked, textRanked, eventById, k = CONFIG.RRF_K) {
    const fusedById = new Map();

    // Fetch the accumulator for an id, creating it on first sight.
    const upsert = (id) => {
        if (!fusedById.has(id)) {
            fusedById.set(id, { id, rrf: 0, vRank: Infinity, tRank: Infinity, type: 'TEXT' });
        }
        return fusedById.get(id);
    };

    (vectorRanked || []).forEach((hit, i) => {
        const id = hit?.event?.id;
        if (!id) return;
        const entry = upsert(id);
        entry.vRank = i + 1;
        entry.rrf += 1 / (k + i + 1);
        entry.type = entry.tRank !== Infinity ? 'HYBRID' : 'VECTOR';
        entry.vector = hit.vector;
    });

    (textRanked || []).forEach((hit, i) => {
        const id = hit?.id;
        // Same rule as the vector branch: a hit without an id cannot be fused.
        if (!id) return;
        // A missing/invalid rank would turn rrf into NaN and make the sort
        // comparator inconsistent, so fall back to the 1-based list position.
        const rank = Number.isFinite(hit.textRank) && hit.textRank > 0 ? hit.textRank : i + 1;
        const entry = upsert(id);
        entry.tRank = rank;
        entry.rrf += 1 / (k + rank);
        entry.type = entry.vRank !== Infinity ? 'HYBRID' : 'TEXT';
    });

    const typePriority = { HYBRID: 0, VECTOR: 1, TEXT: 2 };

    return Array.from(fusedById.values())
        .map(entry => ({ ...entry, event: eventById.get(entry.id) }))
        .filter(entry => entry.event)
        .sort((a, b) => {
            if (b.rrf !== a.rrf) return b.rrf - a.rrf;
            if (typePriority[a.type] !== typePriority[b.type]) {
                return typePriority[a.type] - typePriority[b.type];
            }
            if (a.vRank !== b.vRank) return a.vRank - b.vRank;
            return a.tRank - b.tRank;
        });
}
|
||||||
|
|
||||||
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
// 因果链追溯
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
function buildEventIndex(allEvents) {
|
function buildEventIndex(allEvents) {
|
||||||
@@ -89,9 +131,6 @@ function buildEventIndex(allEvents) {
|
|||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @returns {Map<string, {event, depth, chainFrom}>}
|
|
||||||
*/
|
|
||||||
function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MAX_DEPTH) {
|
function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MAX_DEPTH) {
|
||||||
const out = new Map();
|
const out = new Map();
|
||||||
const idRe = /^evt-\d+$/;
|
const idRe = /^evt-\d+$/;
|
||||||
@@ -103,7 +142,6 @@ function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUS
|
|||||||
const ev = eventIndex.get(parentId);
|
const ev = eventIndex.get(parentId);
|
||||||
if (!ev) return;
|
if (!ev) return;
|
||||||
|
|
||||||
// 如果同一个祖先被多个召回事件引用:保留更“近”的深度或追加来源
|
|
||||||
const existed = out.get(parentId);
|
const existed = out.get(parentId);
|
||||||
if (!existed) {
|
if (!existed) {
|
||||||
out.set(parentId, { event: ev, depth, chainFrom: [chainFrom] });
|
out.set(parentId, { event: ev, depth, chainFrom: [chainFrom] });
|
||||||
@@ -128,20 +166,12 @@ function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUS
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 因果事件排序:引用数 > 深度 > 编号
|
|
||||||
*/
|
|
||||||
function sortCausalEvents(causalArray) {
|
function sortCausalEvents(causalArray) {
|
||||||
return causalArray.sort((a, b) => {
|
return causalArray.sort((a, b) => {
|
||||||
// 1. 被多条召回链引用的优先
|
|
||||||
const refDiff = b.chainFrom.length - a.chainFrom.length;
|
const refDiff = b.chainFrom.length - a.chainFrom.length;
|
||||||
if (refDiff !== 0) return refDiff;
|
if (refDiff !== 0) return refDiff;
|
||||||
|
|
||||||
// 2. 深度浅的优先
|
|
||||||
const depthDiff = a.depth - b.depth;
|
const depthDiff = a.depth - b.depth;
|
||||||
if (depthDiff !== 0) return depthDiff;
|
if (depthDiff !== 0) return depthDiff;
|
||||||
|
|
||||||
// 3. 事件编号排序
|
|
||||||
return String(a.event.id).localeCompare(String(b.event.id));
|
return String(a.event.id).localeCompare(String(b.event.id));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -150,7 +180,6 @@ function normalize(s) {
|
|||||||
return String(s || '').normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim();
|
return String(s || '').normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
// 从 summary 解析楼层范围:(#321-322) 或 (#321)
|
|
||||||
function parseFloorRange(summary) {
|
function parseFloorRange(summary) {
|
||||||
if (!summary) return null;
|
if (!summary) return null;
|
||||||
const match = String(summary).match(/\(#(\d+)(?:-(\d+))?\)/);
|
const match = String(summary).match(/\(#(\d+)(?:-(\d+))?\)/);
|
||||||
@@ -161,8 +190,6 @@ function parseFloorRange(summary) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function cleanForRecall(text) {
|
function cleanForRecall(text) {
|
||||||
// 1. 应用用户自定义过滤规则
|
|
||||||
// 2. 移除 TTS 标记(硬编码)
|
|
||||||
return filterText(text).replace(/\[tts:[^\]]*\]/gi, '').trim();
|
return filterText(text).replace(/\[tts:[^\]]*\]/gi, '').trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -187,13 +214,11 @@ function buildQuerySegments(chat, count, excludeLastAi, pendingUserMessage = nul
|
|||||||
messages = messages.slice(0, -1);
|
messages = messages.slice(0, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ★ 如果有待处理的用户消息且 chat 中最后一条不是它,追加虚拟消息
|
|
||||||
if (pendingUserMessage) {
|
if (pendingUserMessage) {
|
||||||
const lastMsg = messages[messages.length - 1];
|
const lastMsg = messages[messages.length - 1];
|
||||||
const lastMsgText = lastMsg?.mes?.trim() || "";
|
const lastMsgText = lastMsg?.mes?.trim() || "";
|
||||||
const pendingText = pendingUserMessage.trim();
|
const pendingText = pendingUserMessage.trim();
|
||||||
|
|
||||||
// 避免重复(如果 chat 已包含该消息则不追加)
|
|
||||||
if (lastMsgText !== pendingText) {
|
if (lastMsgText !== pendingText) {
|
||||||
messages = [...messages, { is_user: true, name: name1 || "用户", mes: pendingUserMessage }];
|
messages = [...messages, { is_user: true, name: name1 || "用户", mes: pendingUserMessage }];
|
||||||
}
|
}
|
||||||
@@ -330,7 +355,7 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) {
|
|||||||
// L1 Chunks 检索
|
// L1 Chunks 检索
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map()) {
|
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(), lastSummarizedFloor = -1) {
|
||||||
const { chatId } = getContext();
|
const { chatId } = getContext();
|
||||||
if (!chatId || !queryVector?.length) return [];
|
if (!chatId || !queryVector?.length) return [];
|
||||||
|
|
||||||
@@ -359,11 +384,21 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
|
|||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
// Pre-filter stats for logging
|
const candidates = scored
|
||||||
|
.filter(s => {
|
||||||
|
const threshold = s.floor > lastSummarizedFloor
|
||||||
|
? CONFIG.MIN_SIMILARITY_CHUNK_RECENT
|
||||||
|
: CONFIG.MIN_SIMILARITY_CHUNK;
|
||||||
|
return s.similarity >= threshold;
|
||||||
|
})
|
||||||
|
.sort((a, b) => b.similarity - a.similarity)
|
||||||
|
.slice(0, CONFIG.CANDIDATE_CHUNKS);
|
||||||
|
|
||||||
const preFilterStats = {
|
const preFilterStats = {
|
||||||
total: scored.length,
|
total: scored.length,
|
||||||
passThreshold: scored.filter(s => s.similarity >= CONFIG.MIN_SIMILARITY_CHUNK).length,
|
passThreshold: candidates.length,
|
||||||
threshold: CONFIG.MIN_SIMILARITY_CHUNK,
|
thresholdRemote: CONFIG.MIN_SIMILARITY_CHUNK,
|
||||||
|
thresholdRecent: CONFIG.MIN_SIMILARITY_CHUNK_RECENT,
|
||||||
distribution: {
|
distribution: {
|
||||||
'0.8+': scored.filter(s => s.similarity >= 0.8).length,
|
'0.8+': scored.filter(s => s.similarity >= 0.8).length,
|
||||||
'0.7-0.8': scored.filter(s => s.similarity >= 0.7 && s.similarity < 0.8).length,
|
'0.7-0.8': scored.filter(s => s.similarity >= 0.7 && s.similarity < 0.8).length,
|
||||||
@@ -373,12 +408,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
const candidates = scored
|
|
||||||
.filter(s => s.similarity >= CONFIG.MIN_SIMILARITY_CHUNK)
|
|
||||||
.sort((a, b) => b.similarity - a.similarity)
|
|
||||||
.slice(0, CONFIG.CANDIDATE_CHUNKS);
|
|
||||||
|
|
||||||
// 动态 K:质量不够就少拿
|
|
||||||
const dynamicK = Math.min(CONFIG.MAX_CHUNKS, candidates.length);
|
const dynamicK = Math.min(CONFIG.MAX_CHUNKS, candidates.length);
|
||||||
|
|
||||||
const selected = mmrSelect(
|
const selected = mmrSelect(
|
||||||
@@ -389,8 +418,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
|
|||||||
c => c.similarity
|
c => c.similarity
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
// floor 稀疏去重:每个楼层只保留该楼层匹配度最高的那条
|
|
||||||
const bestByFloor = new Map();
|
const bestByFloor = new Map();
|
||||||
for (const s of selected) {
|
for (const s of selected) {
|
||||||
const prev = bestByFloor.get(s.floor);
|
const prev = bestByFloor.get(s.floor);
|
||||||
@@ -399,7 +426,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 最终结果按匹配度降序
|
|
||||||
const sparse = Array.from(bestByFloor.values()).sort((a, b) => b.similarity - a.similarity);
|
const sparse = Array.from(bestByFloor.values()).sort((a, b) => b.similarity - a.similarity);
|
||||||
|
|
||||||
const floors = [...new Set(sparse.map(c => c.floor))];
|
const floors = [...new Set(sparse.map(c => c.floor))];
|
||||||
@@ -420,7 +446,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
|
|||||||
};
|
};
|
||||||
}).filter(Boolean);
|
}).filter(Boolean);
|
||||||
|
|
||||||
// Attach stats for logging
|
|
||||||
if (results.length > 0) {
|
if (results.length > 0) {
|
||||||
results._preFilterStats = preFilterStats;
|
results._preFilterStats = preFilterStats;
|
||||||
}
|
}
|
||||||
@@ -429,14 +454,12 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map())
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// L2 Events 检索
|
// L2 Events 检索(RRF 混合 + MMR 后置)
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities, l0FloorBonus = new Map()) {
|
async function searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntities, l0FloorBonus = new Map()) {
|
||||||
const { chatId, name1 } = getContext();
|
const { chatId } = getContext();
|
||||||
if (!chatId || !queryVector?.length) {
|
if (!chatId || !queryVector?.length) return [];
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
const meta = await getMeta(chatId);
|
const meta = await getMeta(chatId);
|
||||||
const fp = getEngineFingerprint(vectorConfig);
|
const fp = getEngineFingerprint(vectorConfig);
|
||||||
@@ -446,53 +469,29 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
|
|||||||
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
|
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
|
||||||
if (!vectorMap.size) return [];
|
if (!vectorMap.size) return [];
|
||||||
|
|
||||||
const userName = normalize(name1);
|
// 构建/更新文本索引
|
||||||
const queryNormList = (queryEntities || []).map(normalize).filter(Boolean);
|
const revision = `${chatId}:${store?.updatedAt || 0}:${allEvents.length}`;
|
||||||
const querySet = new Set(queryNormList);
|
ensureEventTextIndex(allEvents, revision);
|
||||||
|
|
||||||
// 只取硬约束类的 world topic
|
// 文本路检索
|
||||||
const worldTopics = (store?.json?.world || [])
|
const textRanked = searchEventsByText(queryTextForSearch, CONFIG.TEXT_SEARCH_LIMIT);
|
||||||
.filter(w => ['inventory', 'rule', 'knowledge'].includes(String(w.category).toLowerCase()))
|
|
||||||
.map(w => normalize(w.topic))
|
// ═══════════════════════════════════════════════════════════════════════
|
||||||
.filter(Boolean);
|
// 向量路检索(只保留 L0 加权)
|
||||||
|
// ═══════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
const scored = (allEvents || []).map((event, idx) => {
|
const scored = (allEvents || []).map((event, idx) => {
|
||||||
const v = vectorMap.get(event.id);
|
const v = vectorMap.get(event.id);
|
||||||
const sim = v ? cosineSimilarity(queryVector, v) : 0;
|
const sim = v ? cosineSimilarity(queryVector, v) : 0;
|
||||||
|
|
||||||
let bonus = 0;
|
let bonus = 0;
|
||||||
const reasons = [];
|
|
||||||
|
|
||||||
// participants 命中
|
// L0 加权
|
||||||
const participants = (event.participants || []).map(normalize).filter(Boolean);
|
|
||||||
const hitCount = participants.filter(p => p !== userName && querySet.has(p)).length;
|
|
||||||
const hasParticipantHit = hitCount > 0;
|
|
||||||
if (hasParticipantHit) {
|
|
||||||
bonus += CONFIG.BONUS_PARTICIPANT_HIT * Math.log2(hitCount + 1);
|
|
||||||
reasons.push(hitCount > 1 ? `participant×${hitCount}` : 'participant');
|
|
||||||
}
|
|
||||||
|
|
||||||
// text 命中
|
|
||||||
const text = normalize(`${event.title || ''} ${event.summary || ''}`);
|
|
||||||
const textHitCount = queryNormList.filter(e => text.includes(e)).length;
|
|
||||||
if (textHitCount > 0) {
|
|
||||||
bonus += CONFIG.BONUS_TEXT_HIT * Math.log2(textHitCount + 1);
|
|
||||||
reasons.push(textHitCount > 1 ? `text×${textHitCount}` : 'text');
|
|
||||||
}
|
|
||||||
|
|
||||||
// world topic 命中
|
|
||||||
if (worldTopics.some(topic => querySet.has(topic) && text.includes(topic))) {
|
|
||||||
bonus += CONFIG.BONUS_WORLD_TOPIC_HIT;
|
|
||||||
reasons.push('world');
|
|
||||||
}
|
|
||||||
|
|
||||||
// L0 加权:事件覆盖楼层范围命中
|
|
||||||
const range = parseFloorRange(event.summary);
|
const range = parseFloorRange(event.summary);
|
||||||
if (range) {
|
if (range) {
|
||||||
for (let f = range.start; f <= range.end; f++) {
|
for (let f = range.start; f <= range.end; f++) {
|
||||||
if (l0FloorBonus.has(f)) {
|
if (l0FloorBonus.has(f)) {
|
||||||
bonus += l0FloorBonus.get(f);
|
bonus += l0FloorBonus.get(f);
|
||||||
reasons.push('L0');
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -503,15 +502,11 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
|
|||||||
_idx: idx,
|
_idx: idx,
|
||||||
event,
|
event,
|
||||||
similarity: sim,
|
similarity: sim,
|
||||||
bonus,
|
|
||||||
finalScore: sim + bonus,
|
finalScore: sim + bonus,
|
||||||
reasons,
|
|
||||||
isDirect: hasParticipantHit,
|
|
||||||
vector: v,
|
vector: v,
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
// ★ 记录过滤前的分布(用 finalScore,与显示一致)
|
|
||||||
const preFilterDistribution = {
|
const preFilterDistribution = {
|
||||||
total: scored.length,
|
total: scored.length,
|
||||||
'0.85+': scored.filter(s => s.finalScore >= 0.85).length,
|
'0.85+': scored.filter(s => s.finalScore >= 0.85).length,
|
||||||
@@ -523,41 +518,66 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
|
|||||||
threshold: CONFIG.MIN_SIMILARITY_EVENT,
|
threshold: CONFIG.MIN_SIMILARITY_EVENT,
|
||||||
};
|
};
|
||||||
|
|
||||||
// ★ 过滤改成用 finalScore(包含 bonus)
|
// 向量路:纯相似度排序(不在这里做 MMR)
|
||||||
const candidates = scored
|
const candidates = scored
|
||||||
.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
|
.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
|
||||||
.sort((a, b) => b.finalScore - a.finalScore)
|
.sort((a, b) => b.finalScore - a.finalScore)
|
||||||
.slice(0, CONFIG.CANDIDATE_EVENTS);
|
.slice(0, CONFIG.CANDIDATE_EVENTS);
|
||||||
|
|
||||||
// 动态 K:质量不够就少拿
|
const vectorRanked = candidates.map(s => ({
|
||||||
const dynamicK = Math.min(CONFIG.MAX_EVENTS, candidates.length);
|
event: s.event,
|
||||||
|
similarity: s.finalScore,
|
||||||
|
vector: s.vector,
|
||||||
|
}));
|
||||||
|
|
||||||
const selected = mmrSelect(
|
// RRF 融合
|
||||||
candidates,
|
const eventById = new Map(allEvents.map(e => [e.id, e]));
|
||||||
dynamicK,
|
const fused = fuseEventsByRRF(vectorRanked, textRanked, eventById);
|
||||||
|
|
||||||
|
// 向量非空时过滤纯 TEXT
|
||||||
|
const hasVector = vectorRanked.length > 0;
|
||||||
|
const filtered = hasVector ? fused.filter(x => x.type !== 'TEXT') : fused;
|
||||||
|
|
||||||
|
// MMR 放在融合后:对最终候选集去重
|
||||||
|
const mmrInput = filtered.slice(0, CONFIG.CANDIDATE_EVENTS).map(x => ({
|
||||||
|
...x,
|
||||||
|
_id: x.id,
|
||||||
|
}));
|
||||||
|
|
||||||
|
const mmrOutput = mmrSelect(
|
||||||
|
mmrInput,
|
||||||
|
CONFIG.MAX_EVENTS,
|
||||||
CONFIG.MMR_LAMBDA,
|
CONFIG.MMR_LAMBDA,
|
||||||
c => c.vector,
|
c => c.vector || null,
|
||||||
c => c.finalScore
|
c => c.rrf
|
||||||
);
|
);
|
||||||
|
|
||||||
return selected
|
// 构造结果
|
||||||
.sort((a, b) => b.finalScore - a.finalScore)
|
const results = mmrOutput.map(x => ({
|
||||||
.map(s => ({
|
event: x.event,
|
||||||
event: s.event,
|
similarity: x.rrf,
|
||||||
similarity: s.finalScore,
|
_recallType: x.type === 'HYBRID' ? 'DIRECT' : 'SIMILAR',
|
||||||
_recallType: s.isDirect ? 'DIRECT' : 'SIMILAR',
|
_recallReason: x.type,
|
||||||
_recallReason: s.reasons.length ? s.reasons.join('+') : '相似',
|
_rrfDetail: { vRank: x.vRank, tRank: x.tRank, rrf: x.rrf },
|
||||||
_preFilterDistribution: preFilterDistribution,
|
}));
|
||||||
}));
|
|
||||||
|
// 统计信息附加到第一条结果
|
||||||
|
if (results.length > 0) {
|
||||||
|
results[0]._preFilterDistribution = preFilterDistribution;
|
||||||
|
results[0]._rrfStats = {
|
||||||
|
vectorCount: vectorRanked.length,
|
||||||
|
textCount: textRanked.length,
|
||||||
|
hybridCount: fused.filter(x => x.type === 'HYBRID').length,
|
||||||
|
vectorOnlyCount: fused.filter(x => x.type === 'VECTOR').length,
|
||||||
|
textOnlyFiltered: fused.filter(x => x.type === 'TEXT').length,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// 日志:因果树格式化
|
// 日志
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
|
||||||
|
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
|
||||||
// 日志:主报告
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
function formatRecallLog({
|
function formatRecallLog({
|
||||||
@@ -571,21 +591,19 @@ function formatRecallLog({
|
|||||||
causalEvents = [],
|
causalEvents = [],
|
||||||
chunkPreFilterStats = null,
|
chunkPreFilterStats = null,
|
||||||
l0Results = [],
|
l0Results = [],
|
||||||
l0PreFilterStats = null,
|
|
||||||
}) {
|
}) {
|
||||||
const lines = [
|
const lines = [
|
||||||
'\u2554' + '\u2550'.repeat(62) + '\u2557',
|
'\u2554' + '\u2550'.repeat(62) + '\u2557',
|
||||||
'\u2551 \u8bb0\u5fc6\u53ec\u56de\u62a5\u544a \u2551',
|
'\u2551 记忆召回报告 \u2551',
|
||||||
'\u2560' + '\u2550'.repeat(62) + '\u2563',
|
'\u2560' + '\u2550'.repeat(62) + '\u2563',
|
||||||
`\u2551 \u8017\u65f6: ${elapsed}ms`,
|
`\u2551 耗时: ${elapsed}ms`,
|
||||||
'\u255a' + '\u2550'.repeat(62) + '\u255d',
|
'\u255a' + '\u2550'.repeat(62) + '\u255d',
|
||||||
'',
|
'',
|
||||||
'\u250c' + '\u2500'.repeat(61) + '\u2510',
|
'\u250c' + '\u2500'.repeat(61) + '\u2510',
|
||||||
'\u2502 \u3010\u67e5\u8be2\u6784\u5efa\u3011\u6700\u8fd1 5 \u6761\u6d88\u606f\uff0c\u6307\u6570\u8870\u51cf\u52a0\u6743 (\u03b2=0.7) \u2502',
|
'\u2502 【查询构建】最近 5 条消息,指数衰减加权 (β=0.7) \u2502',
|
||||||
'\u2514' + '\u2500'.repeat(61) + '\u2518',
|
'\u2514' + '\u2500'.repeat(61) + '\u2518',
|
||||||
];
|
];
|
||||||
|
|
||||||
// Keep query previews only (the only place to keep raw text)
|
|
||||||
const segmentsSorted = segments.map((s, i) => ({
|
const segmentsSorted = segments.map((s, i) => ({
|
||||||
idx: i + 1,
|
idx: i + 1,
|
||||||
weight: weights?.[i] ?? 0,
|
weight: weights?.[i] ?? 0,
|
||||||
@@ -595,74 +613,57 @@ function formatRecallLog({
|
|||||||
segmentsSorted.forEach((s, rank) => {
|
segmentsSorted.forEach((s, rank) => {
|
||||||
const bar = '\u2588'.repeat(Math.round(s.weight * 20));
|
const bar = '\u2588'.repeat(Math.round(s.weight * 20));
|
||||||
const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text;
|
const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text;
|
||||||
const marker = rank === 0 ? ' \u25c0 \u4e3b\u5bfc' : '';
|
const marker = rank === 0 ? ' ◀ 主导' : '';
|
||||||
lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
|
lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
|
||||||
});
|
});
|
||||||
|
|
||||||
lines.push('');
|
lines.push('');
|
||||||
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
|
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
|
||||||
lines.push('\u2502 \u3010\u63d0\u53d6\u5b9e\u4f53\u3011 \u2502');
|
lines.push('\u2502 【提取实体】 \u2502');
|
||||||
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
|
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
|
||||||
lines.push(` ${queryEntities?.length ? queryEntities.join('\u3001') : '(\u65e0)'}`);
|
lines.push(` ${queryEntities?.length ? queryEntities.join('、') : '(无)'}`);
|
||||||
|
|
||||||
// Recall stats (numbers only)
|
|
||||||
lines.push('');
|
lines.push('');
|
||||||
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
|
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
|
||||||
lines.push('\u2502 \u3010\u53ec\u56de\u7edf\u8ba1\u3011 \u2502');
|
lines.push('\u2502 【召回统计】 \u2502');
|
||||||
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
|
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
|
||||||
|
|
||||||
// L0
|
// L0
|
||||||
const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b);
|
const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b);
|
||||||
lines.push(' L0 \u8bed\u4e49\u951a\u70b9:');
|
lines.push(' L0 语义锚点:');
|
||||||
if (l0Results.length) {
|
if (l0Results.length) {
|
||||||
const l0Dist = {
|
lines.push(` 选入: ${l0Results.length} 条 | 影响楼层: ${l0Floors.join(', ')} (+${CONFIG.L0_FLOOR_BONUS_FACTOR} 加权)`);
|
||||||
'0.8+': l0Results.filter(r => r.similarity >= 0.8).length,
|
|
||||||
'0.7-0.8': l0Results.filter(r => r.similarity >= 0.7 && r.similarity < 0.8).length,
|
|
||||||
'0.6-0.7': l0Results.filter(r => r.similarity >= 0.6 && r.similarity < 0.7).length,
|
|
||||||
'0.55-0.6': l0Results.filter(r => r.similarity >= 0.55 && r.similarity < 0.6).length,
|
|
||||||
};
|
|
||||||
lines.push(` \u9009\u5165: ${l0Results.length} \u6761 | \u5f71\u54cd\u697c\u5c42: ${l0Floors.join(', ')} (+${CONFIG.L0_FLOOR_BONUS_FACTOR} \u52a0\u6743)`);
|
|
||||||
lines.push(` \u5339\u914d\u5ea6: 0.8+: ${l0Dist['0.8+']} | 0.7-0.8: ${l0Dist['0.7-0.8']} | 0.6-0.7: ${l0Dist['0.6-0.7']} | 0.55-0.6: ${l0Dist['0.55-0.6']}`);
|
|
||||||
} else {
|
} else {
|
||||||
lines.push(' (\u65e0\u6570\u636e)');
|
lines.push(' (无数据)');
|
||||||
}
|
}
|
||||||
|
|
||||||
// L1
|
// L1
|
||||||
lines.push('');
|
lines.push('');
|
||||||
lines.push(' L1 \u539f\u6587\u7247\u6bb5:');
|
lines.push(' L1 原文片段:');
|
||||||
if (chunkPreFilterStats) {
|
if (chunkPreFilterStats) {
|
||||||
const dist = chunkPreFilterStats.distribution || {};
|
const dist = chunkPreFilterStats.distribution || {};
|
||||||
lines.push(` \u5168\u91cf: ${chunkPreFilterStats.total} \u6761 | \u901a\u8fc7\u9608\u503c(\u2265${chunkPreFilterStats.threshold}): ${chunkPreFilterStats.passThreshold} \u6761 | \u6700\u7ec8: ${chunkResults.length} \u6761`);
|
lines.push(` \u5168\u91cf: ${chunkPreFilterStats.total} \u6761 | \u901a\u8fc7\u9608\u503c(\u8fdc\u671f\u2265${chunkPreFilterStats.thresholdRemote}, \u5f85\u6574\u7406\u2265${chunkPreFilterStats.thresholdRecent}): ${chunkPreFilterStats.passThreshold} \u6761 | \u6700\u7ec8: ${chunkResults.length} \u6761`);
|
||||||
lines.push(` \u5339\u914d\u5ea6: 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0} | <0.6: ${(dist['0.55-0.6'] || 0) + (dist['<0.55'] || 0)}`);
|
lines.push(` 匹配度: 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0}`);
|
||||||
|
|
||||||
const floorCounts = new Map();
|
|
||||||
chunkResults.forEach(c => floorCounts.set(c.floor, (floorCounts.get(c.floor) || 0) + 1));
|
|
||||||
const floorStats = `\u8986\u76d6 ${floorCounts.size} \u4e2a\u697c\u5c42`;
|
|
||||||
lines.push(` ${floorStats}`);
|
|
||||||
} else {
|
} else {
|
||||||
lines.push(` \u9009\u5165: ${chunkResults.length} \u6761`);
|
lines.push(` 选入: ${chunkResults.length} 条`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// L2
|
// L2
|
||||||
const preFilterDist = eventResults[0]?._preFilterDistribution || {};
|
const rrfStats = eventResults[0]?._rrfStats || {};
|
||||||
const directCount = eventResults.filter(e => e._recallType === 'DIRECT').length;
|
|
||||||
const similarCount = eventResults.filter(e => e._recallType === 'SIMILAR').length;
|
|
||||||
|
|
||||||
lines.push('');
|
lines.push('');
|
||||||
lines.push(' L2 \u4e8b\u4ef6\u8bb0\u5fc6:');
|
lines.push(' L2 事件记忆 (RRF 混合检索):');
|
||||||
lines.push(` \u603b\u4e8b\u4ef6: ${allEvents.length} \u6761 | \u901a\u8fc7\u9608\u503c(\u2265${preFilterDist.threshold || 0.65}): ${preFilterDist.passThreshold || 0} \u6761 | \u6700\u7ec8: ${eventResults.length} \u6761`);
|
lines.push(` 总事件: ${allEvents.length} 条 | 最终: ${eventResults.length} 条`);
|
||||||
if (preFilterDist.total) {
|
lines.push(` 向量路: ${rrfStats.vectorCount || 0} 条 | 文本路: ${rrfStats.textCount || 0} 条`);
|
||||||
lines.push(` \u5339\u914d\u5ea6: 0.85+: ${preFilterDist['0.85+'] || 0} | 0.7-0.85: ${preFilterDist['0.7-0.85'] || 0} | 0.6-0.7: ${preFilterDist['0.6-0.7'] || 0} | <0.6: ${(preFilterDist['0.5-0.6'] || 0) + (preFilterDist['<0.5'] || 0)}`);
|
lines.push(` HYBRID: ${rrfStats.hybridCount || 0} 条 | 纯 VECTOR: ${rrfStats.vectorOnlyCount || 0} 条 | 纯 TEXT (已过滤): ${rrfStats.textOnlyFiltered || 0} 条`);
|
||||||
}
|
|
||||||
lines.push(` \u7c7b\u578b: DIRECT ${directCount} \u6761 | SIMILAR ${similarCount} \u6761`);
|
|
||||||
|
|
||||||
// Causal chains
|
// Causal
|
||||||
if (causalEvents.length) {
|
if (causalEvents.length) {
|
||||||
const maxRefs = Math.max(...causalEvents.map(c => c.chainFrom?.length || 0));
|
const maxRefs = Math.max(...causalEvents.map(c => c.chainFrom?.length || 0));
|
||||||
const maxDepth = Math.max(...causalEvents.map(c => c.depth || 0));
|
const maxDepth = Math.max(...causalEvents.map(c => c.depth || 0));
|
||||||
lines.push('');
|
lines.push('');
|
||||||
lines.push(' \u56e0\u679c\u94fe\u8ffd\u6eaf:');
|
lines.push(' 因果链追溯:');
|
||||||
lines.push(` \u8ffd\u6eaf: ${causalEvents.length} \u6761 | \u6700\u5927\u88ab\u5f15: ${maxRefs} \u6b21 | \u6700\u5927\u6df1\u5ea6: ${maxDepth}`);
|
lines.push(` 追溯: ${causalEvents.length} 条 | 最大被引: ${maxRefs} 次 | 最大深度: ${maxDepth}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
lines.push('');
|
lines.push('');
|
||||||
@@ -677,6 +678,7 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
|
|||||||
const T0 = performance.now();
|
const T0 = performance.now();
|
||||||
const { chat } = getContext();
|
const { chat } = getContext();
|
||||||
const store = getSummaryStore();
|
const store = getSummaryStore();
|
||||||
|
const lastSummarizedFloor = store?.lastSummarizedMesId ?? -1;
|
||||||
const { pendingUserMessage = null } = options;
|
const { pendingUserMessage = null } = options;
|
||||||
|
|
||||||
if (!allEvents?.length) {
|
if (!allEvents?.length) {
|
||||||
@@ -702,9 +704,15 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
|
|||||||
const lexicon = buildEntityLexicon(store, allEvents);
|
const lexicon = buildEntityLexicon(store, allEvents);
|
||||||
const queryEntities = extractEntities(segments.join('\n'), lexicon);
|
const queryEntities = extractEntities(segments.join('\n'), lexicon);
|
||||||
|
|
||||||
// ════════════════════════════════════════════════════════════════════════
|
// 构建文本查询串:最后一条消息 + 实体 + 关键词
|
||||||
|
const lastSeg = segments[segments.length - 1] || '';
|
||||||
|
const queryTextForSearch = [
|
||||||
|
lastSeg,
|
||||||
|
...queryEntities,
|
||||||
|
...(store?.json?.keywords || []).slice(0, 5).map(k => k.text),
|
||||||
|
].join(' ');
|
||||||
|
|
||||||
// L0 召回
|
// L0 召回
|
||||||
// ════════════════════════════════════════════════════════════════════════
|
|
||||||
let l0Results = [];
|
let l0Results = [];
|
||||||
let l0FloorBonus = new Map();
|
let l0FloorBonus = new Map();
|
||||||
let l0VirtualChunks = [];
|
let l0VirtualChunks = [];
|
||||||
@@ -718,21 +726,15 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
|
|||||||
}
|
}
|
||||||
|
|
||||||
const [chunkResults, eventResults] = await Promise.all([
|
const [chunkResults, eventResults] = await Promise.all([
|
||||||
searchChunks(queryVector, vectorConfig, l0FloorBonus),
|
searchChunks(queryVector, vectorConfig, l0FloorBonus, lastSummarizedFloor),
|
||||||
searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities, l0FloorBonus),
|
searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntities, l0FloorBonus),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const chunkPreFilterStats = chunkResults._preFilterStats || null;
|
const chunkPreFilterStats = chunkResults._preFilterStats || null;
|
||||||
|
|
||||||
// ════════════════════════════════════════════════════════════════════════
|
|
||||||
// 合并 L0 虚拟 chunks 到 L1
|
|
||||||
// ════════════════════════════════════════════════════════════════════════
|
|
||||||
const mergedChunks = mergeAndSparsify(l0VirtualChunks, chunkResults, CONFIG.FLOOR_MAX_CHUNKS);
|
const mergedChunks = mergeAndSparsify(l0VirtualChunks, chunkResults, CONFIG.FLOOR_MAX_CHUNKS);
|
||||||
|
|
||||||
// ─────────────────────────────────────────────────────────────────────
|
// 因果链追溯
|
||||||
// 因果链追溯:从 eventResults 出发找祖先事件
|
|
||||||
// 注意:是否“额外注入”要去重(如果祖先事件本来已召回,就不额外注入)
|
|
||||||
// ─────────────────────────────────────────────────────────────────────
|
|
||||||
const eventIndex = buildEventIndex(allEvents);
|
const eventIndex = buildEventIndex(allEvents);
|
||||||
const causalMap = traceCausalAncestors(eventResults, eventIndex);
|
const causalMap = traceCausalAncestors(eventResults, eventIndex);
|
||||||
|
|
||||||
@@ -750,7 +752,6 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
|
|||||||
depth: x.depth,
|
depth: x.depth,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// 排序:引用数 > 深度 > 编号,然后截断
|
|
||||||
sortCausalEvents(causalEvents);
|
sortCausalEvents(causalEvents);
|
||||||
const causalEventsTruncated = causalEvents.slice(0, CONFIG.CAUSAL_INJECT_MAX);
|
const causalEventsTruncated = causalEvents.slice(0, CONFIG.CAUSAL_INJECT_MAX);
|
||||||
|
|
||||||
|
|||||||
@@ -59,7 +59,7 @@ export async function searchStateAtoms(queryVector, vectorConfig) {
|
|||||||
const atoms = getStateAtoms();
|
const atoms = getStateAtoms();
|
||||||
const atomMap = new Map(atoms.map(a => [a.atomId, a]));
|
const atomMap = new Map(atoms.map(a => [a.atomId, a]));
|
||||||
|
|
||||||
// 计算匹配度
|
// 计算相似度
|
||||||
const scored = stateVectors
|
const scored = stateVectors
|
||||||
.map(sv => {
|
.map(sv => {
|
||||||
const atom = atomMap.get(sv.atomId);
|
const atom = atomMap.get(sv.atomId);
|
||||||
@@ -92,8 +92,8 @@ export function buildL0FloorBonus(l0Results, bonusFactor = 0.10) {
|
|||||||
const floorBonus = new Map();
|
const floorBonus = new Map();
|
||||||
|
|
||||||
for (const r of l0Results || []) {
|
for (const r of l0Results || []) {
|
||||||
// 每个楼层只加一次,取最高匹配度对应的 bonus
|
// 每个楼层只加一次,取最高相似度对应的 bonus
|
||||||
// 简化处理:统一加 bonusFactor,不区分匹配度高低
|
// 简化处理:统一加 bonusFactor,不区分相似度高低
|
||||||
if (!floorBonus.has(r.floor)) {
|
if (!floorBonus.has(r.floor)) {
|
||||||
floorBonus.set(r.floor, bonusFactor);
|
floorBonus.set(r.floor, bonusFactor);
|
||||||
}
|
}
|
||||||
@@ -132,13 +132,13 @@ export function stateToVirtualChunks(l0Results) {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* 合并 L0 和 L1 chunks,每楼层最多保留 limit 条
|
* 合并 L0 和 L1 chunks,每楼层最多保留 limit 条
|
||||||
* @param {Array} l0Chunks - 虚拟 chunks(已按匹配度排序)
|
* @param {Array} l0Chunks - 虚拟 chunks(已按相似度排序)
|
||||||
* @param {Array} l1Chunks - 真实 chunks(已按匹配度排序)
|
* @param {Array} l1Chunks - 真实 chunks(已按相似度排序)
|
||||||
* @param {number} limit - 每楼层上限
|
* @param {number} limit - 每楼层上限
|
||||||
* @returns {Array} 合并后的 chunks
|
* @returns {Array} 合并后的 chunks
|
||||||
*/
|
*/
|
||||||
export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) {
|
export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) {
|
||||||
// 合并并按匹配度排序
|
// 合并并按相似度排序
|
||||||
const all = [...(l0Chunks || []), ...(l1Chunks || [])]
|
const all = [...(l0Chunks || []), ...(l1Chunks || [])]
|
||||||
.sort((a, b) => b.similarity - a.similarity);
|
.sort((a, b) => b.similarity - a.similarity);
|
||||||
|
|
||||||
@@ -153,7 +153,7 @@ export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 扁平化并保持匹配度排序
|
// 扁平化并保持相似度排序
|
||||||
return Array.from(byFloor.values())
|
return Array.from(byFloor.values())
|
||||||
.flat()
|
.flat()
|
||||||
.sort((a, b) => b.similarity - a.similarity);
|
.sort((a, b) => b.similarity - a.similarity);
|
||||||
|
|||||||
85
modules/story-summary/vector/text-search.js
Normal file
85
modules/story-summary/vector/text-search.js
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
// Text Search - L2 事件文本检索(MiniSearch)
|
||||||
|
// 与向量检索互补,通过 RRF 融合
|
||||||
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
import MiniSearch from '../../../libs/minisearch.mjs';
|
||||||
|
|
||||||
|
let idx = null;
|
||||||
|
let lastRevision = null;
|
||||||
|
|
||||||
|
/**
 * Split text into search tokens for the event text index.
 *
 * Scripts written without word separators (Han, Hiragana, Katakana) are
 * emitted one character per token; every other run of letters/digits becomes
 * a single token. Punctuation, whitespace and symbols are dropped.
 *
 * Unicode property escapes are used instead of hard-coded ranges so that Han
 * extension blocks, kana and accented / non-Latin letters are not silently
 * discarded. For ASCII alphanumerics and U+4E00–U+9FFF the output is the same
 * as with the previous `[\u4e00-\u9fff]|[a-zA-Z0-9]+` pattern.
 *
 * @param {*} text - Value to tokenize; falsy values are treated as ''.
 * @returns {string[]} Tokens in order of appearance (empty array if none).
 */
function tokenize(text) {
    // The `u` flag makes the match operate on code points, so astral Han
    // characters stay intact instead of being split into surrogate halves.
    const tokenPattern = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}]|(?:(?![\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}])[\p{L}\p{N}])+/gu;
    return String(text || '').match(tokenPattern) || [];
}
|
||||||
|
|
||||||
|
/**
 * Remove a trailing floor marker such as " (#12)" or " (#12-15)" from a
 * summary and trim the surrounding whitespace.
 *
 * @param {*} s - Summary text; falsy values are treated as ''.
 * @returns {string} The summary without the trailing marker.
 */
function stripFloorTag(s) {
    const trailingFloorTag = /\s*\(#\d+(?:-\d+)?\)\s*$/;
    const raw = s ? String(s) : '';
    const withoutTag = raw.replace(trailingFloorTag, '');
    return withoutTag.trim();
}
|
||||||
|
|
||||||
|
/**
 * Build the module-level MiniSearch index over events, or reuse the existing
 * one when it is still valid.
 *
 * - An empty / missing `events` list clears the index.
 * - The cached index is reused only when the caller supplied a non-null
 *   `revision` that is strictly equal to the revision of the last successful
 *   build. Without a revision there is no way to tell whether `events`
 *   changed, so the index is rebuilt rather than risk serving stale results.
 * - Events without an `id`, and repeated ids after the first occurrence, are
 *   skipped: MiniSearch rejects such documents, which would otherwise abort
 *   the whole build and disable text search entirely.
 * - On any build failure the error is logged and the index is cleared.
 *
 * @param {Array<object>} events - Events exposing `id`, `title`, `summary`
 *   and `participants`.
 * @param {*} revision - Opaque change marker for `events`.
 * @returns {void}
 */
export function ensureEventTextIndex(events, revision) {
    if (!events?.length) {
        idx = null;
        lastRevision = null;
        return;
    }

    // `revision != null` guards against `undefined === undefined` pinning a
    // stale index forever when the caller has no revision to offer.
    if (idx && revision != null && revision === lastRevision) return;

    try {
        const seenIds = new Set();
        const docs = [];
        for (const e of events) {
            const id = e?.id;
            if (id == null || seenIds.has(id)) continue;
            seenIds.add(id);

            // Tolerate a plain string (or missing value) where an array is expected.
            let participants = '';
            if (Array.isArray(e.participants)) {
                participants = e.participants.join(' ');
            } else if (typeof e.participants === 'string') {
                participants = e.participants;
            }

            docs.push({
                id,
                title: e.title || '',
                summary: stripFloorTag(e.summary),
                participants,
            });
        }

        // Build into a local first so a failure never leaves a half-filled
        // index visible to searchEventsByText.
        const next = new MiniSearch({
            fields: ['title', 'summary', 'participants'],
            storeFields: ['id'],
            tokenize,
        });
        next.addAll(docs);

        idx = next;
        lastRevision = revision;
    } catch (e) {
        console.error('[text-search] Index build failed:', e);
        idx = null;
        lastRevision = null;
    }
}
|
||||||
|
|
||||||
|
/**
 * Run a text search against the event index.
 *
 * Returns an empty array when no index has been built, when `queryText` is
 * not a string or is blank, or when the search throws (the error is logged).
 *
 * @param {string} queryText - Raw query string.
 * @param {number} [limit=80] - Maximum number of hits to return.
 * @returns {Array<{id: *, textRank: number}>} Hits in the order returned by
 *   the index, with a 1-based `textRank`.
 */
export function searchEventsByText(queryText, limit = 80) {
    if (!idx || typeof queryText !== 'string' || !queryText.trim()) return [];

    try {
        const hits = idx.search(queryText, {
            // NOTE(review): `limit` does not appear among MiniSearch's
            // documented search options, so it is presumably ignored here;
            // kept in case the vendored build honours it. The cap is enforced
            // explicitly below either way.
            limit,
            boost: { title: 2, participants: 1.5, summary: 1 },
            fuzzy: 0.2,
            prefix: true,
        });

        const capped = Number.isFinite(limit) ? hits.slice(0, Math.max(0, limit)) : hits;
        return capped.map((r, i) => ({ id: r.id, textRank: i + 1 }));
    } catch (e) {
        console.error('[text-search] Search failed:', e);
        return [];
    }
}
|
||||||
|
|
||||||
|
/**
 * Drop the cached text index and forget the revision it was built for.
 *
 * @returns {void}
 */
export function clearEventTextIndex() {
    lastRevision = null;
    idx = null;
}
|
||||||
Reference in New Issue
Block a user