Add vector IO and text filtering

This commit is contained in:
2026-01-29 17:02:51 +08:00
parent fc23781e17
commit ee5f02fff9
10 changed files with 3368 additions and 42 deletions

View File

@@ -7,10 +7,11 @@
// - floor 稀疏去重
import { getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from './chunk-store.js';
import { embed, getEngineFingerprint } from './embedder.js';
import { xbLog } from '../../../core/debug-core.js';
import { getContext } from '../../../../../../extensions.js';
import { getSummaryStore } from '../data/store.js';
import { embed, getEngineFingerprint } from './embedder.js';
import { xbLog } from '../../../core/debug-core.js';
import { getContext } from '../../../../../../extensions.js';
import { getSummaryStore } from '../data/store.js';
import { filterText } from './text-filter.js';
// Identifier string for this module. NOTE(review): presumably used to tag log output — usage is outside this view; confirm.
const MODULE_ID = 'recall';
@@ -139,13 +140,11 @@ function normalize(s) {
return String(s || '').normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim();
}
/**
 * Removes non-content markup from a message and trims the result.
 *
 * Stripped spans (case-insensitive, all occurrences):
 *   - <think>...</think> blocks, including multi-line content
 *   - <thinking>...</thinking> blocks, including multi-line content
 *   - [tts:...] markers
 *
 * A tag that is opened but never closed is left untouched, because each
 * pattern requires the matching closing tag.
 *
 * @param {*} text - Message text; falsy values are treated as an empty string.
 * @returns {string} The text with the spans above removed and outer whitespace trimmed.
 */
function stripNoise(text) {
    const noisePatterns = [
        /<think>[\s\S]*?<\/think>/gi,
        /<thinking>[\s\S]*?<\/thinking>/gi,
        /\[tts:[^\]]*\]/gi,
    ];

    let cleaned = String(text || '');
    // Patterns are applied one after another, in the order listed above.
    for (const pattern of noisePatterns) {
        cleaned = cleaned.replace(pattern, '');
    }
    return cleaned.trim();
}
/**
 * Prepares message text for use in a recall query.
 *
 * The text is first passed through filterText (imported from
 * './text-filter.js'), then every [tts:...] marker is removed with a
 * hard-coded, case-insensitive pattern, and finally the result is trimmed.
 *
 * @param {*} text - Raw message text; forwarded to filterText unchanged.
 * @returns {string} The filtered, marker-free, trimmed text.
 *   NOTE(review): assumes filterText always returns a string — confirm.
 */
function cleanForRecall(text) {
    // Step 1: apply the user-defined filtering rules.
    const filtered = filterText(text);
    // Step 2: remove TTS markers (hard-coded pattern).
    const withoutTts = filtered.replace(/\[tts:[^\]]*\]/gi, '');
    return withoutTts.trim();
}
function buildExpDecayWeights(n, beta) {
const last = n - 1;
@@ -180,8 +179,8 @@ function buildQuerySegments(chat, count, excludeLastAi, pendingUserMessage = nul
return messages.slice(-count).map((m, idx, arr) => {
const speaker = m.name || (m.is_user ? '用户' : '角色');
const clean = stripNoise(m.mes);
if (!clean) return '';
const clean = cleanForRecall(m.mes);
if (!clean) return '';
const limit = idx === arr.length - 1 ? CONFIG.QUERY_MAX_CHARS : CONFIG.QUERY_CONTEXT_CHARS;
return `${speaker}: ${clean.slice(0, limit)}`;
}).filter(Boolean);
@@ -773,7 +772,7 @@ export function buildQueryText(chat, count = 2, excludeLastAi = false) {
messages = messages.slice(0, -1);
}
return messages.slice(-count).map(m => {
return messages.slice(-count).map(m => {
const text = cleanForRecall(m.mes);
const speaker = m.name || (m.is_user ? '用户' : '角色');
return `${speaker}: ${text.slice(0, 500)}`;