Improve state v2 parsing and errors

This commit is contained in:
2026-02-01 21:55:47 +08:00
parent cf0fc88a24
commit c3cc86160b
3 changed files with 383 additions and 117 deletions

View File

@@ -1,15 +1,100 @@
import jsyaml from '../../../libs/js-yaml.mjs';
const STATE_TAG_RE = /<\s*state\b[^>]*>([\s\S]*?)<\s*\/\s*state\s*>/gi;
/**
* Robust <state> block matcher (no regex)
* - Pairs each </state> with the nearest preceding <state ...>
* - Ignores unclosed <state>
*/
/**
 * Tell whether an opening `<state` tag starts at index `i` of `s`.
 *
 * The tag name is compared case-insensitively. The character right after
 * the name must be `>`, `/`, whitespace, or absent (end of input), so
 * longer names such as `<statement` are rejected.
 *
 * @param {string} s - Text being scanned.
 * @param {number} i - Index of the candidate `<`.
 * @returns {boolean} True when an opening state tag begins at `i`.
 */
function isValidOpenTagAt(s, i) {
  if (s[i] !== '<') return false;
  if (s.slice(i, i + 6).toLowerCase() !== '<state') return false;

  // Character immediately following the tag name; empty at end of input.
  const follower = s[i + 6] ?? '';
  if (!follower) return true;

  return follower === '>' || follower === '/' || /\s/.test(follower);
}
/**
 * Tell whether a closing `</state>` tag starts at index `i` of `s`.
 *
 * The tag name is compared case-insensitively. Whitespace is tolerated
 * between the name and the terminating `>`, but nothing else is.
 *
 * @param {string} s - Text being scanned.
 * @param {number} i - Index of the candidate `<`.
 * @returns {boolean} True when a complete closing state tag begins at `i`.
 */
function isValidCloseTagAt(s, i) {
  if (s[i] !== '<' || s[i + 1] !== '/') return false;
  if (s.slice(i, i + 7).toLowerCase() !== '</state') return false;

  // Walk past optional whitespace; the first other character decides.
  for (let k = i + 7; k < s.length; k += 1) {
    const ch = s[k];
    if (ch === '>') return true;
    if (!/\s/.test(ch)) return false;
  }
  // Ran off the end of the input without meeting '>'.
  return false;
}
/**
 * Find the index of the first `>` at or after `openIndex`.
 *
 * @param {string} s - Text being scanned.
 * @param {number} openIndex - Index to start searching from.
 * @returns {number} Index of the `>`, or -1 when there is none.
 */
function findTagEnd(s, openIndex) {
  return s.indexOf('>', openIndex);
}
/**
 * Locate every `<state ...> ... </state>` block in `text`.
 *
 * The text is scanned once, left to right. Each closing tag is paired with
 * the nearest preceding opening tag that is complete (its own `>` lies
 * before the closing tag) and has not already been consumed by an earlier
 * block. Consequences:
 * - an unclosed `<state>` followed by another `<state>` is ignored;
 * - a closing tag with no available opening tag is ignored, so a stray
 *   `</state>` after a well-formed block can no longer swallow that block;
 * - returned spans never overlap and a block's content never contains a
 *   closing state tag.
 *
 * @param {*} text - Input; coerced to a string, null/undefined become ''.
 * @returns {Array<{openStart: number, openTagEnd: number, closeStart: number, closeEnd: number}>}
 *   Spans in document order. `openStart` is the index of the opening `<`,
 *   `openTagEnd` the index just past the opening tag's `>`, `closeStart`
 *   the index of the closing `<`, and `closeEnd` the index just past the
 *   closing tag's `>`.
 */
function findStateBlockSpans(text) {
  const s = String(text ?? '');
  const spans = [];

  // Latest opening tag whose own '>' has already been passed by the scan.
  let ready = null;
  // Latest opening tag seen whose '>' is still ahead of the scan position.
  let candidate = null;

  for (let i = s.indexOf('<'); i !== -1; i = s.indexOf('<', i + 1)) {
    // An opening tag only becomes usable once the scan is past its '>'.
    if (candidate !== null && candidate.tagEnd < i) {
      ready = candidate;
      candidate = null;
    }

    if (isValidCloseTagAt(s, i)) {
      if (ready !== null) {
        // isValidCloseTagAt guarantees only whitespace precedes this '>'.
        const closeEnd = s.indexOf('>', i + 7) + 1;
        spans.push({
          openStart: ready.openStart,
          openTagEnd: ready.tagEnd + 1,
          closeStart: i,
          closeEnd,
        });
        // Everything up to this closing tag belongs to the emitted block.
        ready = null;
        candidate = null;
      }
      continue;
    }

    if (isValidOpenTagAt(s, i)) {
      const tagEnd = findTagEnd(s, i);
      // No '>' remains in the input, so no further complete tag can exist.
      if (tagEnd === -1) break;
      candidate = { openStart: i, tagEnd };
    }
  }

  return spans;
}
export function extractStateBlocks(text) {
const s = String(text ?? '');
if (!s || s.toLowerCase().indexOf('<state') === -1) return [];
const spans = findStateBlockSpans(s);
const out = [];
STATE_TAG_RE.lastIndex = 0;
let m;
while ((m = STATE_TAG_RE.exec(s)) !== null) {
const inner = String(m[1] ?? '');
for (const sp of spans) {
const inner = s.slice(sp.openTagEnd, sp.closeStart);
if (inner.trim()) out.push(inner);
}
return out;
@@ -17,85 +102,120 @@ export function extractStateBlocks(text) {
export function computeStateSignature(text) {
const s = String(text ?? '');
if (!s || s.toLowerCase().indexOf('<state') === -1) return '';
const chunks = [];
STATE_TAG_RE.lastIndex = 0;
let m;
while ((m = STATE_TAG_RE.exec(s)) !== null) chunks.push(String(m[0] ?? '').trim());
return chunks.length ? chunks.join('\n---\n') : '';
const spans = findStateBlockSpans(s);
if (!spans.length) return '';
const chunks = spans.map(sp => s.slice(sp.openStart, sp.closeEnd).trim());
return chunks.join('\n---\n');
}
/**
* 解析 <state> 块
* 返回: { rules: [{path, rule}], ops: [{path, op, value, ...}] }
*/
export function parseStateBlock(content) {
const lines = String(content ?? '').split(/\r?\n/);
const rules = [];
const dataLines = [];
// 第一遍:分离规则行和数据行
for (const line of lines) {
const trimmed = line.trim();
if (!trimmed || trimmed.startsWith('#')) continue;
let inSchema = false;
let schemaPath = '';
let schemaLines = [];
let schemaBaseIndent = -1;
// 规则行:以 $ 开头
if (trimmed.startsWith('$')) {
const parsed = parseRuleLineInternal(trimmed);
if (parsed) rules.push(parsed);
} else {
dataLines.push(line);
const flushSchema = () => {
if (schemaLines.length) {
const parsed = parseSchemaBlock(schemaPath, schemaLines);
rules.push(...parsed);
}
inSchema = false;
schemaPath = '';
schemaLines = [];
schemaBaseIndent = -1;
};
for (let i = 0; i < lines.length; i++) {
const raw = lines[i];
const trimmed = raw.trim();
const indent = raw.search(/\S/);
if (!trimmed || trimmed.startsWith('#')) {
if (inSchema && schemaBaseIndent >= 0) schemaLines.push(raw);
continue;
}
// $schema 开始
if (trimmed.startsWith('$schema')) {
flushSchema();
const rest = trimmed.slice(7).trim();
schemaPath = rest || '';
inSchema = true;
schemaBaseIndent = -1;
continue;
}
if (inSchema) {
if (schemaBaseIndent < 0) {
schemaBaseIndent = indent;
}
// 缩进回退 => schema 结束
if (indent < schemaBaseIndent && indent >= 0 && trimmed) {
flushSchema();
i--;
continue;
}
schemaLines.push(raw);
continue;
}
// 普通 $rule$ro, $range, $step, $enum
if (trimmed.startsWith('$')) {
const parsed = parseRuleLine(trimmed);
if (parsed) rules.push(parsed);
continue;
}
dataLines.push(raw);
}
// 第二遍:解析数据
const ops = parseDataLines(dataLines);
flushSchema();
const ops = parseDataLines(dataLines);
return { rules, ops };
}
function parseRuleLineInternal(line) {
const tokens = line.trim().split(/\s+/);
const directives = [];
let pathStart = 0;
for (let i = 0; i < tokens.length; i++) {
if (tokens[i].startsWith('$')) {
directives.push(tokens[i]);
pathStart = i + 1;
} else {
break;
}
}
const path = tokens.slice(pathStart).join(' ').trim();
if (!path || !directives.length) return null;
const rule = {};
for (const tok of directives) {
if (tok === '$ro') { rule.ro = true; continue; }
if (tok === '$lock') { rule.lock = true; continue; }
const rangeMatch = tok.match(/^\$range=\[\s*(-?\d+(?:\.\d+)?)\s*,\s*(-?\d+(?:\.\d+)?)\s*\]$/);
if (rangeMatch) {
rule.min = Math.min(Number(rangeMatch[1]), Number(rangeMatch[2]));
rule.max = Math.max(Number(rangeMatch[1]), Number(rangeMatch[2]));
/**
* 解析数据行
*/
function stripYamlInlineComment(s) {
const text = String(s ?? '');
if (!text) return '';
let inSingle = false;
let inDouble = false;
let escaped = false;
for (let i = 0; i < text.length; i++) {
const ch = text[i];
if (inSingle) {
if (ch === "'") {
if (text[i + 1] === "'") { i++; continue; }
inSingle = false;
}
continue;
}
const stepMatch = tok.match(/^\$step=(\d+(?:\.\d+)?)$/);
if (stepMatch) { rule.step = Math.abs(Number(stepMatch[1])); continue; }
const enumMatch = tok.match(/^\$enum=\{([^}]+)\}$/);
if (enumMatch) {
rule.enum = enumMatch[1].split(/[;]/).map(s => s.trim()).filter(Boolean);
if (inDouble) {
if (escaped) { escaped = false; continue; }
if (ch === '\\') { escaped = true; continue; }
if (ch === '"') inDouble = false;
continue;
}
if (ch === "'") { inSingle = true; continue; }
if (ch === '"') { inDouble = true; continue; }
if (ch === '#') {
const prev = i > 0 ? text[i - 1] : '';
if (i === 0 || /\s/.test(prev)) {
return text.slice(0, i).trimEnd();
}
}
}
return { path, rule };
return text.trimEnd();
}
function parseDataLines(lines) {
@@ -146,7 +266,8 @@ function parseDataLines(lines) {
if (colonIdx === -1) continue;
const path = trimmed.slice(0, colonIdx).trim();
const rhs = trimmed.slice(colonIdx + 1).trim();
let rhs = trimmed.slice(colonIdx + 1).trim();
rhs = stripYamlInlineComment(rhs);
if (!path) continue;
if (!rhs) {
@@ -194,23 +315,19 @@ export function parseInlineValue(raw) {
if (t === 'null') return { op: 'del' };
// (负数) 强制 set
const parenNum = t.match(/^\((-?\d+(?:\.\d+)?)\)$/);
if (parenNum) return { op: 'set', value: Number(parenNum[1]) };
// +N / -N
if (/^\+\d/.test(t) || /^-\d/.test(t)) {
const n = Number(t);
if (Number.isFinite(n)) return { op: 'inc', delta: n };
}
// +"str"
const pushD = t.match(/^\+"((?:[^"\\]|\\.)*)"\s*$/);
if (pushD) return { op: 'push', value: unescapeString(pushD[1]) };
const pushS = t.match(/^\+'((?:[^'\\]|\\.)*)'\s*$/);
if (pushS) return { op: 'push', value: unescapeString(pushS[1]) };
// +[...]
if (t.startsWith('+[')) {
try {
const arr = JSON.parse(t.slice(1));
@@ -219,13 +336,11 @@ export function parseInlineValue(raw) {
return { op: 'set', value: t, warning: '+[] 解析失败' };
}
// -"str"
const popD = t.match(/^-"((?:[^"\\]|\\.)*)"\s*$/);
if (popD) return { op: 'pop', value: unescapeString(popD[1]) };
const popS = t.match(/^-'((?:[^'\\]|\\.)*)'\s*$/);
if (popS) return { op: 'pop', value: unescapeString(popS[1]) };
// -[...]
if (t.startsWith('-[')) {
try {
const arr = JSON.parse(t.slice(1));
@@ -234,10 +349,8 @@ export function parseInlineValue(raw) {
return { op: 'set', value: t, warning: '-[] 解析失败' };
}
// 裸数字
if (/^-?\d+(?:\.\d+)?$/.test(t)) return { op: 'set', value: Number(t) };
// "str" / 'str'
const strD = t.match(/^"((?:[^"\\]|\\.)*)"\s*$/);
if (strD) return { op: 'set', value: unescapeString(strD[1]) };
const strS = t.match(/^'((?:[^'\\]|\\.)*)'\s*$/);
@@ -246,12 +359,10 @@ export function parseInlineValue(raw) {
if (t === 'true') return { op: 'set', value: true };
if (t === 'false') return { op: 'set', value: false };
// JSON array/object
if (t.startsWith('{') || t.startsWith('[')) {
try { return { op: 'set', value: JSON.parse(t) }; }
catch { return { op: 'set', value: t, warning: 'JSON 解析失败' }; }
}
// 兜底
return { op: 'set', value: t };
}