Commit b6147297 by luoqi

feat(plan): tooth-overlap union-find merge + EMR fallback for visit facts

scenarios/treatment-initiation-recall:
  按 tooth set 重叠做 union-find 合并同 patient 同 sub_scenario 的多 sig,
  sub_key 改 '<sub>@<tooth|whole>' 粒度,允许同 patient 不同牙位各 1 reason 行,
  cluster_fact_ids / triggers 注入 evidence + signals,源标签 (诊断+医生建议)
  在 cluster 含两种 sig 时合并显示。跟 chain-composer bucket 口径对齐,reason
  与 chain 1:1。

plan-aggregate.serializeProfile:
  visitFacts 优先 encounter_record,缺失时回退 emr_record。
  场景:DW 部分 host 的 appointment.in_time 字段空 → encounter 全空,但 EMR
  完整(医生写病历必到诊),不该让 lastVisit/daysSinceLastVisit 为 null。
parent bfd5fd14
...@@ -217,8 +217,16 @@ function serializeProfile( ...@@ -217,8 +217,16 @@ function serializeProfile(
const refunds = allFacts.filter((f) => f.type === 'refund_record'); const refunds = allFacts.filter((f) => f.type === 'refund_record');
const ltvCents = sumAmount(payments) + sumAmount(recharges) - sumAmount(refunds); const ltvCents = sumAmount(payments) + sumAmount(recharges) - sumAmount(refunds);
const latestEncounter = encounters[0]; // 就诊时间口径:
const firstEncounter = encounters.at(-1); // 优先用 encounter_record(appointment 显式标"已到诊"的)
// 兜底 emr_record(医生写病历必然患者到场,assembler 没归 encounter 时不丢)
// 场景:DW 部分 host 的 appointment.in_time 字段缺失,但 EMR 完整
// 例 张吴双 — 0 encounter / 39 emr,encounter 兜底空,就诊时间应来自 EMR
const allVisitFacts = [...encounters, ...allFacts.filter((f) => f.type === 'emr_record')]
.filter((f) => f.occurredAt)
.sort((a, b) => (b.occurredAt!.getTime() - a.occurredAt!.getTime()));
const latestEncounter = allVisitFacts[0];
const firstEncounter = allVisitFacts.at(-1);
const daysSinceLatestVisit = latestEncounter?.occurredAt const daysSinceLatestVisit = latestEncounter?.occurredAt
? Math.floor((Date.now() - latestEncounter.occurredAt.getTime()) / 86400_000) ? Math.floor((Date.now() - latestEncounter.occurredAt.getTime()) / 86400_000)
: null; : null;
......
...@@ -387,22 +387,21 @@ export class TreatmentInitiationRecallScenario implements PlanScenarioPlugin { ...@@ -387,22 +387,21 @@ export class TreatmentInitiationRecallScenario implements PlanScenarioPlugin {
) )
`; `;
// 同 patient 多个命中信号 → 取最早(daysSince 最大)作为主 hit // ⭐ 同 patient 同 sub_scenario 的多 sig 按 tooth-overlap 合并(union-find)
const byPatient = new Map<string, HitRow>(); // 跟 chain-composer 的 bucket 合并口径一致 → reason 与 chain 1:1 对齐
for (const r of rows) { // 场景:王燕桦 K01 3 个 sig(38 / 48;38 / 18;28;38)牙位重叠 → 1 个临床 episode
const existing = byPatient.get(r.patient_id); // 合并后 sub_key = sub_scenario@<union(tooth)>;daysSince 取 cluster 内最大(最早诊断)
if (!existing || r.days_since > existing.days_since) { // 全口诊断(K05 等空牙位)→ 全部归入 'whole' cluster(1 个 / patient,跟 chain.tooth='*whole' 一致)
byPatient.set(r.patient_id, r); const mergedHits = mergeRowsByToothOverlap(rows);
}
}
// 为算 6 因子,一次性查所有命中 patient 的 persona + recent execution // 为算 6 因子,一次性查所有命中 patient 的 persona + recent execution
const patientIds = [...byPatient.keys()]; const patientIds = [...new Set(mergedHits.map((r) => r.patient_id))];
const personaCtx = await this.fetchPersonaContext(patientIds); const personaCtx = await this.fetchPersonaContext(patientIds);
const execCtx = await this.fetchRecentExecutions(patientIds, scope.now); const execCtx = await this.fetchRecentExecutions(patientIds, scope.now);
const hits: ScenarioHit[] = []; const hits: ScenarioHit[] = [];
for (const [patientId, r] of byPatient.entries()) { for (const r of mergedHits) {
const patientId = r.patient_id;
const persona = personaCtx.get(patientId); const persona = personaCtx.get(patientId);
const confidence = r.confidence const confidence = r.confidence
? Number(r.confidence) ? Number(r.confidence)
...@@ -422,11 +421,12 @@ export class TreatmentInitiationRecallScenario implements PlanScenarioPlugin { ...@@ -422,11 +421,12 @@ export class TreatmentInitiationRecallScenario implements PlanScenarioPlugin {
}); });
const toothStr = r.tooth ? ` · 牙位 ${r.tooth}` : ''; const toothStr = r.tooth ? ` · 牙位 ${r.tooth}` : '';
const sourceStr = // cluster 内可能含两种 sig(诊断 + 医生建议)→ 文案合并显示;只 1 种则保留单一来源
r.signal_type === 'recommendation_record' const hasDx = r.cluster_has_diagnosis ?? (r.signal_type === 'diagnosis_record');
? '(医生建议)' const hasRec = r.cluster_has_recommendation ?? (r.signal_type === 'recommendation_record');
: '(诊断)'; const sourceStr = hasDx && hasRec ? '(诊断+医生建议)' : hasRec ? '(医生建议)' : '(诊断)';
// 触发信号类型(原 enum,不语义化;前端用 triggerTypeLabelZh 翻译) // 触发信号类型(原 enum,不语义化;前端用 triggerTypeLabelZh 翻译)
// lead 单一类型 — 想精准列出所有 sig 的话需要把 triggers[] 改成 cluster 全量,目前 lead 代表
const triggerType = const triggerType =
r.signal_type === 'recommendation_record' ? 'recommendation' : 'diagnosis'; r.signal_type === 'recommendation_record' ? 'recommendation' : 'diagnosis';
hits.push({ hits.push({
...@@ -443,14 +443,18 @@ export class TreatmentInitiationRecallScenario implements PlanScenarioPlugin { ...@@ -443,14 +443,18 @@ export class TreatmentInitiationRecallScenario implements PlanScenarioPlugin {
// 目标诊所 = 诊断出该未治疗需求的诊所(患者最可能回访的地方) // 目标诊所 = 诊断出该未治疗需求的诊所(患者最可能回访的地方)
targetClinicId: r.clinic_id ?? null, targetClinicId: r.clinic_id ?? null,
evidence: { evidence: {
// [0] = 触发 fact(对应 signals.triggers[0]);其余是相关参考 fact // cluster 内全部 sig 的 fact_id(lead 在 [0])— 让审计/追溯能看到所有触发依据
factIds: [r.signal_fact_id], factIds: r.cluster_fact_ids ?? [r.signal_fact_id],
}, },
subKey, // sub_key 牙位级粒度:plan_reasons UNIQUE(plan, scenario, sub_key) 允许同 patient
// 同 sub_scenario 多牙位各 1 行(36/46 都需充填都进库)。
// 全口诊断(K05 等无牙位)→ '@whole';前端 / plan-aggregate 用 signals.toothPosition 区分语义。
subKey: `${subKey}@${(r.tooth ?? '').trim() || 'whole'}`,
// 结构化召回信号(DB 存 raw enum / canonical code,前端字典翻译富文本) // 结构化召回信号(DB 存 raw enum / canonical code,前端字典翻译富文本)
// triggers 是 cluster 内全量(去重),前端按 type set 大小渲染 (诊断) / (医生建议) / (诊断+医生建议)
signals: { signals: {
subKey, subKey,
triggers: [{ type: triggerType, code: r.signal_code }], triggers: r.cluster_triggers ?? [{ type: triggerType, code: r.signal_code }],
toothPosition: r.tooth ?? null, toothPosition: r.tooth ?? null,
daysSince: r.days_since, daysSince: r.days_since,
expectedCategories: [...excludeCats], expectedCategories: [...excludeCats],
...@@ -532,4 +536,115 @@ interface HitRow { ...@@ -532,4 +536,115 @@ interface HitRow {
clinic_id: string | null; // 触发诊断 fact 的诊所 → plan.targetClinicId clinic_id: string | null; // 触发诊断 fact 的诊所 → plan.targetClinicId
signal_occurred_at: Date; signal_occurred_at: Date;
days_since: number; days_since: number;
// ↓ mergeRowsByToothOverlap 合并后注入(单 sig 时跟 signal_fact_id / signal_type 同步)
cluster_fact_ids?: string[]; // cluster 内所有 sig 的 fact_id(给 evidence.factIds)
cluster_has_diagnosis?: boolean; // cluster 含至少 1 个 diagnosis_record
cluster_has_recommendation?: boolean; // cluster 含至少 1 个 recommendation_record
cluster_triggers?: Array<{ type: string; code: string }>; // cluster 内 unique (type, code),给 signals.triggers
}
/**
 * Parse a semicolon-separated tooth-position string into the set of base tooth numbers.
 *
 * Each segment is trimmed and reduced to its leading run of digits, so a surface or
 * letter suffix is dropped and duplicates collapse:
 * `"15;24 B;24"` -> `{"15", "24"}`, `"1D"` -> `{"1"}`.
 * Segments that do not start with a digit contribute nothing.
 *
 * Per the original note this is meant to follow the same convention as `toothSet` in
 * plan-aggregate.service.ts; that helper is not visible here, so keep the two in sync
 * manually.
 *
 * @param s Raw tooth-position string; `null`, `undefined` and `""` yield an empty set.
 * @returns Base tooth numbers in first-seen order.
 */
function parseToothSet(s: string | null | undefined): Set<string> {
  const baseTeeth = new Set<string>();
  if (!s) return baseTeeth;

  const leadingDigits = /^\d+/;
  s.split(';').forEach((segment) => {
    const hit = leadingDigits.exec(segment.trim());
    if (hit !== null) baseTeeth.add(hit[0]);
  });
  return baseTeeth;
}
/**
 * Merge signal rows into per-patient clusters and return one representative row per cluster.
 *
 * Rows are grouped by `patient_id`. Within one patient:
 * - rows whose `tooth` yields no base tooth number (per `parseToothSet`) form a single
 *   whole-mouth cluster;
 * - the remaining rows are joined with a union-find whenever two rows share at least one
 *   base tooth number, so overlap is transitive (A~B and B~C put A, B and C together).
 *
 * Every cluster is represented by a shallow copy of its row with the largest `days_since`
 * (see `buildClusterLead`). The whole-mouth representative always gets `tooth = null`;
 * a tooth cluster gets the `;`-joined union of its base tooth numbers in default
 * (lexicographic) sort order.
 *
 * Per the original note the caller passes rows of a single sub-scenario; this function
 * does not verify that.
 *
 * @param rows Signal rows. Neither the array nor its elements are mutated.
 * @returns Representatives patient by patient (first-seen order): the whole-mouth cluster
 *   first if present, then tooth clusters ordered by their first member row.
 */
function mergeRowsByToothOverlap(rows: HitRow[]): HitRow[] {
  const byPatient = new Map<string, HitRow[]>();
  for (const row of rows) {
    const bucket = byPatient.get(row.patient_id);
    if (bucket) bucket.push(row);
    else byPatient.set(row.patient_id, [row]);
  }

  const merged: HitRow[] = [];
  for (const patientRows of byPatient.values()) {
    // Split into rows with at least one parseable tooth and rows without any.
    const wholeMouth: HitRow[] = [];
    const withTooth: Array<{ row: HitRow; teeth: Set<string> }> = [];
    for (const row of patientRows) {
      const teeth = parseToothSet(row.tooth);
      if (teeth.size === 0) wholeMouth.push(row);
      else withTooth.push({ row, teeth });
    }

    // Whole-mouth rows: one cluster per patient. The tooth is forced to null so that a
    // lead row carrying whitespace or digit-less text cannot leak it into the sub key or
    // the tooth label built by the caller.
    if (wholeMouth.length > 0) {
      merged.push(buildClusterLead(wholeMouth, null));
    }

    // Union-find over row indices, with path compression in `find`.
    const parent = withTooth.map((_, i) => i);
    const find = (x: number): number => {
      const p = parent[x]!;
      if (p === x) return x;
      const root = find(p);
      parent[x] = root;
      return root;
    };
    const union = (a: number, b: number): void => {
      const ra = find(a);
      const rb = find(b);
      if (ra !== rb) parent[ra] = rb;
    };

    // Pairwise overlap check; quadratic in the number of rows of one patient.
    for (let i = 0; i < withTooth.length; i++) {
      for (let j = i + 1; j < withTooth.length; j++) {
        const left = withTooth[i]!.teeth;
        const right = withTooth[j]!.teeth;
        for (const t of left) {
          if (right.has(t)) {
            union(i, j);
            break;
          }
        }
      }
    }

    // Gather member rows and the union of teeth per root.
    const clusters = new Map<number, { teeth: Set<string>; rows: HitRow[] }>();
    for (let i = 0; i < withTooth.length; i++) {
      const root = find(i);
      let cluster = clusters.get(root);
      if (!cluster) {
        cluster = { teeth: new Set<string>(), rows: [] };
        clusters.set(root, cluster);
      }
      for (const t of withTooth[i]!.teeth) cluster.teeth.add(t);
      cluster.rows.push(withTooth[i]!.row);
    }

    for (const cluster of clusters.values()) {
      merged.push(buildClusterLead(cluster.rows, Array.from(cluster.teeth).sort().join(';')));
    }
  }
  return merged;
}

/**
 * Build the representative row of one non-empty cluster.
 *
 * The member rows are ordered by `days_since` descending (ties keep input order); the first
 * one is shallow-copied and its `tooth` and `cluster_*` fields are overwritten. As a result
 * `cluster_fact_ids[0]` is the representative's own `signal_fact_id`.
 *
 * @param clusterRows Member rows of the cluster; must contain at least one row. Not mutated.
 * @param tooth Value to store in the representative's `tooth` field.
 * @returns A new row object; none of the input rows is modified.
 */
function buildClusterLead(clusterRows: HitRow[], tooth: HitRow['tooth']): HitRow {
  const ordered = [...clusterRows].sort((a, b) => b.days_since - a.days_since);
  return {
    ...ordered[0]!,
    tooth,
    cluster_fact_ids: ordered.map((r) => r.signal_fact_id),
    cluster_has_diagnosis: ordered.some((r) => r.signal_type === 'diagnosis_record'),
    cluster_has_recommendation: ordered.some((r) => r.signal_type === 'recommendation_record'),
    cluster_triggers: uniqueTriggers(ordered),
  };
}
/**
 * Collect the distinct (type, code) triggers of a cluster for `signals.triggers`.
 *
 * A row whose `signal_type` is `'recommendation_record'` maps to type `'recommendation'`;
 * every other `signal_type` maps to `'diagnosis'`. Duplicates of the same (type, code) pair
 * are dropped, keeping the first occurrence.
 *
 * @param rows Rows of one cluster; not mutated.
 * @returns All `'diagnosis'` triggers first, then all `'recommendation'` triggers, each
 *   group in first-seen order, so element 0 is a diagnosis whenever the cluster has one.
 */
function uniqueTriggers(rows: HitRow[]): Array<{ type: string; code: string }> {
  const seen = new Set<string>();
  const diagnoses: Array<{ type: string; code: string }> = [];
  const recommendations: Array<{ type: string; code: string }> = [];

  for (const row of rows) {
    const type = row.signal_type === 'recommendation_record' ? 'recommendation' : 'diagnosis';
    const key = `${type}|${row.signal_code}`;
    if (seen.has(key)) continue;
    seen.add(key);
    // Partitioning while collecting gives the same order as a stable diagnosis-first sort.
    (type === 'diagnosis' ? diagnoses : recommendations).push({ type, code: row.signal_code });
  }
  return [...diagnoses, ...recommendations];
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment