feat(plan): tooth-overlap union-find merge + EMR fallback for visit facts

scenarios/treatment-initiation-recall: 按 tooth set 重叠做 union-find 合并同 patient 同 sub_scenario 的多 sig, sub_key 改 '<sub>@<tooth|whole>' 粒度,允许同 patient 不同牙位各 1 reason 行, cluster_fact_ids / triggers 注入 evidence + signals,源标签 (诊断+医生建议) 在 cluster 含两种 sig 时合并显示。跟 chain-composer bucket 口径对齐,reason 与 chain 1:1。 plan-aggregate.serializeProfile: visitFacts 优先 encounter_record,缺失时回退 emr_record。场景:DW 部分 host 的 appointment.in_time 字段空 → encounter 全空,但 EMR 完整(医生写病历必到诊),不该让 lastVisit/daysSinceLastVisit 为 null。

feat(plan): tooth-overlap union-find merge + EMR fallback for visit facts
scenarios/treatment-initiation-recall: 按 tooth set 重叠做 union-find 合并同 patient 同 sub_scenario 的多 sig, sub_key 改 '<sub>@<tooth|whole>' 粒度,允许同 patient 不同牙位各 1 reason 行, cluster_fact_ids / triggers 注入 evidence + signals,源标签 (诊断+医生建议) 在 cluster 含两种 sig 时合并显示。跟 chain-composer bucket 口径对齐,reason 与 chain 1:1。 plan-aggregate.serializeProfile: visitFacts 优先 encounter_record,缺失时回退 emr_record。场景:DW 部分 host 的 appointment.in_time 字段空 → encounter 全空,但 EMR 完整(医生写病历必到诊),不该让 lastVisit/daysSinceLastVisit 为 null。
b6147297 · luoqi · bfd5fd14 · b6147297 · b6147297
Commit b6147297 authored May 27, 2026 by luoqi
Hide whitespace changes
Inline Side-by-side

Showing with 143 additions and 20 deletions

apps/pac-service/src/modules/plan-aggregate/plan-aggregate.service.ts
+10 -2

apps/pac-service/src/modules/plan/engine/scenarios/treatment-initiation-recall.scenario.ts
+133 -18

No files found.
--- a/apps/pac-service/src/modules/plan-aggregate/plan-aggregate.service.ts
+++ b/apps/pac-service/src/modules/plan-aggregate/plan-aggregate.service.ts
@@ -217,8 +217,16 @@ function serializeProfile(
  const refunds = allFacts.filter((f) => f.type === 'refund_record');
  const ltvCents = sumAmount(payments) + sumAmount(recharges) - sumAmount(refunds);
-  const latestEncounter = encounters[0];
+  // 就诊时间口径:
-  const firstEncounter = encounters.at(-1);
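+  //   Visit facts = encounter_record (appointments explicitly marked as "arrived") merged with emr_record
+  //   (a written medical record implies the patient was present, so the visit is not lost when the assembler produced no encounter); both sources are always combined and sorted by occurredAt, newest first — this is a union, not an encounter-first fallback
+  //   Scenario: on some DW hosts the appointment.in_time field is missing, so encounters are empty while EMR is complete
+  //        e.g. 张吴双 — 0 encounter / 39 emr: with no encounters at all, the visit times come from EMR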
+  const allVisitFacts = [...encounters, ...allFacts.filter((f) => f.type === 'emr_record')]
+    .filter((f) => f.occurredAt)
+    .sort((a, b) => (b.occurredAt!.getTime() - a.occurredAt!.getTime()));
+  const latestEncounter = allVisitFacts[0];
+  const firstEncounter = allVisitFacts.at(-1);
  const daysSinceLatestVisit = latestEncounter?.occurredAt
    ? Math.floor((Date.now() - latestEncounter.occurredAt.getTime()) / 86400_000)
    : null;

--- a/apps/pac-service/src/modules/plan/engine/scenarios/treatment-initiation-recall.scenario.ts
+++ b/apps/pac-service/src/modules/plan/engine/scenarios/treatment-initiation-recall.scenario.ts
@@ -387,22 +387,21 @@ export class TreatmentInitiationRecallScenario implements PlanScenarioPlugin {
        )
    `;
-    // 同 patient 多个命中信号 → 取最早(daysSince 最大)作为主 hit
+    // ⭐ 同 patient 同 sub_scenario 的多 sig 按 tooth-overlap 合并(union-find)
-    const byPatient = new Map<string, HitRow>();
+    //   跟 chain-composer 的 bucket 合并口径一致 → reason 与 chain 1:1 对齐
-    for (const r of rows) {
+    //   场景:王燕桦 K01 3 个 sig(38 / 48;38 / 18;28;38)牙位重叠 → 1 个临床 episode
-      const existing = byPatient.get(r.patient_id);
+    //   合并后 sub_key = sub_scenario@<union(tooth)>;daysSince 取 cluster 内最大(最早诊断)
-      if (!existing || r.days_since > existing.days_since) {
+    //   全口诊断(K05 等空牙位)→ 全部归入 'whole' cluster(1 个 / patient,跟 chain.tooth='*whole' 一致)
-        byPatient.set(r.patient_id, r);
+    const mergedHits = mergeRowsByToothOverlap(rows);
-      }
-    }
    // 为算 6 因子,一次性查所有命中 patient 的 persona + recent execution
-    const patientIds = [...byPatient.keys()];
+    const patientIds = [...new Set(mergedHits.map((r) => r.patient_id))];
    const personaCtx = await this.fetchPersonaContext(patientIds);
    const execCtx = await this.fetchRecentExecutions(patientIds, scope.now);
    const hits: ScenarioHit[] = [];
-    for (const [patientId, r] of byPatient.entries()) {
+    for (const r of mergedHits) {
+      const patientId = r.patient_id;
      const persona = personaCtx.get(patientId);
      const confidence = r.confidence
        ? Number(r.confidence)
@@ -422,11 +421,12 @@ export class TreatmentInitiationRecallScenario implements PlanScenarioPlugin {
      });
      const toothStr = r.tooth ? ` · 牙位 ${r.tooth}` : '';
-      const sourceStr =
+      // cluster 内可能含两种 sig(诊断 + 医生建议)→ 文案合并显示;只 1 种则保留单一来源
-        r.signal_type === 'recommendation_record'
+      const hasDx = r.cluster_has_diagnosis ?? (r.signal_type === 'diagnosis_record');
-          ? '(医生建议)'
+      const hasRec = r.cluster_has_recommendation ?? (r.signal_type === 'recommendation_record');
-          : '(诊断)';
+      const sourceStr = hasDx && hasRec ? '(诊断+医生建议)' : hasRec ? '(医生建议)' : '(诊断)';
      // 触发信号类型(原 enum,不语义化;前端用 triggerTypeLabelZh 翻译)
+      // Type of the lead row only — used as the fallback trigger when r.cluster_triggers is absent; the cluster-wide, de-duplicated list is carried by r.cluster_triggers
      const triggerType =
        r.signal_type === 'recommendation_record' ? 'recommendation' : 'diagnosis';
      hits.push({
@@ -443,14 +443,18 @@ export class TreatmentInitiationRecallScenario implements PlanScenarioPlugin {
        // 目标诊所 = 诊断出该未治疗需求的诊所(患者最可能回访的地方)
        targetClinicId: r.clinic_id ?? null,
        evidence: {
-          // [0] = 触发 fact(对应 signals.triggers[0]);其余是相关参考 fact
+          // cluster 内全部 sig 的 fact_id(lead 在 [0])— 让审计/追溯能看到所有触发依据
-          factIds: [r.signal_fact_id],
+          factIds: r.cluster_fact_ids ?? [r.signal_fact_id],
        },
-        subKey,
+        // sub_key 牙位级粒度:plan_reasons UNIQUE(plan, scenario, sub_key) 允许同 patient
+        //   同 sub_scenario 多牙位各 1 行(36/46 都需充填都进库)。
+        //   全口诊断(K05 等无牙位)→ '@whole';前端 / plan-aggregate 用 signals.toothPosition 区分语义。
+        subKey: `${subKey}@${(r.tooth ?? '').trim() || 'whole'}`,
        // 结构化召回信号(DB 存 raw enum / canonical code,前端字典翻译富文本)
+        // triggers 是 cluster 内全量(去重),前端按 type set 大小渲染 (诊断) / (医生建议) / (诊断+医生建议)
        signals: {
          subKey,
-          triggers: [{ type: triggerType, code: r.signal_code }],
+          triggers: r.cluster_triggers ?? [{ type: triggerType, code: r.signal_code }],
          toothPosition: r.tooth ?? null,
          daysSince: r.days_since,
          expectedCategories: [...excludeCats],
@@ -532,4 +536,115 @@ interface HitRow {
  clinic_id: string | null; // 触发诊断 fact 的诊所 → plan.targetClinicId
  signal_occurred_at: Date;
  days_since: number;
+  // ↓ mergeRowsByToothOverlap 合并后注入(单 sig 时跟 signal_fact_id / signal_type 同步)
+  cluster_fact_ids?: string[];     // cluster 内所有 sig 的 fact_id(给 evidence.factIds)
+  cluster_has_diagnosis?: boolean; // cluster 含至少 1 个 diagnosis_record
+  cluster_has_recommendation?: boolean; // cluster 含至少 1 个 recommendation_record
+  cluster_triggers?: Array<{ type: string; code: string }>; // cluster 内 unique (type, code),给 signals.triggers
+}
+/// Parses a ';'-separated tooth-position string into a set of base tooth numbers ("15;24 B;24" → {"15","24"}); null, undefined or empty input yields an empty set.
+/// Intended to follow the same convention as toothSet in plan-aggregate.service.ts (strip B/L/M/O/D surface suffixes, de-duplicate) — that helper is not visible here, TODO confirm.
+function parseToothSet(s: string | null | undefined): Set<string> {
+  if (!s) return new Set();
+  const out = new Set<string>();
+  for (const raw of s.split(';')) {
+    // base = the leading run of digits after trimming: "24 B" → "24", "1D" → "1" (Palmer primary teeth also reduce to their digit prefix); segments with no leading digit are skipped
+    const m = raw.trim().match(/^\d+/);
+    if (m) out.add(m[0]);
+  }
+  return out;
+}
+/// Groups each patient's signal rows into clusters with union-find: two rows end up in the same cluster when their parsed tooth sets share at least one tooth (directly or transitively).
+/// Rows whose tooth string parses to an empty set (whole-mouth diagnoses) all go into a single per-patient cluster, because splitting whole-mouth treatment by tooth is meaningless.
+/// Returns one merged row per cluster: a shallow copy of the row with the largest days_since (earliest signal), with the cluster_* fields filled in and, for toothed clusters, tooth replaced by the union of all teeth.
+function mergeRowsByToothOverlap(rows: HitRow[]): HitRow[] {
+  // Group rows by patient_id (per the original note the scenario already runs per sub_scenario, so only patient-level grouping is needed here — not verifiable from this function)
+  const byPatient = new Map<string, HitRow[]>();
+  for (const r of rows) {
+    const arr = byPatient.get(r.patient_id) ?? [];
+    arr.push(r);
+    byPatient.set(r.patient_id, arr);
+  }
+  const merged: HitRow[] = [];
+  for (const [, patientRows] of byPatient.entries()) {
+    // Split into two groups: rows with at least one parsed tooth vs. whole-mouth rows (no parsed tooth)
+    const wholeMouth: HitRow[] = [];
+    const withTooth: { row: HitRow; teeth: Set<string> }[] = [];
+    for (const r of patientRows) {
+      const teeth = parseToothSet(r.tooth);
+      if (teeth.size === 0) wholeMouth.push(r);
+      else withTooth.push({ row: r, teeth });
+    }
+    // Whole-mouth signals: one cluster per patient (whole-mouth diagnoses such as K05 are not split by tooth)
+    if (wholeMouth.length > 0) {
+      wholeMouth.sort((a, b) => b.days_since - a.days_since);
+      const lead = { ...wholeMouth[0]! }; // earliest signal becomes the lead hit; tooth is not rewritten here, it stays exactly as on that row
+      lead.cluster_fact_ids = wholeMouth.map((x) => x.signal_fact_id);
+      lead.cluster_has_diagnosis = wholeMouth.some((x) => x.signal_type === 'diagnosis_record');
+      lead.cluster_has_recommendation = wholeMouth.some((x) => x.signal_type === 'recommendation_record');
+      lead.cluster_triggers = uniqueTriggers(wholeMouth);
+      merged.push(lead);
+    }
+    // Toothed signals: merge by tooth overlap using union-find
+    // Simplified form: pairwise O(n²) comparison of rows (per the original note n is usually < 10, which is sufficient)
+    const parent: number[] = withTooth.map((_, i) => i);
+    const find = (x: number): number => (parent[x] === x ? x : (parent[x] = find(parent[x]!)));
+    const union = (a: number, b: number) => {
+      const ra = find(a), rb = find(b);
+      if (ra !== rb) parent[ra] = rb;
+    };
+    for (let i = 0; i < withTooth.length; i++) {
+      for (let j = i + 1; j < withTooth.length; j++) {
+        const A = withTooth[i]!.teeth;
+        const B = withTooth[j]!.teeth;
+        for (const t of A) {
+          if (B.has(t)) { union(i, j); break; }
+        }
+      }
+    }
+    // Collect clusters keyed by union-find root, accumulating the tooth union and the member rows
+    const clusters = new Map<number, { teeth: Set<string>; rows: HitRow[] }>();
+    for (let i = 0; i < withTooth.length; i++) {
+      const root = find(i);
+      const c = clusters.get(root) ?? { teeth: new Set<string>(), rows: [] };
+      for (const t of withTooth[i]!.teeth) c.teeth.add(t);
+      c.rows.push(withTooth[i]!.row);
+      clusters.set(root, c);
+    }
+    for (const c of clusters.values()) {
+      // Lead hit = the row with the largest days_since in the cluster; tooth becomes the union joined with ';' in default (lexicographic string) sort order
+      c.rows.sort((a, b) => b.days_since - a.days_since);
+      const lead = { ...c.rows[0]! };
+      lead.tooth = Array.from(c.teeth).sort().join(';');
+      lead.cluster_fact_ids = c.rows.map((x) => x.signal_fact_id);
+      lead.cluster_has_diagnosis = c.rows.some((x) => x.signal_type === 'diagnosis_record');
+      lead.cluster_has_recommendation = c.rows.some((x) => x.signal_type === 'recommendation_record');
+      lead.cluster_triggers = uniqueTriggers(c.rows);
+      merged.push(lead);
+    }
+  }
+  return merged;
+}
+/// Extracts the unique (type, code) trigger pairs from a cluster's rows, for use as signals.triggers.
+/// Entries with type 'diagnosis' are placed before 'recommendation' ones (so the frontend's preferred first entry is a diagnosis); any signal_type other than 'recommendation_record' is mapped to 'diagnosis'.
+function uniqueTriggers(rows: HitRow[]): Array<{ type: string; code: string }> {
+  const seen = new Set<string>();
+  const out: Array<{ type: string; code: string }> = [];
+  for (const r of rows) {
+    const type = r.signal_type === 'recommendation_record' ? 'recommendation' : 'diagnosis';
+    const key = `${type}|${r.signal_code}`;
+    if (!seen.has(key)) {
+      seen.add(key);
+      out.push({ type, code: r.signal_code });
+    }
+  }
+  // Put diagnosis entries first so triggers[0] is a diagnosis whenever the cluster has one; first-seen order within each type is kept as long as the sort is stable
+  out.sort((a, b) => (a.type === 'diagnosis' ? -1 : 1) - (b.type === 'diagnosis' ? -1 : 1));
+  return out;
 }