Commit 4eee5874 by luoqi

fix(ingest): image_finding SQL 适配 prod CH 23.8(分层嵌套避免 多JOIN+ARRAYJOIN)

prod CH 23.8 拒绝「Multiple JOIN + ARRAY JOIN 同层」且不支持相关子查询。改成分层嵌套:
s1(image⋈client⋈org,无 array join)→ s2(array join 读 s1)→ 外层。
cohort 注入(裸 IN)落到 po 子查询 GROUP BY 前 → po 按 batch scope + 外层 notEmpty(po.org)
过滤到 batch 患者(prod CH 实测等价正确)。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
parent 4400e2a7
...@@ -176,11 +176,44 @@ sql_source: ...@@ -176,11 +176,44 @@ sql_source:
WHERE last_visit_time IS NOT NULL WHERE last_visit_time IS NOT NULL
) )
# ── 影像 AI 分析 → 诊断信号源(image_finding)【本次禁用,follow-up】 ── # ── 影像 AI 分析(fact_emr_image_analysis_out)→ 诊断信号源(image_finding)──
# ⚠️ prod CH 不允许「多 JOIN + ARRAY JOIN 混用」(Multiple JOIN does not support mix with ARRAY JOINs), # 结构化 AI 源,源 SQL 一次 pivot+炸牙位(下游零 transform):
# 且 org 子查询的 GROUP BY 会带偏 cohort 注入。需重做(pivot/炸牙位挪 transform 层 或 换 clinic 取法)。 # join file_num→client 取 patient_id+brand;org 用患者 EMR organization_id 作诊所(image 无诊所);
# 本地 CH 24.3 容忍该写法故未暴露;prod 严格。暂禁,assembler 注册也一并注掉。 # ARRAY JOIN 把 10 病种列 pivot 成 (code,牙位数组),splitByChar+arrayJoin 炸成每牙一行。
# (原 SQL 见 git 历史 commit 4a7750d) # ⚠️ prod CH 23.8 不允许「多 JOIN + ARRAY JOIN 同层」→ 必须分层嵌套:
# s1(joins,无 array join)→ s2(array join,读 s1 子查询)→ 外层(无 join)。
# ⚠️ cohort 注入(裸 patient_id,brand IN)会插到首个 GROUP BY 前 = po 子查询 → po 按 batch scope,
# 外层 notEmpty(po.org) 恰好把输出过滤到 batch 患者(已在 prod CH 验证等价正确)。
# 病种→K 码映射留 manifest(host 形态);去重靠召回 (subKey,tooth) 聚类;code_source=image_ai 独立 subject。
image_finding_rows: |
SELECT patient_id, brand, organization_id, emr_id, rq, code, code_source, tooth,
/* Overview: flattens the per-condition tooth lists of the image-analysis table into one row per
   (source row, code, tooth). Three nested levels: s1 performs the table joins, s2 performs the
   array expansion over s1, and this outer level only projects and filters.
   diag_external_id is emr_id, the fixed marker imgai, the code and the tooth, pipe-separated.
   NOTE(review): K01 and K03 are each fed by more than one source column (see the pair list in s2),
   so a tooth present in two such columns yields two rows with the same diag_external_id;
   confirm the consumer de-duplicates. */
concat(emr_id, '|imgai|', code, '|', tooth) AS diag_external_id
FROM (
/* s2: cm is one (code, tooth-list text) pair; cm.1 becomes code and cm.2 is broken into teeth.
   replaceRegexpAll removes the characters [ ] space and single-quote from the text, splitByChar
   cuts the remainder at commas, and arrayJoin emits one row per resulting element.
   Presumably cm.2 is text shaped like a bracketed, comma-separated list; TODO confirm. */
SELECT patient_id, brand, organization_id, emr_id, rq, cm.1 AS code, 'image_ai' AS code_source,
arrayJoin(splitByChar(',', replaceRegexpAll(cm.2, '[\[\] '']', ''))) AS tooth
FROM (
/* s1: joins only, no array expansion at this level. Each image-analysis row (ia) is matched to a
   client row (c) on file_num and brand to obtain patient_id, and po supplies organization_id.
   The ten condition columns are passed through unchanged for the expansion one level up. */
SELECT c.patient_id AS patient_id, c.brand AS brand, po.org AS organization_id,
ia.emr_id AS emr_id, ia.rq AS rq,
ia.cavity AS cavity, ia.impacted_tooth AS impacted_tooth, ia.embedded_tooth AS embedded_tooth,
ia.root_periodontitis AS root_periodontitis, ia.root_remnant AS root_remnant,
ia.crown_remnant AS crown_remnant, ia.wedge_shaped_defect AS wedge_shaped_defect,
ia.cyst AS cyst, ia.tooth_loss AS tooth_loss, ia.retained_primary_tooth AS retained_primary_tooth
FROM dw_group.fact_emr_image_analysis_out ia
INNER JOIN dw_group.fact_client_out c ON c.file_num = ia.file_num AND c.brand = ia.brand
/* po: one organization id per (patient_id, brand), taken with any() from treatment rows that have
   a non-empty organization_id.
   NOTE(review): any() returns an arbitrary value, so a patient with treatments under several
   organizations may map to a different one between runs; confirm this is acceptable.
   Per the notes that precede this query, the ingest cohort filter is inserted into this subquery,
   ahead of its grouping clause; keep it the first grouping clause in the query text. */
LEFT JOIN (
SELECT patient_id, brand, any(organization_id) AS org
FROM dw_group.fact_emr_treatment_out WHERE notEmpty(organization_id)
GROUP BY patient_id, brand
) po ON po.patient_id = c.patient_id AND po.brand = c.brand
/* Keep only clients with a recorded last visit and a resolved organization. The notEmpty(po.org)
   test also discards rows that found no po match, so the left join behaves like an inner join. */
WHERE c.last_visit_time IS NOT NULL AND notEmpty(po.org)
) s1
/* Ten (code, condition column) pairs; the array join repeats each s1 row once per pair and
   exposes the pair as cm. Several columns intentionally share a code (K01 twice, K03 three times). */
ARRAY JOIN [('K02', cavity), ('K01', impacted_tooth), ('K01', embedded_tooth),
('K04', root_periodontitis), ('K03', root_remnant), ('K03', crown_remnant),
('K03', wedge_shaped_defect), ('K09', cyst), ('K08', tooth_loss),
('K00', retained_primary_tooth)] AS cm
/* Skip pairs whose column holds the empty-list text or an empty string. */
WHERE cm.2 != '[]' AND cm.2 != ''
) s2
/* Drop empty elements left over from the split. */
WHERE tooth != ''
# ── 诊所回访任务(fact_returnvisit_out)→ patient_return_visit upsert(展示用,5 试点)── # ── 诊所回访任务(fact_returnvisit_out)→ patient_return_visit upsert(展示用,5 试点)──
# customer_id AS patient_id(让 cohort 过滤生效);WHERE org ∈ 5 试点(= EMR 表的 org)→ 只摄 5 家。 # customer_id AS patient_id(让 cohort 过滤生效);WHERE org ∈ 5 试点(= EMR 表的 org)→ 只摄 5 家。
...@@ -815,7 +848,7 @@ assemblers: ...@@ -815,7 +848,7 @@ assemblers:
- { file: assemblers/encounter.yaml } - { file: assemblers/encounter.yaml }
- { file: assemblers/appointment.yaml } - { file: assemblers/appointment.yaml }
- { file: assemblers/diagnosis.yaml } - { file: assemblers/diagnosis.yaml }
# - { file: assemblers/image_finding.yaml } # 【本次禁用,prod CH join+arrayjoin 不兼容,follow-up 重做】 - { file: assemblers/image_finding.yaml } # 影像 AI 分析 → diagnosis_record(code_source=image_ai)
- { file: assemblers/treatment_planned.yaml } - { file: assemblers/treatment_planned.yaml }
- { file: assemblers/treatment_review.yaml } - { file: assemblers/treatment_review.yaml }
- { file: assemblers/treatment_actual.yaml } - { file: assemblers/treatment_actual.yaml }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment