Commit 7a91d425 by luoqi

fix(sync): 单患者刷新兼容嵌套 manifest SQL + 加 dev 抽样开关

- injectPatientFilter 原用 regex 解析 SELECT...FROM table,遇嵌套子查询(image_finding s1/s2 + array join)
  会拖坏括号 → CH 语法错 → 单刷 500。改为包裹原查询为子查询 + 外层 patient_id/brand 过滤(兼容任意复杂 SQL)。
- 加 PAC_COHORT_LIMIT / PAC_COHORT_SAMPLE(recent/oldest/random)dev 抽样开关,默认全量、行为不变。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
parent 55f8de0c
......@@ -332,10 +332,29 @@ export class ClickHouseSourceService {
whereParts.push(ids.length === 1 ? `${patient_key_column} = ${quoted}` : `${patient_key_column} IN (${quoted})`);
}
const whereSql = whereParts.length > 0 ? ` WHERE ${whereParts.join(' AND ')}` : '';
const selectCols = tenant_key_column
? `${patient_key_column}, ${tenant_key_column}`
: patient_key_column;
const sql = `SELECT DISTINCT ${selectCols} FROM ${patient_list_from}${whereSql} ORDER BY ${patient_key_column}`;
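// dev/ops:PAC_COHORT_LIMIT=N → 只取 N 个患者(本地抽样重摄用)。不设 = 全量,默认行为不变。
// PAC_COHORT_SAMPLE = recent(默认,最近就诊优先)| oldest(最久未来,lapsed,召回候选多)
// | random(随机,production 代表性,自然带出 ~召回率比例)
// NOTE(review): when a sort column (rand() or list_cursor_column) is appended to the SELECT DISTINCT list,
//   DISTINCT applies to (key, sort column) pairs — a patient with several source rows can appear more than
//   once, so LIMIT N caps rows, not necessarily N unique patients. Confirm the caller dedupes the keys.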
const cohortLimitRaw = parseInt(process.env.PAC_COHORT_LIMIT ?? '', 10);
const cohortLimit = Number.isFinite(cohortLimitRaw) && cohortLimitRaw > 0 ? cohortLimitRaw : 0;
const sampleMode = (process.env.PAC_COHORT_SAMPLE ?? 'recent').toLowerCase();
let extraSelect = '';
let orderTail = ` ORDER BY ${patient_key_column}`; // 全量:按 key 稳定排序
if (cohortLimit > 0) {
if (sampleMode === 'random') {
extraSelect = ', rand() AS _samp'; // 放进 SELECT 规避 DISTINCT + ORDER BY 非选列限制
orderTail = ` ORDER BY _samp LIMIT ${cohortLimit}`;
} else if (list_cursor_column) {
const dir = sampleMode === 'oldest' ? 'ASC' : 'DESC';
extraSelect = `, ${list_cursor_column}`;
orderTail = ` ORDER BY ${list_cursor_column} ${dir} LIMIT ${cohortLimit}`;
} else {
orderTail = ` ORDER BY ${patient_key_column} LIMIT ${cohortLimit}`;
}
}
const selectCols =
(tenant_key_column ? `${patient_key_column}, ${tenant_key_column}` : patient_key_column) + extraSelect;
const sql = `SELECT DISTINCT ${selectCols} FROM ${patient_list_from}${whereSql}${orderTail}`;
this.logger.log(`[clickhouse·cohort] list patient keys — ${sql.slice(0, 200)}`);
const started = Date.now();
const rows = (await this.queryJsonWithRetry(client, sql, 'list-patient-keys')) as Array<Record<string, unknown>>;
......@@ -473,21 +492,13 @@ export class ClickHouseSourceService {
patientIdEsc: string,
brandEsc: string,
): string {
const m = originalSql.match(/^\s*SELECT\s+([\s\S]+?)\s+FROM\s+([\w.]+)/i);
if (!m) {
throw new Error(
`[patient-refresh] cannot parse SQL: ${originalSql.slice(0, 80)}...`,
);
}
const selectCols = m[1]!.trim();
const fromTable = m[2]!;
const clauses: string[] = [
`patient_id = '${patientIdEsc}'`,
`brand = '${brandEsc}'`,
];
// 业务过滤(如 settlement_status=1 / is_refund=0 / appo_status IN ... )保留
clauses.push(...this.extractBusinessFilters(originalSql));
return `SELECT ${selectCols} FROM ${fromTable} WHERE ${clauses.join(' AND ')}`;
// 包裹原查询为子查询,外层按 patient_id+brand 过滤 —— 兼容任意复杂 SQL。
// ⚠️ 不再用 regex 解析 `SELECT cols FROM table`:遇到嵌套子查询(如 image_finding 的
// `SELECT … FROM (…s1… ARRAY JOIN …) s2 WHERE tooth != ''`)会把半截子查询拖进列表 →
// 括号不匹配 → CH 语法错误(单患者刷新 500)。包裹法对简单/嵌套查询都成立:
// 所有 cohort 资源查询的最外层都暴露 patient_id + brand;原 cohort IN 子查询与业务过滤一并保留。
const inner = originalSql.trim().replace(/;\s*$/, '');
return `SELECT * FROM (\n${inner}\n) AS _pt WHERE patient_id = '${patientIdEsc}' AND brand = '${brandEsc}'`;
}
/// W4 末:重写 SQL,把 cursor 条件注入到原 SQL 的 cohort 子查询和外层 WHERE
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment