Commit 6689b612 by luoqi

feat(persona): 权益身份升级为 B.1.3 五类多标签(卡券关键词)

- 摄入 fact_settlement_mode_out.card_type_name/card_name → payment_record.content
  (payment.parser + schema + payment.yaml;String() 强转防数字型卡券值炸 .trim)。
- entitlement_status 重写:遍历结算卡券/保司/channel 关键词多标签(允许并列):
  高端保险直付/银行私行权益/储值会员/儿牙会员/医保客户;未命中→不打标签。
- 注册表 spec(v2)。本地 928:医保225/银行私行175/儿牙35/储值27/高端保险17(274 患者有权益)。
- 无 schema migration(卡券名落 content JSONB);需重摄 payment facts。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
parent 37527216
......@@ -30,6 +30,9 @@ field_mapping:
# - 跟 design.md 患者价值定义对齐:LTV = 患者带来的业务量,不是现金流
amount: receivable_this # FieldMapper.normalize 按 amount_unit=yuan 转 cents
method: payment_channel # transforms.pick_first_nonzero 推断的主导支付通道
# 卡券名称(B.1.3 权益身份关键词匹配源:储值/儿牙/银行私行/商保/医保)
cardTypeName: card_type_name
cardName: card_name
# 商业保险公司名(settlement_mode_out.insurance_name)— 喂 persona「权益身份」特征。
# 保司名脏(57 个别名),归一在 entitlement-status.feature 的 canonicalInsurer 里做;
# 此处原样带入 fact.content.insurance_name(单一收口),非空即"商保结算"强信号。
......
......@@ -8,128 +8,81 @@ import type {
} from './feature.interface';
/**
 * entitlement_status — entitlement identity (B.1.3). Rule tier, multi-label (labels may co-exist).
 *
 * Walks every `payment_record` fact in the context and tags the patient with each
 * entitlement category for which a signal is found. Returns `null` (no tag) when
 * nothing matches.
 *
 * Signals read from each payment fact's `content`:
 *   - `channel === 'insurance'` or a non-blank `insurance_name`  → high_insurance
 *   - `channel === 'medical_insurance'`                          → medical
 *   - case-insensitive substring hit of any RULES keyword in
 *     `card_type_name` / `card_name` / `insurance_name`           → that rule's code
 * In addition, the presence of any `recharge_record` fact       → stored_value
 *
 * Categories (code → label, keywords):
 *   high_insurance → 高端保险直付: bupa / 中间带 / 保险直付 / 商保 / 万欣和 / 招商信诺 / msh
 *   bank_vip       → 银行私行权益: 招行私行 / 私人银行 / 白金卡 / 贵宾卡 / 私行
 *   stored_value   → 储值会员:     储值 / 预存
 *   pedo_member    → 儿牙会员:     儿牙会员 / 乐牙卡 / 涂氟年卡 / 儿牙 / 乐牙
 *   medical        → 医保客户:     医保
 *
 * Output: `description` is the matched labels joined by ' / ' in RULES order,
 * `score` is always null, `data` is `{ types, labels }`, and `evidence.factIds`
 * lists the ids of the facts that produced at least one hit.
 */
@Injectable()
export class EntitlementStatusFeatureExtractor implements FeatureExtractor {
  readonly key = PersonaFeatureKey.ENTITLEMENT_STATUS;

  /**
   * Keyword rules: a rule matches when any of its keywords occurs (case-insensitively)
   * in the concatenated card / insurer text of a payment fact.
   * Array order is also the order in which matched labels are emitted.
   */
  private static readonly RULES: Array<{ code: string; zh: string; kw: string[] }> = [
    { code: 'high_insurance', zh: '高端保险直付', kw: ['bupa', '中间带', '保险直付', '商保', '万欣和', '招商信诺', 'msh'] },
    { code: 'bank_vip', zh: '银行私行权益', kw: ['招行私行', '私人银行', '白金卡', '贵宾卡', '私行'] },
    { code: 'stored_value', zh: '储值会员', kw: ['储值', '预存'] },
    { code: 'pedo_member', zh: '儿牙会员', kw: ['儿牙会员', '乐牙卡', '涂氟年卡', '儿牙', '乐牙'] },
    { code: 'medical', zh: '医保客户', kw: ['医保'] },
  ];

  /**
   * Builds the entitlement feature draft for one patient.
   *
   * @param ctx extractor context; only `ctx.factsByType` is read here
   *            (`payment_record` and `recharge_record` entries).
   * @returns the feature draft, or `null` when no category matched.
   */
  extract(ctx: FeatureExtractorContext): PersonaFeatureDraft | null {
    const payments = (ctx.factsByType.get('payment_record') ?? []) as ActiveFact[];
    const recharges = (ctx.factsByType.get('recharge_record') ?? []) as ActiveFact[];

    const matched = new Set<string>();
    const evidenceIds = new Set<string>();

    for (const f of payments) {
      const c = (f.content ?? {}) as Record<string, unknown>;
      const channel = String(c.channel ?? '');
      // NOTE(review): insurance_name is used verbatim, both as the "has insurer" signal and as
      // keyword text. An earlier revision of this extractor treated names such as 测试 / test /
      // 乐牙 as non-insurer junk — confirm whether such names should be ignored here.
      const text = [c.card_type_name, c.card_name, c.insurance_name]
        .map((x) => String(x ?? ''))
        .join(' ')
        .toLowerCase();

      let hit = false;

      // Strong signals: settlement channel, or any non-blank insurer name.
      if (channel === 'insurance' || String(c.insurance_name ?? '').trim()) {
        matched.add('high_insurance');
        hit = true;
      }
      if (channel === 'medical_insurance') {
        matched.add('medical');
        hit = true;
      }

      // Keyword signals from card / insurer text (substring match, case-insensitive).
      for (const r of EntitlementStatusFeatureExtractor.RULES) {
        if (r.kw.some((k) => text.includes(k.toLowerCase()))) {
          matched.add(r.code);
          hit = true;
        }
      }

      if (hit) evidenceIds.add(f.id);
    }

    // Any recharge record also counts as stored-value membership. Record those facts as
    // evidence too, so a label derived only from recharges is never emitted with empty evidence.
    if (recharges.length > 0) {
      matched.add('stored_value');
      for (const r of recharges) evidenceIds.add(r.id);
    }

    if (matched.size === 0) return null; // nothing matched → no tag

    // Emit in RULES order so the output is stable regardless of fact order.
    const ordered = EntitlementStatusFeatureExtractor.RULES.filter((r) => matched.has(r.code));
    const codes = ordered.map((r) => r.code);
    const labels = ordered.map((r) => r.zh);

    return {
      key: this.key,
      description: labels.join(' / '),
      score: null,
      data: { types: codes, labels },
      evidence: { factIds: [...evidenceIds] },
    };
  }
}
......@@ -339,6 +339,9 @@ const PaymentRecordContent = z
channel: nullableString(),
/// 商业保险公司名(可空)— 非空 = 商保结算;喂 persona「权益身份」。保司名脏,归一在 feature 层做。
insurance_name: nullableString(),
/// 卡券类型名 / 卡券名(可空)— B.1.3 权益身份关键词匹配源(储值/儿牙/银行私行等)
card_type_name: nullableString(),
card_name: nullableString(),
/// 收费医生 id(host 侧)— 医患关系信号(患者主要花钱给哪个医生)
doctor_id: nullableString(),
/// 关联接诊 id(反查"这次收款关联哪次接诊")
......
......@@ -47,6 +47,10 @@ export class PaymentParser implements Parser {
// Insurer name (nullable): a non-blank value means the payment was settled through commercial
// insurance; it feeds the persona "entitlement status" feature. Name canonicalisation is done
// in the feature layer, not here.
// The `as string` cast is compile-time only, so coerce with String() before trimming — same
// treatment as the card fields below — otherwise a non-string host value throws on `.trim()`.
const insRaw = (c.insuranceName as string | undefined) ?? null;
const insuranceName = insRaw ? String(insRaw).trim() || null : null;
// Card names (B.1.3 entitlement status): card_type_name / card_name; keyword matching happens
// in the feature layer.
// String() coercion: the host occasionally sends numeric card values, and calling `.trim()`
// on them directly would throw.
const cardTypeName = String(c.cardTypeName ?? '').trim() || null;
const cardName = String(c.cardName ?? '').trim() || null;
return [
{
......@@ -63,6 +67,8 @@ export class PaymentParser implements Parser {
amount_cents: amount,
channel: method,
insurance_name: insuranceName,
card_type_name: cardTypeName,
card_name: cardName,
doctor_id: doctorId,
encounter_external_id: encounterExternalId,
related_order_external_id: orderExternalId,
......
......@@ -156,4 +156,23 @@ export const PERSONA_FEATURE_SPECS: Record<string, PersonaFeatureSpec> = {
owner: 'pac-algo',
version: 1,
},
// ── B.1.3 权益身份(业务 CDP 口径;多标签)──
entitlement_status: {
key: 'entitlement_status',
nameZh: '权益身份',
tier: 'rule',
timeSemantics: 'lifetime', // 遍历历史全部结算记录
labelValues: ['高端保险直付', '银行私行权益', '储值会员', '儿牙会员', '医保客户'],
dataSource: 'DW fact_settlement_mode_out 卡券名(card_type_name/card_name)+ insurance_name/channel → payment_record.content;recharge_record(储值)',
dataFields: ['settlement_records(卡券名称、权益类型)'],
meaning: '通过历史结算卡券反推权益身份,识别高净值客户、预付费会员及医保覆盖客户',
algorithm: [
'遍历历史结算,命中关键词即打标(允许并列):',
'高端保险直付 Bupa/中间带/保险直付/商保;银行私行 招行私行/私人银行/白金卡/贵宾卡;',
'储值会员 储值/预存(或有充值);儿牙会员 儿牙会员/乐牙卡/涂氟年卡;医保 医保。未命中→不打标签。',
].join('\n'),
owner: 'pac-algo',
version: 2,
},
};
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment