Commit 3a3abed7 by luoqi

feat(persona): RFM 采用业务 CDP 口径(图 B.1.1)+ M 分位 + 注册表 spec

- rfm.feature 改为业务整理好的 RFM 定义:R/F 分段照图、M 按租户分位(p20/40/60/80)、
  8 段决策树(重要价值..低活跃)。R/F/M = last_visit_time/visit_times/net_receipts_total(lifetime)。
- M 分位需群体计算:PersonaService 算+缓存租户分位阈值(30min TTL),注入 ctx.populationStats;
  缺失降级绝对¥档。valueTier(绝对¥)/riskScore 保留 → 仍 100% 兼容旧 value/recall_risk。
- 新增 persona-feature-specs.ts:标签注册表(标签值/数据来源/数据字段/释义/算法/时间语义),
  代码存、来源可切(现 PAC 自算,宿主 CDP 报表给出后切宿主值)。score 列弃用语义。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
parent 6fafb50e
......@@ -34,6 +34,12 @@ export interface FeatureExtractorContext {
factsByType: Map<string, ActiveFact[]>;
/// today 锚点(测试时可注入固定时间)
now: Date;
/// 群体统计(租户级,统计层特征用 — 如 RFM 的 M 分位阈值 [p20,p40,p60,p80])。
/// 单一真理源由 PersonaService 计算 + 缓存后注入;缺失时统计层特征降级到绝对阈值。
populationStats?: {
/// 累计净消费(cents)分位阈值 [p20,p40,p60,p80](租户内,M 打分用)
monetaryQuantiles: number[];
} | null;
}
export interface ActiveFact {
......
......@@ -13,46 +13,81 @@ import type {
} from './feature.interface';
/**
* rfm 价值分群(RFM 八象限 + 生命周期)— 统计层第一个特征
* rfm 价值分群(RFM 八象限 + 生命周期)— 统计层
*
* ⭐ 设计依据 docs/algorithm/persona-design-v2.md:
* - RFM 天然融合三种时间语义:R 最近(snapshot)/ F 频次(window-24m)/ M 金额(lifetime)。
* - 统一旧 value(=M)+ recall_risk(=R+gap):一个特征产出 valueTier + riskScore 给打分,
* 再叠加 segment(八象限)+ lifecycle(生命周期)供圈人群/话术。data 带 segment → 天然 campaign-ready。
* - 特征只产「事实」(data),分值映射是各消费场景的事(scorer 读 data 自己算 bonus)。
* ⭐ 口径直接采用业务整理好的 CDP RFM 标签定义(见 persona-design-v2.md §四 / 注册表 spec):
* 数据字段:R=last_visit_time · F=visit_times · M=net_receipts_total(均 lifetime)
* 数据来源:现从 PAC 事实层自算(宿主 CDP 报表给出后切宿主值,口径不变)
*
* 口径(默认值,上线后用扫描器看真实分布校准,集中在此):
* R 最近 = 距最后一次临床/到诊事件天数(encounter/treatment(actual)/diagnosis/recommendation/visit_registration)
* F 频次 = 近 24 月去重到诊「天数」(encounter/treatment(actual)/visit_registration)
* M 金额 = 累计净消费 = Σpayment + Σrecharge − Σrefund(cents,全历史)
* 高/低阈 = R 高 ≤270d(还在召回节律内)· F 高 ≥4 次/2年 · M 高 ≥¥3,000(银卡+)
* Step1 R/F/M 各 1-5 分
* R 距上次就诊:≤540=5 / 541-730=4 / 731-1095=3 / 1096-1460=2 / >1460=1
* F 累计就诊次:≥5=5 / 3-4=4 / 2=2 / 1=1(0 视为 1)
* M 累计净消费分位:TOP20%=5 / 20-40%=4 / 40-60%=3 / 60-80%=2 / BOTTOM20%=1
* (分位阈值由 PersonaService 群体计算注入 ctx.populationStats;缺失→绝对¥档兜底)
* Step2 八象限(决策树):
* R≥4 F≥3 M≥4 重要价值 / R=3 F≥3 M≥4 重要保持 / R≥4 F=2 M≥4 重要发展 / R≤2 F≥3 M≥4 重要挽留
* R≥4 F≥3 M<4 一般价值 / R=3 F≥3 M<4 一般保持 / R≥4 F=2 M<4 一般发展 / 其余(含 R≤2 任意)低活跃
*
* 另产(非图,PAC 消费用):valueTier 0-4(绝对¥档,喂 valueBonus + VIP 展示)、
* riskScore 0-3(=旧 recall_risk,喂 likelihoodBonus)、lifecycle 生命周期。
* 特征只产事实(data),分值映射由各消费场景自管(score 列已弃用语义)。
*/
@Injectable()
export class RfmFeatureExtractor implements FeatureExtractor {
readonly key = PersonaFeatureKey.RFM;
// M 金额分档(与旧 value 一致,保证 valueBonus 行为不变)
private static readonly M_TIERS = [
{ cents: 3000_000, tier: 4, mScore: 5 }, // ¥30k 钻
{ cents: 1000_000, tier: 3, mScore: 5 }, // ¥10k 金
{ cents: 300_000, tier: 2, mScore: 4 }, // ¥3k 银(M 高分界)
{ cents: 50_000, tier: 1, mScore: 2 }, // ¥500 普通
// Absolute-yuan tiers for `valueTier` (thresholds in cents, highest first).
// Kept identical to the legacy `value` feature so valueBonus behaviour does not change.
private static readonly VALUE_TIERS = [
{ cents: 3000_000, tier: 4 },
{ cents: 1000_000, tier: 3 },
{ cents: 300_000, tier: 2 },
{ cents: 50_000, tier: 1 },
];
// Absolute-yuan fallback thresholds (cents, highest first) used to derive the 1-5 M score
// when no population quantiles are available.
private static readonly M_ABS = [3000_000, 1000_000, 300_000, 50_000];
// 八象限:(R 高, F 高, M 高) → segment key + 中文
private static segmentOf(rHi: boolean, fHi: boolean, mHi: boolean) {
const m = mHi ? 'important' : 'general';
const v = m === 'important' ? '重要' : '一般';
if (rHi && fHi) return { key: `${m}_value`, zh: `${v}价值` };
if (!rHi && fHi) return { key: `${m}_retain`, zh: `${v}保持` };
if (rHi && !fHi) return { key: `${m}_develop`, zh: `${v}发展` };
return { key: `${m}_winback`, zh: `${v}挽留` };
/**
 * Recency score (1-5) from the number of days since the last visit.
 * Bands: <=540 -> 5, <=730 -> 4, <=1095 -> 3, <=1460 -> 2, anything longer -> 1.
 */
private static rScore(d: number): number {
  // [inclusive upper bound in days, score], ordered from most recent to least recent.
  const bands: ReadonlyArray<readonly [number, number]> = [
    [540, 5],
    [730, 4],
    [1095, 3],
    [1460, 2],
  ];
  for (const [maxDays, score] of bands) {
    if (d <= maxDays) return score;
  }
  return 1;
}
/**
 * Frequency score from the visit count: >=5 -> 5, 3-4 -> 4, exactly 2 -> 2, otherwise 1.
 * A count of 0 is scored the same as 1. Score 3 is never produced by this mapping.
 */
private static fScore(n: number): number {
  if (n >= 3) {
    return n >= 5 ? 5 : 4;
  }
  // Below three visits: only an exact count of 2 is lifted above the floor score.
  return n === 2 ? 2 : 1;
}
/**
 * Monetary score (1-5) for a cumulative net spend given in cents.
 *
 * When `quantiles` holds exactly four entries, read as [p20, p40, p60, p80], the spend is
 * ranked against them: >=p80 -> 5, >=p60 -> 4, >=p40 -> 3, >=p20 -> 2, else 1.
 * Any other length falls back to the absolute thresholds in `M_ABS`, checked in the same way.
 */
private static mScore(cents: number, quantiles: number[]): number {
  // Build one "highest threshold first" list so both modes share the same ranking step.
  const descending: readonly number[] =
    quantiles.length === 4 ? [...quantiles].reverse() : RfmFeatureExtractor.M_ABS.slice(0, 4);
  const rank = descending.findIndex((threshold) => cents >= threshold);
  // rank 0 is the top band (score 5); no threshold reached means the bottom band (score 1).
  return rank === -1 ? 1 : 5 - rank;
}
private static dayBucket(days: number, edges: number[]): number {
// edges 升序;返回 5..1(越靠前越"好"/越近)
for (let i = 0; i < edges.length; i++) if (days <= edges[i]!) return 5 - i;
return 5 - edges.length;
/**
 * Eight-segment decision tree over the 1-5 R/F/M scores.
 *
 * The R/F combination picks a shape (value / retain / develop / winback) and M >= 4 picks the
 * "important" variant over the "general" one. There is no general winback segment: that
 * combination, like every combination not matched by a rule, resolves to `low_active`.
 */
private static segmentOf(r: number, f: number, m: number): { key: string; zh: string } {
  type Label = { key: string; zh: string };
  const lowActive: Label = { key: 'low_active', zh: '低活跃' };
  const table: Record<'value' | 'retain' | 'develop' | 'winback', { hi: Label; lo: Label }> = {
    value: { hi: { key: 'important_value', zh: '重要价值' }, lo: { key: 'general_value', zh: '一般价值' } },
    retain: { hi: { key: 'important_retain', zh: '重要保持' }, lo: { key: 'general_retain', zh: '一般保持' } },
    develop: { hi: { key: 'important_develop', zh: '重要发展' }, lo: { key: 'general_develop', zh: '一般发展' } },
    winback: { hi: { key: 'important_winback', zh: '重要挽留' }, lo: lowActive },
  };

  // Step 1: classify the R/F combination; anything unmatched stays undefined.
  let shape: keyof typeof table | undefined;
  if (f >= 3) {
    if (r >= 4) shape = 'value';
    else if (r === 3) shape = 'retain';
    else if (r <= 2) shape = 'winback';
  } else if (f === 2 && r >= 4) {
    shape = 'develop';
  }
  if (shape === undefined) return lowActive;

  // Step 2: pick the important/general variant by the M score.
  const picked = m >= 4 ? table[shape].hi : table[shape].lo;
  return { key: picked.key, zh: picked.zh };
}
extract(ctx: FeatureExtractorContext): PersonaFeatureDraft {
......@@ -60,7 +95,7 @@ export class RfmFeatureExtractor implements FeatureExtractor {
const now = ctx.now;
const DAY = 86400_000;
// ── M:累计净消费 ──
// ── M:累计净消费(lifetime)──
const moneyFactIds: string[] = [];
let monetaryCents = 0;
for (const f of get('payment_record')) {
......@@ -75,60 +110,53 @@ export class RfmFeatureExtractor implements FeatureExtractor {
monetaryCents -= Number((f.content as Record<string, unknown>).amount_cents ?? 0);
moneyFactIds.push(f.id);
}
const mTier = RfmFeatureExtractor.M_TIERS.find((t) => monetaryCents >= t.cents);
const valueTier = mTier?.tier ?? 0; // 0-4,喂 valueBonus
const mScore = mTier?.mScore ?? 1; // 1-5
const mHigh = monetaryCents >= 300_000; // ¥3k 银卡+
const valueTier = RfmFeatureExtractor.VALUE_TIERS.find((t) => monetaryCents >= t.cents)?.tier ?? 0;
const mScore = RfmFeatureExtractor.mScore(monetaryCents, ctx.populationStats?.monetaryQuantiles ?? []);
// ── R / F:到诊事件 ──
// ── R / F:就诊(encounter / actual treatment / 挂号)──
const visitFacts: ActiveFact[] = [
...get(FactType.ENCOUNTER_RECORD),
...get(FactType.TREATMENT_RECORD).filter((f) => f.kind === FactKind.ACTUAL),
...get(FactType.VISIT_REGISTRATION_RECORD),
];
// R 最近还纳入诊断/建议(任意临床触点),F 只算物理到诊
const recencyFacts: ActiveFact[] = [
...visitFacts,
...get(FactType.DIAGNOSIS_RECORD),
...get(FactType.RECOMMENDATION_RECORD),
];
const clinicalFactIds = recencyFacts.map((f) => f.id);
let latest: Date | null = null;
let earliest: Date | null = null;
for (const f of recencyFacts) {
if (!f.occurredAt) continue;
if (!latest || f.occurredAt > latest) latest = f.occurredAt;
if (!earliest || f.occurredAt < earliest) earliest = f.occurredAt;
}
const recencyDays = latest ? Math.floor((now.getTime() - latest.getTime()) / DAY) : 9999;
const firstDays = earliest ? Math.floor((now.getTime() - earliest.getTime()) / DAY) : 9999;
// F:近 24 月去重到诊天数
const win = now.getTime() - 730 * DAY;
const visitDays = new Set<string>();
let lastVisit: Date | null = null;
let firstVisit: Date | null = null;
for (const f of visitFacts) {
if (!f.occurredAt || f.occurredAt.getTime() < win) continue;
if (!f.occurredAt) continue;
visitDays.add(f.occurredAt.toISOString().slice(0, 10));
if (!lastVisit || f.occurredAt > lastVisit) lastVisit = f.occurredAt;
if (!firstVisit || f.occurredAt < firstVisit) firstVisit = f.occurredAt;
}
const freqCount24m = visitDays.size;
const rHigh = recencyDays <= 270;
const fHigh = freqCount24m >= 4;
const rScore = RfmFeatureExtractor.dayBucket(recencyDays, [90, 270, 540, 900]); // 5..1
const fScore = freqCount24m >= 8 ? 5 : freqCount24m >= 4 ? 4 : freqCount24m >= 2 ? 3 : freqCount24m >= 1 ? 2 : 1;
const freqCount = visitDays.size; // lifetime 去重就诊天数 = visit_times
const recencyDays = lastVisit ? Math.floor((now.getTime() - lastVisit.getTime()) / DAY) : 9999;
const firstDays = firstVisit ? Math.floor((now.getTime() - firstVisit.getTime()) / DAY) : 9999;
const seg = RfmFeatureExtractor.segmentOf(rHigh, fHigh, mHigh);
const rScore = RfmFeatureExtractor.rScore(recencyDays);
const fScore = RfmFeatureExtractor.fScore(freqCount);
const seg = RfmFeatureExtractor.segmentOf(rScore, fScore, mScore);
// ── lifecycle 生命周期(R 派生;reactivated 回流暂缓,后续加)──
// ── lifecycle 生命周期(临床节律,R 派生;reactivated 回流后续加)──
let lifecycle: string;
if (recencyDays > 540) lifecycle = 'churned';
else if (recencyDays > 270) lifecycle = 'silent';
if (recencyDays > 730) lifecycle = 'churned';
else if (recencyDays > 365) lifecycle = 'silent';
else if (firstDays <= 180) lifecycle = 'new';
else lifecycle = 'active';
const lifecycleZh = { churned: '流失', silent: '沉默', new: '新客', active: '活跃' }[lifecycle]!;
// ── riskScore 0-3(=旧 recall_risk:R + 治疗缺口),喂 likelihoodBonus ──
// ── riskScore 0-3(=旧 recall_risk:临床触点 recency + 治疗缺口),喂 likelihoodBonus ──
const clinicalFactIds: string[] = [];
let lastClinical: Date | null = null;
for (const f of [
...get(FactType.DIAGNOSIS_RECORD),
...get(FactType.TREATMENT_RECORD),
...get(FactType.RECOMMENDATION_RECORD),
...get(FactType.ENCOUNTER_RECORD),
]) {
clinicalFactIds.push(f.id);
if (f.occurredAt && (!lastClinical || f.occurredAt > lastClinical)) lastClinical = f.occurredAt;
}
const clinicalDays = lastClinical ? Math.floor((now.getTime() - lastClinical.getTime()) / DAY) : 9999;
const actualCats = new Set<string>();
for (const tx of get(FactType.TREATMENT_RECORD)) {
if (tx.kind !== FactKind.ACTUAL) continue;
......@@ -146,38 +174,34 @@ export class RfmFeatureExtractor implements FeatureExtractor {
}
}
let riskScore: number;
if (recencyDays >= 540 && hasGap) riskScore = 3;
else if (recencyDays >= 360 || hasGap) riskScore = 2;
else if (recencyDays >= 180) riskScore = 1;
if (clinicalDays >= 540 && hasGap) riskScore = 3;
else if (clinicalDays >= 360 || hasGap) riskScore = 2;
else if (clinicalDays >= 180) riskScore = 1;
else riskScore = 0;
const yuan = (monetaryCents / 100).toLocaleString('zh-CN', { maximumFractionDigits: 0 });
const recencyStr = latest ? `距上次 ${recencyDays} 天` : '无到诊记录';
const recencyStr = lastVisit ? `距上次 ${recencyDays} 天` : '无到诊记录';
return {
key: this.key,
// 人读自包含:分群 · 生命周期 · R/F/M 概要
description: `${seg.zh} · ${lifecycleZh} · ${recencyStr} · 近2年${freqCount24m}次 · 累计¥${yuan}`,
// score = 粗排提示(R+F+M 之和,1-5 各维),非语义载体;场景从 data 自算分
score: rScore + fScore + mScore,
description: `${seg.zh} · ${lifecycleZh} · ${recencyStr} · 就诊${freqCount}次 · 累计¥${yuan} · R${rScore}F${fScore}M${mScore}`,
// score 列已弃用语义(场景从 data 自算分);此处留空
score: null,
data: {
segment: seg.key, // 八象限 key → 圈人群
lifecycle, // 生命周期阶段
segment: seg.key, // 八象限 → 圈人群
lifecycle,
rScore,
fScore,
mScore,
rHigh,
fHigh,
mHigh,
recencyDays,
firstSeenDays: firstDays,
freqCount24m,
monetaryCents,
valueTier, // 0-4 → 兼容 valueBonus
riskScore, // 0-3 → 兼容 likelihoodBonus
freqCount, // = visit_times(lifetime 去重就诊天数)
monetaryCents, // = net_receipts_total
valueTier, // 0-4 绝对¥档 → valueBonus / VIP 展示
riskScore, // 0-3 → likelihoodBonus
hasTreatmentGap: hasGap,
},
evidence: { factIds: [...new Set([...moneyFactIds, ...clinicalFactIds])] },
evidence: { factIds: [...new Set([...moneyFactIds, ...visitFacts.map((f) => f.id), ...clinicalFactIds])] },
};
}
}
......@@ -28,11 +28,44 @@ import type {
export class PersonaService {
private readonly logger = new Logger(PersonaService.name);
/// Per-tenant cache of the M (cumulative net spend) quantile thresholds:
/// key = `host:tenant` -> { at: timestamp in ms, q: [p20, p40, p60, p80] }.
/// The RFM M score needs population quantiles, but personas are recomputed per patient, so the
/// thresholds are cached and reused (the original author notes the distribution changes slowly).
/// Intended usage per the original note: in a batch recompute the first patient triggers the
/// computation and later patients hit the cache; the TTL covers one-off single refreshes.
private readonly mQuantileCache = new Map<string, { at: number; q: number[] }>();
/// Cache time-to-live: 30 minutes, in milliseconds.
private static readonly M_QUANTILE_TTL_MS = 30 * 60 * 1000;
/// Dependencies are injected as constructor parameter properties.
constructor(
private readonly prisma: PrismaService,
private readonly registry: FeatureRegistry,
) {}
/**
 * Per-tenant [p20, p40, p60, p80] thresholds (in cents) of cumulative net spend per patient.
 *
 * Net spend per patient is payments + recharges - refunds over facts whose status is
 * 'active' or 'fulfilled'. Results are cached per `host:tenant` for `M_QUANTILE_TTL_MS`.
 *
 * @param hostId   host identifier (cast to uuid in the query)
 * @param tenantId tenant identifier
 * @param nowMs    current time in epoch milliseconds, used for cache freshness
 * @returns four finite thresholds, or `[]` when the tenant has no usable data or the query
 *          fails; callers treat `[]` as "fall back to absolute tiers".
 */
private async getMonetaryQuantiles(hostId: string, tenantId: string, nowMs: number): Promise<number[]> {
  const key = `${hostId}:${tenantId}`;
  const hit = this.mQuantileCache.get(key);
  if (hit && nowMs - hit.at < PersonaService.M_QUANTILE_TTL_MS) return hit.q;
  try {
    const rows = await this.prisma.$queryRaw<Array<{ q: Array<number | null> | null }>>`
WITH spend AS (
SELECT patient_id, SUM(
CASE type WHEN 'payment_record' THEN (content->>'amount_cents')::bigint
WHEN 'recharge_record' THEN (content->>'amount_cents')::bigint
WHEN 'refund_record' THEN -(content->>'amount_cents')::bigint END) AS net
FROM patient_facts
WHERE host_id = ${hostId}::uuid AND tenant_id = ${tenantId}
AND status IN ('active','fulfilled')
AND type IN ('payment_record','recharge_record','refund_record')
GROUP BY patient_id)
SELECT percentile_cont(ARRAY[0.2,0.4,0.6,0.8]) WITHIN GROUP (ORDER BY net)::float8[] AS q FROM spend`;
    const raw = rows[0]?.q ?? [];
    // A null element must not be coerced to 0 by Number(null): a zero threshold would rank
    // every non-negative spender in the top band. Map it to NaN so validation rejects it.
    const parsed = raw.map((x) => (x == null ? Number.NaN : Number(x)));
    // Only a complete set of four finite thresholds is usable; anything else degrades to []
    // so the M score falls back to the absolute tiers instead of using bogus quantiles.
    const q = parsed.length === 4 && parsed.every((x) => Number.isFinite(x)) ? parsed : [];
    this.mQuantileCache.set(key, { at: nowMs, q });
    return q;
  } catch (err) {
    // Failures are deliberately not cached, so the next call retries the query.
    this.logger.warn(`getMonetaryQuantiles failed (${key}): ${err instanceof Error ? err.message : String(err)}`);
    return [];
  }
}
/**
* 重算单个 patient 的 Persona,产新版本并 supersede 旧 active。
*
......@@ -174,6 +207,13 @@ export class PersonaService {
: null,
factsByType,
now,
populationStats: {
monetaryQuantiles: await this.getMonetaryQuantiles(
patient.hostId,
patient.tenantId,
now.getTime(),
),
},
};
const drafts: PersonaFeatureDraft[] = [];
......
......@@ -3,3 +3,4 @@ export * from './schemas';
export * from './utils';
export * from './labels';
export * from './canonical-codes';
export * from './persona-feature-specs';
/**
* Persona Feature Registry(标签注册表)— 单一真理源(代码,git/PR review)
*
* 画像的核心是「标签化」。每个标签按业务整理好的「标签卡」形式定义(对齐 CDP 标签中台 / OneModel):
* 标签值 / 数据来源 / 数据字段 / 标签释义 / 计算算法 + 时间语义 / 层级 / owner / version。
*
* - 存储:**代码常量**(本文件),不落库;DB(persona_features)只存每患者每标签的实例值。
* - 时间语义:每个标签自己声明(snapshot 当前态 / window 窗口 / lifetime 全史 / trend 趋势 / mixed)。
* - 收口:producer 产出的 key 必须在此登记(CI 防漂移,见 persona-design-v2.md §二)。
* - 来源可切:同一标签现可从 PAC 事实层自算,宿主 CDP 报表给出后切宿主值,口径(算法)不变。
*/
/** Tier of a feature: rule-based, statistical, model-based or LLM-derived. */
export type FeatureTier = 'rule' | 'statistical' | 'model' | 'llm';
/** Time semantics a feature declares for itself. */
export type FeatureTimeSemantics = 'snapshot' | 'window' | 'lifetime' | 'trend' | 'mixed';
/** One "label card" in the persona feature registry. */
export interface PersonaFeatureSpec {
key: string; // PersonaFeatureKey
nameZh: string; // label name (Chinese)
tier: FeatureTier; // tier: rule / statistical / model / LLM
timeSemantics: FeatureTimeSemantics; // time semantics
labelValues?: string[]; // label values (for enumerated labels)
dataSource: string; // data source (current state + planned switch)
dataFields: string[]; // data fields
meaning: string; // what the label means
algorithm: string; // computation rule, human-readable; the authoritative implementation is the extractor
owner: string;
version: number;
}
/** Registry of persona feature specs, keyed by feature key. */
export const PERSONA_FEATURE_SPECS: Record<string, PersonaFeatureSpec> = {
// ── B.1.1 RFM tier (business-defined CDP rule set, adopted as-is) ──
rfm: {
key: 'rfm',
nameZh: '价值分群(RFM)',
tier: 'statistical',
timeSemantics: 'mixed', // R = snapshot (most recent); F and M = lifetime
labelValues: [
'重要价值客户',
'重要保持客户',
'重要发展客户',
'重要挽留客户',
'一般价值客户',
'一般保持客户',
'一般发展客户',
'低活跃客户',
],
dataSource:
'现:PAC 事实层自算(encounter/treatment(actual)/挂号 + payment/recharge/refund);未来:宿主 CDP「客户综合分析报表」直接给 R/F/M',
dataFields: ['last_visit_time', 'visit_times', 'net_receipts_total'],
meaning: '基于最近就诊时间(R)、就诊频次(F)、累计消费(M)三维度的客户价值分层',
// Human-readable description of the scoring and segmentation rules, one rule per line.
algorithm: [
'Step1 R/F/M 得分(1-5):',
' R ≤540天=5 / 541-730=4 / 731-1095=3 / 1096-1460=2 / >1460=1',
' F ≥5次=5 / 3-4次=4 / 2次=2 / 1次=1',
' M 累计消费分位:TOP20%=5 / 20-40%=4 / 40-60%=3 / 60-80%=2 / BOTTOM20%=1(租户内分位)',
'Step2 分层:',
' R≥4 F≥3 M≥4→重要价值 / R=3 F≥3 M≥4→重要保持 / R≥4 F=2 M≥4→重要发展 / R≤2 F≥3 M≥4→重要挽留',
' R≥4 F≥3 M<4→一般价值 / R=3 F≥3 M<4→一般保持 / R≥4 F=2 M<4→一般发展 / 其余(含 R≤2 任意)→低活跃',
].join('\n'),
owner: 'pac-algo',
version: 1,
},
};
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment