Commit 6fafb50e by luoqi

feat(persona): RFM 八象限特征(统计层,additive)+ recompute --force

- 新增 rfm feature(融合 R最近/F频次/M金额三时间语义):data 带 segment 八象限 +
  lifecycle 生命周期 + valueTier(0-4)/riskScore(0-3)。统一旧 value+recall_risk。
- additive 接入(暂不动 scorer/旧特征);本地 928 验证:分布临床合理,且
  valueTier/riskScore 100% 复现旧 value.score/recall_risk.score → 后续翻转零风险。
- recompute-persona 加 --force:算法/特征变更后(数据没变)跳过水位幂等闸强制重算
  (部署到服务器也需要,否则全 noop)。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
parent 0edbe90d
......@@ -18,6 +18,7 @@ interface Args {
host: string;
pid?: string;
pids?: string[]; // 多个 externalId(逗号分隔),定向重算受影响子集用
force?: boolean; // 跳过水位幂等闸:算法/特征变更后(数据没变)也强制重算
}
function parseArgs(argv: string[]): Args {
......@@ -25,6 +26,7 @@ function parseArgs(argv: string[]): Args {
for (const a of argv) {
if (a.startsWith('--host=')) args.host = a.slice('--host='.length);
else if (a.startsWith('--pid=')) args.pid = a.slice('--pid='.length);
else if (a === '--force') args.force = true;
else if (a.startsWith('--pids=')) {
args.pids = a.slice('--pids='.length).split(',').map((s) => s.trim()).filter(Boolean);
}
......@@ -65,6 +67,7 @@ async function bootstrap() {
const r = await svc.recompute({
patientId: p.id,
source: `manual:cli`,
force: args.force,
});
logger.log(
` ${p.externalId.padEnd(6)} ${(p.name ?? '').padEnd(12)} v${r.version} features=${r.featureCount} status=${r.status} (${r.durationMs}ms)`,
......
......@@ -5,6 +5,7 @@ import { TreatmentChainStatusFeatureExtractor } from './treatment-chain-status.f
import { RecallRiskFeatureExtractor } from './recall-risk.feature';
import { DoNotContactStatusFeatureExtractor } from './do-not-contact-status.feature';
import { EntitlementStatusFeatureExtractor } from './entitlement-status.feature';
import { RfmFeatureExtractor } from './rfm.feature';
/**
* FeatureRegistry — 收集所有规则路径的 PersonaFeature 提取器。
......@@ -23,7 +24,9 @@ export class FeatureRegistry {
risk: RecallRiskFeatureExtractor,
dnc: DoNotContactStatusFeatureExtractor,
entitlement: EntitlementStatusFeatureExtractor,
rfm: RfmFeatureExtractor,
) {
this.extractors = [value, chain, risk, dnc, entitlement];
// rfm(统计层)additive 加入;下一步翻转 scorer 读 rfm.data 后,推翻 value/chain/risk
this.extractors = [value, chain, risk, dnc, entitlement, rfm];
}
}
import { Injectable } from '@nestjs/common';
import {
PersonaFeatureKey,
FactType,
FactKind,
lookupDxTreatment,
} from '@pac/types';
import type {
ActiveFact,
FeatureExtractor,
FeatureExtractorContext,
PersonaFeatureDraft,
} from './feature.interface';
/**
 * RFM value segmentation (eight RFM segments + lifecycle) — the first feature of the statistics layer.
 *
 * Design basis: docs/algorithm/persona-design-v2.md
 *  - RFM blends three time semantics: R recency (snapshot) / F frequency (24-month window) /
 *    M monetary (lifetime).
 *  - It unifies the legacy `value` (= M) and `recall_risk` (= R + treatment gap) features: a single
 *    feature emits `valueTier` + `riskScore` for scoring, plus `segment` (eight segments) and
 *    `lifecycle` for audience selection / scripts.
 *  - A feature only produces facts (`data`); mapping facts to bonus points is the consumer's job.
 *
 * Definitions (defaults, kept together here so they can be calibrated against real distributions):
 *  - R recency   = days since the latest clinical/visit event
 *                  (encounter / treatment(actual) / diagnosis / recommendation / visit_registration).
 *  - F frequency = number of distinct visit days in the last 24 months
 *                  (encounter / treatment(actual) / visit_registration).
 *  - M monetary  = lifetime net spend = Σpayment + Σrecharge − Σrefund (cents).
 *  - High/low cut-offs: R high ≤ 270 days · F high ≥ 4 visit days / 2 years · M high ≥ ¥3,000.
 */
@Injectable()
export class RfmFeatureExtractor implements FeatureExtractor {
  readonly key = PersonaFeatureKey.RFM;

  /** Milliseconds in one day. */
  private static readonly DAY_MS = 86_400_000;
  /** Day count reported when the patient has no dated clinical fact at all. */
  private static readonly NO_EVENT_DAYS = 9999;
  /** Length of the frequency window in days (24 months). */
  private static readonly F_WINDOW_DAYS = 730;
  /** R is "high" when the last event is at most this many days ago. */
  private static readonly R_HIGH_MAX_DAYS = 270;
  /** F is "high" from this many distinct visit days in the window. */
  private static readonly F_HIGH_MIN_VISITS = 4;
  /** M is "high" from ¥3,000 (must stay equal to the tier-2 threshold in M_TIERS). */
  private static readonly M_HIGH_CENTS = 300_000;
  /** Upper edges (days, ascending) of the recency buckets scored 5, 4, 3, 2; beyond the last edge scores 1. */
  private static readonly R_SCORE_EDGES = [90, 270, 540, 900];

  /**
   * Monetary tiers, highest first (the first matching entry wins).
   * Kept identical to the legacy `value` feature so that valueBonus behaviour does not change.
   */
  private static readonly M_TIERS = [
    { cents: 3000_000, tier: 4, mScore: 5 }, // ¥30k diamond
    { cents: 1000_000, tier: 3, mScore: 5 }, // ¥10k gold
    { cents: 300_000, tier: 2, mScore: 4 }, // ¥3k silver (the M-high boundary)
    { cents: 50_000, tier: 1, mScore: 2 }, // ¥500 regular
  ];

  /** Money-carrying fact types and the sign each contributes to net spend. */
  private static readonly MONEY_SOURCES: ReadonlyArray<readonly [string, 1 | -1]> = [
    ['payment_record', 1],
    ['recharge_record', 1],
    ['refund_record', -1],
  ];

  /** Maps the (R high, F high, M high) triple to one of the eight segments: machine key + Chinese label. */
  private static segmentOf(rHi: boolean, fHi: boolean, mHi: boolean) {
    const m = mHi ? 'important' : 'general';
    const v = m === 'important' ? '重要' : '一般';
    if (rHi && fHi) return { key: `${m}_value`, zh: `${v}价值` };
    if (!rHi && fHi) return { key: `${m}_retain`, zh: `${v}保持` };
    if (rHi && !fHi) return { key: `${m}_develop`, zh: `${v}发展` };
    return { key: `${m}_winback`, zh: `${v}挽留` };
  }

  /**
   * Buckets a day count against ascending `edges`.
   * Returns 5 for the first bucket, 4 for the second, … and `5 - edges.length` past the last edge.
   */
  private static dayBucket(days: number, edges: number[]): number {
    for (let i = 0; i < edges.length; i++) if (days <= edges[i]!) return 5 - i;
    return 5 - edges.length;
  }

  /**
   * Derives the lifecycle stage from recency and first-seen age.
   * Recency takes priority: > 540 days is churned, > 270 days is silent; otherwise a patient first
   * seen within 180 days is new, anyone else is active. ("reactivated" is deferred per the design.)
   */
  private static lifecycleOf(
    recencyDays: number,
    firstDays: number,
  ): { key: 'churned' | 'silent' | 'new' | 'active'; zh: string } {
    if (recencyDays > 540) return { key: 'churned', zh: '流失' };
    if (recencyDays > RfmFeatureExtractor.R_HIGH_MAX_DAYS) return { key: 'silent', zh: '沉默' };
    if (firstDays <= 180) return { key: 'new', zh: '新客' };
    return { key: 'active', zh: '活跃' };
  }

  /**
   * Reads `content.amount_cents` from a fact as a number.
   * A missing or non-numeric amount counts as 0 so that one malformed record cannot turn the
   * whole running total into NaN (which would silently zero the tier and print "¥NaN").
   */
  private static centsOf(fact: ActiveFact): number {
    const cents = Number((fact.content as Record<string, unknown>).amount_cents ?? 0);
    return Number.isFinite(cents) ? cents : 0;
  }

  /**
   * Whole days elapsed from `then` to `now`; NO_EVENT_DAYS when `then` is null.
   * Clamped at 0: a fact dated after `now` is reported as "today" instead of a negative day count.
   * The clamp does not move any bucket or threshold result, because every cut-off used on these
   * values is positive.
   */
  private static daysSince(now: Date, then: Date | null): number {
    if (!then) return RfmFeatureExtractor.NO_EVENT_DAYS;
    return Math.max(0, Math.floor((now.getTime() - then.getTime()) / RfmFeatureExtractor.DAY_MS));
  }

  /**
   * Builds the RFM feature draft for one patient from the facts in `ctx.factsByType`,
   * evaluated at `ctx.now`.
   *
   * @param ctx extractor context; only `factsByType` and `now` are read.
   * @returns a draft whose `data` carries the segment, lifecycle, raw R/F/M measurements and the
   *          legacy-compatible `valueTier` (0-4) / `riskScore` (0-3); `score` is rScore + fScore + mScore.
   */
  extract(ctx: FeatureExtractorContext): PersonaFeatureDraft {
    const get = (t: string) => ctx.factsByType.get(t) ?? [];
    const now = ctx.now;
    const nowMs = now.getTime();
    const DAY = RfmFeatureExtractor.DAY_MS;

    // ── M: lifetime net spend (payments + recharges − refunds) ──
    const moneyFactIds: string[] = [];
    let monetaryCents = 0;
    for (const [factType, sign] of RfmFeatureExtractor.MONEY_SOURCES) {
      for (const f of get(factType)) {
        monetaryCents += sign * RfmFeatureExtractor.centsOf(f);
        moneyFactIds.push(f.id);
      }
    }
    const mTier = RfmFeatureExtractor.M_TIERS.find((t) => monetaryCents >= t.cents);
    const valueTier = mTier?.tier ?? 0; // 0-4, kept for valueBonus compatibility
    const mScore = mTier?.mScore ?? 1; // 1-5
    const mHigh = monetaryCents >= RfmFeatureExtractor.M_HIGH_CENTS;

    // ── R / F: visit events ──
    const treatments = get(FactType.TREATMENT_RECORD);
    const diagnosesAndRecs = [
      ...get(FactType.DIAGNOSIS_RECORD),
      ...get(FactType.RECOMMENDATION_RECORD),
    ];
    const visitFacts: ActiveFact[] = [
      ...get(FactType.ENCOUNTER_RECORD),
      ...treatments.filter((f) => f.kind === FactKind.ACTUAL),
      ...get(FactType.VISIT_REGISTRATION_RECORD),
    ];
    // Recency also counts diagnoses/recommendations (any clinical touchpoint);
    // frequency counts physical visits only.
    const recencyFacts: ActiveFact[] = [...visitFacts, ...diagnosesAndRecs];
    const clinicalFactIds = recencyFacts.map((f) => f.id);

    let latest: Date | null = null;
    let earliest: Date | null = null;
    for (const f of recencyFacts) {
      if (!f.occurredAt) continue; // undated facts cannot contribute to R
      if (!latest || f.occurredAt > latest) latest = f.occurredAt;
      if (!earliest || f.occurredAt < earliest) earliest = f.occurredAt;
    }
    const recencyDays = RfmFeatureExtractor.daysSince(now, latest);
    const firstDays = RfmFeatureExtractor.daysSince(now, earliest);

    // F: distinct visit days within the last 24 months.
    // The day key is the UTC calendar date produced by toISOString().
    // NOTE(review): confirm UTC bucketing is intended rather than the clinic-local day.
    const windowStartMs = nowMs - RfmFeatureExtractor.F_WINDOW_DAYS * DAY;
    const visitDays = new Set<string>();
    for (const f of visitFacts) {
      if (!f.occurredAt || f.occurredAt.getTime() < windowStartMs) continue;
      visitDays.add(f.occurredAt.toISOString().slice(0, 10));
    }
    const freqCount24m = visitDays.size;

    const rHigh = recencyDays <= RfmFeatureExtractor.R_HIGH_MAX_DAYS;
    const fHigh = freqCount24m >= RfmFeatureExtractor.F_HIGH_MIN_VISITS;
    const rScore = RfmFeatureExtractor.dayBucket(recencyDays, RfmFeatureExtractor.R_SCORE_EDGES); // 5..1
    const fScore =
      freqCount24m >= 8 ? 5 : freqCount24m >= 4 ? 4 : freqCount24m >= 2 ? 3 : freqCount24m >= 1 ? 2 : 1;
    const seg = RfmFeatureExtractor.segmentOf(rHigh, fHigh, mHigh);

    // ── lifecycle (derived from R) ──
    const stage = RfmFeatureExtractor.lifecycleOf(recencyDays, firstDays);
    const lifecycle = stage.key;
    const lifecycleZh = stage.zh;

    // ── riskScore 0-3 (= legacy recall_risk: R + treatment gap), kept for likelihoodBonus ──
    // Categories of treatments that were actually performed.
    const actualCats = new Set<string>();
    for (const tx of treatments) {
      if (tx.kind !== FactKind.ACTUAL) continue;
      const cat = String((tx.content as Record<string, unknown>).category ?? '');
      if (cat) actualCats.add(cat);
    }
    // A gap exists when a dated diagnosis/recommendation maps to a treatment rule, none of the
    // rule's categories has been performed, and the signal is older than the rule's window.
    let hasGap = false;
    for (const sig of diagnosesAndRecs) {
      if (!sig.occurredAt) continue;
      const rule = lookupDxTreatment(String((sig.content as Record<string, unknown>).code ?? ''));
      if (!rule || rule.categories.some((c) => actualCats.has(c))) continue;
      if (Math.floor((nowMs - sig.occurredAt.getTime()) / DAY) > rule.windowDays) {
        hasGap = true;
        break;
      }
    }
    let riskScore: number;
    if (recencyDays >= 540 && hasGap) riskScore = 3;
    else if (recencyDays >= 360 || hasGap) riskScore = 2;
    else if (recencyDays >= 180) riskScore = 1;
    else riskScore = 0;

    const yuan = (monetaryCents / 100).toLocaleString('zh-CN', { maximumFractionDigits: 0 });
    const recencyStr = latest ? `距上次 ${recencyDays} 天` : '无到诊记录';
    return {
      key: this.key,
      // Self-contained human-readable summary: segment · lifecycle · R/F/M overview
      description: `${seg.zh} · ${lifecycleZh} · ${recencyStr} · 近2年${freqCount24m}次 · 累计¥${yuan}`,
      // score is only a coarse ranking hint (R+F+M, each 1-5); consumers compute their own points from data
      score: rScore + fScore + mScore,
      data: {
        segment: seg.key, // eight-segment key, used for audience selection
        lifecycle, // lifecycle stage
        rScore,
        fScore,
        mScore,
        rHigh,
        fHigh,
        mHigh,
        recencyDays,
        firstSeenDays: firstDays,
        freqCount24m,
        monetaryCents,
        valueTier, // 0-4, compatible with valueBonus
        riskScore, // 0-3, compatible with likelihoodBonus
        hasTreatmentGap: hasGap,
      },
      evidence: { factIds: [...new Set([...moneyFactIds, ...clinicalFactIds])] },
    };
  }
}
......@@ -8,6 +8,7 @@ import { TreatmentChainStatusFeatureExtractor } from './features/treatment-chain
import { RecallRiskFeatureExtractor } from './features/recall-risk.feature';
import { DoNotContactStatusFeatureExtractor } from './features/do-not-contact-status.feature';
import { EntitlementStatusFeatureExtractor } from './features/entitlement-status.feature';
import { RfmFeatureExtractor } from './features/rfm.feature';
@Module({
controllers: [PersonaController],
......@@ -21,6 +22,7 @@ import { EntitlementStatusFeatureExtractor } from './features/entitlement-status
RecallRiskFeatureExtractor,
DoNotContactStatusFeatureExtractor,
EntitlementStatusFeatureExtractor,
RfmFeatureExtractor,
],
exports: [PersonaService],
})
......
......@@ -42,6 +42,7 @@ export class PersonaService {
patientId: string;
source: string; // 'txn:<id>' / 'manual:<userId>' / 'scheduled:<cronId>'
now?: Date;
force?: boolean; // 跳过水位幂等闸:算法/特征变更后重算(数据没变也要重跑)
}): Promise<RecomputeResult> {
const startedAt = new Date();
const now = input.now ?? new Date();
......@@ -71,6 +72,7 @@ export class PersonaService {
});
if (
!input.force &&
currentPersona &&
latestTxnForGate &&
currentPersona.eventWatermark !== null &&
......
......@@ -379,6 +379,9 @@ export const PersonaFeatureKey = {
RECALL_RISK: 'recall_risk', // 流失/复发风险
DO_NOT_CONTACT_STATUS: 'do_not_contact_status', // 不打扰状态(合规硬约束)
// 统计层(RFM 八象限 — 融合 R 最近/F 频次/M 金额三种时间语义;统一旧 value+recall_risk)
RFM: 'rfm', // 价值分群(RFM 八象限 + 生命周期;data 带 segment 供圈人群)
// v1 候选(规则路径,业务方反馈后逐步上)
ENTITLEMENT_STATUS: 'entitlement_status', // 权益身份(商保直付 / 医保 / 储值 / 私行;事实投影型,史+最近日期)
INCOMPLETE_TREATMENT: 'incomplete_treatment', // 未完成治疗(治疗链有缺口)
......
......@@ -38,6 +38,7 @@ export const planScenarioLabel = (key: string): string =>
// ─────────────────────────────────────────────────────────
export const PERSONA_FEATURE_META: Record<string, { label: string; tone: Tone }> = {
[PersonaFeatureKey.RFM]: { label: '价值分群', tone: 'indigo' },
[PersonaFeatureKey.VALUE]: { label: '患者价值', tone: 'indigo' },
[PersonaFeatureKey.TREATMENT_CHAIN_STATUS]: { label: '治疗链状态', tone: 'amber' },
[PersonaFeatureKey.RECALL_RISK]: { label: '流失风险', tone: 'emerald' },
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment