下面给你一份可以直接落地的 llm_guard.py。 作用就是: 只能解释,不能裁决 越权就拦截 输出非法就降级到程序模板 方便接到你现有量化系统里 你把它放到项目里,然后让所有 OpenClaw 输出都必须经过这个模块。 from __future__ import annotations from dataclasses import dataclass, asdict from typing import Callable, Dict, List, Optional, Literal, Any import json import re # ========================= # 1) 类型定义 # ========================= Decision = Literal["BUY", "HOLD", "SELL", "WATCH", "PASS"] MarketState = Literal[ "TREND_BULL", "TREND_BULL_HIGHVOL", "RANGE", "RISK_MILD", "RISK_SEVERE", "WARMUP", ] @dataclass class StrategyFact: """ 程序事实层:唯一可信来源。 所有会影响交易的字段都必须来自这里,而不是 LLM。 """ code: str name: str date: str confidence: float threshold: float decision: Decision trend_score: float risk_score: float market_state: MarketState rank_in_group: Optional[int] = None group_name: Optional[str] = None reason_flags: Optional[List[str]] = None target_cap: Optional[float] = None def to_dict(self) -> Dict[str, Any]: return asdict(self) @dataclass class GuardConfig: """ 守卫配置: - strict_numbers: 是否严格禁止 LLM 编造数字 - strict_keywords: 是否检查关键词与事实冲突 - allow_rank_number: 是否允许 explanation 里出现 rank 整数 """ strict_numbers: bool = True strict_keywords: bool = True allow_rank_number: bool = True class LLMOutputError(Exception): pass # ========================= # 2) 允许 / 禁止字段 # ========================= ALLOWED_LLM_FIELDS = {"summary", "explanation", "risk_note"} FORBIDDEN_LLM_FIELDS = { "decision", "confidence", "threshold", "market_state", "trend_score", "risk_score", "target_cap", "buy_threshold", "sell_threshold", "rank_in_group", "group_name", "code", "name", "date", } # ========================= # 3) 程序规则:最终决策只能程序决定 # ========================= def decide_signal( confidence: float, threshold: float, market_state: str, ) -> str: """ 示例硬规则: 你可以替换成你自己的规则,但必须放在程序里,不许放在 LLM 里。 """ if market_state == "RISK_SEVERE": return "PASS" if confidence >= threshold: return "BUY" if confidence >= threshold - 0.03: return "WATCH" return "PASS" def audit_strategy_fact(fact: StrategyFact) -> None: """ 事实层内部自审。 程序自己的结果先保证自洽,再喂给 LLM。 """ if fact.confidence < 0 or fact.confidence > 1.5: raise ValueError(f"confidence 超出合理范围: {fact.confidence}") if fact.threshold < 0 or fact.threshold > 1.5: raise ValueError(f"threshold 超出合理范围: {fact.threshold}") if fact.target_cap is not None and not (0 <= fact.target_cap <= 1.0): raise ValueError(f"target_cap 超出合理范围: {fact.target_cap}") # 示例审计规则 if fact.decision == "BUY" and fact.confidence < fact.threshold: raise ValueError( f"规则冲突: BUY 但 confidence({fact.confidence}) < threshold({fact.threshold})" ) if fact.market_state == "RISK_SEVERE" and fact.decision == "BUY": raise ValueError("规则冲突: RISK_SEVERE 环境不应 BUY") # ========================= # 4) Prompt 构造:把 LLM 降级为解释器 # ========================= def build_llm_prompt(fact: StrategyFact) -> str: """ 极简、强约束 prompt。 注意: - 只让它解释 - 不让它新增任何影响决策的字段 - 只允许 JSON """ return f""" 你是交易报告解释器,不是交易决策器。 绝对规则: 1. 只能解释程序已经给出的结论 2. 不允许修改任何数值、状态、阈值、决策 3. 不允许补充不存在的数据 4. 不允许推测未来涨跌 5. 不允许新增未提供的指标 6. 只能输出 JSON object 7. JSON 里只能包含以下 3 个字段: - summary - explanation - risk_note 程序事实如下: {json.dumps(fact.to_dict(), ensure_ascii=False)} 输出格式示例: {{ "summary": "该股今日为 BUY。", "explanation": "confidence 高于 threshold,且市场状态允许买入。", "risk_note": "当前非严重风险环境,但仍需注意波动。" }} """.strip() def build_explanation_only_prompt(fact: StrategyFact) -> str: """ 更狠的模式: 只让 LLM 返回一小段解释文本,不返回 JSON。 如果你想最稳,建议改用这个模式。 """ return f""" 你只能解释,不能决策。 不要修改程序给定的任何字段,不要补充任何新指标,不要预测未来涨跌。 程序事实: decision={fact.decision} confidence={fact.confidence:.2f} threshold={fact.threshold:.2f} market_state={fact.market_state} trend_score={fact.trend_score:.2f} risk_score={fact.risk_score:.2f} 请只输出两句中文解释。 """.strip() # ========================= # 5) JSON 解析 # ========================= def parse_llm_json(raw_text: str) -> Dict[str, Any]: raw_text = raw_text.strip() try: data = json.loads(raw_text) except Exception as e: raise LLMOutputError(f"LLM 输出不是合法 JSON: {e}") if not isinstance(data, dict): raise LLMOutputError("LLM 输出必须是 JSON object") return data # ========================= # 6) 字段级校验 # ========================= def validate_llm_fields(data: Dict[str, Any]) -> None: keys = set(data.keys()) forbidden = keys.intersection(FORBIDDEN_LLM_FIELDS) if forbidden: raise LLMOutputError(f"LLM 越权输出了禁止字段: {sorted(forbidden)}") unknown = keys - ALLOWED_LLM_FIELDS if unknown: raise LLMOutputError(f"LLM 输出了未授权字段: {sorted(unknown)}") for field in keys: if not isinstance(data[field], str): raise LLMOutputError(f"字段 {field} 必须是字符串") # ========================= # 7) 防编造数字 # ========================= def _extract_numbers(text: str) -> List[str]: return re.findall(r"\d+\.\d+|\d+", text) def validate_no_fabricated_numbers( data: Dict[str, Any], fact: StrategyFact, config: GuardConfig, ) -> None: """ 严格模式下: explanation 里出现的数字,必须来自 fact 中已有数字。 """ if not config.strict_numbers: return allowed_numbers = { f"{fact.confidence:.2f}", f"{fact.threshold:.2f}", f"{fact.trend_score:.2f}", f"{fact.risk_score:.2f}", } if fact.target_cap is not None: allowed_numbers.add(f"{fact.target_cap:.2f}") if config.allow_rank_number and fact.rank_in_group is not None: allowed_numbers.add(str(fact.rank_in_group)) for field in ["summary", "explanation", "risk_note"]: text = data.get(field, "") nums = _extract_numbers(text) for n in nums: if n not in allowed_numbers: raise LLMOutputError( f"LLM 可能编造了数值 {n},字段: {field}" ) # ========================= # 8) 文本一致性校验 # ========================= def validate_text_consistency( data: Dict[str, Any], fact: StrategyFact, config: GuardConfig, ) -> None: if not config.strict_keywords: return full_text = " ".join(data.get(k, "") for k in ALLOWED_LLM_FIELDS) # 决策冲突 if fact.decision == "BUY" and any(word in full_text for word in ["卖出", "不买", "放弃", "清仓"]): raise LLMOutputError("LLM 文本与程序决策冲突:程序是 BUY,但文本出现明显相反表述。") if fact.decision in ["PASS", "HOLD", "WATCH"] and "强烈买入" in full_text: raise LLMOutputError("LLM 文本与程序决策冲突:程序不是 BUY,但文本出现强烈买入。") # 市场状态冲突 if fact.market_state == "RISK_SEVERE" and "低风险" in full_text: raise LLMOutputError("LLM 文本与市场状态冲突:程序是 RISK_SEVERE,但文本称低风险。") if fact.market_state == "TREND_BULL" and "严重风险" in full_text: raise LLMOutputError("LLM 文本与市场状态冲突:程序是 TREND_BULL,但文本称严重风险。") # ========================= # 9) 全量校验入口 # ========================= def validate_llm_output( raw_text: str, fact: StrategyFact, config: Optional[GuardConfig] = None, ) -> Dict[str, Any]: if config is None: config = GuardConfig() data = parse_llm_json(raw_text) validate_llm_fields(data) validate_no_fabricated_numbers(data, fact, config) validate_text_consistency(data, fact, config) return data # ========================= # 10) 失败时降级:不用 LLM,也能出报告 # ========================= def fallback_explanation(fact: StrategyFact) -> Dict[str, str]: return { "summary": f"{fact.code} 今日程序决策为 {fact.decision}。", "explanation": ( f"程序结果显示 confidence={fact.confidence:.2f}," f"threshold={fact.threshold:.2f}," f"market_state={fact.market_state}。" ), "risk_note": ( f"趋势分={fact.trend_score:.2f},风险分={fact.risk_score:.2f}。" ), } # ========================= # 11) 最终对外结果:关键字段全部来自程序 # ========================= def build_final_report(fact: StrategyFact, llm_data: Dict[str, str]) -> Dict[str, Any]: return { "code": fact.code, "name": fact.name, "date": fact.date, "decision": fact.decision, # 程序字段 "confidence": fact.confidence, # 程序字段 "threshold": fact.threshold, # 程序字段 "market_state": fact.market_state, # 程序字段 "trend_score": fact.trend_score, # 程序字段 "risk_score": fact.risk_score, # 程序字段 "rank_in_group": fact.rank_in_group, "group_name": fact.group_name, "reason_flags": fact.reason_flags, "target_cap": fact.target_cap, "summary": llm_data.get("summary", ""), "explanation": llm_data.get("explanation", ""), "risk_note": llm_data.get("risk_note", ""), } # ========================= # 12) 一体化安全调用 # ========================= def generate_safe_report( fact: StrategyFact, call_openclaw: Callable[[str], str], config: Optional[GuardConfig] = None, ) -> Dict[str, Any]: """ 这是你真正应该在项目里调用的入口。 """ if config is None: config = GuardConfig() # 先审计程序事实 audit_strategy_fact(fact) prompt = build_llm_prompt(fact) try: raw = call_openclaw(prompt) llm_data = validate_llm_output(raw, fact, config) except Exception as e: print(f"[WARN] LLM 输出无效,已降级为模板说明。原因: {e}") llm_data = fallback_explanation(fact) final_report = build_final_report(fact, llm_data) return final_report # ========================= # 13) 如果你要最稳:只让 LLM 生成 explanation 文本 # ========================= def validate_explanation_text(text: str, fact: StrategyFact) -> str: """ 极简文本模式的轻校验。 这里不让它决定任何逻辑,只把它当备注。 """ if not isinstance(text, str): raise LLMOutputError("Explanation 必须是字符串") if len(text.strip()) == 0: raise LLMOutputError("Explanation 为空") # 粗略拦截明显冲突表述 if fact.decision == "BUY" and "卖出" in text: raise LLMOutputError("Explanation 与程序 BUY 冲突") if fact.market_state == "RISK_SEVERE" and "低风险" in text: raise LLMOutputError("Explanation 与程序 RISK_SEVERE 冲突") return text.strip() def generate_safe_report_text_mode( fact: StrategyFact, call_openclaw: Callable[[str], str], ) -> Dict[str, Any]: """ 更稳的模式: LLM 只写 explanation 一段文字,连 JSON 都不让它碰。 """ audit_strategy_fact(fact) prompt = build_explanation_only_prompt(fact) try: explanation = call_openclaw(prompt) explanation = validate_explanation_text(explanation, fact) llm_data = { "summary": f"{fact.code} 今日程序决策为 {fact.decision}。", "explanation": explanation, "risk_note": f"市场状态={fact.market_state}。" } except Exception as e: print(f"[WARN] LLM 文本无效,已降级为模板说明。原因: {e}") llm_data = fallback_explanation(fact) return build_final_report(fact, llm_data) # ========================= # 14) 模拟 OpenClaw,方便本地测试 # ========================= def mock_openclaw_good(prompt: str) -> str: return json.dumps({ "summary": "该股今日为 BUY。", "explanation": "confidence 高于 threshold,且市场状态允许买入。", "risk_note": "当前并非严重风险环境,但需注意正常波动。" }, ensure_ascii=False) def mock_openclaw_bad_override(prompt: str) -> str: # 故意越权:偷偷改 decision return json.dumps({ "decision": "SELL", "summary": "这票应该卖出。", "explanation": "虽然程序显示买入,但我认为不对。", "risk_note": "风险很大。" }, ensure_ascii=False) def mock_openclaw_bad_fabricate_number(prompt: str) -> str: # 故意编数字 return json.dumps({ "summary": "该股今日为 BUY。", "explanation": "confidence 0.93 高于 threshold 0.70,所以可以买。", "risk_note": "当前风险分只有 12。" }, ensure_ascii=False) # ========================= # 15) 示例 # ========================= if __name__ == "__main__": fact = StrategyFact( code="600000", name="浦发银行", date="2026-02-28", confidence=0.74, threshold=0.70, decision="BUY", trend_score=68.25, risk_score=41.33, market_state="TREND_BULL", rank_in_group=2, group_name="HIGH", reason_flags=["trend_ok", "risk_ok", "rank_ok"], target_cap=0.80, ) print("\n=== 好输出 ===") report1 = generate_safe_report(fact, mock_openclaw_good) print(json.dumps(report1, ensure_ascii=False, indent=2)) print("\n=== 越权输出 ===") report2 = generate_safe_report(fact, mock_openclaw_bad_override) print(json.dumps(report2, ensure_ascii=False, indent=2)) print("\n=== 编造数字 ===") report3 = generate_safe_report(fact, mock_openclaw_bad_fabricate_number) print(json.dumps(report3, ensure_ascii=False, indent=2)) 你怎么接到现有系统里 你现在的主流程最好改成这样: 1. 程序先算出事实 fact = StrategyFact( code=code, name=name, date=str(date), confidence=confidence, threshold=threshold, decision=decision, trend_score=trend_score, risk_score=risk_score, market_state=market_state, rank_in_group=rank, group_name=group_name, reason_flags=reason_flags, target_cap=target_cap, ) 2. 再让 OpenClaw 解释 report = generate_safe_report(fact, call_openclaw) 3. 最终落盘/展示只用 report 而且其中: decision confidence threshold market_state 这些都已经被程序锁死了。 最稳的建议 如果你现在已经对 OpenClaw 很不信任,我建议你直接先用: generate_safe_report_text_mode(...) 也就是: 不让它输出 JSON 不让它碰任何结构化字段 只让它写两句解释 这会比 JSON 模式更稳。 这份守卫模块能防什么 它主要防这几类: 偷改 decision 偷改 threshold / score / state 编造数字 写和程序相反的话 输出多余字段 输出不是 JSON