""" 医疗NLP服务 功能: 将医生口述文本结构化为医疗实体 """ from flask import Flask, request, jsonify from flask_cors import CORS import torch from transformers import AutoTokenizer, AutoModelForTokenClassification import re app = Flask(__name__) CORS(app) # 允许跨域请求 # 全局变量存储模型 tokenizer = None model = None device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 医疗实体标签映射 LABEL_MAP = { 'B-CHIEF': '主诉', 'I-CHIEF': '主诉', 'B-HISTORY': '现病史', 'I-HISTORY': '现病史', 'B-DIAGNOSIS': '诊断', 'I-DIAGNOSIS': '诊断', 'B-MEDICATION': '用药', 'I-MEDICATION': '用药', 'O': '其他' } def load_model(): """ 加载医疗NER模型 这里使用示例,实际需要替换为真实的医疗模型 """ global tokenizer, model print('正在加载医疗NER模型...') # 方式1: 使用HuggingFace上的中文医疗模型 # model_name = "HuatGPT/HuatGPT-medical-ner" # 方式2: 使用本地训练的模型 model_path = "./models/medical-ner" try: tokenizer = AutoTokenizer.from_pretrained(model_path) model = AutoModelForTokenClassification.from_pretrained(model_path) model.to(device) model.eval() print(f'模型加载成功,使用设备: {device}') except Exception as e: print(f'模型加载失败: {e}') print('使用规则匹配作为降级方案...') def extract_entities_with_model(text): """ 使用NER模型提取医疗实体 """ if model is None or tokenizer is None: raise Exception("模型未加载") # 分词 inputs = tokenizer( text, return_tensors="pt", truncation=True, max_length=512, padding=True ) inputs = {k: v.to(device) for k, v in inputs.items()} # 预测 with torch.no_grad(): outputs = model(**inputs) predictions = torch.argmax(outputs.logits, dim=2) # 解析实体 tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0]) labels = [LABEL_MAP.get(f"B-{p.item()}" if p.item() % 2 == 1 else f"I-{p.item()}", '其他') for p in predictions[0]] # 组合实体 entities = {} current_entity = None current_text = '' for token, label in zip(tokens, labels): if label == '其他': if current_entity: entities.setdefault(current_entity, []).append(current_text.strip()) current_entity = None current_text = '' else: if label != current_entity: if current_entity: entities.setdefault(current_entity, []).append(current_text.strip()) current_entity = label current_text = token.replace('##', '') else: current_text += token.replace('##', '') # 最后一个实体 if current_entity: entities.setdefault(current_entity, []).append(current_text.strip()) return entities def extract_entities_with_rules(text): """ 基于规则的医疗实体提取(降级方案) 适用于没有NER模型的情况 """ entities = { '主诉': '', '现病史': '', '诊断': '', '用药': '' } # 简单规则: 通过关键词分割 patterns = { '主诉': r'(?:主诉|患者因)[,,。]*(.*?)(?:现病史|病史|诊断|$)', '现病史': r'(?:现病史|病史)[,,。]*(.*?)(?:诊断|用药|体检|$)', '诊断': r'(?:诊断|诊断为)[,,。]*(.*?)(?:用药|治疗|建议|$)', '用药': r'(?:用药|服用)[,,。]*(.*?)(?:建议|注意事项|$)' } for field, pattern in patterns.items(): match = re.search(pattern, text, re.IGNORECASE) if match: entities[field] = match.group(1).strip() return entities @app.route('/extract', methods=['POST']) def extract_entities(): """ 接口: 提取医疗实体 请求示例: { "text": "患者主诉头痛三天,现病史显示有发热症状,诊断为上呼吸道感染,用药为阿司匹林" } 响应示例: { "主诉": "头痛三天", "现病史": "有发热症状", "诊断": "上呼吸道感染", "用药": "阿司匹林" } """ try: data = request.json text = data.get('text', '') if not text: return jsonify({'error': '文本不能为空'}), 400 # 优先使用模型,失败则使用规则 try: entities = extract_entities_with_model(text) except Exception as e: print(f'模型推理失败,使用规则匹配: {e}') entities = extract_entities_with_rules(text) # 转换为前端期望的格式 result = {} for field, values in entities.items(): if isinstance(values, list): result[field] = ' '.join(values) if values else '' else: result[field] = values print(f'提取结果: {result}') return jsonify(result) except Exception as e: print(f'处理错误: {e}') return jsonify({'error': str(e)}), 500 @app.route('/health', methods=['GET']) def health_check(): """健康检查接口""" return jsonify({ 'status': 'ok', 'model_loaded': model is not None, 'device': str(device) }) if __name__ == '__main__': # 加载模型 load_model() # 启动服务 print('医疗NLP服务启动中...') print('监听地址: http://0.0.0.0:5001') app.run(host='0.0.0.0', port=5001, debug=False)