| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- """
Medical NLP service.
Purpose: structure a doctor's dictated text into medical entities.
- """
- from flask import Flask, request, jsonify
- from flask_cors import CORS
- import torch
- from transformers import AutoTokenizer, AutoModelForTokenClassification
- import re
# Flask application object. CORS is enabled on it so that browser clients
# served from another origin can call the API.
app = Flask(__name__)
CORS(app)

# Module-level model state. Both names remain None until load_model()
# assigns them.
tokenizer = model = None

# Run inference on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Maps each BIO tag to the display name of its medical field. The B- and I-
# variants of a tag share one display name; 'O' marks tokens outside any field.
LABEL_MAP = {
    'B-CHIEF': '主诉', 'I-CHIEF': '主诉',
    'B-HISTORY': '现病史', 'I-HISTORY': '现病史',
    'B-DIAGNOSIS': '诊断', 'I-DIAGNOSIS': '诊断',
    'B-MEDICATION': '用药', 'I-MEDICATION': '用药',
    'O': '其他',
}
def load_model():
    """Load the medical NER tokenizer and model into the module globals.

    Both artifacts are read from the local directory ``./models/medical-ner``.
    The model is moved to ``device`` and put into evaluation mode.

    The globals ``tokenizer`` and ``model`` are assigned only after every
    loading step has succeeded. A failure part-way through (for example while
    moving the weights to the device) therefore never publishes a
    half-initialised model.

    Any exception raised while loading is caught and reported on stdout; the
    globals are then left unchanged and the function returns normally.
    """
    global tokenizer, model
    print('正在加载医疗NER模型...')
    # Alternative source (not used here): a Chinese medical model hosted on
    # HuggingFace, e.g. "HuatGPT/HuatGPT-medical-ner".
    # Active source: a locally trained model directory.
    model_path = "./models/medical-ner"
    try:
        loaded_tokenizer = AutoTokenizer.from_pretrained(model_path)
        loaded_model = AutoModelForTokenClassification.from_pretrained(model_path)
        loaded_model.to(device)
        loaded_model.eval()
    except Exception as e:
        print(f'模型加载失败: {e}')
        print('使用规则匹配作为降级方案...')
        return
    # Publish both objects together, only once they are fully usable.
    tokenizer, model = loaded_tokenizer, loaded_model
    print(f'模型加载成功,使用设备: {device}')
def _append_entity(entities, field, pieces):
    """Store the text joined from *pieces* under *field*, skipping empty spans."""
    entity_text = ''.join(pieces).strip()
    if field is not None and entity_text:
        entities.setdefault(field, []).append(entity_text)


def _group_bio_tokens(tokens, tags):
    """Merge parallel token and BIO-tag sequences into ``{field: [text, ...]}``."""
    entities = {}
    current_field = None
    pieces = []
    for token, tag in zip(tokens, tags):
        field = LABEL_MAP.get(tag, '其他')
        if field == '其他':
            # Outside any entity: close whatever span was open.
            _append_entity(entities, current_field, pieces)
            current_field, pieces = None, []
            continue
        # A B- tag always opens a new span, even directly after a span of the
        # same field; an I- tag of a different field also starts a new span.
        if field != current_field or tag.startswith('B-'):
            _append_entity(entities, current_field, pieces)
            current_field, pieces = field, []
        # WordPiece continuation tokens carry a leading '##' marker.
        pieces.append(token[2:] if token.startswith('##') else token)
    _append_entity(entities, current_field, pieces)
    return entities


def extract_entities_with_model(text):
    """Extract medical entities from *text* with the loaded NER model.

    The text is tokenized (truncated to 512 tokens), run through the model
    without gradient tracking, and the highest-scoring class of every token is
    decoded to its tag name through ``model.config.id2label``. Tags are then
    mapped to display names via ``LABEL_MAP`` and adjacent tokens of the same
    entity are merged.

    NOTE(review): this assumes the checkpoint's ``id2label`` uses the BIO tag
    names that appear as keys of ``LABEL_MAP`` (``B-CHIEF``, ``I-CHIEF``, ...);
    confirm against the trained model. Tags not present in ``LABEL_MAP`` are
    treated as outside any entity.

    Args:
        text: The dictated text to analyse.

    Returns:
        A dict mapping a field display name to the list of entity strings
        found for it. Fields with no entity are absent.

    Raises:
        Exception: If the model or tokenizer has not been loaded.
    """
    if model is None or tokenizer is None:
        raise Exception("模型未加载")

    # Tokenize
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Predict
    with torch.no_grad():
        outputs = model(**inputs)
    predicted_ids = torch.argmax(outputs.logits, dim=2)[0].tolist()

    # Decode class ids to tag names. Structural tokens added by the tokenizer
    # are forced to 'O' so they can never leak into an entity's text.
    tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0].tolist())
    id2label = model.config.id2label
    structural_tokens = {tokenizer.cls_token, tokenizer.sep_token, tokenizer.pad_token}
    tags = [
        'O' if token in structural_tokens else id2label.get(label_id, 'O')
        for token, label_id in zip(tokens, predicted_ids)
    ]
    return _group_bio_tokens(tokens, tags)
# Whitespace and separator punctuation trimmed from both ends of a captured
# field, so that "主诉:头痛三天," yields "头痛三天".
_FIELD_EDGE_NOISE = re.compile(r'\A[\s,,。.::;;、]+|[\s,,。.::;;、]+\Z')

# (field, pattern) pairs. Each pattern captures the text between a field
# keyword and the next section keyword (or the end of the text).
#  - Longer keyword alternatives come first ("诊断为" before "诊断"); otherwise
#    the shorter one always wins and "为" leaks into the captured value.
#  - re.DOTALL lets a field span several lines of dictation.
_RULE_PATTERNS = tuple(
    (field, re.compile(pattern, re.IGNORECASE | re.DOTALL))
    for field, pattern in (
        ('主诉', r'(?:主诉|患者因)(.*?)(?:现病史|病史|诊断|$)'),
        ('现病史', r'(?:现病史|病史)(.*?)(?:诊断|用药|体检|$)'),
        ('诊断', r'(?:诊断为|诊断)(.*?)(?:用药|治疗|建议|$)'),
        ('用药', r'(?:用药|服用)(.*?)(?:建议|注意事项|$)'),
    )
)


def extract_entities_with_rules(text):
    """Extract medical fields from *text* by keyword rules (fallback path).

    Intended for use when no NER model is available. For every field the first
    occurrence of one of its keywords is located and the text up to the next
    section keyword (or the end of the input) is taken as the value.
    Surrounding whitespace and separator punctuation are removed.

    Args:
        text: The dictated text to analyse.

    Returns:
        A dict with the keys '主诉', '现病史', '诊断' and '用药'. A field whose
        keyword does not occur in the text maps to an empty string.
    """
    entities = {field: '' for field, _ in _RULE_PATTERNS}
    for field, pattern in _RULE_PATTERNS:
        match = pattern.search(text)
        if match:
            entities[field] = _FIELD_EDGE_NOISE.sub('', match.group(1))
    return entities
@app.route('/extract', methods=['POST'])
def extract_entities():
    """HTTP endpoint: extract medical entities from dictated text.

    Expects a JSON object with a non-empty string field ``text``. The NER
    model is tried first; if it raises for any reason, the keyword-rule
    extractor is used instead.

    Illustrative request:
    {
        "text": "患者主诉头痛三天,现病史显示有发热症状,诊断为上呼吸道感染,用药为阿司匹林"
    }
    Illustrative response shape:
    {
        "主诉": "头痛三天",
        "现病史": "有发热症状",
        "诊断": "上呼吸道感染",
        "用药": "阿司匹林"
    }

    Returns:
        200 with one string per field (empty when nothing was found);
        400 when the body is not a JSON object or ``text`` is empty or not a
        string; 500 with the error message on any other failure.
    """
    try:
        # silent=True: a missing, malformed or non-JSON body yields None
        # instead of raising, so it is reported as a client error below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': '请求体必须是JSON对象'}), 400
        text = data.get('text', '')
        if not text:
            return jsonify({'error': '文本不能为空'}), 400
        if not isinstance(text, str):
            return jsonify({'error': '文本必须是字符串'}), 400

        # Prefer the model; fall back to the rules if it fails.
        try:
            entities = extract_entities_with_model(text)
        except Exception as e:
            print(f'模型推理失败,使用规则匹配: {e}')
            entities = extract_entities_with_rules(text)

        # Convert to the format the front end expects. Every known field is
        # present in the response, whichever extractor produced the entities.
        result = dict.fromkeys(
            (name for name in LABEL_MAP.values() if name != '其他'), ''
        )
        for field, values in entities.items():
            if isinstance(values, list):
                result[field] = ' '.join(values) if values else ''
            else:
                result[field] = values
        print(f'提取结果: {result}')
        return jsonify(result)
    except Exception as e:
        print(f'处理错误: {e}')
        return jsonify({'error': str(e)}), 500
@app.route('/health', methods=['GET'])
def health_check():
    """Health-check endpoint.

    Returns a JSON object with a constant ``status`` of ``ok``, whether the
    module-level ``model`` has been set, and the name of the torch device.
    """
    is_model_loaded = model is not None
    payload = {
        'status': 'ok',
        'model_loaded': is_model_loaded,
        'device': str(device),
    }
    return jsonify(payload)
if __name__ == '__main__':
    # Load the model first; load_model() reports failures itself and the
    # service then answers through the rule-based fallback.
    load_model()
    # Announce start-up, then serve on all interfaces, port 5001.
    for banner_line in ('医疗NLP服务启动中...', '监听地址: http://0.0.0.0:5001'):
        print(banner_line)
    app.run(debug=False, port=5001, host='0.0.0.0')
|