# Source repository: WangKang/playwright_demo
"""
Medical NLP service.

Purpose: structure a doctor's dictated text into medical entities.
"""

from flask import Flask, request, jsonify
from flask_cors import CORS
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification
import re

# Flask application object; CORS is enabled to allow cross-origin requests.
app = Flask(__name__)
CORS(app)

# Module-level model state, populated by load_model().
tokenizer = model = None

# Run inference on CUDA when it is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Maps each BIO tag to the display name of the medical field it belongs to.
# The B- (begin) and I- (inside) variants of a tag share one display name;
# 'O' (outside any entity) maps to the catch-all name.
LABEL_MAP = {
    tag: display_name
    for display_name, tags in (
        ('主诉', ('B-CHIEF', 'I-CHIEF')),
        ('现病史', ('B-HISTORY', 'I-HISTORY')),
        ('诊断', ('B-DIAGNOSIS', 'I-DIAGNOSIS')),
        ('用药', ('B-MEDICATION', 'I-MEDICATION')),
        ('其他', ('O',)),
    )
    for tag in tags
}


def load_model():
    """
    Load the medical NER tokenizer and model into the module-level globals.

    The model path below is a placeholder and must point at a real medical
    token-classification model.

    The tokenizer and model are loaded into local variables first and only
    published to the ``tokenizer`` / ``model`` globals once every step
    (loading, moving to ``device``, switching to eval mode) has succeeded.
    A failure part-way through therefore never leaves a half-initialised
    model visible to request handlers or to the health check.

    Loading errors are not propagated: they are printed and the globals are
    left untouched, so callers fall back to rule-based extraction.
    """
    global tokenizer, model

    print('正在加载医疗NER模型...')

    # Option 1: a Chinese medical model hosted on HuggingFace
    # model_name = "HuatGPT/HuatGPT-medical-ner"

    # Option 2: a locally trained model
    model_path = "./models/medical-ner"

    try:
        loaded_tokenizer = AutoTokenizer.from_pretrained(model_path)
        loaded_model = AutoModelForTokenClassification.from_pretrained(model_path)
        loaded_model.to(device)
        loaded_model.eval()
    except Exception as e:
        # Deliberate best-effort: the service stays up and uses the rules.
        print(f'模型加载失败: {e}')
        print('使用规则匹配作为降级方案...')
    else:
        # Publish both objects together, only after everything succeeded.
        tokenizer, model = loaded_tokenizer, loaded_model
        print(f'模型加载成功,使用设备: {device}')

def _store_span(entities, field, pieces):
    """Append the joined word pieces to ``entities[field]``; skip empty spans."""
    span_text = ''.join(pieces).strip()
    if span_text:
        entities.setdefault(field, []).append(span_text)


def _group_tagged_tokens(tokens, tags, special_tokens=()):
    """
    Merge BIO-tagged tokens into entity strings grouped by display name.

    Args:
        tokens: token strings, one per model position.
        tags: BIO tag strings (e.g. ``'B-CHIEF'``), parallel to ``tokens``.
        special_tokens: tokens that never belong to an entity; they are
            treated as if tagged ``'O'``.

    Returns:
        dict mapping a display name from ``LABEL_MAP`` to the list of entity
        strings found for it, in order of appearance. Fields with no entity
        are absent.
    """
    entities = {}
    current_field = None
    pieces = []

    for token, tag in zip(tokens, tags):
        # Unknown tags are treated like 'O' (outside any entity).
        field = LABEL_MAP.get(tag, '其他')
        if field == '其他' or token in special_tokens:
            field = None

        # A span ends when we leave the entity, switch to another field, or
        # meet a new B- tag (two adjacent entities of the same field).
        if current_field is not None and (field != current_field or tag.startswith('B-')):
            _store_span(entities, current_field, pieces)
            current_field, pieces = None, []

        if field is None:
            continue

        # An I- tag with no preceding B- tag leniently starts a span too.
        current_field = field
        # Drop only the leading word-piece continuation marker.
        pieces.append(token[2:] if token.startswith('##') else token)

    # Flush the entity that runs up to the last token.
    if current_field is not None:
        _store_span(entities, current_field, pieces)

    return entities


def extract_entities_with_model(text):
    """
    Extract medical entities from ``text`` with the loaded NER model.

    Predicted class ids are translated to BIO tag names through the model's
    own ``config.id2label`` table and then to display names via
    ``LABEL_MAP``. Special tokens reported by the tokenizer are ignored.

    Args:
        text: the dictated text; input beyond 512 tokens is truncated.

    Returns:
        dict mapping display names (values of ``LABEL_MAP``) to lists of
        entity strings. Empty when no entity is recognised.

    Raises:
        RuntimeError: if the model or tokenizer has not been loaded.
    """
    if model is None or tokenizer is None:
        raise RuntimeError("模型未加载")

    # Tokenize and move every tensor to the inference device.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Predict one class id per token position.
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=2)

    tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0].tolist())

    # The id -> tag mapping belongs to the model; ids missing from it are
    # treated as 'O'.
    id2label = model.config.id2label
    tags = [id2label.get(label_id, 'O') for label_id in predictions[0].tolist()]

    return _group_tagged_tokens(tokens, tags, set(tokenizer.all_special_tokens))


# Whitespace and separator punctuation trimmed from both ends of a value.
_RULE_TRIM_CHARS = ' \t\r\n\u3000,，。;；:：、'

# One pattern per field: a start keyword, a lazily captured value, and the
# keyword that begins the next section (or end of text). Longer keywords come
# first in each alternation so that e.g. "诊断为" is consumed whole instead of
# leaving "为" at the start of the value. DOTALL lets a value span lines.
_RULE_PATTERNS = (
    ('主诉', re.compile(r'(?:主诉|患者因)(.*?)(?:现病史|病史|诊断|$)', re.DOTALL)),
    ('现病史', re.compile(r'(?:现病史|病史)(.*?)(?:诊断|用药|体检|$)', re.DOTALL)),
    ('诊断', re.compile(r'(?:诊断为|诊断)(.*?)(?:用药|治疗|建议|$)', re.DOTALL)),
    ('用药', re.compile(r'(?:用药为|用药|服用)(.*?)(?:建议|注意事项|$)', re.DOTALL)),
)


def extract_entities_with_rules(text):
    """
    Rule-based medical entity extraction (fallback when no NER model is used).

    Splits ``text`` on section keywords and returns the text found after each
    keyword, with surrounding whitespace and separator punctuation removed.

    Args:
        text: the dictated text to split.

    Returns:
        dict with exactly the keys '主诉', '现病史', '诊断' and '用药'; a field
        whose keyword does not occur in ``text`` maps to an empty string.
    """
    entities = {field: '' for field, _ in _RULE_PATTERNS}

    for field, pattern in _RULE_PATTERNS:
        match = pattern.search(text)
        if match:
            entities[field] = match.group(1).strip(_RULE_TRIM_CHARS)

    return entities


@app.route('/extract', methods=['POST'])
def extract_entities():
    """
    Endpoint: extract medical entities from dictated text.

    Request body: a JSON object with a string ``text`` field, e.g.::

        {
            "text": "患者主诉头痛三天,现病史显示有发热症状,诊断为上呼吸道感染,用药为阿司匹林"
        }

    Response: a JSON object mapping field names to the extracted text. The
    shape is as below; the values are illustrative only::

        {
            "主诉": "...",
            "现病史": "...",
            "诊断": "...",
            "用药": "..."
        }

    The NER model is tried first. Rule-based extraction is used when the
    model raises or recognises no entity at all.

    Status codes:
        400 - the body is not a JSON object, or ``text`` is missing, empty,
              or not a string.
        500 - an unexpected error occurred during extraction.
    """
    # silent=True returns None for a missing or malformed JSON body instead
    # of raising an HTTP error, so client mistakes are reported as 400 here
    # rather than as 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': '请求体必须是JSON对象'}), 400

    text = data.get('text', '')
    if not text or (isinstance(text, str) and not text.strip()):
        return jsonify({'error': '文本不能为空'}), 400
    if not isinstance(text, str):
        return jsonify({'error': '文本必须是字符串'}), 400

    try:
        # Prefer the model; fall back to the rules on failure.
        try:
            entities = extract_entities_with_model(text)
        except Exception as e:
            print(f'模型推理失败,使用规则匹配: {e}')
            entities = extract_entities_with_rules(text)
        else:
            # An empty model result would produce a response with no fields;
            # the rules always return every field.
            if not entities:
                print('模型未识别到实体,使用规则匹配')
                entities = extract_entities_with_rules(text)

        # Convert to the flat format the front end expects: lists of entity
        # strings are joined with spaces, plain strings pass through.
        result = {
            field: ' '.join(values) if isinstance(values, list) else values
            for field, values in entities.items()
        }

        print(f'提取结果: {result}')
        return jsonify(result)

    except Exception as e:
        # Top-level boundary: report the failure instead of crashing.
        print(f'处理错误: {e}')
        return jsonify({'error': str(e)}), 500


@app.route('/health', methods=['GET'])
def health_check():
    """Health-check endpoint: report status, model availability and device."""
    model_ready = model is not None
    payload = {
        'status': 'ok',
        'model_loaded': model_ready,
        'device': str(device),
    }
    return jsonify(payload)


if __name__ == '__main__':
    # Attempt to load the NER model before serving requests.
    load_model()

    # Announce the listen address, then start the Flask server.
    print('医疗NLP服务启动中...', '监听地址: http://0.0.0.0:5001', sep='\n')
    app.run(debug=False, port=5001, host='0.0.0.0')