"""Speech recognition (ASR) service.

Converts uploaded audio files to text. The engine is selected through the
``ASR_ENGINE`` environment variable.
"""
import contextlib
import os
import tempfile
import threading

from flask import Flask, request, jsonify
from flask_cors import CORS

app = Flask(__name__)
CORS(app)

# ASR engine selection.
# Options: 'faster-whisper', 'whisper', 'sherpa-onnx'
ASR_ENGINE = os.getenv('ASR_ENGINE', 'faster-whisper')

# Module-level holder for the loaded model (None until load_model() succeeds).
model = None

# Serializes lazy model loading so concurrent first requests load it only once.
_model_lock = threading.Lock()


def load_model() -> None:
    """Load the ASR model for the configured engine into the global ``model``.

    For 'faster-whisper' and 'whisper' the "base" model is loaded. For any
    other engine value a notice is printed and ``model`` is left as ``None``.
    Engine packages are imported lazily so only the selected one is required.
    """
    global model

    print(f'正在加载ASR模型 (引擎: {ASR_ENGINE})...')

    if ASR_ENGINE == 'faster-whisper':
        from faster_whisper import WhisperModel

        # Use an int8-quantized model to reduce memory usage.
        model = WhisperModel(
            "base",
            device="cpu",
            compute_type="int8",
        )
        print('Faster-Whisper模型加载成功')
    elif ASR_ENGINE == 'whisper':
        import whisper

        model = whisper.load_model("base")
        print('Whisper模型加载成功')
    else:
        print('使用Sherpa-ONNX需要单独部署,请参考文档')
        model = None


def _ensure_model():
    """Load the model on first use if it has not been loaded yet.

    ``load_model()`` is only called from the ``__main__`` guard, so when the
    app is imported by another server the model would otherwise stay ``None``.
    """
    if model is None:
        with _model_lock:
            # Re-check inside the lock: another request may have loaded it.
            if model is None:
                load_model()
    return model


def _upload_suffix(filename) -> str:
    """Return a safe temp-file suffix derived from the uploaded file name.

    Falls back to '.wav' when the name has no extension or the extension is
    not a short alphanumeric token.
    """
    ext = os.path.splitext(filename or '')[1]
    if 1 < len(ext) <= 9 and ext[1:].isalnum():
        return ext.lower()
    return '.wav'


def transcribe_with_faster_whisper(audio_path: str) -> str:
    """Transcribe ``audio_path`` with Faster-Whisper and return stripped text."""
    segments, _info = model.transcribe(
        audio_path,
        language="zh",
        beam_size=5,
        vad_filter=True,  # Enable VAD to filter out silence.
    )
    return "".join(segment.text for segment in segments).strip()


def transcribe_with_whisper(audio_path: str) -> str:
    """Transcribe ``audio_path`` with OpenAI Whisper and return stripped text."""
    result = model.transcribe(audio_path, language="zh", fp16=False)
    return result["text"].strip()


# Engine name -> transcription function.
_TRANSCRIBERS = {
    'faster-whisper': transcribe_with_faster_whisper,
    'whisper': transcribe_with_whisper,
}


@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Endpoint: speech to text.

    Request (multipart form):
        - audio: audio file (wav, mp3, m4a)

    Responses:
        200: {"text": "<recognized text>"}
        400: {"error": ...} when the 'audio' part is missing, the configured
             engine is unsupported, or no speech was recognized.
        500: {"error": ...} for any other failure.
    """
    try:
        # Make sure a file was uploaded.
        if 'audio' not in request.files:
            return jsonify({'error': '未找到音频文件'}), 400

        audio_file = request.files['audio']

        # Reject unsupported engines before touching the disk.
        recognize = _TRANSCRIBERS.get(ASR_ENGINE)
        if recognize is None:
            return jsonify({'error': f'不支持的ASR引擎: {ASR_ENGINE}'}), 400

        _ensure_model()

        # delete=False: the file is reopened by path by the ASR engine after
        # this handle is closed, and is removed explicitly in `finally`.
        tmp = tempfile.NamedTemporaryFile(
            delete=False,
            suffix=_upload_suffix(audio_file.filename),
        )
        audio_path = tmp.name
        try:
            # Write through the already-open handle; reopening the temp file
            # by name while it is still open fails on Windows.
            with tmp:
                audio_file.save(tmp)

            text = recognize(audio_path)

            if not text:
                return jsonify({'error': '未能识别到语音'}), 400

            print(f'识别结果: {text}')
            return jsonify({'text': text})
        finally:
            # Remove the temp file, including when saving or decoding failed.
            with contextlib.suppress(FileNotFoundError):
                os.remove(audio_path)

    except Exception as e:
        # Top-level request boundary: report the failure as a 500 response.
        print(f'ASR错误: {e}')
        return jsonify({'error': str(e)}), 500


@app.route('/health', methods=['GET'])
def health():
    """Health check: report status, configured engine and model load state."""
    return jsonify({
        'status': 'ok',
        'engine': ASR_ENGINE,
        'model_loaded': model is not None
    })


if __name__ == '__main__':
    load_model()
    print('ASR服务启动中...')
    print('监听地址: http://0.0.0.0:5000')
    app.run(host='0.0.0.0', port=5000, debug=False)