asr_server.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. """
Speech recognition service (ASR).
Purpose: convert uploaded audio files to text.
Supports multiple ASR engines.
  5. """
  6. from flask import Flask, request, jsonify
  7. from flask_cors import CORS
  8. import tempfile
  9. import os
app = Flask(__name__)
# NOTE(review): CORS is enabled with no options; flask-cors' documented
# default allows every origin — confirm that is intended for this service.
CORS(app)
# Select the ASR engine from the ASR_ENGINE environment variable.
# Options: 'faster-whisper', 'whisper', 'sherpa-onnx'
ASR_ENGINE = os.getenv('ASR_ENGINE', 'faster-whisper')
# Module-level holder for the loaded model; stays None until load_model()
# assigns it.
model = None
  17. def load_model():
  18. """
  19. 加载ASR模型
  20. """
  21. global model
  22. print(f'正在加载ASR模型 (引擎: {ASR_ENGINE})...')
  23. if ASR_ENGINE == 'faster-whisper':
  24. from faster_whisper import WhisperModel
  25. # 使用量化模型减少内存占用
  26. model = WhisperModel(
  27. "base",
  28. device="cpu",
  29. compute_type="int8"
  30. )
  31. print('Faster-Whisper模型加载成功')
  32. elif ASR_ENGINE == 'whisper':
  33. import whisper
  34. model = whisper.load_model("base")
  35. print('Whisper模型加载成功')
  36. else:
  37. print('使用Sherpa-ONNX需要单独部署,请参考文档')
  38. model = None
  39. def transcribe_with_faster_whisper(audio_path):
  40. """
  41. 使用Faster-Whisper进行语音识别
  42. """
  43. segments, info = model.transcribe(
  44. audio_path,
  45. language="zh",
  46. beam_size=5,
  47. vad_filter=True # 启用VAD过滤静音
  48. )
  49. text = "".join([segment.text for segment in segments])
  50. return text.strip()
  51. def transcribe_with_whisper(audio_path):
  52. """
  53. 使用OpenAI Whisper进行语音识别
  54. """
  55. result = model.transcribe(audio_path, language="zh", fp16=False)
  56. return result["text"].strip()
  57. @app.route('/transcribe', methods=['POST'])
  58. def transcribe():
  59. """
  60. 接口: 语音转文字
  61. 请求:
  62. - audio: 音频文件 (wav, mp3, m4a)
  63. 响应:
  64. {
  65. "text": "识别出的文字",
  66. "duration": 3.5 // 音频时长(秒)
  67. }
  68. """
  69. try:
  70. # 检查是否有文件上传
  71. if 'audio' not in request.files:
  72. return jsonify({'error': '未找到音频文件'}), 400
  73. audio_file = request.files['audio']
  74. # 保存到临时文件
  75. with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp:
  76. audio_file.save(tmp.name)
  77. audio_path = tmp.name
  78. try:
  79. # 进行语音识别
  80. if ASR_ENGINE == 'faster-whisper':
  81. text = transcribe_with_faster_whisper(audio_path)
  82. elif ASR_ENGINE == 'whisper':
  83. text = transcribe_with_whisper(audio_path)
  84. else:
  85. return jsonify({'error': f'不支持的ASR引擎: {ASR_ENGINE}'}), 400
  86. if not text:
  87. return jsonify({'error': '未能识别到语音'}), 400
  88. print(f'识别结果: {text}')
  89. return jsonify({'text': text})
  90. finally:
  91. # 删除临时文件
  92. if os.path.exists(audio_path):
  93. os.remove(audio_path)
  94. except Exception as e:
  95. print(f'ASR错误: {e}')
  96. return jsonify({'error': str(e)}), 500
  97. @app.route('/health', methods=['GET'])
  98. def health():
  99. """健康检查"""
  100. return jsonify({
  101. 'status': 'ok',
  102. 'engine': ASR_ENGINE,
  103. 'model_loaded': model is not None
  104. })
  105. if __name__ == '__main__':
  106. load_model()
  107. print('ASR服务启动中...')
  108. print('监听地址: http://0.0.0.0:5000')
  109. app.run(host='0.0.0.0', port=5000, debug=False)