59 lines
1.9 KiB
Python
59 lines
1.9 KiB
Python
from flask import Flask, request, jsonify
|
||
import wenet
|
||
import os
|
||
import uuid
|
||
from datetime import datetime
|
||
|
||
app = Flask(__name__)
|
||
|
||
# 加载wenet模型
|
||
wenet_model_cn = wenet.load_model('chinese', device='cuda')
|
||
wenet_model_en = wenet.load_model('english', device='cuda')
|
||
|
||
def transcribe_audio(audio_path, language):
|
||
"""Transcribe audio file to text using wenet."""
|
||
if language == 'chinese':
|
||
result = wenet_model_cn.transcribe(audio_path)['text']
|
||
else:
|
||
result = wenet_model_en.transcribe(audio_path)['text']
|
||
result = result.replace("▁", " ")
|
||
print(result)
|
||
return result
|
||
|
||
@app.route('/transcribe', methods=['POST'])
|
||
def transcribe():
|
||
if 'audio' not in request.files or 'language' not in request.form:
|
||
return jsonify({"error": "Audio file and language must be provided"}), 400
|
||
|
||
audio_file = request.files['audio']
|
||
language = request.form['language']
|
||
|
||
if language not in ['chinese', 'english']:
|
||
return jsonify({"error": "Unsupported language"}), 400
|
||
|
||
# 设置缓存音频文件地址
|
||
unique_id = str(uuid.uuid4())
|
||
timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
|
||
os.makedirs('temp', exist_ok=True)
|
||
input_audio_filename = f"input_{timestamp}_{unique_id}.wav"
|
||
input_audio_path = os.path.join('temp', input_audio_filename)
|
||
audio_file.save(input_audio_path)
|
||
|
||
try:
|
||
# 使用wenet,音频转文本
|
||
response_text = transcribe_audio(input_audio_path, language)
|
||
if language == "chinese":
|
||
response_text = response_text.replace(":", ",")
|
||
response_text = response_text.replace("*", "")
|
||
else:
|
||
response_text = response_text.replace(":", ",")
|
||
response_text = response_text.replace("*", "")
|
||
return jsonify({"text": response_text})
|
||
finally:
|
||
# 清理缓存音频文件
|
||
if os.path.exists(input_audio_path):
|
||
os.remove(input_audio_path)
|
||
|
||
if __name__ == '__main__':
|
||
app.run()
|