python调用讯飞语音听写(流式版)
                                                            生活随笔
收集整理的這篇文章主要介紹了
                                python调用讯飞语音听写(流式版)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.                        
                                目錄
- 一、引入所需要的包
 - 二、定義相關常量
 - 三、生成請求url
 - 四、音頻相關
   - 1. 首先使用pyaudio獲取音頻流
   - 2. 從音頻流中獲取音頻并上傳到接口
 
- 五、語音識別接口調用
 - 六、返回結果處理
 - 七、語音識別類
 - 八、調用
 - 附錄
 
一、引入所需要的包
import hashlib import base64 import hmac import json from urllib.parse import urlencode import loggingfrom wsgiref.handlers import format_date_time import datetime from datetime import datetime import time from time import mktime import _thread as thread import pyaudiofrom ws4py.client.threadedclient import WebSocketClientlogging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)二、定義相關常量
# Frame status markers used while streaming audio to the recognition API.
STATUS_FIRST_FRAME = 0  # marks the first frame
STATUS_CONTINUE_FRAME = 1  # marks an intermediate frame
STATUS_LAST_FRAME = 2  # marks the last frame

# Audio capture settings: FORMAT, CHANNELS and RATE are passed to
# PyAudio.open(); CHUNK is the number of frames requested per stream.read().
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000

# 3. Building the request URL
class WsParam(object):
    """Credentials and request parameters for the iFlytek dictation WebSocket API.

    Attributes:
        APPId, APIKey, APISecret: credentials supplied by the caller.
        AudioFile: stored as given; not read anywhere in this class.
        CommonArgs: the "common" section of the first request frame.
        BusinessArgs: the "business" section of the first request frame
            (more options are listed on the vendor's site).
    """

    # The same host and path must appear in the signed string, in the query
    # parameters and in the final URL, so they are defined exactly once.
    HOST = 'ws-api.xfyun.cn'
    PATH = '/v2/iat'

    def __init__(self, APPId, APIKey, APISecret, AudioFile):
        self.APPId = APPId
        self.APIKey = APIKey
        self.APISecret = APISecret
        self.AudioFile = AudioFile

        # Common parameters (common)
        self.CommonArgs = {'app_id': self.APPId}
        # Business parameters (business)
        self.BusinessArgs = {
            'domain': 'iat',
            'language': 'zh_cn',
            'accent': 'mandarin',
            'vinfo': 1,
            'vad_eos': 10000,
            'dwa': 'wpgs',
            'ptt': 0,
        }

    def create_url(self):
        """Return the WebSocket URL with the signed authentication query string.

        The signature is an HMAC-SHA256 (keyed with APISecret) over the host,
        the current RFC 1123 date and the request line, base64-encoded.
        """
        # RFC 1123 formatted timestamp of the current time.
        date = format_date_time(mktime(datetime.now().timetuple()))

        # String to sign: host line, date line, request line.
        signature_origin = 'host: {}\ndate: {}\nGET {} HTTP/1.1'.format(
            self.HOST, date, self.PATH)
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature_sha = base64.b64encode(digest).decode(encoding='utf-8')

        authorization_origin = (
            'api_key="%s", algorithm="%s", headers="%s", signature="%s"'
            % (self.APIKey, 'hmac-sha256', 'host date request-line', signature_sha)
        )
        authorization = base64.b64encode(
            authorization_origin.encode('utf-8')).decode(encoding='utf-8')

        # Authentication parameters are sent as the URL query string.
        query = {
            'authorization': authorization,
            'date': date,
            'host': self.HOST,
        }
        return 'wss://' + self.HOST + self.PATH + '?' + urlencode(query)

# 4. Audio handling
1. 首先使用pyaudio獲取音頻流
# Open the default input device as a capture stream using the capture
# constants defined earlier (FORMAT, CHANNELS, RATE).
audio = pyaudio.PyAudio()
stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True)

# 2. Read audio from the stream and upload it to the API
# Excerpt of the send loop: `self` is the WebSocket client instance, so this
# fragment only runs inside one of its methods.
while True:
    # Stream.read() needs the number of frames to read; request one CHUNK.
    buf = stream.read(CHUNK)
    d = {
        'common': self.ws_param.CommonArgs,
        'business': self.ws_param.BusinessArgs,
        'data': {
            'status': 0,
            'format': 'audio/L16;rate=16000',
            'audio': str(base64.b64encode(buf), 'utf-8'),
            'encoding': 'raw',
        },
    }
    d = json.dumps(d)
    self.send(d)

# 5. Calling the speech recognition API
def opened(self):
    """Handle the WebSocket connection being established.

    Starts a background thread that reads audio from the microphone in
    CHUNK-sized pieces and sends each piece to the server as a JSON frame.
    The first frame also carries the common and business parameters.
    """

    def run(*args):
        interval = 0.04  # pause between frames, in seconds
        # Tracks whether the next frame is the first, a middle or the last one.
        status = STATUS_FIRST_FRAME
        audio = pyaudio.PyAudio()
        stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True)
        try:
            while True:
                buf = stream.read(CHUNK)
                data = {
                    'status': status,
                    # Declared rate follows RATE so it cannot drift from the
                    # rate the stream was opened with.
                    'format': 'audio/L16;rate={}'.format(RATE),
                    'audio': str(base64.b64encode(buf), 'utf-8'),
                    'encoding': 'raw',
                }
                if status == STATUS_FIRST_FRAME:
                    # Only the first frame carries the app id and the
                    # business parameters.
                    frame = {
                        'common': self.ws_param.CommonArgs,
                        'business': self.ws_param.BusinessArgs,
                        'data': data,
                    }
                else:
                    frame = {'data': data}
                self.send(json.dumps(frame))

                # NOTE(review): nothing visible here ever sets status to
                # STATUS_LAST_FRAME, so recording runs until the connection
                # drops; a stop condition has to be supplied by the caller.
                if status == STATUS_LAST_FRAME:
                    logging.info('錄音結束')
                    time.sleep(1)
                    break
                if status == STATUS_FIRST_FRAME:
                    status = STATUS_CONTINUE_FRAME
                # Simulate the audio sampling interval.
                time.sleep(interval)
        finally:
            # Release the audio device even if reading or sending fails.
            stream.stop_stream()
            stream.close()
            audio.terminate()
        # close() performs the closing handshake; closed() is only the
        # notification hook and would merely log.
        self.close(1000, '')

    thread.start_new_thread(run, ())

# 6. Handling the returned results
def received_message(self, message):
    """Handle a message received on the WebSocket.

    Parses the JSON reply, logs API errors (non-zero ``code``), and otherwise
    merges the recognised text into ``self.rec_text``, a dict keyed by result
    sequence number. A result with ``pgs == 'rpl'`` replaces the earlier
    results in the inclusive range ``rg``; any other result is stored under
    its own ``sn``.

    Any failure while parsing is logged together with the raw message and is
    not propagated.
    """
    raw = str(message)
    try:
        payload = json.loads(raw)  # parse once instead of once per field
        code = payload['code']
        sid = payload['sid']
        # Check the status code before touching 'data', so error replies that
        # carry no usable 'data' are still reported as API errors.
        if code != 0:
            err_msg = payload['message']
            logging.error('sid:%s call error:%s code is:%s' % (sid, err_msg, code))
            return

        data = payload['data']['result']
        result = ''.join(w['w'] for item in data['ws'] for w in item['cw'])

        # 'pgs' may be missing from a result; treat that as a plain append.
        if data.get('pgs') == 'rpl':
            rg = data['rg']
            self.rec_text[rg[0]] = result
            # rg is inclusive on both ends: drop every replaced entry after
            # the first, including the one at rg[1].
            for i in range(rg[0] + 1, rg[1] + 1):
                self.rec_text.pop(i, None)
        else:
            self.rec_text[data['sn']] = result
        logging.info('識別結果為: {}'.format(self.rec_text))
    except Exception as e:
        # Callback boundary: log and keep the receive loop alive.
        logging.info(raw)
        logging.error('receive msg,but parse exception: {}'.format(e))

# 7. The speech recognition class
class RecognitionWebsocket(WebSocketClient):
    """WebSocket client that streams audio and collects recognition results.

    Attributes:
        ws_param: object providing CommonArgs and BusinessArgs for the first frame.
        rec_text: dict of recognised text fragments keyed by sequence number.
    """

    def __init__(self, url, ws_param):
        super().__init__(url)
        self.ws_param = ws_param
        self.rec_text = {}

    def received_message(self, message):
        """Handle a message received on the WebSocket.

        The body is elided here in the article; it is the function listed in
        section 6.
        """
        ...

    def on_error(self, error):
        """Log a WebSocket error."""
        logging.error(error)

    def unhandled_error(self, error):
        """Route errors trapped by ws4py to on_error.

        ws4py reports socket errors through this hook, not through on_error.
        """
        self.on_error(error)

    def closed(self, code, reason=None):
        """Log that the recognition channel was closed."""
        logging.info('語音識別通道關閉' + str(code) + str(reason))

    def opened(self):
        """Handle the WebSocket connection being established.

        The body is elided here in the article; it is the function listed in
        section 5.
        """
        ...

# 8. Usage
# An empty AudioFile means no audio file is generated locally; whether to
# leave it empty depends on your requirements.
ws_param = WsParam(APPId='', APIKey='', APISecret='', AudioFile=r'')
ws_url = ws_param.create_url()
ws = RecognitionWebsocket(ws_url, ws_param)
try:
    ws.connect()
    ws.run_forever()
except KeyboardInterrupt:
    # Close the connection cleanly when the user interrupts with Ctrl-C.
    ws.close()

# Appendix
自動停止錄音的實現可以參考python錄音實現自動結束錄音
總結
以上是生活随笔為你收集整理的python调用讯飞语音听写(流式版)的全部內容,希望文章能夠幫你解決所遇到的問題。
                            
                        - 上一篇: 实现文本到声音的转换(WAVE)
 - 下一篇: AI人工智能将引入证券监管,数据库蓝海时