用SAPI实现Speech Recognition(SR) - 命令控制模式
生活随笔
收集整理的這篇文章主要介紹了
用SAPI实现Speech Recognition(SR) - 命令控制模式
小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
微軟的語音識別,在這里我們簡稱它為SR(speech recognition),SR分為兩種模式的監(jiān)聽:第一種模式:聽寫模式,即隨意輸入語音,監(jiān)聽對象將最為接近的字或者詞,句反饋出來;第二種模式:命令與控制模式,劃定范圍監(jiān)聽,制定一組被選項做為監(jiān)聽的,用戶的語音輸入被反饋成最為接近的一個選項。說得通俗一些:第一種是填空題,第二種是選擇題目。
之前轉載的一篇文章《用SAPI實現Speech Recognition(SR) - 聽寫模式》,介紹了“聽寫模式”的實現,這一篇給出“命令與控制”模式的例子程序。
#include <windows.h>
#include <sapi.h>
#include <stdio.h>
#include <string.h>
#include <atlbase.h>
#include "sphelper.h"

/// Waits for the next event on a recognition context and returns its result.
///
/// Repeatedly pulls an event with CSpEvent::GetFrom(); for as long as that call
/// reports S_FALSE the function blocks on the context's notify event and tries
/// again.
///
/// @param pRecoCtxt  Recognition context to read events from.
/// @param ppResult   Receives the event's recognition result with an extra
///                   reference added (the caller must Release() it), or NULL
///                   when the event carries no result.
/// @return The last HRESULT produced by GetFrom() / WaitForNotifyEvent().
inline HRESULT BlockForResult(ISpRecoContext * pRecoCtxt, ISpRecoResult ** ppResult)
{
    HRESULT hr = S_OK;
    CSpEvent event;

    while (SUCCEEDED(hr) &&
           SUCCEEDED(hr = event.GetFrom(pRecoCtxt)) &&
           hr == S_FALSE)
    {
        hr = pRecoCtxt->WaitForNotifyEvent(INFINITE);
    }

    *ppResult = event.RecoResult();
    if (*ppResult)
    {
        // The CSpEvent goes out of scope on return, so take our own reference.
        (*ppResult)->AddRef();
    }

    return hr;
}

/// Returns the phrase meant to end a session, chosen by the user's UI language.
///
/// @return A pointer to a static wide string: a Japanese phrase for Japanese
///         UI languages, L"Stop" otherwise.
const WCHAR * StopWord()
{
    const WCHAR * pchStop;

    LANGID LangId = ::SpGetUserDefaultUILanguage();

    switch (LangId)
    {
        case MAKELANGID(LANG_JAPANESE, SUBLANG_DEFAULT):
            // The \x escapes are required: without them the literal contains
            // a "\0" that silently truncates the string.
            pchStop = L"\x7d42\x4e86\\\x30b7\x30e5\x30fc\x30ea\x30e7\x30fc/\x3057\x3085\x3046\x308a\x3087\x3046";
            break;

        default:
            pchStop = L"Stop";
            break;
    }

    return pchStop;
}

/// Sets up a shared recognizer with the command grammar from "conf.xml" and
/// echoes every recognized command until waiting for a result fails.
///
/// Must be called between CoInitialize() and CoUninitialize(); every COM smart
/// pointer it owns is released before it returns.
///
/// @param fUseTTS  Speak the recognized text back through the context's voice.
/// @param fReplay  Replay the audio retained for the recognition.
/// @return A negative step-specific code (-2 .. -15) when setup fails,
///         otherwise the HRESULT that ended the listening loop.
static int ListenForCommands(bool fUseTTS, bool fReplay)
{
    HRESULT hr = E_FAIL;

    CComPtr<ISpRecoContext> cpRecoCtxt;
    CComPtr<ISpRecoGrammar> cpRecoGrammar;
    CComPtr<ISpVoice> cpVoice;

    if (FAILED(hr = cpRecoCtxt.CoCreateInstance(CLSID_SpSharedRecoContext)))
    {
        printf("cpRecoCtxt.CoCreateInstance() fail. hr = %x", hr);
        return -2;
    }

    if (FAILED(hr = cpRecoCtxt->GetVoice(&cpVoice)))
    {
        printf("cpRecoCtxt->GetVoice() fail. hr = %x", hr);
        return -3;
    }

    if (!cpRecoCtxt || !cpVoice)
    {
        return hr;
    }

    if (FAILED(hr = cpRecoCtxt->SetNotifyWin32Event()))
    {
        printf("cpRecoCtxt->SetNotifyWin32Event() fail. hr = %x", hr);
        return -4;
    }

    // Only recognition events are queued, so BlockForResult() wakes up
    // exclusively for them.
    if (FAILED(hr = cpRecoCtxt->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION))))
    {
        printf("cpRecoCtxt->SetInterest() fail. hr = %x", hr);
        return -5;
    }

    // Audio is retained so that SpeakAudio() in the loop below has something
    // to replay.
    if (FAILED(hr = cpRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, NULL, NULL)))
    {
        printf("cpRecoCtxt->SetAudioOptions() fail. hr = %x", hr);
        return -6;
    }

    if (FAILED(hr = cpRecoCtxt->CreateGrammar(7, &cpRecoGrammar)))
    {
        printf("cpRecoCtxt->CreateGrammar() fail. hr = %x", hr);
        return -7;
    }

    // Keep the grammar disabled while it is being loaded and committed.
    if (FAILED(hr = cpRecoGrammar->SetGrammarState(SPGS_DISABLED)))
    {
        printf("cpRecoGrammar->SetGrammarState() fail. hr = %x", hr);
        return -8;
    }

    if (FAILED(hr = cpRecoGrammar->LoadCmdFromFile(L"conf.xml", SPLO_DYNAMIC)))
    {
        printf("cpRecoGrammar->LoadCmdFromFile() fail. hr = %x", hr);
        return -9;
    }

    // NOTE: this step shares exit code -9 with LoadCmdFromFile(); the value is
    // kept so existing callers see the same codes.
    SPSTATEHANDLE hRule;
    if (FAILED(hr = cpRecoGrammar->GetRule(L"COMMAND", NULL, SPRAF_Active, FALSE, &hRule)))
    {
        printf("cpRecoGrammar->GetRule() fail. hr = %x", hr);
        return -9;
    }

    // A static configuration file is used for now; loading the commands
    // dynamically can be explored later, e.g.:
    //
    //if (FAILED(hr = cpRecoGrammar->ClearRule(hRule)))
    //{
    //    printf("cpRecoGrammar->ClearRule() fail. hr = %x", hr);
    //    return -10;
    //}
    //if (FAILED(hr = cpRecoGrammar->AddWordTransition(hRule, NULL, L"Frank Lee", NULL, SPWT_LEXICAL, 1, NULL)))
    //{
    //    printf("cpRecoGrammar->AddWordTransition(1) fail. hr = %x", hr);
    //    return -11;
    //}
    //if (FAILED(hr = cpRecoGrammar->AddWordTransition(hRule, NULL, L"self", NULL, SPWT_LEXICAL, 1, NULL)))
    //{
    //    printf("cpRecoGrammar->AddWordTransition(2) fail. hr = %x", hr);
    //    return -12;
    //}
    //if (FAILED(hr = cpRecoGrammar->AddWordTransition(hRule, NULL, L"SAPI beta", NULL, SPWT_LEXICAL, 1, NULL)))
    //{
    //    printf("cpRecoGrammar->AddWordTransition(3) fail. hr = %x", hr);
    //    return -13;
    //}

    if (FAILED(hr = cpRecoGrammar->Commit(NULL)))
    {
        printf("cpRecoGrammar->Commit() fail. hr = %x", hr);
        return -14;
    }

    if (FAILED(hr = cpRecoGrammar->SetGrammarState(SPGS_ENABLED)))
    {
        printf("cpRecoGrammar->SetGrammarState() fail. hr = %x", hr);
        return -15;
    }

    // A failure here is reported but not treated as fatal; listening proceeds.
    if (FAILED(hr = cpRecoGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE)))
    {
        printf("cpRecoGrammar->SetRuleState() fail. hr = %x", hr);
    }

    printf("Read to listen your command:\n");

    CComPtr<ISpRecoResult> cpResult;

    while (SUCCEEDED(hr = BlockForResult(cpRecoCtxt, &cpResult)))
    {
        if (cpResult)
        {
            CSpDynamicString dstrText;

            if (SUCCEEDED(cpResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE,
                                            TRUE, &dstrText, NULL)))
            {
                // CW2A rather than the W2A macro: W2A allocates with alloca(),
                // which is never freed inside a loop and grows the stack on
                // every recognition.
                const CW2A narrowText(dstrText);
                printf("I heard: %s \n", static_cast<LPCSTR>(narrowText));

                if (fUseTTS)
                {
                    cpVoice->Speak(L"I heard", SPF_ASYNC, NULL);
                    cpVoice->Speak(dstrText, SPF_ASYNC, NULL);
                }

                if (fReplay)
                {
                    if (fUseTTS)
                        cpVoice->Speak(L"when you said", SPF_ASYNC, NULL);
                    else
                        printf(" when you said ");
                    cpResult->SpeakAudio(NULL, 0, NULL, NULL);
                }
            }
        }

        // Always drop the result before the next BlockForResult() call:
        // CComPtr::operator& expects an empty pointer and would otherwise
        // leak the previous result.
        cpResult.Release();
    }

    return hr;
}

/// Entry point: parses the optional switches, initializes COM and runs the
/// command-and-control listening loop.
///
/// Usage: <program> [-noTTS] [-noReplay]
///
/// @return -1 for an unknown argument, the CoInitialize() HRESULT when COM
///         cannot be initialized, otherwise the value of ListenForCommands().
int main(int argc, char* argv[])
{
    bool fUseTTS = true;    // turn TTS play back on or off
    bool fReplay = true;    // turn Audio replay on or off

    // Process optional arguments
    for (int i = 1; i < argc; i++)
    {
        if (_stricmp(argv[i], "-noTTS") == 0)
        {
            fUseTTS = false;
            continue;
        }
        if (_stricmp(argv[i], "-noReplay") == 0)
        {
            fReplay = false;
            continue;
        }
        printf("Usage: %s [-noTTS] [-noReplay] ", argv[0]);
        return -1;
    }

    const HRESULT hrInit = ::CoInitialize(NULL);
    if (FAILED(hrInit))
    {
        return hrInit;
    }

    // All COM objects live inside ListenForCommands(), so they are released
    // before CoUninitialize() runs, on failure paths as well.
    const int exitCode = ListenForCommands(fUseTTS, fReplay);

    ::CoUninitialize();

    return exitCode;
}
命令與控制模式需要使用到配置文件來定義“候選命令”范圍,本例中用到XML配置文件“conf.xml”如下:
<GRAMMAR LANGID="804">
    <DEFINE>
        <ID NAME="CMD" VAL="10"/>
    </DEFINE>
    <RULE NAME="COMMAND" ID="CMD" TOPLEVEL="ACTIVE">
        <L>
            <P>東南大學</P>
            <P>滴水洞</P>
            <P>運行趨勢分析</P>
            <P>接地監視</P>
            <P>模型異動</P>
            <P>中科院</P>
        </L>
    </RULE>
</GRAMMAR>
C&C模式的優點是識別范圍小,識別準確率高,可以識別非常用字詞組合。
后續如果有機會將在以下幾個方面繼續研究:
1. 如何實現動態修改識別范圍;
2. 如何實現用候選字詞組合成的基本語法,例如“畢業于”+“東南大學”;
3. 如何阻斷操作系統“控制指令”對識別過程的干擾。
總結
以上是生活随笔為你收集整理的用SAPI实现Speech Recognition(SR) - 命令控制模式的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Host XXX is not allo
- 下一篇: 2021选萃重组生物有没有这本书买?