- The added line is THIS COLOR.
- The deleted line is THIS COLOR.
Up:[[Tutorial]] Previous:[[Samples/Communication between agents]] Next:[[Samples/Operation on visual]]
//
// Written by N.D.Khoa on 2012-04-22
//
Up:[[Tutorial]]
----
#contents
* Speech recognition using Microsoft SAPI [#o63dca14]
This section explains how to perform speech recognition on the client using Microsoft SAPI and send the result to an agent in SIGVerse.
※ This sample was tested with Visual Studio 2010 on Windows 7.
** Install Microsoft Speech SDK [#q579c66f]
Download and install the Microsoft Speech SDK from [[here>http://www.microsoft.com/download/en/details.aspx?id=10121]].
** Build sample speech recognition using Microsoft SAPI [#a2cd5868]
*** Create controller [#f57fa8a9]
Let's create a controller of the agent that receives the audio data to perform speech recognition.
$ cd ~/sigverse-<version>/bin/NewWorld
$ emacs soundRecog.cpp
*** Create Recognizer: [#rdb14941]
soundRecog.cpp
#highlight(c#){{
using System.Speech.Recognition;
...
// Recognition engine instance; the statements shown in the following
// steps (event handler, grammar, input source, start) all operate on it.
SpeechRecognitionEngine _speechRecognitionEngine;
...
// Create the speech recognition engine with its parameterless constructor
_speechRecognitionEngine= new SpeechRecognitionEngine();
...
}}
*** Add event handler: [#vf81deb6]
#highlight(c#){{
...
// Subscribe to the engine's SpeechRecognized event.
// The result will be processed in the SpeechEngine_SpeechRecognized function.
_speechRecognitionEngine.SpeechRecognized +=
    new EventHandler<SpeechRecognizedEventArgs>(SpeechEngine_SpeechRecognized);
...
}}
*** Create Grammar: [#fd5fd131]
#highlight(c#){{
...
// Command phrases; each one is followed by an object from OBJ below.
GrammarBuilder CMD = new GrammarBuilder();
CMD.Append(new Choices("Please take a cup of",
                       "I need some",
                       "take a cup of"));
// Objects that can follow a command phrase.
GrammarBuilder OBJ = new GrammarBuilder();
Choices OBJchoices = new Choices("coffee",
                                 "tea",
                                 "milk",
                                 "hot water");
OBJ.Append(OBJchoices);
OBJ.Append(new SemanticResultValue("Object"));
CMD.Append(OBJ);
// Movement control phrases.
GrammarBuilder CTRL = new GrammarBuilder();
Choices CTRLchoices = new Choices("go back",
                                  "go ahead",
                                  "turn left",
                                  "turn right",
                                  "stop");
CTRL.Append(CTRLchoices);
CTRL.Append(new SemanticResultValue("CTRL"));
// Final choices: fixed greeting phrases plus the command and control phrases.
Choices finalChoices = new Choices("Hi",
                                   "Hello",
                                   "What is your name",
                                   "How are you",
                                   "How are you today",
                                   "How'r you",
                                   "Are you ready");
finalChoices.Add(CMD);
finalChoices.Add(CTRL);
// Final grammar: all alternatives are stored under the semantic key "Dialog".
GrammarBuilder dialogGrammar = new GrammarBuilder();
dialogGrammar.Culture = culture;
dialogGrammar.Append(new SemanticResultKey("Dialog", finalChoices));
// Load grammar. LoadGrammar takes a Grammar, not a GrammarBuilder, so the
// builder is wrapped in a Grammar object first.
_speechRecognitionEngine.LoadGrammar(new Grammar(dialogGrammar));
...
}}
*** Set input source for Recognizer: [#eb3310db]
#highlight(c#){{
...
// Select the default audio device as the recognizer's input source.
// This is done before recognition is started in the next step.
_speechRecognitionEngine.SetInputToDefaultAudioDevice();
...
}}
*** Start recognize: [#b48271a3]
#highlight(c#){{
...
// Start the asynchronous recognizer. Results are delivered through the
// SpeechRecognized event handler registered earlier.
try
{
    _speechRecognitionEngine.RecognizeAsync(RecognizeMode.Multiple);
}
catch (InvalidOperationException e)
{
    // Starting failed: clear the recognizing flag and report the reason.
    _isRecognizing = false;
    LogLine("Failed");
    LogLine(e.Message);
    return false;
}
// Start the synchronous recognizer.
// NOTE(review): presumably shown as an alternative to the asynchronous call
// above rather than as a follow-up step - confirm.
try
{
    _speechRecognitionEngine.Recognize();
}
catch (InvalidOperationException e)
{
    LogLine(e.Message);
    return false;
}
}}
*** Compile [#k5b359f9]
Modify the Makefile.
$ emacs Makefile
** Send result text to an Agent in SIGverse [#b49c4788]
*** Import sigverse.dll [#afb97c89]
Follow this tutorial [[import DLL:http://www.sociointelligenesis.org/SIGVerse/index.php?%E3%83%A1%E3%83%83%E3%82%BB%E3%83%BC%E3%82%B8%E9%80%81%E4%BF%A1%E3%83%84%E3%83%BC%E3%83%AB%E3%81%AE%E4%BD%9C%E6%88%90]]
*** Send result text to SIGverse [#of8c83cf]
#highlight(c#){{
// Encodes the text as ASCII and returns the bytes as signed bytes, one
// element per character of the input.
sbyte[] String2Sbytes(string text)
{
    sbyte[] signedBytes = new sbyte[text.Length];
    byte[] asciiBytes = Encoding.ASCII.GetBytes(text);
    int index = 0;
    foreach (byte value in asciiBytes)
    {
        signedBytes[index] = (sbyte)value;
        index++;
    }
    return signedBytes;
}
Specify the soundRecog.so as the object file.
/// <summary>
/// Connects to the given host and port with a MessageSender and sends one
/// text message to the named agent. Success and failure are reported
/// through LogLine; nothing is returned.
/// </summary>
/// <param name="hostname">Host name passed to MessageSender.connect.</param>
/// <param name="port">Port number as text; converted with Convert.ToInt32.</param>
/// <param name="agentname">Name of the agent passed to MessageSender.sendto.</param>
/// <param name="message">Text to send as the single message argument.</param>
void SendMessage2Agent(string hostname, string port, string agentname, string message)
{
    // Every string is handed to MessageSender as a raw sbyte*, so each buffer
    // gets an explicit trailing '\0'. Previously only the message was
    // terminated, leaving the sender name, host name and agent name without
    // an end marker.
    // NOTE(review): assumes MessageSender reads these pointers as C strings -
    // confirm against sigverse.dll.
    sbyte[] rs = String2Sbytes(message + "\0");
    sbyte[] _msgname = String2Sbytes("Microsoft Speech API" + "\0");
    sbyte[] host = String2Sbytes(hostname + "\0");
    sbyte[] agent = String2Sbytes(agentname + "\0");
    unsafe
    {
        // Keep all three buffers pinned while MessageSender uses the pointers.
        fixed (sbyte* _mn = _msgname)
        fixed (sbyte* _hn = host)
        fixed (sbyte* _agent = agent)
        {
            MessageSender messageSender = new MessageSender(_mn);
            if (!messageSender.connect(_hn, Convert.ToInt32(port)))
            {
                LogLine("Can not connect to [" + hostname + ":" + port + "]. Please check ssh connection from SIGviewer");
                return;
            }
            LogLine("Connected to [" + hostname + ":" + port + "]");
            // Copy the message into a stack buffer so that its address can be
            // taken and passed as a one-element array of strings (sbyte**).
            sbyte* a = stackalloc sbyte[rs.Length];
            for (int i = 0; i < rs.Length; i++)
            {
                a[i] = rs[i];
            }
            sbyte** _rs = &a;
            if (messageSender.sendto(_agent, 1, _rs))
            {
                LogLine("message [" + message + "] has been sent to [" + hostname + ":" + port + "]");
            }
            else
            {
                LogLine("error to send message to [" + hostname + ":" + port + "]");
            }
        }
    }
}
// Handles the recognizer's SpeechRecognized event: the recognized text is
// converted to lower case and passed to SendMessage2Agent together with the
// host, port and agent values read from the _hostNameSIGverse, _portSIGverse
// and _agentSIGverse members (presumably text boxes of the client UI - confirm).
// NOTE(review): the lines "# Specify the object file", "OBJS = soundRecog.so",
// "And then compile it." and "$ make" inside this body are not C#; they look
// like Makefile instructions belonging to the "Compile" section - confirm and
// relocate them.
void SpeechEngine_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
...
# Specify the object file
OBJS = soundRecog.so
// Send message to SIGverse's Agent here
SendMessage2Agent(_hostNameSIGverse.Text, _portSIGverse.Text, _agentSIGverse.Text, e.Result.Text.ToLower());
And then compile it.
$ make
...
}
*** World file creation [#v3581958]
$ cd ..
$ emacs xml/soundRecog.xml
}}
Up:[[Tutorial]] Previous:[[Samples/Communication between agents]] Next:[[Samples/Operation on visual]]
#highlight(end)
Up:[[Tutorial]] Previous:[[Samples/Communication between agents]] Next:
#counter