I have a program that when the user says "Start" or "Stop", the program makes a skeleton display on the screen. I use the same code as the Shape Game, and it works fine there, but not on my code. I dont know which part of the code doesn’t work since this is my first time eith speech recognition programming. Thanks for your help(Sorry if my code is messy)
Recognizing the Speech
public class SpeechRecognizer : IDisposable
{
private KinectAudioSource kinectAudioSource;
private struct WhatSaid
{
public Verbs Verb;
}
protected virtual void Dispose(bool disposing)
{
if (disposing)
{
this.Stop();
if (this.sre != null)
{
// NOTE: The SpeechRecognitionEngine can take a long time to dispose
// so we will dispose it on a background thread
ThreadPool.QueueUserWorkItem(
delegate(object state)
{
IDisposable toDispose = state as IDisposable;
if (toDispose != null)
{
toDispose.Dispose();
}
},
this.sre);
this.sre = null;
}
this.isDisposed = true;
}
}
public void Dispose()
{
this.Dispose(true);
GC.SuppressFinalize(this);
}
public EchoCancellationMode EchoCancellationMode
{
get
{
this.CheckDisposed();
return this.kinectAudioSource.EchoCancellationMode;
}
set
{
this.CheckDisposed();
this.kinectAudioSource.EchoCancellationMode = value;
}
}
public static SpeechRecognizer Create()
{
SpeechRecognizer recognizer = null;
try
{
recognizer = new SpeechRecognizer();
}
catch (Exception)
{
// speech prereq isn't installed. a null recognizer will be handled properly by the app.
}
return recognizer;
}
private void CheckDisposed()
{
if (this.isDisposed)
{
throw new ObjectDisposedException("SpeechRecognizer");
}
}
public void Stop()
{
this.CheckDisposed();
if (this.sre != null)
{
this.kinectAudioSource.Stop();
this.sre.RecognizeAsyncCancel();
this.sre.RecognizeAsyncStop();
this.sre.SpeechRecognized -= this.SreSpeechRecognized;
this.sre.SpeechHypothesized -= this.SreSpeechHypothesized;
this.sre.SpeechRecognitionRejected -= this.SreSpeechRecognitionRejected;
}
}
public void Start(KinectAudioSource kinectSource)
{
this.CheckDisposed();
this.kinectAudioSource = kinectSource;
this.kinectAudioSource.AutomaticGainControlEnabled = false;
this.kinectAudioSource.BeamAngleMode = BeamAngleMode.Adaptive;
var kinectStream = this.kinectAudioSource.Start();
this.sre.SetInputToAudioStream(
kinectStream, new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null));
this.sre.RecognizeAsync(RecognizeMode.Multiple);
}
public enum Verbs
{
None = 0,
Start,
Stop,
Resume,
Pause
}
private bool isDisposed;
private readonly Dictionary<string, WhatSaid> speechCommands = new Dictionary<string, WhatSaid>
{
{ "Start", new WhatSaid { Verb = Verbs.Start } },
{ "Stop", new WhatSaid { Verb = Verbs.Stop } },
{ "Resume", new WhatSaid { Verb = Verbs.Resume } },
{ "Pause", new WhatSaid { Verb = Verbs.Pause } },
};
private SpeechRecognitionEngine sre;
private static RecognizerInfo GetKinectRecognizer()
{
Func<RecognizerInfo, bool> matchingFunc = r =>
{
string value;
r.AdditionalInfo.TryGetValue("Kinect", out value);
return "True".Equals(value, StringComparison.InvariantCultureIgnoreCase) && "en-US".Equals(r.Culture.Name, StringComparison.InvariantCultureIgnoreCase);
};
return SpeechRecognitionEngine.InstalledRecognizers().Where(matchingFunc).FirstOrDefault();
}
private SpeechRecognizer()
{
RecognizerInfo ri = GetKinectRecognizer();
this.sre = new SpeechRecognitionEngine(ri);
this.LoadGrammar(this.sre);
}
private void LoadGrammar(SpeechRecognitionEngine speechRecognitionEngine)
{
// Build a simple grammar of shapes, colors, and some simple program control
var single = new Choices();
foreach (var phrase in this.speechCommands)
{
single.Add(phrase.Key);
}
var objectChoices = new Choices();
objectChoices.Add(single);
var actionGrammar = new GrammarBuilder();
actionGrammar.AppendWildcard();
actionGrammar.Append(objectChoices);
var allChoices = new Choices();
allChoices.Add(actionGrammar);
allChoices.Add(single);
// This is needed to ensure that it will work on machines with any culture, not just en-us.
var gb = new GrammarBuilder { Culture = speechRecognitionEngine.RecognizerInfo.Culture };
gb.Append(allChoices);
var g = new Grammar(gb);
speechRecognitionEngine.LoadGrammar(g);
speechRecognitionEngine.SpeechRecognized += this.SreSpeechRecognized;
speechRecognitionEngine.SpeechHypothesized += this.SreSpeechHypothesized;
speechRecognitionEngine.SpeechRecognitionRejected += this.SreSpeechRecognitionRejected;
}
private void SreSpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
{
var said = new SaidSomethingEventArgs { Verb = Verbs.None, Matched = "?" };
this.SetLabel("Word not Recognized.... Try 'Start', 'Stop', 'Pause' or 'Resume'");
if (this.SaidSomething != null)
{
this.SaidSomething(new object(), said);
}
}
private void SreSpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
{
this.SetLabel("I think you said: " + e.Result.Text);
}
public event EventHandler<SaidSomethingEventArgs> SaidSomething;
private void SreSpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
this.SetLabel("\rSpeech Recognized: \t" + e.Result.Text);
if ((this.SaidSomething == null) || (e.Result.Confidence < 0.3))
{
return;
}
var said = new SaidSomethingEventArgs { Verb = 0, Phrase = e.Result.Text };
foreach (var phrase in this.speechCommands)
{
if (e.Result.Text.Contains(phrase.Key) && (phrase.Value.Verb == Verbs.Pause))
{
//pause = true;
break;
}
else if ((e.Result.Text.Contains(phrase.Key) && (phrase.Value.Verb == Verbs.Resume)))
{
//resume = true;
break;
}
else if ((e.Result.Text.Contains(phrase.Key) && (phrase.Value.Verb == Verbs.Start)))
{
//start = true;
break;
}
else if ((e.Result.Text.Contains(phrase.Key) && (phrase.Value.Verb == Verbs.Stop)))
{
//stop = true;
break;
}
}
// Look for a match in the order of the lists below, first match wins.
List<Dictionary<string, WhatSaid>> allDicts = new List<Dictionary<string, WhatSaid>> { this.speechCommands };
bool found = false;
for (int i = 0; i < allDicts.Count && !found; ++i)
{
foreach (var phrase in allDicts[i])
{
if (e.Result.Text.Contains(phrase.Key))
{
found = true;
break;
}
}
}
if (!found)
{
return;
}
}
public class SaidSomethingEventArgs : EventArgs
{
public Verbs Verb { get; set; }
public string Phrase { get; set; }
public string Matched { get; set; }
}
public event Action<string> SetLabel = delegate { };
}
In my Code
private void RecognizeSaidSomething(object sender, SpeechRecognizer.SpeechRecognizer.SaidSomethingEventArgs e)
{
FlyingText.FlyingText.NewFlyingText(this.skeleton.Width / 30, new Point(this.skeleton.Width / 2, this.skeleton.Height / 2), e.Matched);
switch (e.Verb)
{
case SpeechRecognizer.SpeechRecognizer.Verbs.Pause:
pause = true;
break;
case SpeechRecognizer.SpeechRecognizer.Verbs.Resume:
resume = true;
break;
case SpeechRecognizer.SpeechRecognizer.Verbs.Start:
start = true;
break;
case SpeechRecognizer.SpeechRecognizer.Verbs.Stop:
stop = true;
break;
}
}
It doesn’t look like you ever call
RecognizeSaidSomething()fromSreSpeechRecognized(). You create the event args:But it doesn’t appear that you do anything with it.
The
foreachloop below that doesn’t seem to serve any purpose, you test for the phrase then just break out of the loop. You don’t set any variables or call any functions in that loop that I can see.Then there is a
forloop that appears to do something similar to theforeachloop (just in a different manner). It searches for matches to the recognized phrase, but then doesn’t do anything with what it finds. It just returns.I would think somewhere in the
SreSpeechRecognized()event handler you want to callRecognizeSaidSomething()and pass it theSaidSomethingEventArgs.