This sounds interesting, but what I actually wanted was the following:
Imagine you call the dial plan and say: “Good day, my name is xyz, I didn’t understand your name correctly. I actually wanted to speak with ‘Mr. Mustermann.’”
The system should immediately recognize the phrase “didn’t understand” and wait until the person stops talking. Once they stop, it should immediately respond with: “My name is Mr. Flink.”
The problem I had, as mentioned earlier, is that the entire audio file is first uploaded to Google. Only after that is the transcribed text evaluated and checked to see whether it contains any of the following words.
Let me give you an example:
; Start recording the call into ${FILENAME}.
; NOTE(review): FILENAME is not set anywhere in this excerpt - presumably it is
; assigned earlier in the extension; confirm.
same => n,MixMonitor(${FILENAME})
; Advance the global prompt index in the cycle 1 -> 2 -> 3 -> 1.
; The leading "0" makes an unset global evaluate as 0, so the first call yields 1.
; A single integer expression is used instead of nested IF/MATH because Asterisk
; substitutes both IF branches before choosing one: with an unset global that
; evaluated "$[ = 3]" and "MATH(+1)" and logged warnings, and MATH returned a
; float ("2.000000") that then had to be trimmed with CUT.
same => n,Set(GLOBAL(INCOMING_FREIZEICHEN)=$[(0${GLOBAL(INCOMING_FREIZEICHEN)} % 3) + 1])
same => n,Verbose(1, "This is call number ${GLOBAL(INCOMING_FREIZEICHEN)}")
; Play the prompt file selected by the index (Freizeichen1..Freizeichen3),
; followed by the fixed "geraeusch" sound.
same => n,Playback(/etc/asterisk/sounds/freizeichen/Freizeichen${GLOBAL(INCOMING_FREIZEICHEN)})
same => n,Playback(/etc/asterisk/sounds/annahme_standard/geraeusch)
; Advance the global greeting index in the cycle 1 -> 2 -> 3 -> 1.
; The leading "0" makes an unset global evaluate as 0, so the first call yields 1.
; A single integer expression replaces the nested IF/MATH form: Asterisk
; substitutes both IF branches before choosing one, so an unset global caused
; "$[ = 3]" and "MATH(+1)" to be evaluated (logging warnings), and MATH's float
; result ("2.000000") needed a follow-up CUT to strip the decimals.
same => n,Set(GLOBAL(ANNAHME_STANDARD_BEGRUESSUNG)=$[(0${GLOBAL(ANNAHME_STANDARD_BEGRUESSUNG)} % 3) + 1])
same => n,Verbose(1, "This is call number ${GLOBAL(ANNAHME_STANDARD_BEGRUESSUNG)}")
; Play the greeting file whose name is the index (1, 2 or 3).
same => n,Playback(/etc/asterisk/sounds/annahme_standard/${GLOBAL(ANNAHME_STANDARD_BEGRUESSUNG)})
; Record the channel audio to a fixed WAV file.
; NOTE(review): presumably this is the file the speech-to-text AGI script reads;
; the path is the same for every call, so simultaneous calls would write to the
; same file - confirm this dialplan only ever handles one call at a time.
same => n,MixMonitor(/etc/asterisk/stt-vosk/audio_stream.wav)
; Wait for one period of 2500 ms of silence, giving up after 120 seconds.
; WaitForSilence stores the outcome in the channel variable WAITSTATUS,
; so no extra Set() is needed before testing it.
same => n,WaitForSilence(2500,1,120)
; Branch on the outcome; timeout_detected_1 is defined outside this excerpt.
same => n,GotoIf($["${WAITSTATUS}" = "SILENCE"]?silence_detected_1:timeout_detected_1)
; Silence detected after greeting 1: run speech recognition and branch on the text.
; Clear any earlier result first, so that a failed AGI run cannot leave an old
; RECOGNIZED_TEXT value behind to be matched below.
same => n(silence_detected_1),Set(RECOGNIZED_TEXT=)
; NOTE(review): server.sh is presumably what sets RECOGNIZED_TEXT - confirm.
same => n,AGI(/etc/asterisk/stt-vosk/ivr-stt/server.sh)
same => n,NoOp(Recognized Text: ${RECOGNIZED_TEXT})
; Caller seems to be asking who they reached: any of the keywords occurs in the text.
; Every keyword contains a vowel, so no separate vowel check is needed here.
; NOTE(review): the match is an unanchored substring match (e.g. "wie" also
; matches inside longer words) and looks case-sensitive - confirm this is intended.
same => n,GotoIf($["${RECOGNIZED_TEXT}" =~ "(ihr|verstanden|falsch|verbunden|wer|wie|heissen|nicht)"]?recognized_question_1)
; Any other text that contains at least one vowel is treated as a greeting;
; a vowel match already implies the text is non-empty.
same => n,GotoIf($["${RECOGNIZED_TEXT}" =~ ".*[aeiouAEIOU].*"]?recognized_hello_1)
; Nothing usable was recognized.
same => n,Goto(unrecognized_hello_1)