4 # AGI script that renders speech to text using iSpeech speech recognition engine.
5 # http://www.ispeech.org/
7 # Copyright (C) 2011 - 2014, Lefteris Zafiris <zaf.000@gmail.com>
9 # This program is free software, distributed under the terms of
10 # the GNU General Public License Version 2. See the LICENSE file
11 # at the top of the source tree.
13 # The Asterisk iSpeech plugin development was funded by ifbyphone http://ifbyphone.com/
18 # agi(ispeech-asr.agi,[lang],[freeform],[model],[timeout],[intkey],[NOBEEP])
19 # Records from the current channel until 3 seconds of silence are detected
20 # (this can be set by the user by the 'timeout' argument, -1 for no timeout) or the
21 # interrupt key (# by default) is pressed. If NOBEEP is set, no beep sound is played
22 # back to the user to indicate the start of the recording. For 'freeform' and 'model'
23 # please refer to the ispeech API manual. 'freeform' defaults to 3 (Normal speech)
24 # The recorded sound is sent over to the iSpeech ASR service and the
25 # returned text string is assigned as the value of the channel variable 'utterance'.
26 # The script sets the following channel variables:
27 # status : Return status. 0 means success, non zero values indicate different errors.
28 # utterance : The generated text string.
29 # confidence : A value between 0 and 1 indicating how 'confident' the recognition engine
30 # feels about the result. Values bigger than 0.90 usually mean that the
31 # resulted text is correct.
33 # User defined parameters:
38 # $timeout (value in seconds of silence before recording is stopped)
40 # Default interrupt key:
41 # $intkey (can be any digit from 0 to 9 or # and *, or a combination of them)
44 # $use_speex (encode sound data with speex voice encoder. 0: disable, 1: enable)
47 # $use_ssl (Use SSL for network traffic encryption. 0: disable, 1: enable)
49 # An API key is a password that is required for access. To obtain an API key please visit:
50 # http://www.ispeech.org/developers and register for a developer account.
55 use Encode qw(encode);
58 use File::Temp qw(tempfile);
62 # ----------------------------- #
63 # User defined parameters: #
64 # ----------------------------- #
# iSpeech API key; it is sent as the `apikey` query parameter of the request URL.
66 my $key = "developerdemokeydeveloperdemokey";
# Default recognition locale; replaced by the first script argument when that
# argument looks like a locale code (e.g. "en" or "en-US").
69 my $language = "en-US";
71 # Default max silence timeout #
74 # Default interrupt key #
83 # Verbose debugging messages #
86 #Playback recorded speech #
89 # ----------------------------- #
# Timeout value handed to the HTTP client via $ua->timeout().
107 my $ua_timeout = 180;
# API endpoint without a scheme; "http://" or "https://" is prepended when the
# user agent is set up.
109 my $host = "api.ispeech.org/api/rest";
# Output of `which speexenc`; tested for emptiness later to detect a missing encoder.
# NOTE(review): backtick output keeps its trailing newline, so strip it before
# using this value as a program path.
110 my $speex = `/usr/bin/which speexenc`;
# Model names accepted for the 'model' script argument.
111 my @models = qw(assistant date nfl nba usmoney numbersto9 numbersto99 numbersto999 time phonenumber streets sportsteam citystate);
114 # $language $freeform $model $timeout $intkey $beep $license #
# Positional script arguments are stored under the arg_1 .. arg_7 keys of %AGI.
115 ($AGI{arg_1}, $AGI{arg_2}, $AGI{arg_3}, $AGI{arg_4}, $AGI{arg_5}, $AGI{arg_6}, $AGI{arg_7}) = @ARGV;
# Store each "agi_<name>: <value>" line found in $_ under $AGI{<name>}.
# NOTE(review): the loop that reads those lines into $_ is not visible in this excerpt.
119 $AGI{$1} = $2 if (/^agi_(\w+)\:\s+(.*)$/);
# Prefix used by every diagnostic message, built from the agi_request value.
122 $name = " -- $AGI{request}:";
# Refuse to run without an API key.
124 die "$name No API key found. Aborting.\n" if (!$key);
# Issue one AGI "SET VARIABLE" command per key of %response, using whatever
# value is currently stored for that key.
132 warn "$name Clearing channel variables.\n" if ($debug);
133 foreach (keys %response) {
134 print "SET VARIABLE \"$_\" \"$response{$_}\"\n";
138 # Warn if required programs not found. #
# Speex encoding was requested but the `which` lookup returned nothing.
139 if ($use_speex && !$speex) {
140 warn "$name speexenc is missing.\n";
# This message has no "\n" of its own; it ends with the contents of $speex.
143 warn "$name Found speexenc in: $speex" if ($debug and $use_speex);
# Setting language, freeform, model, timeout, interrupt keys and BEEP indication #
# Each positional argument replaces its default only when it is non-empty and
# passes validation; otherwise the value set earlier in the script is kept.

# arg 1: locale code such as "en" or "en-US".
if (length($AGI{arg_1})) {
    $language = $AGI{arg_1} if ($AGI{arg_1} =~ /^[a-z]{2}(-[a-zA-Z]{2,6})?$/);
}

# arg 2: freeform mode, a single integer from 1 to 7.
# The range check must look at arg 2 itself (not at the language in arg 1).
if (length($AGI{arg_2})) {
    $freeform = $AGI{arg_2} if ($AGI{arg_2} =~ /^[1-7]$/);
}

# arg 3: recognition model; accepted only when it is one of @models.
if (length($AGI{arg_3})) {
    foreach (@models) {
        $model = $AGI{arg_3} if ($AGI{arg_3} eq $_);
    }
}

# arg 4: seconds of silence that end the recording.
#   "-1"        -> no silence detection (empty option string)
#   digits only -> use that many seconds
#   other/empty -> fall back to the default $timeout
# Compared as a string so a non-numeric argument does not trigger a
# numeric-comparison warning.
if (length($AGI{arg_4})) {
    if ($AGI{arg_4} eq "-1") {
        $silence = "";
    } elsif ($AGI{arg_4} =~ /^\d+$/) {
        $silence = "s=$AGI{arg_4}";
    } else {
        $silence = "s=$timeout";
    }
} else {
    $silence = "s=$timeout";
}

# arg 5: interrupt keys; "any" selects every DTMF key, otherwise a string made
# of digits, "*" and "#" is used as given.
if (length($AGI{arg_5})) {
    $intkey = "0123456789#*" if ($AGI{arg_5} eq "any");
    $intkey = $AGI{arg_5} if ($AGI{arg_5} =~ /^[0-9*#]+$/);
}

# arg 6: the literal "NOBEEP" suppresses the beep before recording.
if (length($AGI{arg_6})) {
    $beep = "" if ($AGI{arg_6} eq "NOBEEP");
}
# Seventh positional argument (labelled $license in the argument list comment
# near the top of the script).
# NOTE(review): the body of this conditional is not visible in this excerpt.
181 if (length($AGI{arg_7})) {
185 # Answer channel if not already answered #
186 warn "$name Checking channel status.\n" if ($debug);
# Query the channel state and read Asterisk's reply.
187 print "CHANNEL STATUS\n";
188 @result = checkresponse();
# A status of 4 leads to answering the channel.
# NOTE(review): presumably 4 is the "ringing" state in the AGI channel-status
# table — confirm.
189 if ($result[0] == 4) {
190 warn "$name Answering channel.\n" if ($debug);
# NOTE(review): the command whose reply is read here is not visible in this excerpt.
192 @result = checkresponse();
# Any non-zero result from that command is fatal.
193 if ($result[0] != 0) {
194 die "$name Failed to answer channel.\n";
198 # Initialise User agent #
# Two alternative setups follow: HTTPS with hostname verification, or plain HTTP.
# NOTE(review): the condition choosing between them is not visible in this
# excerpt (presumably $use_ssl — confirm).
200 $url = "https://" . $host;
201 $ua = LWP::UserAgent->new(ssl_opts => {verify_hostname => 1});
203 $url = "http://" . $host;
204 $ua = LWP::UserAgent->new;
# Set the client identification string sent with each request.
206 $ua->agent("Asterisk iSpeech ASR module");
# Bound each request with the configured client timeout.
208 $ua->timeout($ua_timeout);
210 # Handle interrupts #
# Route both SIGINT and SIGHUP to int_handler.
211 $SIG{'INT'} = \&int_handler;
212 $SIG{'HUP'} = \&int_handler;
# Reserve a unique base name inside $tmpdir; UNLINK => 1 asks File::Temp to
# remove that placeholder file when the program exits.
215 ($fh, $tmpname) = tempfile("ispeech-asr_XXXXXX", DIR => $tmpdir, UNLINK => 1);
# Ask Asterisk to record into $tmpname using $format, stopping on any key in
# $intkey, with the optional beep and the silence option computed earlier.
# NOTE(review): the "-1" field is presumably the AGI recording timeout
# (no limit) — confirm against the RECORD FILE syntax.
216 print "RECORD FILE $tmpname $format \"$intkey\" \"-1\" $beep \"$silence\"\n";
217 @result = checkresponse();
# A result of -1 means the recording could not be made.
218 die "$name Failed to record file, aborting...\n" if ($result[0] == -1);
220 # Replay back recorded data #
222 warn "$name Playing back recorded data:" if ($debug);
# Stream the recording back to the caller with an empty escape-digit set.
223 print "STREAM FILE $tmpname \"\"\n";
224 @result = checkresponse();
# Summarise the effective settings (recording format, encoding, language, SSL,
# silence option and interrupt keys) in a single diagnostic message.
228 warn "$name Recording Format: $format, ",
229 "Encoding format: ", ($use_speex) ? "speex" : "wav", "\n",
230 "$name Languge: $language, SSL: ", ($use_ssl) ? "yes, " : "no, ",
231 "$silence, Interrupt keys: $intkey\n";
# Encode sound data #
# Picks the payload file and its MIME type: a speex-encoded copy of the
# recording when $use_speex is set, otherwise the recorded wav file itself.
# Leaves $fh open for reading on the chosen file; dies if the encoder fails or
# the file cannot be opened.
if ($use_speex) {
    $filetype = "audio/speex";
    # Encode file to speex. #
    # $speex holds raw `which` output; drop its trailing newline, otherwise the
    # list-form system() below looks for a program whose name ends in "\n".
    chomp($speex);
    system($speex, "--vbr", "--quiet", "$tmpname.$format", "$tmpname.spx") == 0
        or die "$name $speex failed: $?\n";
    open($fh, "<", "$tmpname.spx") or die "Can't read file: $!";
} else {
    $filetype = "audio/x-wav";
    open($fh, "<", "$tmpname.wav") or die "Can't read file: $!";
}
# The payload is binary audio; read it without any newline translation.
binmode($fh);
# Slurp the whole file: with $/ undefined locally, <$fh> returns everything at once.
248 $audio = do { local $/; <$fh> };
# Base64-encode without line breaks (empty end-of-line argument).
250 $audio = encode_base64($audio, "");
# Build the query string: API key, action and locale first.
252 $language = uri_escape($language);
253 $url .= "?apikey=$key&action=recognize&locale=$language";
# Append the recognition model.
256 $model = uri_escape($model);
257 $url .= "&model=$model";
# Append the freeform mode.
259 $freeform = uri_escape($freeform);
260 $url .= "&freeform=$freeform";
# Append the MIME type and the encoded audio itself.
# NOTE(review): base64 output can contain "+", "/" and "="; no escaping of
# $audio is visible in this excerpt before it is appended — confirm it is escaped.
263 $url .= "&content-type=$filetype&audio=$audio";
# Extra flag sent only when the audio was speex-encoded.
264 $url .= "&speexmode=1" if ($use_speex);
266 # Send audio data for analysis #
# The whole payload, audio included, travels in the URL of a GET request.
267 $uaresponse = $ua->get("$url");
269 warn "$name The response was: ", $uaresponse->decoded_content if ($debug);
# Abort on any response the user agent does not consider successful.
270 die "$name Unable to get speech data.\n" if (!$uaresponse->is_success);
# NOTE(review): because unsuccessful responses already died above, this check
# can only fire for successful responses whose code is not exactly 200.
272 if ($uaresponse->code != 200) {
273 die "$name An iSpeech API error occured: " . uri_unescape($uaresponse->decoded_content) . "\n";
# Expected body: text=<utterance>&confidence=<score>&result=<outcome>.
276 if ($uaresponse->decoded_content =~ /^text=(.*?)&confidence=(.*?)&result=(.*?)$/) {
# status becomes "0" only when the result field is "success".
277 $response{status} = "0" if($3 eq "success");
# A status that is still true here (i.e. not "0") is reported as an error.
278 warn "$name Error reading audio file\n" if ($response{status});
279 $response{confidence} = "$2";
# Percent-decode the text, encode it as UTF-8 bytes, then turn "+" into spaces.
# NOTE(review): the "+" replacement runs after percent-decoding, so an escaped
# literal plus (%2B) in the text also becomes a space.
280 $response{utterance} = uri_unescape($1);
281 $response{utterance} = encode('utf8', $response{utterance});
282 $response{utterance} =~ s/\+/ /g;
# Hand every entry of %response to Asterisk as a channel variable.
# NOTE(review): values are placed between double quotes unescaped; a value
# containing '"' would break the command.
285 foreach (keys %response) {
286 warn "$name Setting variable: $_ = $response{$_}\n" if ($debug);
287 print "SET VARIABLE \"$_\" \"$response{$_}\"\n";
# Parse one AGI reply held in $input.
# NOTE(review): presumably the body of checkresponse(); its header and the code
# that fills $input are not visible in this excerpt.
# A "200 result=<n> [<extra>]" line yields (<n>, <extra>) in @values.
297 if ($input =~ /^200 result=(-?\d+)\s?(.*)$/) {
298 warn "$name Command returned: $input\n" if ($debug);
299 @values = ("$1", "$2");
# For a reply starting with "520-Invalid", append the next line from STDIN
# before logging the unexpected result.
301 $input .= <STDIN> if ($input =~ /^520-Invalid/);
302 warn "$name Unexpected result: $input\n";
# Abort the script with a diagnostic.
# NOTE(review): presumably the body of int_handler, which is installed for
# SIGINT and SIGHUP — the enclosing sub header is not visible in this excerpt.
309 die "$name Interrupt signal received, terminating...\n";
# Remove every file whose name starts with "$tmpname." (e.g. the recording and
# any speex-encoded copy).
314 warn "$name Cleaning temp files.\n" if ($debug);
315 unlink glob "$tmpname.*";