{"status":0,"id":"84e03bf4efe17fa7856333560d6faba4-1","hypotheses":[{"utterance":" ","confidence":0.85437811}]}
#!/usr/bin/perl -w package iON; use strict; use utf8; use base qw(Net::Server::Fork); sub process_request { my $self = shift; while (<STDIN>) { if (/text (\d+)/) { toText($1); next; } if (/quit/i) { print "+OK - Bye-bye ;)\n\n"; last; } print "-ERR - Command not found\n"; logSystem(" : $_", 0); } } iON->run(port => 16000, background => undef, log_level => 4, host => 'localhost'); 1;
Briefly run over, according to what is written here. We declare ourselves a module with the name iON based on the Net :: Server :: Fork module and start the server on port 16000 on localhost with the highest level of logging details and without the “demon” mode. Next, overload the process_request () function. She is responsible for processing the received data from the client. In our case, if the server sees the text of the text format number - the toText function is executed with the parameters as a number that the client sent us. With the quit command, I think everything is clear. sub toText { my $num = shift; print "+OK - Trying recognize text\n"; my $curl = WWW::Curl::Easy->new; $curl->setopt(CURLOPT_HEADER,1); $curl->setopt(CURLOPT_POST,1); #$curl->setopt(CURLOPT_VERBOSE, 1); my @myheaders=(); $myheaders[0] = "Content-Type: audio/x-flac; rate=16000"; $curl->setopt(CURLOPT_HTTPHEADER, \@myheaders); $curl->setopt(CURLOPT_URL, 'https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&lang=ru-RU'); my $curlf = WWW::Curl::Form->new; $curlf->formaddfile("data/input-$num.flac", 'myfile', "audio/x-flac"); $curl->setopt(CURLOPT_HTTPPOST, $curlf); my $response_body; $curl->setopt(CURLOPT_WRITEDATA,\$response_body); # Starts the actual request my $retcode = $curl->perform; # Looking at the results... if ($retcode == 0) { $response_body =~ /\n\r\n(.*)/g; my $json = $1; my $json_xs = JSON::XS->new(); $json_xs->utf8(1); my @hypo = $json_xs->decode($json)->{'hypotheses'}; my $dost = $hypo[0][0]{'confidence'}; my $text = $hypo[0][0]{'utterance'}; $dost = 0.0 if !defined $dost; $text = "" if !defined $text; print "+OK - Text is: \"$text\", confidence is: $dost\n"; if($dost > 0.5) { checkcmd($text); } { print "+ERR - Confidence is lower, then 0.5\n"; #sayText(" !"); } } else { # Error code, type of error, error message print("+ERR - $retcode ".$curl->strerror($retcode)." ".$curl->errbuf); } system("rm data/input-$num.flac"); }
I will not describe in detail - it is exactly those actions that are needed for text recognition. Google is fed a file from the data subdirectory named input-number.flac . How it is formed there, a little later. After - the answer is read, and if its accuracy is above 0.5, the recognized text is passed as a parameter to the function checkcmd () . At the end of everything, the sound file is deleted. I note that it will be necessary to install the curl program and add more modules to the beginning of our script: use WWW::Curl::Easy; use WWW::Curl::Form; use JSON::XS;
Now about speech synthesis. This will be handled by a function called sayText () in the parameter quotation, which accepts the actual text that needs to be voiced. But first, let's add some missing modules and global variables: require Encode; use URI::Escape; use LWP::UserAgent; our $mp3_data;
Now the code itself: sub sayText { my $text = shift; print "+OK - Speaking \"$text\"\n"; my $url = "http://translate.google.com/translate_tts?tl=ru&q=".uri_escape_utf8($text); my $ua = LWP::UserAgent->new( agent => "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.872.0 Safari/535.2"); $ua->get($url, ':content_cb' => \&callback); open (MP3, "|padsp splay -M") or die "[err] Can't save: $!\n"; print MP3 $mp3_data; close(MP3); $mp3_data = undef; print "+OK - Done!\n"; return; } sub callback { my ($data, $response, $protocol) = @_; $mp3_data .= $data; # }
As you can see, the server’s response as a stream is handled by the callback () function, which adds data to the $ mp3_data variable. The data is transmitted via the pipe to the splay program which is launched via the padsp program, which is responsible for the OSS emulation (in Ubuntu, the OSS was drunk). The -M switch causes the program to play data from the standard input. #!/usr/bin/perl use strict; use IO::Socket; while (1) { my $rnd = int(rand(1000)); `rec -q -c 1 -r 16000 ./data/input-$rnd.wav trim 0 4`; `flac -f -s ./data/input-$rnd.wav -o ./data/input-$rnd.flac`; `rm ./data/input-$rnd.wav`; my $sock = new IO::Socket::INET( PeerAddr => "localhost", PeerPort => 16000, Proto => 'tcp') || next; print $sock "text ".$rnd; undef $rnd; }
As we can see, writing and format conversion perform several programs called from the script: for(1..5) { system("perl mic.pl &>/dev/null"); sleep 1; }
Now we only need to implement the function checkcmd () in order to test the operation of the whole complex. We also need an address reversal to eliminate false positives. sub checkcmd { my $text = shift; if($text =~ //) { sayText(" - $text"); # if $text eq " "; } }
Now, put it all together. We got two scripts, let's call them srv.pl and mic.pl , as well as the data subdirectory for storing our sound files. #!/usr/bin/perl -w package iON; use strict; use utf8; use WWW::Curl::Easy; use WWW::Curl::Form; use JSON::XS; use URI::Escape; use LWP::UserAgent; require Encode; use base qw(Net::Server::Fork); ## ################################ $|=1; our $parent = $$; our $mp3_data; ################################ for(1..5) { system("perl mic.pl &>/dev/null"); sleep 1; } ## ############################### iON->run(port => 16000, background => undef, log_level => 4, host => 'localhost'); ################################ ################################ sub DESTROY { if($$ == $parent) { system("killall perl"); system("rm data/*.flac && rm data/*.wav"); } } ## ################################ sub process_request { my $self = shift; while (<STDIN>) { if (/text (\d+)/) { toText($1); next; } if (/quit/i) { print "+OK - Bye-bye ;)\n\n"; last; } print "-ERR - Command not found\n"; } } ############################### ############################### sub toText { my $num = shift; print "+OK - Trying recognize text\n"; my $curl = WWW::Curl::Easy->new; $curl->setopt(CURLOPT_HEADER,1); $curl->setopt(CURLOPT_POST,1); #$curl->setopt(CURLOPT_VERBOSE, 1); my @myheaders=(); $myheaders[0] = "Content-Type: audio/x-flac; rate=16000"; $curl->setopt(CURLOPT_HTTPHEADER, \@myheaders); $curl->setopt(CURLOPT_URL, 'https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&lang=ru-RU'); my $curlf = WWW::Curl::Form->new; $curlf->formaddfile("data/input-$num.flac", 'myfile', "audio/x-flac"); $curl->setopt(CURLOPT_HTTPPOST, $curlf); my $response_body; $curl->setopt(CURLOPT_WRITEDATA,\$response_body); # Starts the actual request my $retcode = $curl->perform; # Looking at the results... if ($retcode == 0) { $response_body =~ /\n\r\n(.*)/g; my $json = $1; my $json_xs = JSON::XS->new(); $json_xs->utf8(1); my @hypo = $json_xs->decode($json)->{'hypotheses'}; my $dost = $hypo[0][0]{'confidence'}; my $text = $hypo[0][0]{'utterance'}; $dost = 0.0 if !defined $dost; $text = "" if !defined $text; print "+OK - Text is: \"$text\", confidence is: $dost\n"; if($dost > 0.5) { checkcmd($text); } { print "+ERR - Confidence is lower, then 0.5\n"; } } else { # Error code, type of error, error message print("+ERR - $retcode ".$curl->strerror($retcode)." ".$curl->errbuf); } system("rm data/input-$num.flac"); } ############################### ## ############################### sub checkcmd { my $text = shift; chomp $text; $text =~ s/ $//g; print "+OK - Got command \"$text\" (Length: ".length($text).")\n"; if($text =~ //) { sayText(" - $text"); } return; } ## ############################### sub sayText { my $text = shift; print "+OK - Speaking \"$text\"\n"; my $url = "http://translate.google.com/translate_tts?tl=ru&q=".uri_escape_utf8($text); my $ua = LWP::UserAgent->new( agent => "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.872.0 Safari/535.2"); $ua->get($url, ':content_cb' => \&callback); open (MP3, "|padsp splay -M") or die "[err] Can't save: $!\n"; print MP3 $mp3_data; close(MP3); $mp3_data = undef; print "+OK - Done!\n"; return; } sub callback { my ($data, $response, $protocol) = @_; $mp3_data .= $data; # } ######################################## ######################################## 1;
#!/usr/bin/perl use strict; use IO::Socket; while (1) { my $rnd = int(rand(1000)); `rec -q -c 1 -r 16000 ./data/input-$rnd.wav trim 0 3`; `flac -f -s ./data/input-$rnd.wav -o ./data/input-$rnd.flac`; `rm ./data/input-$rnd.wav`; my $sock = new IO::Socket::INET( PeerAddr => "localhost", PeerPort => 16000, Proto => 'tcp') || next; print $sock "text ".$rnd; undef $rnd; }
chmod 755 srv.pl mic.pl
Source: https://habr.com/ru/post/129936/
All Articles