Abratel Telecom & Info

Fazer download em pdf ou txt
Fazer download em pdf ou txt
Você está na página 1 de 7

Artigos de Abratel Telecom & Info

Reconhecimento de voz com asterisk


2012-01-21 19:01:28 Ulisses Fres

A ideia é utilizar EAGI para controle do canal de entrada de áudio em conjunto com o File Descriptor. O Asterisk entrega o áudio em formato RAW diretamente no File Descriptor 3, então podemos utilizar esta informação da maneira que acharmos conveniente. Para este caso a manipulação se torna muito prática, o que me desprende totalmente das APPs prontas para gravações inseridas no Asterisk (ex.: Record); nada melhor do que ser livre para voar. Claro, várias análises se tornam possíveis com isso e o leque de aplicações possíveis se torna infinito. Estou usando novamente o módulo audiolab para efetuar o encode do áudio em FLAC; caso exista alguma dificuldade para a instalação deste módulo, poderei pensar em adaptar o código para uso externo do sox ou flac. Como ele funciona? Atende uma ligação; o usuário tem no máximo 10 segundos para efetuar a fala; caso não encontre atividade de voz, encerra com timeout; a estratégia considera a atividade de voz verdadeira para os seguintes valores: RMS > 15 e Pitch > 75; se atividade for encontrada, o usuário poderá falar por no máximo 10 segundos; o script verifica blocos em tempo real com amostras de 1 em 1 segundo e verifica se a fala cessou; caso sim, o script interrompe a gravação automaticamente e envia o que foi gravado para o Google; caso não, o script continua o seu curso até seu máximo de 10 segundos; após encontrada a resposta da fala no Google, o script seta a variável GoogleUtterance. Instalação: Dependencies: apt-get install python-matplotlib apt-get install python-numpy apt-get install python-scipy apt-get install python-dev python-setuptools libsndfile-dev Download and install audiolab from: http://pypi.python.org/pypi/scikits.audiolab/ Example how use in dialplan from Asterisk: Extensions.conf

exten=>_11111111,1,Answer() exten=>_11111111,n,eagi,pahh.py exten=>_11111111,n,GotoIf($[${EXISTS(${GoogleUtterance})}]?hello:bye) exten=>_11111111,n(hello),NoOP(You Said = ${GoogleUtterance}) exten=>_11111111,n(bye),Hangup() Fiz um reconhecimento com comparacao: exten=>_1,1,Answer() exten=>_1,n,eagi(pahh.py) exten=>_1,n,GotoIf($[${EXISTS(${GoogleUtterance})}]?hello:bye) exten=>_1,n(hello),NoOP(You Said = ${GoogleUtterance}) exten=>_1,n(hello),GotoIf($["${GoogleUtterance}" = "9 0 8"]?acertei,s,1) exten=>_1,n(hello),GotoIf($["${GoogleUtterance}" = "9 0 5"]?acertei,s,100) exten=>_1,n(hello),GotoIf($["${GoogleUtterance}" = "9 1 3"]?acertei,s,200) exten=>_1,n(bye),Hangup() ; tratei a comparacao: [acertei] exten => s,1,Dial(DAHDI/8,20) exten => s,100,Dial(DAHDI/5,20) exten => s,200,Dial(DAHDI/13,20) Criar o script com nome pahh.py e colocar na pasta /var/lib/asterisk/agi-bin Efetuar o comando chmod +x /var/lib/asterisk/agi-bin/pahh.py Script pahh.py abaixo: #!/usr/bin/python #Copyright (c) 2012, Eng Eder de Souza #Accessing the Google API for speech recognition With Asterisk! #Eng Eder de Souza #date 15/01/2012 #http://ederwander.wordpress.com/2012/01/16/google-speech-python-asterisk/ # # This program is free software, distributed under the terms of # the GNU General Public License Version 2. See the COPYING file # at the top of the source tree. # #Revision 0.2 #History: #18/01/2012 bug fix in local variable declaration #19/01/2012 suport for old python interpretator #19/01/2012 removed matplotlib dependencies #19/01/2012 Submission of warnings DeprecationWarning and UserWarning import warnings warnings.simplefilter("ignore", DeprecationWarning) warnings.simplefilter("ignore", UserWarning) from scikits.audiolab import Format, Sndfile

from scipy.signal import firwin, lfilter
from tempfile import mkstemp
import numpy as np
import urllib2
import math
import sys
import re
import os

# Language sent to the recognizer: Brazilian Portuguese.
Lang = "pt-BR"
# For English recognition use instead:
# Lang = "en-US"

# Recognition endpoint. The query string must not contain whitespace.
url = ('https://www.google.com/speech-api/v1/recognize?'
       'xjerr=1&client=chromium&lang=' + Lang)

silence = True
env = {}
RawRate = 8000   # samples per second of the audio read from the descriptor
chunk = 1024     # bytes read per iteration while waiting for speech

# http://en.wikipedia.org/wiki/Vocal_range
# Voice is assumed to have a pitch above 75 Hz.
VocalRange = 75.0

# Energy threshold, compared against the value returned by rms()
# (RMS of the normalized samples multiplied by 1000).
Threshold = 15

# 10 seconds x 8000 samples/second x 2 bytes/sample = 160000 bytes,
# rounded up to whole 1024-byte chunks: 157 * 1024 = 160768.
TimeoutSignal = 160768

# 1 second = 16000 bytes, rounded up to whole chunks: 16 * 1024 = 16384.
Timeout_NoSpeaking = 16384

# Scale factor mapping a signed 16-bit sample into [-1.0, 1.0).
SHORT_NORMALIZE = (1.0 / 32768.0)

LastBlock = ''

# File descriptor on which Asterisk (EAGI) delivers the raw audio.
FD = 3

# NOTE: `file` and `all` shadow builtins; the names are kept because the
# functions below read them as module globals.
file = os.fdopen(FD, 'rb')
signal = 0
all = []

# FIR taps used by Filter(); they depend only on constants, so they are
# computed once instead of on every chunk.
_FIR_TAPS = firwin(200, cutoff=0.05 / (0.5 * RawRate), window='hamming')

# Read the "agi_xxx: value" header lines sent on stdin; a blank line ends
# the header.
while 1:
    line = sys.stdin.readline().strip()
    if line == '':
        break
    # Split on the first colon only: values may themselves contain ':',
    # and a line without any colon must not raise on unpacking.
    if ':' in line:
        key, data = line.split(':', 1)
    else:
        key, data = line, ''
    if key[:4] != 'agi_':
        sys.stderr.write("Did not work!\n")
        sys.stderr.flush()
        continue
    key = key.strip()
    data = data.strip()
    if key != '':
        env[key] = data

for key in env.keys():
    sys.stderr.write(" -- %s = %s\n" % (key, env[key]))
    sys.stderr.flush()


def _agi_noop(message):
    """Write an `EXEC "NOOP" "<message>"` command to stdout and flush."""
    sys.stdout.write('EXEC "NOOP" "%s" \n' % message)
    sys.stdout.flush()


def _read_samples(nbytes):
    """Read up to *nbytes* from the audio descriptor as int16 samples.

    Returns an empty array when the read returns no data. A trailing odd
    byte is dropped so the buffer is always a whole number of samples.
    """
    raw = file.read(nbytes)
    raw = raw[:len(raw) - (len(raw) % 2)]
    if not raw:
        return np.array([], dtype=np.int16)
    return np.fromstring(raw, dtype=np.int16)


def SendSpeech(File):
    """Upload the FLAC file *File* to the recognizer and publish the result.

    The file is read fully and then deleted. If the response contains an
    "utterance" field, its text (double quotes removed) is stored in the
    channel variable GoogleUtterance; otherwise a "speech not recognized"
    NOOP is emitted and nothing is set.

    Errors raised by urllib2 (network/HTTP failures) are not caught here.
    """
    handle = open(File, "rb")
    try:
        flac = handle.read()
    finally:
        handle.close()
    os.remove(File)

    header = {'Content-Type': 'audio/x-flac; rate=8000'}
    req = urllib2.Request(url, flac, header)
    data = urllib2.urlopen(req)
    find = re.findall('"utterance":(.*),', data.read())

    if not find:
        # No utterance in the response: report it and stop, instead of
        # falling through to an undefined result.
        _agi_noop("speech not recognized ...")
        return

    result = find[0].replace('"', '')
    if result:
        sys.stdout.write('SET VARIABLE GoogleUtterance "%s"\n' % str(result))
        sys.stdout.flush()
        _agi_noop(str(result))


def Filter(samps):
    """Return *samps* passed through the precomputed 200-tap FIR filter."""
    return lfilter(_FIR_TAPS, 1, samps)


def Pitch(signal):
    """Estimate the pitch of *signal* in Hz from its sign changes.

    Counts sign changes between consecutive samples and converts that to
    a frequency: changes * RawRate / (2 * number_of_samples), floored.
    Returns 0.0 for an empty input.
    """
    count = len(signal)
    if count == 0:
        return 0.0
    # Sign of every sample (zero counts as positive), for any Python version.
    crossing = np.where(np.asarray(signal) < 0, -1.0, 1.0)
    index = np.nonzero(np.diff(crossing))[0]
    return float(len(index) * RawRate // (2 * count))


def rms(shorts):
    """Return the energy measure of *shorts* that Threshold is compared to.

    Each sample is scaled by SHORT_NORMALIZE; the result is
    sqrt(sum_of_squares / count) * 1000. Returns 0.0 when count is zero.
    """
    # NOTE(review): count is half the number of samples, as in the original;
    # Threshold is tuned against this scale, so it is left unchanged.
    count = len(shorts) // 2
    if count == 0:
        return 0.0
    sum_squares = 0.0
    for sample in shorts:
        n = sample * SHORT_NORMALIZE
        sum_squares += n * n
    return math.pow(sum_squares / count, 0.5) * 1000


def speaking(data):
    """Return True when the rms() of *data* exceeds Threshold."""
    return rms(data) > Threshold


def VAD(SumFrequency, data2):
    """Return True when the average frequency and the energy both pass.

    The average is SumFrequency / (Timeout_NoSpeaking + 1) and must exceed
    VocalRange / 2; *data2* must then satisfy speaking().
    """
    AVGFrequency = SumFrequency / (Timeout_NoSpeaking + 1)
    if AVGFrequency > VocalRange / 2:
        return speaking(data2)
    return False


def RecordSpeech(TimeoutSignal, LastBlock, LastLastBlock):
    """Append audio to the global sample list until speech stops.

    The samples of *LastLastBlock* and *LastBlock* are stored first. Blocks
    of Timeout_NoSpeaking bytes are then read and stored until a block
    fails speaking() or more than *TimeoutSignal* bytes were consumed.
    """
    all.extend(LastLastBlock)
    all.extend(LastBlock)
    consumed = 0
    while consumed <= TimeoutSignal:
        samps = _read_samples(Timeout_NoSpeaking)
        all.extend(samps)
        consumed = consumed + Timeout_NoSpeaking
        if speaking(samps):
            _agi_noop("Speech Found ...")
        else:
            # An empty read also ends up here, because rms() of no samples
            # is 0.0.
            _agi_noop("End of the Speech...")
            break


def PlayStream(params):
    """Send a STREAM FILE command for *params* and read one reply line.

    The command is also echoed to stderr.
    """
    sys.stderr.write("STREAM FILE %s \"\"\n" % str(params))
    sys.stderr.flush()
    sys.stdout.write("STREAM FILE %s \"\"\n" % str(params))
    sys.stdout.flush()
    result = sys.stdin.readline().strip()


_agi_noop("Hello Waiting For Speech ...")
PlayStream("beep")

# Wait for the first chunk that looks like speech, or give up on timeout.
while silence:
    # Real-time raw audio from Asterisk.
    samps = _read_samples(chunk)
    if len(samps) == 0:
        # Nothing was read from the audio descriptor: stop waiting.
        sys.exit()
    samps2 = Filter(samps)
    Frequency = Pitch(samps2)
    rms_value = rms(samps)
    signal = signal + chunk
    if (rms_value > Threshold) and (Frequency > VocalRange):
        silence = False
        # NOTE(review): LastBlock still holds its initial empty value here,
        # so LastLastBlock contributes no samples to the recording.
        LastLastBlock = LastBlock
        LastBlock = samps
        _agi_noop("Speech Detected Recording...")
    if signal > TimeoutSignal:
        _agi_noop("Time Out No Speech Detected ...")
        sys.exit()

RecordSpeech(TimeoutSignal, LastBlock, LastLastBlock)

array = np.array(all)
fmt = Format('flac', 'pcm16')
nchannels = 1
cd, FileNameTmp = mkstemp('TmpSpeechFile.flac')
# mkstemp returns an open descriptor; Sndfile opens the path itself, so
# release ours instead of leaking it.
os.close(cd)

# Write the recording as FLAC.
afile = Sndfile(FileNameTmp, 'w', fmt, nchannels, RawRate)
afile.write_frames(array)
# Close before uploading so the encoded data is fully written to disk.
afile.close()

SendSpeech(FileNameTmp)
# END ---------- CUT HERE ---------------- Credits: Eng Eder Wander

Você também pode gostar