The SpeechRecognition package supports many APIs, such as:
recognize_bing()
recognize_google()
recognize_google_cloud()
recognize_houndify()
recognize_ibm()
recognize_sphinx()
recognize_wit()
But the only free one is recognize_google():
# pip3 install SpeechRecognition moviepy
"""Extract the audio track of a video and transcribe it with recognize_google().

Recognizer back ends offered by SpeechRecognition:

recognize_bing(): Microsoft Bing Speech
recognize_google(): Google Web Speech API, free, working with audio files
    under 5 minutes, limited to only 50 requests per day
recognize_google_cloud(): Google Cloud Speech - requires installation of the
    google-cloud-speech package
recognize_houndify(): Houndify by SoundHound
recognize_ibm(): IBM Speech to Text
recognize_sphinx(): CMU Sphinx - requires installing PocketSphinx
recognize_wit(): Wit.ai
"""
import sys

# NOTE: `moviepy.editor` is the moviepy 1.x import path; moviepy 2.x exposes
# VideoFileClip from the top-level `moviepy` package instead.
import moviepy.editor as mp
import speech_recognition as sr

VIDEO_PATH = "c:/images/intro1.mp4"
WAV_PATH = "c:/images/converted.wav"


def extract_audio(video_path, wav_path):
    """Write the audio track of *video_path* to *wav_path*.

    Raises:
        ValueError: if the video has no audio track.
    """
    clip = mp.VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError("{} has no audio track".format(video_path))
        clip.audio.write_audiofile(wav_path)
    finally:
        # Release the ffmpeg reader(s) held by the clip even on failure.
        clip.close()


def load_audio(recognizer, wav_path):
    """Return the content of *wav_path* as AudioData read by *recognizer*."""
    with sr.AudioFile(wav_path) as source:
        # Calibrates recognizer.energy_threshold from the first 0.5 s of the
        # file. That half second is read from the stream here, so record()
        # below starts after it.
        recognizer.adjust_for_ambient_noise(source, duration=0.5)
        return recognizer.record(source)


def main():
    """Convert the video to WAV, transcribe it and print the transcript."""
    extract_audio(VIDEO_PATH, WAV_PATH)

    recognizer = sr.Recognizer()
    # Sounds below this energy level are treated as silence, above as speech.
    recognizer.energy_threshold = 300
    audio_data = load_audio(recognizer, WAV_PATH)

    try:
        result = recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        sys.exit("Speech was unintelligible; no transcript produced.")
    except sr.RequestError as err:
        sys.exit("Speech recognition request failed: {}".format(err))
    print(result)


if __name__ == "__main__":
    main()
If you want to use the commercial Google Cloud API, just download your Google JSON credentials file and use the following code:
# pip3 install SpeechRecognition moviepy
# pip3 install --upgrade google-cloud-speech
"""Transcribe the audio track of a video with the Google Cloud Speech API."""
import os
import sys

# NOTE: `moviepy.editor` is the moviepy 1.x import path; moviepy 2.x exposes
# VideoFileClip from the top-level `moviepy` package instead.
import moviepy.editor as mp
import speech_recognition as sr

VIDEO_PATH = "c:/images/intro1.mp4"
WAV_PATH = "c:/images/converted.wav"


def GoogleConnection():
    """Point GOOGLE_APPLICATION_CREDENTIALS at the JSON key next to this script.

    Raises:
        FileNotFoundError: if the credentials file does not exist, so the
            problem is reported here rather than as a later auth failure.
    """
    script_dir = os.path.dirname(__file__)
    credentials_file = os.path.join(script_dir, 'xxxxxx.json')
    if not os.path.isfile(credentials_file):
        raise FileNotFoundError(
            "Google credentials file not found: {}".format(credentials_file)
        )
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_file


def extract_audio(video_path, wav_path):
    """Write the audio track of *video_path* to *wav_path*.

    Raises:
        ValueError: if the video has no audio track.
    """
    clip = mp.VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError("{} has no audio track".format(video_path))
        clip.audio.write_audiofile(wav_path)
    finally:
        # Release the ffmpeg reader(s) held by the clip even on failure.
        clip.close()


def load_audio(recognizer, wav_path):
    """Return the content of *wav_path* as AudioData read by *recognizer*."""
    with sr.AudioFile(wav_path) as source:
        # Calibrates recognizer.energy_threshold from the first 0.5 s of the
        # file. That half second is read from the stream here, so record()
        # below starts after it.
        recognizer.adjust_for_ambient_noise(source, duration=0.5)
        return recognizer.record(source)


def main():
    """Convert the video to WAV, transcribe it and print the transcript."""
    extract_audio(VIDEO_PATH, WAV_PATH)

    recognizer = sr.Recognizer()
    # Sounds below this energy level are treated as silence, above as speech.
    recognizer.energy_threshold = 300
    audio_data = load_audio(recognizer, WAV_PATH)

    # Credentials must be in the environment before the cloud call is made.
    GoogleConnection()
    try:
        result = recognizer.recognize_google_cloud(audio_data)
    except sr.UnknownValueError:
        sys.exit("Speech was unintelligible; no transcript produced.")
    except sr.RequestError as err:
        sys.exit("Speech recognition request failed: {}".format(err))
    print(result)


if __name__ == "__main__":
    main()
And if a sound file is already stored in Google Firebase Storage, we can process it using the following code:
# Imports the Google Cloud client library
import os

from google.cloud import speech

# The name of the audio file to transcribe
GCS_URI = "gs://xxx.appspot.com/Video/converted.wav"


def GoogleConnection():
    """Point GOOGLE_APPLICATION_CREDENTIALS at the JSON key next to this script.

    Raises:
        FileNotFoundError: if the credentials file does not exist, so the
            problem is reported here rather than as a later auth failure.
    """
    script_dir = os.path.dirname(__file__)
    credentials_file = os.path.join(script_dir, 'xxxx.json')
    if not os.path.isfile(credentials_file):
        raise FileNotFoundError(
            "Google credentials file not found: {}".format(credentials_file)
        )
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_file


def main():
    """Transcribe the WAV file at GCS_URI and print each result's transcript."""
    GoogleConnection()

    # Instantiates a client
    client = speech.SpeechClient()

    audio = speech.RecognitionAudio(uri=GCS_URI)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        # sample_rate_hertz is left unset so the rate is taken from the WAV
        # header; set it explicitly (e.g. 44100) for headerless audio.
        audio_channel_count=2,
        language_code="en-US",
    )

    # Detects speech in the audio file.
    # NOTE: synchronous recognize() only accepts short audio (about one
    # minute); longer files need long_running_recognize().
    response = client.recognize(config=config, audio=audio)

    for result in response.results:
        # alternatives[0] is the first alternative returned for this result.
        print("Transcript: {}".format(result.alternatives[0].transcript))


if __name__ == "__main__":
    main()