Awesome! @cvp, your code-in-progress was really helpful in getting my pattern-recognition engine going — I'm starting to get the hang of this objc_util thing. Looks like we finished around the same time :) Here's my version, which is a pretty item-for-item transcription of https://developer.apple.com/documentation/speech/recognizing_speech_in_live_audio?language=objc. Instantiate Recognizer(), and then you can call start() and stop() for live updates. It's currently only safe to instantiate once. The line print(bestTranscription.formattedString()) is where event handling of the recognition updates should occur.
from objc_util import *
NSLocale = ObjCClass('NSLocale')
SFSpeechRecognizer = ObjCClass('SFSpeechRecognizer')
AVAudioEngine = ObjCClass('AVAudioEngine')
AVAudioSession = ObjCClass('AVAudioSession')
SFSpeechAudioBufferRecognitionRequest = ObjCClass('SFSpeechAudioBufferRecognitionRequest')
class Recognizer:
    """Live microphone speech recognition via Apple's Speech framework.

    An item-for-item transcription of Apple's "Recognizing speech in live
    audio" sample, driven through the Pythonista ``objc_util`` bridge.
    Create one instance, then call ``start()`` / ``stop()`` for live
    transcription updates (printed from the result handler).

    NOTE(review): only safe to instantiate once per process — it taps the
    shared audio-engine input node.
    """

    # AVFoundation option flags. These ObjC APIs take NSUInteger bitmasks,
    # not the constant *names* wrapped in NSStrings (which the original
    # code passed — bug fix).
    _OPTION_DUCK_OTHERS = 0x2                     # AVAudioSessionCategoryOptionDuckOthers
    _OPTION_NOTIFY_OTHERS_ON_DEACTIVATION = 0x1   # AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation

    def __init__(self):
        locale = NSLocale.alloc().initWithLocaleIdentifier(ns("en-US"))
        self.speech_recognizer = SFSpeechRecognizer.alloc().initWithLocale(locale)
        self.audio_engine = AVAudioEngine.new()
        self.input_node = self.audio_engine.inputNode()
        self.recognition_request = None   # SFSpeechAudioBufferRecognitionRequest while active
        self.recognition_task = None      # SFSpeechRecognitionTask while active

        def recognitionTaskWithRequest_resultHandler(block_ptr, result_ptr, error_ptr):
            # Invoked by the Speech framework with partial and final results.
            # TODO: investigate https://forum.omz-software.com/topic/5232/traceback-using-gestures-module/21
            is_final = False
            if result_ptr is not None:
                result = ObjCInstance(result_ptr)
                bestTranscription = result.bestTranscription()
                # Event handling of recognition updates should occur here.
                print(bestTranscription.formattedString())
                is_final = result.isFinal()
            if error_ptr is not None or is_final:
                if is_final:
                    print("Speech recognition complete.")
                if error_ptr is not None:
                    error = ObjCInstance(error_ptr)
                    print("Error in recognition task:", error)
                # Tear down capture and clear state so start() can run again.
                self.audio_engine.stop()
                self.input_node.removeTapOnBus_(0)
                self.recognition_request = None
                self.recognition_task = None

        self.recognitionTaskWithRequest_resultHandler = recognitionTaskWithRequest_resultHandler

        def installTapOnBus_tapBlock(block_ptr, buffer_ptr, when_ptr):
            # Feed each captured PCM buffer into the active recognition request.
            buffer = ObjCInstance(buffer_ptr)
            if self.recognition_request is not None:
                self.recognition_request.appendAudioPCMBuffer_(buffer)

        self.installTapOnBus_tapBlock = installTapOnBus_tapBlock

        # https://forum.omz-software.com/topic/5380/initialize-search-field-of-a-uidocumentpickerviewcontroller/11
        # ObjCBlock wrappers must stay referenced while in use, so keep them
        # on self for the lifetime of the instance.
        self.result_handler = ObjCBlock(
            self.recognitionTaskWithRequest_resultHandler,
            restype=None, argtypes=[c_void_p, c_void_p, c_void_p])
        self.tap_block = ObjCBlock(
            self.installTapOnBus_tapBlock,
            restype=None, argtypes=[c_void_p, c_void_p, c_void_p])

    def start(self):
        """Configure the audio session and begin streaming mic audio to recognition.

        No-op (with a message) if a recognition task is already running.
        """
        if self.recognition_task is not None:
            print("Speech recognition already active.")
            return
        print("Starting speech recognition.")
        audio_session = AVAudioSession.sharedInstance()
        # Category/mode are NSString constants — presumably their runtime
        # values match their names (TODO confirm against AVFoundation).
        # The options arguments, however, are integer bitmasks; passing
        # ns('AVAudioSessionCategoryOptionDuckOthers') etc. was a bug.
        audio_session.setCategory_mode_options_error_(
            ns("AVAudioSessionCategoryRecord"),
            ns("AVAudioSessionModeMeasurement"),
            self._OPTION_DUCK_OTHERS, None)
        audio_session.setActive_withOptions_error_(
            True, self._OPTION_NOTIFY_OTHERS_ON_DEACTIVATION, None)
        self.recognition_request = SFSpeechAudioBufferRecognitionRequest.new()
        if self.recognition_request is None:
            print("Error: could not create recognition request!")
            return
        # Bug fix: plain attribute assignment only sets a Python attribute on
        # the ObjCInstance wrapper — the ObjC setter must be called instead.
        self.recognition_request.setShouldReportPartialResults_(True)
        self.recognition_task = self.speech_recognizer.recognitionTaskWithRequest_resultHandler_(
            self.recognition_request, self.result_handler)
        recording_format = self.input_node.outputFormatForBus_(0)
        self.input_node.installTapOnBus_bufferSize_format_block_(
            0, 1024, recording_format, self.tap_block)
        self.audio_engine.prepare()
        err_ptr = c_void_p()  # https://forum.omz-software.com/topic/3618/querying-returned-nserror/2
        self.audio_engine.startAndReturnError_(byref(err_ptr))
        if err_ptr:
            err = ObjCInstance(err_ptr)
            print("Error in audio engine:", err)

    def stop(self):
        """Stop capture and signal end-of-audio so the final result is delivered.

        Remaining teardown (tap removal, state reset) happens in the result
        handler once the framework reports the final result or an error.
        """
        if self.audio_engine.isRunning():
            self.audio_engine.stop()
        if self.recognition_request is not None:
            self.recognition_request.endAudio()