代码之家  ›  专栏  ›  技术社区  ›  Shiva

如何在iOS中将语音转换为文本[关闭]

  •  -1
  • Shiva  · 技术社区  · 7 年前

    据我所知,apple native framework没有用于将语音转换为文本的API,我们必须使用第三方框架来实现这一点,它有很多缺点,比如用户必须使用麦克风才能将语音转换为文本。

    但是我可以找到很多将文字转换为语音的信息,但不能找到其他方法

    如果有人能透露一些信息,那就太好了!

    2 回复  |  直到 7 年前
        1
  •  3
  •   Community Egal    4 年前

    对于 Objective-C,我编写了一个语音识别类,将语音转换为文本。

    步骤1:创建语音转换器类

    1. 命名它,比如说ATSpeechRecognizer。

    在 ATSpeechRecognizer.h 中:

    #import <Foundation/Foundation.h>
    #import <Speech/Speech.h>
    #import <AVFoundation/AVFoundation.h>
    
    /* Coarse lifecycle state of the recognizer, reported to the delegate
       through the optional -changeStateIndicator: callback. */
    typedef NS_ENUM(NSInteger, ATSpeechRecognizerState) {
        ATSpeechRecognizerStateRunning,
        ATSpeechRecognizerStateStopped
    };
    
    /* Delegate contract for receiving transcriptions, availability changes
       and error reports from ATSpeechRecognizer. */
    @protocol ATSpeechDelegate<NSObject>
    @required
    /*This method relays parsed text from Speech to the delegate responder class*/
    -(void)convertedSpeechToText:(NSString *) parsedText;
    /*This method relays change in Speech recognition ability to delegate responder class*/
    -(void) speechRecAvailabilityChanged:(BOOL) status;
    /*This method relays error messages to delegate responder class*/
    -(void) sendErrorInfoToViewController:(NSString *) errorMessage;
    @optional
    /*This method relays info regarding whether speech rec is running or stopped to delegate responder class. State will be either ATSpeechRecognizerStateRunning or ATSpeechRecognizerStateStopped. You may or may not implement this method*/
    -(void) changeStateIndicator:(ATSpeechRecognizerState) state;
    @end
    
    /* Singleton that wraps SFSpeechRecognizer + AVAudioEngine for live
       speech-to-text. Call -activateSpeechRecognizerWithLocaleIdentifier:andBlock:
       once (e.g. from viewDidLoad), then -toggleRecording to start/stop capture.
       NOTE(review): per the .m comments, microphone and speech-recognition usage
       must be declared in the host app's Info.plist — confirm the exact keys
       (NSMicrophoneUsageDescription / NSSpeechRecognitionUsageDescription)
       against the app target. */
    @interface ATSpeechRecognizer : NSObject <SFSpeechRecognizerDelegate>
    
    + (ATSpeechRecognizer *)sharedObject;
    
    /*Delegate to communicate with requesting VCs*/
    @property (weak, nonatomic) id<ATSpeechDelegate> delegate;
    
    /*Class Methods*/
    -(void) toggleRecording;
    -(void) activateSpeechRecognizerWithLocaleIdentifier:(NSString *) localeIdentifier andBlock:(void (^)(BOOL isAuthorized))successBlock;
    @end
    

    #import "ATSpeechRecognizer.h"
    
    @interface ATSpeechRecognizer ()
    
    /* Private state. FIX: attributes were omitted, which made every property
       atomic by default; `nonatomic, strong` is the idiomatic choice here —
       atomic accessors buy nothing for this single-session recognizer. */
    
    /* Buffer-based request that feeds captured microphone audio to the
       recognizer (Apple's servers). */
    @property (nonatomic, strong) SFSpeechAudioBufferRecognitionRequest *speechAudioRecRequest;
    
    /* Live recognition session; retained so it can be cancelled or stopped. */
    @property (nonatomic, strong) SFSpeechRecognitionTask *speechRecogTask;
    
    /* Locale-specific recognizer created in
       -activateSpeechRecognizerWithLocaleIdentifier:andBlock:. */
    @property (nonatomic, strong) SFSpeechRecognizer *speechRecognizer;
    
    /* Captures microphone input; a tap on its input node supplies the request. */
    @property (nonatomic, strong) AVAudioEngine *audioEngine;
    
    @end
    
    @implementation ATSpeechRecognizer
    
    
    
    #pragma mark - Constants
    
    // User-facing error strings relayed to the delegate via
    // -sendErrorMessageToDelegate:. FIX: declared as typed constants instead of
    // #define macros — the compiler gets a real, type-checked NSString symbol.
    static NSString * const kErrorMessageAuthorize = @"You declined the permission to perform speech Permission. Please authorize the operation in your device settings.";
    static NSString * const kErrorMessageRestricted = @"Speech recognition isn't available on this OS version. Please upgrade to iOS 10 or later.";
    static NSString * const kErrorMessageNotDetermined = @"Speech recognition isn't authorized yet";
    static NSString * const kErrorMessageAudioInputNotFound = @"This device has no audio input node";
    static NSString * const kErrorMessageRequestFailed = @"Unable to create an SFSpeechAudioBufferRecognitionRequest object";
    static NSString * const kErrorMessageAudioRecordingFailed = @"Unable to start Audio recording due to failure in Recording Engine";
    
    #pragma mark - Singleton methods
    
    + (ATSpeechRecognizer *)sharedObject {
        /* Process-wide singleton; dispatch_once guarantees the instance is
           created exactly once, even under concurrent first access. */
        static ATSpeechRecognizer *instance = nil;
        static dispatch_once_t token;
        dispatch_once(&token, ^{
            instance = [[self alloc] init];
        });
        return instance;
    }
    
    - (id)init {
        /* Plain designated initializer; all real setup is deferred to
           -activateSpeechRecognizerWithLocaleIdentifier:andBlock:. */
        self = [super init];
        return self;
    }
    
    #pragma mark - Recognition methods
    
    -(void) activateSpeechRecognizerWithLocaleIdentifier:(NSString *) localeIdentifier andBlock:(void (^)(BOOL isAuthorized))successBlock{
        /*
         Build the recognizer for the given locale identifier (e.g. @"en-US")
         and the audio engine, then request speech-recognition authorization.
         successBlock receives YES only when a locale was supplied AND the user
         authorized the feature; otherwise NO.
         FIX: successBlock is now nil-checked before every invocation — the
         original crashed when a nil block was passed in.
         */
        if([localeIdentifier length]>0){
            NSLocale *locale = [[NSLocale alloc] initWithLocaleIdentifier:localeIdentifier];
            _speechRecognizer = [[SFSpeechRecognizer alloc] initWithLocale:locale];
            _speechRecognizer.delegate = self;
            _audioEngine = [[AVAudioEngine alloc] init];
            [self getSpeechRecognizerAuthenticationStatusWithSuccessBlock:^(BOOL isAuthorized) {
                if(successBlock){
                    successBlock(isAuthorized);
                }
            }];
        }
        else if(successBlock){
            // No locale supplied — nothing to activate.
            successBlock(NO);
        }
    }
    
    /* Microphone usage must be authorized in the Info.plist */
    
    -(void) toggleRecording{
        /* Toggle capture: a running engine gets stopped, an idle one started. */
        if (!_audioEngine.isRunning) {
            [self startAudioEngine];
            return;
        }
        [self stopAudioEngine];
    }
    
    
    #pragma mark - Internal Methods
    
    /*
     In case different buttons are used for recording and stopping, these methods should be called individually. Otherwise use -(void) toggleRecording.
     */
    
    -(void) startAudioEngine{
        /* Tell the delegate (when it implements the optional state hook) that
           recognition is now running, then kick off the actual capture. */
        BOOL wantsStateUpdates = [self isDelegateValidForSelector:NSStringFromSelector(@selector(changeStateIndicator:))];
        if (wantsStateUpdates) {
            [_delegate changeStateIndicator:ATSpeechRecognizerStateRunning];
        }
        [self startRecordingSpeech];
    }
    
    -(void) stopAudioEngine{
        /* Mirror of -startAudioEngine: notify the delegate, halt the engine,
           mark the audio stream finished and drop the recognition objects. */
        BOOL wantsStateUpdates = [self isDelegateValidForSelector:NSStringFromSelector(@selector(changeStateIndicator:))];
        if (wantsStateUpdates) {
            [_delegate changeStateIndicator:ATSpeechRecognizerStateStopped];
        }
        [_audioEngine stop];
        [_speechAudioRecRequest endAudio];
        _speechRecogTask = nil;
        _speechAudioRecRequest = nil;
    }
    
    /*
     All the voice data is transmitted to Apple’s backend for processing. Therefore, it is mandatory to get the user’s authorization. Speech Recognition Must be authorized in the info.plist
     */
    
    -(void) getSpeechRecognizerAuthenticationStatusWithSuccessBlock:(void (^)(BOOL isAuthorized))successBlock{
        /*
         Ask the user for speech-recognition authorization (voice data is sent
         to Apple's backend). The block receives YES only for the Authorized
         status; every other status reports an error to the delegate first.
         FIX: the Denied and Restricted cases were missing `break` statements,
         so control fell through — a single Denied status invoked successBlock
         three times and sent three different error messages.
         NOTE(review): the authorization callback may arrive on a background
         queue — hop to the main queue before touching UI in the delegate.
         */
        [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus status) {
            
            switch (status) {
                case SFSpeechRecognizerAuthorizationStatusAuthorized:
                    successBlock(YES);
                    break;
                case SFSpeechRecognizerAuthorizationStatusDenied:
                    [self sendErrorMessageToDelegate:kErrorMessageAuthorize];
                    successBlock(NO);
                    break;
                case SFSpeechRecognizerAuthorizationStatusRestricted:
                    [self sendErrorMessageToDelegate:kErrorMessageRestricted];
                    successBlock(NO);
                    break;
                case SFSpeechRecognizerAuthorizationStatusNotDetermined:
                    [self sendErrorMessageToDelegate:kErrorMessageNotDetermined];
                    successBlock(NO);
                    break;
                default:
                    break;
            }
        }];
    }
    
    -(void) startRecordingSpeech{
        
        /*
         Cancel any recognition task that is still running so we start anew.
         */
        if(_speechRecogTask!=nil){
            [_speechRecogTask cancel];
            _speechRecogTask = nil;
        }
        
        /*
         Prepare the shared audio session: category Record, mode Measurement,
         then activate it.
         FIX: -setCategory:error:, -setMode:error: and -setActive:error: report
         failure through the NSError out-parameter and a NO return value; they
         do not raise exceptions, so the old @try/@catch could never fire and
         failures were silently ignored. Check the return values instead.
         */
        AVAudioSession *audioSession = [AVAudioSession sharedInstance];
        NSError *sessionError = nil;
        if(![audioSession setCategory:AVAudioSessionCategoryRecord error:&sessionError] ||
           ![audioSession setMode:AVAudioSessionModeMeasurement error:&sessionError] ||
           ![audioSession setActive:YES error:&sessionError]){
            [self sendErrorMessageToDelegate:sessionError.localizedDescription];
            return;
        }
        
        /*
         Create the request that carries our audio data to Apple's servers.
         FIX: alloc/init does not throw on failure — it returns nil — so test
         for nil instead of catching exceptions.
         */
        _speechAudioRecRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
        if(_speechAudioRecRequest==nil){
            [self sendErrorMessageToDelegate:kErrorMessageRequestFailed];
            return;
        }
        
        /*
         Check if the audioEngine (your device) has an audio input for recording.
         */
        if(_audioEngine.inputNode!=nil){
            AVAudioInputNode *inputNode = _audioEngine.inputNode;
            
            /*If true, partial (non-final) results for each utterance will be
             reported. Default is true.*/
            _speechAudioRecRequest.shouldReportPartialResults = YES;
            
            /*Start recognition. The result handler fires whenever the engine
             refines its transcription, errors out, or finishes.
             FIX: capture self weakly — self retains the task, and the handler
             previously captured self strongly (via implicit ivar access),
             creating a retain cycle for the lifetime of the task.*/
            __weak typeof(self) weakSelf = self;
            _speechRecogTask = [_speechRecognizer recognitionTaskWithRequest:_speechAudioRecRequest resultHandler:^(SFSpeechRecognitionResult * _Nullable result, NSError * _Nullable error) {
                __strong typeof(weakSelf) strongSelf = weakSelf;
                if(strongSelf==nil){
                    return;
                }
                BOOL isFinal = NO;
                if(result!=nil){
                    //Relay the best transcription so far to the delegate.
                    if([strongSelf isDelegateValidForSelector:NSStringFromSelector(@selector(convertedSpeechToText:))]){
                        [strongSelf.delegate convertedSpeechToText:[[result bestTranscription] formattedString]];
                    }
                    isFinal = [result isFinal]; //True if the hypotheses will not change; speech processing is complete.
                }
                
                //If error or completed, tear down the engine and remove the tap.
                if(error!=nil || isFinal){
                    [strongSelf.audioEngine stop];
                    [inputNode removeTapOnBus:0];
                    strongSelf.speechRecogTask = nil;
                    strongSelf.speechAudioRecRequest = nil;
                    if(error!=nil){
                        [strongSelf stopAudioEngine];
                        [strongSelf sendErrorMessageToDelegate:[NSString stringWithFormat:@"%li - %@",(long)error.code, error.localizedDescription]];
                    }
                }
                
            }];
            
            /* Add an audio input to the recognitionRequest. It is ok to install
             the tap after starting the recognitionTask — the Speech framework
             starts recognizing as soon as audio arrives.*/
            AVAudioFormat *recordingFormat = [inputNode outputFormatForBus:0];
            [inputNode installTapOnBus:0 bufferSize:1024 format:recordingFormat block:^(AVAudioPCMBuffer * _Nonnull buffer, AVAudioTime * _Nonnull when) {
                [weakSelf.speechAudioRecRequest appendAudioPCMBuffer:buffer];
            }];
            
            /*Prepare and start the audioEngine.
             FIX: -startAndReturnError: also reports failure via NSError/NO
             rather than an exception — check the return value.*/
            [_audioEngine prepare];
            NSError *engineError = nil;
            if(![_audioEngine startAndReturnError:&engineError]){
                [self sendErrorMessageToDelegate:kErrorMessageAudioRecordingFailed];
            }
            
        }
        else{
            [self sendErrorMessageToDelegate:kErrorMessageAudioInputNotFound];
        }
        
        
    }
    
    -(BOOL) isDelegateValidForSelector:(NSString*)selectorName{
        /* YES only when a delegate is attached AND it implements the selector
           named by selectorName (needed for the @optional protocol methods). */
        return _delegate != nil && [_delegate respondsToSelector:NSSelectorFromString(selectorName)];
    }
    
    -(void) sendErrorMessageToDelegate:(NSString*) errorMessage{
        /* Forward the error text to the delegate's error hook, if present. */
        if (![self isDelegateValidForSelector:NSStringFromSelector(@selector(sendErrorInfoToViewController:))]) {
            return;
        }
        [_delegate sendErrorInfoToViewController:errorMessage];
    }
    
    #pragma mark - Speech Recognizer Delegate Methods
    
    -(void) speechRecognizer:(SFSpeechRecognizer *)speechRecognizer availabilityDidChange:(BOOL)available{
        /* SFSpeechRecognizerDelegate: when recognition becomes unavailable,
           halt any in-flight recording first, then pass the new availability
           on to our own delegate (a @required method, so no responds check). */
        BOOL lostAvailability = !available;
        if (lostAvailability) {
            [self stopAudioEngine];
        }
        [_delegate speechRecAvailabilityChanged:available];
    }
    

    就这样。 现在,您可以在任何要将语音转换为文本的项目中的任何位置使用该类


    第2步:在VC中设置ATSpeechRecognizer类

    #import "ATSpeechRecognizer.h"
    @interface ViewController : UIViewController <ATSpeechDelegate>{
        BOOL isRecAllowed;
    }
    

    在 VC 的 viewDidLoad 中调用以下方法

    -(void) setUpSpeechRecognizerService{
        /* Register this controller as the recognizer's delegate and request
           authorization for the en-US locale; the callback records whether
           speech recognition may be used. */
        ATSpeechRecognizer *recognizer = [ATSpeechRecognizer sharedObject];
        recognizer.delegate = self;
        [recognizer activateSpeechRecognizerWithLocaleIdentifier:@"en-US" andBlock:^(BOOL isAuthorized) {
            isRecAllowed = isAuthorized; /*Is operation allowed or not?*/
        }];
    }
    

    现在设置委托方法:

    #pragma mark - Speech Recog Delegates
    
    -(void) convertedSpeechToText:(NSString *)parsedText{
        /* Recognizer delivered a (possibly partial) transcription — show it. */
        if (parsedText == nil) {
            return;
        }
        _txtView.text = parsedText;
    }
    
    -(void) speechRecAvailabilityChanged:(BOOL)status{
        /* Cache availability so button taps can be gated on it later. */
        isRecAllowed = status;
    }
    
    -(void) changeStateIndicator:(ATSpeechRecognizerState) state{
        /* Reflect the recognizer's running/stopped state in the label and
           clear the previous transcript from the text view. */
        BOOL isStopped = (state == ATSpeechRecognizerStateStopped);
        _lblState.text = isStopped ? @"Stopped" : @"Running";
        _txtView.text = @"";
    }
    
    -(void) sendErrorInfoToViewController:(NSString *)errorMessage{
        /* The recognizer reported a problem — surface it to the user. */
        [self showPopUpForErrorMessage:errorMessage];
    }
    

    - (IBAction)btnRecordTapped:(id)sender {
        /* Record-button handler: refuse with an explanatory alert when speech
           recognition is not permitted, otherwise flip recording on/off. */
        if (isRecAllowed) {
            /* -toggleRecording starts the recognizer when idle and stops it
               when running. Call startAudioEngine / stopAudioEngine directly
               instead if you want explicit control over each operation. */
            [[ATSpeechRecognizer sharedObject] toggleRecording];
            return;
        }
        [self showPopUpForErrorMessage:@"Speech recognition is either not authorized or available for this device. Please authorize the operation or upgrade to latest iOS. If you have done all this, check your internet connectivity"];
    }
    

    就这样。您需要的所有进一步解释都在代码注释中。如果你需要进一步解释,请告诉我。

        2
  •  1
  •   Pang Mohammad Imran    7 年前

    import UIKit
    import Speech
    
    /// Speech-to-text demo: streams microphone audio through AVAudioEngine
    /// into SFSpeechRecognizer and shows the live transcription in a text view.
    public class ViewController: UIViewController, SFSpeechRecognizerDelegate {
        // MARK: Properties

        // Recognizer fixed to US English. NOTE(review): the force-unwrap
        // crashes if this locale is unsupported on the device — confirm.
        private let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))!

        // Buffer-based request that receives the tapped microphone audio.
        private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?

        // Live recognition session; retained so it can be cancelled.
        private var recognitionTask: SFSpeechRecognitionTask?

        // Captures microphone input for the tap installed in startRecording().
        private let audioEngine = AVAudioEngine()

        @IBOutlet var textView : UITextView!

        @IBOutlet var recordButton : UIButton!

        // MARK: UIViewController

        public override func viewDidLoad() {
            super.viewDidLoad()

            // Disable the record buttons until authorization has been granted.
            recordButton.isEnabled = false
        }

        override public func viewDidAppear(_ animated: Bool) {
            // FIX: the original never called super.viewDidAppear(animated);
            // UIKit requires subclasses to call super in this override.
            super.viewDidAppear(animated)

            speechRecognizer.delegate = self

            SFSpeechRecognizer.requestAuthorization { authStatus in
                /*
                    The callback may not be called on the main thread. Add an
                    operation to the main queue to update the record button's state.
                */
                OperationQueue.main.addOperation {
                    // Reflect each authorization outcome in the button's state.
                    switch authStatus {
                        case .authorized:
                            self.recordButton.isEnabled = true

                        case .denied:
                            self.recordButton.isEnabled = false
                            self.recordButton.setTitle("User denied access to speech recognition", for: .disabled)

                        case .restricted:
                            self.recordButton.isEnabled = false
                            self.recordButton.setTitle("Speech recognition restricted on this device", for: .disabled)

                        case .notDetermined:
                            self.recordButton.isEnabled = false
                            self.recordButton.setTitle("Speech recognition not yet authorized", for: .disabled)
                    }
                }
            }
        }

        /// Configures the audio session, creates a recognition request/task,
        /// installs a microphone tap feeding it, and starts the engine.
        /// Throws if the audio session or engine cannot be started.
        private func startRecording() throws {

            // Cancel the previous task if it's running.
            if let recognitionTask = recognitionTask {
                recognitionTask.cancel()
                self.recognitionTask = nil
            }

            let audioSession = AVAudioSession.sharedInstance()
            try audioSession.setCategory(AVAudioSessionCategoryRecord)
            try audioSession.setMode(AVAudioSessionModeMeasurement)
            try audioSession.setActive(true, with: .notifyOthersOnDeactivation)

            recognitionRequest = SFSpeechAudioBufferRecognitionRequest()

            guard let inputNode = audioEngine.inputNode else { fatalError("Audio engine has no input node") }
            guard let recognitionRequest = recognitionRequest else { fatalError("Unable to created a SFSpeechAudioBufferRecognitionRequest object") }

            // Configure request so that results are returned before audio recording is finished
            recognitionRequest.shouldReportPartialResults = true

            // A recognition task represents a speech recognition session.
            // We keep a reference to the task so that it can be cancelled.
            recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
                var isFinal = false

                if let result = result {
                    // Show the best transcription so far.
                    self.textView.text = result.bestTranscription.formattedString
                    isFinal = result.isFinal
                }

                // On error or completion: stop capture and reset for reuse.
                if error != nil || isFinal {
                    self.audioEngine.stop()
                    inputNode.removeTap(onBus: 0)

                    self.recognitionRequest = nil
                    self.recognitionTask = nil

                    self.recordButton.isEnabled = true
                    self.recordButton.setTitle("Start Recording", for: [])
                }
            }

            // Feed microphone buffers into the recognition request.
            let recordingFormat = inputNode.outputFormat(forBus: 0)
            inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
                self.recognitionRequest?.append(buffer)
            }

            audioEngine.prepare()

            try audioEngine.start()

            textView.text = "(Go ahead, I'm listening)"
        }

        // MARK: SFSpeechRecognizerDelegate

        /// Enables/disables the record button as recognition availability changes.
        public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
            if available {
                recordButton.isEnabled = true
                recordButton.setTitle("Start Recording", for: [])
            } else {
                recordButton.isEnabled = false
                recordButton.setTitle("Recognition not available", for: .disabled)
            }
        }

        // MARK: Interface Builder actions

        /// Toggles recording on button tap.
        /// NOTE(review): `try!` crashes the app if startRecording() throws
        /// (e.g. audio session failure) — consider do/catch in production.
        @IBAction func recordButtonTapped() {
            if audioEngine.isRunning {
                audioEngine.stop()
                recognitionRequest?.endAudio()
                recordButton.isEnabled = false
                recordButton.setTitle("Stopping", for: .disabled)
            } else {
                try! startRecording()
                recordButton.setTitle("Stop recording", for: [])
            }
        }
    }