From 9eb9cdd0083dcddf51f0fc81cf400d4fcd8b0b66 Mon Sep 17 00:00:00 2001 From: Hamcha Date: Thu, 20 Apr 2017 11:47:18 +0200 Subject: [PATCH] First version of the STT module (only synchronous) --- mods/main.go | 3 +++ mods/stt.go | 45 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/mods/main.go b/mods/main.go index f84ee2b..e6b2ebe 100644 --- a/mods/main.go +++ b/mods/main.go @@ -48,6 +48,7 @@ var mods = map[string]Mod{ OnMessage: talk, }, "stt": { + OnInit: initstt, OnMessage: stt, }, } @@ -90,6 +91,7 @@ var gillmt *string var sourcesans *string var proverbi *string var wittoken *string +var gapifile *string func main() { brokerAddr := flag.String("broker", "localhost:7314", "Broker address:port") @@ -100,6 +102,7 @@ func main() { macropath = flag.String("macropath", "macros.json", "Path to macros db (JSON)") proverbi = flag.String("proverbi", "proverbi.txt", "Path to proverbi pairs (separated by /)") wittoken = flag.String("wit", "", "Wit.ai token") + gapifile = flag.String("gapi", "", "Google API Service Credentials file") disable := flag.String("disable", "", "Blacklist mods (separated by comma)") enable := flag.String("enable", "", "Whitelist mods (separated by comma)") flag.Parse() diff --git a/mods/stt.go b/mods/stt.go index 4848593..6d9c954 100644 --- a/mods/stt.go +++ b/mods/stt.go @@ -1,16 +1,30 @@ package main import ( + "context" "fmt" "log" "encoding/base64" - "io/ioutil" - + speech "cloud.google.com/go/speech/apiv1" "github.com/hamcha/clessy/tg" + "google.golang.org/api/option" + speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1" ) +var sttClient *speech.Client +var sttCtx context.Context + +func initstt() { + sttCtx = context.Background() + var err error + sttClient, err = speech.NewClient(sttCtx, option.WithServiceAccountFile(*gapifile)) + if err != nil { + panic(fmt.Errorf("Could not initialize Google Speech API client: %s", err.Error())) + } +} + func stt(broker *tg.Broker, 
update tg.APIMessage) { if isCommand(update, "stt") { // Make replies work @@ -18,7 +32,10 @@ func stt(broker *tg.Broker, update tg.APIMessage) { update.Voice = update.ReplyTo.Voice } - broker.SendTextMessage(update.Chat, fmt.Sprintf("Ok! Eccoti intanto delle informazioni sul file: \nDurata: %ds\nDimensione: %d\nTipo: %s", update.Voice.Duration, update.Voice.FileSize, *update.Voice.MimeType), &update.MessageID) + if update.Voice.Duration > 20 { + broker.SendTextMessage(update.Chat, "L'audio dura un bel po' (>20s), al momento non posso farci nulla :S", &update.MessageID) + return + } broker.GetFile(update.Voice.FileID, func(broker *tg.Broker, data tg.BrokerUpdate) { if data.Type == tg.BError { @@ -34,14 +51,30 @@ func stt(broker *tg.Broker, update tg.APIMessage) { return } - err = ioutil.WriteFile(update.Voice.FileID, bytes, 0755) + resp, err := sttClient.Recognize(sttCtx, &speechpb.RecognizeRequest{ + Config: &speechpb.RecognitionConfig{ + Encoding: speechpb.RecognitionConfig_OGG_OPUS, + SampleRateHertz: 16000, + LanguageCode: "it-IT", + }, + Audio: &speechpb.RecognitionAudio{ + AudioSource: &speechpb.RecognitionAudio_Content{Content: bytes}, + }, + }) if err != nil { - log.Printf("[stt] Save to file error: %s\n", err.Error()) + log.Printf("[stt] STT request error: %s\n", err.Error()) broker.SendTextMessage(update.Chat, "ERRORE! @hamcha controlla la console!", &update.MessageID) return } - broker.SendTextMessage(update.Chat, fmt.Sprintf("Salvata registrazione su disco! (fname %s)", update.Voice.FileID), &update.MessageID) + out := "Questo è quello che ho capito:\n" + for _, result := range resp.Results { + for _, alt := range result.Alternatives { + out += fmt.Sprintf("\"%v\"", alt.Transcript, alt.Confidence) + } + } + + broker.SendTextMessage(update.Chat, out, &update.MessageID) }) } }