First version of the STT module (only synchronous)

This commit is contained in:
Hamcha 2017-04-20 11:47:18 +02:00
parent f5840cc60c
commit 9eb9cdd008
2 changed files with 42 additions and 6 deletions

View File

@ -48,6 +48,7 @@ var mods = map[string]Mod{
OnMessage: talk,
},
"stt": {
OnInit: initstt,
OnMessage: stt,
},
}
@ -90,6 +91,7 @@ var gillmt *string
var sourcesans *string
var proverbi *string
var wittoken *string
var gapifile *string
func main() {
brokerAddr := flag.String("broker", "localhost:7314", "Broker address:port")
@ -100,6 +102,7 @@ func main() {
macropath = flag.String("macropath", "macros.json", "Path to macros db (JSON)")
proverbi = flag.String("proverbi", "proverbi.txt", "Path to proverbi pairs (separated by /)")
wittoken = flag.String("wit", "", "Wit.ai token")
gapifile = flag.String("gapi", "", "Google API Service Credentials file")
disable := flag.String("disable", "", "Blacklist mods (separated by comma)")
enable := flag.String("enable", "", "Whitelist mods (separated by comma)")
flag.Parse()

View File

@ -1,16 +1,30 @@
package main
import (
"context"
"fmt"
"log"
"encoding/base64"
"io/ioutil"
speech "cloud.google.com/go/speech/apiv1"
"github.com/hamcha/clessy/tg"
"google.golang.org/api/option"
speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
)
var sttClient *speech.Client
var sttCtx context.Context
// initstt prepares the package-level state used by the "stt" mod.
//
// It stores a background context in sttCtx and builds the Google Cloud Speech
// client into sttClient, authenticating with the service-account credentials
// file whose path is held in the gapifile flag. If the client cannot be
// created the function panics, wrapping the underlying error text.
func initstt() {
	sttCtx = context.Background()

	credentials := option.WithServiceAccountFile(*gapifile)
	client, initErr := speech.NewClient(sttCtx, credentials)
	// Publish whatever NewClient returned before checking the error, so the
	// package-level variable is always assigned by this call.
	sttClient = client
	if initErr != nil {
		panic(fmt.Errorf("Could not initialize Google Speech API client: %s", initErr.Error()))
	}
}
func stt(broker *tg.Broker, update tg.APIMessage) {
if isCommand(update, "stt") {
// Make replies work
@ -18,7 +32,10 @@ func stt(broker *tg.Broker, update tg.APIMessage) {
update.Voice = update.ReplyTo.Voice
}
broker.SendTextMessage(update.Chat, fmt.Sprintf("Ok! Eccoti intanto delle informazioni sul file: \n<b>Durata: </b> %ds\n<b>Dimensione:</b> %d\n<b>Tipo:</b> %s", update.Voice.Duration, update.Voice.FileSize, *update.Voice.MimeType), &update.MessageID)
// Reject voice messages longer than 20 seconds: tell the user (as a reply to
// the triggering message) and stop before any file is fetched.
// The call takes exactly three arguments: chat, text, and the message to
// reply to — no empty argument slot between the text and the reply ID.
if update.Voice.Duration > 20 {
	broker.SendTextMessage(update.Chat, "L'audio dura un bel po' (>20s), al momento non posso farci nulla :S", &update.MessageID)
	return
}
broker.GetFile(update.Voice.FileID, func(broker *tg.Broker, data tg.BrokerUpdate) {
if data.Type == tg.BError {
@ -34,14 +51,30 @@ func stt(broker *tg.Broker, update tg.APIMessage) {
return
}
err = ioutil.WriteFile(update.Voice.FileID, bytes, 0755)
resp, err := sttClient.Recognize(sttCtx, &speechpb.RecognizeRequest{
Config: &speechpb.RecognitionConfig{
Encoding: speechpb.RecognitionConfig_OGG_OPUS,
SampleRateHertz: 16000,
LanguageCode: "it-IT",
},
Audio: &speechpb.RecognitionAudio{
AudioSource: &speechpb.RecognitionAudio_Content{Content: bytes},
},
})
if err != nil {
log.Printf("[stt] Save to file error: %s\n", err.Error())
log.Printf("[stt] STT request error: %s\n", err.Error())
broker.SendTextMessage(update.Chat, "<b>ERRORE!</b> @hamcha controlla la console!", &update.MessageID)
return
}
broker.SendTextMessage(update.Chat, fmt.Sprintf("Salvata registrazione su disco! (fname %s)", update.Voice.FileID), &update.MessageID)
out := "Questo è quello che ho capito:\n"
// Append every alternative of every recognition result to the reply, one per
// line: the quoted transcript followed by its confidence as a percentage.
// The format string consumes both operands (previously Confidence had no
// matching verb, so fmt appended a "%!(EXTRA ...)" marker to the message).
// NOTE(review): the API may report a confidence of 0 when no estimate is
// available — confirm whether "(0%)" should be hidden in that case.
for _, result := range resp.Results {
	for _, alt := range result.Alternatives {
		out += fmt.Sprintf("\"%s\" (%.0f%%)\n", alt.Transcript, alt.Confidence*100)
	}
}
broker.SendTextMessage(update.Chat, out, &update.MessageID)
})
}
}