diff --git a/mods/main.go b/mods/main.go
index f84ee2b..e6b2ebe 100644
--- a/mods/main.go
+++ b/mods/main.go
@@ -48,6 +48,7 @@ var mods = map[string]Mod{
OnMessage: talk,
},
"stt": {
+ OnInit: initstt,
OnMessage: stt,
},
}
@@ -90,6 +91,7 @@ var gillmt *string
var sourcesans *string
var proverbi *string
var wittoken *string
+var gapifile *string
func main() {
brokerAddr := flag.String("broker", "localhost:7314", "Broker address:port")
@@ -100,6 +102,7 @@ func main() {
macropath = flag.String("macropath", "macros.json", "Path to macros db (JSON)")
proverbi = flag.String("proverbi", "proverbi.txt", "Path to proverbi pairs (separated by /)")
wittoken = flag.String("wit", "", "Wit.ai token")
+ gapifile = flag.String("gapi", "", "Google API Service Credentials file")
disable := flag.String("disable", "", "Blacklist mods (separated by comma)")
enable := flag.String("enable", "", "Whitelist mods (separated by comma)")
flag.Parse()
diff --git a/mods/stt.go b/mods/stt.go
index 4848593..6d9c954 100644
--- a/mods/stt.go
+++ b/mods/stt.go
@@ -1,16 +1,30 @@
package main
import (
+ "context"
"fmt"
"log"
"encoding/base64"
- "io/ioutil"
-
+ speech "cloud.google.com/go/speech/apiv1"
"github.com/hamcha/clessy/tg"
+ "google.golang.org/api/option"
+ speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
)
+// Package-level state for the stt mod; both values are set by initstt.
+var (
+	// sttClient is the Google Cloud Speech client created by initstt.
+	sttClient *speech.Client
+	// sttCtx is the context the client was created with.
+	sttCtx context.Context
+)
+
+// initstt creates the Google Cloud Speech client using the service account
+// file whose path is held in gapifile. It panics when the client cannot be
+// created.
+func initstt() {
+	sttCtx = context.Background()
+	credentials := option.WithServiceAccountFile(*gapifile)
+	client, err := speech.NewClient(sttCtx, credentials)
+	sttClient = client
+	if err != nil {
+		panic(fmt.Errorf("Could not initialize Google Speech API client: %s", err.Error()))
+	}
+}
+
func stt(broker *tg.Broker, update tg.APIMessage) {
if isCommand(update, "stt") {
// Make replies work
@@ -18,7 +32,10 @@ func stt(broker *tg.Broker, update tg.APIMessage) {
update.Voice = update.ReplyTo.Voice
}
- broker.SendTextMessage(update.Chat, fmt.Sprintf("Ok! Eccoti intanto delle informazioni sul file: \nDurata: %ds\nDimensione: %d\nTipo: %s", update.Voice.Duration, update.Voice.FileSize, *update.Voice.MimeType), &update.MessageID)
+		// Voice notes longer than 20 seconds are refused with a reply
+		// instead of being sent for transcription.
+		if update.Voice.Duration > 20 {
+			broker.SendTextMessage(update.Chat, "L'audio dura un bel po' (>20s), al momento non posso farci nulla :S", &update.MessageID)
+			return
+		}
broker.GetFile(update.Voice.FileID, func(broker *tg.Broker, data tg.BrokerUpdate) {
if data.Type == tg.BError {
@@ -34,14 +51,30 @@ func stt(broker *tg.Broker, update tg.APIMessage) {
return
}
- err = ioutil.WriteFile(update.Voice.FileID, bytes, 0755)
+			// Submit the audio held in bytes to Google Cloud Speech for
+			// recognition, declared as Italian OGG/Opus sampled at 16 kHz.
+			recognitionConfig := &speechpb.RecognitionConfig{
+				Encoding:        speechpb.RecognitionConfig_OGG_OPUS,
+				SampleRateHertz: 16000,
+				LanguageCode:    "it-IT",
+			}
+			voiceAudio := &speechpb.RecognitionAudio{
+				AudioSource: &speechpb.RecognitionAudio_Content{Content: bytes},
+			}
+			resp, err := sttClient.Recognize(sttCtx, &speechpb.RecognizeRequest{
+				Config: recognitionConfig,
+				Audio:  voiceAudio,
+			})
if err != nil {
- log.Printf("[stt] Save to file error: %s\n", err.Error())
+ log.Printf("[stt] STT request error: %s\n", err.Error())
broker.SendTextMessage(update.Chat, "ERRORE! @hamcha controlla la console!", &update.MessageID)
return
}
- broker.SendTextMessage(update.Chat, fmt.Sprintf("Salvata registrazione su disco! (fname %s)", update.Voice.FileID), &update.MessageID)
+			// Build the reply: one line per recognised alternative, with the
+			// transcript in quotes followed by its confidence as a percentage.
+			// The format string now has a verb for each argument (the previous
+			// one had a single verb for two arguments, which made fmt append
+			// "%!(EXTRA ...)" to the message) and ends with a newline so that
+			// successive transcripts do not run together.
+			out := "Questo è quello che ho capito:\n"
+			for _, result := range resp.Results {
+				for _, alt := range result.Alternatives {
+					out += fmt.Sprintf("\"%v\" (%.0f%%)\n", alt.Transcript, alt.Confidence*100)
+				}
+			}
+
+			broker.SendTextMessage(update.Chat, out, &update.MessageID)
})
}
}