87 lines
2.6 KiB
Go
87 lines
2.6 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log"
|
|
|
|
"encoding/base64"
|
|
|
|
speech "cloud.google.com/go/speech/apiv1"
|
|
"git.fromouter.space/hamcha/tg"
|
|
"google.golang.org/api/option"
|
|
speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
|
|
)
|
|
|
|
var sttClient *speech.Client
|
|
var sttCtx context.Context
|
|
|
|
func stt_init() {
|
|
sttCtx = context.Background()
|
|
var err error
|
|
sttClient, err = speech.NewClient(sttCtx, option.WithServiceAccountFile(*gapifile))
|
|
if err != nil {
|
|
panic(fmt.Errorf("Could not initialize Google Speech API client: %s", err.Error()))
|
|
}
|
|
}
|
|
|
|
func stt_message(broker *tg.Broker, update tg.APIMessage) {
|
|
if isCommand(update, "stt") {
|
|
// Make replies work
|
|
if update.ReplyTo != nil && update.ReplyTo.Voice != nil {
|
|
update.Voice = update.ReplyTo.Voice
|
|
}
|
|
|
|
// Make sure it's a voice message
|
|
if update.Voice == nil {
|
|
broker.SendTextMessage(update.Chat, "Non vedo un messaggio vocale da ascoltare", &update.MessageID)
|
|
return
|
|
}
|
|
|
|
if update.Voice.Duration > 60 {
|
|
broker.SendTextMessage(update.Chat, "L'audio dura un bel po' (> 1 minuto), al momento non posso farci nulla :S", &update.MessageID)
|
|
return
|
|
}
|
|
|
|
broker.GetFile(update.Voice.FileID, func(broker *tg.Broker, data tg.BrokerUpdate) {
|
|
if data.Type == tg.BError {
|
|
log.Printf("[stt] Received error from broker: %s\n", *data.Error)
|
|
broker.SendTextMessage(update.Chat, "<b>ERRORE!</b> @hamcha controlla la console!", &update.MessageID)
|
|
return
|
|
}
|
|
|
|
bytes, err := base64.StdEncoding.DecodeString(*data.Bytes)
|
|
if err != nil {
|
|
log.Printf("[stt] Base64 decode error: %s\n", err.Error())
|
|
broker.SendTextMessage(update.Chat, "<b>ERRORE!</b> @hamcha controlla la console!", &update.MessageID)
|
|
return
|
|
}
|
|
|
|
broker.SendChatAction(update.Chat, tg.ActionTyping)
|
|
resp, err := sttClient.Recognize(sttCtx, &speechpb.RecognizeRequest{
|
|
Config: &speechpb.RecognitionConfig{
|
|
Encoding: speechpb.RecognitionConfig_OGG_OPUS,
|
|
SampleRateHertz: 16000,
|
|
LanguageCode: "it-IT",
|
|
},
|
|
Audio: &speechpb.RecognitionAudio{
|
|
AudioSource: &speechpb.RecognitionAudio_Content{Content: bytes},
|
|
},
|
|
})
|
|
if err != nil {
|
|
log.Printf("[stt] STT request error: %s\n", err.Error())
|
|
broker.SendTextMessage(update.Chat, "<b>ERRORE!</b> @hamcha controlla la console!", &update.MessageID)
|
|
return
|
|
}
|
|
|
|
out := "Questo è quello che ho capito:\n"
|
|
for _, result := range resp.Results {
|
|
for _, alt := range result.Alternatives {
|
|
out += fmt.Sprintf("\"%v\"\n", alt.Transcript)
|
|
}
|
|
}
|
|
|
|
broker.SendTextMessage(update.Chat, out, &update.MessageID)
|
|
})
|
|
}
|
|
}
|