This repository has been archived on 2023-07-05. You can view files and clone it, but cannot push or open issues or pull requests.
clessy/mods/stt.go

100 lines
2.8 KiB
Go

package main
import (
"context"
"fmt"
"log"
"encoding/base64"
speech "cloud.google.com/go/speech/apiv1"
"git.fromouter.space/hamcha/tg"
"google.golang.org/api/option"
speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
)
// Speech-to-text state shared by the functions in this file.
var (
	// sttClient is the Google Cloud Speech client; stt_init assigns it.
	sttClient *speech.Client
	// sttCtx is the context passed to Speech API calls; stt_init assigns it.
	sttCtx context.Context
)
// stt_init prepares the package-level state needed for speech recognition.
//
// It stores a background context in sttCtx and creates sttClient using the
// service account file named by *gapifile. It panics if the client cannot be
// created.
func stt_init() {
	sttCtx = context.Background()

	var err error
	sttClient, err = speech.NewClient(sttCtx, option.WithServiceAccountFile(*gapifile))
	if err != nil {
		// %w renders the same message as before but keeps the underlying
		// error reachable through errors.Is / errors.As on the panic value.
		panic(fmt.Errorf("Could not initialize Google Speech API client: %w", err))
	}
}
// stt_message handles the "stt" command: it downloads a voice message,
// sends it to the Google Cloud Speech API and replies with the transcript.
//
// The voice message is taken from the message being replied to when that
// message carries one, otherwise from the update itself. The command is
// rejected with an explanatory reply when there is no voice message or when
// its duration exceeds 60. Any failure while fetching, decoding or
// recognizing the audio is logged and answered with a generic error reply.
//
// broker is used to fetch the file and to send every reply; update is the
// incoming message. All replies are sent as replies to update.MessageID.
func stt_message(broker *tg.Broker, update tg.APIMessage) {
	if !isCommand(update, "stt") {
		return
	}

	const (
		// maxDuration is the longest voice message accepted for recognition.
		maxDuration = 60
		// errorReply is the generic reply sent after a logged failure.
		errorReply = "<b>ERRORE!</b> @hamcha controlla la console!"
	)

	// reply answers the command message with text. The broker is passed in
	// explicitly because the GetFile callback receives its own broker value.
	reply := func(b *tg.Broker, text string) {
		b.SendTextMessage(update.Chat, text, &tg.MessageOptions{
			ReplyID: &update.MessageID,
		})
	}

	// Make replies work: prefer the voice message of the replied-to message.
	voice := update.Voice
	if update.ReplyTo != nil && update.ReplyTo.Voice != nil {
		voice = update.ReplyTo.Voice
	}

	// Make sure it's a voice message.
	if voice == nil {
		reply(broker, "Non vedo un messaggio vocale da ascoltare")
		return
	}
	if voice.Duration > maxDuration {
		reply(broker, "L'audio dura un bel po' (> 1 minuto), al momento non posso farci nulla :S")
		return
	}

	broker.GetFile(voice.FileID, func(broker *tg.Broker, data tg.BrokerUpdate) {
		if data.Type == tg.BError {
			// Guard the dereference so a broker error without details does
			// not panic inside the callback.
			if data.Error != nil {
				log.Printf("[stt] Received error from broker: %s\n", *data.Error)
			} else {
				log.Println("[stt] Received error from broker (no details provided)")
			}
			reply(broker, errorReply)
			return
		}

		// A non-error update without a payload cannot be decoded.
		if data.Bytes == nil {
			log.Println("[stt] Broker update carries no file data")
			reply(broker, errorReply)
			return
		}

		// The payload is base64-encoded.
		audio, err := base64.StdEncoding.DecodeString(*data.Bytes)
		if err != nil {
			log.Printf("[stt] Base64 decode error: %s\n", err.Error())
			reply(broker, errorReply)
			return
		}

		// Show a "typing" chat action while the recognition request runs.
		broker.SendChatAction(update.Chat, tg.ActionTyping)

		resp, err := sttClient.Recognize(sttCtx, &speechpb.RecognizeRequest{
			Config: &speechpb.RecognitionConfig{
				Encoding:        speechpb.RecognitionConfig_OGG_OPUS,
				SampleRateHertz: 16000,
				LanguageCode:    "it-IT",
			},
			Audio: &speechpb.RecognitionAudio{
				AudioSource: &speechpb.RecognitionAudio_Content{Content: audio},
			},
		})
		if err != nil {
			log.Printf("[stt] STT request error: %s\n", err.Error())
			reply(broker, errorReply)
			return
		}

		// List every alternative of every result, one quoted line each.
		out := "Questo è quello che ho capito:\n"
		found := false
		for _, result := range resp.Results {
			for _, alt := range result.Alternatives {
				out += fmt.Sprintf("\"%v\"\n", alt.Transcript)
				found = true
			}
		}

		// Without any transcript, say so explicitly instead of sending a
		// header followed by nothing.
		if !found {
			out = "Non sono riuscito a capire nulla :S"
		}
		reply(broker, out)
	})
}