This repository has been archived on 2023-07-05. You can view files and clone it, but cannot push or open issues or pull requests.
clessy/stats/stats.go

422 lines
10 KiB
Go
Raw Normal View History

2016-02-10 14:31:53 +00:00
package main
import (
"encoding/binary"
2016-02-13 22:49:48 +00:00
"encoding/json"
2016-02-10 14:31:53 +00:00
"log"
"strconv"
2016-02-13 22:49:48 +00:00
"strings"
2016-02-10 14:31:53 +00:00
"time"
"github.com/boltdb/bolt"
"github.com/hamcha/clessy/tg"
2016-02-10 14:31:53 +00:00
)
// Message type identifiers. Each value is the index of that type's counter
// in Stats.ByType and, as a single byte, its key in the "types" bucket.
const (
	MessageTypeText int = iota
	MessageTypeAudio
	MessageTypePhoto
	MessageTypeSticker
	MessageTypeVideo
	MessageTypeVoice
	MessageTypeContact
	MessageTypeLocation
	MessageTypeDocument

	// MessageTypeMax is the number of message types defined above; it is
	// used as the length of Stats.ByType.
	MessageTypeMax
)
type Stats struct {
2016-02-12 16:11:33 +00:00
ByUserCount map[string]uint64
ByWeekday [7]uint64
ByHour [24]uint64
ByType [MessageTypeMax]uint64
2016-02-13 20:58:05 +00:00
ByDay map[string]uint64
2016-02-12 16:11:33 +00:00
TodayDate time.Time
Today uint64
TotalCount uint64
2016-02-10 14:31:53 +00:00
}
// stats is the process-wide counter set: filled by loadStats and mutated by
// updateStats and updateDate.
var stats Stats
2016-02-13 22:49:48 +00:00
// UserCount maps a username to a usage count.
type UserCount map[string]uint64

// words maps each recorded word to the per-user count of how many times it
// was used; it is filled by loadStats and updated by processWords.
var words map[string]UserCount
2016-02-10 14:31:53 +00:00
// MakeUint decodes a uvarint-encoded counter read from the database.
//
// bval is the raw stored value; bucketName and key are only used to label the
// log line emitted when the value is missing (nil) or cannot be decoded. In
// both of those cases 0 is returned.
func MakeUint(bval []byte, bucketName string, key string) uint64 {
	if bval == nil {
		log.Printf("[%s] Key \"%s\" does not exist, set to 0\n", bucketName, key)
		return 0
	}

	decoded, read := binary.Uvarint(bval)
	if read <= 0 {
		// Uvarint reports n <= 0 for a truncated buffer or a 64-bit overflow.
		log.Printf("[%s] Value of key \"%s\" is NaN: %v\r\n", bucketName, key, bval)
		return 0
	}
	return decoded
}
// PutUint encodes value as a uvarint and returns only the bytes that were
// written, ready to be stored as a database value.
func PutUint(value uint64) []byte {
	// MaxVarintLen64 (10 bytes) is the largest encoding a uint64 can need.
	var scratch [binary.MaxVarintLen64]byte
	written := binary.PutUvarint(scratch[:], value)
	return scratch[:written]
}
2016-02-10 14:31:53 +00:00
func loadStats() {
// Load today
stats.TodayDate = time.Now()
err := db.Update(func(tx *bolt.Tx) error {
2016-02-10 14:31:53 +00:00
b, err := tx.CreateBucketIfNotExists([]byte("global"))
if err != nil {
return err
}
// Load total messages counter
2016-02-12 16:11:33 +00:00
stats.TotalCount = MakeUint(b.Get([]byte("count")), "global", "count")
2016-02-10 14:31:53 +00:00
// Load hour counters
b, err = tx.CreateBucketIfNotExists([]byte("hour"))
if err != nil {
return err
}
for i := 0; i < 24; i++ {
2016-02-12 16:11:33 +00:00
stats.ByHour[i] = MakeUint(b.Get([]byte{byte(i)}), "hour", strconv.Itoa(i))
2016-02-10 14:31:53 +00:00
}
// Load weekday counters
b, err = tx.CreateBucketIfNotExists([]byte("weekday"))
if err != nil {
return err
}
for i := 0; i < 7; i++ {
2016-02-12 16:11:33 +00:00
stats.ByWeekday[i] = MakeUint(b.Get([]byte{byte(i)}), "weekday", strconv.Itoa(i))
2016-02-10 14:31:53 +00:00
}
2016-02-13 20:58:05 +00:00
// Load day counters
2016-02-13 20:59:39 +00:00
stats.ByDay = make(map[string]uint64)
2016-02-10 14:31:53 +00:00
b, err = tx.CreateBucketIfNotExists([]byte("date"))
if err != nil {
return err
}
2016-02-13 20:58:05 +00:00
b.ForEach(func(day, messages []byte) error {
stats.ByDay[string(day)] = MakeUint(messages, "date", string(day))
return nil
})
2016-02-10 14:31:53 +00:00
todayKey := stats.TodayDate.Format("2006-1-2")
2016-02-12 16:11:33 +00:00
stats.Today = MakeUint(b.Get([]byte(todayKey)), "date", todayKey)
2016-02-10 14:31:53 +00:00
// Load user counters
stats.ByUserCount = make(map[string]uint64)
b, err = tx.CreateBucketIfNotExists([]byte("users-count"))
if err != nil {
return err
}
b.ForEach(func(user, messages []byte) error {
stats.ByUserCount[string(user)] = MakeUint(messages, "users-count", string(user))
return nil
2016-02-10 14:31:53 +00:00
})
// Load type counters
b, err = tx.CreateBucketIfNotExists([]byte("types"))
if err != nil {
return err
}
for i := 0; i < MessageTypeMax; i++ {
2016-02-12 16:11:33 +00:00
stats.ByType[i] = MakeUint(b.Get([]byte{byte(i)}), "types", strconv.Itoa(i))
2016-02-10 14:31:53 +00:00
}
2016-02-13 22:49:48 +00:00
// Load dictionary
b, err = tx.CreateBucketIfNotExists([]byte("words"))
if err != nil {
return err
}
words = make(map[string]UserCount)
b.ForEach(func(word, ucount []byte) error {
var val UserCount
err := json.Unmarshal(ucount, &val)
if err != nil {
return err
}
words[string(word)] = val
return nil
})
2016-02-10 14:31:53 +00:00
return nil
})
assert(err)
}
func updateDate() {
2016-02-13 20:58:05 +00:00
dateKey := stats.TodayDate.Format("2006-1-2")
err := db.Update(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("date"))
2016-02-13 20:58:05 +00:00
err := b.Put([]byte(dateKey), PutUint(stats.Today))
if err != nil {
return err
}
return nil
})
if err != nil {
log.Println("[updateDate] Couldn't save last day stats: " + err.Error())
}
2016-02-13 20:58:05 +00:00
stats.ByDay[dateKey] = stats.Today
stats.TodayDate = time.Now()
stats.Today = 0
}
// updateMean folds newValue into a running integer mean.
//
// currentMean is the mean of the meanCount samples seen so far; the result is
// the mean over meanCount+1 samples, truncated by integer division.
func updateMean(currentMean, meanCount, newValue uint64) uint64 {
	total := currentMean*meanCount + newValue
	samples := meanCount + 1
	return total / samples
}
func updateStats(message tg.APIMessage) {
//
// Local update
//
// DB Update flags
updatetype := 0
// Update total count
stats.TotalCount++
// Update individual user's count
username := message.User.Username
val, exists := stats.ByUserCount[username]
if !exists {
val = 0
}
stats.ByUserCount[username] = val + 1
// Update time counters
now := time.Now()
hour := now.Hour()
wday := now.Weekday()
stats.ByHour[hour]++
stats.ByWeekday[wday]++
// Check for day reset
if now.Day() != stats.TodayDate.Day() {
updateDate()
}
stats.Today++
// Text message
if message.Text != nil {
stats.ByType[MessageTypeText]++
updatetype = MessageTypeText
2016-02-13 22:49:48 +00:00
// Process words
processWords(message)
}
// Audio message
if message.Audio != nil {
stats.ByType[MessageTypeAudio]++
updatetype = MessageTypeAudio
}
// Photo
if message.Photo != nil {
stats.ByType[MessageTypePhoto]++
updatetype = MessageTypePhoto
}
// Sticker
if message.Sticker != nil {
stats.ByType[MessageTypeSticker]++
updatetype = MessageTypeSticker
}
// Video
if message.Video != nil {
stats.ByType[MessageTypeVideo]++
updatetype = MessageTypeVideo
}
// Voice message
if message.Voice != nil {
stats.ByType[MessageTypeVoice]++
updatetype = MessageTypeVoice
}
// Contact
if message.Contact != nil {
stats.ByType[MessageTypeContact]++
updatetype = MessageTypeContact
}
// Location
if message.Location != nil {
stats.ByType[MessageTypeLocation]++
updatetype = MessageTypeLocation
}
// Document
if message.Document != nil {
stats.ByType[MessageTypeDocument]++
updatetype = MessageTypeDocument
}
//
// DB Update
//
err := db.Update(func(tx *bolt.Tx) error {
// Update total counters
b := tx.Bucket([]byte("global"))
err := b.Put([]byte("count"), PutUint(stats.TotalCount))
if err != nil {
return err
}
// Update time counters
b = tx.Bucket([]byte("hour"))
err = b.Put([]byte{byte(hour)}, PutUint(stats.ByHour[hour]))
if err != nil {
return err
}
b = tx.Bucket([]byte("weekday"))
2016-02-11 15:45:42 +00:00
err = b.Put([]byte{byte(wday)}, PutUint(stats.ByWeekday[wday]))
if err != nil {
return err
}
b = tx.Bucket([]byte("date"))
todayKey := stats.TodayDate.Format("2006-1-2")
err = b.Put([]byte(todayKey), PutUint(stats.Today))
if err != nil {
return err
}
// Update user counters
b = tx.Bucket([]byte("users-count"))
err = b.Put([]byte(username), PutUint(stats.ByUserCount[username]))
if err != nil {
return err
}
// Update type counter
b = tx.Bucket([]byte("types"))
err = b.Put([]byte{byte(updatetype)}, PutUint(stats.ByType[updatetype]))
if err != nil {
return err
}
return nil
})
if err != nil {
log.Println("[updateStats] Got error while updating DB: " + err.Error())
}
}
2016-02-13 22:49:48 +00:00
func processWords(message tg.APIMessage) {
if len(*(message).Text) < 3 {
return
}
wordList := strings.Split(*(message.Text), " ")
err := db.Update(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("words"))
for _, word := range wordList {
if len(word) < 3 {
2016-02-13 23:07:50 +00:00
continue
}
2016-02-13 23:14:51 +00:00
word = strings.ToLower(word)
if strings.HasPrefix(word, "http") {
continue
}
2016-02-13 23:05:32 +00:00
word = strings.Trim(word, " ?!.,:;/-_()[]{}'\"+=*^\n")
2016-02-13 22:59:24 +00:00
count, ok := words[word]
if !ok {
count = make(UserCount)
2016-02-13 22:49:48 +00:00
}
val, ok := count[message.User.Username]
if !ok {
val = 0
}
count[message.User.Username] = val + 1
2016-02-13 23:36:18 +00:00
words[word] = count
2016-02-13 22:49:48 +00:00
j, err := json.Marshal(count)
if err != nil {
return err
}
b.Put([]byte(word), j)
}
return nil
})
if err != nil {
log.Println("[processWords] Error encountered: " + err.Error())
}
}
2016-02-14 10:07:21 +00:00
// FILTER lists the words that filteredWords always leaves out of its result
// (mostly very common Italian and English words). Entries are lowercase and
// grouped below by initial letter.
var FILTER = []string{
	"100",
	"abbastanza", "abbia", "abbiamo", "adesso", "again", "agli", "ah",
	"alcune", "alcuni", "all", "all'inizio", "alla", "alle", "allo", "allora",
	"almeno", "also", "alto", "altra", "altre", "altri", "altrimenti", "altro",
	"amici", "amico", "amo", "anche", "ancora", "and", "andare", "andato",
	"anime", "anni", "anzi", "appena", "apposta", "are", "assieme", "avanti",
	"aver", "avere", "avete", "aveva", "avevano", "avevi", "avevo", "avrebbe",
	"avrei", "avuto",
	"base", "bel", "bella", "belle", "belli", "bellissimo", "bello", "ben",
	"bene", "benissimo", "bisogno", "bravo", "brutta", "brutto",
	"cambia", "che", "chi", "cioe", "cioè", "ciò", "coi", "col", "com'è",
	"come", "con", "cos'è", "cosa", "così", "cui",
	"dai", "dal", "dalla", "dalle", "danno", "dare", "degli", "dei", "del",
	"della", "delle", "dello", "deve", "devi", "devo", "dove",
	"e", "era", "erano", "eri", "ero",
	"fa", "fai", "fanno", "finché",
	"gia", "già", "giù", "gli",
	"hai", "han", "hanno", "have",
	"il", "in", "io",
	"l'altro", "l'avevo", "l'ha", "l'hai", "l'hanno", "l'ho", "la", "lei",
	"lui", "lì",
	"ma", "me", "meno", "mentre", "mia", "mie", "miei", "mio", "molti", "molto",
	"negli", "nei", "nel", "nella", "nelle", "nello", "no", "noi", "non",
	"not", "nuovi", "nuovo",
	"ok", "oltre", "oppure", "ora",
	"per", "perche", "perchè", "perché", "però", "piu", "più", "po", "poi",
	"puoi", "pure", "può",
	"qua", "qualche", "quale", "quando", "quanti", "quanto", "quasi", "quei",
	"quel", "quella", "quelle", "quelli", "quello", "questa", "queste",
	"questi", "questo", "qui", "quindi",
	"sai", "sarei", "sarà", "se", "sei", "sempre", "sennò", "senza", "si",
	"sia", "siamo", "siano", "siete", "son", "sono", "sopra", "sta", "stai",
	"ste", "sti", "stiamo", "sto", "sua", "sue", "sui", "sul", "sulla",
	"sulle", "suo", "suoi", "sì",
	"tanta", "tante", "tanti", "tanto", "te", "that", "the", "then", "too",
	"tra", "troppi", "troppo", "tua", "tuo", "tuoi", "tutta", "tutte", "tutti",
	"tutto",
	"un'altra", "una", "uno", "usa", "usi", "uso",
	"vai", "verso", "via", "voglia", "voglio", "vogliono", "voi", "volete",
	"voleva", "volevo", "volta", "volte", "vorrei", "vuoi", "vuol", "vuole",
	"was",
}
2016-02-14 18:50:37 +00:00
// USAGE_THRESHOLD is the minimum per-user count filteredWords requires:
// user counts below it are skipped, and a word with no user at or above it
// is left out of the result.
const USAGE_THRESHOLD = 10
2016-02-14 10:07:21 +00:00
func filteredWords() map[string]UserCount {
filtered := make(map[string]UserCount)
for word, usage := range words {
// Check for too common
2016-02-14 18:50:37 +00:00
isfilter := false
for _, filter := range FILTER {
if word == filter {
isfilter = true
break
2016-02-14 10:07:21 +00:00
}
2016-02-14 18:50:37 +00:00
}
if isfilter {
continue
}
2016-02-14 10:07:21 +00:00
// Check for not common enough
2016-02-14 18:50:37 +00:00
good := false
ucount := make(UserCount)
for user, count := range usage {
if count < USAGE_THRESHOLD {
continue
}
if !good {
good = true
2016-02-14 10:07:21 +00:00
}
2016-02-14 18:50:37 +00:00
ucount[user] = count
2016-02-14 10:07:21 +00:00
}
2016-02-14 18:50:37 +00:00
if !good {
2016-02-14 10:07:21 +00:00
continue
}
filtered[word] = usage
}
return filtered
}