From 83a918a861f5ab9ef2f1033a26ba61993808b696 Mon Sep 17 00:00:00 2001
From: Hamcha
Date: Sun, 14 Feb 2016 11:07:21 +0100
Subject: [PATCH] stats: Better attempt at filtering

---
 stats/stats.go | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 stats/web.go   | 18 +-----------------
 2 files changed, 55 insertions(+), 17 deletions(-)

diff --git a/stats/stats.go b/stats/stats.go
index 00be7ed..1361b7f 100644
--- a/stats/stats.go
+++ b/stats/stats.go
@@ -352,3 +352,57 @@ func processWords(message tg.APIMessage) {
 		log.Println("[processWords] Error encountered: " + err.Error())
 	}
 }
+
+// FILTER lists words that are too common to be interesting (mostly Italian
+// filler words); filteredWords always drops them.
+var FILTER = []string{
+	"che", "non", "per", "una", "sono", "come", "con", "anche", "piu", "tipo",
+	"perché", "era", "del", "poi", "fare", "gli", "cosa", "solo", "fatto",
+	"hai", "quello", "quando", "quindi", "ora", "sia", "roba", "mio", "son", "tutto",
+	"tutti", "uno", "the", "prima", "dire", "cosi", "cazzo", "visto", "sei",
+	"quanto", "dei", "sta", "credo", "mai", "tanto", "ancora", "nel", "sto", "pure",
+	"della", "c'è", "fai", "alla", "dai", "due", "gia", "dove", "puoi", "oddio",
+	"hanno", "no", "altro", "comunque", "magari", "gioco", "essere", "sì", "l'ho",
+	"gente", "chi", "meno", "sempre", "pare", "bene", "devo", "vuoi", "lui", "sul",
+	"quella", "po", "vero", "quel", "anni", "tra", "euro", "senza", "cose",
+	"avere", "also", "han", "parte", "tempo", "perche", "ogni", "mia", "detto",
+}
+
+// USAGE_THRESHOLD is the minimum value the highest single count of a word
+// must reach for that word to be kept by filteredWords.
+const USAGE_THRESHOLD = 3
+
+// filteredWords returns the entries of words that are worth reporting: words
+// listed in FILTER are dropped, and so are words whose highest single count
+// is below USAGE_THRESHOLD. The returned map is freshly allocated; its values
+// are assigned directly from words.
+func filteredWords() map[string]UserCount {
+	// Index FILTER once per call so each word costs a single map lookup
+	// instead of a scan over the whole list.
+	stop := make(map[string]struct{}, len(FILTER))
+	for _, w := range FILTER {
+		stop[w] = struct{}{}
+	}
+
+	filtered := make(map[string]UserCount)
+	for word, usage := range words {
+		// Check for too common
+		if _, ok := stop[word]; ok {
+			continue
+		}
+
+		// Check for not common enough
+		peak := uint64(0)
+		for _, count := range usage {
+			if count > peak {
+				peak = count
+			}
+		}
+		if peak < USAGE_THRESHOLD {
+			continue
+		}
+
+		filtered[word] = usage
+	}
+	return filtered
+}
diff --git a/stats/web.go b/stats/web.go
index b22f732..6a6bd67 100644
--- a/stats/web.go
+++ b/stats/web.go
@@ -22,25 +22,9 @@ func webUsers(rw http.ResponseWriter, req *http.Request) {
 	}
 }
 
-const USAGE_THRESHOLD = 3
-
 func webWords(rw http.ResponseWriter, req *http.Request) {
-	// Filter words under a certain usage
-	filtered := make(map[string]UserCount)
-	for word, usage := range words {
-		total := uint64(0)
-		for _, count := range usage {
-			total += count
-		}
-
-		if total < USAGE_THRESHOLD {
-			continue
-		}
-		filtered[word] = usage
-	}
-
 	rw.Header().Set("Content-Type", "application/json")
-	err := json.NewEncoder(rw).Encode(filtered)
+	err := json.NewEncoder(rw).Encode(filteredWords())
 	if err != nil {
 		log.Println("[webWords] JSON Encoding error: " + err.Error())
 	}