diff --git a/data/cbow.bin b/data/cbow.bin index 144976b..e2ea366 100644 Binary files a/data/cbow.bin and b/data/cbow.bin differ diff --git a/data/cbow.txt b/data/cbow.txt index cc6e145..2b26064 100644 --- a/data/cbow.txt +++ b/data/cbow.txt @@ -2540,7 +2540,7 @@ _BSIDE_ (A) (^) (WW) (BB) (instant) fuse \ creatures you control gain deathtouch (A) (^) (^) (BB) (BB) (sorcery) choose a color . target player reveals his or her hand and discards all cards of that color . -(Y) (^) (^) (^) (RR) (GG) (legendary) (creature) hellion (&/) (/&) devour X \ where X is the number of creatures devoured this way +(Y) (^) (^) (^) (RR) (GG) (legendary) (creature) hellion (&/) (/&) devour X , where X is the number of creatures devoured this way (A) (^) (GG) (GG) (sorcery) put a &^^^/ /&^^^ green beast creature token onto the battlefield . then if an opponent controls more creatures than you , return @ to its owner's hand . @@ -14104,7 +14104,7 @@ _BSIDE_ (Y) (planeswalker) chandra ((&^^^^)) +&^ : @ deals &^^ damage to target (O) (^) (UU) (instant) return target nonland permanent to its owner's hand . -(N) (XX) (XX) (UU) (sorcery) x target creatures gain islandwalk until end of turn . +(N) (XX) (XX) (UU) (sorcery) X target creatures gain islandwalk until end of turn . (A) (^) (^) (^) (^) (^) (WW) (WW) (creature) avatar (&^^^^/) (/&^^^^^^^) vigilance \ when @ enters the battlefield , for each opponent , exile up to one target creature that player controls and that player gains life equal to its power . @@ -23642,7 +23642,7 @@ _BSIDE_ (N) (creature) werewolf (&^^^^^/) (/&^^^^^) @ attacks each turn if able (O) (^) (GG) (creature) elf (&^^/) (/&^) T : untap target creature . -(O) (XX) (RR) (sorcery) x target creatures can't block this turn . +(O) (XX) (RR) (sorcery) X target creatures can't block this turn . (O) (^) (^) (^) (WW) (instant) put any number of target artifact cards from your graveyard on top of your library . \ draw a card . @@ -25272,7 +25272,7 @@ _BSIDE_ (A) (^) (^) (^) (UU) (RR) (instant) put a &^^^/ /&^ red elemental creatu (O) (^) (BB) (sorcery) target player reveals his or her hand . you choose a nonland card from it . that player puts that card into his or her library third from the top . -(O) (XX) (GG) (instant) cast @ only during the declare blockers step . \ x target attacking creatures become blocked . @ deals &^ damage to each of those creatures . +(O) (XX) (GG) (instant) cast @ only during the declare blockers step . \ X target attacking creatures become blocked . @ deals &^ damage to each of those creatures . (O) (^) (^) (^) (^) (artifact) (creature) horse (&^^/) (/&^^) metalcraft ~ @ gets +&^^/ /+&^^ as long as you control three or more artifacts . @@ -26638,7 +26638,7 @@ _BSIDE_ (N) (^) (RR) (instant) fuse \ @ deals &^^ damage to target creature or p (O) (GG) (creature) elf druid (&^/) (/&^) morph GG \ tap two untapped elves you control : add one mana of any color to your mana pool . -(N) (XX) (RR) (instant) x target blocked creatures assign their combat damage this turn as though they weren't blocked . +(N) (XX) (RR) (instant) X target blocked creatures assign their combat damage this turn as though they weren't blocked . (O) (^) (^) (RR) (creature) goblin shaman (&^/) (/&^) whenever @ becomes tapped , it deals &^ damage to target creature or player . diff --git a/data/output.txt b/data/output.txt index 67792a1..14819b7 100644 --- a/data/output.txt +++ b/data/output.txt @@ -2536,7 +2536,7 @@ |5sorcery|4|6|7|8|9choose a color. target player reveals his or her hand and discards all cards of that color.|3{^^BBBB}|0A|1persecute| -|5creature|4legendary|6hellion|7|8&/&|9devour X\where X is the number of creatures devoured this way|3{^^^RRGG}|0Y|1thromok the insatiable| +|5creature|4legendary|6hellion|7|8&/&|9devour X, where X is the number of creatures devoured this way|3{^^^RRGG}|0Y|1thromok the insatiable| |5sorcery|4|6|7|8|9put a &^^^/&^^^ green beast creature token onto the battlefield. then if an opponent controls more creatures than you, return @ to its owner's hand.|3{^GGGG}|0A|1pulse of the tangle| @@ -14064,7 +14064,7 @@ |5instant|4|6|7|8|9return target nonland permanent to its owner's hand.|3{^UU}|0O|1disperse| -|5sorcery|4|6|7|8|9x target creatures gain islandwalk until end of turn.|3{XXXXUU}|0N|1part water| +|5sorcery|4|6|7|8|9X target creatures gain islandwalk until end of turn.|3{XXXXUU}|0N|1part water| |5creature|4|6avatar|7|8&^^^^/&^^^^^^^|9vigilance\when @ enters the battlefield, for each opponent, exile up to one target creature that player controls and that player gains life equal to its power.|3{^^^^^WWWW}|0A|1luminate primordial| @@ -23569,7 +23569,7 @@ |5creature|4|6elf|7|8&^^/&^|9T: untap target creature.|3{^GG}|0O|1seeker of skybreak| -|5sorcery|4|6|7|8|9x target creatures can't block this turn.|3{XXRR}|0O|1wave of indifference| +|5sorcery|4|6|7|8|9X target creatures can't block this turn.|3{XXRR}|0O|1wave of indifference| |5instant|4|6|7|8|9put any number of target artifact cards from your graveyard on top of your library.\draw a card.|3{^^^WW}|0O|1frantic salvage| @@ -25190,7 +25190,7 @@ |5sorcery|4|6|7|8|9target player reveals his or her hand. you choose a nonland card from it. that player puts that card into his or her library third from the top.|3{^BB}|0O|1lost hours| -|5instant|4|6|7|8|9cast @ only during the declare blockers step.\x target attacking creatures become blocked. @ deals &^ damage to each of those creatures.|3{XXGG}|0O|1choking vines| +|5instant|4|6|7|8|9cast @ only during the declare blockers step.\X target attacking creatures become blocked. @ deals &^ damage to each of those creatures.|3{XXGG}|0O|1choking vines| |5artifact creature|4|6horse|7|8&^^/&^^|9metalcraft ~ @ gets +&^^/+&^^ as long as you control three or more artifacts.|3{^^^^}|0O|1chrome steed| @@ -26552,7 +26552,7 @@ |5creature|4|6elf druid|7|8&^/&^|9morph {GG}\tap two untapped elves you control: add one mana of any color to your mana pool.|3{GG}|0O|1birchlore rangers| -|5instant|4|6|7|8|9x target blocked creatures assign their combat damage this turn as though they weren't blocked.|3{XXRR}|0N|1outmaneuver| +|5instant|4|6|7|8|9X target blocked creatures assign their combat damage this turn as though they weren't blocked.|3{XXRR}|0N|1outmaneuver| |5creature|4|6goblin shaman|7|8&^/&^|9whenever @ becomes tapped, it deals &^ damage to target creature or player.|3{^^RR}|0O|1goblin medics| diff --git a/lib/nltk_model.py b/lib/nltk_model.py new file mode 100644 index 0000000..b88894f --- /dev/null +++ b/lib/nltk_model.py @@ -0,0 +1,305 @@ +# Natural Language Toolkit: Language Models +# +# Copyright (C) 2001-2014 NLTK Project +# Authors: Steven Bird +# Daniel Blanchard +# Ilia Kurenkov +# URL: +# For license information, see LICENSE.TXT +# +# adapted for mtgencode Nov. 2015 +# an attempt was made to preserve the exact functionality of this code, +# hampered somewhat by its brokenness + +from __future__ import unicode_literals + +from math import log + +from nltk.probability import ConditionalProbDist, ConditionalFreqDist, LidstoneProbDist +from nltk.util import ngrams +from nltk_model_api import ModelI + +from nltk import compat + + +def _estimator(fdist, **estimator_kwargs): + """ + Default estimator function using a LidstoneProbDist. + """ + # can't be an instance method of NgramModel as they + # can't be pickled either. + return LidstoneProbDist(fdist, 0.001, **estimator_kwargs) + + +@compat.python_2_unicode_compatible +class NgramModel(ModelI): + """ + A processing interface for assigning a probability to the next word. + """ + + def __init__(self, n, train, pad_left=True, pad_right=False, + estimator=None, **estimator_kwargs): + """ + Create an ngram language model to capture patterns in n consecutive + words of training text. An estimator smooths the probabilities derived + from the text and may allow generation of ngrams not seen during + training. See model.doctest for more detailed testing + + >>> from nltk.corpus import brown + >>> lm = NgramModel(3, brown.words(categories='news')) + >>> lm + + >>> lm._backoff + + >>> lm.entropy(brown.words(categories='humor')) + ... # doctest: +ELLIPSIS + 12.0399... + + :param n: the order of the language model (ngram size) + :type n: int + :param train: the training text + :type train: list(str) or list(list(str)) + :param pad_left: whether to pad the left of each sentence with an (n-1)-gram of empty strings + :type pad_left: bool + :param pad_right: whether to pad the right of each sentence with an (n-1)-gram of empty strings + :type pad_right: bool + :param estimator: a function for generating a probability distribution + :type estimator: a function that takes a ConditionalFreqDist and + returns a ConditionalProbDist + :param estimator_kwargs: Extra keyword arguments for the estimator + :type estimator_kwargs: (any) + """ + + # protection from cryptic behavior for calling programs + # that use the pre-2.0.2 interface + assert(isinstance(pad_left, bool)) + assert(isinstance(pad_right, bool)) + + self._lpad = ('',) * (n - 1) if pad_left else () + self._rpad = ('',) * (n - 1) if pad_right else () + + # make sure n is greater than zero, otherwise print it + assert (n > 0), n + + # For explicitness save the check whether this is a unigram model + self.is_unigram_model = (n == 1) + # save the ngram order number + self._n = n + # save left and right padding + self._lpad = ('',) * (n - 1) if pad_left else () + self._rpad = ('',) * (n - 1) if pad_right else () + + if estimator is None: + estimator = _estimator + + cfd = ConditionalFreqDist() + + # set read-only ngrams set (see property declaration below to reconfigure) + self._ngrams = set() + + # If given a list of strings instead of a list of lists, create enclosing list + if (train is not None) and isinstance(train[0], compat.string_types): + train = [train] + + # we need to keep track of the number of word types we encounter + vocabulary = set() + for sent in train: + raw_ngrams = ngrams(sent, n, pad_left, pad_right, pad_symbol='') + for ngram in raw_ngrams: + self._ngrams.add(ngram) + context = tuple(ngram[:-1]) + token = ngram[-1] + cfd[context][token] += 1 + vocabulary.add(token) + + # Unless number of bins is explicitly passed, we should use the number + # of word types encountered during training as the bins value. + # If right padding is on, this includes the padding symbol. + if 'bins' not in estimator_kwargs: + estimator_kwargs['bins'] = len(vocabulary) + + self._model = ConditionalProbDist(cfd, estimator, **estimator_kwargs) + + # recursively construct the lower-order models + if not self.is_unigram_model: + self._backoff = NgramModel(n-1, train, + pad_left, pad_right, + estimator, + **estimator_kwargs) + + self._backoff_alphas = dict() + # For each condition (or context) + for ctxt in cfd.conditions(): + backoff_ctxt = ctxt[1:] + backoff_total_pr = 0.0 + total_observed_pr = 0.0 + + # this is the subset of words that we OBSERVED following + # this context. + # i.e. Count(word | context) > 0 + for words in self._words_following(ctxt, cfd): + + # so, _words_following as fixed gives back a whole list now... + for word in words: + + total_observed_pr += self.prob(word, ctxt) + # we also need the total (n-1)-gram probability of + # words observed in this n-gram context + backoff_total_pr += self._backoff.prob(word, backoff_ctxt) + + assert (0 <= total_observed_pr <= 1), total_observed_pr + # beta is the remaining probability weight after we factor out + # the probability of observed words. + # As a sanity check, both total_observed_pr and backoff_total_pr + # must be GE 0, since probabilities are never negative + beta = 1.0 - total_observed_pr + + # backoff total has to be less than one, otherwise we get + # an error when we try subtracting it from 1 in the denominator + assert (0 <= backoff_total_pr < 1), backoff_total_pr + alpha_ctxt = beta / (1.0 - backoff_total_pr) + + self._backoff_alphas[ctxt] = alpha_ctxt + + # broken + # def _words_following(self, context, cond_freq_dist): + # for ctxt, word in cond_freq_dist.iterkeys(): + # if ctxt == context: + # yield word + + # fixed + def _words_following(self, context, cond_freq_dist): + for ctxt in cond_freq_dist.iterkeys(): + if ctxt == context: + yield cond_freq_dist[ctxt].keys() + + def prob(self, word, context): + """ + Evaluate the probability of this word in this context using Katz Backoff. + + :param word: the word to get the probability of + :type word: str + :param context: the context the word is in + :type context: list(str) + """ + context = tuple(context) + if (context + (word,) in self._ngrams) or (self.is_unigram_model): + return self._model[context].prob(word) + else: + return self._alpha(context) * self._backoff.prob(word, context[1:]) + + def _alpha(self, context): + """Get the backoff alpha value for the given context + """ + error_message = "Alphas and backoff are not defined for unigram models" + assert not self.is_unigram_model, error_message + + if context in self._backoff_alphas: + return self._backoff_alphas[context] + else: + return 1 + + def logprob(self, word, context): + """ + Evaluate the (negative) log probability of this word in this context. + + :param word: the word to get the probability of + :type word: str + :param context: the context the word is in + :type context: list(str) + """ + return -log(self.prob(word, context), 2) + + @property + def ngrams(self): + return self._ngrams + + @property + def backoff(self): + return self._backoff + + @property + def model(self): + return self._model + + def choose_random_word(self, context): + ''' + Randomly select a word that is likely to appear in this context. + + :param context: the context the word is in + :type context: list(str) + ''' + + return self.generate(1, context)[-1] + + # NB, this will always start with same word if the model + # was trained on a single text + def generate(self, num_words, context=()): + ''' + Generate random text based on the language model. + + :param num_words: number of words to generate + :type num_words: int + :param context: initial words in generated string + :type context: list(str) + ''' + + text = list(context) + for i in range(num_words): + text.append(self._generate_one(text)) + return text + + def _generate_one(self, context): + context = (self._lpad + tuple(context))[-self._n + 1:] + if context in self: + return self[context].generate() + elif self._n > 1: + return self._backoff._generate_one(context[1:]) + else: + return '.' + + def entropy(self, text): + """ + Calculate the approximate cross-entropy of the n-gram model for a + given evaluation text. + This is the average log probability of each word in the text. + + :param text: words to use for evaluation + :type text: list(str) + """ + + H = 0.0 # entropy is conventionally denoted by "H" + text = list(self._lpad) + text + list(self._rpad) + for i in range(self._n - 1, len(text)): + context = tuple(text[(i - self._n + 1):i]) + token = text[i] + H += self.logprob(token, context) + return H / float(len(text) - (self._n - 1)) + + def perplexity(self, text): + """ + Calculates the perplexity of the given text. + This is simply 2 ** cross-entropy for the text. + + :param text: words to calculate perplexity of + :type text: list(str) + """ + + return pow(2.0, self.entropy(text)) + + def __contains__(self, item): + if not isinstance(item, tuple): + item = (item,) + return item in self._model + + def __getitem__(self, item): + if not isinstance(item, tuple): + item = (item,) + return self._model[item] + + def __repr__(self): + return '' % (len(self._ngrams), self._n) + +if __name__ == "__main__": + import doctest + doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE) diff --git a/lib/nltk_model_api.py b/lib/nltk_model_api.py new file mode 100644 index 0000000..7198a76 --- /dev/null +++ b/lib/nltk_model_api.py @@ -0,0 +1,42 @@ +# Natural Language Toolkit: API for Language Models +# +# Copyright (C) 2001-2014 NLTK Project +# Author: Steven Bird +# URL: +# For license information, see LICENSE.TXT +# +# imported for use in mtgcode Nov. 2015 + + +# should this be a subclass of ConditionalProbDistI? + +class ModelI(object): + """ + A processing interface for assigning a probability to the next word. + """ + + def __init__(self): + '''Create a new language model.''' + raise NotImplementedError() + + def prob(self, word, context): + '''Evaluate the probability of this word in this context.''' + raise NotImplementedError() + + def logprob(self, word, context): + '''Evaluate the (negative) log probability of this word in this context.''' + raise NotImplementedError() + + def choose_random_word(self, context): + '''Randomly select a word that is likely to appear in this context.''' + raise NotImplementedError() + + def generate(self, n): + '''Generate n words of text from the language model.''' + raise NotImplementedError() + + def entropy(self, text): + '''Evaluate the total entropy of a message with respect to the model. + This is the sum of the log probability of each word in the message.''' + raise NotImplementedError() + diff --git a/lib/transforms.py b/lib/transforms.py index 319955c..7517f96 100644 --- a/lib/transforms.py +++ b/lib/transforms.py @@ -134,6 +134,8 @@ def text_pass_4b_x(s): s = s.replace('x.', x_marker + '.') s = s.replace('x,', x_marker + ',') s = s.replace('x/x', x_marker + '/' + x_marker) + s = s.replace('x target', x_marker + ' target') + s = s.replace('si' + x_marker + ' target', 'six target') return s @@ -425,8 +427,9 @@ def text_pass_11_linetrans(s): for line in lines: if not '.' in line: # because this is inconsistent - line = line.replace(';', ',') - sublines = line.split(',') + line = line.replace(',', ';') + line = line.replace('; where', ', where') # Thromok the Insatiable + sublines = line.split(';') for subline in sublines: if 'equip' in subline or 'enchant' in subline: prelines += [subline.strip()] diff --git a/scripts/validate.py b/scripts/validate.py index 0e7f799..08cb1ae 100755 --- a/scripts/validate.py +++ b/scripts/validate.py @@ -67,6 +67,12 @@ def check_pt(card): and not card.pt) return None +def check_lands(card): + if 'land' in card.types: + return card.cost.format() == '_NOCOST_' + else: + return None + # doesn't handle granted activated abilities in "" def check_X(card): correct = None @@ -143,6 +149,13 @@ def check_X(card): return correct +def check_kicker(card): + # also lazy and simple + if 'kicker' in card.text.text or 'kicked' in card.text.text: + return 'kicker' in card.text.text and 'kicked' in card.text.text + else: + return None + def check_counters(card): uses = len(re.findall(re.escape(utils.counter_marker), card.text.text)) if uses > 0: @@ -150,51 +163,199 @@ def check_counters(card): else: return None +def check_choices(card): + bullets = len(re.findall(re.escape(utils.bullet_marker), card.text.text)) + obracks = len(re.findall(re.escape(utils.choice_open_delimiter), card.text.text)) + cbracks = len(re.findall(re.escape(utils.choice_close_delimiter), card.text.text)) + if bullets + obracks + cbracks > 0: + if not (obracks == cbracks and bullets > 0): + return False + # could compile ahead of time + choice_regex = (re.escape(utils.choice_open_delimiter) + re.escape(utils.unary_marker) + + r'.*' + re.escape(utils.bullet_marker) + r'.*' + + re.escape(utils.choice_close_delimiter)) + nochoices = re.sub(choice_regex, '', card.text.text) + nobullets = len(re.findall(re.escape(utils.bullet_marker), nochoices)) + noobracks = len(re.findall(re.escape(utils.choice_open_delimiter), nochoices)) + nocbracks = len(re.findall(re.escape(utils.choice_close_delimiter), nochoices)) + return nobullets + noobracks + nocbracks == 0 + else: + return None + +def check_auras(card): + # a bit loose + if 'enchantment' in card.types or 'aura' in card.subtypes or 'enchant' in card.text.text: + return 'enchantment' in card.types or 'aura' in card.subtypes or 'enchant' in card.text.text + else: + return None + +def check_equipment(card): + # probably even looser, chould check for actual equip abilities and noncreatureness + if 'equipment' in card.subtypes: + return 'equip' in card.text.text + else: + return None + +def check_planeswalkers(card): + if 'planeswalker' in card.types: + good_lines = 0 + bad_lines = 0 + initial_re = r'^[+-]?' + re.escape(utils.unary_marker) + re.escape(utils.unary_counter) + '*:' + initial_re_X = r'^[-+]' + re.escape(utils.x_marker) + '+:' + for line in card.text_lines: + if len(re.findall(initial_re, line.text)) == 1: + good_lines += 1 + elif len(re.findall(initial_re_X, line.text)) == 1: + good_lines += 1 + elif 'can be your commander' in line.text: + pass + elif 'countertype' in line.text or 'transform' in line.text: + pass + else: + bad_lines += 1 + return good_lines > 1 and bad_lines == 0 + else: + return None + +def check_levelup(card): + if 'level' in card.text.text: + uplines = 0 + llines = 0 + for line in card.text_lines: + if 'countertype ' + utils.counter_marker + ' level' in line.text: + uplines += 1 + llines += 1 + elif 'with level up' in line.text: + llines += 1 + elif 'level up' in line.text: + uplines += 1 + elif 'level' in line.text: + llines += 1 + return uplines == 1 and llines > 0 + else: + return None + +def check_activated(card): + activated = 0 + for line in card.text_lines: + if '.' in line.text: + subtext = re.sub(r'"[^"]*"', '', line.text) + if 'forecast' in subtext: + pass + elif 'return ' + utils.this_marker + ' from your graveyard' in subtext: + pass + elif 'on the stack' in subtext: + pass + elif ':' in subtext: + activated += 1 + if activated > 0: + return list_only(card.types, ['creature', 'land', 'artifact', 'enchantment', 'planeswalker', 'tribal']) + else: + return None + +def check_triggered(card): + triggered = 0 + triggered_2 = 0 + for line in card.text_lines: + if 'when ' + utils.this_marker + ' enters the battlefield' in line.text: + triggered += 1 + if 'when ' + utils.this_marker + ' leaves the battlefield' in line.text: + triggered += 1 + if 'when ' + utils.this_marker + ' dies' in line.text: + triggered += 1 + elif 'at the beginning' == line.text[:16] or 'when' == line.text[:4]: + if 'from your graveyard' in line.text: + triggered_2 += 1 + elif 'in your graveyard' in line.text: + triggered_2 += 1 + elif 'if ' + utils.this_marker + ' is suspended' in line.text: + triggered_2 += 1 + elif 'if that card is exiled' in line.text or 'if ' + utils.this_marker + ' is exiled' in line.text: + triggered_2 += 1 + elif 'when the creature ' + utils.this_marker + ' haunts' in line.text: + triggered_2 += 1 + elif 'when you cycle ' + utils.this_marker in line.text or 'when you cast ' + utils.this_marker in line.text: + triggered_2 += 1 + elif 'this turn' in line.text or 'this combat' in line.text or 'your next upkeep' in line.text: + triggered_2 += 1 + elif 'from your library' in line.text: + triggered_2 += 1 + elif 'you discard ' + utils.this_marker in line.text or 'you to discard ' + utils.this_marker in line.text: + triggered_2 += 1 + else: + triggered += 1 + + if triggered > 0: + return list_only(card.types, ['creature', 'land', 'artifact', 'enchantment', 'planeswalker', 'tribal']) + elif triggered_2: + return True + else: + return None + props = OrderedDict([ ('types', check_types), ('pt', check_pt), + ('lands', check_lands), ('X', check_X), + ('kicker', check_kicker), ('counters', check_counters), + ('choices', check_choices), + ('auras', check_auras), + ('equipment', check_equipment), + ('planeswalkers', check_planeswalkers), + ('levelup', check_levelup), + ('activated', check_activated), + ('triggered', check_triggered), ]) values = OrderedDict([(k, (0,0,0)) for k in props]) -def main(fname, oname = None, verbose = True): +def main(fname, oname = None, verbose = True, dump = False): # may need to set special arguments here cards = jdecode.mtg_open_file(fname, verbose=verbose) - rg = {} - for card in cards: - g = rare_grams(card, thresh=2, grams=2) - if len(card.text_words) > 0: - g = int(1.0 + (float(g) * 100.0 / float(len(card.text_words)))) - if g in rg: - rg[g] += 1 - else: - rg[g] = 1 - if g >= 60: - print g - print card.format() + + do_grams = False - tot = 0 - vmax = sum(rg.values()) - pct90 = None - pct95 = None - pct99 = None - for i in sorted(rg): - print str(i) + ' rare ngrams: ' + str(rg[i]) - tot += rg[i] - if pct90 is None and tot >= vmax * 0.90: - pct90 = i - if pct95 is None and tot >= vmax * 0.95: - pct95 = i - if pct99 is None and tot >= vmax * 0.99: - pct99 = i + if do_grams: + rg = {} + for card in cards: + g = rare_grams(card, thresh=2, grams=2) + if len(card.text_words) > 0: + g = int(1.0 + (float(g) * 100.0 / float(len(card.text_words)))) + if g in rg: + rg[g] += 1 + else: + rg[g] = 1 + if g >= 60: + print g + print card.format() - print '90% - ' + str(pct90) - print '95% - ' + str(pct95) - print '99% - ' + str(pct99) - exit(0) + tot = 0 + vmax = sum(rg.values()) + pct90 = None + pct95 = None + pct99 = None + for i in sorted(rg): + print str(i) + ' rare ngrams: ' + str(rg[i]) + tot += rg[i] + if pct90 is None and tot >= vmax * 0.90: + pct90 = i + if pct95 is None and tot >= vmax * 0.95: + pct95 = i + if pct99 is None and tot >= vmax * 0.99: + pct99 = i + + print '90% - ' + str(pct90) + print '95% - ' + str(pct95) + print '99% - ' + str(pct99) + exit(0) + + total_all = 0 + total_good = 0 + total_bad = 0 for card in cards: + total_all += 1 + overall = True for prop in props: (total, good, bad) = values[prop] this_prop = props[prop](card) @@ -204,14 +365,34 @@ def main(fname, oname = None, verbose = True): good += 1 else: bad += 1 + overall = False + if card.name not in ['demonic pact', 'lavaclaw reaches', + "ertai's trickery", 'rumbling aftershocks', # i hate these + ] and dump: + print('---- ' + prop + '----') + print(card.encode()) + print(card.format()) values[prop] = (total, good, bad) + if overall: + total_good += 1 + else: + total_bad += 1 + + # summary + print('-- overall --') + print(' total: ' + str(total_all)) + print(' good : ' + str(total_good)) + print(' bad : ' + str(total_bad)) + print('----') + # breakdown for prop in props: (total, good, bad) = values[prop] - print prop + ':' - print ' total: ' + str(total) - print ' good : ' + str(good) - print ' bad : ' + str(bad) + print(prop + ':') + print(' total: ' + str(total)) + print(' good : ' + str(good)) + print(' bad : ' + str(bad)) + if __name__ == '__main__': @@ -224,7 +405,10 @@ if __name__ == '__main__': help='name of output file, will be overwritten') parser.add_argument('-v', '--verbose', action='store_true', help='verbose output') + parser.add_argument('-d', '--dump', action='store_true', + help='print invalid cards') args = parser.parse_args() - main(args.infile, args.outfile, verbose=args.verbose) + main(args.infile, args.outfile, verbose=args.verbose, dump=args.dump) exit(0) +