2015-07-02 04:11:33 +00:00
|
|
|
import re
|
|
|
|
import codecs
|
|
|
|
import sys
|
2015-07-07 09:26:09 +00:00
|
|
|
import random
|
2015-07-02 04:11:33 +00:00
|
|
|
|
2015-07-07 09:26:09 +00:00
|
|
|
import utils
|
2015-07-04 09:39:41 +00:00
|
|
|
|
2015-07-08 07:22:54 +00:00
|
|
|
# Format a list of rows of data into nice columns.
|
|
|
|
# Note that it's the columns that are nice, not this code.
|
2015-07-05 07:59:02 +00:00
|
|
|
def padrows(l):
    '''Render rows of fields as strings with aligned columns.

    Every field is converted with str() and padded on the right with
    spaces up to the widest entry seen in its column; one extra space
    follows each field (including the last one of a row).

    l -- sequence of rows, each row a sequence of fields; rows may have
         different lengths. It is walked twice.
    Returns a list holding one formatted string per row.
    '''
    # first pass: widest rendering seen in each column
    widths = []
    for row in l:
        for col, cell in enumerate(row):
            size = len(str(cell))
            if col >= len(widths):
                widths.append(size)
            elif size > widths[col]:
                widths[col] = size

    # second pass: left-justify every cell to the width of its column
    return [''.join(str(cell).ljust(widths[col]) + ' '
                    for col, cell in enumerate(row))
            for row in l]
|
2015-07-07 09:26:09 +00:00
|
|
|
def printrows(l):
    '''Print each element of l on its own line of standard output.'''
    for line in l:
        print(line)
|
2015-07-05 07:59:02 +00:00
|
|
|
|
2015-07-08 07:22:54 +00:00
|
|
|
def randomize_all_mana(text):
    '''Return text with the symbols of every mana cost in it shuffled.

    Costs are located with utils.mana_regex, swapped out for
    utils.reserved_marker, and then the markers are filled back in one at
    a time with each found cost re-encoded with randomize = True.
    '''
    costs = re.findall(utils.mana_regex, text)

    # Blank out the longest costs first, so that a short cost which happens
    # to be a substring of a longer one cannot split it.
    masked = text
    for cost in sorted(costs, key = len, reverse = True):
        masked = masked.replace(cost, utils.reserved_marker)

    # Fill the markers back in left to right, one per cost found.
    for cost in costs:
        shuffled = Manacost(cost).reencode(randomize = True)
        masked = masked.replace(utils.reserved_marker, shuffled, 1)
    return masked
|
|
|
|
|
2015-07-07 09:26:09 +00:00
|
|
|
# so this stuff still needs to be cleaned up
|
2015-07-04 09:39:41 +00:00
|
|
|
# Character class of everything treated as a separator when card text is
# broken into words: Card substitutes a space for each match before calling
# split().
# NOTE(review): the capital letters look like the encoding's mana / tap
# symbol characters -- confirm against utils.
punctuation_chars = r'[+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]'

# Keyword abilities that Card looks for, by plain substring match, in each
# text line of a creature card. Entries that are commented out are not
# searched for.
creature_keywords = [
    # evergreen
    'deathtouch',
    'defender',
    'double strike',
    'first strike',
    'flash',
    'flying',
    'haste',
    'hexproof',
    'indestructible',
    'lifelink',
    'menace',
    'prowess',
    'reach',
    'trample',
    'vigilance',
    # no longer evergreen
    'banding',
    'fear',
    'shroud',
    'intimidate',
    # expert level keywords
    'absorb',
    'amplify',
    'annihilator',
    'battle cry',
    'bolster',
    'bloodthirst',
    'bushido',
    'changeling',
    'convoke',
    'devour',
    'evolve',
    'exalted',
    'extort',
    'fading',
    'flanking',
    'frenzy',
    'graft',
    'haunt',
    'horsemanship',
    'infect',
    'modular',
    #'morph',
    #'ninjutsu',
    'persist',
    'poisonous',
    'provoke',
    #'prowl',
    'rampage',
    'ripple',
    #'scavenge',
    'shadow',
    'soulbond',
    'soulshift',
    'split second',
    'sunburst',
    'undying',
    #'unearth',
    'unleash',
    'vanishing',
    'wither',
] # there are other keywords out there, these are just easy to detect
|
|
|
|
|
|
|
|
# data aggregating classes
|
|
|
|
class Manacost:
    '''mana cost representation with data

    Parses an encoded mana cost of the form '{...}' (or the empty string,
    meaning no cost at all) and records:

    raw        -- the text handed to the constructor
    inner      -- raw without the enclosing braces ('' when there is no
                  cost or the text could not be parsed)
    sequence   -- the encoded symbols / unary pieces in order of appearance
    symbols    -- count per symbol of utils.mana_syms (symbols listed in
                  utils.mana_symalt are counted under utils.mana_alt(sym))
    allsymbols -- count per symbol of utils.mana_symall
    colorless  -- number of unary counters seen
    cmc        -- converted mana cost
    colors     -- sorted string of the color characters present
    none       -- True only for the empty cost
    _parsed    -- False when text is non-empty but not wrapped in braces
    _valid     -- False when unparsed or when an unknown symbol was skipped
    '''

    # hardcoded to be dependent on the symbol structure... ah well
    def get_colors(self):
        '''Return the color characters used by this cost as a sorted string.

        Every symbol with a nonzero count contributes its characters, after
        '2', 'P', 'S' and 'X' have been stripped from the symbol name.
        '''
        colors = ''
        for sym in self.symbols:
            if self.symbols[sym] > 0:
                symcolors = re.sub(r'2|P|S|X', '', sym)
                for symcolor in symcolors:
                    if symcolor not in colors:
                        colors += symcolor
        # sort so the order is always consistent
        return ''.join(sorted(colors))

    def check_colors(self, symbolstring):
        '''Return True if every character of symbolstring is in self.colors.'''
        for sym in symbolstring:
            if not sym in self.colors:
                return False
        return True

    def __init__(self, text):
        '''Parse the encoded mana cost text (see the class docstring).'''
        self.raw = text
        self.cmc = 0
        self.colorless = 0
        self.sequence = []
        self.symbols = {sym : 0 for sym in utils.mana_syms}
        self.allsymbols = {sym : 0 for sym in utils.mana_symall}

        if text == '':
            self._parsed = True
            self._valid = True
            self.none = True
            self.inner = ''

        elif not (len(self.raw) >= 2 and self.raw[0] == '{' and self.raw[-1] == '}'):
            self._parsed = False
            self._valid = False
            self.none = False
            # keep the attribute defined even though nothing could be parsed
            self.inner = ''

        else:
            self._parsed = True
            self._valid = True
            self.none = False
            self.inner = self.raw[1:-1]

            # structure mirrors the decoding in utils, but we pull out different data here
            idx = 0
            while idx < len(self.inner):
                # the length check matters: an empty unary_marker would match
                # everywhere without advancing idx, i.e. loop forever
                if (len(utils.mana_unary_marker) > 0 and
                    self.inner[idx:idx+len(utils.mana_unary_marker)] == utils.mana_unary_marker):
                    idx += len(utils.mana_unary_marker)
                    self.sequence += [utils.mana_unary_marker]
                elif self.inner[idx:idx+len(utils.mana_unary_counter)] == utils.mana_unary_counter:
                    idx += len(utils.mana_unary_counter)
                    self.sequence += [utils.mana_unary_counter]
                    self.colorless += 1
                    self.cmc += 1
                else:
                    old_idx = idx
                    # try the shortest encodings first and take the first match
                    for symlen in range(utils.mana_symlen_min, utils.mana_symlen_max + 1):
                        encoded_sym = self.inner[idx:idx+symlen]
                        if encoded_sym in utils.mana_symall_decode:
                            idx += symlen
                            # leave the sequence encoded for convenience
                            self.sequence += [encoded_sym]
                            sym = utils.mana_symall_decode[encoded_sym]
                            self.allsymbols[sym] += 1
                            if sym in utils.mana_symalt:
                                self.symbols[utils.mana_alt(sym)] += 1
                            else:
                                self.symbols[sym] += 1
                            # X adds nothing, anything containing the '2'
                            # symbol adds two, every other symbol adds one
                            if sym == utils.mana_X:
                                self.cmc += 0
                            elif utils.mana_2 in sym:
                                self.cmc += 2
                            else:
                                self.cmc += 1
                            break
                    # otherwise we'll go into an infinite loop if we see a symbol we don't know
                    if idx == old_idx:
                        idx += 1
                        self._valid = False

        self.colors = self.get_colors()

    def __str__(self):
        '''Return the cost as rendered by utils.mana_untranslate.'''
        return utils.mana_untranslate(utils.mana_open_delimiter + ''.join(self.sequence)
                                      + utils.mana_close_delimiter)

    def format(self, for_forum):
        '''Return the cost rendered by utils.mana_untranslate, passing on for_forum.

        The flag used to be handed to str.join(), which accepts exactly one
        argument, so every call raised TypeError.
        NOTE(review): assumes utils.mana_untranslate takes the forum flag as
        its second positional argument -- confirm against utils.
        '''
        return utils.mana_untranslate(utils.mana_open_delimiter + ''.join(self.sequence)
                                      + utils.mana_close_delimiter, for_forum)

    def reencode(self, randomize = False):
        '''Return the cost in encoded form ('' when there is no cost).

        With randomize set, the pieces of the sequence are emitted in a
        random order.
        '''
        if self.none:
            return ''
        elif randomize:
            # so this won't work very well if mana_unary_marker isn't empty
            return (utils.mana_open_delimiter
                    + ''.join(random.sample(self.sequence, len(self.sequence)))
                    + utils.mana_close_delimiter)
        else:
            return utils.mana_open_delimiter + ''.join(self.sequence) + utils.mana_close_delimiter
|
2015-07-04 09:39:41 +00:00
|
|
|
|
2015-07-02 04:11:33 +00:00
|
|
|
class Card:
    '''card representation with data

    Built from one encoded card: fields separated by utils.fieldsep, with
    an optional second card (the b-side) following a single newline.

    Field positions read from the split text:
      1 name, 2 supertypes, 3 types, 4 loyalty, 5 subtypes,
      6 power/toughness, 7 mana cost, 8 rules text.

    _parsed is False when the text does not have the expected layout (more
    than one newline, or fewer than 10 fields); such a card only carries
    raw, bside, fields and the two status flags. _valid is False when the
    card is unparsed, when the name or types are missing, when the p/t is
    not of the form a/b, when the mana cost is invalid, or when the b-side
    is invalid.
    '''

    @staticmethod
    def _int_or_none(text):
        '''Return int(text), or None when text is not an integer literal.'''
        try:
            return int(text)
        except ValueError:
            return None

    def __init__(self, text):
        '''Parse one encoded card, including its b-side if it has one.'''
        self.raw = text
        self._parsed = True
        self._valid = True
        self.bside = None

        if '\n' in self.raw:
            halves = self.raw.split('\n')
            if not len(halves) == 2:
                self._parsed = False
                self._valid = False
                self.fields = halves
                return
            self.raw = halves[0]
            self.bside = Card(halves[1])
            if not self.bside._valid:
                self._valid = False

        fields = self.raw.split(utils.fieldsep)
        if not len(fields) >= 10:
            self._parsed = False
            self._valid = False
            self.fields = fields
            return

        # a card must have a name
        self.name = fields[1]
        if self.name == '':
            self._valid = False

        if not fields[2] == '':
            self.supertypes = fields[2].split(' ')
        else:
            self.supertypes = []

        # ... and at least one type
        if not fields[3] == '':
            self.types = fields[3].split(' ')
        else:
            self.types = []
            self._valid = False

        if not fields[4] == '':
            self.loyalty = fields[4]
            # strictly speaking, '* where * is something' is valid, so a
            # non-numeric loyalty does not make the card invalid
            self.loyalty_value = self._int_or_none(self.loyalty)
        else:
            self.loyalty = None
            self.loyalty_value = None

        if not fields[5] == '':
            self.subtypes = fields[5].split(' ')
            if 'creature' in self.types:
                self.creaturetypes = self.subtypes
            else:
                self.creaturetypes = []
        else:
            self.subtypes = []
            self.creaturetypes = []

        self.pt = None
        self.power = None
        self.power_value = None
        self.toughness = None
        self.toughness_value = None
        if not fields[6] == '':
            self.pt = fields[6]
            p_t = self.pt.split('/')
            if len(p_t) == 2:
                self.power = p_t[0]
                self.power_value = self._int_or_none(self.power)
                self.toughness = p_t[1]
                self.toughness_value = self._int_or_none(self.toughness)
            else:
                self._valid = False

        # if there's no cost (lands) then cost.none will be True
        self.cost = Manacost(fields[7])
        # a malformed cost makes the card invalid, the same way an invalid
        # b-side does
        if not self.cost._valid:
            self._valid = False

        if not fields[8] == '':
            self.text = fields[8]
            self.text_lines = self.text.split(utils.newline)
            self.text_words = re.sub(punctuation_chars, ' ', self.text).split()
            if 'creature' in self.types:
                self.creature_words = self._guess_creature_words()
            else:
                self.creature_words = []
        else:
            self.text = ''
            self.text_lines = []
            self.text_words = []
            self.creature_words = []

    def _guess_creature_words(self):
        '''Return the creature_keywords found on keyword-looking text lines.

        SUPER HACK: a line counts when nothing but punctuation_chars is
        left after removing every keyword it contains, or when it mentions
        one of a few hand-picked phrases. Each keyword is reported once, in
        order of first appearance.
        '''
        found = []
        for line in self.text_lines:
            guess = []
            for keyword in creature_keywords:
                if keyword in line:
                    guess += [keyword]
                    line = line.replace(keyword, '')
            # yeah, I said it was a hack
            if (re.sub(punctuation_chars, ' ', line).split() == []
                or 'protect' in line
                or 'walk' in line
                or 'sliver creatures' in line
                or 'you control have' in line):
                for word in guess:
                    if word not in found:
                        found += [word]
        return found

    def __str__(self):
        '''Return a short summary: name, type line, and cmc (plus colors if any).

        Only meaningful for parsed cards; unparsed ones lack the attributes
        read here.
        '''
        return ''.join([
            utils.fieldsep,
            self.name,
            utils.fieldsep,
            (' ' + utils.dash_marker + ' ').join([' '.join(self.supertypes + self.types),
                                                  ' '.join(self.subtypes)]),
            utils.fieldsep,
            str(self.cost.cmc) if self.cost.colors == ''
            else str(self.cost.cmc) + ', ' + self.cost.colors,
            utils.fieldsep,
        ])

    def reencode(self, randomize = False):
        '''Return the card in encoded form, b-side included.

        With randomize set, the mana cost and every mana cost found in the
        rules text are re-encoded in shuffled order. Only meaningful for
        parsed cards.
        '''
        return ''.join([
            utils.fieldsep,
            self.name,
            utils.fieldsep,
            ' '.join(self.supertypes),
            utils.fieldsep,
            ' '.join(self.types),
            utils.fieldsep,
            self.loyalty if self.loyalty else '',
            utils.fieldsep,
            ' '.join(self.subtypes),
            utils.fieldsep,
            self.pt if self.pt else '',
            utils.fieldsep,
            self.cost.reencode(randomize) if not self.cost.none else '',
            utils.fieldsep,
            self.text if not randomize else randomize_all_mana(self.text),
            utils.fieldsep,
            utils.bsidesep + self.bside.reencode(randomize) if self.bside else '',
        ])
|
2015-07-02 04:11:33 +00:00
|
|
|
|
2015-07-07 09:26:09 +00:00
|
|
|
# global card pools, filled by analyze()
unparsed_cards = []   # cards whose _parsed flag is False
invalid_cards = []    # cards that parsed but whose _valid flag is False
cards = []            # valid cards
allcards = []         # every parsed card (valid and invalid)

# global indices, filled by analyze(); each maps a key to a list of cards.
# The *_inclusive variants are keyed by a single type / color, so one card
# can be listed under several keys; the plain variants are keyed by the
# whole combination.
by_name = {}
by_type = {}
by_type_inclusive = {}
by_supertype = {}
by_supertype_inclusive = {}
by_subtype = {}
by_subtype_inclusive = {}
by_color = {}              # colorless cards are filed under 'A'
by_color_inclusive = {}    # colorless cards are filed under 'A'
by_color_count = {}        # keyed by the number of colors (0 for colorless)
by_cmc = {}
by_cost = {}               # keyed by the re-encoded cost, or 'none'
by_power = {}
by_toughness = {}
by_pt = {}
by_loyalty = {}
by_textlines = {}          # keyed by the number of text lines
by_textlen = {}            # keyed by the length of the text

# name -> index lookup over the very same dict objects as above
indices = {
    'by_name' : by_name,
    'by_type' : by_type,
    'by_type_inclusive' : by_type_inclusive,
    'by_supertype' : by_supertype,
    'by_supertype_inclusive' : by_supertype_inclusive,
    'by_subtype' : by_subtype,
    'by_subtype_inclusive' : by_subtype_inclusive,
    'by_color' : by_color,
    'by_color_inclusive' : by_color_inclusive,
    'by_color_count' : by_color_count,
    'by_cmc' : by_cmc,
    'by_cost' : by_cost,
    'by_power' : by_power,
    'by_toughness' : by_toughness,
    'by_pt' : by_pt,
    'by_loyalty' : by_loyalty,
    'by_textlines' : by_textlines,
    'by_textlen' : by_textlen,
}
|
|
|
|
|
|
|
|
def index_size(d):
    '''Return the total number of members held across all buckets of d.

    d -- dict whose values support len() (here: key -> list of cards).
    '''
    return sum(len(bucket) for bucket in d.values())
|
|
|
|
|
2015-07-07 09:26:09 +00:00
|
|
|
def inc(d, k, obj):
    '''Add obj to the bucket stored under key k of index d.

    Falsy keys that do not equal 0 (None, '', empty containers) are
    ignored. The first obj stored for a key becomes the bucket itself;
    later ones are merged into that bucket with +=.
    '''
    if not k and not (k == 0):
        return
    if k not in d:
        d[k] = obj
        return
    d[k] += obj
|
2015-07-04 09:39:41 +00:00
|
|
|
|
2015-07-07 09:26:09 +00:00
|
|
|
# build the global indices
|
|
|
|
def analyze(cardtexts):
    '''Parse every encoded card in cardtexts into the global pools and indices.

    Valid cards go to cards, parsed-but-invalid ones to invalid_cards (both
    also to allcards), and everything else to unparsed_cards. Every parsed
    card is then filed in each of the by_* indices.
    '''
    for encoded in cardtexts:
        # the empty card is not interesting
        if not encoded:
            continue

        card = Card(encoded)
        if card._valid:
            pool = cards
        elif card._parsed:
            pool = invalid_cards
        else:
            pool = unparsed_cards
        pool.append(card)
        if pool is not unparsed_cards:
            allcards.append(card)

        if not card._parsed:
            continue

        # note: every inc() call gets its own fresh [card] list, because
        # the list may become the bucket itself
        inc(by_name, card.name, [card])

        for exact, inclusive, parts in ((by_type, by_type_inclusive, card.types),
                                        (by_supertype, by_supertype_inclusive, card.supertypes),
                                        (by_subtype, by_subtype_inclusive, card.subtypes)):
            inc(exact, ' '.join(parts), [card])
            for part in parts:
                inc(inclusive, part, [card])

        colors = card.cost.colors
        if colors:
            inc(by_color, colors, [card])
            for color in colors:
                inc(by_color_inclusive, color, [card])
            inc(by_color_count, len(colors), [card])
        else:
            # colorless, still want to include in these tables
            inc(by_color, 'A', [card])
            inc(by_color_inclusive, 'A', [card])
            inc(by_color_count, 0, [card])

        inc(by_cmc, card.cost.cmc, [card])
        inc(by_cost, card.cost.reencode() or 'none', [card])

        inc(by_power, card.power, [card])
        inc(by_toughness, card.toughness, [card])
        inc(by_pt, card.pt, [card])

        inc(by_loyalty, card.loyalty, [card])

        inc(by_textlines, len(card.text_lines), [card])
        inc(by_textlen, len(card.text), [card])
|
|
|
|
|
|
|
|
# summarize the indices
|
2015-07-08 07:22:54 +00:00
|
|
|
# Yes, this printing code is pretty terrible.
|
|
|
|
def summarize(hsize = 10, vsize = 10, cmcsize = 20):
    '''Print an overview of the global pools and indices built by analyze().

    hsize   -- most columns shown in the horizontal type / supertype tables
    vsize   -- most rows shown in each vertical "popular ..." table
    cmcsize -- most columns shown in the CMC table

    Empty indices are reported instead of raising ValueError from min() /
    max(), so this is safe to call when nothing (or no creature) was parsed.
    '''
    print('====================')
    print(str(len(cards)) + ' valid cards, ' + str(len(invalid_cards)) + ' invalid cards.')
    print(str(len(allcards)) + ' cards parsed, ' + str(len(unparsed_cards)) + ' failed to parse')
    print('--------------------')
    print(str(len(by_name)) + ' unique card names')
    print('--------------------')
    print(str(len(by_color_inclusive)) + ' represented colors (including colorless as \'A\'), '
          + str(len(by_color)) + ' combinations')
    print('Breakdown by color:')
    _print_breakdown(by_color_inclusive, by_color_inclusive)
    print('Breakdown by number of colors:')
    _print_breakdown(by_color_count, by_color_count)
    print('--------------------')
    print(str(len(by_type_inclusive)) + ' unique card types, ' + str(len(by_type)) + ' combinations')
    print('Breakdown by type:')
    _print_breakdown(by_type_inclusive, _by_popularity(by_type_inclusive)[:hsize])
    print('--------------------')
    print(str(len(by_subtype_inclusive)) + ' unique subtypes, '
          + str(len(by_subtype)) + ' combinations')
    print('-- Popular subtypes: --')
    _print_ranking(by_subtype_inclusive, vsize)
    print('-- Top combinations: --')
    _print_ranking(by_subtype, vsize)
    print('--------------------')
    print(str(len(by_supertype_inclusive)) + ' unique supertypes, '
          + str(len(by_supertype)) + ' combinations')
    print('Breakdown by supertype:')
    _print_breakdown(by_supertype_inclusive, _by_popularity(by_supertype_inclusive)[:hsize])
    print('--------------------')
    print(str(len(by_cmc)) + ' different CMCs, ' + str(len(by_cost)) + ' unique mana costs')
    print('Breakdown by CMC:')
    _print_breakdown(by_cmc, sorted(by_cmc)[:cmcsize])
    print('-- Popular mana costs: --')
    _print_ranking(by_cost, vsize, utils.from_mana)
    print('--------------------')
    print(str(len(by_pt)) + ' unique p/t combinations')
    # The reported value is the length of the longest key minus one.
    # NOTE(review): presumably powers are unary-encoded as one marker plus
    # one counter per point -- confirm against utils.
    largest_power = str(max(map(len, by_power)) - 1) if by_power else 'n/a'
    largest_toughness = str(max(map(len, by_toughness)) - 1) if by_toughness else 'n/a'
    print('Largest power: ' + largest_power +
          ', largest toughness: ' + largest_toughness)
    print('-- Popular p/t values: --')
    _print_ranking(by_pt, vsize, utils.from_unary)
    print('--------------------')
    print('Loyalty values:')
    _print_ranking(by_loyalty, vsize, utils.from_unary)
    print('--------------------')
    if by_textlen:
        print('Card text ranges from ' + str(min(by_textlen)) + ' to '
              + str(max(by_textlen)) + ' characters in length')
    else:
        print('No cards indexed by char count?')
    if by_textlines:
        print('Card text ranges from ' + str(min(by_textlines)) + ' to '
              + str(max(by_textlines)) + ' lines')
    else:
        print('No cards indexed by line count?')
    print('-- Line counts by frequency: --')
    _print_ranking(by_textlines, vsize)
    print('====================')


def _by_popularity(index):
    '''Return the keys of index ordered from the fullest bucket to the emptiest.'''
    return sorted(index, key = lambda k: len(index[k]), reverse = True)


def _print_breakdown(index, keys):
    '''Print keys as a header row with the size of each key's bucket below it.'''
    keys = list(keys)
    printrows(padrows([keys, [len(index[k]) for k in keys]]))


def _print_ranking(index, limit, label = None):
    '''Print "key count" lines for the limit fullest buckets of index.

    label, when given, converts each key into the text that is shown.
    '''
    rows = []
    for k in _by_popularity(index)[0:limit]:
        rows += [[label(k) if label else k, len(index[k])]]
    printrows(padrows(rows))
|
2015-07-08 07:22:54 +00:00
|
|
|
|
2015-07-07 09:26:09 +00:00
|
|
|
|
|
|
|
# describe outliers in the indices
|
2015-07-08 07:22:54 +00:00
|
|
|
def outliers(hsize = 10, vsize = 10, dump_invalid = False):
    '''Print the extreme entries of the global indices built by analyze().

    hsize        -- accepted but not used here
    vsize        -- most rows shown in the duplicated-names table
    dump_invalid -- when set, print the raw text of every invalid and
                    every unparsed card instead of just counting them
    '''
    def longest(index):
        # first key of maximal length, i.e. what a stable descending sort
        # by length would put in front
        return max(index, key = len)

    def dump(pool):
        if dump_invalid:
            for card in pool:
                print('\n' + card.raw)
        elif len(pool) > 0:
            print('Not summarizing.')

    print('********************')
    print('Overview of indices:')
    table = [['Index Name', 'Keys', 'Total Members']]
    for label, index in indices.items():
        table.append([label, len(index), index_size(index)])
    printrows(padrows(table))
    print('********************')

    if by_name:
        shortest_name = min(by_name, key = len)
        print('Shortest Cardname: (' + str(len(shortest_name)) + ')')
        print(' ' + shortest_name)
        longest_name = longest(by_name)
        print('Longest Cardname: (' + str(len(longest_name)) + ')')
        print(' ' + longest_name)
        ranked = sorted(by_name, key = lambda name: len(by_name[name]), reverse = True)
        dupes = [[name, len(by_name[name])]
                 for name in ranked[0:vsize] if len(by_name[name]) > 1]
        if dupes:
            print('-- Most duplicated names: --')
            printrows(padrows(dupes))
        else:
            print('No duplicated cardnames')
    else:
        print('No cards indexed by name?')
    print('--------------------')

    # the three type indices are all reported the same way
    for index, heading, complaint in (
            (by_type, 'Longest card type: (', 'No cards indexed by type?'),
            (by_subtype, 'Longest subtype: (', 'No cards indexed by subtype?'),
            (by_supertype, 'Longest supertype: (', 'No cards indexed by supertype?')):
        if index:
            key = longest(index)
            print(heading + str(len(key)) + ')')
            print(' ' + key)
        else:
            print(complaint)
    print('--------------------')

    if by_cost:
        lcost = longest(by_cost)
        print('Longest mana cost: (' + str(len(lcost)) + ')')
        print(' ' + utils.from_mana(lcost))
        print('\n' + by_cost[lcost][0].reencode() + '\n')
    else:
        print('No cards indexed by cost?')
    if by_cmc:
        lcmc = max(by_cmc)
        print('Largest cmc: (' + str(lcmc) + ')')
        print(' ' + str(by_cmc[lcmc][0].cost))
        print('\n' + by_cmc[lcmc][0].reencode())
    else:
        print('No cards indexed by cmc?')
    print('--------------------')

    if by_power:
        lpower = longest(by_power)
        print('Largest creature power: ' + utils.from_unary(lpower))
        print('\n' + by_power[lpower][0].reencode() + '\n')
    else:
        print('No cards indexed by power?')
    if by_toughness:
        ltoughness = longest(by_toughness)
        print('Largest creature toughness: ' + utils.from_unary(ltoughness))
        print('\n' + by_toughness[ltoughness][0].reencode())
    else:
        print('No cards indexed by toughness?')
    print('--------------------')

    if by_textlines:
        llines = max(by_textlines)
        print('Most lines of text in a card: ' + str(llines))
        print('\n' + by_textlines[llines][0].reencode() + '\n')
    else:
        print('No cards indexed by line count?')
    if by_textlen:
        ltext = max(by_textlen)
        print('Most chars in a card text: ' + str(ltext))
        print('\n' + by_textlen[ltext][0].reencode())
    else:
        print('No cards indexed by char count?')
    print('--------------------')

    print('There were ' + str(len(invalid_cards)) + ' invalid cards.')
    dump(invalid_cards)
    print('--------------------')
    print('There were ' + str(len(unparsed_cards)) + ' unparsed cards.')
    dump(unparsed_cards)
    print('====================')
|
2015-07-07 09:26:09 +00:00
|
|
|
|
|
|
|
def main(fname, oname = None, verbose = False):
    '''Read the encoded card file fname, then index it and print both reports.

    oname is accepted but not used here. With verbose set, the file name
    is announced before the file is opened.
    '''
    if verbose:
        print('Opening encoded card file: ' + fname)

    with open(fname, 'rt') as infile:
        contents = infile.read()

    analyze(contents.split(utils.cardsep))
    summarize()
    outliers(dump_invalid = False)
|
2015-07-02 04:11:33 +00:00
|
|
|
|
|
|
|
# command line entry point: <encoded file> [output filename]
if __name__ == '__main__':
    import sys
    if len(sys.argv) == 2:
        main(sys.argv[1])
    elif len(sys.argv) == 3:
        main(sys.argv[1], oname = sys.argv[2])
    else:
        print('Usage: ' + sys.argv[0] + ' ' + '<encoded file> [output filename]')
        # sys.exit rather than the bare exit() helper, which is only
        # installed by the site module for interactive use
        sys.exit(1)
|