cbf8ac34e5
of a really specific feature. Other improvements; changing the keys used during json decode reordered everything in output.txt, but there shouldn't be any other major changes.
738 lines
26 KiB
Python
738 lines
26 KiB
Python
import re
|
|
import codecs
|
|
import sys
|
|
import random
|
|
|
|
import utils
|
|
|
|
# Format a list of rows of data into nice columns.
|
|
# Note that it's the columns that are nice, not this code.
|
|
def padrows(l):
    '''Render rows of fields as strings whose columns line up.

    Every field is converted with str() and padded on the right with spaces
    to the widest entry seen at the same position in any row. One extra space
    follows every field, including the last one. Rows may differ in length.

    l -- a list of rows (traversed twice), each a sequence of fields.
    Returns a list holding one formatted string per row.
    '''
    # first pass: widest rendering seen in each column position
    widths = []
    for row in l:
        for col, field in enumerate(row):
            width = len(str(field))
            if col >= len(widths):
                widths.append(width)
            elif width > widths[col]:
                widths[col] = width
    # second pass: left-justify each field to its column width
    return [''.join(str(field).ljust(widths[col]) + ' '
                    for col, field in enumerate(row))
            for row in l]
|
|
def printrows(l):
    '''Print every element of l on its own line of standard output.'''
    for row in l:
        # single-argument call form: valid as a Python 2 print statement
        # and as the Python 3 print function, with identical output
        print(row)
|
|
|
|
def randomize_all_mana(text):
    '''Return text with the symbols inside each mana cost shuffled.

    Every substring matched by utils.mana_regex is first swapped for
    utils.reserved_marker; the markers are then filled back in one at a
    time, left to right, with Manacost(...).reencode(randomize = True) of
    the matches in the order re.findall reported them.
    '''
    manastrs = re.findall(utils.mana_regex, text)
    newtext = text
    # Blank out the longest costs first, so that a short cost which happens
    # to be a substring of a longer one cannot tear the longer one apart.
    # key = len replaces the Python-2-only cmp function; both sorts are
    # stable, so the resulting order is the same.
    for manastr in sorted(manastrs, key = len, reverse = True):
        newtext = newtext.replace(manastr, utils.reserved_marker)
    for manastr in manastrs:
        shuffled = Manacost(manastr).reencode(randomize = True)
        # count of 1: consume exactly one marker per original match
        newtext = newtext.replace(utils.reserved_marker, shuffled, 1)
    return newtext
|
|
|
|
# so this stuff still needs to be cleaned up
|
|
# Character class of everything treated as a separator rather than part of a
# word when card text is split into words.
punctuation_chars = r'[+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]'

# Keyword abilities looked for in the text of creature cards.
# There are other keywords out there, these are just easy to detect.
# The order is significant to callers that strip matches one after another.
creature_keywords = [
    # evergreen
    'deathtouch', 'defender', 'double strike', 'first strike', 'flash',
    'flying', 'haste', 'hexproof', 'indestructible', 'lifelink',
    'menace', 'prowess', 'reach', 'trample', 'vigilance',
    # no longer evergreen
    'banding', 'fear', 'shroud', 'intimidate',
    # expert level keywords
    'absorb', 'amplify', 'annihilator', 'battle cry', 'bolster',
    'bloodthirst', 'bushido', 'changeling', 'convoke', 'devour',
    'evolve', 'exalted', 'extort', 'fading', 'flanking',
    'frenzy', 'graft', 'haunt', 'horsemanship', 'infect',
    'modular',
    # deliberately left out: 'morph', 'ninjutsu'
    'persist', 'poisonous', 'provoke',
    # deliberately left out: 'prowl'
    'rampage', 'ripple',
    # deliberately left out: 'scavenge'
    'shadow', 'soulbond', 'soulshift', 'split second', 'sunburst',
    'undying',
    # deliberately left out: 'unearth'
    'unleash', 'vanishing', 'wither',
]
|
|
|
|
# data aggregating classes
|
|
class Manacost:
    '''mana cost representation with data

    Built from the encoded text of a cost. Attributes set by the constructor:
      raw        -- the text exactly as given
      inner      -- raw without its first and last character, or '' for an
                    empty cost; NOT assigned when raw is non-empty but is not
                    wrapped in '{' ... '}'
      none       -- True only when raw is the empty string
      sequence   -- the encoded tokens, in the order they were read
      symbols    -- occurrence count for every entry of utils.mana_syms, with
                    symbols listed in utils.mana_symalt counted under
                    utils.mana_alt(sym)
      allsymbols -- occurrence count for every entry of utils.mana_symall
      colorless  -- number of unary counters read
      cmc        -- 1 per unary counter, 0 per utils.mana_X symbol, 2 per
                    symbol containing utils.mana_2, 1 per any other symbol
      colors     -- see get_colors()
      _parsed    -- False when the surrounding braces are missing
      _valid     -- False when the braces are missing or when a character
                    that starts no known symbol had to be skipped
    '''

    # hardcoded to be dependent on the symbol structure... ah well
    def get_colors(self):
        '''Return the distinct color letters of all symbols with a nonzero
        count, as a sorted string. The characters 2, P, S and X inside a
        symbol name are not colors and are dropped.'''
        colors = ''
        for sym in self.symbols:
            if self.symbols[sym] > 0:
                symcolors = re.sub(r'2|P|S|X', '', sym)
                for symcolor in symcolors:
                    if symcolor not in colors:
                        colors += symcolor
        # sort so the order is always consistent
        return ''.join(sorted(colors))

    def check_colors(self, symbolstring):
        '''Return True when every character of symbolstring is one of this
        cost's colors (trivially True for an empty string).'''
        return all(sym in self.colors for sym in symbolstring)

    def __init__(self, text):
        '''Parse the encoded cost text; see the class docstring for the
        attributes this fills in.'''
        self.raw = text
        self.cmc = 0
        self.colorless = 0
        self.sequence = []
        self.symbols = {sym : 0 for sym in utils.mana_syms}
        self.allsymbols = {sym : 0 for sym in utils.mana_symall}

        if text == '':
            # no cost at all is a perfectly good cost
            self._parsed = True
            self._valid = True
            self.none = True
            self.inner = ''

        elif not (len(self.raw) >= 2 and self.raw[0] == '{' and self.raw[-1] == '}'):
            self._parsed = False
            self._valid = False
            self.none = False

        else:
            self._parsed = True
            self._valid = True
            self.none = False
            self.inner = self.raw[1:-1]

            # structure mirrors the decoding in utils, but we pull out different data here
            idx = 0
            while idx < len(self.inner):
                # taking this branch is an infinite loop if unary_marker is empty
                if (len(utils.mana_unary_marker) > 0 and
                    self.inner[idx:idx+len(utils.mana_unary_marker)] == utils.mana_unary_marker):
                    idx += len(utils.mana_unary_marker)
                    self.sequence += [utils.mana_unary_marker]
                elif self.inner[idx:idx+len(utils.mana_unary_counter)] == utils.mana_unary_counter:
                    idx += len(utils.mana_unary_counter)
                    self.sequence += [utils.mana_unary_counter]
                    self.colorless += 1
                    self.cmc += 1
                else:
                    old_idx = idx
                    # try the shortest encodings first and take the first hit
                    for symlen in range(utils.mana_symlen_min, utils.mana_symlen_max + 1):
                        encoded_sym = self.inner[idx:idx+symlen]
                        if encoded_sym in utils.mana_symall_decode:
                            idx += symlen
                            # leave the sequence encoded for convenience
                            self.sequence += [encoded_sym]
                            sym = utils.mana_symall_decode[encoded_sym]
                            self.allsymbols[sym] += 1
                            if sym in utils.mana_symalt:
                                self.symbols[utils.mana_alt(sym)] += 1
                            else:
                                self.symbols[sym] += 1
                            # X contributes nothing to the cmc
                            if sym != utils.mana_X:
                                self.cmc += 2 if utils.mana_2 in sym else 1
                            break
                    # otherwise we'll go into an infinite loop if we see a symbol we don't know
                    if idx == old_idx:
                        idx += 1
                        self._valid = False

        self.colors = self.get_colors()

    def __str__(self):
        '''Return the cost passed through utils.mana_untranslate.'''
        return utils.mana_untranslate(utils.mana_open_delimiter + ''.join(self.sequence)
                                      + utils.mana_close_delimiter)

    def format(self, for_forum):
        '''Return the cost passed through utils.mana_untranslate, forwarding
        the for_forum flag.'''
        # The flag used to be handed to ''.join(), which accepts exactly one
        # argument and therefore raised TypeError on every call.
        # NOTE(review): assumes utils.mana_untranslate takes for_forum as its
        # second positional parameter -- confirm against utils.
        return utils.mana_untranslate(utils.mana_open_delimiter + ''.join(self.sequence)
                                      + utils.mana_close_delimiter, for_forum)

    def reencode(self, randomize = False):
        '''Return the cost in its encoded form ('' when there is no cost).

        With randomize set, the tokens are emitted in a random order.
        '''
        if self.none:
            return ''
        elif randomize:
            # so this won't work very well if mana_unary_marker isn't empty
            return (utils.mana_open_delimiter
                    + ''.join(random.sample(self.sequence, len(self.sequence)))
                    + utils.mana_close_delimiter)
        else:
            return utils.mana_open_delimiter + ''.join(self.sequence) + utils.mana_close_delimiter
|
|
|
|
class Card:
    '''card representation with data

    Built from one encoded card: fields separated by utils.fieldsep, with an
    optional second line holding the other side of the card (kept in bside).

    _parsed is False when the text has more than two lines or fewer than 10
    fields; in that case only raw, fields and the two flags are guaranteed to
    be set. _valid is additionally False when the name or the types are
    empty, when the p/t field is not of the form a/b, or when the b-side is
    itself not valid.
    '''

    @staticmethod
    def _int_or_none(s):
        '''Return int(s), or None when s is not a plain integer.'''
        try:
            return int(s)
        except ValueError:
            return None

    def __init__(self, text):
        self.raw = text
        self._parsed = True
        self._valid = True

        if '\n' not in self.raw:
            self.bside = None
        else:
            halves = self.raw.split('\n')
            if len(halves) != 2:
                # more than two sides: give up, keep the pieces for inspection
                self._parsed = False
                self._valid = False
                self.fields = halves
                return
            self.raw = halves[0]
            self.bside = Card(halves[1])
            if not self.bside._valid:
                self._valid = False

        fields = self.raw.split(utils.fieldsep)
        if len(fields) < 10:
            self._parsed = False
            self._valid = False
            self.fields = fields
            return

        # name is mandatory
        self.name = fields[1]
        if self.name == '':
            self._valid = False

        self.supertypes = fields[2].split(' ') if fields[2] != '' else []

        # at least one type is mandatory
        if fields[3] != '':
            self.types = fields[3].split(' ')
        else:
            self.types = []
            self._valid = False

        # strictly speaking, '* where * is something' is valid...
        # so a loyalty that is not a number does not invalidate the card
        if fields[4] != '':
            self.loyalty = fields[4]
            self.loyalty_value = self._int_or_none(self.loyalty)
        else:
            self.loyalty = None
            self.loyalty_value = None

        if fields[5] != '':
            self.subtypes = fields[5].split(' ')
            # for creatures the subtypes are the creature types (same list object)
            self.creaturetypes = self.subtypes if 'creature' in self.types else []
        else:
            self.subtypes = []
            self.creaturetypes = []

        self.pt = None
        self.power = None
        self.power_value = None
        self.toughness = None
        self.toughness_value = None
        if fields[6] != '':
            self.pt = fields[6]
            p_t = self.pt.split('/')
            if len(p_t) == 2:
                self.power, self.toughness = p_t
                self.power_value = self._int_or_none(self.power)
                self.toughness_value = self._int_or_none(self.toughness)
            else:
                self._valid = False

        # if there's no cost (lands) then cost.none will be True
        self.cost = Manacost(fields[7])

        self.text = fields[8]
        self.creature_words = []
        if self.text == '':
            self.text_lines = []
            self.text_words = []
            return

        self.text_lines = self.text.split(utils.newline)
        self.text_words = re.sub(punctuation_chars, ' ', self.text).split()

        # SUPER HACK
        # A line counts as a keyword line when nothing but punctuation is left
        # after deleting the known keywords from it, or when it contains one
        # of a few telltale phrases.
        if 'creature' in self.types:
            for line in self.text_lines:
                found = []
                for keyword in creature_keywords:
                    if keyword in line:
                        found.append(keyword)
                        line = line.replace(keyword, '')
                # yeah, I said it was a hack
                if (re.sub(punctuation_chars, ' ', line).split() == []
                    or 'protect' in line
                    or 'walk' in line
                    or 'sliver creatures' in line
                    or 'you control have' in line):
                    for word in found:
                        if word not in self.creature_words:
                            self.creature_words.append(word)

    def __str__(self):
        '''Return a short summary: name, type line, and cmc with colors, each
        surrounded by utils.fieldsep.'''
        typeline = (' ' + utils.dash_marker + ' ').join([
            ' '.join(self.supertypes + self.types),
            ' '.join(self.subtypes),
        ])
        costline = str(self.cost.cmc)
        if self.cost.colors != '':
            costline = costline + ', ' + self.cost.colors
        return ''.join([
            utils.fieldsep, self.name,
            utils.fieldsep, typeline,
            utils.fieldsep, costline,
            utils.fieldsep,
        ])

    def reencode(self, randomize = False):
        '''Return the card in the encoded field format, b-side included.

        With randomize set, the mana cost and every mana cost inside the card
        text are re-emitted with their symbols in random order.
        '''
        # Evaluated in this order (cost, text, b-side) on purpose: each step
        # may draw from the random module.
        cost = '' if self.cost.none else self.cost.reencode(randomize)
        text = randomize_all_mana(self.text) if randomize else self.text
        bside = utils.bsidesep + self.bside.reencode(randomize) if self.bside else ''
        pieces = [
            self.name,
            ' '.join(self.supertypes),
            ' '.join(self.types),
            self.loyalty if self.loyalty else '',
            ' '.join(self.subtypes),
            self.pt if self.pt else '',
            cost,
            text,
        ]
        out = []
        for piece in pieces:
            out.append(utils.fieldsep)
            out.append(piece)
        out.append(utils.fieldsep)
        out.append(bside)
        return ''.join(out)
|
|
|
|
# global card pools
|
|
unparsed_cards = []   # cards whose text could not be split into fields
invalid_cards = []    # cards that parsed but were flagged invalid
cards = []            # cards that parsed and are valid
allcards = []         # every parsed card, valid or not

# global indices
# each one maps a key derived from a card to the list of cards with that key

# identity and type line
by_name = {}
by_type = {}
by_type_inclusive = {}
by_supertype = {}
by_supertype_inclusive = {}
by_subtype = {}
by_subtype_inclusive = {}

# color and cost
by_color = {}
by_color_inclusive = {}
by_color_count = {}
by_cmc = {}
by_cost = {}

# stats
by_power = {}
by_toughness = {}
by_pt = {}
by_loyalty = {}

# rules text
by_textlines = {}
by_textlen = {}

# name -> index lookup covering all of the dictionaries above
indices = dict(
    by_name = by_name,
    by_type = by_type,
    by_type_inclusive = by_type_inclusive,
    by_supertype = by_supertype,
    by_supertype_inclusive = by_supertype_inclusive,
    by_subtype = by_subtype,
    by_subtype_inclusive = by_subtype_inclusive,
    by_color = by_color,
    by_color_inclusive = by_color_inclusive,
    by_color_count = by_color_count,
    by_cmc = by_cmc,
    by_cost = by_cost,
    by_power = by_power,
    by_toughness = by_toughness,
    by_pt = by_pt,
    by_loyalty = by_loyalty,
    by_textlines = by_textlines,
    by_textlen = by_textlen,
)
|
|
|
|
def index_size(d):
    '''Return the combined length of all the values stored in index d.'''
    return sum(len(members) for members in d.values())
|
|
|
|
def inc(d, k, obj):
    '''Accumulate obj under key k of index d.

    Keys that are falsy and not equal to 0 (None, '' and the like) are
    silently ignored. The first obj seen for a key is stored as is; later
    ones are merged into the stored value with +=.
    '''
    if not k and k != 0:
        return
    if k not in d:
        d[k] = obj
    else:
        d[k] += obj
|
|
|
|
# build the global indices
|
|
def analyze(cardtexts):
    '''Parse each encoded card text and file the result in the global tables.

    Empty texts are skipped. Every other text becomes a Card that lands in
    cards (valid), invalid_cards (parsed but not valid) or unparsed_cards;
    the first two are also collected in allcards. Each parsed card, valid or
    not, is then added to all of the by_* indices.
    '''
    global unparsed_cards, invalid_cards, cards, allcards
    for cardtext in cardtexts:
        # the empty card is not interesting
        if not cardtext:
            continue

        card = Card(cardtext)
        entry = [card]

        if card._valid:
            cards.append(card)
            allcards.append(card)
        elif card._parsed:
            invalid_cards.append(card)
            allcards.append(card)
        else:
            unparsed_cards.append(card)

        # nothing below is available on a card that failed to parse
        if not card._parsed:
            continue

        inc(by_name, card.name, list(entry))

        # type line: once under the full combination, once under each member
        inc(by_type, ' '.join(card.types), list(entry))
        for t in card.types:
            inc(by_type_inclusive, t, list(entry))
        inc(by_supertype, ' '.join(card.supertypes), list(entry))
        for t in card.supertypes:
            inc(by_supertype_inclusive, t, list(entry))
        inc(by_subtype, ' '.join(card.subtypes), list(entry))
        for t in card.subtypes:
            inc(by_subtype_inclusive, t, list(entry))

        colors = card.cost.colors
        if not colors:
            # colorless, still want to include in these tables
            inc(by_color, 'A', list(entry))
            inc(by_color_inclusive, 'A', list(entry))
            inc(by_color_count, 0, list(entry))
        else:
            inc(by_color, colors, list(entry))
            for c in colors:
                inc(by_color_inclusive, c, list(entry))
            inc(by_color_count, len(colors), list(entry))

        inc(by_cmc, card.cost.cmc, list(entry))
        # an empty encoding means there is no cost; file those under 'none'
        inc(by_cost, card.cost.reencode() or 'none', list(entry))

        inc(by_power, card.power, list(entry))
        inc(by_toughness, card.toughness, list(entry))
        inc(by_pt, card.pt, list(entry))

        inc(by_loyalty, card.loyalty, list(entry))

        inc(by_textlines, len(card.text_lines), list(entry))
        inc(by_textlen, len(card.text), list(entry))
|
|
|
|
# summarize the indices
|
|
# Yes, this printing code is pretty terrible.
|
|
def _most_populated(index):
    '''Return the keys of index, those with the most members first.'''
    # key= replaces the Python-2-only cmp function; both sorts are stable,
    # so ties come out in the same order as before
    return sorted(index, key = lambda k: len(index[k]), reverse = True)

def _print_counts_across(index, keys):
    '''Print a two-line table: the keys, then the member count of each.'''
    keys = list(keys)
    printrows(padrows([keys, [len(index[k]) for k in keys]]))

def _print_counts_down(index, keys, label = None):
    '''Print one "key count" line per key; label, when given, converts the
    key into what is displayed.'''
    if label is None:
        label = lambda k: k
    printrows(padrows([[label(k), len(index[k])] for k in keys]))

def _largest_unary(index):
    '''Return, as a string, the length of the longest key of index minus one,
    or 'none' for an empty index.'''
    # presumably the -1 discounts the unary marker character -- TODO confirm
    # against the encoding in utils
    if not index:
        return 'none'
    return str(max(map(len, index)) - 1)

def summarize(hsize = 10, vsize = 10, cmcsize = 20):
    '''Print an overview of the global indices to standard output.

    hsize   -- maximum number of columns in the side-by-side breakdowns
    vsize   -- maximum number of lines in the "popular ..." listings
    cmcsize -- maximum number of columns in the breakdown by CMC

    Empty indices are reported instead of raising: the largest power and
    toughness read 'none', and the text range lines are left out.
    '''
    print('====================')
    print(str(len(cards)) + ' valid cards, ' + str(len(invalid_cards)) + ' invalid cards.')
    print(str(len(allcards)) + ' cards parsed, ' + str(len(unparsed_cards)) + ' failed to parse')
    print('--------------------')
    print(str(len(by_name)) + ' unique card names')
    print('--------------------')
    print(str(len(by_color_inclusive)) + ' represented colors (including colorless as \'A\'), '
          + str(len(by_color)) + ' combinations')
    print('Breakdown by color:')
    _print_counts_across(by_color_inclusive, by_color_inclusive)
    print('Breakdown by number of colors:')
    _print_counts_across(by_color_count, by_color_count)
    print('--------------------')
    print(str(len(by_type_inclusive)) + ' unique card types, ' + str(len(by_type)) + ' combinations')
    print('Breakdown by type:')
    _print_counts_across(by_type_inclusive, _most_populated(by_type_inclusive)[:hsize])
    print('--------------------')
    print(str(len(by_subtype_inclusive)) + ' unique subtypes, '
          + str(len(by_subtype)) + ' combinations')
    print('-- Popular subtypes: --')
    _print_counts_down(by_subtype_inclusive, _most_populated(by_subtype_inclusive)[0:vsize])
    print('-- Top combinations: --')
    _print_counts_down(by_subtype, _most_populated(by_subtype)[0:vsize])
    print('--------------------')
    print(str(len(by_supertype_inclusive)) + ' unique supertypes, '
          + str(len(by_supertype)) + ' combinations')
    print('Breakdown by supertype:')
    _print_counts_across(by_supertype_inclusive, _most_populated(by_supertype_inclusive)[:hsize])
    print('--------------------')
    print(str(len(by_cmc)) + ' different CMCs, ' + str(len(by_cost)) + ' unique mana costs')
    print('Breakdown by CMC:')
    # ascending by cmc value here, not by popularity
    _print_counts_across(by_cmc, sorted(by_cmc)[:cmcsize])
    print('-- Popular mana costs: --')
    _print_counts_down(by_cost, _most_populated(by_cost)[0:vsize], utils.from_mana)
    print('--------------------')
    print(str(len(by_pt)) + ' unique p/t combinations')
    print('Largest power: ' + _largest_unary(by_power) +
          ', largest toughness: ' + _largest_unary(by_toughness))
    print('-- Popular p/t values: --')
    _print_counts_down(by_pt, _most_populated(by_pt)[0:vsize], utils.from_unary)
    print('--------------------')
    print('Loyalty values:')
    _print_counts_down(by_loyalty, _most_populated(by_loyalty)[0:vsize], utils.from_unary)
    print('--------------------')
    # min()/max() raise on an empty index, i.e. when no card was parsed
    if by_textlen:
        print('Card text ranges from ' + str(min(by_textlen)) + ' to '
              + str(max(by_textlen)) + ' characters in length')
    if by_textlines:
        print('Card text ranges from ' + str(min(by_textlines)) + ' to '
              + str(max(by_textlines)) + ' lines')
    print('-- Line counts by frequency: --')
    _print_counts_down(by_textlines, _most_populated(by_textlines)[0:vsize])
    print('====================')
|
|
|
|
|
|
# describe outliers in the indices
|
|
def outliers(hsize = 10, vsize = 10, dump_invalid = False):
    '''Print the extreme entries of the global indices to standard output.

    hsize        -- accepted for symmetry with summarize(); not used here
    vsize        -- maximum number of duplicated card names to consider
    dump_invalid -- when True, print the raw text of every invalid and every
                    unparsed card instead of only counting them
    '''
    # min()/max() with key = len pick the first shortest/longest key in
    # iteration order, exactly what the former stable cmp-sort followed by
    # [0] picked, without relying on the Python-2-only cmp argument.
    print('********************')
    print('Overview of indices:')
    rows = [['Index Name', 'Keys', 'Total Members']]
    for index in indices:
        rows.append([index, len(indices[index]), index_size(indices[index])])
    printrows(padrows(rows))
    print('********************')

    # names
    if len(by_name) > 0:
        scardname = min(by_name, key = len)
        print('Shortest Cardname: (' + str(len(scardname)) + ')')
        print(' ' + scardname)
        lcardname = max(by_name, key = len)
        print('Longest Cardname: (' + str(len(lcardname)) + ')')
        print(' ' + lcardname)
        most_used = sorted(by_name, key = lambda k: len(by_name[k]), reverse = True)
        rows = [[k, len(by_name[k])] for k in most_used[0:vsize] if len(by_name[k]) > 1]
        if rows == []:
            print('No duplicated cardnames')
        else:
            print('-- Most duplicated names: --')
            printrows(padrows(rows))
    else:
        print('No cards indexed by name?')
    print('--------------------')

    # type line
    if len(by_type) > 0:
        ltypes = max(by_type, key = len)
        print('Longest card type: (' + str(len(ltypes)) + ')')
        print(' ' + ltypes)
    else:
        print('No cards indexed by type?')
    if len(by_subtype) > 0:
        lsubtypes = max(by_subtype, key = len)
        print('Longest subtype: (' + str(len(lsubtypes)) + ')')
        print(' ' + lsubtypes)
    else:
        print('No cards indexed by subtype?')
    if len(by_supertype) > 0:
        lsupertypes = max(by_supertype, key = len)
        print('Longest supertype: (' + str(len(lsupertypes)) + ')')
        print(' ' + lsupertypes)
    else:
        print('No cards indexed by supertype?')
    print('--------------------')

    # costs
    if len(by_cost) > 0:
        lcost = max(by_cost, key = len)
        print('Longest mana cost: (' + str(len(lcost)) + ')')
        print(' ' + utils.from_mana(lcost))
        print('\n' + by_cost[lcost][0].reencode() + '\n')
    else:
        print('No cards indexed by cost?')
    if len(by_cmc) > 0:
        lcmc = max(by_cmc)
        print('Largest cmc: (' + str(lcmc) + ')')
        print(' ' + str(by_cmc[lcmc][0].cost))
        print('\n' + by_cmc[lcmc][0].reencode())
    else:
        print('No cards indexed by cmc?')
    print('--------------------')

    # power and toughness: the longest key is taken as the largest value
    if len(by_power) > 0:
        lpower = max(by_power, key = len)
        print('Largest creature power: ' + utils.from_unary(lpower))
        print('\n' + by_power[lpower][0].reencode() + '\n')
    else:
        print('No cards indexed by power?')
    if len(by_toughness) > 0:
        ltoughness = max(by_toughness, key = len)
        print('Largest creature toughness: ' + utils.from_unary(ltoughness))
        print('\n' + by_toughness[ltoughness][0].reencode())
    else:
        print('No cards indexed by toughness?')
    print('--------------------')

    # rules text
    if len(by_textlines) > 0:
        llines = max(by_textlines)
        print('Most lines of text in a card: ' + str(llines))
        print('\n' + by_textlines[llines][0].reencode() + '\n')
    else:
        print('No cards indexed by line count?')
    if len(by_textlen) > 0:
        ltext = max(by_textlen)
        print('Most chars in a card text: ' + str(ltext))
        print('\n' + by_textlen[ltext][0].reencode())
    else:
        print('No cards indexed by char count?')
    print('--------------------')

    # cards that did not make it
    print('There were ' + str(len(invalid_cards)) + ' invalid cards.')
    if dump_invalid:
        for card in invalid_cards:
            print('\n' + card.raw)
    elif len(invalid_cards) > 0:
        print('Not summarizing.')
    print('--------------------')
    print('There were ' + str(len(unparsed_cards)) + ' unparsed cards.')
    if dump_invalid:
        for card in unparsed_cards:
            print('\n' + card.raw)
    elif len(unparsed_cards) > 0:
        print('Not summarizing.')
    print('====================')
|
|
|
|
def main(fname, oname = None, verbose = False):
    '''Read an encoded card file, index its cards and print both reports.

    fname   -- path of the encoded card file; its contents are split into
               cards on utils.cardsep
    oname   -- accepted for command-line compatibility; currently unused
    verbose -- when True, announce the file being opened
    '''
    if verbose:
        # single-argument call form runs on both Python 2 and Python 3
        print('Opening encoded card file: ' + fname)

    with open(fname, 'rt') as f:
        text = f.read()

    cardtexts = text.split(utils.cardsep)
    analyze(cardtexts)
    summarize()
    outliers(dump_invalid = False)
|
|
|
|
# Command-line entry point: <encoded file> [output filename]
if __name__ == '__main__':
    import sys
    if len(sys.argv) == 2:
        main(sys.argv[1])
    elif len(sys.argv) == 3:
        main(sys.argv[1], oname = sys.argv[2])
    else:
        print('Usage: ' + sys.argv[0] + ' ' + '<encoded file> [output filename]')
        # sys.exit rather than the bare exit(), which is only injected by the
        # site module and is missing when the interpreter runs without it
        sys.exit(1)
|