Card now has flexible input from encoded formats. Data mining code updated.
Unfortunately, Python does not handle imports nicely without using the full-bore module system; I'll deal with that another day.
This commit is contained in:
parent
31877972c2
commit
40fc695826
4 changed files with 492 additions and 409 deletions
395
datamine.py
395
datamine.py
|
@ -1,395 +0,0 @@
|
||||||
import re
|
|
||||||
import codecs
|
|
||||||
import sys
|
|
||||||
import random
|
|
||||||
|
|
||||||
import lib.utils as utils
|
|
||||||
from lib.card import Card
|
|
||||||
from lib.mana import Manacost
|
|
||||||
|
|
||||||
# Format a list of rows of data into nice columns.
|
|
||||||
# Note that it's the columns that are nice, not this code.
|
|
||||||
def padrows(l):
    """Render rows of cells as strings whose columns line up.

    Each element of l is a sequence of cells. Every cell is converted with
    str(), left-justified to the width of the widest cell found at the same
    position in any row, and followed by a single space. Rows may have
    different lengths.

    Returns a list containing one string per row of l.
    """
    # widest str() form seen at each column position
    widths = []
    for row in l:
        for i, cell in enumerate(row):
            n = len(str(cell))
            if i < len(widths):
                widths[i] = max(n, widths[i])
            else:
                widths.append(n)
    # build each line with one join instead of growing a string with +=
    return [''.join(str(cell).ljust(widths[i]) + ' '
                    for i, cell in enumerate(row))
            for row in l]
|
|
||||||
def printrows(l):
    """Print every element of l on its own line."""
    for row in l:
        # Single-argument call form: prints the same thing under the
        # Python 2 print statement and also parses as Python 3.
        print(row)
|
|
||||||
|
|
||||||
# Card pools, filled in by analyze().
unparsed_cards, invalid_cards, cards, allcards = [], [], [], []

# Index tables, filled in by analyze(). Each one maps a key to the list of
# cards filed under that key.
by_name = dict()
by_type = dict()
by_type_inclusive = dict()
by_supertype = dict()
by_supertype_inclusive = dict()
by_subtype = dict()
by_subtype_inclusive = dict()
by_color = dict()
by_color_inclusive = dict()
by_color_count = dict()
by_cmc = dict()
by_cost = dict()
by_power = dict()
by_toughness = dict()
by_pt = dict()
by_loyalty = dict()
by_textlines = dict()
by_textlen = dict()

# Table of tables: index name -> the very same dict object as above, so
# that all of the indices can be walked generically.
indices = {
    'by_name' : by_name,
    'by_type' : by_type,
    'by_type_inclusive' : by_type_inclusive,
    'by_supertype' : by_supertype,
    'by_supertype_inclusive' : by_supertype_inclusive,
    'by_subtype' : by_subtype,
    'by_subtype_inclusive' : by_subtype_inclusive,
    'by_color' : by_color,
    'by_color_inclusive' : by_color_inclusive,
    'by_color_count' : by_color_count,
    'by_cmc' : by_cmc,
    'by_cost' : by_cost,
    'by_power' : by_power,
    'by_toughness' : by_toughness,
    'by_pt' : by_pt,
    'by_loyalty' : by_loyalty,
    'by_textlines' : by_textlines,
    'by_textlen' : by_textlen,
}
|
|
||||||
|
|
||||||
def index_size(d):
    """Return the combined length of every bucket stored in index d.

    d maps keys to sized containers; the result is the sum of len() over
    all of its values, or 0 when d is empty.
    """
    total = 0
    for key in d:
        total += len(d[key])
    return total
|
|
||||||
|
|
||||||
def inc(d, k, obj):
    """Accumulate obj into d under key k.

    Falsy keys are ignored, with the exception of keys equal to 0, which
    are accepted. The first value for a key is stored as given (the same
    object, not a copy); later values are combined into the stored one
    with +=.
    """
    if not (k or k == 0):
        return
    if k not in d:
        d[k] = obj
    else:
        d[k] += obj
|
|
||||||
|
|
||||||
# build the global indices
|
|
||||||
def analyze(cardtexts):
    """Build a Card from each text and file it in the global pools and indices.

    Empty (falsy) entries of cardtexts are skipped. Every other entry is
    passed to Card(); the result goes into cards/allcards when it is valid,
    invalid_cards/allcards when it parsed but is not valid, and
    unparsed_cards otherwise. Cards that parsed are additionally added to
    each of the by_* indices.
    """
    global unparsed_cards, invalid_cards, cards, allcards
    for cardtext in cardtexts:
        if not cardtext:
            # the empty card is not interesting
            continue

        card = Card(cardtext)
        if card._valid:
            cards.append(card)
            allcards.append(card)
        elif card._parsed:
            invalid_cards.append(card)
            allcards.append(card)
        else:
            unparsed_cards.append(card)

        # only cards that parsed are indexed
        if not card._parsed:
            continue

        # inc() keeps the list it is handed, so every call gets a fresh one
        inc(by_name, card.name, [card])

        # each type line is indexed whole (joined by spaces) and per word
        for whole, inclusive, words in (
                (by_type, by_type_inclusive, card.types),
                (by_supertype, by_supertype_inclusive, card.supertypes),
                (by_subtype, by_subtype_inclusive, card.subtypes)):
            inc(whole, ' '.join(words), [card])
            for word in words:
                inc(inclusive, word, [card])

        if not card.cost.colors:
            # colorless, still want to include in these tables
            inc(by_color, 'A', [card])
            inc(by_color_inclusive, 'A', [card])
            inc(by_color_count, 0, [card])
        else:
            inc(by_color, card.cost.colors, [card])
            for c in card.cost.colors:
                inc(by_color_inclusive, c, [card])
            inc(by_color_count, len(card.cost.colors), [card])

        inc(by_cmc, card.cost.cmc, [card])
        # an empty encoded cost is filed under the key 'none'
        inc(by_cost, card.cost.reencode() if card.cost.reencode() else 'none', [card])

        inc(by_power, card.power, [card])
        inc(by_toughness, card.toughness, [card])
        inc(by_pt, card.pt, [card])

        inc(by_loyalty, card.loyalty, [card])

        inc(by_textlines, len(card.text_lines), [card])
        inc(by_textlen, len(card.text), [card])
|
|
||||||
|
|
||||||
# summarize the indices
|
|
||||||
# Yes, this printing code is pretty terrible.
|
|
||||||
def summarize(hsize = 10, vsize = 10, cmcsize = 20):
    """Print a summary of the global card pools and indices.

    hsize   -- maximum number of columns in the type and supertype breakdowns
    vsize   -- maximum number of rows in each "popular ..." table
    cmcsize -- maximum number of columns in the CMC breakdown

    The lines reporting the largest power/toughness and the text length
    ranges are skipped when the indices they read are empty, since max()
    and min() raise ValueError on an empty sequence.
    """
    rule = '--------------------'

    def by_count(index):
        # keys of index, fullest bucket first; ties keep iteration order
        return sorted(index, key=lambda k: len(index[k]), reverse=True)

    def across(index, keys):
        # horizontal table: a row of keys above a row of bucket sizes
        keys = list(keys)
        printrows(padrows([keys, [len(index[k]) for k in keys]]))

    def down(index, keys, show=lambda k: k):
        # vertical table: one "key count" row per key
        printrows(padrows([[show(k), len(index[k])] for k in keys]))

    print('====================')
    print(str(len(cards)) + ' valid cards, ' + str(len(invalid_cards)) + ' invalid cards.')
    print(str(len(allcards)) + ' cards parsed, ' + str(len(unparsed_cards)) + ' failed to parse')
    print(rule)
    print(str(len(by_name)) + ' unique card names')
    print(rule)
    print(str(len(by_color_inclusive)) + ' represented colors (including colorless as \'A\'), '
          + str(len(by_color)) + ' combinations')
    print('Breakdown by color:')
    across(by_color_inclusive, by_color_inclusive.keys())
    print('Breakdown by number of colors:')
    across(by_color_count, by_color_count.keys())
    print(rule)
    print(str(len(by_type_inclusive)) + ' unique card types, ' + str(len(by_type)) + ' combinations')
    print('Breakdown by type:')
    across(by_type_inclusive, by_count(by_type_inclusive)[:hsize])
    print(rule)
    print(str(len(by_subtype_inclusive)) + ' unique subtypes, '
          + str(len(by_subtype)) + ' combinations')
    print('-- Popular subtypes: --')
    down(by_subtype_inclusive, by_count(by_subtype_inclusive)[:vsize])
    print('-- Top combinations: --')
    down(by_subtype, by_count(by_subtype)[:vsize])
    print(rule)
    print(str(len(by_supertype_inclusive)) + ' unique supertypes, '
          + str(len(by_supertype)) + ' combinations')
    print('Breakdown by supertype:')
    across(by_supertype_inclusive, by_count(by_supertype_inclusive)[:hsize])
    print(rule)
    print(str(len(by_cmc)) + ' different CMCs, ' + str(len(by_cost)) + ' unique mana costs')
    print('Breakdown by CMC:')
    across(by_cmc, sorted(by_cmc)[:cmcsize])
    print('-- Popular mana costs: --')
    down(by_cost, by_count(by_cost)[:vsize], utils.from_mana)
    print(rule)
    print(str(len(by_pt)) + ' unique p/t combinations')
    # max() of an empty index raises ValueError, so only report when both exist
    if by_power and by_toughness:
        print('Largest power: ' + str(max(map(len, by_power)) - 1) +
              ', largest toughness: ' + str(max(map(len, by_toughness)) - 1))
    print('-- Popular p/t values: --')
    down(by_pt, by_count(by_pt)[:vsize], utils.from_unary)
    print(rule)
    print('Loyalty values:')
    down(by_loyalty, by_count(by_loyalty)[:vsize], utils.from_unary)
    print(rule)
    # same guard for min()/max() over the text-size indices
    if by_textlen and by_textlines:
        print('Card text ranges from ' + str(min(by_textlen)) + ' to '
              + str(max(by_textlen)) + ' characters in length')
        print('Card text ranges from ' + str(min(by_textlines)) + ' to '
              + str(max(by_textlines)) + ' lines')
    print('-- Line counts by frequency: --')
    down(by_textlines, by_count(by_textlines)[:vsize])
    print('====================')
|
|
||||||
|
|
||||||
|
|
||||||
# describe outliers in the indices
|
|
||||||
def outliers(hsize = 10, vsize = 10, dump_invalid = False):
    """Print an overview of the global indices and their extreme entries.

    vsize        -- maximum number of rows in the duplicated-names table
    dump_invalid -- when true, print the raw text of every invalid and
                    every unparsed card
    hsize is accepted but not used.
    """
    rule = '--------------------'

    def longest(index):
        # first key with the greatest len(), in the index's iteration order
        return max(index, key=len)

    print('********************')
    print('Overview of indices:')
    rows = [['Index Name', 'Keys', 'Total Members']]
    for index in indices:
        rows.append([index, len(indices[index]), index_size(indices[index])])
    printrows(padrows(rows))
    print('********************')

    if by_name:
        scardname = min(by_name, key=len)
        print('Shortest Cardname: (' + str(len(scardname)) + ')')
        print(' ' + scardname)
        lcardname = longest(by_name)
        print('Longest Cardname: (' + str(len(lcardname)) + ')')
        print(' ' + lcardname)
        # names shared by more than one card, most-shared first
        ranked = sorted(by_name, key=lambda k: len(by_name[k]), reverse=True)
        rows = [[k, len(by_name[k])] for k in ranked[0:vsize] if len(by_name[k]) > 1]
        if rows:
            print('-- Most duplicated names: --')
            printrows(padrows(rows))
        else:
            print('No duplicated cardnames')
    else:
        print('No cards indexed by name?')
    print(rule)

    for index, heading, missing in (
            (by_type, 'Longest card type: (', 'No cards indexed by type?'),
            (by_subtype, 'Longest subtype: (', 'No cards indexed by subtype?'),
            (by_supertype, 'Longest supertype: (', 'No cards indexed by supertype?')):
        if index:
            key = longest(index)
            print(heading + str(len(key)) + ')')
            print(' ' + key)
        else:
            print(missing)
    print(rule)

    if by_cost:
        lcost = longest(by_cost)
        print('Longest mana cost: (' + str(len(lcost)) + ')')
        print(' ' + utils.from_mana(lcost))
        print('\n' + by_cost[lcost][0].reencode() + '\n')
    else:
        print('No cards indexed by cost?')
    if by_cmc:
        lcmc = max(by_cmc)
        print('Largest cmc: (' + str(lcmc) + ')')
        print(' ' + str(by_cmc[lcmc][0].cost))
        print('\n' + by_cmc[lcmc][0].reencode())
    else:
        print('No cards indexed by cmc?')
    print(rule)

    if by_power:
        lpower = longest(by_power)
        print('Largest creature power: ' + utils.from_unary(lpower))
        print('\n' + by_power[lpower][0].reencode() + '\n')
    else:
        print('No cards indexed by power?')
    if by_toughness:
        ltoughness = longest(by_toughness)
        print('Largest creature toughness: ' + utils.from_unary(ltoughness))
        print('\n' + by_toughness[ltoughness][0].reencode())
    else:
        print('No cards indexed by toughness?')
    print(rule)

    if by_textlines:
        llines = max(by_textlines)
        print('Most lines of text in a card: ' + str(llines))
        print('\n' + by_textlines[llines][0].reencode() + '\n')
    else:
        print('No cards indexed by line count?')
    if by_textlen:
        ltext = max(by_textlen)
        print('Most chars in a card text: ' + str(ltext))
        print('\n' + by_textlen[ltext][0].reencode())
    else:
        print('No cards indexed by char count?')
    print(rule)

    print('There were ' + str(len(invalid_cards)) + ' invalid cards.')
    if dump_invalid:
        for card in invalid_cards:
            print('\n' + card.raw)
    elif invalid_cards:
        print('Not summarizing.')
    print(rule)

    print('There were ' + str(len(unparsed_cards)) + ' unparsed cards.')
    if dump_invalid:
        for card in unparsed_cards:
            print('\n' + card.raw)
    elif unparsed_cards:
        print('Not summarizing.')
    print('====================')
|
|
||||||
|
|
||||||
def main(fname, oname = None, verbose = False):
    """Read an encoded card file, index its cards and print the reports.

    fname   -- path of the encoded card file to read
    oname   -- accepted but not used here
    verbose -- when true, announce the file before opening it
    """
    if verbose:
        print('Opening encoded card file: ' + fname)

    with open(fname, 'rt') as infile:
        text = infile.read()

    # one entry per card; analyze() skips the empty ones
    analyze(text.split(utils.cardsep))
    summarize()
    outliers(dump_invalid = False)
|
|
||||||
|
|
||||||
# Command-line entry point: <encoded file> [output filename]
if __name__ == '__main__':
    import sys
    argc = len(sys.argv)
    if argc not in (2, 3):
        print('Usage: ' + sys.argv[0] + ' ' + '<encoded file> [output filename]')
        exit(1)
    elif argc == 2:
        main(sys.argv[1])
    else:
        main(sys.argv[1], oname = sys.argv[2])
|
|
|
@ -202,9 +202,74 @@ def fields_from_json(src_json):
|
||||||
# we don't need to worry about bsides because we handle that in the constructor
|
# we don't need to worry about bsides because we handle that in the constructor
|
||||||
return parsed, valid and fields_check_valid(fields), fields
|
return parsed, valid and fields_check_valid(fields), fields
|
||||||
|
|
||||||
def fields_from_format(src_text, fmt_ordered, fmt_labeled, fieldsep):
    """Split the encoded text of one card into named fields.

    src_text is split on fieldsep. Each non-empty piece gets a field name:
    from the first label character found in it when fmt_labeled is given,
    otherwise (or when no known label is found) from its position via
    fmt_ordered. A piece that cannot be named either way is filed under
    field_other and marks the result as neither parsed nor valid.

    src_text    -- encoded text of a single card
    fmt_ordered -- sequence of field names, indexed by field position
    fmt_labeled -- mapping of field name -> label string, or a falsy value
                   when no labels should be looked for
    fieldsep    -- separator string between fields

    Returns (parsed, valid, fields). fields maps each field name to a list
    of (index, value) pairs in the order they were encountered.
    """
    parsed = True
    valid = True
    fields = {}

    if fmt_labeled:
        # invert name -> label, so a label found in the text gives the name
        labels = {fmt_labeled[k] : k for k in fmt_labeled}
        # The labels are spliced into a character class, so escape them:
        # otherwise a label such as ']', '^', '-' or '\' would change the
        # meaning of the pattern.
        label_regex = re.compile('[' + ''.join(re.escape(label) for label in labels) + ']')

    textfields = src_text.split(fieldsep)
    last = len(textfields) - 1
    # idx is the position used to look up fmt_ordered; true_idx counts
    # every piece produced by the split, including the empty ones
    idx = 0
    for true_idx, textfield in enumerate(textfields):
        if textfield == '':
            # ignore leading or trailing empty fields due to seps; count the
            # field index for other empty fields but don't add them
            if not (true_idx == 0 or true_idx == last):
                idx += 1
            continue

        lab = None
        if fmt_labeled:
            labs = label_regex.findall(textfield)
            # use the first label if we saw any at all
            # NOTE(review): the label character is left inside textfield;
            # confirm whether consumers expect it to be stripped.
            if labs:
                lab = labs[0]

        if lab and lab in labels:
            # try to use the field label if we got one
            fname = labels[lab]
        elif idx < len(fmt_ordered):
            # fall back to the field order specified
            fname = fmt_ordered[idx]
        else:
            # we don't know what to do with this field: call it other
            fname = field_other
            parsed = False
            valid = False

        # specialized handling
        if fname == field_cost:
            fval = Manacost(textfield)
            parsed = parsed and fval.parsed
            valid = valid and fval.valid
        elif fname == field_text:
            fval = Manatext(textfield)
            valid = valid and fval.valid
        elif fname in (field_supertypes, field_types, field_subtypes):
            fval = textfield.split()
        else:
            fval = textfield
        fields.setdefault(fname, []).append((idx, fval))

        idx += 1

    # again, bsides are handled by the constructor
    return parsed, valid and fields_check_valid(fields), fields
|
||||||
|
|
||||||
# Here's the actual Card class that other files should use.
|
# Here's the actual Card class that other files should use.
|
||||||
|
|
||||||
|
@ -272,8 +337,8 @@ class Card:
|
||||||
if self.fields:
|
if self.fields:
|
||||||
for field in self.fields:
|
for field in self.fields:
|
||||||
# look for a specialized set function
|
# look for a specialized set function
|
||||||
if '_set_' + field in self.__dict__:
|
if hasattr(self, '_set_' + field):
|
||||||
self.__dict__['_set_' + field](self.fields[field])
|
getattr(self, '_set_' + field)(self.fields[field])
|
||||||
# otherwise use the default one
|
# otherwise use the default one
|
||||||
elif field in self.__dict__:
|
elif field in self.__dict__:
|
||||||
self.set_field_default(field, self.fields[field])
|
self.set_field_default(field, self.fields[field])
|
||||||
|
@ -330,7 +395,6 @@ class Card:
|
||||||
break # only use the first one...
|
break # only use the first one...
|
||||||
|
|
||||||
def _set_text(self, values):
|
def _set_text(self, values):
|
||||||
mtext = ''
|
|
||||||
for idx, value in values:
|
for idx, value in values:
|
||||||
mtext = value
|
mtext = value
|
||||||
self.__dict__[field_text] = mtext
|
self.__dict__[field_text] = mtext
|
||||||
|
@ -340,6 +404,7 @@ class Card:
|
||||||
self.__dict__[field_text + '_words'] = re.sub(utils.unletters_regex,
|
self.__dict__[field_text + '_words'] = re.sub(utils.unletters_regex,
|
||||||
' ',
|
' ',
|
||||||
fulltext).split()
|
fulltext).split()
|
||||||
|
break # only use the first one...
|
||||||
|
|
||||||
def _set_other(self, values):
|
def _set_other(self, values):
|
||||||
# just record these, we could do somthing unset valid if we really wanted
|
# just record these, we could do somthing unset valid if we really wanted
|
||||||
|
@ -358,8 +423,8 @@ class Card:
|
||||||
|
|
||||||
for field in fmt_ordered:
|
for field in fmt_ordered:
|
||||||
if field in self.__dict__:
|
if field in self.__dict__:
|
||||||
if self.__dict__[field]:
|
|
||||||
outfield = self.__dict__[field]
|
outfield = self.__dict__[field]
|
||||||
|
if outfield:
|
||||||
# specialized field handling for the ones that aren't strings (sigh)
|
# specialized field handling for the ones that aren't strings (sigh)
|
||||||
if isinstance(outfield, list):
|
if isinstance(outfield, list):
|
||||||
outfield_str = ' '.join(outfield)
|
outfield_str = ' '.join(outfield)
|
||||||
|
|
380
lib/datalib.py
Normal file
380
lib/datalib.py
Normal file
|
@ -0,0 +1,380 @@
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import utils
|
||||||
|
from cardlib import Card
|
||||||
|
|
||||||
|
# Format a list of rows of data into nice columns.
|
||||||
|
# Note that it's the columns that are nice, not this code.
|
||||||
|
def padrows(l):
    """Render rows of cells as strings whose columns line up.

    Each element of l is a sequence of cells. Every cell is converted with
    str(), left-justified to the width of the widest cell found at the same
    position in any row, and followed by a single space. Rows may have
    different lengths.

    Returns a list containing one string per row of l.
    """
    # widest str() form seen at each column position
    widths = []
    for row in l:
        for i, cell in enumerate(row):
            n = len(str(cell))
            if i < len(widths):
                widths[i] = max(n, widths[i])
            else:
                widths.append(n)
    # build each line with one join instead of growing a string with +=
    return [''.join(str(cell).ljust(widths[i]) + ' '
                    for i, cell in enumerate(row))
            for row in l]
|
||||||
|
def printrows(l):
    """Print every element of l on its own line."""
    for row in l:
        # Single-argument call form: prints the same thing under the
        # Python 2 print statement and also parses as Python 3.
        print(row)
|
||||||
|
|
||||||
|
# index management helpers
|
||||||
|
def index_size(d):
    """Return the combined length of every bucket stored in index d.

    d maps keys to sized containers; the result is the sum of len() over
    all of its values, or 0 when d is empty.
    """
    total = 0
    for key in d:
        total += len(d[key])
    return total
|
||||||
|
|
||||||
|
def inc(d, k, obj):
    """Accumulate obj into d under key k.

    Falsy keys are ignored, with the exception of keys equal to 0, which
    are accepted. The first value for a key is stored as given (the same
    object, not a copy); later values are combined into the stored one
    with +=.
    """
    if not (k or k == 0):
        return
    if k not in d:
        d[k] = obj
    else:
        d[k] += obj
|
||||||
|
|
||||||
|
# thanks gleemax
|
||||||
|
def plimit(s, mlen = 1000):
    """Return s cut down to at most mlen characters for printing.

    When s is longer than mlen, its first mlen characters are returned
    followed by the marker '[...]'; otherwise s is returned unchanged.
    """
    if len(s) > mlen:
        # cut at the requested limit, not at a fixed 1000
        return s[:mlen] + '[...]'
    return s
|
||||||
|
|
||||||
|
class Datamine:
|
||||||
|
# build the global indices
|
||||||
|
def __init__(self, card_srcs):
|
||||||
|
# global card pools
|
||||||
|
self.unparsed_cards = []
|
||||||
|
self.invalid_cards = []
|
||||||
|
self.cards = []
|
||||||
|
self.allcards = []
|
||||||
|
|
||||||
|
# global indices
|
||||||
|
self.by_name = {}
|
||||||
|
self.by_type = {}
|
||||||
|
self.by_type_inclusive = {}
|
||||||
|
self.by_supertype = {}
|
||||||
|
self.by_supertype_inclusive = {}
|
||||||
|
self.by_subtype = {}
|
||||||
|
self.by_subtype_inclusive = {}
|
||||||
|
self.by_color = {}
|
||||||
|
self.by_color_inclusive = {}
|
||||||
|
self.by_color_count = {}
|
||||||
|
self.by_cmc = {}
|
||||||
|
self.by_cost = {}
|
||||||
|
self.by_power = {}
|
||||||
|
self.by_toughness = {}
|
||||||
|
self.by_pt = {}
|
||||||
|
self.by_loyalty = {}
|
||||||
|
self.by_textlines = {}
|
||||||
|
self.by_textlen = {}
|
||||||
|
|
||||||
|
self.indices = {
|
||||||
|
'by_name' : self.by_name,
|
||||||
|
'by_type' : self.by_type,
|
||||||
|
'by_type_inclusive' : self.by_type_inclusive,
|
||||||
|
'by_supertype' : self.by_supertype,
|
||||||
|
'by_supertype_inclusive' : self.by_supertype_inclusive,
|
||||||
|
'by_subtype' : self.by_subtype,
|
||||||
|
'by_subtype_inclusive' : self.by_subtype_inclusive,
|
||||||
|
'by_color' : self.by_color,
|
||||||
|
'by_color_inclusive' : self.by_color_inclusive,
|
||||||
|
'by_color_count' : self.by_color_count,
|
||||||
|
'by_cmc' : self.by_cmc,
|
||||||
|
'by_cost' : self.by_cost,
|
||||||
|
'by_power' : self.by_power,
|
||||||
|
'by_toughness' : self.by_toughness,
|
||||||
|
'by_pt' : self.by_pt,
|
||||||
|
'by_loyalty' : self.by_loyalty,
|
||||||
|
'by_textlines' : self.by_textlines,
|
||||||
|
'by_textlen' : self.by_textlen,
|
||||||
|
}
|
||||||
|
|
||||||
|
for card_src in card_srcs:
|
||||||
|
# the empty card is not interesting
|
||||||
|
if not card_src:
|
||||||
|
continue
|
||||||
|
card = Card(card_src)
|
||||||
|
if card.valid:
|
||||||
|
self.cards += [card]
|
||||||
|
self.allcards += [card]
|
||||||
|
elif card.parsed:
|
||||||
|
self.invalid_cards += [card]
|
||||||
|
self.allcards += [card]
|
||||||
|
else:
|
||||||
|
self.unparsed_cards += [card]
|
||||||
|
|
||||||
|
if card.parsed:
|
||||||
|
inc(self.by_name, card.name, [card])
|
||||||
|
|
||||||
|
inc(self.by_type, ' '.join(card.types), [card])
|
||||||
|
for t in card.types:
|
||||||
|
inc(self.by_type_inclusive, t, [card])
|
||||||
|
inc(self.by_supertype, ' '.join(card.supertypes), [card])
|
||||||
|
for t in card.supertypes:
|
||||||
|
inc(self.by_supertype_inclusive, t, [card])
|
||||||
|
inc(self.by_subtype, ' '.join(card.subtypes), [card])
|
||||||
|
for t in card.subtypes:
|
||||||
|
inc(self.by_subtype_inclusive, t, [card])
|
||||||
|
|
||||||
|
if card.cost.colors:
|
||||||
|
inc(self.by_color, card.cost.colors, [card])
|
||||||
|
for c in card.cost.colors:
|
||||||
|
inc(self.by_color_inclusive, c, [card])
|
||||||
|
inc(self.by_color_count, len(card.cost.colors), [card])
|
||||||
|
else:
|
||||||
|
# colorless, still want to include in these tables
|
||||||
|
inc(self.by_color, 'A', [card])
|
||||||
|
inc(self.by_color_inclusive, 'A', [card])
|
||||||
|
inc(self.by_color_count, 0, [card])
|
||||||
|
|
||||||
|
inc(self.by_cmc, card.cost.cmc, [card])
|
||||||
|
inc(self.by_cost, card.cost.encode() if card.cost.encode() else 'none', [card])
|
||||||
|
|
||||||
|
inc(self.by_power, card.pt_p, [card])
|
||||||
|
inc(self.by_toughness, card.pt_t, [card])
|
||||||
|
inc(self.by_pt, card.pt, [card])
|
||||||
|
|
||||||
|
inc(self.by_loyalty, card.loyalty, [card])
|
||||||
|
|
||||||
|
inc(self.by_textlines, len(card.text_lines), [card])
|
||||||
|
inc(self.by_textlen, len(card.text.encode()), [card])
|
||||||
|
|
||||||
|
# summarize the indices
|
||||||
|
# Yes, this printing code is pretty terrible.
|
||||||
|
def summarize(self, hsize = 10, vsize = 10, cmcsize = 20):
|
||||||
|
print '===================='
|
||||||
|
print str(len(self.cards)) + ' valid cards, ' + str(len(self.invalid_cards)) + ' invalid cards.'
|
||||||
|
print str(len(self.allcards)) + ' cards parsed, ' + str(len(self.unparsed_cards)) + ' failed to parse'
|
||||||
|
print '--------------------'
|
||||||
|
print str(len(self.by_name)) + ' unique card names'
|
||||||
|
print '--------------------'
|
||||||
|
print (str(len(self.by_color_inclusive)) + ' represented colors (including colorless as \'A\'), '
|
||||||
|
+ str(len(self.by_color)) + ' combinations')
|
||||||
|
print 'Breakdown by color:'
|
||||||
|
rows = [self.by_color_inclusive.keys()]
|
||||||
|
rows += [[len(self.by_color_inclusive[k]) for k in rows[0]]]
|
||||||
|
printrows(padrows(rows))
|
||||||
|
print 'Breakdown by number of colors:'
|
||||||
|
rows = [self.by_color_count.keys()]
|
||||||
|
rows += [[len(self.by_color_count[k]) for k in rows[0]]]
|
||||||
|
printrows(padrows(rows))
|
||||||
|
print '--------------------'
|
||||||
|
print str(len(self.by_type_inclusive)) + ' unique card types, ' + str(len(self.by_type)) + ' combinations'
|
||||||
|
print 'Breakdown by type:'
|
||||||
|
d = sorted(self.by_type_inclusive,
|
||||||
|
lambda x,y: cmp(len(self.by_type_inclusive[x]), len(self.by_type_inclusive[y])),
|
||||||
|
reverse = True)
|
||||||
|
rows = [[k for k in d[:hsize]]]
|
||||||
|
rows += [[len(self.by_type_inclusive[k]) for k in rows[0]]]
|
||||||
|
printrows(padrows(rows))
|
||||||
|
print '--------------------'
|
||||||
|
print (str(len(self.by_subtype_inclusive)) + ' unique subtypes, '
|
||||||
|
+ str(len(self.by_subtype)) + ' combinations')
|
||||||
|
print '-- Popular subtypes: --'
|
||||||
|
d = sorted(self.by_subtype_inclusive,
|
||||||
|
lambda x,y: cmp(len(self.by_subtype_inclusive[x]), len(self.by_subtype_inclusive[y])),
|
||||||
|
reverse = True)
|
||||||
|
rows = []
|
||||||
|
for k in d[0:vsize]:
|
||||||
|
rows += [[k, len(self.by_subtype_inclusive[k])]]
|
||||||
|
printrows(padrows(rows))
|
||||||
|
print '-- Top combinations: --'
|
||||||
|
d = sorted(self.by_subtype,
|
||||||
|
lambda x,y: cmp(len(self.by_subtype[x]), len(self.by_subtype[y])),
|
||||||
|
reverse = True)
|
||||||
|
rows = []
|
||||||
|
for k in d[0:vsize]:
|
||||||
|
rows += [[k, len(self.by_subtype[k])]]
|
||||||
|
printrows(padrows(rows))
|
||||||
|
print '--------------------'
|
||||||
|
print (str(len(self.by_supertype_inclusive)) + ' unique supertypes, '
|
||||||
|
+ str(len(self.by_supertype)) + ' combinations')
|
||||||
|
print 'Breakdown by supertype:'
|
||||||
|
d = sorted(self.by_supertype_inclusive,
|
||||||
|
lambda x,y: cmp(len(self.by_supertype_inclusive[x]),len(self.by_supertype_inclusive[y])),
|
||||||
|
reverse = True)
|
||||||
|
rows = [[k for k in d[:hsize]]]
|
||||||
|
rows += [[len(self.by_supertype_inclusive[k]) for k in rows[0]]]
|
||||||
|
printrows(padrows(rows))
|
||||||
|
print '--------------------'
|
||||||
|
print str(len(self.by_cmc)) + ' different CMCs, ' + str(len(self.by_cost)) + ' unique mana costs'
|
||||||
|
print 'Breakdown by CMC:'
|
||||||
|
d = sorted(self.by_cmc, reverse = False)
|
||||||
|
rows = [[k for k in d[:cmcsize]]]
|
||||||
|
rows += [[len(self.by_cmc[k]) for k in rows[0]]]
|
||||||
|
printrows(padrows(rows))
|
||||||
|
print '-- Popular mana costs: --'
|
||||||
|
d = sorted(self.by_cost,
|
||||||
|
lambda x,y: cmp(len(self.by_cost[x]), len(self.by_cost[y])),
|
||||||
|
reverse = True)
|
||||||
|
rows = []
|
||||||
|
for k in d[0:vsize]:
|
||||||
|
rows += [[utils.from_mana(k), len(self.by_cost[k])]]
|
||||||
|
printrows(padrows(rows))
|
||||||
|
print '--------------------'
|
||||||
|
print str(len(self.by_pt)) + ' unique p/t combinations'
|
||||||
|
if len(self.by_power) > 0 and len(self.by_toughness) > 0:
|
||||||
|
print ('Largest power: ' + str(max(map(len, self.by_power)) - 1) +
|
||||||
|
', largest toughness: ' + str(max(map(len, self.by_toughness)) - 1))
|
||||||
|
print '-- Popular p/t values: --'
|
||||||
|
d = sorted(self.by_pt,
|
||||||
|
lambda x,y: cmp(len(self.by_pt[x]), len(self.by_pt[y])),
|
||||||
|
reverse = True)
|
||||||
|
rows = []
|
||||||
|
for k in d[0:vsize]:
|
||||||
|
rows += [[utils.from_unary(k), len(self.by_pt[k])]]
|
||||||
|
printrows(padrows(rows))
|
||||||
|
print '--------------------'
|
||||||
|
print 'Loyalty values:'
|
||||||
|
d = sorted(self.by_loyalty,
|
||||||
|
lambda x,y: cmp(len(self.by_loyalty[x]), len(self.by_loyalty[y])),
|
||||||
|
reverse = True)
|
||||||
|
rows = []
|
||||||
|
for k in d[0:vsize]:
|
||||||
|
rows += [[utils.from_unary(k), len(self.by_loyalty[k])]]
|
||||||
|
printrows(padrows(rows))
|
||||||
|
print '--------------------'
|
||||||
|
if len(self.by_textlen) > 0 and len(self.by_textlines) > 0:
|
||||||
|
print('Card text ranges from ' + str(min(self.by_textlen)) + ' to '
|
||||||
|
+ str(max(self.by_textlen)) + ' characters in length')
|
||||||
|
print('Card text ranges from ' + str(min(self.by_textlines)) + ' to '
|
||||||
|
+ str(max(self.by_textlines)) + ' lines')
|
||||||
|
print '-- Line counts by frequency: --'
|
||||||
|
d = sorted(self.by_textlines,
|
||||||
|
lambda x,y: cmp(len(self.by_textlines[x]), len(self.by_textlines[y])),
|
||||||
|
reverse = True)
|
||||||
|
rows = []
|
||||||
|
for k in d[0:vsize]:
|
||||||
|
rows += [[k, len(self.by_textlines[k])]]
|
||||||
|
printrows(padrows(rows))
|
||||||
|
print '===================='
|
||||||
|
|
||||||
|
|
||||||
|
# describe outliers in the indices
|
||||||
|
def outliers(self, hsize = 10, vsize = 10, dump_invalid = False):
    """Print a report of the extreme entries found in the card indices.

    The report is written to stdout and covers, in order: an overview
    table of every index in self.indices; the shortest, longest and most
    duplicated card names; the longest type, subtype and supertype keys;
    the longest mana cost key and the largest cmc key; the longest power
    and toughness keys; the largest line-count and character-count keys;
    and the number of invalid and unparsed cards.

    hsize -- accepted for interface compatibility; this method does not
             read it.
    vsize -- only the vsize most duplicated names are considered for the
             duplicated-names table.
    dump_invalid -- when true, print repr(card.fields) for every invalid
                    and unparsed card; otherwise only report the counts.
    """
    # min()/max() with a key return the first extreme element in iteration
    # order, which is exactly what a stable sort followed by [0] yields,
    # without sorting the whole index.
    print('********************')
    print('Overview of indices:')
    rows = [['Index Name', 'Keys', 'Total Members']]
    for index in self.indices:
        members = self.indices[index]
        rows += [[index, len(members), index_size(members)]]
    printrows(padrows(rows))
    print('********************')

    if len(self.by_name) > 0:
        scardname = min(self.by_name, key=len)
        print('Shortest Cardname: (' + str(len(scardname)) + ')')
        print(' ' + scardname)
        lcardname = max(self.by_name, key=len)
        print('Longest Cardname: (' + str(len(lcardname)) + ')')
        print(' ' + lcardname)
        # Rank names by how many cards share them, most shared first.
        ranked = sorted(self.by_name,
                        key=lambda name: len(self.by_name[name]),
                        reverse=True)
        rows = [[name, len(self.by_name[name])]
                for name in ranked[:vsize]
                if len(self.by_name[name]) > 1]
        if not rows:
            print('No duplicated cardnames')
        else:
            print('-- Most duplicated names: --')
            printrows(padrows(rows))
    else:
        print('No cards indexed by name?')
    print('--------------------')

    if len(self.by_type) > 0:
        ltypes = max(self.by_type, key=len)
        print('Longest card type: (' + str(len(ltypes)) + ')')
        print(' ' + ltypes)
    else:
        print('No cards indexed by type?')
    if len(self.by_subtype) > 0:
        lsubtypes = max(self.by_subtype, key=len)
        print('Longest subtype: (' + str(len(lsubtypes)) + ')')
        print(' ' + lsubtypes)
    else:
        print('No cards indexed by subtype?')
    if len(self.by_supertype) > 0:
        lsupertypes = max(self.by_supertype, key=len)
        print('Longest supertype: (' + str(len(lsupertypes)) + ')')
        print(' ' + lsupertypes)
    else:
        print('No cards indexed by supertype?')
    print('--------------------')

    if len(self.by_cost) > 0:
        lcost = max(self.by_cost, key=len)
        print('Longest mana cost: (' + str(len(lcost)) + ')')
        print(' ' + utils.from_mana(lcost))
        print('\n' + plimit(self.by_cost[lcost][0].encode()) + '\n')
    else:
        print('No cards indexed by cost?')
    if len(self.by_cmc) > 0:
        lcmc = max(self.by_cmc)
        print('Largest cmc: (' + str(lcmc) + ')')
        print(' ' + str(self.by_cmc[lcmc][0].cost))
        print('\n' + plimit(self.by_cmc[lcmc][0].encode()))
    else:
        print('No cards indexed by cmc?')
    print('--------------------')

    # Power and toughness keys are ranked by key length, not numeric value.
    # NOTE(review): presumably the keys are unary-encoded strings (they are
    # passed to utils.from_unary) -- confirm against the index builder.
    if len(self.by_power) > 0:
        lpower = max(self.by_power, key=len)
        print('Largest creature power: ' + utils.from_unary(lpower))
        print('\n' + plimit(self.by_power[lpower][0].encode()) + '\n')
    else:
        print('No cards indexed by power?')
    if len(self.by_toughness) > 0:
        ltoughness = max(self.by_toughness, key=len)
        print('Largest creature toughness: ' + utils.from_unary(ltoughness))
        print('\n' + plimit(self.by_toughness[ltoughness][0].encode()))
    else:
        print('No cards indexed by toughness?')
    print('--------------------')

    if len(self.by_textlines) > 0:
        llines = max(self.by_textlines)
        print('Most lines of text in a card: ' + str(llines))
        print('\n' + plimit(self.by_textlines[llines][0].encode()) + '\n')
    else:
        print('No cards indexed by line count?')
    if len(self.by_textlen) > 0:
        ltext = max(self.by_textlen)
        print('Most chars in a card text: ' + str(ltext))
        print('\n' + plimit(self.by_textlen[ltext][0].encode()))
    else:
        print('No cards indexed by char count?')
    print('--------------------')

    print('There were ' + str(len(self.invalid_cards)) + ' invalid cards.')
    if dump_invalid:
        for card in self.invalid_cards:
            print('\n' + repr(card.fields))
    elif len(self.invalid_cards) > 0:
        print('Not summarizing.')
    print('--------------------')

    print('There were ' + str(len(self.unparsed_cards)) + ' unparsed cards.')
    if dump_invalid:
        for card in self.unparsed_cards:
            print('\n' + repr(card.fields))
    elif len(self.unparsed_cards) > 0:
        print('Not summarizing.')
    print('====================')
|
33
summarize.py
Normal file
33
summarize.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import lib.utils as utils
|
||||||
|
import lib.jdecode as jdecode
|
||||||
|
from lib.datalib import Datamine
|
||||||
|
|
||||||
|
def main(fname, verbose = True):
    """Load card sources from fname and print the datamine reports.

    A name ending in '.json' is opened with jdecode.mtg_open_json, and the
    first entry of every non-empty list in its result is used as a card
    source. Any other file is read as text and split on utils.cardsep.
    The sources are handed to Datamine, whose summarize() and outliers()
    reports are then printed.

    fname -- path of the json or encoded card file to read.
    verbose -- when true, announce which kind of file is being opened; also
               forwarded to jdecode.mtg_open_json.
    """
    if fname.endswith('.json'):
        if verbose:
            print('This looks like a json file: ' + fname)
        by_cardname = jdecode.mtg_open_json(fname, verbose)
        # Keep only the first entry stored under each card name.
        card_srcs = [by_cardname[cardname][0]
                     for cardname in by_cardname
                     if len(by_cardname[cardname]) > 0]
    else:
        if verbose:
            print('Opening encoded card file: ' + fname)
        with open(fname, 'rt') as infile:
            contents = infile.read()
        card_srcs = contents.split(utils.cardsep)

    mine = Datamine(card_srcs)
    mine.summarize()
    mine.outliers(dump_invalid = False)
|
||||||
|
|
||||||
|
# Script entry point: expects exactly one argument, the card file to analyse.
if __name__ == '__main__':
    import sys
    if len(sys.argv) == 2:
        main(sys.argv[1])
    else:
        print('Usage: ' + sys.argv[0] + ' ' + '<encoded file>')
        # sys.exit rather than the bare exit() helper: exit() is injected by
        # the site module for interactive use and is not guaranteed to exist
        # (e.g. when the interpreter is started with -S).
        sys.exit(1)
|
Loading…
Reference in a new issue