Datamining code is in a working state; randomize_mana.py is a good demo
of a really specific feature. Other improvements: changing the keys used during JSON decode reordered everything in output.txt, but there shouldn't be any other major changes.
This commit is contained in:
parent
01c78549f5
commit
cbf8ac34e5
5 changed files with 28985 additions and 28744 deletions
254
datamine.py
254
datamine.py
|
@ -5,7 +5,8 @@ import random
|
||||||
|
|
||||||
import utils
|
import utils
|
||||||
|
|
||||||
# format a list of rows of data into nice columns
|
# Format a list of rows of data into nice columns.
|
||||||
|
# Note that it's the columns that are nice, not this code.
|
||||||
def padrows(l):
|
def padrows(l):
|
||||||
# get length for each field
|
# get length for each field
|
||||||
lens = []
|
lens = []
|
||||||
|
@ -28,6 +29,17 @@ def printrows(l):
|
||||||
for row in l:
|
for row in l:
|
||||||
print row
|
print row
|
||||||
|
|
||||||
|
def randomize_all_mana(text):
    """Return text with every mana cost re-encoded in randomized order.

    Every match of utils.mana_regex is first masked with
    utils.reserved_marker; the markers are then filled back in, one at a
    time, with Manacost(match).reencode(randomize = True).
    """
    manastrs = re.findall(utils.mana_regex, text)
    newtext = text
    # Mask the longest costs first so that a cost which happens to be a
    # substring of a longer one cannot split the longer one apart.
    # Duplicates are dropped because str.replace already masks every
    # occurrence of a given cost in one call.
    for manastr in sorted(set(manastrs), key = len, reverse = True):
        newtext = newtext.replace(manastr, utils.reserved_marker)
    # findall returns matches in text order, so filling exactly one marker
    # per match puts each randomized cost back in its original position.
    for manastr in manastrs:
        newtext = newtext.replace(utils.reserved_marker,
                                  Manacost(manastr).reencode(randomize = True),
                                  1)
    return newtext
|
||||||
|
|
||||||
# so this stuff still needs to be cleaned up
|
# so this stuff still needs to be cleaned up
|
||||||
punctuation_chars = r'[+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]'
|
punctuation_chars = r'[+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]'
|
||||||
creature_keywords = [
|
creature_keywords = [
|
||||||
|
@ -184,13 +196,17 @@ class Manacost:
|
||||||
self.colors = self.get_colors()
|
self.colors = self.get_colors()
|
||||||
|
|
||||||
def __str__(self):
    # Wrap the joined symbol sequence in the mana delimiters, then hand the
    # delimited string to utils.mana_untranslate.
    encoded = ''.join([utils.mana_open_delimiter,
                       ''.join(self.sequence),
                       utils.mana_close_delimiter])
    return utils.mana_untranslate(encoded)
|
||||||
|
|
||||||
def format(self, for_forum):
    """Return the delimited mana sequence run through utils.mana_untranslate.

    for_forum is forwarded to utils.mana_untranslate unchanged.
    """
    # str.join() accepts exactly one iterable, so passing for_forum to it
    # raised TypeError on every call; the flag is an option of the
    # untranslation step instead.
    # NOTE(review): assumes utils.mana_untranslate takes the forum flag as
    # its second positional argument -- confirm against utils.
    encoded = (utils.mana_open_delimiter + ''.join(self.sequence)
               + utils.mana_close_delimiter)
    return utils.mana_untranslate(encoded, for_forum)
|
||||||
|
|
||||||
def reencode(self, randomize = False):
|
def reencode(self, randomize = False):
|
||||||
if randomize:
|
if self.none:
|
||||||
|
return ''
|
||||||
|
elif randomize:
|
||||||
# so this won't work very well if mana_unary_marker isn't empty
|
# so this won't work very well if mana_unary_marker isn't empty
|
||||||
return (utils.mana_open_delimiter
|
return (utils.mana_open_delimiter
|
||||||
+ ''.join(random.sample(self.sequence, len(self.sequence)))
|
+ ''.join(random.sample(self.sequence, len(self.sequence)))
|
||||||
|
@ -353,7 +369,7 @@ class Card:
|
||||||
utils.fieldsep,
|
utils.fieldsep,
|
||||||
self.cost.reencode(randomize) if not self.cost.none else '',
|
self.cost.reencode(randomize) if not self.cost.none else '',
|
||||||
utils.fieldsep,
|
utils.fieldsep,
|
||||||
self.text,
|
self.text if not randomize else randomize_all_mana(self.text),
|
||||||
utils.fieldsep,
|
utils.fieldsep,
|
||||||
utils.bsidesep + self.bside.reencode(randomize) if self.bside else '',
|
utils.bsidesep + self.bside.reencode(randomize) if self.bside else '',
|
||||||
])
|
])
|
||||||
|
@ -374,6 +390,7 @@ by_subtype = {}
|
||||||
by_subtype_inclusive = {}
|
by_subtype_inclusive = {}
|
||||||
by_color = {}
|
by_color = {}
|
||||||
by_color_inclusive = {}
|
by_color_inclusive = {}
|
||||||
|
by_color_count = {}
|
||||||
by_cmc = {}
|
by_cmc = {}
|
||||||
by_cost = {}
|
by_cost = {}
|
||||||
by_power = {}
|
by_power = {}
|
||||||
|
@ -383,8 +400,32 @@ by_loyalty = {}
|
||||||
by_textlines = {}
|
by_textlines = {}
|
||||||
by_textlen = {}
|
by_textlen = {}
|
||||||
|
|
||||||
|
indices = {
|
||||||
|
'by_name' : by_name,
|
||||||
|
'by_type' : by_type,
|
||||||
|
'by_type_inclusive' : by_type_inclusive,
|
||||||
|
'by_supertype' : by_supertype,
|
||||||
|
'by_supertype_inclusive' : by_supertype_inclusive,
|
||||||
|
'by_subtype' : by_subtype,
|
||||||
|
'by_subtype_inclusive' : by_subtype_inclusive,
|
||||||
|
'by_color' : by_color,
|
||||||
|
'by_color_inclusive' : by_color_inclusive,
|
||||||
|
'by_color_count' : by_color_count,
|
||||||
|
'by_cmc' : by_cmc,
|
||||||
|
'by_cost' : by_cost,
|
||||||
|
'by_power' : by_power,
|
||||||
|
'by_toughness' : by_toughness,
|
||||||
|
'by_pt' : by_pt,
|
||||||
|
'by_loyalty' : by_loyalty,
|
||||||
|
'by_textlines' : by_textlines,
|
||||||
|
'by_textlen' : by_textlen,
|
||||||
|
}
|
||||||
|
|
||||||
|
def index_size(d):
    """Return the total number of members across all keys of index d.

    d maps keys to sized collections; the result is the sum of their
    lengths (0 for an empty index).
    """
    # Iterate the values directly instead of map + lambda with a second
    # lookup per key.
    return sum(len(members) for members in d.values())
|
||||||
|
|
||||||
def inc(d, k, obj):
|
def inc(d, k, obj):
|
||||||
if k:
|
if k or k == 0:
|
||||||
if k in d:
|
if k in d:
|
||||||
d[k] += obj
|
d[k] += obj
|
||||||
else:
|
else:
|
||||||
|
@ -424,46 +465,51 @@ def analyze(cardtexts):
|
||||||
inc(by_color, card.cost.colors, [card])
|
inc(by_color, card.cost.colors, [card])
|
||||||
for c in card.cost.colors:
|
for c in card.cost.colors:
|
||||||
inc(by_color_inclusive, c, [card])
|
inc(by_color_inclusive, c, [card])
|
||||||
|
inc(by_color_count, len(card.cost.colors), [card])
|
||||||
else:
|
else:
|
||||||
# colorless, still want to include in these tables
|
# colorless, still want to include in these tables
|
||||||
inc(by_color, 'A', [card])
|
inc(by_color, 'A', [card])
|
||||||
inc(by_color_inclusive, 'A', [card])
|
inc(by_color_inclusive, 'A', [card])
|
||||||
|
inc(by_color_count, 0, [card])
|
||||||
|
|
||||||
inc(by_cmc, card.cost.cmc, [card])
|
inc(by_cmc, card.cost.cmc, [card])
|
||||||
inc(by_cost, card.cost.reencode(), [card])
|
inc(by_cost, card.cost.reencode() if card.cost.reencode() else 'none', [card])
|
||||||
|
|
||||||
|
|
||||||
inc(by_power, card.power, [card])
|
inc(by_power, card.power, [card])
|
||||||
inc(by_toughness, card.toughness, [card])
|
inc(by_toughness, card.toughness, [card])
|
||||||
inc(by_pt, card.pt, [card])
|
inc(by_pt, card.pt, [card])
|
||||||
|
|
||||||
|
|
||||||
inc(by_loyalty, card.loyalty, [card])
|
inc(by_loyalty, card.loyalty, [card])
|
||||||
|
|
||||||
inc(by_textlines, len(card.text_lines), [card])
|
inc(by_textlines, len(card.text_lines), [card])
|
||||||
inc(by_textlen, len(card.text), [card])
|
inc(by_textlen, len(card.text), [card])
|
||||||
|
|
||||||
# summarize the indices
|
# summarize the indices
|
||||||
def summarize():
|
# Yes, this printing code is pretty terrible.
|
||||||
|
def summarize(hsize = 10, vsize = 10, cmcsize = 20):
|
||||||
print '===================='
|
print '===================='
|
||||||
print str(len(cards)) + ' valid cards, ' + str(len(invalid_cards)) + ' invalid cards.'
|
print str(len(cards)) + ' valid cards, ' + str(len(invalid_cards)) + ' invalid cards.'
|
||||||
print str(len(allcards)) + ' cards parsed, ' + str(len(unparsed_cards)) + ' failed to parse'
|
print str(len(allcards)) + ' cards parsed, ' + str(len(unparsed_cards)) + ' failed to parse'
|
||||||
print '--------------------'
|
print '--------------------'
|
||||||
print str(len(by_name)) + ' unique card names'
|
print str(len(by_name)) + ' unique card names'
|
||||||
print '--------------------'
|
print '--------------------'
|
||||||
print (str(len(by_color)) + ' represented colors (including colorless as \'A\'), '
|
print (str(len(by_color_inclusive)) + ' represented colors (including colorless as \'A\'), '
|
||||||
+ str(len(by_color_inclusive)) + ' combinations')
|
+ str(len(by_color)) + ' combinations')
|
||||||
print 'Breakdown by color:'
|
print 'Breakdown by color:'
|
||||||
rows = [by_color_inclusive.keys()]
|
rows = [by_color_inclusive.keys()]
|
||||||
rows += [[len(by_color_inclusive[k]) for k in rows[0]]]
|
rows += [[len(by_color_inclusive[k]) for k in rows[0]]]
|
||||||
printrows(padrows(rows))
|
printrows(padrows(rows))
|
||||||
|
print 'Breakdown by number of colors:'
|
||||||
|
rows = [by_color_count.keys()]
|
||||||
|
rows += [[len(by_color_count[k]) for k in rows[0]]]
|
||||||
|
printrows(padrows(rows))
|
||||||
print '--------------------'
|
print '--------------------'
|
||||||
print str(len(by_type_inclusive)) + ' unique card types, ' + str(len(by_type)) + ' combinations'
|
print str(len(by_type_inclusive)) + ' unique card types, ' + str(len(by_type)) + ' combinations'
|
||||||
print 'Breakdown by type:'
|
print 'Breakdown by type:'
|
||||||
d = sorted(by_type_inclusive,
|
d = sorted(by_type_inclusive,
|
||||||
lambda x,y: cmp(len(by_type_inclusive[x]), len(by_type_inclusive[y])),
|
lambda x,y: cmp(len(by_type_inclusive[x]), len(by_type_inclusive[y])),
|
||||||
reverse = True)
|
reverse = True)
|
||||||
rows = [[k for k in d[:10]]]
|
rows = [[k for k in d[:hsize]]]
|
||||||
rows += [[len(by_type_inclusive[k]) for k in rows[0]]]
|
rows += [[len(by_type_inclusive[k]) for k in rows[0]]]
|
||||||
printrows(padrows(rows))
|
printrows(padrows(rows))
|
||||||
print '--------------------'
|
print '--------------------'
|
||||||
|
@ -474,7 +520,7 @@ def summarize():
|
||||||
lambda x,y: cmp(len(by_subtype_inclusive[x]), len(by_subtype_inclusive[y])),
|
lambda x,y: cmp(len(by_subtype_inclusive[x]), len(by_subtype_inclusive[y])),
|
||||||
reverse = True)
|
reverse = True)
|
||||||
rows = []
|
rows = []
|
||||||
for k in d[0:10]:
|
for k in d[0:vsize]:
|
||||||
rows += [[k, len(by_subtype_inclusive[k])]]
|
rows += [[k, len(by_subtype_inclusive[k])]]
|
||||||
printrows(padrows(rows))
|
printrows(padrows(rows))
|
||||||
print '-- Top combinations: --'
|
print '-- Top combinations: --'
|
||||||
|
@ -482,7 +528,7 @@ def summarize():
|
||||||
lambda x,y: cmp(len(by_subtype[x]), len(by_subtype[y])),
|
lambda x,y: cmp(len(by_subtype[x]), len(by_subtype[y])),
|
||||||
reverse = True)
|
reverse = True)
|
||||||
rows = []
|
rows = []
|
||||||
for k in d[0:10]:
|
for k in d[0:vsize]:
|
||||||
rows += [[k, len(by_subtype[k])]]
|
rows += [[k, len(by_subtype[k])]]
|
||||||
printrows(padrows(rows))
|
printrows(padrows(rows))
|
||||||
print '--------------------'
|
print '--------------------'
|
||||||
|
@ -492,15 +538,182 @@ def summarize():
|
||||||
d = sorted(by_supertype_inclusive,
|
d = sorted(by_supertype_inclusive,
|
||||||
lambda x,y: cmp(len(by_supertype_inclusive[x]),len(by_supertype_inclusive[y])),
|
lambda x,y: cmp(len(by_supertype_inclusive[x]),len(by_supertype_inclusive[y])),
|
||||||
reverse = True)
|
reverse = True)
|
||||||
rows = [[k for k in d]]
|
rows = [[k for k in d[:hsize]]]
|
||||||
rows += [[len(by_supertype_inclusive[k]) for k in rows[0]]]
|
rows += [[len(by_supertype_inclusive[k]) for k in rows[0]]]
|
||||||
printrows(padrows(rows))
|
printrows(padrows(rows))
|
||||||
|
print '--------------------'
|
||||||
|
print str(len(by_cmc)) + ' different CMCs, ' + str(len(by_cost)) + ' unique mana costs'
|
||||||
|
print 'Breakdown by CMC:'
|
||||||
|
d = sorted(by_cmc, reverse = False)
|
||||||
|
rows = [[k for k in d[:cmcsize]]]
|
||||||
|
rows += [[len(by_cmc[k]) for k in rows[0]]]
|
||||||
|
printrows(padrows(rows))
|
||||||
|
print '-- Popular mana costs: --'
|
||||||
|
d = sorted(by_cost,
|
||||||
|
lambda x,y: cmp(len(by_cost[x]), len(by_cost[y])),
|
||||||
|
reverse = True)
|
||||||
|
rows = []
|
||||||
|
for k in d[0:vsize]:
|
||||||
|
rows += [[utils.from_mana(k), len(by_cost[k])]]
|
||||||
|
printrows(padrows(rows))
|
||||||
|
print '--------------------'
|
||||||
|
print str(len(by_pt)) + ' unique p/t combinations'
|
||||||
|
print ('Largest power: ' + str(max(map(len, by_power)) - 1) +
|
||||||
|
', largest toughness: ' + str(max(map(len, by_toughness)) - 1))
|
||||||
|
print '-- Popular p/t values: --'
|
||||||
|
d = sorted(by_pt,
|
||||||
|
lambda x,y: cmp(len(by_pt[x]), len(by_pt[y])),
|
||||||
|
reverse = True)
|
||||||
|
rows = []
|
||||||
|
for k in d[0:vsize]:
|
||||||
|
rows += [[utils.from_unary(k), len(by_pt[k])]]
|
||||||
|
printrows(padrows(rows))
|
||||||
|
print '--------------------'
|
||||||
|
print 'Loyalty values:'
|
||||||
|
d = sorted(by_loyalty,
|
||||||
|
lambda x,y: cmp(len(by_loyalty[x]), len(by_loyalty[y])),
|
||||||
|
reverse = True)
|
||||||
|
rows = []
|
||||||
|
for k in d[0:vsize]:
|
||||||
|
rows += [[utils.from_unary(k), len(by_loyalty[k])]]
|
||||||
|
printrows(padrows(rows))
|
||||||
|
print '--------------------'
|
||||||
|
print('Card text ranges from ' + str(min(by_textlen)) + ' to '
|
||||||
|
+ str(max(by_textlen)) + ' characters in length')
|
||||||
|
print('Card text ranges from ' + str(min(by_textlines)) + ' to '
|
||||||
|
+ str(max(by_textlines)) + ' lines')
|
||||||
|
print '-- Line counts by frequency: --'
|
||||||
|
d = sorted(by_textlines,
|
||||||
|
lambda x,y: cmp(len(by_textlines[x]), len(by_textlines[y])),
|
||||||
|
reverse = True)
|
||||||
|
rows = []
|
||||||
|
for k in d[0:vsize]:
|
||||||
|
rows += [[k, len(by_textlines[k])]]
|
||||||
|
printrows(padrows(rows))
|
||||||
print '===================='
|
print '===================='
|
||||||
# TODO: more to come
|
|
||||||
|
|
||||||
# describe outliers in the indices
|
# describe outliers in the indices
|
||||||
def _longest_key(index):
    # Key with the greatest len(); on ties the first key iterated wins,
    # the same pick a stable descending sort followed by [0] would make.
    return max(index, key = len)


def _print_longest_key(index, heading, missing):
    # Print the longest key of index under heading, or missing if index
    # is empty.
    if index:
        longest = _longest_key(index)
        print(heading + ' (' + str(len(longest)) + ')')
        print(' ' + longest)
    else:
        print(missing)


def _print_rejects(kind, rejects, dump):
    # Report how many cards of this kind were rejected; dump their raw text
    # only when asked to.
    print('There were ' + str(len(rejects)) + ' ' + kind + ' cards.')
    if dump:
        for card in rejects:
            print('\n' + card.raw)
    elif rejects:
        print('Not summarizing.')


# describe outliers in the indices
def outliers(hsize = 10, vsize = 10, dump_invalid = False):
    """Print an overview of every index followed by its extreme entries.

    hsize        -- accepted but not used by this function.
    vsize        -- maximum number of rows in the duplicated-names table.
    dump_invalid -- when true, print the raw text of every invalid and
                    every unparsed card instead of only counting them.

    Prints shortest/longest names, most duplicated names, longest type
    strings, longest and largest costs, largest power/toughness, and the
    longest card texts.
    """
    print('********************')
    print('Overview of indices:')
    rows = [['Index Name', 'Keys', 'Total Members']]
    for index in indices:
        rows.append([index, len(indices[index]), index_size(indices[index])])
    printrows(padrows(rows))
    print('********************')

    if by_name:
        # min/max with key = len select the same element as sorting by
        # length and taking [0], without sorting the whole index.
        scardname = min(by_name, key = len)
        print('Shortest Cardname: (' + str(len(scardname)) + ')')
        print(' ' + scardname)
        lcardname = _longest_key(by_name)
        print('Longest Cardname: (' + str(len(lcardname)) + ')')
        print(' ' + lcardname)
        most_common = sorted(by_name,
                             key = lambda k: len(by_name[k]),
                             reverse = True)
        # Only names that occur more than once count as duplicated.
        rows = [[k, len(by_name[k])] for k in most_common[0:vsize]
                if len(by_name[k]) > 1]
        if rows == []:
            print('No duplicated cardnames')
        else:
            print('-- Most duplicated names: --')
            printrows(padrows(rows))
    else:
        print('No cards indexed by name?')
    print('--------------------')

    _print_longest_key(by_type, 'Longest card type:',
                       'No cards indexed by type?')
    _print_longest_key(by_subtype, 'Longest subtype:',
                       'No cards indexed by subtype?')
    _print_longest_key(by_supertype, 'Longest supertype:',
                       'No cards indexed by supertype?')
    print('--------------------')

    if by_cost:
        lcost = _longest_key(by_cost)
        print('Longest mana cost: (' + str(len(lcost)) + ')')
        print(' ' + utils.from_mana(lcost))
        print('\n' + by_cost[lcost][0].reencode() + '\n')
    else:
        print('No cards indexed by cost?')
    if by_cmc:
        lcmc = max(by_cmc)
        print('Largest cmc: (' + str(lcmc) + ')')
        print(' ' + str(by_cmc[lcmc][0].cost))
        print('\n' + by_cmc[lcmc][0].reencode())
    else:
        print('No cards indexed by cmc?')
    print('--------------------')

    if by_power:
        # The longest key is reported as the largest value.
        lpower = _longest_key(by_power)
        print('Largest creature power: ' + utils.from_unary(lpower))
        print('\n' + by_power[lpower][0].reencode() + '\n')
    else:
        print('No cards indexed by power?')
    if by_toughness:
        ltoughness = _longest_key(by_toughness)
        print('Largest creature toughness: ' + utils.from_unary(ltoughness))
        print('\n' + by_toughness[ltoughness][0].reencode())
    else:
        print('No cards indexed by toughness?')
    print('--------------------')

    if by_textlines:
        llines = max(by_textlines)
        print('Most lines of text in a card: ' + str(llines))
        print('\n' + by_textlines[llines][0].reencode() + '\n')
    else:
        print('No cards indexed by line count?')
    if by_textlen:
        ltext = max(by_textlen)
        print('Most chars in a card text: ' + str(ltext))
        print('\n' + by_textlen[ltext][0].reencode())
    else:
        print('No cards indexed by char count?')
    print('--------------------')

    _print_rejects('invalid', invalid_cards, dump_invalid)
    print('--------------------')
    _print_rejects('unparsed', unparsed_cards, dump_invalid)
    print('====================')
|
||||||
|
|
||||||
def main(fname, oname = None, verbose = False):
|
def main(fname, oname = None, verbose = False):
|
||||||
if verbose:
|
if verbose:
|
||||||
|
@ -512,7 +725,7 @@ def main(fname, oname = None, verbose = False):
|
||||||
cardtexts = text.split(utils.cardsep)
|
cardtexts = text.split(utils.cardsep)
|
||||||
analyze(cardtexts)
|
analyze(cardtexts)
|
||||||
summarize()
|
summarize()
|
||||||
outliers()
|
outliers(dump_invalid = False)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import sys
|
import sys
|
||||||
|
@ -523,4 +736,3 @@ if __name__ == '__main__':
|
||||||
else:
|
else:
|
||||||
print 'Usage: ' + sys.argv[0] + ' ' + '<encoded file> [output filename]'
|
print 'Usage: ' + sys.argv[0] + ' ' + '<encoded file> [output filename]'
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
|
|
@ -513,7 +513,7 @@ def main(fname, oname = None, verbose = True):
|
||||||
if oname == None:
|
if oname == None:
|
||||||
print val + '\n'
|
print val + '\n'
|
||||||
else:
|
else:
|
||||||
ofile.write(val + '\n\n')
|
ofile.write(val + cardsep)
|
||||||
|
|
||||||
# print len(badwords)
|
# print len(badwords)
|
||||||
# for word in badwords:
|
# for word in badwords:
|
||||||
|
|
|
@ -2,7 +2,7 @@ import json
|
||||||
|
|
||||||
# to allow filtering of sets like un sets, etc...
|
# to allow filtering of sets like un sets, etc...
|
||||||
def legal_set(set):
    # A set is legal unless its type is 'un' or it is the set named
    # 'Celebration'.
    if set['type'] == 'un':
        return False
    return set['name'] != 'Celebration'
|
||||||
|
|
||||||
def mtg_open_json(fname, verbose = False):
|
def mtg_open_json(fname, verbose = False):
|
||||||
|
|
||||||
|
@ -25,7 +25,8 @@ def mtg_open_json(fname, verbose = False):
|
||||||
cardnumber = None
|
cardnumber = None
|
||||||
if 'number' in card:
|
if 'number' in card:
|
||||||
cardnumber = card['number']
|
cardnumber = card['number']
|
||||||
cardname = card['name']
|
# the lower avoids duplication of at least one card (Will-o/O'-the-Wisp)
|
||||||
|
cardname = card['name'].lower()
|
||||||
|
|
||||||
uid = set['code']
|
uid = set['code']
|
||||||
if cardnumber == None:
|
if cardnumber == None:
|
||||||
|
@ -46,8 +47,6 @@ def mtg_open_json(fname, verbose = False):
|
||||||
if uid[-1:] == 'b':
|
if uid[-1:] == 'b':
|
||||||
bsides[uid] = card
|
bsides[uid] = card
|
||||||
|
|
||||||
#break
|
|
||||||
|
|
||||||
for uid in bsides:
|
for uid in bsides:
|
||||||
aside_uid = uid[:-1] + 'a'
|
aside_uid = uid[:-1] + 'a'
|
||||||
if aside_uid in asides:
|
if aside_uid in asides:
|
||||||
|
|
57406
output.txt
57406
output.txt
File diff suppressed because it is too large
Load diff
44
randomize_mana.py
Normal file
44
randomize_mana.py
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
import utils
|
||||||
|
import datamine
|
||||||
|
import random
|
||||||
|
|
||||||
|
def main(fname, oname = None, verbose = True):
    """Emit several mana-randomized encodings of every valid card in fname.

    The encoded file is split on utils.cardsep and run through
    datamine.analyze; each card in datamine.cards is then re-encoded five
    times with randomization and the combined list is shuffled. Output goes
    to oname when given, otherwise to stdout.
    """
    if verbose:
        print('Opening encoded card file: ' + fname)

    with open(fname, 'rt') as infile:
        raw = infile.read()

    # overkill: a full analysis is run just to get the parsed card objects
    datamine.analyze(raw.split(utils.cardsep))

    reps = 5
    multicards = [card.reencode(randomize = True)
                  for card in datamine.cards
                  for _ in range(reps)]

    random.shuffle(multicards)

    if not oname:
        for textcard in multicards:
            print(textcard + '\n')
        return

    if verbose:
        print('Writing output to: ' + oname)
    with open(oname, 'w') as ofile:
        for textcard in multicards:
            ofile.write(textcard + utils.cardsep)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    import sys
    # argv[0] is the script itself: one extra argument is the input file,
    # two are the input file and the output file.
    if len(sys.argv) == 2:
        main(sys.argv[1])
    elif len(sys.argv) == 3:
        main(sys.argv[1], oname = sys.argv[2])
    else:
        print('Usage: ' + sys.argv[0] + ' ' + '<encoded file> [output filename]')
        # sys.exit rather than the bare exit() helper, which is only
        # installed by the site module for interactive use.
        sys.exit(1)
|
Loading…
Reference in a new issue