2to3 conversion (some things not yet changed)

This commit is contained in:
David Heyman 2016-08-13 21:16:43 -04:00
parent 65905cf656
commit 2f95b79f10
24 changed files with 388 additions and 356 deletions

View file

@ -19,7 +19,7 @@ def main(fname, oname = None, verbose = True, encoding = 'std',
# there is a sane thing to do here (namely, produce both at the same time)
# but we don't support it yet.
if for_mse and for_html:
print 'ERROR - decode.py - incompatible formats "mse" and "html"'
print('ERROR - decode.py - incompatible formats "mse" and "html"')
return
fmt_ordered = cardlib.fmt_ordered_default
@ -52,16 +52,16 @@ def main(fname, oname = None, verbose = True, encoding = 'std',
namediff = Namediff()
cbow = CBOW()
if verbose:
print 'Computing nearest names...'
nearest_names = namediff.nearest_par(map(lambda c: c.name, cards), n=3)
print('Computing nearest names...')
nearest_names = namediff.nearest_par([c.name for c in cards], n=3)
if verbose:
print 'Computing nearest cards...'
print('Computing nearest cards...')
nearest_cards = cbow.nearest_par(cards)
for i in range(0, len(cards)):
cards[i].nearest_names = nearest_names[i]
cards[i].nearest_cards = nearest_cards[i]
if verbose:
print '...Done.'
print('...Done.')
def hoverimg(cardname, dist, nd):
truename = nd.names[cardname]
@ -238,17 +238,18 @@ def main(fname, oname = None, verbose = True, encoding = 'std',
if oname:
if for_html:
print oname
print(oname)
# if ('.html' != oname[-])
# oname += '.html'
if verbose:
print 'Writing output to: ' + oname
print('Writing output to: ' + oname)
with open(oname, 'w') as ofile:
writecards(ofile)
if for_mse:
# Copy whatever output file is produced, name the copy 'set' (yes, no extension).
# Copy whatever output file is produced, name the copy 'set' (yes,
# no extension).
if os.path.isfile('set'):
print 'ERROR: tried to overwrite existing file "set" - aborting.'
print('ERROR: tried to overwrite existing file "set" - aborting.')
return
shutil.copyfile(oname, 'set')
# Use the freaky mse extension instead of zip.
@ -258,7 +259,8 @@ def main(fname, oname = None, verbose = True, encoding = 'std',
zf.write('set')
finally:
if verbose:
print 'Made an MSE set file called ' + oname + '.mse-set.'
print('Made an MSE set file called ' +
oname + '.mse-set.')
# The set file is useless outside the .mse-set, delete it.
os.remove('set')
else:

View file

@ -47,16 +47,16 @@ def main(fname, oname = None, verbose = True, encoding = 'std',
raise ValueError('encode.py: unknown encoding: ' + encoding)
if verbose:
print 'Preparing to encode:'
print ' Using encoding ' + repr(encoding)
print('Preparing to encode:')
print(' Using encoding ' + repr(encoding))
if stable:
print ' NOT randomizing order of cards.'
print(' NOT randomizing order of cards.')
if randomize_mana:
print ' Randomizing order of symobls in manacosts.'
print(' Randomizing order of symobls in manacosts.')
if not fmt_labeled:
print ' NOT labeling fields for this run (may be harder to decode).'
print(' NOT labeling fields for this run (may be harder to decode).')
if not line_transformations:
print ' NOT using line reordering transformations'
print(' NOT using line reordering transformations')
cards = jdecode.mtg_open_file(fname, verbose=verbose, linetrans=line_transformations)
@ -82,7 +82,7 @@ def main(fname, oname = None, verbose = True, encoding = 'std',
if oname:
if verbose:
print 'Writing output to: ' + oname
print('Writing output to: ' + oname)
with open(oname, 'w') as ofile:
writecards(ofile)
else:

View file

@ -247,20 +247,19 @@ def fields_from_json(src_json, linetrans = True):
fields[field_cost] = [(-1, cost)]
if 'supertypes' in src_json:
fields[field_supertypes] = [(-1, map(lambda s: utils.to_ascii(s.lower()),
src_json['supertypes']))]
fields[field_supertypes] = [
(-1, [utils.to_ascii(s.lower()) for s in src_json['supertypes']])]
if 'types' in src_json:
fields[field_types] = [(-1, map(lambda s: utils.to_ascii(s.lower()),
src_json['types']))]
fields[field_types] = [(-1, [utils.to_ascii(s.lower())
for s in src_json['types']])]
else:
parsed = False
if 'subtypes' in src_json:
fields[field_subtypes] = [(-1, map(lambda s: utils.to_ascii(s.lower())
# urza's lands...
.replace('"', "'").replace('-', utils.dash_marker),
src_json['subtypes']))]
fields[field_subtypes] = [(-1, [utils.to_ascii(s.lower())
# urza's lands...
.replace('"', "'").replace('-', utils.dash_marker) for s in src_json['subtypes']])]
if 'rarity' in src_json:
@ -323,7 +322,7 @@ def fields_from_format(src_text, fmt_ordered, fmt_labeled, fieldsep):
if fmt_labeled:
labels = {fmt_labeled[k] : k for k in fmt_labeled}
field_label_regex = '[' + ''.join(labels.keys()) + ']'
field_label_regex = '[' + ''.join(list(labels.keys())) + ']'
def addf(fields, fkey, fval):
# make sure you pass a pair
if fval and fval[1]:
@ -544,14 +543,13 @@ class Card:
self.__dict__[field_text] = mtext
fulltext = mtext.encode()
if fulltext:
self.__dict__[field_text + '_lines'] = map(Manatext,
fulltext.split(utils.newline))
self.__dict__[field_text + '_words'] = re.sub(utils.unletters_regex,
' ',
self.__dict__[field_text + '_lines'] = list(map(Manatext,
fulltext.split(utils.newline)))
self.__dict__[field_text + '_words'] = re.sub(utils.unletters_regex,
' ',
fulltext).split()
self.__dict__[field_text + '_lines_words'] = map(
lambda line: re.sub(utils.unletters_regex, ' ', line).split(),
fulltext.split(utils.newline))
self.__dict__[field_text + '_lines_words'] = [re.sub(
utils.unletters_regex, ' ', line).split() for line in fulltext.split(utils.newline)]
else:
self.valid = False
self.__dict__[field_other] += [(idx, '<text> ' + str(value))]
@ -667,11 +665,12 @@ class Card:
outstr += linebreak
basetypes = map(str.capitalize, self.__dict__[field_types])
basetypes = list(map(str.capitalize, self.__dict__[field_types]))
if vdump and len(basetypes) < 1:
basetypes = ['_NOTYPE_']
outstr += ' '.join(map(str.capitalize, self.__dict__[field_supertypes]) + basetypes)
outstr += ' '.join(list(map(str.capitalize,
self.__dict__[field_supertypes])) + basetypes)
if self.__dict__[field_subtypes]:
outstr += (' ' + utils.dash_marker + ' ' +
@ -1005,8 +1004,8 @@ class Card:
if coststr:
outstr += coststr + ' '
typestr = ' '.join(map(lambda s: '(' + s + ')',
self.__dict__[field_supertypes] + self.__dict__[field_types]))
typestr = ' '.join(
['(' + s + ')' for s in self.__dict__[field_supertypes] + self.__dict__[field_types]])
if typestr:
outstr += typestr + ' '
@ -1014,8 +1013,8 @@ class Card:
outstr += ' '.join(self.__dict__[field_subtypes]) + ' '
if self.__dict__[field_pt]:
outstr += ' '.join(map(lambda s: '(' + s + ')',
self.__dict__[field_pt].replace('/', '/ /').split()))
outstr += ' '.join(['(' + s + ')' for s in self.__dict__[
field_pt].replace('/', '/ /').split()])
outstr += ' '
if self.__dict__[field_loyalty]:

View file

@ -147,7 +147,7 @@ def f_nearest(card, vocab, vecs, cardvecs, n):
def f_nearest_per_thread(workitem):
    """Run f_nearest over one batch of cards.

    workitem is a single 5-tuple (workcards, vocab, vecs, cardvecs, n), so
    the function can be handed directly to Pool.map, which passes exactly
    one argument per call.

    Returns a list with one f_nearest result per card, in input order.
    """
    (workcards, vocab, vecs, cardvecs, n) = workitem
    # Build a real list: on Python 3 a bare map() is a lazy iterator, which
    # cannot be pickled back to the parent process by multiprocessing.
    return [f_nearest(card, vocab, vecs, cardvecs, n) for card in workcards]
class CBOW:
def __init__(self, verbose = True,
@ -157,17 +157,18 @@ class CBOW:
self.cardvecs = []
if self.verbose:
print 'Building a cbow model...'
print('Building a cbow model...')
if self.verbose:
print ' Reading binary vector data from: ' + vector_fname
print(' Reading binary vector data from: ' + vector_fname)
(vocab, vecs) = read_vector_file(vector_fname)
self.vocab = vocab
self.vecs = vecs
if self.verbose:
print ' Reading encoded cards from: ' + card_fname
print ' They\'d better be in the same order as the file used to build the vector model!'
print(' Reading encoded cards from: ' + card_fname)
print(
' They\'d better be in the same order as the file used to build the vector model!')
with open(card_fname, 'rt') as f:
text = f.read()
for card_src in text.split(utils.cardsep):
@ -179,10 +180,10 @@ class CBOW:
card.vectorize()))]
if self.verbose:
print '... Done.'
print ' vocab size: ' + str(len(self.vocab))
print ' raw vecs: ' + str(len(self.vecs))
print ' card vecs: ' + str(len(self.cardvecs))
print('... Done.')
print(' vocab size: ' + str(len(self.vocab)))
print(' raw vecs: ' + str(len(self.vecs)))
print(' card vecs: ' + str(len(self.cardvecs)))
# Single-card query: returns whatever f_nearest produces for `card`, passing
# this model's vocab, vecs and cardvecs. `n` (default 5) is forwarded
# unchanged -- presumably the number of neighbours wanted; confirm against
# f_nearest.
def nearest(self, card, n=5):
return f_nearest(card, self.vocab, self.vecs, self.cardvecs, n)
@ -190,6 +191,7 @@ class CBOW:
def nearest_par(self, cards, n=5, threads=cores):
    """Run the nearest-card search for many cards on a process pool.

    cards   -- cards to look up; batched with namediff.list_split(cards, threads).
    n       -- forwarded to every worker (default 5).
    threads -- number of worker processes given to multiprocessing.Pool.

    Returns namediff.list_flatten() applied to the per-batch results of
    f_nearest_per_thread.
    """
    workpool = multiprocessing.Pool(threads)
    try:
        proto_worklist = namediff.list_split(cards, threads)
        # Each work item carries the model data along with its batch, since
        # the worker function only receives this one tuple.
        worklist = [(x, self.vocab, self.vecs, self.cardvecs, n)
                    for x in proto_worklist]
        donelist = workpool.map(f_nearest_per_thread, worklist)
    finally:
        # Always release the worker processes, even if a worker raised.
        workpool.close()
        workpool.join()
    return namediff.list_flatten(donelist)

View file

@ -25,11 +25,13 @@ def padrows(l):
return padded
def printrows(l):
    """Print every element of l on its own line, in iteration order."""
    for row in l:
        print(row)
# index management helpers
def index_size(d):
    """Return the total member count of index d.

    d maps keys to sized collections; the result is the sum of
    len(d[k]) over every key k, which is 0 for an empty index.
    """
    # Generator expression: no temporary list is needed just to sum lengths.
    return sum(len(d[k]) for k in d)
def inc(d, k, obj):
if k or k == 0:
@ -148,35 +150,39 @@ class Datamine:
# summarize the indices
# Yes, this printing code is pretty terrible.
def summarize(self, hsize = 10, vsize = 10, cmcsize = 20):
print '===================='
print str(len(self.cards)) + ' valid cards, ' + str(len(self.invalid_cards)) + ' invalid cards.'
print str(len(self.allcards)) + ' cards parsed, ' + str(len(self.unparsed_cards)) + ' failed to parse'
print '--------------------'
print str(len(self.by_name)) + ' unique card names'
print '--------------------'
print (str(len(self.by_color_inclusive)) + ' represented colors (including colorless as \'A\'), '
+ str(len(self.by_color)) + ' combinations')
print 'Breakdown by color:'
rows = [self.by_color_inclusive.keys()]
print('====================')
print(str(len(self.cards)) + ' valid cards, ' +
str(len(self.invalid_cards)) + ' invalid cards.')
print(str(len(self.allcards)) + ' cards parsed, ' +
str(len(self.unparsed_cards)) + ' failed to parse')
print('--------------------')
print(str(len(self.by_name)) + ' unique card names')
print('--------------------')
print((str(len(self.by_color_inclusive)) + ' represented colors (including colorless as \'A\'), '
+ str(len(self.by_color)) + ' combinations'))
print('Breakdown by color:')
rows = [list(self.by_color_inclusive.keys())]
rows += [[len(self.by_color_inclusive[k]) for k in rows[0]]]
printrows(padrows(rows))
print 'Breakdown by number of colors:'
rows = [self.by_color_count.keys()]
print('Breakdown by number of colors:')
rows = [list(self.by_color_count.keys())]
rows += [[len(self.by_color_count[k]) for k in rows[0]]]
printrows(padrows(rows))
print '--------------------'
print str(len(self.by_type_inclusive)) + ' unique card types, ' + str(len(self.by_type)) + ' combinations'
print 'Breakdown by type:'
d = sorted(self.by_type_inclusive,
lambda x,y: cmp(len(self.by_type_inclusive[x]), len(self.by_type_inclusive[y])),
reverse = True)
print('--------------------')
print(str(len(self.by_type_inclusive)) + ' unique card types, ' +
str(len(self.by_type)) + ' combinations')
print('Breakdown by type:')
d = sorted(self.by_type_inclusive,
lambda x, y: cmp(len(self.by_type_inclusive[x]), len(
self.by_type_inclusive[y])),
reverse=True)
rows = [[k for k in d[:hsize]]]
rows += [[len(self.by_type_inclusive[k]) for k in rows[0]]]
printrows(padrows(rows))
print '--------------------'
print (str(len(self.by_subtype_inclusive)) + ' unique subtypes, '
+ str(len(self.by_subtype)) + ' combinations')
print '-- Popular subtypes: --'
print('--------------------')
print((str(len(self.by_subtype_inclusive)) + ' unique subtypes, '
+ str(len(self.by_subtype)) + ' combinations'))
print('-- Popular subtypes: --')
d = sorted(self.by_subtype_inclusive,
lambda x,y: cmp(len(self.by_subtype_inclusive[x]), len(self.by_subtype_inclusive[y])),
reverse = True)
@ -184,7 +190,7 @@ class Datamine:
for k in d[0:vsize]:
rows += [[k, len(self.by_subtype_inclusive[k])]]
printrows(padrows(rows))
print '-- Top combinations: --'
print('-- Top combinations: --')
d = sorted(self.by_subtype,
lambda x,y: cmp(len(self.by_subtype[x]), len(self.by_subtype[y])),
reverse = True)
@ -192,24 +198,26 @@ class Datamine:
for k in d[0:vsize]:
rows += [[k, len(self.by_subtype[k])]]
printrows(padrows(rows))
print '--------------------'
print (str(len(self.by_supertype_inclusive)) + ' unique supertypes, '
+ str(len(self.by_supertype)) + ' combinations')
print 'Breakdown by supertype:'
d = sorted(self.by_supertype_inclusive,
lambda x,y: cmp(len(self.by_supertype_inclusive[x]),len(self.by_supertype_inclusive[y])),
reverse = True)
print('--------------------')
print((str(len(self.by_supertype_inclusive)) + ' unique supertypes, '
+ str(len(self.by_supertype)) + ' combinations'))
print('Breakdown by supertype:')
d = sorted(self.by_supertype_inclusive,
lambda x, y: cmp(len(self.by_supertype_inclusive[x]), len(
self.by_supertype_inclusive[y])),
reverse=True)
rows = [[k for k in d[:hsize]]]
rows += [[len(self.by_supertype_inclusive[k]) for k in rows[0]]]
printrows(padrows(rows))
print '--------------------'
print str(len(self.by_cmc)) + ' different CMCs, ' + str(len(self.by_cost)) + ' unique mana costs'
print 'Breakdown by CMC:'
d = sorted(self.by_cmc, reverse = False)
print('--------------------')
print(str(len(self.by_cmc)) + ' different CMCs, ' +
str(len(self.by_cost)) + ' unique mana costs')
print('Breakdown by CMC:')
d = sorted(self.by_cmc, reverse=False)
rows = [[k for k in d[:cmcsize]]]
rows += [[len(self.by_cmc[k]) for k in rows[0]]]
printrows(padrows(rows))
print '-- Popular mana costs: --'
print('-- Popular mana costs: --')
d = sorted(self.by_cost,
lambda x,y: cmp(len(self.by_cost[x]), len(self.by_cost[y])),
reverse = True)
@ -217,12 +225,12 @@ class Datamine:
for k in d[0:vsize]:
rows += [[utils.from_mana(k), len(self.by_cost[k])]]
printrows(padrows(rows))
print '--------------------'
print str(len(self.by_pt)) + ' unique p/t combinations'
print('--------------------')
print(str(len(self.by_pt)) + ' unique p/t combinations')
if len(self.by_power) > 0 and len(self.by_toughness) > 0:
print ('Largest power: ' + str(max(map(len, self.by_power)) - 1) +
', largest toughness: ' + str(max(map(len, self.by_toughness)) - 1))
print '-- Popular p/t values: --'
print(('Largest power: ' + str(max(list(map(len, self.by_power))) - 1) +
', largest toughness: ' + str(max(list(map(len, self.by_toughness))) - 1)))
print('-- Popular p/t values: --')
d = sorted(self.by_pt,
lambda x,y: cmp(len(self.by_pt[x]), len(self.by_pt[y])),
reverse = True)
@ -230,8 +238,8 @@ class Datamine:
for k in d[0:vsize]:
rows += [[utils.from_unary(k), len(self.by_pt[k])]]
printrows(padrows(rows))
print '--------------------'
print 'Loyalty values:'
print('--------------------')
print('Loyalty values:')
d = sorted(self.by_loyalty,
lambda x,y: cmp(len(self.by_loyalty[x]), len(self.by_loyalty[y])),
reverse = True)
@ -239,13 +247,13 @@ class Datamine:
for k in d[0:vsize]:
rows += [[utils.from_unary(k), len(self.by_loyalty[k])]]
printrows(padrows(rows))
print '--------------------'
print('--------------------')
if len(self.by_textlen) > 0 and len(self.by_textlines) > 0:
print('Card text ranges from ' + str(min(self.by_textlen)) + ' to '
+ str(max(self.by_textlen)) + ' characters in length')
print('Card text ranges from ' + str(min(self.by_textlines)) + ' to '
+ str(max(self.by_textlines)) + ' lines')
print '-- Line counts by frequency: --'
print(('Card text ranges from ' + str(min(self.by_textlen)) + ' to '
+ str(max(self.by_textlen)) + ' characters in length'))
print(('Card text ranges from ' + str(min(self.by_textlines)) + ' to '
+ str(max(self.by_textlines)) + ' lines'))
print('-- Line counts by frequency: --')
d = sorted(self.by_textlines,
lambda x,y: cmp(len(self.by_textlines[x]), len(self.by_textlines[y])),
reverse = True)
@ -253,29 +261,29 @@ class Datamine:
for k in d[0:vsize]:
rows += [[k, len(self.by_textlines[k])]]
printrows(padrows(rows))
print '===================='
print('====================')
# describe outliers in the indices
def outliers(self, hsize = 10, vsize = 10, dump_invalid = False):
print '********************'
print 'Overview of indices:'
def outliers(self, hsize=10, vsize=10, dump_invalid=False):
print('********************')
print('Overview of indices:')
rows = [['Index Name', 'Keys', 'Total Members']]
for index in self.indices:
rows += [[index, len(self.indices[index]), index_size(self.indices[index])]]
rows += [[index, len(self.indices[index]),
index_size(self.indices[index])]]
printrows(padrows(rows))
print '********************'
print('********************')
if len(self.by_name) > 0:
scardname = sorted(self.by_name,
lambda x,y: cmp(len(x), len(y)),
reverse = False)[0]
print 'Shortest Cardname: (' + str(len(scardname)) + ')'
print ' ' + scardname
lcardname = sorted(self.by_name,
lambda x,y: cmp(len(x), len(y)),
reverse = True)[0]
print 'Longest Cardname: (' + str(len(lcardname)) + ')'
print ' ' + lcardname
scardname = sorted(self.by_name,
lambda x, y: cmp(len(x), len(y)),
reverse=False)[0]
print('Shortest Cardname: (' + str(len(scardname)) + ')')
print(' ' + scardname)
lcardname = sorted(self.by_name,
lambda x, y: cmp(len(x), len(y)),
reverse=True)[0]
print('Longest Cardname: (' + str(len(lcardname)) + ')')
print(' ' + lcardname)
d = sorted(self.by_name,
lambda x,y: cmp(len(self.by_name[x]), len(self.by_name[y])),
reverse = True)
@ -286,94 +294,95 @@ class Datamine:
if rows == []:
print('No duplicated cardnames')
else:
print '-- Most duplicated names: --'
print('-- Most duplicated names: --')
printrows(padrows(rows))
else:
print 'No cards indexed by name?'
print '--------------------'
print('No cards indexed by name?')
print('--------------------')
if len(self.by_type) > 0:
ltypes = sorted(self.by_type,
lambda x,y: cmp(len(x), len(y)),
reverse = True)[0]
print 'Longest card type: (' + str(len(ltypes)) + ')'
print ' ' + ltypes
ltypes = sorted(self.by_type,
lambda x, y: cmp(len(x), len(y)),
reverse=True)[0]
print('Longest card type: (' + str(len(ltypes)) + ')')
print(' ' + ltypes)
else:
print 'No cards indexed by type?'
print('No cards indexed by type?')
if len(self.by_subtype) > 0:
lsubtypes = sorted(self.by_subtype,
lambda x,y: cmp(len(x), len(y)),
reverse = True)[0]
print 'Longest subtype: (' + str(len(lsubtypes)) + ')'
print ' ' + lsubtypes
lsubtypes = sorted(self.by_subtype,
lambda x, y: cmp(len(x), len(y)),
reverse=True)[0]
print('Longest subtype: (' + str(len(lsubtypes)) + ')')
print(' ' + lsubtypes)
else:
print 'No cards indexed by subtype?'
print('No cards indexed by subtype?')
if len(self.by_supertype) > 0:
lsupertypes = sorted(self.by_supertype,
lambda x,y: cmp(len(x), len(y)),
reverse = True)[0]
print 'Longest supertype: (' + str(len(lsupertypes)) + ')'
print ' ' + lsupertypes
lsupertypes = sorted(self.by_supertype,
lambda x, y: cmp(len(x), len(y)),
reverse=True)[0]
print('Longest supertype: (' + str(len(lsupertypes)) + ')')
print(' ' + lsupertypes)
else:
print 'No cards indexed by supertype?'
print '--------------------'
print('No cards indexed by supertype?')
print('--------------------')
if len(self.by_cost) > 0:
lcost = sorted(self.by_cost,
lambda x,y: cmp(len(x), len(y)),
reverse = True)[0]
print 'Longest mana cost: (' + str(len(lcost)) + ')'
print ' ' + utils.from_mana(lcost)
print '\n' + plimit(self.by_cost[lcost][0].encode()) + '\n'
lcost = sorted(self.by_cost,
lambda x, y: cmp(len(x), len(y)),
reverse=True)[0]
print('Longest mana cost: (' + str(len(lcost)) + ')')
print(' ' + utils.from_mana(lcost))
print('\n' + plimit(self.by_cost[lcost][0].encode()) + '\n')
else:
print 'No cards indexed by cost?'
print('No cards indexed by cost?')
if len(self.by_cmc) > 0:
lcmc = sorted(self.by_cmc, reverse = True)[0]
print 'Largest cmc: (' + str(lcmc) + ')'
print ' ' + str(self.by_cmc[lcmc][0].cost)
print '\n' + plimit(self.by_cmc[lcmc][0].encode())
lcmc = sorted(self.by_cmc, reverse=True)[0]
print('Largest cmc: (' + str(lcmc) + ')')
print(' ' + str(self.by_cmc[lcmc][0].cost))
print('\n' + plimit(self.by_cmc[lcmc][0].encode()))
else:
print 'No cards indexed by cmc?'
print '--------------------'
print('No cards indexed by cmc?')
print('--------------------')
if len(self.by_power) > 0:
lpower = sorted(self.by_power,
lambda x,y: cmp(len(x), len(y)),
reverse = True)[0]
print 'Largest creature power: ' + utils.from_unary(lpower)
print '\n' + plimit(self.by_power[lpower][0].encode()) + '\n'
else:
print 'No cards indexed by power?'
lpower = sorted(self.by_power,
lambda x, y: cmp(len(x), len(y)),
reverse=True)[0]
print('Largest creature power: ' + utils.from_unary(lpower))
print('\n' + plimit(self.by_power[lpower][0].encode()) + '\n')
else:
print('No cards indexed by power?')
if len(self.by_toughness) > 0:
ltoughness = sorted(self.by_toughness,
lambda x,y: cmp(len(x), len(y)),
reverse = True)[0]
print 'Largest creature toughness: ' + utils.from_unary(ltoughness)
print '\n' + plimit(self.by_toughness[ltoughness][0].encode())
else:
print 'No cards indexed by toughness?'
print '--------------------'
ltoughness = sorted(self.by_toughness,
lambda x, y: cmp(len(x), len(y)),
reverse=True)[0]
print('Largest creature toughness: ' +
utils.from_unary(ltoughness))
print('\n' + plimit(self.by_toughness[ltoughness][0].encode()))
else:
print('No cards indexed by toughness?')
print('--------------------')
if len(self.by_textlines) > 0:
llines = sorted(self.by_textlines, reverse = True)[0]
print 'Most lines of text in a card: ' + str(llines)
print '\n' + plimit(self.by_textlines[llines][0].encode()) + '\n'
else:
print 'No cards indexed by line count?'
llines = sorted(self.by_textlines, reverse=True)[0]
print('Most lines of text in a card: ' + str(llines))
print('\n' + plimit(self.by_textlines[llines][0].encode()) + '\n')
else:
print('No cards indexed by line count?')
if len(self.by_textlen) > 0:
ltext = sorted(self.by_textlen, reverse = True)[0]
print 'Most chars in a card text: ' + str(ltext)
print '\n' + plimit(self.by_textlen[ltext][0].encode())
else:
print 'No cards indexed by char count?'
print '--------------------'
print 'There were ' + str(len(self.invalid_cards)) + ' invalid cards.'
ltext = sorted(self.by_textlen, reverse=True)[0]
print('Most chars in a card text: ' + str(ltext))
print('\n' + plimit(self.by_textlen[ltext][0].encode()))
else:
print('No cards indexed by char count?')
print('--------------------')
print('There were ' + str(len(self.invalid_cards)) + ' invalid cards.')
if dump_invalid:
for card in self.invalid_cards:
print '\n' + repr(card.fields)
print('\n' + repr(card.fields))
elif len(self.invalid_cards) > 0:
print 'Not summarizing.'
print '--------------------'
print 'There were ' + str(len(self.unparsed_cards)) + ' unparsed cards.'
print('Not summarizing.')
print('--------------------')
print('There were ' + str(len(self.unparsed_cards)) + ' unparsed cards.')
if dump_invalid:
for card in self.unparsed_cards:
print '\n' + repr(card.fields)
print('\n' + repr(card.fields))
elif len(self.unparsed_cards) > 0:
print 'Not summarizing.'
print '===================='
print('Not summarizing.')
print('====================')

View file

@ -63,7 +63,7 @@ def mtg_open_json(fname, verbose = False):
#print bsides[uid]
if verbose:
print 'Opened ' + str(len(allcards)) + ' uniquely named cards.'
print('Opened ' + str(len(allcards)) + ' uniquely named cards.')
return allcards
# filters to ignore some undesirable cards, only used when opening json
@ -91,7 +91,7 @@ def mtg_open_file(fname, verbose = False,
if fname[-5:] == '.json':
if verbose:
print 'This looks like a json file: ' + fname
print('This looks like a json file: ' + fname)
json_srcs = mtg_open_json(fname, verbose)
# sorted for stability
for json_cardname in sorted(json_srcs):
@ -136,7 +136,7 @@ def mtg_open_file(fname, verbose = False,
# fall back to opening a normal encoded file
else:
if verbose:
print 'Opening encoded card file: ' + fname
print('Opening encoded card file: ' + fname)
with open(fname, 'rt') as f:
text = f.read()
for card_src in text.split(utils.cardsep):
@ -152,8 +152,8 @@ def mtg_open_file(fname, verbose = False,
unparsed += 1
if verbose:
print (str(valid) + ' valid, ' + str(skipped) + ' skipped, '
+ str(invalid) + ' invalid, ' + str(unparsed) + ' failed to parse.')
print((str(valid) + ' valid, ' + str(skipped) + ' skipped, '
+ str(invalid) + ' invalid, ' + str(unparsed) + ' failed to parse.'))
good_count = 0
bad_count = 0
@ -168,7 +168,7 @@ def mtg_open_file(fname, verbose = False,
break
# random heuristic
if bad_count > 10:
print 'WARNING: Saw a bunch of unparsed cards:'
print ' Is this a legacy format, you may need to specify the field order.'
print('WARNING: Saw a bunch of unparsed cards:')
print(' Is this a legacy format, you may need to specify the field order.')
return cards

View file

@ -136,7 +136,7 @@ class Manacost:
else:
ld = ''
rd = ''
return ' '.join(map(lambda s: ld + s + rd, sorted(self.sequence)))
return ' '.join([ld + s + rd for s in sorted(self.sequence)])
class Manatext:

View file

@ -48,7 +48,7 @@ def f_nearest_per_thread(workitem):
(worknames, names, n) = workitem
# each thread (well, process) needs to generate its own matchers
matchers = [difflib.SequenceMatcher(b=name, autojunk=False) for name in names]
return map(lambda name: f_nearest(name, matchers, n), worknames)
return [f_nearest(name, matchers, n) for name in worknames]
class Namediff:
def __init__(self, verbose = True,
@ -59,10 +59,10 @@ class Namediff:
self.cardstrings = {}
if self.verbose:
print 'Setting up namediff...'
print('Setting up namediff...')
if self.verbose:
print ' Reading names from: ' + json_fname
print(' Reading names from: ' + json_fname)
json_srcs = jdecode.mtg_open_json(json_fname, verbose)
namecount = 0
for json_cardname in sorted(json_srcs):
@ -81,7 +81,7 @@ class Namediff:
jnum = ''
if name in self.names:
print ' Duplicate name ' + name + ', ignoring.'
print(' Duplicate name ' + name + ', ignoring.')
else:
self.names[name] = jname
self.cardstrings[name] = card.encode()
@ -91,13 +91,15 @@ class Namediff:
self.codes[name] = ''
namecount += 1
print ' Read ' + str(namecount) + ' unique cardnames'
print ' Building SequenceMatcher objects.'
self.matchers = [difflib.SequenceMatcher(b=n, autojunk=False) for n in self.names]
self.card_matchers = [difflib.SequenceMatcher(b=self.cardstrings[n], autojunk=False) for n in self.cardstrings]
print(' Read ' + str(namecount) + ' unique cardnames')
print(' Building SequenceMatcher objects.')
print '... Done.'
self.matchers = [difflib.SequenceMatcher(
b=n, autojunk=False) for n in self.names]
self.card_matchers = [difflib.SequenceMatcher(
b=self.cardstrings[n], autojunk=False) for n in self.cardstrings]
print('... Done.')
# Single-name query: returns whatever f_nearest produces for `name` using the
# prebuilt self.matchers. `n` (default 3) is forwarded unchanged --
# presumably the number of matches wanted; confirm against f_nearest.
def nearest(self, name, n=3):
return f_nearest(name, self.matchers, n)
@ -105,7 +107,7 @@ class Namediff:
def nearest_par(self, names, n=3, threads=cores):
    """Run the nearest-name search for many names on a process pool.

    names   -- names to look up; batched with list_split(names, threads).
    n       -- forwarded to every worker (default 3).
    threads -- number of worker processes given to multiprocessing.Pool.

    Returns list_flatten() applied to the per-batch results of
    f_nearest_per_thread.
    """
    workpool = multiprocessing.Pool(threads)
    try:
        proto_worklist = list_split(names, threads)
        # self.names travels with every batch because each worker process
        # builds its own SequenceMatcher objects from it.
        worklist = [(x, self.names, n) for x in proto_worklist]
        donelist = workpool.map(f_nearest_per_thread, worklist)
    finally:
        # Always release the worker processes, even if a worker raised.
        workpool.close()
        workpool.join()
    return list_flatten(donelist)
@ -115,6 +117,7 @@ class Namediff:
def nearest_card_par(self, cards, n=5, threads=cores):
    """Run the nearest search over encoded card text on a process pool.

    cards   -- card objects; each is turned into text with card.encode()
               after batching with list_split(cards, threads).
    n       -- forwarded to every worker (default 5).
    threads -- number of worker processes given to multiprocessing.Pool.

    Returns list_flatten() applied to the per-batch results of
    f_nearest_per_thread.
    """
    workpool = multiprocessing.Pool(threads)
    try:
        proto_worklist = list_split(cards, threads)
        # Materialise the dict view once: it is the same for every batch,
        # and a plain list (unlike a Python 3 values() view) can be pickled.
        known_cardstrings = list(self.cardstrings.values())
        worklist = [([c.encode() for c in x], known_cardstrings, n)
                    for x in proto_worklist]
        donelist = workpool.map(f_nearest_per_thread, worklist)
    finally:
        # Always release the worker processes, even if a worker raised.
        workpool.close()
        workpool.join()
    return list_flatten(donelist)

View file

@ -11,7 +11,7 @@
# an attempt was made to preserve the exact functionality of this code,
# hampered somewhat by its brokenness
from __future__ import unicode_literals
from math import log
@ -169,9 +169,9 @@ class NgramModel(ModelI):
# fixed
def _words_following(self, context, cond_freq_dist):
    """Yield the words recorded under each key equal to context.

    Generator: for every key of cond_freq_dist that compares equal to
    context, yields a list of the keys of cond_freq_dist[key]. Yields
    nothing when no key matches.
    """
    # Deliberately a linear scan using ==, as in the original code, rather
    # than a hashed lookup.
    for ctxt in cond_freq_dist:
        if ctxt == context:
            yield list(cond_freq_dist[ctxt].keys())
def prob(self, word, context):
"""

View file

@ -144,7 +144,7 @@ def text_pass_4b_x(s):
s = s.replace(' x ', ' ' + x_marker + ' ')
s = s.replace('x:', x_marker + ':')
s = s.replace('x~', x_marker + '~')
s = s.replace(u'x\u2014', x_marker + u'\u2014')
s = s.replace('x\u2014', x_marker + '\u2014')
s = s.replace('x.', x_marker + '.')
s = s.replace('x,', x_marker + ',')
s = s.replace('x is', x_marker + ' is')
@ -359,7 +359,8 @@ def text_pass_7_choice(s):
# to '[n = ability = ability]\n'
def choice_formatting_helper(s_helper, prefix, count, suffix = ''):
single_choices = re.findall(ur'(' + prefix + ur'\n?(\u2022.*(\n|$))+)', s_helper)
single_choices = re.findall(
r'(' + prefix + r'\n?(\u2022.*(\n|$))+)', s_helper)
for choice in single_choices:
newchoice = choice[0]
newchoice = newchoice.replace(prefix, unary_marker + (unary_counter * count) + suffix)
@ -371,20 +372,23 @@ def text_pass_7_choice(s):
s_helper = s_helper.replace(choice[0], newchoice)
return s_helper
s = choice_formatting_helper(s, ur'choose one \u2014', 1)
s = choice_formatting_helper(s, ur'choose one \u2014 ', 1) # ty Promise of Power
s = choice_formatting_helper(s, ur'choose two \u2014', 2)
s = choice_formatting_helper(s, ur'choose two \u2014 ', 2) # ty Profane Command
s = choice_formatting_helper(s, ur'choose one or both \u2014', 0)
s = choice_formatting_helper(s, ur'choose one or more \u2014', 0)
s = choice_formatting_helper(s, ur'choose khans or dragons.', 1)
# this is for 'an opponent chooses one', which will be a bit weird but still work out
s = choice_formatting_helper(s, ur'chooses one \u2014', 1)
s = choice_formatting_helper(s, r'choose one \u2014', 1)
s = choice_formatting_helper(
s, r'choose one \u2014 ', 1) # ty Promise of Power
s = choice_formatting_helper(s, r'choose two \u2014', 2)
s = choice_formatting_helper(
s, r'choose two \u2014 ', 2) # ty Profane Command
s = choice_formatting_helper(s, r'choose one or both \u2014', 0)
s = choice_formatting_helper(s, r'choose one or more \u2014', 0)
s = choice_formatting_helper(s, r'choose khans or dragons.', 1)
# this is for 'an opponent chooses one', which will be a bit weird but
# still work out
s = choice_formatting_helper(s, r'chooses one \u2014', 1)
# Demonic Pact has 'choose one that hasn't been chosen'...
s = choice_formatting_helper(s, ur"choose one that hasn't been chosen \u2014", 1,
s = choice_formatting_helper(s, r"choose one that hasn't been chosen \u2014", 1,
suffix=" that hasn't been chosen")
# 'choose n. you may choose the same mode more than once.'
s = choice_formatting_helper(s, ur'choose three. you may choose the same mode more than once.', 3,
s = choice_formatting_helper(s, r'choose three. you may choose the same mode more than once.', 3,
suffix='. you may choose the same mode more than once.')
return s
@ -409,7 +413,7 @@ def text_pass_8_equip(s):
else:
s = equip + '\n' + s
nonmana = re.findall(ur'(equip\u2014.*(\n|$))', s)
nonmana = re.findall(r'(equip\u2014.*(\n|$))', s)
if len(nonmana) == 1:
equip = nonmana[0][0]
s = s.replace('\n' + equip, '')
@ -470,7 +474,7 @@ def text_pass_11_linetrans(s):
postlines += [subline]
else:
keylines += [subline]
elif u'\u2014' in line and not u' \u2014 ' in line:
elif '\u2014' in line and not ' \u2014 ' in line:
if 'equip' in line or 'enchant' in line:
prelines += [line]
elif 'countertype' in line or 'kicker' in line:
@ -669,6 +673,6 @@ def text_unpass_7_newlines(s):
def text_unpass_8_unicode(s):
    """Return s with the marker strings turned back into unicode characters.

    Every dash_marker becomes U+2014 (long dash), then every bullet_marker
    becomes U+2022 (bullet).
    """
    # Dash first, then bullet -- same order as the original statements.
    return s.replace(dash_marker, '\u2014').replace(bullet_marker, '\u2022')

View file

@ -83,20 +83,20 @@ json_field_info_code = config.json_field_info_code
# unicode / ascii conversion
unicode_trans = {
u'\u2014' : dash_marker, # unicode long dash
u'\u2022' : bullet_marker, # unicode bullet
u'\u2019' : '"', # single quote
u'\u2018' : '"', # single quote
u'\u2212' : '-', # minus sign
u'\xe6' : 'ae', # ae symbol
u'\xfb' : 'u', # u with caret
u'\xfa' : 'u', # u with accent
u'\xe9' : 'e', # e with accent
u'\xe1' : 'a', # a with accent
u'\xe0' : 'a', # a with accent going the other way
u'\xe2' : 'a', # a with caret
u'\xf6' : 'o', # o with umlaut
u'\xed' : 'i', # i with accent
'\u2014': dash_marker, # unicode long dash
'\u2022': bullet_marker, # unicode bullet
'\u2019': '"', # single quote
'\u2018': '"', # single quote
'\u2212': '-', # minus sign
'\xe6': 'ae', # ae symbol
'\xfb': 'u', # u with caret
'\xfa': 'u', # u with accent
'\xe9': 'e', # e with accent
'\xe1': 'a', # a with accent
'\xe0': 'a', # a with accent going the other way
'\xe2': 'a', # a with caret
'\xf6': 'o', # o with umlaut
'\xed': 'i', # i with accent
}
# this one is one-way only
@ -121,7 +121,7 @@ def to_unary(s, warn = False):
elif i > unary_max:
i = unary_max
if warn:
print s
print(s)
s = s.replace(n, unary_marker + unary_counter * i)
else:
s = s.replace(n, unary_marker + unary_counter * i)

View file

@ -35,14 +35,16 @@ def annotate_values(values):
def print_statistics(stats, ident = 0):
for k in stats:
if isinstance(stats[k], OrderedDict):
print(' ' * ident + str(k) + ':')
print_statistics(stats[k], ident=ident+2)
print((' ' * ident + str(k) + ':'))
print_statistics(stats[k], ident=ident + 2)
elif isinstance(stats[k], dict):
print(' ' * ident + str(k) + ': <dict with ' + str(len(stats[k])) + ' entries>')
print((' ' * ident + str(k) + ': <dict with ' +
str(len(stats[k])) + ' entries>'))
elif isinstance(stats[k], list):
print(' ' * ident + str(k) + ': <list with ' + str(len(stats[k])) + ' entries>')
print((' ' * ident + str(k) + ': <list with ' +
str(len(stats[k])) + ' entries>'))
else:
print(' ' * ident + str(k) + ': ' + str(stats[k]))
print((' ' * ident + str(k) + ': ' + str(stats[k])))
def get_statistics(fname, lm = None, sep = False, verbose=False):
stats = OrderedDict()

View file

@ -22,7 +22,7 @@ def sample(cp, temp, count, seed = None, ident = 'output'):
+ ' -seed ' + str(seed)
+ ' >> ' + outfile)
if os.path.exists(outfile):
print(outfile + ' already exists, skipping')
print((outfile + ' already exists, skipping'))
return False
else:
# UNSAFE SHELL=TRUE FOR CONVENIENCE
@ -45,7 +45,7 @@ def find_best_cp(cpdir):
def process_dir(cpdir, temp, count, seed = None, ident = 'output', verbose = False):
if verbose:
print('processing ' + cpdir)
print(('processing ' + cpdir))
best_cp = find_best_cp(cpdir)
if not best_cp is None:
sample(best_cp, temp, count, seed=seed, ident=ident)

View file

@ -47,20 +47,21 @@ def process_dir(basedir, targetdir, ident, copy_cp = False, verbose = False):
cp_infos = identify_checkpoints(basedir, ident)
for (dpath, cpath, (epoch, vloss, temp)) in cp_infos:
if verbose:
print('found dumpfile ' + dpath)
dname = basedirname + '_epoch' + epoch + '_' + vloss + '.' + ident + '.' + temp + '.txt'
print(('found dumpfile ' + dpath))
dname = basedirname + '_epoch' + epoch + '_' + \
vloss + '.' + ident + '.' + temp + '.txt'
cname = basedirname + '_epoch' + epoch + '_' + vloss + '.t7'
tdpath = os.path.join(targetdir, dname)
tcpath = os.path.join(targetdir, cname)
if verbose:
print(' cpx ' + dpath + ' ' + tdpath)
print((' cpx ' + dpath + ' ' + tdpath))
with open(dpath, 'rt') as infile:
with open(tdpath, 'wt') as outfile:
outfile.write(cleanup_dump(infile.read()))
if copy_cp:
if os.path.isfile(cpath):
if verbose:
print(' cp ' + cpath + ' ' + tcpath)
print((' cp ' + cpath + ' ' + tcpath))
shutil.copy(cpath, tcpath)
if copy_cp and len(cp_infos) > 0:
@ -68,7 +69,7 @@ def process_dir(basedir, targetdir, ident, copy_cp = False, verbose = False):
tcmdpath = os.path.join(targetdir, basedirname + '.command')
if os.path.isfile(cmdpath):
if verbose:
print(' cp ' + cmdpath + ' ' + tcmdpath)
print((' cp ' + cmdpath + ' ' + tcmdpath))
shutil.copy(cmdpath, tcmdpath)
for path in os.listdir(basedir):

View file

@ -19,14 +19,14 @@ def main(fname, oname, verbose = True, parallel = True):
cbow = CBOW()
if verbose:
print 'Computing nearest names...'
print('Computing nearest names...')
if parallel:
nearest_names = namediff.nearest_par(map(lambda c: c.name, cards), n=1)
nearest_names = namediff.nearest_par([c.name for c in cards], n=1)
else:
nearest_names = [namediff.nearest(c.name, n=1) for c in cards]
if verbose:
print 'Computing nearest cards...'
print('Computing nearest cards...')
if parallel:
nearest_cards = cbow.nearest_par(cards, n=1)
else:
@ -45,7 +45,7 @@ def main(fname, oname, verbose = True, parallel = True):
# nearest_cards_text = [namediff.nearest_card(c, n=1) for c in cards]
if verbose:
print '...Done.'
print('...Done.')
# write to a file to store the data, this is a terribly long computation
# we could also just store this same info in the cards themselves as more fields...

View file

@ -2,7 +2,7 @@
def parse_keyfile(f, d, constructor = lambda x: x):
for line in f:
kv = map(lambda s: s.strip(), line.split(':'))
kv = [s.strip() for s in line.split(':')]
if not len(kv) == 2:
continue
d[kv[0]] = constructor(kv[1])
@ -18,8 +18,8 @@ def merge_dicts(d1, d2):
def main(fname1, fname2, verbose = True):
if verbose:
print 'opening ' + fname1 + ' as base key/value store'
print 'opening ' + fname2 + ' as target key/value store'
print('opening ' + fname1 + ' as base key/value store')
print('opening ' + fname2 + ' as target key/value store')
d1 = {}
d2 = {}
@ -32,8 +32,8 @@ def main(fname1, fname2, verbose = True):
tot2 = sum(d2.values())
if verbose:
print ' ' + fname1 + ': ' + str(len(d1)) + ', total ' + str(tot1)
print ' ' + fname2 + ': ' + str(len(d2)) + ', total ' + str(tot2)
print(' ' + fname1 + ': ' + str(len(d1)) + ', total ' + str(tot1))
print(' ' + fname2 + ': ' + str(len(d2)) + ', total ' + str(tot2))
d_merged = merge_dicts(d1, d2)
@ -49,20 +49,21 @@ def main(fname1, fname2, verbose = True):
else:
ratios[k] = float(v2 * tot1) / float(v1 * tot2)
print 'shared: ' + str(len(ratios))
for k in sorted(ratios, lambda x,y: cmp(d2[x], d2[y]), reverse=True):
print ' ' + k + ': ' + str(d2[k]) + '/' + str(d1[k]) + ' (' + str(ratios[k]) + ')'
print ''
print '1 only: ' + str(len(only_1))
for k in sorted(only_1, lambda x,y: cmp(d1[x], d1[y]), reverse=True):
print ' ' + k + ': ' + str(d1[k])
print ''
print('shared: ' + str(len(ratios)))
# Python 3 removed cmp() and sorted()'s positional comparator argument;
# rank the shared keys by their value in d2, largest first, via key= instead.
for k in sorted(ratios, key=lambda name: d2[name], reverse=True):
    print(' ' + k + ': ' + str(d2[k]) + '/' +
          str(d1[k]) + ' (' + str(ratios[k]) + ')')
print('')
print '2 only: ' + str(len(only_2))
for k in sorted(only_2, lambda x,y: cmp(d2[x], d2[y]), reverse=True):
print ' ' + k + ': ' + str(d2[k])
print ''
print('1 only: ' + str(len(only_1)))
# Python 3 removed cmp() and sorted()'s positional comparator argument;
# rank the keys found only in the first store by their d1 value, largest first.
for k in sorted(only_1, key=lambda name: d1[name], reverse=True):
    print(' ' + k + ': ' + str(d1[k]))
print('')
print('2 only: ' + str(len(only_2)))
# Python 3 removed cmp() and sorted()'s positional comparator argument;
# rank the keys found only in the second store by their d2 value, largest first.
for k in sorted(only_2, key=lambda name: d2[name], reverse=True):
    print(' ' + k + ': ' + str(d2[k]))
print('')
if __name__ == '__main__':

View file

@ -91,13 +91,13 @@ def check_X(card):
lcosts = mt.costs[:actcosts]
rcosts = mt.costs[actcosts:]
if 'X' in sides[0] or (utils.reserved_mana_marker in sides[0] and
'X' in ''.join(map(lambda c: c.encode(), lcosts))):
'X' in ''.join([c.encode() for c in lcosts])):
if incost:
return False # bad, duplicated Xs in costs
return False # bad, duplicated Xs in costs
if 'X' in sides[1] or (utils.reserved_mana_marker in sides[1] and
'X' in ''.join(map(lambda c: c.encode(), rcosts))):
'X' in ''.join([c.encode() for c in rcosts])):
correct = True # good, defined X is either specified or used
if 'monstrosity' in sides[1]:
extra_cost_lines += 1
@ -384,9 +384,9 @@ def process_props(cards, dump = False, uncovered = False):
if card.name not in ['demonic pact', 'lavaclaw reaches',
"ertai's trickery", 'rumbling aftershocks', # i hate these
] and dump:
print('---- ' + prop + ' ----')
print(card.encode())
print(card.format())
print(('---- ' + prop + ' ----'))
print((card.encode()))
print((card.format()))
values[prop] = (total, good, bad)
if overall:
total_good += 1
@ -396,8 +396,8 @@ def process_props(cards, dump = False, uncovered = False):
total_uncovered += 1
if uncovered:
print('---- uncovered ----')
print(card.encode())
print(card.format())
print((card.encode()))
print((card.format()))
return ((total_all, total_good, total_bad, total_uncovered),
values)
@ -419,8 +419,8 @@ def main(fname, oname = None, verbose = False, dump = False):
else:
rg[g] = 1
if g >= 60:
print g
print card.format()
print(g)
print(card.format())
tot = 0
vmax = sum(rg.values())
@ -428,7 +428,7 @@ def main(fname, oname = None, verbose = False, dump = False):
pct95 = None
pct99 = None
for i in sorted(rg):
print str(i) + ' rare ngrams: ' + str(rg[i])
print(str(i) + ' rare ngrams: ' + str(rg[i]))
tot += rg[i]
if pct90 is None and tot >= vmax * 0.90:
pct90 = i
@ -437,9 +437,9 @@ def main(fname, oname = None, verbose = False, dump = False):
if pct99 is None and tot >= vmax * 0.99:
pct99 = i
print '90% - ' + str(pct90)
print '95% - ' + str(pct95)
print '99% - ' + str(pct99)
print('90% - ' + str(pct90))
print('95% - ' + str(pct95))
print('99% - ' + str(pct99))
else:
((total_all, total_good, total_bad, total_uncovered),
@ -447,19 +447,21 @@ def main(fname, oname = None, verbose = False, dump = False):
# summary
print('-- overall --')
print(' total : ' + str(total_all))
print(' good : ' + str(total_good) + ' ' + pct(total_good, total_all))
print(' bad : ' + str(total_bad) + ' ' + pct(total_bad, total_all))
print(' uncocoverd: ' + str(total_uncovered) + ' ' + pct(total_uncovered, total_all))
print((' total : ' + str(total_all)))
print((' good : ' + str(total_good) +
' ' + pct(total_good, total_all)))
print((' bad : ' + str(total_bad) + ' ' + pct(total_bad, total_all)))
print((' uncocoverd: ' + str(total_uncovered) +
' ' + pct(total_uncovered, total_all)))
print('----')
# breakdown
for prop in props:
(total, good, bad) = values[prop]
print(prop + ':')
print(' total: ' + str(total) + ' ' + pct(total, total_all))
print(' good : ' + str(good) + ' ' + pct(good, total_all))
print(' bad : ' + str(bad) + ' ' + pct(bad, total_all))
print((prop + ':'))
print((' total: ' + str(total) + ' ' + pct(total, total_all)))
print((' good : ' + str(good) + ' ' + pct(good, total_all)))
print((' bad : ' + str(bad) + ' ' + pct(bad, total_all)))
if __name__ == '__main__':

View file

@ -33,22 +33,24 @@ def describe_bins(gramdict, bins):
for i in range(0, len(counts)):
if counts[i] > 0:
print (' ' + (str(bins[i]) if i < len(bins) else str(bins[-1]) + '+')
+ ': ' + str(counts[i]))
print((' ' + (str(bins[i]) if i < len(bins) else str(bins[-1]) + '+')
+ ': ' + str(counts[i])))
def extract_language(cards, separate_lines = True):
def extract_language(cards, separate_lines=True):
if separate_lines:
lang = [line.vectorize() for card in cards for line in card.text_lines]
else:
lang = [card.text.vectorize() for card in cards]
return map(lambda s: s.split(), lang)
return [s.split() for s in lang]
def build_ngram_model(cards, n, separate_lines = True, verbose = False):
def build_ngram_model(cards, n, separate_lines=True, verbose=False):
if verbose:
print('generating ' + str(n) + '-gram model')
print(('generating ' + str(n) + '-gram model'))
lang = extract_language(cards, separate_lines=separate_lines)
if verbose:
print('found ' + str(len(lang)) + ' sentences')
print(('found ' + str(len(lang)) + ' sentences'))
lm = model.NgramModel(n, lang, pad_left=True, pad_right=True)
if verbose:
print(lm)
@ -65,30 +67,30 @@ def main(fname, oname, gmin = 2, gmax = 8, nltk = False, sep = False, verbose =
lm = build_ngram_model(cards, n, separate_lines=sep, verbose=verbose)
if verbose:
teststr = 'when @ enters the battlefield'
print('litmus test: perplexity of ' + repr(teststr))
print(' ' + str(lm.perplexity(teststr.split())))
print(('litmus test: perplexity of ' + repr(teststr)))
print((' ' + str(lm.perplexity(teststr.split()))))
if verbose:
print('pickling module to ' + oname)
print(('pickling module to ' + oname))
with open(oname, 'wb') as f:
pickle.dump(lm, f)
else:
bins = [1, 2, 3, 10, 30, 100, 300, 1000]
if gmin < 2 or gmax < gmin:
print 'invalid gram sizes: ' + str(gmin) + '-' + str(gmax)
print('invalid gram sizes: ' + str(gmin) + '-' + str(gmax))
exit(1)
for grams in range(gmin, gmax+1):
for grams in range(gmin, gmax + 1):
if verbose:
print 'generating ' + str(grams) + '-grams...'
print('generating ' + str(grams) + '-grams...')
gramdict = {}
for card in cards:
update_ngrams(card.text_lines_words, gramdict, grams)
oname_full = oname + '.' + str(grams) + 'g'
if verbose:
print(' writing ' + str(len(gramdict)) + ' unique ' + str(grams)
+ '-grams to ' + oname_full)
print((' writing ' + str(len(gramdict)) + ' unique ' + str(grams)
+ '-grams to ' + oname_full))
describe_bins(gramdict, bins)
with open(oname_full, 'wt') as f:

View file

@ -87,8 +87,9 @@ def main(fname, oname, n=20, verbose=False):
#selected = selected[:limit]
if verbose:
print('computing nearest cards for ' + str(len(selected)) + ' candindates...')
cbow_nearest = cbow.nearest_par(map(lambda (i, c): c, selected))
print(('computing nearest cards for ' +
str(len(selected)) + ' candindates...'))
cbow_nearest = cbow.nearest_par([i_c[1] for i_c in selected])
for i in range(0, len(selected)):
(j, card) = selected[i]
selected[i] = (j, card, cbow_nearest[i])
@ -104,17 +105,17 @@ def main(fname, oname, n=20, verbose=False):
break
for (i, card, realcard, dist) in final:
print '-- real --'
print realcard.format()
print '-- fake --'
print card.format()
print '-- stats --'
print('-- real --')
print(realcard.format())
print('-- fake --')
print(card.format())
print('-- stats --')
perp_per = stats['ngram']['perp_per'][i]
perp_max = stats['ngram']['perp_max'][i]
print dist
print perp_per
print perp_max
print '----'
print(dist)
print(perp_per)
print(perp_max)
print('----')
if not oname is None:
with open(oname, 'wt') as ofile:
@ -126,7 +127,7 @@ def main(fname, oname, n=20, verbose=False):
ofile.write('version control:\n\ttype: none\napprentice code: ')
# Copy whatever output file is produced, name the copy 'set' (yes, no extension).
if os.path.isfile('set'):
print 'ERROR: tried to overwrite existing file "set" - aborting.'
print('ERROR: tried to overwrite existing file "set" - aborting.')
return
shutil.copyfile(oname, 'set')
# Use the freaky mse extension instead of zip.
@ -136,7 +137,8 @@ def main(fname, oname, n=20, verbose=False):
zf.write('set')
finally:
if verbose:
print 'Made an MSE set file called ' + oname + '.mse-set.'
print('Made an MSE set file called ' +
oname + '.mse-set.')
# The set file is useless outside the .mse-set, delete it.
os.remove('set')

View file

@ -41,37 +41,37 @@ def check_lines(fname):
for line in prel:
if line.strip() == '':
print(card.name, card.text.text)
print((card.name, card.text.text))
if any(line.startswith(s) for s in known):
line = 'known'
prelines.add(line)
for line in postl:
if line.strip() == '':
print(card.name, card.text.text)
print((card.name, card.text.text))
if any(line.startswith(s) for s in known):
line = 'known'
postlines.add(line)
for line in keyl:
if line.strip() == '':
print(card.name, card.text.text)
print((card.name, card.text.text))
if any(line.startswith(s) for s in known):
line = 'known'
keylines.add(line)
for line in mainl:
if line.strip() == '':
print(card.name, card.text.text)
print((card.name, card.text.text))
# if any(line.startswith(s) for s in known):
# line = 'known'
mainlines.add(line)
for line in costl:
if line.strip() == '':
print(card.name, card.text.text)
print((card.name, card.text.text))
# if any(line.startswith(s) for s in known) or 'cycling' in line or 'monstrosity' in line:
# line = 'known'
costlines.add(line)
print('prel: {:d}, keyl: {:d}, mainl: {:d}, postl {:d}'
.format(len(prelines), len(keylines), len(mainlines), len(postlines)))
print(('prel: {:d}, keyl: {:d}, mainl: {:d}, postl {:d}'
.format(len(prelines), len(keylines), len(mainlines), len(postlines))))
print('\nprelines')
for line in sorted(prelines):
@ -109,7 +109,7 @@ def check_vocab(fname):
vocab[word] += 1
for word in sorted(vocab, lambda x,y: cmp(vocab[x], vocab[y]), reverse = True):
print('{:8d} : {:s}'.format(vocab[word], word))
print(('{:8d} : {:s}'.format(vocab[word], word)))
n = 3
@ -120,8 +120,8 @@ def check_vocab(fname):
for word in words:
if vocab[word] <= n:
#if 'name' in word:
print('\n{:8d} : {:s}'.format(vocab[word], word))
print(card.encode())
print(('\n{:8d} : {:s}'.format(vocab[word], word)))
print((card.encode()))
break
def check_characters(fname, vname):
@ -135,14 +135,15 @@ def check_characters(fname, vname):
token_to_idx = {tok:i+1 for i, tok in enumerate(sorted(tokens))}
idx_to_token = {i+1:tok for i, tok in enumerate(sorted(tokens))}
print('Vocabulary: ({:d} symbols)'.format(len(token_to_idx)))
print(('Vocabulary: ({:d} symbols)'.format(len(token_to_idx))))
for token in sorted(token_to_idx):
print('{:8s} : {:4d}'.format(repr(token), token_to_idx[token]))
print(('{:8s} : {:4d}'.format(repr(token), token_to_idx[token])))
# compliant with torch-rnn
if vname:
json_data = {'token_to_idx':token_to_idx, 'idx_to_token':idx_to_token}
print('writing vocabulary to {:s}'.format(vname))
json_data = {'token_to_idx': token_to_idx,
'idx_to_token': idx_to_token}
print(('writing vocabulary to {:s}'.format(vname)))
with open(vname, 'w') as f:
json.dump(json_data, f)

View file

@ -41,7 +41,8 @@ def force_kill_self_noreturn():
def handler_kill_self(signum, frame):
if signum != signal.SIGQUIT:
traceback.print_stack(frame)
print('caught signal {:d} - streamer sending SIGTERM to self'.format(signum))
print(
('caught signal {:d} - streamer sending SIGTERM to self'.format(signum)))
force_kill_self_noreturn()
def install_suicide_handlers():

View file

@ -36,14 +36,14 @@ def main(fname):
name_avg = name_avg / float(nonempty)
card_avg = card_avg / float(nonempty)
print str(nonempty) + ' cards'
print '-- names --'
print 'avg distance: ' + str(name_avg)
print 'num duplicates: ' + str(name_dupes)
print '-- cards --'
print 'avg distance: ' + str(card_avg)
print 'num duplicates: ' + str(card_dupes)
print '----'
print(str(nonempty) + ' cards')
print('-- names --')
print('avg distance: ' + str(name_avg))
print('num duplicates: ' + str(name_dupes))
print('-- cards --')
print('avg distance: ' + str(card_avg))
print('num duplicates: ' + str(card_dupes))
print('----')
if __name__ == '__main__':

View file

@ -11,7 +11,7 @@ from datalib import Datamine
def main(fname, verbose = True, outliers = False, dump_all = False):
if fname[-5:] == '.json':
if verbose:
print 'This looks like a json file: ' + fname
print('This looks like a json file: ' + fname)
json_srcs = jdecode.mtg_open_json(fname, verbose)
card_srcs = []
for json_cardname in sorted(json_srcs):
@ -19,7 +19,7 @@ def main(fname, verbose = True, outliers = False, dump_all = False):
card_srcs += [json_srcs[json_cardname][0]]
else:
if verbose:
print 'Opening encoded card file: ' + fname
print('Opening encoded card file: ' + fname)
with open(fname, 'rt') as f:
text = f.read()
card_srcs = text.split(utils.cardsep)

View file

@ -141,7 +141,7 @@ def sortcards(cards):
def main(fname, oname = None, verbose = True):
if verbose:
print 'Opening encoded card file: ' + fname
print('Opening encoded card file: ' + fname)
f = open(fname, 'r')
text = f.read()
@ -153,14 +153,14 @@ def main(fname, oname = None, verbose = True):
if not oname == None:
if verbose:
print 'Writing output to: ' + oname
print('Writing output to: ' + oname)
ofile = codecs.open(oname, 'w', 'utf-8')
for cardclass in classes:
if classes[cardclass] == None:
print cardclass
print(cardclass)
else:
print ' ' + cardclass + ': ' + str(len(classes[cardclass]))
print(' ' + cardclass + ': ' + str(len(classes[cardclass])))
if oname == None:
outputter = sys.stdout
@ -189,6 +189,7 @@ if __name__ == '__main__':
elif len(sys.argv) == 3:
main(sys.argv[1], oname = sys.argv[2])
else:
print 'Usage: ' + sys.argv[0] + ' ' + '<encoded file> [output filename]'
print('Usage: ' + sys.argv[0] + ' ' +
'<encoded file> [output filename]')
exit(1)