From 2f95b79f10a15226401c6e46c6e2f8d0fa93276f Mon Sep 17 00:00:00 2001 From: David Heyman Date: Sat, 13 Aug 2016 21:16:43 -0400 Subject: [PATCH] 2to3 conversion (some things not yet changed) --- decode.py | 22 +-- encode.py | 14 +- lib/cardlib.py | 45 +++--- lib/cbow.py | 24 +-- lib/datalib.py | 281 +++++++++++++++++---------------- lib/jdecode.py | 14 +- lib/manalib.py | 2 +- lib/namediff.py | 27 ++-- lib/nltk_model.py | 6 +- lib/transforms.py | 38 +++-- lib/utils.py | 30 ++-- scripts/analysis.py | 12 +- scripts/autosample.py | 4 +- scripts/collect_checkpoints.py | 11 +- scripts/distances.py | 8 +- scripts/keydiff.py | 37 ++--- scripts/mtg_validate.py | 46 +++--- scripts/ngrams.py | 32 ++-- scripts/pairing.py | 28 ++-- scripts/sanity.py | 29 ++-- scripts/streamcards.py | 3 +- scripts/sum.py | 16 +- scripts/summarize.py | 4 +- sortcards.py | 11 +- 24 files changed, 388 insertions(+), 356 deletions(-) diff --git a/decode.py b/decode.py index ced02dc..eef77f9 100755 --- a/decode.py +++ b/decode.py @@ -19,7 +19,7 @@ def main(fname, oname = None, verbose = True, encoding = 'std', # there is a sane thing to do here (namely, produce both at the same time) # but we don't support it yet. if for_mse and for_html: - print 'ERROR - decode.py - incompatible formats "mse" and "html"' + print('ERROR - decode.py - incompatible formats "mse" and "html"') return fmt_ordered = cardlib.fmt_ordered_default @@ -52,16 +52,16 @@ def main(fname, oname = None, verbose = True, encoding = 'std', namediff = Namediff() cbow = CBOW() if verbose: - print 'Computing nearest names...' - nearest_names = namediff.nearest_par(map(lambda c: c.name, cards), n=3) + print('Computing nearest names...') + nearest_names = namediff.nearest_par([c.name for c in cards], n=3) if verbose: - print 'Computing nearest cards...' + print('Computing nearest cards...') nearest_cards = cbow.nearest_par(cards) for i in range(0, len(cards)): cards[i].nearest_names = nearest_names[i] cards[i].nearest_cards = nearest_cards[i] if verbose: - print '...Done.' + print('...Done.') def hoverimg(cardname, dist, nd): truename = nd.names[cardname] @@ -238,17 +238,18 @@ def main(fname, oname = None, verbose = True, encoding = 'std', if oname: if for_html: - print oname + print(oname) # if ('.html' != oname[-]) # oname += '.html' if verbose: - print 'Writing output to: ' + oname + print('Writing output to: ' + oname) with open(oname, 'w') as ofile: writecards(ofile) if for_mse: - # Copy whatever output file is produced, name the copy 'set' (yes, no extension). + # Copy whatever output file is produced, name the copy 'set' (yes, + # no extension). if os.path.isfile('set'): - print 'ERROR: tried to overwrite existing file "set" - aborting.' + print('ERROR: tried to overwrite existing file "set" - aborting.') return shutil.copyfile(oname, 'set') # Use the freaky mse extension instead of zip. @@ -258,7 +259,8 @@ def main(fname, oname = None, verbose = True, encoding = 'std', zf.write('set') finally: if verbose: - print 'Made an MSE set file called ' + oname + '.mse-set.' + print('Made an MSE set file called ' + + oname + '.mse-set.') # The set file is useless outside the .mse-set, delete it. os.remove('set') else: diff --git a/encode.py b/encode.py index e2996f8..0ba3bf2 100755 --- a/encode.py +++ b/encode.py @@ -47,16 +47,16 @@ def main(fname, oname = None, verbose = True, encoding = 'std', raise ValueError('encode.py: unknown encoding: ' + encoding) if verbose: - print 'Preparing to encode:' - print ' Using encoding ' + repr(encoding) + print('Preparing to encode:') + print(' Using encoding ' + repr(encoding)) if stable: - print ' NOT randomizing order of cards.' + print(' NOT randomizing order of cards.') if randomize_mana: - print ' Randomizing order of symobls in manacosts.' + print(' Randomizing order of symobls in manacosts.') if not fmt_labeled: - print ' NOT labeling fields for this run (may be harder to decode).' + print(' NOT labeling fields for this run (may be harder to decode).') if not line_transformations: - print ' NOT using line reordering transformations' + print(' NOT using line reordering transformations') cards = jdecode.mtg_open_file(fname, verbose=verbose, linetrans=line_transformations) @@ -82,7 +82,7 @@ def main(fname, oname = None, verbose = True, encoding = 'std', if oname: if verbose: - print 'Writing output to: ' + oname + print('Writing output to: ' + oname) with open(oname, 'w') as ofile: writecards(ofile) else: diff --git a/lib/cardlib.py b/lib/cardlib.py index a5df0e3..4544e99 100644 --- a/lib/cardlib.py +++ b/lib/cardlib.py @@ -247,20 +247,19 @@ def fields_from_json(src_json, linetrans = True): fields[field_cost] = [(-1, cost)] if 'supertypes' in src_json: - fields[field_supertypes] = [(-1, map(lambda s: utils.to_ascii(s.lower()), - src_json['supertypes']))] + fields[field_supertypes] = [ + (-1, [utils.to_ascii(s.lower()) for s in src_json['supertypes']])] if 'types' in src_json: - fields[field_types] = [(-1, map(lambda s: utils.to_ascii(s.lower()), - src_json['types']))] + fields[field_types] = [(-1, [utils.to_ascii(s.lower()) + for s in src_json['types']])] else: parsed = False if 'subtypes' in src_json: - fields[field_subtypes] = [(-1, map(lambda s: utils.to_ascii(s.lower()) - # urza's lands... - .replace('"', "'").replace('-', utils.dash_marker), - src_json['subtypes']))] + fields[field_subtypes] = [(-1, [utils.to_ascii(s.lower()) + # urza's lands... + .replace('"', "'").replace('-', utils.dash_marker) for s in src_json['subtypes']])] if 'rarity' in src_json: @@ -323,7 +322,7 @@ def fields_from_format(src_text, fmt_ordered, fmt_labeled, fieldsep): if fmt_labeled: labels = {fmt_labeled[k] : k for k in fmt_labeled} - field_label_regex = '[' + ''.join(labels.keys()) + ']' + field_label_regex = '[' + ''.join(list(labels.keys())) + ']' def addf(fields, fkey, fval): # make sure you pass a pair if fval and fval[1]: @@ -544,14 +543,13 @@ class Card: self.__dict__[field_text] = mtext fulltext = mtext.encode() if fulltext: - self.__dict__[field_text + '_lines'] = map(Manatext, - fulltext.split(utils.newline)) - self.__dict__[field_text + '_words'] = re.sub(utils.unletters_regex, - ' ', + self.__dict__[field_text + '_lines'] = list(map(Manatext, + fulltext.split(utils.newline))) + self.__dict__[field_text + '_words'] = re.sub(utils.unletters_regex, + ' ', fulltext).split() - self.__dict__[field_text + '_lines_words'] = map( - lambda line: re.sub(utils.unletters_regex, ' ', line).split(), - fulltext.split(utils.newline)) + self.__dict__[field_text + '_lines_words'] = [re.sub( + utils.unletters_regex, ' ', line).split() for line in fulltext.split(utils.newline)] else: self.valid = False self.__dict__[field_other] += [(idx, ' ' + str(value))] @@ -667,11 +665,12 @@ class Card: outstr += linebreak - basetypes = map(str.capitalize, self.__dict__[field_types]) + basetypes = list(map(str.capitalize, self.__dict__[field_types])) if vdump and len(basetypes) < 1: basetypes = ['_NOTYPE_'] - - outstr += ' '.join(map(str.capitalize, self.__dict__[field_supertypes]) + basetypes) + + outstr += ' '.join(list(map(str.capitalize, + self.__dict__[field_supertypes])) + basetypes) if self.__dict__[field_subtypes]: outstr += (' ' + utils.dash_marker + ' ' + @@ -1005,8 +1004,8 @@ class Card: if coststr: outstr += coststr + ' ' - typestr = ' '.join(map(lambda s: '(' + s + ')', - self.__dict__[field_supertypes] + self.__dict__[field_types])) + typestr = ' '.join( + ['(' + s + ')' for s in self.__dict__[field_supertypes] + self.__dict__[field_types]]) if typestr: outstr += typestr + ' ' @@ -1014,8 +1013,8 @@ class Card: outstr += ' '.join(self.__dict__[field_subtypes]) + ' ' if self.__dict__[field_pt]: - outstr += ' '.join(map(lambda s: '(' + s + ')', - self.__dict__[field_pt].replace('/', '/ /').split())) + outstr += ' '.join(['(' + s + ')' for s in self.__dict__[ + field_pt].replace('/', '/ /').split()]) outstr += ' ' if self.__dict__[field_loyalty]: diff --git a/lib/cbow.py b/lib/cbow.py index 8c89a19..c91f7ce 100644 --- a/lib/cbow.py +++ b/lib/cbow.py @@ -147,7 +147,7 @@ def f_nearest(card, vocab, vecs, cardvecs, n): def f_nearest_per_thread(workitem): (workcards, vocab, vecs, cardvecs, n) = workitem - return map(lambda card: f_nearest(card, vocab, vecs, cardvecs, n), workcards) + return [f_nearest(card, vocab, vecs, cardvecs, n) for card in workcards] class CBOW: def __init__(self, verbose = True, @@ -157,17 +157,18 @@ class CBOW: self.cardvecs = [] if self.verbose: - print 'Building a cbow model...' + print('Building a cbow model...') if self.verbose: - print ' Reading binary vector data from: ' + vector_fname + print(' Reading binary vector data from: ' + vector_fname) (vocab, vecs) = read_vector_file(vector_fname) self.vocab = vocab self.vecs = vecs - + if self.verbose: - print ' Reading encoded cards from: ' + card_fname - print ' They\'d better be in the same order as the file used to build the vector model!' + print(' Reading encoded cards from: ' + card_fname) + print( + ' They\'d better be in the same order as the file used to build the vector model!') with open(card_fname, 'rt') as f: text = f.read() for card_src in text.split(utils.cardsep): @@ -179,10 +180,10 @@ class CBOW: card.vectorize()))] if self.verbose: - print '... Done.' - print ' vocab size: ' + str(len(self.vocab)) - print ' raw vecs: ' + str(len(self.vecs)) - print ' card vecs: ' + str(len(self.cardvecs)) + print('... Done.') + print(' vocab size: ' + str(len(self.vocab))) + print(' raw vecs: ' + str(len(self.vecs))) + print(' card vecs: ' + str(len(self.cardvecs))) def nearest(self, card, n=5): return f_nearest(card, self.vocab, self.vecs, self.cardvecs, n) @@ -190,6 +191,7 @@ class CBOW: def nearest_par(self, cards, n=5, threads=cores): workpool = multiprocessing.Pool(threads) proto_worklist = namediff.list_split(cards, threads) - worklist = map(lambda x: (x, self.vocab, self.vecs, self.cardvecs, n), proto_worklist) + worklist = [(x, self.vocab, self.vecs, self.cardvecs, n) + for x in proto_worklist] donelist = workpool.map(f_nearest_per_thread, worklist) return namediff.list_flatten(donelist) diff --git a/lib/datalib.py b/lib/datalib.py index c7d2d64..8613ad9 100644 --- a/lib/datalib.py +++ b/lib/datalib.py @@ -25,11 +25,13 @@ def padrows(l): return padded def printrows(l): for row in l: - print row + print(row) # index management helpers + + def index_size(d): - return sum(map(lambda k: len(d[k]), d)) + return sum([len(d[k]) for k in d]) def inc(d, k, obj): if k or k == 0: @@ -148,35 +150,39 @@ class Datamine: # summarize the indices # Yes, this printing code is pretty terrible. def summarize(self, hsize = 10, vsize = 10, cmcsize = 20): - print '====================' - print str(len(self.cards)) + ' valid cards, ' + str(len(self.invalid_cards)) + ' invalid cards.' - print str(len(self.allcards)) + ' cards parsed, ' + str(len(self.unparsed_cards)) + ' failed to parse' - print '--------------------' - print str(len(self.by_name)) + ' unique card names' - print '--------------------' - print (str(len(self.by_color_inclusive)) + ' represented colors (including colorless as \'A\'), ' - + str(len(self.by_color)) + ' combinations') - print 'Breakdown by color:' - rows = [self.by_color_inclusive.keys()] + print('====================') + print(str(len(self.cards)) + ' valid cards, ' + + str(len(self.invalid_cards)) + ' invalid cards.') + print(str(len(self.allcards)) + ' cards parsed, ' + + str(len(self.unparsed_cards)) + ' failed to parse') + print('--------------------') + print(str(len(self.by_name)) + ' unique card names') + print('--------------------') + print((str(len(self.by_color_inclusive)) + ' represented colors (including colorless as \'A\'), ' + + str(len(self.by_color)) + ' combinations')) + print('Breakdown by color:') + rows = [list(self.by_color_inclusive.keys())] rows += [[len(self.by_color_inclusive[k]) for k in rows[0]]] printrows(padrows(rows)) - print 'Breakdown by number of colors:' - rows = [self.by_color_count.keys()] + print('Breakdown by number of colors:') + rows = [list(self.by_color_count.keys())] rows += [[len(self.by_color_count[k]) for k in rows[0]]] printrows(padrows(rows)) - print '--------------------' - print str(len(self.by_type_inclusive)) + ' unique card types, ' + str(len(self.by_type)) + ' combinations' - print 'Breakdown by type:' - d = sorted(self.by_type_inclusive, - lambda x,y: cmp(len(self.by_type_inclusive[x]), len(self.by_type_inclusive[y])), - reverse = True) + print('--------------------') + print(str(len(self.by_type_inclusive)) + ' unique card types, ' + + str(len(self.by_type)) + ' combinations') + print('Breakdown by type:') + d = sorted(self.by_type_inclusive, + lambda x, y: cmp(len(self.by_type_inclusive[x]), len( + self.by_type_inclusive[y])), + reverse=True) rows = [[k for k in d[:hsize]]] rows += [[len(self.by_type_inclusive[k]) for k in rows[0]]] printrows(padrows(rows)) - print '--------------------' - print (str(len(self.by_subtype_inclusive)) + ' unique subtypes, ' - + str(len(self.by_subtype)) + ' combinations') - print '-- Popular subtypes: --' + print('--------------------') + print((str(len(self.by_subtype_inclusive)) + ' unique subtypes, ' + + str(len(self.by_subtype)) + ' combinations')) + print('-- Popular subtypes: --') d = sorted(self.by_subtype_inclusive, lambda x,y: cmp(len(self.by_subtype_inclusive[x]), len(self.by_subtype_inclusive[y])), reverse = True) @@ -184,7 +190,7 @@ class Datamine: for k in d[0:vsize]: rows += [[k, len(self.by_subtype_inclusive[k])]] printrows(padrows(rows)) - print '-- Top combinations: --' + print('-- Top combinations: --') d = sorted(self.by_subtype, lambda x,y: cmp(len(self.by_subtype[x]), len(self.by_subtype[y])), reverse = True) @@ -192,24 +198,26 @@ class Datamine: for k in d[0:vsize]: rows += [[k, len(self.by_subtype[k])]] printrows(padrows(rows)) - print '--------------------' - print (str(len(self.by_supertype_inclusive)) + ' unique supertypes, ' - + str(len(self.by_supertype)) + ' combinations') - print 'Breakdown by supertype:' - d = sorted(self.by_supertype_inclusive, - lambda x,y: cmp(len(self.by_supertype_inclusive[x]),len(self.by_supertype_inclusive[y])), - reverse = True) + print('--------------------') + print((str(len(self.by_supertype_inclusive)) + ' unique supertypes, ' + + str(len(self.by_supertype)) + ' combinations')) + print('Breakdown by supertype:') + d = sorted(self.by_supertype_inclusive, + lambda x, y: cmp(len(self.by_supertype_inclusive[x]), len( + self.by_supertype_inclusive[y])), + reverse=True) rows = [[k for k in d[:hsize]]] rows += [[len(self.by_supertype_inclusive[k]) for k in rows[0]]] printrows(padrows(rows)) - print '--------------------' - print str(len(self.by_cmc)) + ' different CMCs, ' + str(len(self.by_cost)) + ' unique mana costs' - print 'Breakdown by CMC:' - d = sorted(self.by_cmc, reverse = False) + print('--------------------') + print(str(len(self.by_cmc)) + ' different CMCs, ' + + str(len(self.by_cost)) + ' unique mana costs') + print('Breakdown by CMC:') + d = sorted(self.by_cmc, reverse=False) rows = [[k for k in d[:cmcsize]]] rows += [[len(self.by_cmc[k]) for k in rows[0]]] printrows(padrows(rows)) - print '-- Popular mana costs: --' + print('-- Popular mana costs: --') d = sorted(self.by_cost, lambda x,y: cmp(len(self.by_cost[x]), len(self.by_cost[y])), reverse = True) @@ -217,12 +225,12 @@ class Datamine: for k in d[0:vsize]: rows += [[utils.from_mana(k), len(self.by_cost[k])]] printrows(padrows(rows)) - print '--------------------' - print str(len(self.by_pt)) + ' unique p/t combinations' + print('--------------------') + print(str(len(self.by_pt)) + ' unique p/t combinations') if len(self.by_power) > 0 and len(self.by_toughness) > 0: - print ('Largest power: ' + str(max(map(len, self.by_power)) - 1) + - ', largest toughness: ' + str(max(map(len, self.by_toughness)) - 1)) - print '-- Popular p/t values: --' + print(('Largest power: ' + str(max(list(map(len, self.by_power))) - 1) + + ', largest toughness: ' + str(max(list(map(len, self.by_toughness))) - 1))) + print('-- Popular p/t values: --') d = sorted(self.by_pt, lambda x,y: cmp(len(self.by_pt[x]), len(self.by_pt[y])), reverse = True) @@ -230,8 +238,8 @@ class Datamine: for k in d[0:vsize]: rows += [[utils.from_unary(k), len(self.by_pt[k])]] printrows(padrows(rows)) - print '--------------------' - print 'Loyalty values:' + print('--------------------') + print('Loyalty values:') d = sorted(self.by_loyalty, lambda x,y: cmp(len(self.by_loyalty[x]), len(self.by_loyalty[y])), reverse = True) @@ -239,13 +247,13 @@ class Datamine: for k in d[0:vsize]: rows += [[utils.from_unary(k), len(self.by_loyalty[k])]] printrows(padrows(rows)) - print '--------------------' + print('--------------------') if len(self.by_textlen) > 0 and len(self.by_textlines) > 0: - print('Card text ranges from ' + str(min(self.by_textlen)) + ' to ' - + str(max(self.by_textlen)) + ' characters in length') - print('Card text ranges from ' + str(min(self.by_textlines)) + ' to ' - + str(max(self.by_textlines)) + ' lines') - print '-- Line counts by frequency: --' + print(('Card text ranges from ' + str(min(self.by_textlen)) + ' to ' + + str(max(self.by_textlen)) + ' characters in length')) + print(('Card text ranges from ' + str(min(self.by_textlines)) + ' to ' + + str(max(self.by_textlines)) + ' lines')) + print('-- Line counts by frequency: --') d = sorted(self.by_textlines, lambda x,y: cmp(len(self.by_textlines[x]), len(self.by_textlines[y])), reverse = True) @@ -253,29 +261,29 @@ class Datamine: for k in d[0:vsize]: rows += [[k, len(self.by_textlines[k])]] printrows(padrows(rows)) - print '====================' - + print('====================') # describe outliers in the indices - def outliers(self, hsize = 10, vsize = 10, dump_invalid = False): - print '********************' - print 'Overview of indices:' + def outliers(self, hsize=10, vsize=10, dump_invalid=False): + print('********************') + print('Overview of indices:') rows = [['Index Name', 'Keys', 'Total Members']] for index in self.indices: - rows += [[index, len(self.indices[index]), index_size(self.indices[index])]] + rows += [[index, len(self.indices[index]), + index_size(self.indices[index])]] printrows(padrows(rows)) - print '********************' + print('********************') if len(self.by_name) > 0: - scardname = sorted(self.by_name, - lambda x,y: cmp(len(x), len(y)), - reverse = False)[0] - print 'Shortest Cardname: (' + str(len(scardname)) + ')' - print ' ' + scardname - lcardname = sorted(self.by_name, - lambda x,y: cmp(len(x), len(y)), - reverse = True)[0] - print 'Longest Cardname: (' + str(len(lcardname)) + ')' - print ' ' + lcardname + scardname = sorted(self.by_name, + lambda x, y: cmp(len(x), len(y)), + reverse=False)[0] + print('Shortest Cardname: (' + str(len(scardname)) + ')') + print(' ' + scardname) + lcardname = sorted(self.by_name, + lambda x, y: cmp(len(x), len(y)), + reverse=True)[0] + print('Longest Cardname: (' + str(len(lcardname)) + ')') + print(' ' + lcardname) d = sorted(self.by_name, lambda x,y: cmp(len(self.by_name[x]), len(self.by_name[y])), reverse = True) @@ -286,94 +294,95 @@ class Datamine: if rows == []: print('No duplicated cardnames') else: - print '-- Most duplicated names: --' + print('-- Most duplicated names: --') printrows(padrows(rows)) else: - print 'No cards indexed by name?' - print '--------------------' + print('No cards indexed by name?') + print('--------------------') if len(self.by_type) > 0: - ltypes = sorted(self.by_type, - lambda x,y: cmp(len(x), len(y)), - reverse = True)[0] - print 'Longest card type: (' + str(len(ltypes)) + ')' - print ' ' + ltypes + ltypes = sorted(self.by_type, + lambda x, y: cmp(len(x), len(y)), + reverse=True)[0] + print('Longest card type: (' + str(len(ltypes)) + ')') + print(' ' + ltypes) else: - print 'No cards indexed by type?' + print('No cards indexed by type?') if len(self.by_subtype) > 0: - lsubtypes = sorted(self.by_subtype, - lambda x,y: cmp(len(x), len(y)), - reverse = True)[0] - print 'Longest subtype: (' + str(len(lsubtypes)) + ')' - print ' ' + lsubtypes + lsubtypes = sorted(self.by_subtype, + lambda x, y: cmp(len(x), len(y)), + reverse=True)[0] + print('Longest subtype: (' + str(len(lsubtypes)) + ')') + print(' ' + lsubtypes) else: - print 'No cards indexed by subtype?' + print('No cards indexed by subtype?') if len(self.by_supertype) > 0: - lsupertypes = sorted(self.by_supertype, - lambda x,y: cmp(len(x), len(y)), - reverse = True)[0] - print 'Longest supertype: (' + str(len(lsupertypes)) + ')' - print ' ' + lsupertypes + lsupertypes = sorted(self.by_supertype, + lambda x, y: cmp(len(x), len(y)), + reverse=True)[0] + print('Longest supertype: (' + str(len(lsupertypes)) + ')') + print(' ' + lsupertypes) else: - print 'No cards indexed by supertype?' - print '--------------------' + print('No cards indexed by supertype?') + print('--------------------') if len(self.by_cost) > 0: - lcost = sorted(self.by_cost, - lambda x,y: cmp(len(x), len(y)), - reverse = True)[0] - print 'Longest mana cost: (' + str(len(lcost)) + ')' - print ' ' + utils.from_mana(lcost) - print '\n' + plimit(self.by_cost[lcost][0].encode()) + '\n' + lcost = sorted(self.by_cost, + lambda x, y: cmp(len(x), len(y)), + reverse=True)[0] + print('Longest mana cost: (' + str(len(lcost)) + ')') + print(' ' + utils.from_mana(lcost)) + print('\n' + plimit(self.by_cost[lcost][0].encode()) + '\n') else: - print 'No cards indexed by cost?' + print('No cards indexed by cost?') if len(self.by_cmc) > 0: - lcmc = sorted(self.by_cmc, reverse = True)[0] - print 'Largest cmc: (' + str(lcmc) + ')' - print ' ' + str(self.by_cmc[lcmc][0].cost) - print '\n' + plimit(self.by_cmc[lcmc][0].encode()) + lcmc = sorted(self.by_cmc, reverse=True)[0] + print('Largest cmc: (' + str(lcmc) + ')') + print(' ' + str(self.by_cmc[lcmc][0].cost)) + print('\n' + plimit(self.by_cmc[lcmc][0].encode())) else: - print 'No cards indexed by cmc?' - print '--------------------' + print('No cards indexed by cmc?') + print('--------------------') if len(self.by_power) > 0: - lpower = sorted(self.by_power, - lambda x,y: cmp(len(x), len(y)), - reverse = True)[0] - print 'Largest creature power: ' + utils.from_unary(lpower) - print '\n' + plimit(self.by_power[lpower][0].encode()) + '\n' - else: - print 'No cards indexed by power?' + lpower = sorted(self.by_power, + lambda x, y: cmp(len(x), len(y)), + reverse=True)[0] + print('Largest creature power: ' + utils.from_unary(lpower)) + print('\n' + plimit(self.by_power[lpower][0].encode()) + '\n') + else: + print('No cards indexed by power?') if len(self.by_toughness) > 0: - ltoughness = sorted(self.by_toughness, - lambda x,y: cmp(len(x), len(y)), - reverse = True)[0] - print 'Largest creature toughness: ' + utils.from_unary(ltoughness) - print '\n' + plimit(self.by_toughness[ltoughness][0].encode()) - else: - print 'No cards indexed by toughness?' - print '--------------------' + ltoughness = sorted(self.by_toughness, + lambda x, y: cmp(len(x), len(y)), + reverse=True)[0] + print('Largest creature toughness: ' + + utils.from_unary(ltoughness)) + print('\n' + plimit(self.by_toughness[ltoughness][0].encode())) + else: + print('No cards indexed by toughness?') + print('--------------------') if len(self.by_textlines) > 0: - llines = sorted(self.by_textlines, reverse = True)[0] - print 'Most lines of text in a card: ' + str(llines) - print '\n' + plimit(self.by_textlines[llines][0].encode()) + '\n' - else: - print 'No cards indexed by line count?' + llines = sorted(self.by_textlines, reverse=True)[0] + print('Most lines of text in a card: ' + str(llines)) + print('\n' + plimit(self.by_textlines[llines][0].encode()) + '\n') + else: + print('No cards indexed by line count?') if len(self.by_textlen) > 0: - ltext = sorted(self.by_textlen, reverse = True)[0] - print 'Most chars in a card text: ' + str(ltext) - print '\n' + plimit(self.by_textlen[ltext][0].encode()) - else: - print 'No cards indexed by char count?' - print '--------------------' - print 'There were ' + str(len(self.invalid_cards)) + ' invalid cards.' + ltext = sorted(self.by_textlen, reverse=True)[0] + print('Most chars in a card text: ' + str(ltext)) + print('\n' + plimit(self.by_textlen[ltext][0].encode())) + else: + print('No cards indexed by char count?') + print('--------------------') + print('There were ' + str(len(self.invalid_cards)) + ' invalid cards.') if dump_invalid: for card in self.invalid_cards: - print '\n' + repr(card.fields) + print('\n' + repr(card.fields)) elif len(self.invalid_cards) > 0: - print 'Not summarizing.' - print '--------------------' - print 'There were ' + str(len(self.unparsed_cards)) + ' unparsed cards.' + print('Not summarizing.') + print('--------------------') + print('There were ' + str(len(self.unparsed_cards)) + ' unparsed cards.') if dump_invalid: for card in self.unparsed_cards: - print '\n' + repr(card.fields) + print('\n' + repr(card.fields)) elif len(self.unparsed_cards) > 0: - print 'Not summarizing.' - print '====================' + print('Not summarizing.') + print('====================') diff --git a/lib/jdecode.py b/lib/jdecode.py index 9c89f3a..e0721d2 100644 --- a/lib/jdecode.py +++ b/lib/jdecode.py @@ -63,7 +63,7 @@ def mtg_open_json(fname, verbose = False): #print bsides[uid] if verbose: - print 'Opened ' + str(len(allcards)) + ' uniquely named cards.' + print('Opened ' + str(len(allcards)) + ' uniquely named cards.') return allcards # filters to ignore some undesirable cards, only used when opening json @@ -91,7 +91,7 @@ def mtg_open_file(fname, verbose = False, if fname[-5:] == '.json': if verbose: - print 'This looks like a json file: ' + fname + print('This looks like a json file: ' + fname) json_srcs = mtg_open_json(fname, verbose) # sorted for stability for json_cardname in sorted(json_srcs): @@ -136,7 +136,7 @@ def mtg_open_file(fname, verbose = False, # fall back to opening a normal encoded file else: if verbose: - print 'Opening encoded card file: ' + fname + print('Opening encoded card file: ' + fname) with open(fname, 'rt') as f: text = f.read() for card_src in text.split(utils.cardsep): @@ -152,8 +152,8 @@ def mtg_open_file(fname, verbose = False, unparsed += 1 if verbose: - print (str(valid) + ' valid, ' + str(skipped) + ' skipped, ' - + str(invalid) + ' invalid, ' + str(unparsed) + ' failed to parse.') + print((str(valid) + ' valid, ' + str(skipped) + ' skipped, ' + + str(invalid) + ' invalid, ' + str(unparsed) + ' failed to parse.')) good_count = 0 bad_count = 0 @@ -168,7 +168,7 @@ def mtg_open_file(fname, verbose = False, break # random heuristic if bad_count > 10: - print 'WARNING: Saw a bunch of unparsed cards:' - print ' Is this a legacy format, you may need to specify the field order.' + print('WARNING: Saw a bunch of unparsed cards:') + print(' Is this a legacy format, you may need to specify the field order.') return cards diff --git a/lib/manalib.py b/lib/manalib.py index 4cabc7f..d943c56 100644 --- a/lib/manalib.py +++ b/lib/manalib.py @@ -136,7 +136,7 @@ class Manacost: else: ld = '' rd = '' - return ' '.join(map(lambda s: ld + s + rd, sorted(self.sequence))) + return ' '.join([ld + s + rd for s in sorted(self.sequence)]) class Manatext: diff --git a/lib/namediff.py b/lib/namediff.py index 14e341f..521dcc2 100644 --- a/lib/namediff.py +++ b/lib/namediff.py @@ -48,7 +48,7 @@ def f_nearest_per_thread(workitem): (worknames, names, n) = workitem # each thread (well, process) needs to generate its own matchers matchers = [difflib.SequenceMatcher(b=name, autojunk=False) for name in names] - return map(lambda name: f_nearest(name, matchers, n), worknames) + return [f_nearest(name, matchers, n) for name in worknames] class Namediff: def __init__(self, verbose = True, @@ -59,10 +59,10 @@ class Namediff: self.cardstrings = {} if self.verbose: - print 'Setting up namediff...' + print('Setting up namediff...') if self.verbose: - print ' Reading names from: ' + json_fname + print(' Reading names from: ' + json_fname) json_srcs = jdecode.mtg_open_json(json_fname, verbose) namecount = 0 for json_cardname in sorted(json_srcs): @@ -81,7 +81,7 @@ class Namediff: jnum = '' if name in self.names: - print ' Duplicate name ' + name + ', ignoring.' + print(' Duplicate name ' + name + ', ignoring.') else: self.names[name] = jname self.cardstrings[name] = card.encode() @@ -91,13 +91,15 @@ class Namediff: self.codes[name] = '' namecount += 1 - print ' Read ' + str(namecount) + ' unique cardnames' - print ' Building SequenceMatcher objects.' - - self.matchers = [difflib.SequenceMatcher(b=n, autojunk=False) for n in self.names] - self.card_matchers = [difflib.SequenceMatcher(b=self.cardstrings[n], autojunk=False) for n in self.cardstrings] + print(' Read ' + str(namecount) + ' unique cardnames') + print(' Building SequenceMatcher objects.') - print '... Done.' + self.matchers = [difflib.SequenceMatcher( + b=n, autojunk=False) for n in self.names] + self.card_matchers = [difflib.SequenceMatcher( + b=self.cardstrings[n], autojunk=False) for n in self.cardstrings] + + print('... Done.') def nearest(self, name, n=3): return f_nearest(name, self.matchers, n) @@ -105,7 +107,7 @@ class Namediff: def nearest_par(self, names, n=3, threads=cores): workpool = multiprocessing.Pool(threads) proto_worklist = list_split(names, threads) - worklist = map(lambda x: (x, self.names, n), proto_worklist) + worklist = [(x, self.names, n) for x in proto_worklist] donelist = workpool.map(f_nearest_per_thread, worklist) return list_flatten(donelist) @@ -115,6 +117,7 @@ class Namediff: def nearest_card_par(self, cards, n=5, threads=cores): workpool = multiprocessing.Pool(threads) proto_worklist = list_split(cards, threads) - worklist = map(lambda x: (map(lambda c: c.encode(), x), self.cardstrings.values(), n), proto_worklist) + worklist = [([c.encode() for c in x], list( + self.cardstrings.values()), n) for x in proto_worklist] donelist = workpool.map(f_nearest_per_thread, worklist) return list_flatten(donelist) diff --git a/lib/nltk_model.py b/lib/nltk_model.py index b88894f..b6a8173 100644 --- a/lib/nltk_model.py +++ b/lib/nltk_model.py @@ -11,7 +11,7 @@ # an attempt was made to preserve the exact functionality of this code, # hampered somewhat by its brokenness -from __future__ import unicode_literals + from math import log @@ -169,9 +169,9 @@ class NgramModel(ModelI): # fixed def _words_following(self, context, cond_freq_dist): - for ctxt in cond_freq_dist.iterkeys(): + for ctxt in cond_freq_dist.keys(): if ctxt == context: - yield cond_freq_dist[ctxt].keys() + yield list(cond_freq_dist[ctxt].keys()) def prob(self, word, context): """ diff --git a/lib/transforms.py b/lib/transforms.py index c3036a8..cac46c4 100644 --- a/lib/transforms.py +++ b/lib/transforms.py @@ -144,7 +144,7 @@ def text_pass_4b_x(s): s = s.replace(' x ', ' ' + x_marker + ' ') s = s.replace('x:', x_marker + ':') s = s.replace('x~', x_marker + '~') - s = s.replace(u'x\u2014', x_marker + u'\u2014') + s = s.replace('x\u2014', x_marker + '\u2014') s = s.replace('x.', x_marker + '.') s = s.replace('x,', x_marker + ',') s = s.replace('x is', x_marker + ' is') @@ -359,7 +359,8 @@ def text_pass_7_choice(s): # to '[n = ability = ability]\n' def choice_formatting_helper(s_helper, prefix, count, suffix = ''): - single_choices = re.findall(ur'(' + prefix + ur'\n?(\u2022.*(\n|$))+)', s_helper) + single_choices = re.findall( + r'(' + prefix + r'\n?(\u2022.*(\n|$))+)', s_helper) for choice in single_choices: newchoice = choice[0] newchoice = newchoice.replace(prefix, unary_marker + (unary_counter * count) + suffix) @@ -371,20 +372,23 @@ def text_pass_7_choice(s): s_helper = s_helper.replace(choice[0], newchoice) return s_helper - s = choice_formatting_helper(s, ur'choose one \u2014', 1) - s = choice_formatting_helper(s, ur'choose one \u2014 ', 1) # ty Promise of Power - s = choice_formatting_helper(s, ur'choose two \u2014', 2) - s = choice_formatting_helper(s, ur'choose two \u2014 ', 2) # ty Profane Command - s = choice_formatting_helper(s, ur'choose one or both \u2014', 0) - s = choice_formatting_helper(s, ur'choose one or more \u2014', 0) - s = choice_formatting_helper(s, ur'choose khans or dragons.', 1) - # this is for 'an opponent chooses one', which will be a bit weird but still work out - s = choice_formatting_helper(s, ur'chooses one \u2014', 1) + s = choice_formatting_helper(s, r'choose one \u2014', 1) + s = choice_formatting_helper( + s, r'choose one \u2014 ', 1) # ty Promise of Power + s = choice_formatting_helper(s, r'choose two \u2014', 2) + s = choice_formatting_helper( + s, r'choose two \u2014 ', 2) # ty Profane Command + s = choice_formatting_helper(s, r'choose one or both \u2014', 0) + s = choice_formatting_helper(s, r'choose one or more \u2014', 0) + s = choice_formatting_helper(s, r'choose khans or dragons.', 1) + # this is for 'an opponent chooses one', which will be a bit weird but + # still work out + s = choice_formatting_helper(s, r'chooses one \u2014', 1) # Demonic Pact has 'choose one that hasn't been chosen'... - s = choice_formatting_helper(s, ur"choose one that hasn't been chosen \u2014", 1, + s = choice_formatting_helper(s, r"choose one that hasn't been chosen \u2014", 1, suffix=" that hasn't been chosen") # 'choose n. you may choose the same mode more than once.' - s = choice_formatting_helper(s, ur'choose three. you may choose the same mode more than once.', 3, + s = choice_formatting_helper(s, r'choose three. you may choose the same mode more than once.', 3, suffix='. you may choose the same mode more than once.') return s @@ -409,7 +413,7 @@ def text_pass_8_equip(s): else: s = equip + '\n' + s - nonmana = re.findall(ur'(equip\u2014.*(\n|$))', s) + nonmana = re.findall(r'(equip\u2014.*(\n|$))', s) if len(nonmana) == 1: equip = nonmana[0][0] s = s.replace('\n' + equip, '') @@ -470,7 +474,7 @@ def text_pass_11_linetrans(s): postlines += [subline] else: keylines += [subline] - elif u'\u2014' in line and not u' \u2014 ' in line: + elif '\u2014' in line and not ' \u2014 ' in line: if 'equip' in line or 'enchant' in line: prelines += [line] elif 'countertype' in line or 'kicker' in line: @@ -669,6 +673,6 @@ def text_unpass_7_newlines(s): def text_unpass_8_unicode(s): - s = s.replace(dash_marker, u'\u2014') - s = s.replace(bullet_marker, u'\u2022') + s = s.replace(dash_marker, '\u2014') + s = s.replace(bullet_marker, '\u2022') return s diff --git a/lib/utils.py b/lib/utils.py index e15976b..a209a69 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -83,20 +83,20 @@ json_field_info_code = config.json_field_info_code # unicode / ascii conversion unicode_trans = { - u'\u2014' : dash_marker, # unicode long dash - u'\u2022' : bullet_marker, # unicode bullet - u'\u2019' : '"', # single quote - u'\u2018' : '"', # single quote - u'\u2212' : '-', # minus sign - u'\xe6' : 'ae', # ae symbol - u'\xfb' : 'u', # u with caret - u'\xfa' : 'u', # u with accent - u'\xe9' : 'e', # e with accent - u'\xe1' : 'a', # a with accent - u'\xe0' : 'a', # a with accent going the other way - u'\xe2' : 'a', # a with caret - u'\xf6' : 'o', # o with umlaut - u'\xed' : 'i', # i with accent + '\u2014': dash_marker, # unicode long dash + '\u2022': bullet_marker, # unicode bullet + '\u2019': '"', # single quote + '\u2018': '"', # single quote + '\u2212': '-', # minus sign + '\xe6': 'ae', # ae symbol + '\xfb': 'u', # u with caret + '\xfa': 'u', # u with accent + '\xe9': 'e', # e with accent + '\xe1': 'a', # a with accent + '\xe0': 'a', # a with accent going the other way + '\xe2': 'a', # a with caret + '\xf6': 'o', # o with umlaut + '\xed': 'i', # i with accent } # this one is one-way only @@ -121,7 +121,7 @@ def to_unary(s, warn = False): elif i > unary_max: i = unary_max if warn: - print s + print(s) s = s.replace(n, unary_marker + unary_counter * i) else: s = s.replace(n, unary_marker + unary_counter * i) diff --git a/scripts/analysis.py b/scripts/analysis.py index 22b3186..8757880 100755 --- a/scripts/analysis.py +++ b/scripts/analysis.py @@ -35,14 +35,16 @@ def annotate_values(values): def print_statistics(stats, ident = 0): for k in stats: if isinstance(stats[k], OrderedDict): - print(' ' * ident + str(k) + ':') - print_statistics(stats[k], ident=ident+2) + print((' ' * ident + str(k) + ':')) + print_statistics(stats[k], ident=ident + 2) elif isinstance(stats[k], dict): - print(' ' * ident + str(k) + ': ') + print((' ' * ident + str(k) + ': ')) elif isinstance(stats[k], list): - print(' ' * ident + str(k) + ': ') + print((' ' * ident + str(k) + ': ')) else: - print(' ' * ident + str(k) + ': ' + str(stats[k])) + print((' ' * ident + str(k) + ': ' + str(stats[k]))) def get_statistics(fname, lm = None, sep = False, verbose=False): stats = OrderedDict() diff --git a/scripts/autosample.py b/scripts/autosample.py index 6587874..85b878d 100755 --- a/scripts/autosample.py +++ b/scripts/autosample.py @@ -22,7 +22,7 @@ def sample(cp, temp, count, seed = None, ident = 'output'): + ' -seed ' + str(seed) + ' >> ' + outfile) if os.path.exists(outfile): - print(outfile + ' already exists, skipping') + print((outfile + ' already exists, skipping')) return False else: # UNSAFE SHELL=TRUE FOR CONVENIENCE @@ -45,7 +45,7 @@ def find_best_cp(cpdir): def process_dir(cpdir, temp, count, seed = None, ident = 'output', verbose = False): if verbose: - print('processing ' + cpdir) + print(('processing ' + cpdir)) best_cp = find_best_cp(cpdir) if not best_cp is None: sample(best_cp, temp, count, seed=seed, ident=ident) diff --git a/scripts/collect_checkpoints.py b/scripts/collect_checkpoints.py index 95df0f0..350ddd9 100755 --- a/scripts/collect_checkpoints.py +++ b/scripts/collect_checkpoints.py @@ -47,20 +47,21 @@ def process_dir(basedir, targetdir, ident, copy_cp = False, verbose = False): cp_infos = identify_checkpoints(basedir, ident) for (dpath, cpath, (epoch, vloss, temp)) in cp_infos: if verbose: - print('found dumpfile ' + dpath) - dname = basedirname + '_epoch' + epoch + '_' + vloss + '.' + ident + '.' + temp + '.txt' + print(('found dumpfile ' + dpath)) + dname = basedirname + '_epoch' + epoch + '_' + \ + vloss + '.' + ident + '.' + temp + '.txt' cname = basedirname + '_epoch' + epoch + '_' + vloss + '.t7' tdpath = os.path.join(targetdir, dname) tcpath = os.path.join(targetdir, cname) if verbose: - print(' cpx ' + dpath + ' ' + tdpath) + print((' cpx ' + dpath + ' ' + tdpath)) with open(dpath, 'rt') as infile: with open(tdpath, 'wt') as outfile: outfile.write(cleanup_dump(infile.read())) if copy_cp: if os.path.isfile(cpath): if verbose: - print(' cp ' + cpath + ' ' + tcpath) + print((' cp ' + cpath + ' ' + tcpath)) shutil.copy(cpath, tcpath) if copy_cp and len(cp_infos) > 0: @@ -68,7 +69,7 @@ def process_dir(basedir, targetdir, ident, copy_cp = False, verbose = False): tcmdpath = os.path.join(targetdir, basedirname + '.command') if os.path.isfile(cmdpath): if verbose: - print(' cp ' + cmdpath + ' ' + tcmdpath) + print((' cp ' + cmdpath + ' ' + tcmdpath)) shutil.copy(cmdpath, tcmdpath) for path in os.listdir(basedir): diff --git a/scripts/distances.py b/scripts/distances.py index 4c0fd7c..d93f696 100755 --- a/scripts/distances.py +++ b/scripts/distances.py @@ -19,14 +19,14 @@ def main(fname, oname, verbose = True, parallel = True): cbow = CBOW() if verbose: - print 'Computing nearest names...' + print('Computing nearest names...') if parallel: - nearest_names = namediff.nearest_par(map(lambda c: c.name, cards), n=1) + nearest_names = namediff.nearest_par([c.name for c in cards], n=1) else: nearest_names = [namediff.nearest(c.name, n=1) for c in cards] if verbose: - print 'Computing nearest cards...' + print('Computing nearest cards...') if parallel: nearest_cards = cbow.nearest_par(cards, n=1) else: @@ -45,7 +45,7 @@ def main(fname, oname, verbose = True, parallel = True): # nearest_cards_text = [namediff.nearest_card(c, n=1) for c in cards] if verbose: - print '...Done.' + print('...Done.') # write to a file to store the data, this is a terribly long computation # we could also just store this same info in the cards themselves as more fields... diff --git a/scripts/keydiff.py b/scripts/keydiff.py index ab818f0..28b7c0f 100755 --- a/scripts/keydiff.py +++ b/scripts/keydiff.py @@ -2,7 +2,7 @@ def parse_keyfile(f, d, constructor = lambda x: x): for line in f: - kv = map(lambda s: s.strip(), line.split(':')) + kv = [s.strip() for s in line.split(':')] if not len(kv) == 2: continue d[kv[0]] = constructor(kv[1]) @@ -18,8 +18,8 @@ def merge_dicts(d1, d2): def main(fname1, fname2, verbose = True): if verbose: - print 'opening ' + fname1 + ' as base key/value store' - print 'opening ' + fname2 + ' as target key/value store' + print('opening ' + fname1 + ' as base key/value store') + print('opening ' + fname2 + ' as target key/value store') d1 = {} d2 = {} @@ -32,8 +32,8 @@ def main(fname1, fname2, verbose = True): tot2 = sum(d2.values()) if verbose: - print ' ' + fname1 + ': ' + str(len(d1)) + ', total ' + str(tot1) - print ' ' + fname2 + ': ' + str(len(d2)) + ', total ' + str(tot2) + print(' ' + fname1 + ': ' + str(len(d1)) + ', total ' + str(tot1)) + print(' ' + fname2 + ': ' + str(len(d2)) + ', total ' + str(tot2)) d_merged = merge_dicts(d1, d2) @@ -49,20 +49,21 @@ def main(fname1, fname2, verbose = True): else: ratios[k] = float(v2 * tot1) / float(v1 * tot2) - print 'shared: ' + str(len(ratios)) - for k in sorted(ratios, lambda x,y: cmp(d2[x], d2[y]), reverse=True): - print ' ' + k + ': ' + str(d2[k]) + '/' + str(d1[k]) + ' (' + str(ratios[k]) + ')' - print '' - - print '1 only: ' + str(len(only_1)) - for k in sorted(only_1, lambda x,y: cmp(d1[x], d1[y]), reverse=True): - print ' ' + k + ': ' + str(d1[k]) - print '' + print('shared: ' + str(len(ratios))) + for k in sorted(ratios, lambda x, y: cmp(d2[x], d2[y]), reverse=True): + print(' ' + k + ': ' + str(d2[k]) + '/' + + str(d1[k]) + ' (' + str(ratios[k]) + ')') + print('') - print '2 only: ' + str(len(only_2)) - for k in sorted(only_2, lambda x,y: cmp(d2[x], d2[y]), reverse=True): - print ' ' + k + ': ' + str(d2[k]) - print '' + print('1 only: ' + str(len(only_1))) + for k in sorted(only_1, lambda x, y: cmp(d1[x], d1[y]), reverse=True): + print(' ' + k + ': ' + str(d1[k])) + print('') + + print('2 only: ' + str(len(only_2))) + for k in sorted(only_2, lambda x, y: cmp(d2[x], d2[y]), reverse=True): + print(' ' + k + ': ' + str(d2[k])) + print('') if __name__ == '__main__': diff --git a/scripts/mtg_validate.py b/scripts/mtg_validate.py index 1329ebc..d9f8561 100755 --- a/scripts/mtg_validate.py +++ b/scripts/mtg_validate.py @@ -91,13 +91,13 @@ def check_X(card): lcosts = mt.costs[:actcosts] rcosts = mt.costs[actcosts:] if 'X' in sides[0] or (utils.reserved_mana_marker in sides[0] and - 'X' in ''.join(map(lambda c: c.encode(), lcosts))): + 'X' in ''.join([c.encode() for c in lcosts])): if incost: - return False # bad, duplicated Xs in costs + return False # bad, duplicated Xs in costs if 'X' in sides[1] or (utils.reserved_mana_marker in sides[1] and - 'X' in ''.join(map(lambda c: c.encode(), rcosts))): + 'X' in ''.join([c.encode() for c in rcosts])): correct = True # good, defined X is either specified or used if 'monstrosity' in sides[1]: extra_cost_lines += 1 @@ -384,9 +384,9 @@ def process_props(cards, dump = False, uncovered = False): if card.name not in ['demonic pact', 'lavaclaw reaches', "ertai's trickery", 'rumbling aftershocks', # i hate these ] and dump: - print('---- ' + prop + ' ----') - print(card.encode()) - print(card.format()) + print(('---- ' + prop + ' ----')) + print((card.encode())) + print((card.format())) values[prop] = (total, good, bad) if overall: total_good += 1 @@ -396,8 +396,8 @@ def process_props(cards, dump = False, uncovered = False): total_uncovered += 1 if uncovered: print('---- uncovered ----') - print(card.encode()) - print(card.format()) + print((card.encode())) + print((card.format())) return ((total_all, total_good, total_bad, total_uncovered), values) @@ -419,8 +419,8 @@ def main(fname, oname = None, verbose = False, dump = False): else: rg[g] = 1 if g >= 60: - print g - print card.format() + print(g) + print(card.format()) tot = 0 vmax = sum(rg.values()) @@ -428,7 +428,7 @@ def main(fname, oname = None, verbose = False, dump = False): pct95 = None pct99 = None for i in sorted(rg): - print str(i) + ' rare ngrams: ' + str(rg[i]) + print(str(i) + ' rare ngrams: ' + str(rg[i])) tot += rg[i] if pct90 is None and tot >= vmax * 0.90: pct90 = i @@ -437,9 +437,9 @@ def main(fname, oname = None, verbose = False, dump = False): if pct99 is None and tot >= vmax * 0.99: pct99 = i - print '90% - ' + str(pct90) - print '95% - ' + str(pct95) - print '99% - ' + str(pct99) + print('90% - ' + str(pct90)) + print('95% - ' + str(pct95)) + print('99% - ' + str(pct99)) else: ((total_all, total_good, total_bad, total_uncovered), @@ -447,19 +447,21 @@ def main(fname, oname = None, verbose = False, dump = False): # summary print('-- overall --') - print(' total : ' + str(total_all)) - print(' good : ' + str(total_good) + ' ' + pct(total_good, total_all)) - print(' bad : ' + str(total_bad) + ' ' + pct(total_bad, total_all)) - print(' uncocoverd: ' + str(total_uncovered) + ' ' + pct(total_uncovered, total_all)) + print((' total : ' + str(total_all))) + print((' good : ' + str(total_good) + + ' ' + pct(total_good, total_all))) + print((' bad : ' + str(total_bad) + ' ' + pct(total_bad, total_all))) + print((' uncocoverd: ' + str(total_uncovered) + + ' ' + pct(total_uncovered, total_all))) print('----') # breakdown for prop in props: (total, good, bad) = values[prop] - print(prop + ':') - print(' total: ' + str(total) + ' ' + pct(total, total_all)) - print(' good : ' + str(good) + ' ' + pct(good, total_all)) - print(' bad : ' + str(bad) + ' ' + pct(bad, total_all)) + print((prop + ':')) + print((' total: ' + str(total) + ' ' + pct(total, total_all))) + print((' good : ' + str(good) + ' ' + pct(good, total_all))) + print((' bad : ' + str(bad) + ' ' + pct(bad, total_all))) if __name__ == '__main__': diff --git a/scripts/ngrams.py b/scripts/ngrams.py index 73fe224..c8cd466 100755 --- a/scripts/ngrams.py +++ b/scripts/ngrams.py @@ -33,22 +33,24 @@ def describe_bins(gramdict, bins): for i in range(0, len(counts)): if counts[i] > 0: - print (' ' + (str(bins[i]) if i < len(bins) else str(bins[-1]) + '+') - + ': ' + str(counts[i])) + print((' ' + (str(bins[i]) if i < len(bins) else str(bins[-1]) + '+') + + ': ' + str(counts[i]))) -def extract_language(cards, separate_lines = True): + +def extract_language(cards, separate_lines=True): if separate_lines: lang = [line.vectorize() for card in cards for line in card.text_lines] else: lang = [card.text.vectorize() for card in cards] - return map(lambda s: s.split(), lang) + return [s.split() for s in lang] -def build_ngram_model(cards, n, separate_lines = True, verbose = False): + +def build_ngram_model(cards, n, separate_lines=True, verbose=False): if verbose: - print('generating ' + str(n) + '-gram model') + print(('generating ' + str(n) + '-gram model')) lang = extract_language(cards, separate_lines=separate_lines) if verbose: - print('found ' + str(len(lang)) + ' sentences') + print(('found ' + str(len(lang)) + ' sentences')) lm = model.NgramModel(n, lang, pad_left=True, pad_right=True) if verbose: print(lm) @@ -65,30 +67,30 @@ def main(fname, oname, gmin = 2, gmax = 8, nltk = False, sep = False, verbose = lm = build_ngram_model(cards, n, separate_lines=sep, verbose=verbose) if verbose: teststr = 'when @ enters the battlefield' - print('litmus test: perplexity of ' + repr(teststr)) - print(' ' + str(lm.perplexity(teststr.split()))) + print(('litmus test: perplexity of ' + repr(teststr))) + print((' ' + str(lm.perplexity(teststr.split())))) if verbose: - print('pickling module to ' + oname) + print(('pickling module to ' + oname)) with open(oname, 'wb') as f: pickle.dump(lm, f) else: bins = [1, 2, 3, 10, 30, 100, 300, 1000] if gmin < 2 or gmax < gmin: - print 'invalid gram sizes: ' + str(gmin) + '-' + str(gmax) + print('invalid gram sizes: ' + str(gmin) + '-' + str(gmax)) exit(1) - for grams in range(gmin, gmax+1): + for grams in range(gmin, gmax + 1): if verbose: - print 'generating ' + str(grams) + '-grams...' + print('generating ' + str(grams) + '-grams...') gramdict = {} for card in cards: update_ngrams(card.text_lines_words, gramdict, grams) oname_full = oname + '.' + str(grams) + 'g' if verbose: - print(' writing ' + str(len(gramdict)) + ' unique ' + str(grams) - + '-grams to ' + oname_full) + print((' writing ' + str(len(gramdict)) + ' unique ' + str(grams) + + '-grams to ' + oname_full)) describe_bins(gramdict, bins) with open(oname_full, 'wt') as f: diff --git a/scripts/pairing.py b/scripts/pairing.py index 39d266e..a14f632 100755 --- a/scripts/pairing.py +++ b/scripts/pairing.py @@ -87,8 +87,9 @@ def main(fname, oname, n=20, verbose=False): #selected = selected[:limit] if verbose: - print('computing nearest cards for ' + str(len(selected)) + ' candindates...') - cbow_nearest = cbow.nearest_par(map(lambda (i, c): c, selected)) + print(('computing nearest cards for ' + + str(len(selected)) + ' candindates...')) + cbow_nearest = cbow.nearest_par([i_c[1] for i_c in selected]) for i in range(0, len(selected)): (j, card) = selected[i] selected[i] = (j, card, cbow_nearest[i]) @@ -104,17 +105,17 @@ def main(fname, oname, n=20, verbose=False): break for (i, card, realcard, dist) in final: - print '-- real --' - print realcard.format() - print '-- fake --' - print card.format() - print '-- stats --' + print('-- real --') + print(realcard.format()) + print('-- fake --') + print(card.format()) + print('-- stats --') perp_per = stats['ngram']['perp_per'][i] perp_max = stats['ngram']['perp_max'][i] - print dist - print perp_per - print perp_max - print '----' + print(dist) + print(perp_per) + print(perp_max) + print('----') if not oname is None: with open(oname, 'wt') as ofile: @@ -126,7 +127,7 @@ def main(fname, oname, n=20, verbose=False): ofile.write('version control:\n\ttype: none\napprentice code: ') # Copy whatever output file is produced, name the copy 'set' (yes, no extension). if os.path.isfile('set'): - print 'ERROR: tried to overwrite existing file "set" - aborting.' + print('ERROR: tried to overwrite existing file "set" - aborting.') return shutil.copyfile(oname, 'set') # Use the freaky mse extension instead of zip. @@ -136,7 +137,8 @@ def main(fname, oname, n=20, verbose=False): zf.write('set') finally: if verbose: - print 'Made an MSE set file called ' + oname + '.mse-set.' + print('Made an MSE set file called ' + + oname + '.mse-set.') # The set file is useless outside the .mse-set, delete it. os.remove('set') diff --git a/scripts/sanity.py b/scripts/sanity.py index 7701654..56e0bf0 100755 --- a/scripts/sanity.py +++ b/scripts/sanity.py @@ -41,37 +41,37 @@ def check_lines(fname): for line in prel: if line.strip() == '': - print(card.name, card.text.text) + print((card.name, card.text.text)) if any(line.startswith(s) for s in known): line = 'known' prelines.add(line) for line in postl: if line.strip() == '': - print(card.name, card.text.text) + print((card.name, card.text.text)) if any(line.startswith(s) for s in known): line = 'known' postlines.add(line) for line in keyl: if line.strip() == '': - print(card.name, card.text.text) + print((card.name, card.text.text)) if any(line.startswith(s) for s in known): line = 'known' keylines.add(line) for line in mainl: if line.strip() == '': - print(card.name, card.text.text) + print((card.name, card.text.text)) # if any(line.startswith(s) for s in known): # line = 'known' mainlines.add(line) for line in costl: if line.strip() == '': - print(card.name, card.text.text) + print((card.name, card.text.text)) # if any(line.startswith(s) for s in known) or 'cycling' in line or 'monstrosity' in line: # line = 'known' costlines.add(line) - print('prel: {:d}, keyl: {:d}, mainl: {:d}, postl {:d}' - .format(len(prelines), len(keylines), len(mainlines), len(postlines))) + print(('prel: {:d}, keyl: {:d}, mainl: {:d}, postl {:d}' + .format(len(prelines), len(keylines), len(mainlines), len(postlines)))) print('\nprelines') for line in sorted(prelines): @@ -109,7 +109,7 @@ def check_vocab(fname): vocab[word] += 1 for word in sorted(vocab, lambda x,y: cmp(vocab[x], vocab[y]), reverse = True): - print('{:8d} : {:s}'.format(vocab[word], word)) + print(('{:8d} : {:s}'.format(vocab[word], word))) n = 3 @@ -120,8 +120,8 @@ def check_vocab(fname): for word in words: if vocab[word] <= n: #if 'name' in word: - print('\n{:8d} : {:s}'.format(vocab[word], word)) - print(card.encode()) + print(('\n{:8d} : {:s}'.format(vocab[word], word))) + print((card.encode())) break def check_characters(fname, vname): @@ -135,14 +135,15 @@ def check_characters(fname, vname): token_to_idx = {tok:i+1 for i, tok in enumerate(sorted(tokens))} idx_to_token = {i+1:tok for i, tok in enumerate(sorted(tokens))} - print('Vocabulary: ({:d} symbols)'.format(len(token_to_idx))) + print(('Vocabulary: ({:d} symbols)'.format(len(token_to_idx)))) for token in sorted(token_to_idx): - print('{:8s} : {:4d}'.format(repr(token), token_to_idx[token])) + print(('{:8s} : {:4d}'.format(repr(token), token_to_idx[token]))) # compliant with torch-rnn if vname: - json_data = {'token_to_idx':token_to_idx, 'idx_to_token':idx_to_token} - print('writing vocabulary to {:s}'.format(vname)) + json_data = {'token_to_idx': token_to_idx, + 'idx_to_token': idx_to_token} + print(('writing vocabulary to {:s}'.format(vname))) with open(vname, 'w') as f: json.dump(json_data, f) diff --git a/scripts/streamcards.py b/scripts/streamcards.py index 3842d7c..4254883 100755 --- a/scripts/streamcards.py +++ b/scripts/streamcards.py @@ -41,7 +41,8 @@ def force_kill_self_noreturn(): def handler_kill_self(signum, frame): if signum != signal.SIGQUIT: traceback.print_stack(frame) - print('caught signal {:d} - streamer sending SIGTERM to self'.format(signum)) + print( + ('caught signal {:d} - streamer sending SIGTERM to self'.format(signum))) force_kill_self_noreturn() def install_suicide_handlers(): diff --git a/scripts/sum.py b/scripts/sum.py index 947b39d..0ee7a86 100755 --- a/scripts/sum.py +++ b/scripts/sum.py @@ -36,14 +36,14 @@ def main(fname): name_avg = name_avg / float(nonempty) card_avg = card_avg / float(nonempty) - print str(nonempty) + ' cards' - print '-- names --' - print 'avg distance: ' + str(name_avg) - print 'num duplicates: ' + str(name_dupes) - print '-- cards --' - print 'avg distance: ' + str(card_avg) - print 'num duplicates: ' + str(card_dupes) - print '----' + print(str(nonempty) + ' cards') + print('-- names --') + print('avg distance: ' + str(name_avg)) + print('num duplicates: ' + str(name_dupes)) + print('-- cards --') + print('avg distance: ' + str(card_avg)) + print('num duplicates: ' + str(card_dupes)) + print('----') if __name__ == '__main__': diff --git a/scripts/summarize.py b/scripts/summarize.py index 6828cbf..981409e 100755 --- a/scripts/summarize.py +++ b/scripts/summarize.py @@ -11,7 +11,7 @@ from datalib import Datamine def main(fname, verbose = True, outliers = False, dump_all = False): if fname[-5:] == '.json': if verbose: - print 'This looks like a json file: ' + fname + print('This looks like a json file: ' + fname) json_srcs = jdecode.mtg_open_json(fname, verbose) card_srcs = [] for json_cardname in sorted(json_srcs): @@ -19,7 +19,7 @@ def main(fname, verbose = True, outliers = False, dump_all = False): card_srcs += [json_srcs[json_cardname][0]] else: if verbose: - print 'Opening encoded card file: ' + fname + print('Opening encoded card file: ' + fname) with open(fname, 'rt') as f: text = f.read() card_srcs = text.split(utils.cardsep) diff --git a/sortcards.py b/sortcards.py index 0c688b1..44c58e8 100644 --- a/sortcards.py +++ b/sortcards.py @@ -141,7 +141,7 @@ def sortcards(cards): def main(fname, oname = None, verbose = True): if verbose: - print 'Opening encoded card file: ' + fname + print('Opening encoded card file: ' + fname) f = open(fname, 'r') text = f.read() @@ -153,14 +153,14 @@ def main(fname, oname = None, verbose = True): if not oname == None: if verbose: - print 'Writing output to: ' + oname + print('Writing output to: ' + oname) ofile = codecs.open(oname, 'w', 'utf-8') for cardclass in classes: if classes[cardclass] == None: - print cardclass + print(cardclass) else: - print ' ' + cardclass + ': ' + str(len(classes[cardclass])) + print(' ' + cardclass + ': ' + str(len(classes[cardclass]))) if oname == None: outputter = sys.stdout @@ -189,6 +189,7 @@ if __name__ == '__main__': elif len(sys.argv) == 3: main(sys.argv[1], oname = sys.argv[2]) else: - print 'Usage: ' + sys.argv[0] + ' ' + ' [output filename]' + print('Usage: ' + sys.argv[0] + ' ' + + ' [output filename]') exit(1)