diff --git a/encode.py b/encode.py old mode 100644 new mode 100755 index 5ecbbae..32cc0f9 --- a/encode.py +++ b/encode.py @@ -1,12 +1,14 @@ +#!/usr/bin/env python +import sys +import os + +libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'lib') +sys.path.append(libdir) import re import random -import sys - -import lib.utils as utils -from lib.cardlib import Card -import lib.jdecode as jdecode - -valid_encoded_char = r'[abcdefghijklmnopqrstuvwxyz\'+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]' +import utils +import jdecode +import cardlib def exclude_sets(cardset): return cardset == 'Unglued' or cardset == 'Unhinged' or cardset == 'Celebration' @@ -17,42 +19,100 @@ def exclude_types(cardtype): def exclude_layouts(layout): return layout in ['token', 'plane', 'scheme', 'phenomenon', 'vanguard'] -def compile_duplicated(jcards): - # Boring solution: only write out the first one... - card = Card(jcards[0]) - if (exclude_sets(jcards[0][utils.json_field_set_name]) - or exclude_layouts(jcards[0]['layout'])): - return None - for cardtype in card.types: - if exclude_types(cardtype): - return None - return card +def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stable = False): + fmt_ordered = cardlib.fmt_ordered_default + fmt_labeled = None + fieldsep = utils.fieldsep + randomize_fields = False + randomize_mana = False + initial_sep = True + final_sep = True + + # set the properties of the encoding + if encoding in ['std']: + if dupes == 0: + dupes = 1 + elif encoding in ['rmana']: + if dupes == 0: + dupes = 3 + randomize_mana = True + elif encoding in ['rmana_dual']: + if dupes == 0: + dupes = 3 + fmt_ordered = fmt_ordered + [cardlib.field_cost] + randomize_mana = True + elif encoding in ['rfields']: + if dupes == 0: + dupes = 10 + fmt_labeled = cardlib.fmt_labeled_default + randomize_fields = True + randomize_mana = True + final_sep = False + else: + raise ValueError('encode.py: unknown encoding: ' + encoding) + + if dupes <= 0: + dupes = 1 -def main(fname, oname = None, verbose = True): if verbose: - print 'Opening json file: ' + fname + print 'Preparing to encode:' + print ' Using encoding ' + repr(encoding) + if dupes > 1: + print ' Duplicating each card ' + str(dupes) + ' times.' + if stable: + print ' NOT randomizing order of cards.' + - jcards = jdecode.mtg_open_json(fname, verbose) cards = [] - valid = 0 skipped = 0 invalid = 0 unparsed = 0 - # force a stable ordering, we will randomize later - for jcard_name in sorted(jcards): - card = compile_duplicated(jcards[jcard_name]) - if card: - if card.valid: - valid += 1 - cards += [card] - elif card.parsed: - invalid += 1 - else: - unparsed += 1 - else: - skipped += 1 + if fname[-5:] == '.json': + if verbose: + print 'This looks like a json file: ' + fname + json_srcs = jdecode.mtg_open_json(fname, verbose) + # don't worry we randomize later + for json_cardname in sorted(json_srcs): + if len(json_srcs[json_cardname]) > 0: + jcards = json_srcs[json_cardname] + card = cardlib.Card(json_srcs[json_cardname][0]) + + skip = False + if (exclude_sets(jcards[0][utils.json_field_set_name]) + or exclude_layouts(jcards[0]['layout'])): + skip = True + for cardtype in card.types: + if exclude_types(cardtype): + skip = True + if skip: + skipped += 1 + continue + + if card.valid: + valid += 1 + cards += [card] * dupes + elif card.parsed: + invalid += 1 + else: + unparsed += 1 + # fall back to opening a normal encoded file + else: + if verbose: + print 'Opening encoded card file: ' + fname + with open(fname, 'rt') as f: + text = f.read() + for card_src in text.split(utils.cardsep): + if card_src: + card = cardlib.Card(card_src) + if card.valid: + valid += 1 + cards += [card] * dupes + elif card.parsed: + invalid += 1 + else: + unparsed += 1 if verbose: print (str(valid) + ' valid, ' + str(skipped) + ' skipped, ' @@ -60,27 +120,55 @@ def main(fname, oname = None, verbose = True): # This should give a random but consistent ordering, to make comparing changes # between the output of different versions easier. - random.seed(1371367) - random.shuffle(cards) + if not stable: + random.seed(1371367) + random.shuffle(cards) if oname: if verbose: print 'Writing output to: ' + oname with open(oname, 'w') as ofile: for card in cards: - ofile.write(card.encode() + utils.cardsep) + ofile.write(card.encode(fmt_ordered = fmt_ordered, + fmt_labeled = fmt_labeled, + fieldsep = fieldsep, + randomize_fields = randomize_fields, + randomize_mana = randomize_mana, + initial_sep = initial_sep, + final_sep = final_sep) + + utils.cardsep) else: for card in cards: - sys.stdout.write(card.encode() + utils.cardsep) - sts.stdout.flush() + sys.stdout.write(card.encode(fmt_ordered = fmt_ordered, + fmt_labeled = fmt_labeled, + fieldsep = fieldsep, + randomize_fields = randomize_fields, + randomize_mana = randomize_mana, + initial_sep = initial_sep, + final_sep = final_sep) + + utils.cardsep) + sys.stdout.flush() + - if __name__ == '__main__': - import sys - if len(sys.argv) == 2: - main(sys.argv[1]) - elif len(sys.argv) == 3: - main(sys.argv[1], oname = sys.argv[2]) - else: - print 'Usage: ' + sys.argv[0] + ' ' + ' [output filename]' - exit(1) + import argparse + parser = argparse.ArgumentParser() + + parser.add_argument('infile', + help='encoded card file or json corpus to encode') + parser.add_argument('outfile', nargs='?', default=None, + help='output file, defaults to stdout') + parser.add_argument('-d', '--duplicate', metavar='N', type=int, default=0, + help='number of times to duplicate each card') + parser.add_argument('-e', '--encoding', default='std', + choices=['std', 'rmana', 'rmana_dual', 'rfields']) + parser.add_argument('-s', '--stable', action='store_true', + help="don't randomize the order of the cards") + parser.add_argument('-v', '--verbose', action='store_true', + help='verbose output') + + args = parser.parse_args() + main(args.infile, args.outfile, verbose = args.verbose, dupes = args.duplicate, + encoding = args.encoding, stable = args.stable) + exit(0) + diff --git a/lib/cardlib.py b/lib/cardlib.py index d6f9b69..13accce 100644 --- a/lib/cardlib.py +++ b/lib/cardlib.py @@ -1,5 +1,6 @@ # card representation import re +import random import utils import transforms @@ -58,6 +59,7 @@ fmt_labeled_default = { field_cost : field_label_cost, field_supertypes : field_label_supertypes, field_types : field_label_types, + field_subtypes : field_label_subtypes, field_loyalty : field_label_loyalty, field_pt : field_label_pt, field_text : field_label_text, @@ -434,13 +436,12 @@ class Card: outfield_str = outfield.encode(randomize = randomize_mana) else: outfield_str = outfield - - if fmt_labeled and field in fmt_labeled: - outfield_str = fmt_labeled[field] + outfield_str - else: outfield_str = '' + if fmt_labeled and field in fmt_labeled: + outfield_str = fmt_labeled[field] + outfield_str + outfields += [outfield_str] else: diff --git a/lib/datalib.py b/lib/datalib.py index a8e3edf..c7d2d64 100644 --- a/lib/datalib.py +++ b/lib/datalib.py @@ -1,5 +1,4 @@ import re -import sys import utils from cardlib import Card diff --git a/lib/manalib.py b/lib/manalib.py index f3e3284..aab9df3 100644 --- a/lib/manalib.py +++ b/lib/manalib.py @@ -1,7 +1,7 @@ # representation for mana costs and text with embedded mana costs # data aggregating classes -import random import re +import random import utils diff --git a/scripts/randomize_mana.py b/scripts/randomize_mana.py deleted file mode 100644 index 5a7c6e0..0000000 --- a/scripts/randomize_mana.py +++ /dev/null @@ -1,46 +0,0 @@ -import utils -import datamine -import random - -def main(fname, oname = None, verbose = True): - if verbose: - print 'Opening encoded card file: ' + fname - - with open(fname, 'rt') as f: - text = f.read() - - cardtexts = text.split(utils.cardsep) - - # overkill - datamine.analyze(cardtexts) - - multicards = [] - reps = 10 - - for card in datamine.cards: - for i in range(reps): - multicards += [card.reencode(randomize = True)] - # multicards += [card.reencode(randomize = True) - # + card.cost.reencode(randomize = True) + utils.fieldsep] - - random.shuffle(multicards) - - if oname: - if verbose: - print 'Writing output to: ' + oname - with open(oname, 'w') as ofile: - for textcard in multicards: - ofile.write(textcard + utils.cardsep) - else: - for textcard in multicards: - print textcard + '\n' - -if __name__ == '__main__': - import sys - if len(sys.argv) == 2: - main(sys.argv[1]) - elif len(sys.argv) == 3: - main(sys.argv[1], oname = sys.argv[2]) - else: - print 'Usage: ' + sys.argv[0] + ' ' + ' [output filename]' - exit(1) diff --git a/scripts/summarize.py b/scripts/summarize.py new file mode 100755 index 0000000..6828cbf --- /dev/null +++ b/scripts/summarize.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +import sys +import os + +libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib') +sys.path.append(libdir) +import utils +import jdecode +from datalib import Datamine + +def main(fname, verbose = True, outliers = False, dump_all = False): + if fname[-5:] == '.json': + if verbose: + print 'This looks like a json file: ' + fname + json_srcs = jdecode.mtg_open_json(fname, verbose) + card_srcs = [] + for json_cardname in sorted(json_srcs): + if len(json_srcs[json_cardname]) > 0: + card_srcs += [json_srcs[json_cardname][0]] + else: + if verbose: + print 'Opening encoded card file: ' + fname + with open(fname, 'rt') as f: + text = f.read() + card_srcs = text.split(utils.cardsep) + + mine = Datamine(card_srcs) + mine.summarize() + if outliers or dump_all: + mine.outliers(dump_invalid = dump_all) + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + + parser.add_argument('infile', + help='encoded card file or json corpus to process') + parser.add_argument('-x', '--outliers', action='store_true', + help='show additional diagnostics and edge cases') + parser.add_argument('-a', '--all', action='store_true', + help='show all information and dump invalid cards') + parser.add_argument('-v', '--verbose', action='store_true', + help='verbose output') + + args = parser.parse_args() + main(args.infile, verbose = args.verbose, outliers = args.outliers, dump_all = args.all) + exit(0) diff --git a/summarize.py b/summarize.py deleted file mode 100644 index 8a81d99..0000000 --- a/summarize.py +++ /dev/null @@ -1,33 +0,0 @@ -import sys - -import lib.utils as utils -import lib.jdecode as jdecode -from lib.datalib import Datamine - -def main(fname, verbose = True): - if fname[-5:] == '.json': - if verbose: - print 'This looks like a json file: ' + fname - json_srcs = jdecode.mtg_open_json(fname, verbose) - card_srcs = [] - for json_cardname in json_srcs: - if len(json_srcs[json_cardname]) > 0: - card_srcs += [json_srcs[json_cardname][0]] - else: - if verbose: - print 'Opening encoded card file: ' + fname - with open(fname, 'rt') as f: - text = f.read() - card_srcs = text.split(utils.cardsep) - - mine = Datamine(card_srcs) - mine.summarize() - mine.outliers(dump_invalid = False) - -if __name__ == '__main__': - import sys - if len(sys.argv) == 2: - main(sys.argv[1]) - else: - print 'Usage: ' + sys.argv[0] + ' ' + '' - exit(1)