Reworked the scripts to make better use of the Python module system and to parse command-line arguments with argparse.

Encode now supports multiple formats; decoding them nicely is still a work in progress.
This commit is contained in:
Bill Zorn 2015-07-15 23:40:15 -07:00
parent 40fc695826
commit 2a0e014c41
7 changed files with 190 additions and 133 deletions

182
encode.py Normal file → Executable file
View file

@ -1,12 +1,14 @@
#!/usr/bin/env python
import sys
import os
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'lib')
sys.path.append(libdir)
import re
import random
import sys
import lib.utils as utils
from lib.cardlib import Card
import lib.jdecode as jdecode
valid_encoded_char = r'[abcdefghijklmnopqrstuvwxyz\'+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]'
import utils
import jdecode
import cardlib
def exclude_sets(cardset):
    """Return True if `cardset` is one of the set names left out of the encoding."""
    return cardset in ('Unglued', 'Unhinged', 'Celebration')
@ -17,42 +19,100 @@ def exclude_types(cardtype):
def exclude_layouts(layout):
    """Return True if `layout` is one of the card layouts that get skipped."""
    skipped_layouts = ('token', 'plane', 'scheme', 'phenomenon', 'vanguard')
    return layout in skipped_layouts
def compile_duplicated(jcards):
    """Build one Card from the list of json entries stored under a card name.

    Boring solution: only the first entry is used, the rest are ignored.

    Returns the Card, or None when `jcards` is empty or when the first entry
    belongs to an excluded set, an excluded layout, or has an excluded type.
    """
    # Nothing to compile from an empty list; report it the same way as an
    # excluded card instead of failing on jcards[0].
    if not jcards:
        return None

    first = jcards[0]
    # Filter on the raw json fields before constructing the Card, so
    # excluded entries are never parsed at all.
    if (exclude_sets(first[utils.json_field_set_name])
        or exclude_layouts(first['layout'])):
        return None

    card = Card(first)
    if any(exclude_types(cardtype) for cardtype in card.types):
        return None
    return card
def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stable = False):
fmt_ordered = cardlib.fmt_ordered_default
fmt_labeled = None
fieldsep = utils.fieldsep
randomize_fields = False
randomize_mana = False
initial_sep = True
final_sep = True
# set the properties of the encoding
if encoding in ['std']:
if dupes == 0:
dupes = 1
elif encoding in ['rmana']:
if dupes == 0:
dupes = 3
randomize_mana = True
elif encoding in ['rmana_dual']:
if dupes == 0:
dupes = 3
fmt_ordered = fmt_ordered + [cardlib.field_cost]
randomize_mana = True
elif encoding in ['rfields']:
if dupes == 0:
dupes = 10
fmt_labeled = cardlib.fmt_labeled_default
randomize_fields = True
randomize_mana = True
final_sep = False
else:
raise ValueError('encode.py: unknown encoding: ' + encoding)
if dupes <= 0:
dupes = 1
def main(fname, oname = None, verbose = True):
if verbose:
print 'Opening json file: ' + fname
print 'Preparing to encode:'
print ' Using encoding ' + repr(encoding)
if dupes > 1:
print ' Duplicating each card ' + str(dupes) + ' times.'
if stable:
print ' NOT randomizing order of cards.'
jcards = jdecode.mtg_open_json(fname, verbose)
cards = []
valid = 0
skipped = 0
invalid = 0
unparsed = 0
# force a stable ordering, we will randomize later
for jcard_name in sorted(jcards):
card = compile_duplicated(jcards[jcard_name])
if card:
if card.valid:
valid += 1
cards += [card]
elif card.parsed:
invalid += 1
else:
unparsed += 1
else:
skipped += 1
if fname[-5:] == '.json':
if verbose:
print 'This looks like a json file: ' + fname
json_srcs = jdecode.mtg_open_json(fname, verbose)
# don't worry we randomize later
for json_cardname in sorted(json_srcs):
if len(json_srcs[json_cardname]) > 0:
jcards = json_srcs[json_cardname]
card = cardlib.Card(json_srcs[json_cardname][0])
skip = False
if (exclude_sets(jcards[0][utils.json_field_set_name])
or exclude_layouts(jcards[0]['layout'])):
skip = True
for cardtype in card.types:
if exclude_types(cardtype):
skip = True
if skip:
skipped += 1
continue
if card.valid:
valid += 1
cards += [card] * dupes
elif card.parsed:
invalid += 1
else:
unparsed += 1
# fall back to opening a normal encoded file
else:
if verbose:
print 'Opening encoded card file: ' + fname
with open(fname, 'rt') as f:
text = f.read()
for card_src in text.split(utils.cardsep):
if card_src:
card = cardlib.Card(card_src)
if card.valid:
valid += 1
cards += [card] * dupes
elif card.parsed:
invalid += 1
else:
unparsed += 1
if verbose:
print (str(valid) + ' valid, ' + str(skipped) + ' skipped, '
@ -60,27 +120,55 @@ def main(fname, oname = None, verbose = True):
# This should give a random but consistent ordering, to make comparing changes
# between the output of different versions easier.
random.seed(1371367)
random.shuffle(cards)
if not stable:
random.seed(1371367)
random.shuffle(cards)
if oname:
if verbose:
print 'Writing output to: ' + oname
with open(oname, 'w') as ofile:
for card in cards:
ofile.write(card.encode() + utils.cardsep)
ofile.write(card.encode(fmt_ordered = fmt_ordered,
fmt_labeled = fmt_labeled,
fieldsep = fieldsep,
randomize_fields = randomize_fields,
randomize_mana = randomize_mana,
initial_sep = initial_sep,
final_sep = final_sep)
+ utils.cardsep)
else:
for card in cards:
sys.stdout.write(card.encode() + utils.cardsep)
sts.stdout.flush()
sys.stdout.write(card.encode(fmt_ordered = fmt_ordered,
fmt_labeled = fmt_labeled,
fieldsep = fieldsep,
randomize_fields = randomize_fields,
randomize_mana = randomize_mana,
initial_sep = initial_sep,
final_sep = final_sep)
+ utils.cardsep)
sys.stdout.flush()
if __name__ == '__main__':
import sys
if len(sys.argv) == 2:
main(sys.argv[1])
elif len(sys.argv) == 3:
main(sys.argv[1], oname = sys.argv[2])
else:
print 'Usage: ' + sys.argv[0] + ' ' + '<JSON file> [output filename]'
exit(1)
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('infile',
help='encoded card file or json corpus to encode')
parser.add_argument('outfile', nargs='?', default=None,
help='output file, defaults to stdout')
parser.add_argument('-d', '--duplicate', metavar='N', type=int, default=0,
help='number of times to duplicate each card')
parser.add_argument('-e', '--encoding', default='std',
choices=['std', 'rmana', 'rmana_dual', 'rfields'])
parser.add_argument('-s', '--stable', action='store_true',
help="don't randomize the order of the cards")
parser.add_argument('-v', '--verbose', action='store_true',
help='verbose output')
args = parser.parse_args()
main(args.infile, args.outfile, verbose = args.verbose, dupes = args.duplicate,
encoding = args.encoding, stable = args.stable)
exit(0)

View file

@ -1,5 +1,6 @@
# card representation
import re
import random
import utils
import transforms
@ -58,6 +59,7 @@ fmt_labeled_default = {
field_cost : field_label_cost,
field_supertypes : field_label_supertypes,
field_types : field_label_types,
field_subtypes : field_label_subtypes,
field_loyalty : field_label_loyalty,
field_pt : field_label_pt,
field_text : field_label_text,
@ -434,13 +436,12 @@ class Card:
outfield_str = outfield.encode(randomize = randomize_mana)
else:
outfield_str = outfield
if fmt_labeled and field in fmt_labeled:
outfield_str = fmt_labeled[field] + outfield_str
else:
outfield_str = ''
if fmt_labeled and field in fmt_labeled:
outfield_str = fmt_labeled[field] + outfield_str
outfields += [outfield_str]
else:

View file

@ -1,5 +1,4 @@
import re
import sys
import utils
from cardlib import Card

View file

@ -1,7 +1,7 @@
# representation for mana costs and text with embedded mana costs
# data aggregating classes
import random
import re
import random
import utils

View file

@ -1,46 +0,0 @@
import utils
import datamine
import random
def main(fname, oname = None, verbose = True):
    """Write several randomized re-encodings of every card in an encoded file.

    fname   -- encoded card file; its text is split on utils.cardsep and
               handed to datamine.analyze.
    oname   -- output file; when not given the result is printed to stdout.
    verbose -- print progress messages.

    Each card in datamine.cards is re-encoded 10 times with randomize = True,
    and the resulting list is shuffled before it is written.
    """
    if verbose:
        print('Opening encoded card file: ' + fname)
    with open(fname, 'rt') as infile:
        contents = infile.read()

    # overkill
    datamine.analyze(contents.split(utils.cardsep))

    copies_per_card = 10
    # A disabled variant in the original also appended
    # card.cost.reencode(randomize = True) + utils.fieldsep to each copy.
    shuffled = [card.reencode(randomize = True)
                for card in datamine.cards
                for _ in range(copies_per_card)]
    random.shuffle(shuffled)

    if oname:
        if verbose:
            print('Writing output to: ' + oname)
        with open(oname, 'w') as ofile:
            for encoded in shuffled:
                ofile.write(encoded + utils.cardsep)
    else:
        for encoded in shuffled:
            print(encoded + '\n')
if __name__ == '__main__':
    import sys
    argv = sys.argv
    # exactly one or two positional arguments are accepted
    if len(argv) not in (2, 3):
        print('Usage: ' + argv[0] + ' ' + '<encoded file> [output filename]')
        exit(1)
    if len(argv) == 3:
        main(argv[1], oname = argv[2])
    else:
        main(argv[1])

48
scripts/summarize.py Executable file
View file

@ -0,0 +1,48 @@
#!/usr/bin/env python
import sys
import os
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
sys.path.append(libdir)
import utils
import jdecode
from datalib import Datamine
def main(fname, verbose = True, outliers = False, dump_all = False):
    """Summarize the cards found in a json corpus or an encoded card file.

    fname    -- input path. A name ending in '.json' is opened with
                jdecode.mtg_open_json; anything else is read as text and
                split on utils.cardsep.
    verbose  -- print which kind of file is being opened.
    outliers -- also call Datamine.outliers() after the summary.
    dump_all -- call Datamine.outliers() with dump_invalid turned on.
    """
    if fname.endswith('.json'):
        if verbose:
            print('This looks like a json file: ' + fname)
        json_srcs = jdecode.mtg_open_json(fname, verbose)
        # Walk the names in sorted order and keep only the first entry
        # stored under each name; names with no entries are dropped.
        card_srcs = [json_srcs[name][0]
                     for name in sorted(json_srcs)
                     if len(json_srcs[name]) > 0]
    else:
        if verbose:
            print('Opening encoded card file: ' + fname)
        with open(fname, 'rt') as infile:
            contents = infile.read()
        card_srcs = contents.split(utils.cardsep)

    report = Datamine(card_srcs)
    report.summarize()
    if outliers or dump_all:
        report.outliers(dump_invalid = dump_all)
if __name__ == '__main__':
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('infile',
                     help='encoded card file or json corpus to process')
    # The three switches are all plain on/off flags, registered in the
    # order they should appear in the help text.
    switches = (
        ('-x', '--outliers', 'show additional diagnostics and edge cases'),
        ('-a', '--all', 'show all information and dump invalid cards'),
        ('-v', '--verbose', 'verbose output'),
    )
    for short_flag, long_flag, description in switches:
        cli.add_argument(short_flag, long_flag, action='store_true',
                         help=description)

    opts = cli.parse_args()
    main(opts.infile,
         verbose = opts.verbose,
         outliers = opts.outliers,
         dump_all = opts.all)
    exit(0)

View file

@ -1,33 +0,0 @@
import sys
import lib.utils as utils
import lib.jdecode as jdecode
from lib.datalib import Datamine
def main(fname, verbose = True):
    """Summarize the cards found in a json corpus or an encoded card file.

    fname   -- input path. A name ending in '.json' is opened with
               jdecode.mtg_open_json; anything else is read as text and
               split on utils.cardsep.
    verbose -- print which kind of file is being opened.

    The card sources are handed to Datamine, which prints a summary and
    then its outlier report (without dumping invalid cards).
    """
    if fname.endswith('.json'):
        if verbose:
            print('This looks like a json file: ' + fname)
        json_srcs = jdecode.mtg_open_json(fname, verbose)
        # Iterate the names in sorted order: plain dict iteration order is
        # arbitrary, which would make the order of the card sources (and
        # anything reported in that order) differ between runs.
        # Only the first entry under each name is used.
        card_srcs = [json_srcs[name][0]
                     for name in sorted(json_srcs)
                     if len(json_srcs[name]) > 0]
    else:
        if verbose:
            print('Opening encoded card file: ' + fname)
        with open(fname, 'rt') as infile:
            contents = infile.read()
        card_srcs = contents.split(utils.cardsep)

    mine = Datamine(card_srcs)
    mine.summarize()
    mine.outliers(dump_invalid = False)
if __name__ == '__main__':
    import sys
    # exactly one positional argument (the input file) is accepted
    if len(sys.argv) != 2:
        print('Usage: ' + sys.argv[0] + ' ' + '<encoded file>')
        exit(1)
    main(sys.argv[1])