Changed things to make better use of the Python module system and to parse command-line arguments with argparse.
Encode now supports multiple formats; still working on decoding them nicely.
This commit is contained in:
parent
40fc695826
commit
2a0e014c41
7 changed files with 190 additions and 133 deletions
184
encode.py
Normal file → Executable file
184
encode.py
Normal file → Executable file
|
@ -1,12 +1,14 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'lib')
|
||||||
|
sys.path.append(libdir)
|
||||||
import re
|
import re
|
||||||
import random
|
import random
|
||||||
import sys
|
import utils
|
||||||
|
import jdecode
|
||||||
import lib.utils as utils
|
import cardlib
|
||||||
from lib.cardlib import Card
|
|
||||||
import lib.jdecode as jdecode
|
|
||||||
|
|
||||||
valid_encoded_char = r'[abcdefghijklmnopqrstuvwxyz\'+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]'
|
|
||||||
|
|
||||||
def exclude_sets(cardset):
|
def exclude_sets(cardset):
|
||||||
return cardset == 'Unglued' or cardset == 'Unhinged' or cardset == 'Celebration'
|
return cardset == 'Unglued' or cardset == 'Unhinged' or cardset == 'Celebration'
|
||||||
|
@ -17,42 +19,100 @@ def exclude_types(cardtype):
|
||||||
def exclude_layouts(layout):
|
def exclude_layouts(layout):
|
||||||
return layout in ['token', 'plane', 'scheme', 'phenomenon', 'vanguard']
|
return layout in ['token', 'plane', 'scheme', 'phenomenon', 'vanguard']
|
||||||
|
|
||||||
def compile_duplicated(jcards):
|
def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stable = False):
|
||||||
# Boring solution: only write out the first one...
|
fmt_ordered = cardlib.fmt_ordered_default
|
||||||
card = Card(jcards[0])
|
fmt_labeled = None
|
||||||
if (exclude_sets(jcards[0][utils.json_field_set_name])
|
fieldsep = utils.fieldsep
|
||||||
or exclude_layouts(jcards[0]['layout'])):
|
randomize_fields = False
|
||||||
return None
|
randomize_mana = False
|
||||||
for cardtype in card.types:
|
initial_sep = True
|
||||||
if exclude_types(cardtype):
|
final_sep = True
|
||||||
return None
|
|
||||||
return card
|
# set the properties of the encoding
|
||||||
|
if encoding in ['std']:
|
||||||
|
if dupes == 0:
|
||||||
|
dupes = 1
|
||||||
|
elif encoding in ['rmana']:
|
||||||
|
if dupes == 0:
|
||||||
|
dupes = 3
|
||||||
|
randomize_mana = True
|
||||||
|
elif encoding in ['rmana_dual']:
|
||||||
|
if dupes == 0:
|
||||||
|
dupes = 3
|
||||||
|
fmt_ordered = fmt_ordered + [cardlib.field_cost]
|
||||||
|
randomize_mana = True
|
||||||
|
elif encoding in ['rfields']:
|
||||||
|
if dupes == 0:
|
||||||
|
dupes = 10
|
||||||
|
fmt_labeled = cardlib.fmt_labeled_default
|
||||||
|
randomize_fields = True
|
||||||
|
randomize_mana = True
|
||||||
|
final_sep = False
|
||||||
|
else:
|
||||||
|
raise ValueError('encode.py: unknown encoding: ' + encoding)
|
||||||
|
|
||||||
|
if dupes <= 0:
|
||||||
|
dupes = 1
|
||||||
|
|
||||||
def main(fname, oname = None, verbose = True):
|
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'Opening json file: ' + fname
|
print 'Preparing to encode:'
|
||||||
|
print ' Using encoding ' + repr(encoding)
|
||||||
|
if dupes > 1:
|
||||||
|
print ' Duplicating each card ' + str(dupes) + ' times.'
|
||||||
|
if stable:
|
||||||
|
print ' NOT randomizing order of cards.'
|
||||||
|
|
||||||
|
|
||||||
jcards = jdecode.mtg_open_json(fname, verbose)
|
|
||||||
cards = []
|
cards = []
|
||||||
|
|
||||||
valid = 0
|
valid = 0
|
||||||
skipped = 0
|
skipped = 0
|
||||||
invalid = 0
|
invalid = 0
|
||||||
unparsed = 0
|
unparsed = 0
|
||||||
|
|
||||||
# force a stable ordering, we will randomize later
|
if fname[-5:] == '.json':
|
||||||
for jcard_name in sorted(jcards):
|
if verbose:
|
||||||
card = compile_duplicated(jcards[jcard_name])
|
print 'This looks like a json file: ' + fname
|
||||||
if card:
|
json_srcs = jdecode.mtg_open_json(fname, verbose)
|
||||||
if card.valid:
|
# don't worry we randomize later
|
||||||
valid += 1
|
for json_cardname in sorted(json_srcs):
|
||||||
cards += [card]
|
if len(json_srcs[json_cardname]) > 0:
|
||||||
elif card.parsed:
|
jcards = json_srcs[json_cardname]
|
||||||
invalid += 1
|
card = cardlib.Card(json_srcs[json_cardname][0])
|
||||||
else:
|
|
||||||
unparsed += 1
|
skip = False
|
||||||
else:
|
if (exclude_sets(jcards[0][utils.json_field_set_name])
|
||||||
skipped += 1
|
or exclude_layouts(jcards[0]['layout'])):
|
||||||
|
skip = True
|
||||||
|
for cardtype in card.types:
|
||||||
|
if exclude_types(cardtype):
|
||||||
|
skip = True
|
||||||
|
if skip:
|
||||||
|
skipped += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
if card.valid:
|
||||||
|
valid += 1
|
||||||
|
cards += [card] * dupes
|
||||||
|
elif card.parsed:
|
||||||
|
invalid += 1
|
||||||
|
else:
|
||||||
|
unparsed += 1
|
||||||
|
# fall back to opening a normal encoded file
|
||||||
|
else:
|
||||||
|
if verbose:
|
||||||
|
print 'Opening encoded card file: ' + fname
|
||||||
|
with open(fname, 'rt') as f:
|
||||||
|
text = f.read()
|
||||||
|
for card_src in text.split(utils.cardsep):
|
||||||
|
if card_src:
|
||||||
|
card = cardlib.Card(card_src)
|
||||||
|
if card.valid:
|
||||||
|
valid += 1
|
||||||
|
cards += [card] * dupes
|
||||||
|
elif card.parsed:
|
||||||
|
invalid += 1
|
||||||
|
else:
|
||||||
|
unparsed += 1
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
print (str(valid) + ' valid, ' + str(skipped) + ' skipped, '
|
print (str(valid) + ' valid, ' + str(skipped) + ' skipped, '
|
||||||
|
@ -60,27 +120,55 @@ def main(fname, oname = None, verbose = True):
|
||||||
|
|
||||||
# This should give a random but consistent ordering, to make comparing changes
|
# This should give a random but consistent ordering, to make comparing changes
|
||||||
# between the output of different versions easier.
|
# between the output of different versions easier.
|
||||||
random.seed(1371367)
|
if not stable:
|
||||||
random.shuffle(cards)
|
random.seed(1371367)
|
||||||
|
random.shuffle(cards)
|
||||||
|
|
||||||
if oname:
|
if oname:
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'Writing output to: ' + oname
|
print 'Writing output to: ' + oname
|
||||||
with open(oname, 'w') as ofile:
|
with open(oname, 'w') as ofile:
|
||||||
for card in cards:
|
for card in cards:
|
||||||
ofile.write(card.encode() + utils.cardsep)
|
ofile.write(card.encode(fmt_ordered = fmt_ordered,
|
||||||
|
fmt_labeled = fmt_labeled,
|
||||||
|
fieldsep = fieldsep,
|
||||||
|
randomize_fields = randomize_fields,
|
||||||
|
randomize_mana = randomize_mana,
|
||||||
|
initial_sep = initial_sep,
|
||||||
|
final_sep = final_sep)
|
||||||
|
+ utils.cardsep)
|
||||||
else:
|
else:
|
||||||
for card in cards:
|
for card in cards:
|
||||||
sys.stdout.write(card.encode() + utils.cardsep)
|
sys.stdout.write(card.encode(fmt_ordered = fmt_ordered,
|
||||||
sts.stdout.flush()
|
fmt_labeled = fmt_labeled,
|
||||||
|
fieldsep = fieldsep,
|
||||||
|
randomize_fields = randomize_fields,
|
||||||
|
randomize_mana = randomize_mana,
|
||||||
|
initial_sep = initial_sep,
|
||||||
|
final_sep = final_sep)
|
||||||
|
+ utils.cardsep)
|
||||||
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import sys
|
import argparse
|
||||||
if len(sys.argv) == 2:
|
parser = argparse.ArgumentParser()
|
||||||
main(sys.argv[1])
|
|
||||||
elif len(sys.argv) == 3:
|
parser.add_argument('infile',
|
||||||
main(sys.argv[1], oname = sys.argv[2])
|
help='encoded card file or json corpus to encode')
|
||||||
else:
|
parser.add_argument('outfile', nargs='?', default=None,
|
||||||
print 'Usage: ' + sys.argv[0] + ' ' + '<JSON file> [output filename]'
|
help='output file, defaults to stdout')
|
||||||
exit(1)
|
parser.add_argument('-d', '--duplicate', metavar='N', type=int, default=0,
|
||||||
|
help='number of times to duplicate each card')
|
||||||
|
parser.add_argument('-e', '--encoding', default='std',
|
||||||
|
choices=['std', 'rmana', 'rmana_dual', 'rfields'])
|
||||||
|
parser.add_argument('-s', '--stable', action='store_true',
|
||||||
|
help="don't randomize the order of the cards")
|
||||||
|
parser.add_argument('-v', '--verbose', action='store_true',
|
||||||
|
help='verbose output')
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
main(args.infile, args.outfile, verbose = args.verbose, dupes = args.duplicate,
|
||||||
|
encoding = args.encoding, stable = args.stable)
|
||||||
|
exit(0)
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# card representation
|
# card representation
|
||||||
import re
|
import re
|
||||||
|
import random
|
||||||
|
|
||||||
import utils
|
import utils
|
||||||
import transforms
|
import transforms
|
||||||
|
@ -58,6 +59,7 @@ fmt_labeled_default = {
|
||||||
field_cost : field_label_cost,
|
field_cost : field_label_cost,
|
||||||
field_supertypes : field_label_supertypes,
|
field_supertypes : field_label_supertypes,
|
||||||
field_types : field_label_types,
|
field_types : field_label_types,
|
||||||
|
field_subtypes : field_label_subtypes,
|
||||||
field_loyalty : field_label_loyalty,
|
field_loyalty : field_label_loyalty,
|
||||||
field_pt : field_label_pt,
|
field_pt : field_label_pt,
|
||||||
field_text : field_label_text,
|
field_text : field_label_text,
|
||||||
|
@ -434,13 +436,12 @@ class Card:
|
||||||
outfield_str = outfield.encode(randomize = randomize_mana)
|
outfield_str = outfield.encode(randomize = randomize_mana)
|
||||||
else:
|
else:
|
||||||
outfield_str = outfield
|
outfield_str = outfield
|
||||||
|
|
||||||
if fmt_labeled and field in fmt_labeled:
|
|
||||||
outfield_str = fmt_labeled[field] + outfield_str
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
outfield_str = ''
|
outfield_str = ''
|
||||||
|
|
||||||
|
if fmt_labeled and field in fmt_labeled:
|
||||||
|
outfield_str = fmt_labeled[field] + outfield_str
|
||||||
|
|
||||||
outfields += [outfield_str]
|
outfields += [outfield_str]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
import re
|
import re
|
||||||
import sys
|
|
||||||
|
|
||||||
import utils
|
import utils
|
||||||
from cardlib import Card
|
from cardlib import Card
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# representation for mana costs and text with embedded mana costs
|
# representation for mana costs and text with embedded mana costs
|
||||||
# data aggregating classes
|
# data aggregating classes
|
||||||
import random
|
|
||||||
import re
|
import re
|
||||||
|
import random
|
||||||
|
|
||||||
import utils
|
import utils
|
||||||
|
|
||||||
|
|
|
@ -1,46 +0,0 @@
|
||||||
import utils
|
|
||||||
import datamine
|
|
||||||
import random
|
|
||||||
|
|
||||||
def main(fname, oname = None, verbose = True):
|
|
||||||
if verbose:
|
|
||||||
print 'Opening encoded card file: ' + fname
|
|
||||||
|
|
||||||
with open(fname, 'rt') as f:
|
|
||||||
text = f.read()
|
|
||||||
|
|
||||||
cardtexts = text.split(utils.cardsep)
|
|
||||||
|
|
||||||
# overkill
|
|
||||||
datamine.analyze(cardtexts)
|
|
||||||
|
|
||||||
multicards = []
|
|
||||||
reps = 10
|
|
||||||
|
|
||||||
for card in datamine.cards:
|
|
||||||
for i in range(reps):
|
|
||||||
multicards += [card.reencode(randomize = True)]
|
|
||||||
# multicards += [card.reencode(randomize = True)
|
|
||||||
# + card.cost.reencode(randomize = True) + utils.fieldsep]
|
|
||||||
|
|
||||||
random.shuffle(multicards)
|
|
||||||
|
|
||||||
if oname:
|
|
||||||
if verbose:
|
|
||||||
print 'Writing output to: ' + oname
|
|
||||||
with open(oname, 'w') as ofile:
|
|
||||||
for textcard in multicards:
|
|
||||||
ofile.write(textcard + utils.cardsep)
|
|
||||||
else:
|
|
||||||
for textcard in multicards:
|
|
||||||
print textcard + '\n'
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
import sys
|
|
||||||
if len(sys.argv) == 2:
|
|
||||||
main(sys.argv[1])
|
|
||||||
elif len(sys.argv) == 3:
|
|
||||||
main(sys.argv[1], oname = sys.argv[2])
|
|
||||||
else:
|
|
||||||
print 'Usage: ' + sys.argv[0] + ' ' + '<encoded file> [output filename]'
|
|
||||||
exit(1)
|
|
48
scripts/summarize.py
Executable file
48
scripts/summarize.py
Executable file
|
@ -0,0 +1,48 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
|
||||||
|
sys.path.append(libdir)
|
||||||
|
import utils
|
||||||
|
import jdecode
|
||||||
|
from datalib import Datamine
|
||||||
|
|
||||||
|
def main(fname, verbose = True, outliers = False, dump_all = False):
|
||||||
|
if fname[-5:] == '.json':
|
||||||
|
if verbose:
|
||||||
|
print 'This looks like a json file: ' + fname
|
||||||
|
json_srcs = jdecode.mtg_open_json(fname, verbose)
|
||||||
|
card_srcs = []
|
||||||
|
for json_cardname in sorted(json_srcs):
|
||||||
|
if len(json_srcs[json_cardname]) > 0:
|
||||||
|
card_srcs += [json_srcs[json_cardname][0]]
|
||||||
|
else:
|
||||||
|
if verbose:
|
||||||
|
print 'Opening encoded card file: ' + fname
|
||||||
|
with open(fname, 'rt') as f:
|
||||||
|
text = f.read()
|
||||||
|
card_srcs = text.split(utils.cardsep)
|
||||||
|
|
||||||
|
mine = Datamine(card_srcs)
|
||||||
|
mine.summarize()
|
||||||
|
if outliers or dump_all:
|
||||||
|
mine.outliers(dump_invalid = dump_all)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import argparse
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
|
||||||
|
parser.add_argument('infile',
|
||||||
|
help='encoded card file or json corpus to process')
|
||||||
|
parser.add_argument('-x', '--outliers', action='store_true',
|
||||||
|
help='show additional diagnostics and edge cases')
|
||||||
|
parser.add_argument('-a', '--all', action='store_true',
|
||||||
|
help='show all information and dump invalid cards')
|
||||||
|
parser.add_argument('-v', '--verbose', action='store_true',
|
||||||
|
help='verbose output')
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
main(args.infile, verbose = args.verbose, outliers = args.outliers, dump_all = args.all)
|
||||||
|
exit(0)
|
33
summarize.py
33
summarize.py
|
@ -1,33 +0,0 @@
|
||||||
import sys
|
|
||||||
|
|
||||||
import lib.utils as utils
|
|
||||||
import lib.jdecode as jdecode
|
|
||||||
from lib.datalib import Datamine
|
|
||||||
|
|
||||||
def main(fname, verbose = True):
|
|
||||||
if fname[-5:] == '.json':
|
|
||||||
if verbose:
|
|
||||||
print 'This looks like a json file: ' + fname
|
|
||||||
json_srcs = jdecode.mtg_open_json(fname, verbose)
|
|
||||||
card_srcs = []
|
|
||||||
for json_cardname in json_srcs:
|
|
||||||
if len(json_srcs[json_cardname]) > 0:
|
|
||||||
card_srcs += [json_srcs[json_cardname][0]]
|
|
||||||
else:
|
|
||||||
if verbose:
|
|
||||||
print 'Opening encoded card file: ' + fname
|
|
||||||
with open(fname, 'rt') as f:
|
|
||||||
text = f.read()
|
|
||||||
card_srcs = text.split(utils.cardsep)
|
|
||||||
|
|
||||||
mine = Datamine(card_srcs)
|
|
||||||
mine.summarize()
|
|
||||||
mine.outliers(dump_invalid = False)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
import sys
|
|
||||||
if len(sys.argv) == 2:
|
|
||||||
main(sys.argv[1])
|
|
||||||
else:
|
|
||||||
print 'Usage: ' + sys.argv[0] + ' ' + '<encoded file>'
|
|
||||||
exit(1)
|
|
Loading…
Reference in a new issue