Changed things to make better use of the Python module system and to parse arguments with argparse.
Encode now supports multiple formats; decoding them nicely is still in progress.
This commit is contained in:
parent
40fc695826
commit
2a0e014c41
7 changed files with 190 additions and 133 deletions
182
encode.py
Normal file → Executable file
182
encode.py
Normal file → Executable file
|
@ -1,12 +1,14 @@
|
|||
#!/usr/bin/env python
|
||||
import sys
|
||||
import os
|
||||
|
||||
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'lib')
|
||||
sys.path.append(libdir)
|
||||
import re
|
||||
import random
|
||||
import sys
|
||||
|
||||
import lib.utils as utils
|
||||
from lib.cardlib import Card
|
||||
import lib.jdecode as jdecode
|
||||
|
||||
valid_encoded_char = r'[abcdefghijklmnopqrstuvwxyz\'+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]'
|
||||
import utils
|
||||
import jdecode
|
||||
import cardlib
|
||||
|
||||
def exclude_sets(cardset):
    """Return True when cardset is one of the set names left out of the encoding."""
    excluded_sets = ('Unglued', 'Unhinged', 'Celebration')
    return cardset in excluded_sets
|
||||
|
@ -17,42 +19,100 @@ def exclude_types(cardtype):
|
|||
def exclude_layouts(layout):
    """Return True when layout is one of the card layouts left out of the encoding."""
    excluded_layouts = ('token', 'plane', 'scheme', 'phenomenon', 'vanguard')
    return layout in excluded_layouts
|
||||
|
||||
def compile_duplicated(jcards):
    """Build a Card from the first entry of jcards, or return None if it is excluded.

    jcards is indexed as a sequence of json card entries sharing one name;
    only jcards[0] is looked at. None is returned when that entry's set name
    or layout is excluded, or when any of the resulting card's types is
    excluded.
    """
    # Boring solution: only write out the first one...
    first = jcards[0]
    card = Card(first)

    set_name = first[utils.json_field_set_name]
    if exclude_sets(set_name) or exclude_layouts(first['layout']):
        return None

    if any(exclude_types(cardtype) for cardtype in card.types):
        return None

    return card
|
||||
def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stable = False):
|
||||
fmt_ordered = cardlib.fmt_ordered_default
|
||||
fmt_labeled = None
|
||||
fieldsep = utils.fieldsep
|
||||
randomize_fields = False
|
||||
randomize_mana = False
|
||||
initial_sep = True
|
||||
final_sep = True
|
||||
|
||||
# set the properties of the encoding
|
||||
if encoding in ['std']:
|
||||
if dupes == 0:
|
||||
dupes = 1
|
||||
elif encoding in ['rmana']:
|
||||
if dupes == 0:
|
||||
dupes = 3
|
||||
randomize_mana = True
|
||||
elif encoding in ['rmana_dual']:
|
||||
if dupes == 0:
|
||||
dupes = 3
|
||||
fmt_ordered = fmt_ordered + [cardlib.field_cost]
|
||||
randomize_mana = True
|
||||
elif encoding in ['rfields']:
|
||||
if dupes == 0:
|
||||
dupes = 10
|
||||
fmt_labeled = cardlib.fmt_labeled_default
|
||||
randomize_fields = True
|
||||
randomize_mana = True
|
||||
final_sep = False
|
||||
else:
|
||||
raise ValueError('encode.py: unknown encoding: ' + encoding)
|
||||
|
||||
if dupes <= 0:
|
||||
dupes = 1
|
||||
|
||||
def main(fname, oname = None, verbose = True):
|
||||
if verbose:
|
||||
print 'Opening json file: ' + fname
|
||||
print 'Preparing to encode:'
|
||||
print ' Using encoding ' + repr(encoding)
|
||||
if dupes > 1:
|
||||
print ' Duplicating each card ' + str(dupes) + ' times.'
|
||||
if stable:
|
||||
print ' NOT randomizing order of cards.'
|
||||
|
||||
|
||||
jcards = jdecode.mtg_open_json(fname, verbose)
|
||||
cards = []
|
||||
|
||||
valid = 0
|
||||
skipped = 0
|
||||
invalid = 0
|
||||
unparsed = 0
|
||||
|
||||
# force a stable ordering, we will randomize later
|
||||
for jcard_name in sorted(jcards):
|
||||
card = compile_duplicated(jcards[jcard_name])
|
||||
if card:
|
||||
if card.valid:
|
||||
valid += 1
|
||||
cards += [card]
|
||||
elif card.parsed:
|
||||
invalid += 1
|
||||
else:
|
||||
unparsed += 1
|
||||
else:
|
||||
skipped += 1
|
||||
if fname[-5:] == '.json':
|
||||
if verbose:
|
||||
print 'This looks like a json file: ' + fname
|
||||
json_srcs = jdecode.mtg_open_json(fname, verbose)
|
||||
# don't worry we randomize later
|
||||
for json_cardname in sorted(json_srcs):
|
||||
if len(json_srcs[json_cardname]) > 0:
|
||||
jcards = json_srcs[json_cardname]
|
||||
card = cardlib.Card(json_srcs[json_cardname][0])
|
||||
|
||||
skip = False
|
||||
if (exclude_sets(jcards[0][utils.json_field_set_name])
|
||||
or exclude_layouts(jcards[0]['layout'])):
|
||||
skip = True
|
||||
for cardtype in card.types:
|
||||
if exclude_types(cardtype):
|
||||
skip = True
|
||||
if skip:
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
if card.valid:
|
||||
valid += 1
|
||||
cards += [card] * dupes
|
||||
elif card.parsed:
|
||||
invalid += 1
|
||||
else:
|
||||
unparsed += 1
|
||||
# fall back to opening a normal encoded file
|
||||
else:
|
||||
if verbose:
|
||||
print 'Opening encoded card file: ' + fname
|
||||
with open(fname, 'rt') as f:
|
||||
text = f.read()
|
||||
for card_src in text.split(utils.cardsep):
|
||||
if card_src:
|
||||
card = cardlib.Card(card_src)
|
||||
if card.valid:
|
||||
valid += 1
|
||||
cards += [card] * dupes
|
||||
elif card.parsed:
|
||||
invalid += 1
|
||||
else:
|
||||
unparsed += 1
|
||||
|
||||
if verbose:
|
||||
print (str(valid) + ' valid, ' + str(skipped) + ' skipped, '
|
||||
|
@ -60,27 +120,55 @@ def main(fname, oname = None, verbose = True):
|
|||
|
||||
# This should give a random but consistent ordering, to make comparing changes
|
||||
# between the output of different versions easier.
|
||||
random.seed(1371367)
|
||||
random.shuffle(cards)
|
||||
if not stable:
|
||||
random.seed(1371367)
|
||||
random.shuffle(cards)
|
||||
|
||||
if oname:
|
||||
if verbose:
|
||||
print 'Writing output to: ' + oname
|
||||
with open(oname, 'w') as ofile:
|
||||
for card in cards:
|
||||
ofile.write(card.encode() + utils.cardsep)
|
||||
ofile.write(card.encode(fmt_ordered = fmt_ordered,
|
||||
fmt_labeled = fmt_labeled,
|
||||
fieldsep = fieldsep,
|
||||
randomize_fields = randomize_fields,
|
||||
randomize_mana = randomize_mana,
|
||||
initial_sep = initial_sep,
|
||||
final_sep = final_sep)
|
||||
+ utils.cardsep)
|
||||
else:
|
||||
for card in cards:
|
||||
sys.stdout.write(card.encode() + utils.cardsep)
|
||||
sts.stdout.flush()
|
||||
sys.stdout.write(card.encode(fmt_ordered = fmt_ordered,
|
||||
fmt_labeled = fmt_labeled,
|
||||
fieldsep = fieldsep,
|
||||
randomize_fields = randomize_fields,
|
||||
randomize_mana = randomize_mana,
|
||||
initial_sep = initial_sep,
|
||||
final_sep = final_sep)
|
||||
+ utils.cardsep)
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
if len(sys.argv) == 2:
|
||||
main(sys.argv[1])
|
||||
elif len(sys.argv) == 3:
|
||||
main(sys.argv[1], oname = sys.argv[2])
|
||||
else:
|
||||
print 'Usage: ' + sys.argv[0] + ' ' + '<JSON file> [output filename]'
|
||||
exit(1)
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
parser.add_argument('infile',
|
||||
help='encoded card file or json corpus to encode')
|
||||
parser.add_argument('outfile', nargs='?', default=None,
|
||||
help='output file, defaults to stdout')
|
||||
parser.add_argument('-d', '--duplicate', metavar='N', type=int, default=0,
|
||||
help='number of times to duplicate each card')
|
||||
parser.add_argument('-e', '--encoding', default='std',
|
||||
choices=['std', 'rmana', 'rmana_dual', 'rfields'])
|
||||
parser.add_argument('-s', '--stable', action='store_true',
|
||||
help="don't randomize the order of the cards")
|
||||
parser.add_argument('-v', '--verbose', action='store_true',
|
||||
help='verbose output')
|
||||
|
||||
args = parser.parse_args()
|
||||
main(args.infile, args.outfile, verbose = args.verbose, dupes = args.duplicate,
|
||||
encoding = args.encoding, stable = args.stable)
|
||||
exit(0)
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# card representation
|
||||
import re
|
||||
import random
|
||||
|
||||
import utils
|
||||
import transforms
|
||||
|
@ -58,6 +59,7 @@ fmt_labeled_default = {
|
|||
field_cost : field_label_cost,
|
||||
field_supertypes : field_label_supertypes,
|
||||
field_types : field_label_types,
|
||||
field_subtypes : field_label_subtypes,
|
||||
field_loyalty : field_label_loyalty,
|
||||
field_pt : field_label_pt,
|
||||
field_text : field_label_text,
|
||||
|
@ -434,13 +436,12 @@ class Card:
|
|||
outfield_str = outfield.encode(randomize = randomize_mana)
|
||||
else:
|
||||
outfield_str = outfield
|
||||
|
||||
if fmt_labeled and field in fmt_labeled:
|
||||
outfield_str = fmt_labeled[field] + outfield_str
|
||||
|
||||
else:
|
||||
outfield_str = ''
|
||||
|
||||
if fmt_labeled and field in fmt_labeled:
|
||||
outfield_str = fmt_labeled[field] + outfield_str
|
||||
|
||||
outfields += [outfield_str]
|
||||
|
||||
else:
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import re
|
||||
import sys
|
||||
|
||||
import utils
|
||||
from cardlib import Card
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# representation for mana costs and text with embedded mana costs
|
||||
# data aggregating classes
|
||||
import random
|
||||
import re
|
||||
import random
|
||||
|
||||
import utils
|
||||
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
import utils
|
||||
import datamine
|
||||
import random
|
||||
|
||||
def main(fname, oname = None, verbose = True):
    """Re-encode every card of an encoded card file several times, in shuffled order.

    fname   -- path of the encoded card file to read
    oname   -- optional output path; when it is falsy the cards are printed
    verbose -- print progress messages
    """
    if verbose:
        print('Opening encoded card file: ' + fname)

    with open(fname, 'rt') as infile:
        contents = infile.read()

    # overkill: run the full analysis just to obtain the parsed cards
    datamine.analyze(contents.split(utils.cardsep))

    reps = 10
    # each card contributes reps consecutive randomized re-encodings
    # (an alternative that also appended card.cost.reencode(randomize = True)
    # + utils.fieldsep to each entry was left disabled)
    multicards = [card.reencode(randomize = True)
                  for card in datamine.cards
                  for _ in range(reps)]

    random.shuffle(multicards)

    if not oname:
        for textcard in multicards:
            print(textcard + '\n')
        return

    if verbose:
        print('Writing output to: ' + oname)
    with open(oname, 'w') as ofile:
        ofile.writelines(textcard + utils.cardsep for textcard in multicards)
|
||||
|
||||
if __name__ == '__main__':
    import sys

    # one argument: input only; two arguments: input and output file
    argc = len(sys.argv)
    if argc == 2:
        main(sys.argv[1])
    elif argc == 3:
        main(sys.argv[1], oname = sys.argv[2])
    else:
        print('Usage: ' + sys.argv[0] + ' ' + '<encoded file> [output filename]')
        exit(1)
|
48
scripts/summarize.py
Executable file
48
scripts/summarize.py
Executable file
|
@ -0,0 +1,48 @@
|
|||
#!/usr/bin/env python
|
||||
import sys
|
||||
import os
|
||||
|
||||
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
|
||||
sys.path.append(libdir)
|
||||
import utils
|
||||
import jdecode
|
||||
from datalib import Datamine
|
||||
|
||||
def main(fname, verbose = True, outliers = False, dump_all = False):
    """Load card sources from fname and print a Datamine summary of them.

    fname    -- path to read; a name ending in '.json' is opened with
                jdecode.mtg_open_json, anything else is read as text and
                split on utils.cardsep
    verbose  -- print which kind of file is being opened (also passed on to
                jdecode.mtg_open_json)
    outliers -- also call mine.outliers() after the summary
    dump_all -- also call mine.outliers(), with dump_invalid set to True
    """
    if fname.endswith('.json'):
        if verbose:
            print('This looks like a json file: ' + fname)
        json_srcs = jdecode.mtg_open_json(fname, verbose)
        # take the first entry under each card name, visiting names in sorted order
        card_srcs = [json_srcs[cardname][0]
                     for cardname in sorted(json_srcs)
                     if len(json_srcs[cardname]) > 0]
    else:
        if verbose:
            print('Opening encoded card file: ' + fname)
        with open(fname, 'rt') as infile:
            card_srcs = infile.read().split(utils.cardsep)

    mine = Datamine(card_srcs)
    mine.summarize()
    if outliers or dump_all:
        mine.outliers(dump_invalid = dump_all)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    import argparse

    # command-line front end: one positional input file and three boolean switches
    cli = argparse.ArgumentParser()
    cli.add_argument('infile',
                     help='encoded card file or json corpus to process')
    cli.add_argument('-x', '--outliers', action='store_true',
                     help='show additional diagnostics and edge cases')
    cli.add_argument('-a', '--all', action='store_true',
                     help='show all information and dump invalid cards')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='verbose output')

    opts = cli.parse_args()
    main(opts.infile,
         verbose = opts.verbose,
         outliers = opts.outliers,
         dump_all = opts.all)
    exit(0)
|
33
summarize.py
33
summarize.py
|
@ -1,33 +0,0 @@
|
|||
import sys
|
||||
|
||||
import lib.utils as utils
|
||||
import lib.jdecode as jdecode
|
||||
from lib.datalib import Datamine
|
||||
|
||||
def main(fname, verbose = True):
    """Load card sources from fname, then print a Datamine summary and its outliers.

    fname   -- path to read; a name ending in '.json' is opened with
               jdecode.mtg_open_json, anything else is read as text and
               split on utils.cardsep
    verbose -- print which kind of file is being opened (also passed on to
               jdecode.mtg_open_json)
    """
    if fname.endswith('.json'):
        if verbose:
            print('This looks like a json file: ' + fname)
        json_srcs = jdecode.mtg_open_json(fname, verbose)
        # take the first entry under each card name, in the mapping's own iteration order
        card_srcs = [json_srcs[cardname][0]
                     for cardname in json_srcs
                     if len(json_srcs[cardname]) > 0]
    else:
        if verbose:
            print('Opening encoded card file: ' + fname)
        with open(fname, 'rt') as infile:
            card_srcs = infile.read().split(utils.cardsep)

    mine = Datamine(card_srcs)
    mine.summarize()
    mine.outliers(dump_invalid = False)
|
||||
|
||||
if __name__ == '__main__':
    import sys

    # exactly one argument (the input file) is accepted
    if len(sys.argv) != 2:
        print('Usage: ' + sys.argv[0] + ' ' + '<encoded file>')
        exit(1)
    main(sys.argv[1])
|
Loading…
Reference in a new issue