updated encode and decode to use new, unified formats

This commit is contained in:
Bill Zorn 2015-09-06 20:24:19 -07:00
parent 377aa9453c
commit 7dbd56a9bd
4 changed files with 144 additions and 63 deletions

View file

@ -15,29 +15,39 @@ from namediff import Namediff
def exclude_sets(cardset):
    """Return True if `cardset` names a set whose cards should be excluded.

    The excluded set names are 'Unglued', 'Unhinged' and 'Celebration';
    the comparison is exact (case-sensitive).
    """
    return cardset in ('Unglued', 'Unhinged', 'Celebration')
def main(fname, oname = None, verbose = True,
def main(fname, oname = None, verbose = True, encoding = 'std',
gatherer = False, for_forum = False, for_mse = False,
creativity = False, norarity = False, vdump = False):
creativity = False, vdump = False):
fmt_ordered = cardlib.fmt_ordered_default
if encoding in ['std']:
pass
elif encoding in ['named']:
fmt_ordered = cardlib.fmt_ordered_named
elif encoding in ['noname']:
fmt_ordered = cardlib.fmt_ordered_noname
elif encoding in ['rfields']:
pass
elif encoding in ['old']:
fmt_ordered = cardlib.fmt_ordered_old
elif encoding in ['norarity']:
fmt_ordered = cardlib.fmt_ordered_norarity
elif encoding in ['vec']:
pass
elif encoding in ['custom']:
## put custom format decisions here ##########################
## end of custom format ######################################
pass
else:
raise ValueError('encode.py: unknown encoding: ' + encoding)
cards = []
valid = 0
invalid = 0
unparsed = 0
if norarity:
decode_fields = [
cardlib.field_name,
cardlib.field_supertypes,
cardlib.field_types,
cardlib.field_loyalty,
cardlib.field_subtypes,
#cardlib.field_rarity,
cardlib.field_pt,
cardlib.field_cost,
cardlib.field_text,
]
else:
decode_fields = cardlib.fmt_ordered_default
if fname[-5:] == '.json':
if verbose:
print 'This looks like a json file: ' + fname
@ -48,17 +58,17 @@ def main(fname, oname = None, verbose = True,
# look for a normal rarity version, in a set we can use
idx = 0
card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
card = cardlib.Card(jcards[idx], fmt_ordered = fmt_ordered)
while (idx < len(jcards)
and (card.rarity == utils.rarity_special_marker
or exclude_sets(jcards[idx][utils.json_field_set_name]))):
idx += 1
if idx < len(jcards):
card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
card = cardlib.Card(jcards[idx], fmt_ordered = fmt_ordered)
# if there isn't one, settle with index 0
if idx >= len(jcards):
idx = 0
card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
card = cardlib.Card(jcards[idx], fmt_ordered = fmt_ordered)
# we could go back and look for a card satisfying one of the criteria,
# but eh
@ -78,7 +88,7 @@ def main(fname, oname = None, verbose = True,
text = f.read()
for card_src in text.split(utils.cardsep):
if card_src:
card = cardlib.Card(card_src, fmt_ordered = decode_fields)
card = cardlib.Card(card_src, fmt_ordered = fmt_ordered)
if card.valid:
valid += 1
elif card.parsed:
@ -96,14 +106,16 @@ def main(fname, oname = None, verbose = True,
for card in cards:
if not card.parsed and not card.text.text:
bad_count += 1
elif len(card.name) > 50 or len(card.rarity) > 3:
bad_count += 1
else:
good_count += 1
if good_count + bad_count > 15:
break
# random heuristic
if bad_count > 10:
print 'Saw a bunch of unparsed cards with no text:'
print 'If this is a legacy format, try rerunning with --norarity'
print 'WARNING: Saw a bunch of unparsed cards:'
print ' If this is a legacy format, try rerunning with "-e old" or "-e norarity"'
if creativity:
cbow = CBOW()
@ -190,6 +202,10 @@ if __name__ == '__main__':
help='encoded card file or json corpus to encode')
parser.add_argument('outfile', nargs='?', default=None,
help='output file, defaults to stdout')
parser.add_argument('-e', '--encoding', default='std', choices=utils.formats,
#help='{' + ','.join(formats) + '}',
help='encoding format to use',
)
parser.add_argument('-g', '--gatherer', action='store_true',
help='emulate Gatherer visual spoiler')
parser.add_argument('-f', '--forum', action='store_true',
@ -198,14 +214,12 @@ if __name__ == '__main__':
help='use CBOW fuzzy matching to check creativity of cards')
parser.add_argument('-d', '--dump', action='store_true',
help='dump out lots of information about invalid cards')
parser.add_argument('--norarity', action='store_true',
help='the card format has no rarity field; use for legacy input')
parser.add_argument('-v', '--verbose', action='store_true',
help='verbose output')
parser.add_argument('-mse', '--mse', action='store_true', help='use Magic Set Editor 2 encoding; will output as .mse-set file')
args = parser.parse_args()
main(args.infile, args.outfile, verbose = args.verbose,
main(args.infile, args.outfile, verbose = args.verbose, encoding = args.encoding,
gatherer = args.gatherer, for_forum = args.forum, for_mse = args.mse,
creativity = args.creativity, norarity = args.norarity, vdump = args.dump)
creativity = args.creativity, vdump = args.dump)
exit(0)

View file

@ -19,51 +19,53 @@ def exclude_types(cardtype):
def exclude_layouts(layout):
    """Return True if `layout` is one of the excluded card layouts.

    The excluded layouts are 'token', 'plane', 'scheme', 'phenomenon'
    and 'vanguard'; the comparison is exact (case-sensitive).
    """
    return layout in ('token', 'plane', 'scheme', 'phenomenon', 'vanguard')
def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stable = False):
def main(fname, oname = None, verbose = True, encoding = 'std',
nolinetrans = False, randomize = False, nolabel = False, stable = False):
fmt_ordered = cardlib.fmt_ordered_default
fmt_labeled = None
fmt_labeled = None if nolabel else cardlib.fmt_labeled_default
fieldsep = utils.fieldsep
line_transformations = not nolinetrans
randomize_fields = False
randomize_mana = False
randomize_mana = randomize
initial_sep = True
final_sep = True
# set the properties of the encoding
if encoding in ['vec']:
if encoding in ['std']:
pass
elif encoding in ['std']:
if dupes == 0:
dupes = 1
elif encoding in ['rmana']:
if dupes == 0:
dupes = 1
randomize_mana = True
elif encoding in ['rmana_dual']:
if dupes == 0:
dupes = 1
fmt_ordered = fmt_ordered + [cardlib.field_cost]
randomize_mana = True
elif encoding in ['named']:
fmt_ordered = cardlib.fmt_ordered_named
elif encoding in ['noname']:
fmt_ordered = cardlib.fmt_ordered_noname
elif encoding in ['rfields']:
if dupes == 0:
dupes = 1
fmt_labeled = cardlib.fmt_labeled_default
randomize_fields = True
#randomize_mana = True
final_sep = False
elif encoding in ['old']:
fmt_ordered = cardlib.fmt_ordered_old
elif encoding in ['norarity']:
fmt_ordered = cardlib.fmt_ordered_norarity
elif encoding in ['vec']:
pass
elif encoding in ['custom']:
## put custom format decisions here ##########################
## end of custom format ######################################
pass
else:
raise ValueError('encode.py: unknown encoding: ' + encoding)
if dupes <= 0:
dupes = 1
if verbose:
print 'Preparing to encode:'
print ' Using encoding ' + repr(encoding)
if dupes > 1:
print ' Duplicating each card ' + str(dupes) + ' times.'
if stable:
print ' NOT randomizing order of cards.'
if randomize_mana:
print ' Randomizing order of symobls in manacosts.'
if not fmt_labeled:
print ' NOT labeling fields for this run (may be harder to decode).'
if not line_transformations:
print ' NOT using line reordering transformations'
cards = []
valid = 0
@ -109,7 +111,7 @@ def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stabl
if card.valid:
valid += 1
cards += [card] * dupes
cards += [card]
elif card.parsed:
invalid += 1
else:
@ -126,7 +128,7 @@ def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stabl
card = cardlib.Card(card_src)
if card.valid:
valid += 1
cards += [card] * dupes
cards += [card]
elif card.parsed:
invalid += 1
else:
@ -174,17 +176,23 @@ if __name__ == '__main__':
help='encoded card file or json corpus to encode')
parser.add_argument('outfile', nargs='?', default=None,
help='output file, defaults to stdout')
parser.add_argument('-d', '--duplicate', metavar='N', type=int, default=0,
help='number of times to duplicate each card')
parser.add_argument('-e', '--encoding', default='std',
choices=['std', 'rmana', 'rmana_dual', 'rfields', 'vec'])
parser.add_argument('-e', '--encoding', default='std', choices=utils.formats,
#help='{' + ','.join(formats) + '}',
help='encoding format to use',
)
parser.add_argument('-r', '--randomize', action='store_true',
help='randomize the order of symbols in mana costs')
parser.add_argument('--nolinetrans', action='store_true',
help="don't reorder lines of card text")
parser.add_argument('--nolabel', action='store_true',
help="don't label fields")
parser.add_argument('-s', '--stable', action='store_true',
help="don't randomize the order of the cards")
parser.add_argument('-v', '--verbose', action='store_true',
help='verbose output')
args = parser.parse_args()
main(args.infile, args.outfile, verbose = args.verbose, dupes = args.duplicate,
encoding = args.encoding, stable = args.stable)
main(args.infile, args.outfile, verbose = args.verbose, encoding = args.encoding,
nolinetrans = args.nolinetrans, randomize = args.randomize, nolabel = args.nolabel,
stable = args.stable)
exit(0)

View file

@ -88,7 +88,8 @@ fieldnames = [
field_text,
]
fmt_ordered_default = [
# legacy
fmt_ordered_old = [
field_name,
field_supertypes,
field_types,
@ -99,6 +100,52 @@ fmt_ordered_default = [
field_cost,
field_text,
]
# Field order for the 'norarity' encoding: name first, text last, and no
# field_rarity entry at all (eight fields instead of nine).
fmt_ordered_norarity = [
field_name,
field_supertypes,
field_types,
field_loyalty,
field_subtypes,
field_pt,
field_cost,
field_text,
]
# standard
# Default field order, used whenever an encoding does not select another
# one (e.g. 'std'): types come first and the card name comes last, after
# the rarity.
fmt_ordered_default = [
field_types,
field_supertypes,
field_subtypes,
field_loyalty,
field_pt,
field_text,
field_cost,
field_rarity,
field_name,
]
# minor variations
# Field order for the 'noname' encoding: the same sequence as
# fmt_ordered_default, but with field_name left out entirely.
fmt_ordered_noname = [
field_types,
field_supertypes,
field_subtypes,
field_loyalty,
field_pt,
field_text,
field_cost,
field_rarity,
]
# Field order for the 'named' encoding: the same fields as
# fmt_ordered_default, but with field_name moved from the end to the front.
fmt_ordered_named = [
field_name,
field_types,
field_supertypes,
field_subtypes,
field_loyalty,
field_pt,
field_text,
field_cost,
field_rarity,
]
fmt_labeled_default = {
field_name : field_label_name,

View file

@ -9,6 +9,18 @@ import config
# special chunk of text that Magic Set Editor 2 requires at the start of all set files.
mse_prepend = 'mse version: 0.3.8\ngame: magic\nstylesheet: m15\nset info:\n\tsymbol:\nstyling:\n\tmagic-m15:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay:\n\tmagic-m15-clear:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-extra-improved:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\tpt box symbols: magic-pt-symbols-extra.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-planeswalker:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-planeswalker-promo-black:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-promo-dka:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-token-clear:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker-4abil:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker-clear:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker-promo-black:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n'
# encoding formats we know about
# This list is passed (as utils.formats) to argparse as the `choices` of the
# -e/--encoding command line option, so only these names are accepted there.
# The first entry, 'std', is also the option's default value.
formats = [
'std',
'named',
'noname',
'rfields',
'old',
'norarity',
'vec',
'custom',
]
# separators
cardsep = config.cardsep
fieldsep = config.fieldsep