updated encode and decode to use new, unified formats
This commit is contained in:
parent
377aa9453c
commit
7dbd56a9bd
4 changed files with 144 additions and 63 deletions
68
decode.py
68
decode.py
|
@ -15,29 +15,39 @@ from namediff import Namediff
|
|||
def exclude_sets(cardset):
    """Return True if cards from the set named `cardset` should be skipped.

    The excluded set names are 'Unglued', 'Unhinged' and 'Celebration';
    the comparison is exact and case-sensitive.
    """
    # A single membership test replaces the chained `== ... or == ...`.
    return cardset in ('Unglued', 'Unhinged', 'Celebration')
|
||||
|
||||
def main(fname, oname = None, verbose = True,
|
||||
def main(fname, oname = None, verbose = True, encoding = 'std',
|
||||
gatherer = False, for_forum = False, for_mse = False,
|
||||
creativity = False, norarity = False, vdump = False):
|
||||
creativity = False, vdump = False):
|
||||
|
||||
fmt_ordered = cardlib.fmt_ordered_default
|
||||
|
||||
if encoding in ['std']:
|
||||
pass
|
||||
elif encoding in ['named']:
|
||||
fmt_ordered = cardlib.fmt_ordered_named
|
||||
elif encoding in ['noname']:
|
||||
fmt_ordered = cardlib.fmt_ordered_noname
|
||||
elif encoding in ['rfields']:
|
||||
pass
|
||||
elif encoding in ['old']:
|
||||
fmt_ordered = cardlib.fmt_ordered_old
|
||||
elif encoding in ['norarity']:
|
||||
fmt_ordered = cardlib.fmt_ordered_norarity
|
||||
elif encoding in ['vec']:
|
||||
pass
|
||||
elif encoding in ['custom']:
|
||||
## put custom format decisions here ##########################
|
||||
|
||||
## end of custom format ######################################
|
||||
pass
|
||||
else:
|
||||
raise ValueError('encode.py: unknown encoding: ' + encoding)
|
||||
|
||||
cards = []
|
||||
valid = 0
|
||||
invalid = 0
|
||||
unparsed = 0
|
||||
|
||||
if norarity:
|
||||
decode_fields = [
|
||||
cardlib.field_name,
|
||||
cardlib.field_supertypes,
|
||||
cardlib.field_types,
|
||||
cardlib.field_loyalty,
|
||||
cardlib.field_subtypes,
|
||||
#cardlib.field_rarity,
|
||||
cardlib.field_pt,
|
||||
cardlib.field_cost,
|
||||
cardlib.field_text,
|
||||
]
|
||||
else:
|
||||
decode_fields = cardlib.fmt_ordered_default
|
||||
|
||||
if fname[-5:] == '.json':
|
||||
if verbose:
|
||||
print 'This looks like a json file: ' + fname
|
||||
|
@ -48,17 +58,17 @@ def main(fname, oname = None, verbose = True,
|
|||
|
||||
# look for a normal rarity version, in a set we can use
|
||||
idx = 0
|
||||
card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
|
||||
card = cardlib.Card(jcards[idx], fmt_ordered = fmt_ordered)
|
||||
while (idx < len(jcards)
|
||||
and (card.rarity == utils.rarity_special_marker
|
||||
or exclude_sets(jcards[idx][utils.json_field_set_name]))):
|
||||
idx += 1
|
||||
if idx < len(jcards):
|
||||
card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
|
||||
card = cardlib.Card(jcards[idx], fmt_ordered = fmt_ordered)
|
||||
# if there isn't one, settle with index 0
|
||||
if idx >= len(jcards):
|
||||
idx = 0
|
||||
card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
|
||||
card = cardlib.Card(jcards[idx], fmt_ordered = fmt_ordered)
|
||||
# we could go back and look for a card satisfying one of the criteria,
|
||||
# but eh
|
||||
|
||||
|
@ -78,7 +88,7 @@ def main(fname, oname = None, verbose = True,
|
|||
text = f.read()
|
||||
for card_src in text.split(utils.cardsep):
|
||||
if card_src:
|
||||
card = cardlib.Card(card_src, fmt_ordered = decode_fields)
|
||||
card = cardlib.Card(card_src, fmt_ordered = fmt_ordered)
|
||||
if card.valid:
|
||||
valid += 1
|
||||
elif card.parsed:
|
||||
|
@ -96,14 +106,16 @@ def main(fname, oname = None, verbose = True,
|
|||
for card in cards:
|
||||
if not card.parsed and not card.text.text:
|
||||
bad_count += 1
|
||||
elif len(card.name) > 50 or len(card.rarity) > 3:
|
||||
bad_count += 1
|
||||
else:
|
||||
good_count += 1
|
||||
if good_count + bad_count > 15:
|
||||
break
|
||||
# random heuristic
|
||||
if bad_count > 10:
|
||||
print 'Saw a bunch of unparsed cards with no text:'
|
||||
print 'If this is a legacy format, try rerunning with --norarity'
|
||||
print 'WARNING: Saw a bunch of unparsed cards:'
|
||||
print ' If this is a legacy format, try rerunning with "-e old" or "-e norarity"'
|
||||
|
||||
if creativity:
|
||||
cbow = CBOW()
|
||||
|
@ -190,6 +202,10 @@ if __name__ == '__main__':
|
|||
help='encoded card file or json corpus to encode')
|
||||
parser.add_argument('outfile', nargs='?', default=None,
|
||||
help='output file, defaults to stdout')
|
||||
parser.add_argument('-e', '--encoding', default='std', choices=utils.formats,
|
||||
#help='{' + ','.join(formats) + '}',
|
||||
help='encoding format to use',
|
||||
)
|
||||
parser.add_argument('-g', '--gatherer', action='store_true',
|
||||
help='emulate Gatherer visual spoiler')
|
||||
parser.add_argument('-f', '--forum', action='store_true',
|
||||
|
@ -198,14 +214,12 @@ if __name__ == '__main__':
|
|||
help='use CBOW fuzzy matching to check creativity of cards')
|
||||
parser.add_argument('-d', '--dump', action='store_true',
|
||||
help='dump out lots of information about invalid cards')
|
||||
parser.add_argument('--norarity', action='store_true',
|
||||
help='the card format has no rarity field; use for legacy input')
|
||||
parser.add_argument('-v', '--verbose', action='store_true',
|
||||
help='verbose output')
|
||||
parser.add_argument('-mse', '--mse', action='store_true', help='use Magic Set Editor 2 encoding; will output as .mse-set file')
|
||||
|
||||
args = parser.parse_args()
|
||||
main(args.infile, args.outfile, verbose = args.verbose,
|
||||
main(args.infile, args.outfile, verbose = args.verbose, encoding = args.encoding,
|
||||
gatherer = args.gatherer, for_forum = args.forum, for_mse = args.mse,
|
||||
creativity = args.creativity, norarity = args.norarity, vdump = args.dump)
|
||||
creativity = args.creativity, vdump = args.dump)
|
||||
exit(0)
|
||||
|
|
78
encode.py
78
encode.py
|
@ -19,51 +19,53 @@ def exclude_types(cardtype):
|
|||
def exclude_layouts(layout):
    """Return True if `layout` is one of the card layouts that are excluded."""
    excluded = ('token', 'plane', 'scheme', 'phenomenon', 'vanguard')
    return layout in excluded
|
||||
|
||||
def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stable = False):
|
||||
def main(fname, oname = None, verbose = True, encoding = 'std',
|
||||
nolinetrans = False, randomize = False, nolabel = False, stable = False):
|
||||
fmt_ordered = cardlib.fmt_ordered_default
|
||||
fmt_labeled = None
|
||||
fmt_labeled = None if nolabel else cardlib.fmt_labeled_default
|
||||
fieldsep = utils.fieldsep
|
||||
line_transformations = not nolinetrans
|
||||
randomize_fields = False
|
||||
randomize_mana = False
|
||||
randomize_mana = randomize
|
||||
initial_sep = True
|
||||
final_sep = True
|
||||
|
||||
# set the properties of the encoding
|
||||
if encoding in ['vec']:
|
||||
|
||||
if encoding in ['std']:
|
||||
pass
|
||||
elif encoding in ['std']:
|
||||
if dupes == 0:
|
||||
dupes = 1
|
||||
elif encoding in ['rmana']:
|
||||
if dupes == 0:
|
||||
dupes = 1
|
||||
randomize_mana = True
|
||||
elif encoding in ['rmana_dual']:
|
||||
if dupes == 0:
|
||||
dupes = 1
|
||||
fmt_ordered = fmt_ordered + [cardlib.field_cost]
|
||||
randomize_mana = True
|
||||
elif encoding in ['named']:
|
||||
fmt_ordered = cardlib.fmt_ordered_named
|
||||
elif encoding in ['noname']:
|
||||
fmt_ordered = cardlib.fmt_ordered_noname
|
||||
elif encoding in ['rfields']:
|
||||
if dupes == 0:
|
||||
dupes = 1
|
||||
fmt_labeled = cardlib.fmt_labeled_default
|
||||
randomize_fields = True
|
||||
#randomize_mana = True
|
||||
final_sep = False
|
||||
elif encoding in ['old']:
|
||||
fmt_ordered = cardlib.fmt_ordered_old
|
||||
elif encoding in ['norarity']:
|
||||
fmt_ordered = cardlib.fmt_ordered_norarity
|
||||
elif encoding in ['vec']:
|
||||
pass
|
||||
elif encoding in ['custom']:
|
||||
## put custom format decisions here ##########################
|
||||
|
||||
## end of custom format ######################################
|
||||
pass
|
||||
else:
|
||||
raise ValueError('encode.py: unknown encoding: ' + encoding)
|
||||
|
||||
if dupes <= 0:
|
||||
dupes = 1
|
||||
|
||||
if verbose:
|
||||
print 'Preparing to encode:'
|
||||
print ' Using encoding ' + repr(encoding)
|
||||
if dupes > 1:
|
||||
print ' Duplicating each card ' + str(dupes) + ' times.'
|
||||
if stable:
|
||||
print ' NOT randomizing order of cards.'
|
||||
|
||||
if randomize_mana:
|
||||
print ' Randomizing order of symobls in manacosts.'
|
||||
if not fmt_labeled:
|
||||
print ' NOT labeling fields for this run (may be harder to decode).'
|
||||
if not line_transformations:
|
||||
print ' NOT using line reordering transformations'
|
||||
|
||||
cards = []
|
||||
valid = 0
|
||||
|
@ -109,7 +111,7 @@ def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stabl
|
|||
|
||||
if card.valid:
|
||||
valid += 1
|
||||
cards += [card] * dupes
|
||||
cards += [card]
|
||||
elif card.parsed:
|
||||
invalid += 1
|
||||
else:
|
||||
|
@ -126,7 +128,7 @@ def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stabl
|
|||
card = cardlib.Card(card_src)
|
||||
if card.valid:
|
||||
valid += 1
|
||||
cards += [card] * dupes
|
||||
cards += [card]
|
||||
elif card.parsed:
|
||||
invalid += 1
|
||||
else:
|
||||
|
@ -174,17 +176,23 @@ if __name__ == '__main__':
|
|||
help='encoded card file or json corpus to encode')
|
||||
parser.add_argument('outfile', nargs='?', default=None,
|
||||
help='output file, defaults to stdout')
|
||||
parser.add_argument('-d', '--duplicate', metavar='N', type=int, default=0,
|
||||
help='number of times to duplicate each card')
|
||||
parser.add_argument('-e', '--encoding', default='std',
|
||||
choices=['std', 'rmana', 'rmana_dual', 'rfields', 'vec'])
|
||||
parser.add_argument('-e', '--encoding', default='std', choices=utils.formats,
|
||||
#help='{' + ','.join(formats) + '}',
|
||||
help='encoding format to use',
|
||||
)
|
||||
parser.add_argument('-r', '--randomize', action='store_true',
|
||||
help='randomize the order of symbols in mana costs')
|
||||
parser.add_argument('--nolinetrans', action='store_true',
|
||||
help="don't reorder lines of card text")
|
||||
parser.add_argument('--nolabel', action='store_true',
|
||||
help="don't label fields")
|
||||
parser.add_argument('-s', '--stable', action='store_true',
|
||||
help="don't randomize the order of the cards")
|
||||
parser.add_argument('-v', '--verbose', action='store_true',
|
||||
help='verbose output')
|
||||
|
||||
args = parser.parse_args()
|
||||
main(args.infile, args.outfile, verbose = args.verbose, dupes = args.duplicate,
|
||||
encoding = args.encoding, stable = args.stable)
|
||||
main(args.infile, args.outfile, verbose = args.verbose, encoding = args.encoding,
|
||||
nolinetrans = args.nolinetrans, randomize = args.randomize, nolabel = args.nolabel,
|
||||
stable = args.stable)
|
||||
exit(0)
|
||||
|
||||
|
|
|
@ -88,7 +88,8 @@ fieldnames = [
|
|||
field_text,
|
||||
]
|
||||
|
||||
fmt_ordered_default = [
|
||||
# legacy
|
||||
fmt_ordered_old = [
|
||||
field_name,
|
||||
field_supertypes,
|
||||
field_types,
|
||||
|
@ -99,6 +100,52 @@ fmt_ordered_default = [
|
|||
field_cost,
|
||||
field_text,
|
||||
]
|
||||
fmt_ordered_norarity = [
|
||||
field_name,
|
||||
field_supertypes,
|
||||
field_types,
|
||||
field_loyalty,
|
||||
field_subtypes,
|
||||
field_pt,
|
||||
field_cost,
|
||||
field_text,
|
||||
]
|
||||
|
||||
# standard
|
||||
fmt_ordered_default = [
|
||||
field_types,
|
||||
field_supertypes,
|
||||
field_subtypes,
|
||||
field_loyalty,
|
||||
field_pt,
|
||||
field_text,
|
||||
field_cost,
|
||||
field_rarity,
|
||||
field_name,
|
||||
]
|
||||
|
||||
# minor variations
|
||||
fmt_ordered_noname = [
|
||||
field_types,
|
||||
field_supertypes,
|
||||
field_subtypes,
|
||||
field_loyalty,
|
||||
field_pt,
|
||||
field_text,
|
||||
field_cost,
|
||||
field_rarity,
|
||||
]
|
||||
fmt_ordered_named = [
|
||||
field_name,
|
||||
field_types,
|
||||
field_supertypes,
|
||||
field_subtypes,
|
||||
field_loyalty,
|
||||
field_pt,
|
||||
field_text,
|
||||
field_cost,
|
||||
field_rarity,
|
||||
]
|
||||
|
||||
fmt_labeled_default = {
|
||||
field_name : field_label_name,
|
||||
|
|
12
lib/utils.py
12
lib/utils.py
|
@ -9,6 +9,18 @@ import config
|
|||
# special chunk of text that Magic Set Editor 2 requires at the start of all set files.
|
||||
mse_prepend = 'mse version: 0.3.8\ngame: magic\nstylesheet: m15\nset info:\n\tsymbol:\nstyling:\n\tmagic-m15:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay:\n\tmagic-m15-clear:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-extra-improved:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\tpt box symbols: magic-pt-symbols-extra.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-planeswalker:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-planeswalker-promo-black:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-promo-dka:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-token-clear:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker-4abil:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker-clear:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker-promo-black:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n'
|
||||
|
||||
# encoding formats we know about
|
||||
# Names accepted by the -e/--encoding option of the encode and decode scripts.
formats = [
    'std',       # default field order
    'named',     # name field first
    'noname',    # name field omitted
    'rfields',   # randomized field order (encode side)
    'old',       # legacy field order
    'norarity',  # legacy field order without a rarity field
    'vec',
    'custom',    # placeholder for user-defined format decisions
]
|
||||
|
||||
# separators
|
||||
cardsep = config.cardsep
|
||||
fieldsep = config.fieldsep
|
||||
|
|
Loading…
Reference in a new issue