updated encode and decode to use new, unified formats
This commit is contained in:
parent
377aa9453c
commit
7dbd56a9bd
4 changed files with 144 additions and 63 deletions
68
decode.py
68
decode.py
|
@ -15,29 +15,39 @@ from namediff import Namediff
|
||||||
def exclude_sets(cardset):
|
def exclude_sets(cardset):
|
||||||
return cardset == 'Unglued' or cardset == 'Unhinged' or cardset == 'Celebration'
|
return cardset == 'Unglued' or cardset == 'Unhinged' or cardset == 'Celebration'
|
||||||
|
|
||||||
def main(fname, oname = None, verbose = True,
|
def main(fname, oname = None, verbose = True, encoding = 'std',
|
||||||
gatherer = False, for_forum = False, for_mse = False,
|
gatherer = False, for_forum = False, for_mse = False,
|
||||||
creativity = False, norarity = False, vdump = False):
|
creativity = False, vdump = False):
|
||||||
|
|
||||||
|
fmt_ordered = cardlib.fmt_ordered_default
|
||||||
|
|
||||||
|
if encoding in ['std']:
|
||||||
|
pass
|
||||||
|
elif encoding in ['named']:
|
||||||
|
fmt_ordered = cardlib.fmt_ordered_named
|
||||||
|
elif encoding in ['noname']:
|
||||||
|
fmt_ordered = cardlib.fmt_ordered_noname
|
||||||
|
elif encoding in ['rfields']:
|
||||||
|
pass
|
||||||
|
elif encoding in ['old']:
|
||||||
|
fmt_ordered = cardlib.fmt_ordered_old
|
||||||
|
elif encoding in ['norarity']:
|
||||||
|
fmt_ordered = cardlib.fmt_ordered_norarity
|
||||||
|
elif encoding in ['vec']:
|
||||||
|
pass
|
||||||
|
elif encoding in ['custom']:
|
||||||
|
## put custom format decisions here ##########################
|
||||||
|
|
||||||
|
## end of custom format ######################################
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise ValueError('decode.py: unknown encoding: ' + encoding)
|
||||||
|
|
||||||
cards = []
|
cards = []
|
||||||
valid = 0
|
valid = 0
|
||||||
invalid = 0
|
invalid = 0
|
||||||
unparsed = 0
|
unparsed = 0
|
||||||
|
|
||||||
if norarity:
|
|
||||||
decode_fields = [
|
|
||||||
cardlib.field_name,
|
|
||||||
cardlib.field_supertypes,
|
|
||||||
cardlib.field_types,
|
|
||||||
cardlib.field_loyalty,
|
|
||||||
cardlib.field_subtypes,
|
|
||||||
#cardlib.field_rarity,
|
|
||||||
cardlib.field_pt,
|
|
||||||
cardlib.field_cost,
|
|
||||||
cardlib.field_text,
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
decode_fields = cardlib.fmt_ordered_default
|
|
||||||
|
|
||||||
if fname[-5:] == '.json':
|
if fname[-5:] == '.json':
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'This looks like a json file: ' + fname
|
print 'This looks like a json file: ' + fname
|
||||||
|
@ -48,17 +58,17 @@ def main(fname, oname = None, verbose = True,
|
||||||
|
|
||||||
# look for a normal rarity version, in a set we can use
|
# look for a normal rarity version, in a set we can use
|
||||||
idx = 0
|
idx = 0
|
||||||
card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
|
card = cardlib.Card(jcards[idx], fmt_ordered = fmt_ordered)
|
||||||
while (idx < len(jcards)
|
while (idx < len(jcards)
|
||||||
and (card.rarity == utils.rarity_special_marker
|
and (card.rarity == utils.rarity_special_marker
|
||||||
or exclude_sets(jcards[idx][utils.json_field_set_name]))):
|
or exclude_sets(jcards[idx][utils.json_field_set_name]))):
|
||||||
idx += 1
|
idx += 1
|
||||||
if idx < len(jcards):
|
if idx < len(jcards):
|
||||||
card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
|
card = cardlib.Card(jcards[idx], fmt_ordered = fmt_ordered)
|
||||||
# if there isn't one, settle with index 0
|
# if there isn't one, settle with index 0
|
||||||
if idx >= len(jcards):
|
if idx >= len(jcards):
|
||||||
idx = 0
|
idx = 0
|
||||||
card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
|
card = cardlib.Card(jcards[idx], fmt_ordered = fmt_ordered)
|
||||||
# we could go back and look for a card satisfying one of the criteria,
|
# we could go back and look for a card satisfying one of the criteria,
|
||||||
# but eh
|
# but eh
|
||||||
|
|
||||||
|
@ -78,7 +88,7 @@ def main(fname, oname = None, verbose = True,
|
||||||
text = f.read()
|
text = f.read()
|
||||||
for card_src in text.split(utils.cardsep):
|
for card_src in text.split(utils.cardsep):
|
||||||
if card_src:
|
if card_src:
|
||||||
card = cardlib.Card(card_src, fmt_ordered = decode_fields)
|
card = cardlib.Card(card_src, fmt_ordered = fmt_ordered)
|
||||||
if card.valid:
|
if card.valid:
|
||||||
valid += 1
|
valid += 1
|
||||||
elif card.parsed:
|
elif card.parsed:
|
||||||
|
@ -96,14 +106,16 @@ def main(fname, oname = None, verbose = True,
|
||||||
for card in cards:
|
for card in cards:
|
||||||
if not card.parsed and not card.text.text:
|
if not card.parsed and not card.text.text:
|
||||||
bad_count += 1
|
bad_count += 1
|
||||||
|
elif len(card.name) > 50 or len(card.rarity) > 3:
|
||||||
|
bad_count += 1
|
||||||
else:
|
else:
|
||||||
good_count += 1
|
good_count += 1
|
||||||
if good_count + bad_count > 15:
|
if good_count + bad_count > 15:
|
||||||
break
|
break
|
||||||
# random heuristic
|
# random heuristic
|
||||||
if bad_count > 10:
|
if bad_count > 10:
|
||||||
print 'Saw a bunch of unparsed cards with no text:'
|
print 'WARNING: Saw a bunch of unparsed cards:'
|
||||||
print 'If this is a legacy format, try rerunning with --norarity'
|
print ' If this is a legacy format, try rerunning with "-e old" or "-e norarity"'
|
||||||
|
|
||||||
if creativity:
|
if creativity:
|
||||||
cbow = CBOW()
|
cbow = CBOW()
|
||||||
|
@ -190,6 +202,10 @@ if __name__ == '__main__':
|
||||||
help='encoded card file or json corpus to encode')
|
help='encoded card file or json corpus to encode')
|
||||||
parser.add_argument('outfile', nargs='?', default=None,
|
parser.add_argument('outfile', nargs='?', default=None,
|
||||||
help='output file, defaults to stdout')
|
help='output file, defaults to stdout')
|
||||||
|
parser.add_argument('-e', '--encoding', default='std', choices=utils.formats,
|
||||||
|
#help='{' + ','.join(formats) + '}',
|
||||||
|
help='encoding format to use',
|
||||||
|
)
|
||||||
parser.add_argument('-g', '--gatherer', action='store_true',
|
parser.add_argument('-g', '--gatherer', action='store_true',
|
||||||
help='emulate Gatherer visual spoiler')
|
help='emulate Gatherer visual spoiler')
|
||||||
parser.add_argument('-f', '--forum', action='store_true',
|
parser.add_argument('-f', '--forum', action='store_true',
|
||||||
|
@ -198,14 +214,12 @@ if __name__ == '__main__':
|
||||||
help='use CBOW fuzzy matching to check creativity of cards')
|
help='use CBOW fuzzy matching to check creativity of cards')
|
||||||
parser.add_argument('-d', '--dump', action='store_true',
|
parser.add_argument('-d', '--dump', action='store_true',
|
||||||
help='dump out lots of information about invalid cards')
|
help='dump out lots of information about invalid cards')
|
||||||
parser.add_argument('--norarity', action='store_true',
|
|
||||||
help='the card format has no rarity field; use for legacy input')
|
|
||||||
parser.add_argument('-v', '--verbose', action='store_true',
|
parser.add_argument('-v', '--verbose', action='store_true',
|
||||||
help='verbose output')
|
help='verbose output')
|
||||||
parser.add_argument('-mse', '--mse', action='store_true', help='use Magic Set Editor 2 encoding; will output as .mse-set file')
|
parser.add_argument('-mse', '--mse', action='store_true', help='use Magic Set Editor 2 encoding; will output as .mse-set file')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
main(args.infile, args.outfile, verbose = args.verbose,
|
main(args.infile, args.outfile, verbose = args.verbose, encoding = args.encoding,
|
||||||
gatherer = args.gatherer, for_forum = args.forum, for_mse = args.mse,
|
gatherer = args.gatherer, for_forum = args.forum, for_mse = args.mse,
|
||||||
creativity = args.creativity, norarity = args.norarity, vdump = args.dump)
|
creativity = args.creativity, vdump = args.dump)
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
78
encode.py
78
encode.py
|
@ -19,51 +19,53 @@ def exclude_types(cardtype):
|
||||||
def exclude_layouts(layout):
|
def exclude_layouts(layout):
|
||||||
return layout in ['token', 'plane', 'scheme', 'phenomenon', 'vanguard']
|
return layout in ['token', 'plane', 'scheme', 'phenomenon', 'vanguard']
|
||||||
|
|
||||||
def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stable = False):
|
def main(fname, oname = None, verbose = True, encoding = 'std',
|
||||||
|
nolinetrans = False, randomize = False, nolabel = False, stable = False):
|
||||||
fmt_ordered = cardlib.fmt_ordered_default
|
fmt_ordered = cardlib.fmt_ordered_default
|
||||||
fmt_labeled = None
|
fmt_labeled = None if nolabel else cardlib.fmt_labeled_default
|
||||||
fieldsep = utils.fieldsep
|
fieldsep = utils.fieldsep
|
||||||
|
line_transformations = not nolinetrans
|
||||||
randomize_fields = False
|
randomize_fields = False
|
||||||
randomize_mana = False
|
randomize_mana = randomize
|
||||||
initial_sep = True
|
initial_sep = True
|
||||||
final_sep = True
|
final_sep = True
|
||||||
|
|
||||||
# set the properties of the encoding
|
# set the properties of the encoding
|
||||||
if encoding in ['vec']:
|
|
||||||
|
if encoding in ['std']:
|
||||||
pass
|
pass
|
||||||
elif encoding in ['std']:
|
elif encoding in ['named']:
|
||||||
if dupes == 0:
|
fmt_ordered = cardlib.fmt_ordered_named
|
||||||
dupes = 1
|
elif encoding in ['noname']:
|
||||||
elif encoding in ['rmana']:
|
fmt_ordered = cardlib.fmt_ordered_noname
|
||||||
if dupes == 0:
|
|
||||||
dupes = 1
|
|
||||||
randomize_mana = True
|
|
||||||
elif encoding in ['rmana_dual']:
|
|
||||||
if dupes == 0:
|
|
||||||
dupes = 1
|
|
||||||
fmt_ordered = fmt_ordered + [cardlib.field_cost]
|
|
||||||
randomize_mana = True
|
|
||||||
elif encoding in ['rfields']:
|
elif encoding in ['rfields']:
|
||||||
if dupes == 0:
|
|
||||||
dupes = 1
|
|
||||||
fmt_labeled = cardlib.fmt_labeled_default
|
|
||||||
randomize_fields = True
|
randomize_fields = True
|
||||||
#randomize_mana = True
|
|
||||||
final_sep = False
|
final_sep = False
|
||||||
|
elif encoding in ['old']:
|
||||||
|
fmt_ordered = cardlib.fmt_ordered_old
|
||||||
|
elif encoding in ['norarity']:
|
||||||
|
fmt_ordered = cardlib.fmt_ordered_norarity
|
||||||
|
elif encoding in ['vec']:
|
||||||
|
pass
|
||||||
|
elif encoding in ['custom']:
|
||||||
|
## put custom format decisions here ##########################
|
||||||
|
|
||||||
|
## end of custom format ######################################
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
raise ValueError('encode.py: unknown encoding: ' + encoding)
|
raise ValueError('encode.py: unknown encoding: ' + encoding)
|
||||||
|
|
||||||
if dupes <= 0:
|
|
||||||
dupes = 1
|
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'Preparing to encode:'
|
print 'Preparing to encode:'
|
||||||
print ' Using encoding ' + repr(encoding)
|
print ' Using encoding ' + repr(encoding)
|
||||||
if dupes > 1:
|
|
||||||
print ' Duplicating each card ' + str(dupes) + ' times.'
|
|
||||||
if stable:
|
if stable:
|
||||||
print ' NOT randomizing order of cards.'
|
print ' NOT randomizing order of cards.'
|
||||||
|
if randomize_mana:
|
||||||
|
print ' Randomizing order of symbols in manacosts.'
|
||||||
|
if not fmt_labeled:
|
||||||
|
print ' NOT labeling fields for this run (may be harder to decode).'
|
||||||
|
if not line_transformations:
|
||||||
|
print ' NOT using line reordering transformations'
|
||||||
|
|
||||||
cards = []
|
cards = []
|
||||||
valid = 0
|
valid = 0
|
||||||
|
@ -109,7 +111,7 @@ def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stabl
|
||||||
|
|
||||||
if card.valid:
|
if card.valid:
|
||||||
valid += 1
|
valid += 1
|
||||||
cards += [card] * dupes
|
cards += [card]
|
||||||
elif card.parsed:
|
elif card.parsed:
|
||||||
invalid += 1
|
invalid += 1
|
||||||
else:
|
else:
|
||||||
|
@ -126,7 +128,7 @@ def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stabl
|
||||||
card = cardlib.Card(card_src)
|
card = cardlib.Card(card_src)
|
||||||
if card.valid:
|
if card.valid:
|
||||||
valid += 1
|
valid += 1
|
||||||
cards += [card] * dupes
|
cards += [card]
|
||||||
elif card.parsed:
|
elif card.parsed:
|
||||||
invalid += 1
|
invalid += 1
|
||||||
else:
|
else:
|
||||||
|
@ -174,17 +176,23 @@ if __name__ == '__main__':
|
||||||
help='encoded card file or json corpus to encode')
|
help='encoded card file or json corpus to encode')
|
||||||
parser.add_argument('outfile', nargs='?', default=None,
|
parser.add_argument('outfile', nargs='?', default=None,
|
||||||
help='output file, defaults to stdout')
|
help='output file, defaults to stdout')
|
||||||
parser.add_argument('-d', '--duplicate', metavar='N', type=int, default=0,
|
parser.add_argument('-e', '--encoding', default='std', choices=utils.formats,
|
||||||
help='number of times to duplicate each card')
|
#help='{' + ','.join(formats) + '}',
|
||||||
parser.add_argument('-e', '--encoding', default='std',
|
help='encoding format to use',
|
||||||
choices=['std', 'rmana', 'rmana_dual', 'rfields', 'vec'])
|
)
|
||||||
|
parser.add_argument('-r', '--randomize', action='store_true',
|
||||||
|
help='randomize the order of symbols in mana costs')
|
||||||
|
parser.add_argument('--nolinetrans', action='store_true',
|
||||||
|
help="don't reorder lines of card text")
|
||||||
|
parser.add_argument('--nolabel', action='store_true',
|
||||||
|
help="don't label fields")
|
||||||
parser.add_argument('-s', '--stable', action='store_true',
|
parser.add_argument('-s', '--stable', action='store_true',
|
||||||
help="don't randomize the order of the cards")
|
help="don't randomize the order of the cards")
|
||||||
parser.add_argument('-v', '--verbose', action='store_true',
|
parser.add_argument('-v', '--verbose', action='store_true',
|
||||||
help='verbose output')
|
help='verbose output')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
main(args.infile, args.outfile, verbose = args.verbose, dupes = args.duplicate,
|
main(args.infile, args.outfile, verbose = args.verbose, encoding = args.encoding,
|
||||||
encoding = args.encoding, stable = args.stable)
|
nolinetrans = args.nolinetrans, randomize = args.randomize, nolabel = args.nolabel,
|
||||||
|
stable = args.stable)
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
|
|
|
@ -88,7 +88,8 @@ fieldnames = [
|
||||||
field_text,
|
field_text,
|
||||||
]
|
]
|
||||||
|
|
||||||
fmt_ordered_default = [
|
# legacy
|
||||||
|
fmt_ordered_old = [
|
||||||
field_name,
|
field_name,
|
||||||
field_supertypes,
|
field_supertypes,
|
||||||
field_types,
|
field_types,
|
||||||
|
@ -99,6 +100,52 @@ fmt_ordered_default = [
|
||||||
field_cost,
|
field_cost,
|
||||||
field_text,
|
field_text,
|
||||||
]
|
]
|
||||||
|
fmt_ordered_norarity = [
|
||||||
|
field_name,
|
||||||
|
field_supertypes,
|
||||||
|
field_types,
|
||||||
|
field_loyalty,
|
||||||
|
field_subtypes,
|
||||||
|
field_pt,
|
||||||
|
field_cost,
|
||||||
|
field_text,
|
||||||
|
]
|
||||||
|
|
||||||
|
# standard
|
||||||
|
fmt_ordered_default = [
|
||||||
|
field_types,
|
||||||
|
field_supertypes,
|
||||||
|
field_subtypes,
|
||||||
|
field_loyalty,
|
||||||
|
field_pt,
|
||||||
|
field_text,
|
||||||
|
field_cost,
|
||||||
|
field_rarity,
|
||||||
|
field_name,
|
||||||
|
]
|
||||||
|
|
||||||
|
# minor variations
|
||||||
|
fmt_ordered_noname = [
|
||||||
|
field_types,
|
||||||
|
field_supertypes,
|
||||||
|
field_subtypes,
|
||||||
|
field_loyalty,
|
||||||
|
field_pt,
|
||||||
|
field_text,
|
||||||
|
field_cost,
|
||||||
|
field_rarity,
|
||||||
|
]
|
||||||
|
fmt_ordered_named = [
|
||||||
|
field_name,
|
||||||
|
field_types,
|
||||||
|
field_supertypes,
|
||||||
|
field_subtypes,
|
||||||
|
field_loyalty,
|
||||||
|
field_pt,
|
||||||
|
field_text,
|
||||||
|
field_cost,
|
||||||
|
field_rarity,
|
||||||
|
]
|
||||||
|
|
||||||
fmt_labeled_default = {
|
fmt_labeled_default = {
|
||||||
field_name : field_label_name,
|
field_name : field_label_name,
|
||||||
|
|
12
lib/utils.py
12
lib/utils.py
|
@ -9,6 +9,18 @@ import config
|
||||||
# special chunk of text that Magic Set Editor 2 requires at the start of all set files.
|
# special chunk of text that Magic Set Editor 2 requires at the start of all set files.
|
||||||
mse_prepend = 'mse version: 0.3.8\ngame: magic\nstylesheet: m15\nset info:\n\tsymbol:\nstyling:\n\tmagic-m15:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay:\n\tmagic-m15-clear:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-extra-improved:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\tpt box symbols: magic-pt-symbols-extra.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-planeswalker:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-planeswalker-promo-black:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-promo-dka:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-token-clear:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker-4abil:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker-clear:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker-promo-black:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n'
|
mse_prepend = 'mse version: 0.3.8\ngame: magic\nstylesheet: m15\nset info:\n\tsymbol:\nstyling:\n\tmagic-m15:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay:\n\tmagic-m15-clear:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-extra-improved:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\tpt box symbols: magic-pt-symbols-extra.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-planeswalker:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-planeswalker-promo-black:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-promo-dka:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-token-clear:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker-4abil:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker-clear:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker-promo-black:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n'
|
||||||
|
|
||||||
|
# encoding formats we know about
|
||||||
|
formats = [
|
||||||
|
'std',
|
||||||
|
'named',
|
||||||
|
'noname',
|
||||||
|
'rfields',
|
||||||
|
'old',
|
||||||
|
'norarity',
|
||||||
|
'vec',
|
||||||
|
'custom',
|
||||||
|
]
|
||||||
|
|
||||||
# separators
|
# separators
|
||||||
cardsep = config.cardsep
|
cardsep = config.cardsep
|
||||||
fieldsep = config.fieldsep
|
fieldsep = config.fieldsep
|
||||||
|
|
Loading…
Reference in a new issue