updated encode and decode to use new, unified formats

2015-09-06 20:24:19 -07:00 · 2015-09-06 20:24:19 -07:00 · 7dbd56a9bd
commit 7dbd56a9bd
parent 377aa9453c
4 changed files with 144 additions and 63 deletions
--- a/decode.py
+++ b/decode.py
@ -15,29 +15,39 @@ from namediff import Namediff
 def exclude_sets(cardset):
    return cardset == 'Unglued' or cardset == 'Unhinged' or cardset == 'Celebration'

-def main(fname, oname = None, verbose = True, 
+def main(fname, oname = None, verbose = True, encoding = 'std',
         gatherer = False, for_forum = False, for_mse = False,
-         creativity = False, norarity = False, vdump = False):
+         creativity = False, vdump = False):
+
+    fmt_ordered = cardlib.fmt_ordered_default  # field order passed to cardlib.Card below
+
+    if encoding in ['std']:
+        pass  # keeps the default field order
+    elif encoding in ['named']:
+        fmt_ordered = cardlib.fmt_ordered_named
+    elif encoding in ['noname']:
+        fmt_ordered = cardlib.fmt_ordered_noname
+    elif encoding in ['rfields']:
+        pass  # keeps the default field order
+    elif encoding in ['old']:
+        fmt_ordered = cardlib.fmt_ordered_old
+    elif encoding in ['norarity']:
+        fmt_ordered = cardlib.fmt_ordered_norarity
+    elif encoding in ['vec']:
+        pass  # keeps the default field order
+    elif encoding in ['custom']:
+        ## put custom format decisions here ##########################
+        
+        ## end of custom format ######################################
+        pass
+    else:  # unknown name: report it under this script's name, not encode.py
+        raise ValueError('decode.py: unknown encoding: ' + encoding)
+
    cards = []
    valid = 0
    invalid = 0
    unparsed = 0

-    if norarity:
-        decode_fields = [
-            cardlib.field_name,
-            cardlib.field_supertypes,
-            cardlib.field_types,
-            cardlib.field_loyalty,
-            cardlib.field_subtypes,
-            #cardlib.field_rarity,
-            cardlib.field_pt,
-            cardlib.field_cost,
-            cardlib.field_text,
-        ]
-    else:
-        decode_fields = cardlib.fmt_ordered_default
-
    if fname[-5:] == '.json':
        if verbose:
            print 'This looks like a json file: ' + fname
@ -48,17 +58,17 @@ def main(fname, oname = None, verbose = True,

                # look for a normal rarity version, in a set we can use
                idx = 0
-                card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
+                card = cardlib.Card(jcards[idx], fmt_ordered = fmt_ordered)
                while (idx < len(jcards)
                       and (card.rarity == utils.rarity_special_marker 
                            or exclude_sets(jcards[idx][utils.json_field_set_name]))):
                    idx += 1
                    if idx < len(jcards):
-                        card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
+                        card = cardlib.Card(jcards[idx], fmt_ordered = fmt_ordered)
                # if there isn't one, settle with index 0
                if idx >= len(jcards):
                    idx = 0
-                    card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
+                    card = cardlib.Card(jcards[idx], fmt_ordered = fmt_ordered)
                # we could go back and look for a card satisfying one of the criteria,
                # but eh

@ -78,7 +88,7 @@ def main(fname, oname = None, verbose = True,
            text = f.read()
        for card_src in text.split(utils.cardsep):
            if card_src:
-                card = cardlib.Card(card_src, fmt_ordered = decode_fields)
+                card = cardlib.Card(card_src, fmt_ordered = fmt_ordered)
                if card.valid:
                    valid += 1
                elif card.parsed:
@ -96,14 +106,16 @@ def main(fname, oname = None, verbose = True,
    for card in cards:
        if not card.parsed and not card.text.text:
            bad_count += 1
+        elif len(card.name) > 50 or len(card.rarity) > 3:
+            bad_count += 1
        else:
            good_count += 1
        if good_count + bad_count > 15: 
            break
    # random heuristic
    if bad_count > 10:
-        print 'Saw a bunch of unparsed cards with no text:'
-        print 'If this is a legacy format, try rerunning with --norarity'
+        print 'WARNING: Saw a bunch of unparsed cards:'
+        print '         If this is a legacy format, try rerunning with "-e old" or "-e norarity"'

    if creativity:
        cbow = CBOW()
@ -190,6 +202,10 @@ if __name__ == '__main__':
                        help='encoded card file or json corpus to encode')
    parser.add_argument('outfile', nargs='?', default=None,
                        help='output file, defaults to stdout')
+    parser.add_argument('-e', '--encoding', default='std', choices=utils.formats,
+                        # the formats are not repeated here: argparse lists the choices itself
+                        help='encoding format to use',
+    )
    parser.add_argument('-g', '--gatherer', action='store_true',
                        help='emulate Gatherer visual spoiler')
    parser.add_argument('-f', '--forum', action='store_true',
@ -198,14 +214,12 @@ if __name__ == '__main__':
                        help='use CBOW fuzzy matching to check creativity of cards')
    parser.add_argument('-d', '--dump', action='store_true',
                        help='dump out lots of information about invalid cards')
-    parser.add_argument('--norarity', action='store_true',
-                        help='the card format has no rarity field; use for legacy input')
    parser.add_argument('-v', '--verbose', action='store_true', 
                        help='verbose output')
    parser.add_argument('-mse', '--mse', action='store_true', help='use Magic Set Editor 2 encoding; will output as .mse-set file')
    
    args = parser.parse_args()
-    main(args.infile, args.outfile, verbose = args.verbose, 
+    main(args.infile, args.outfile, verbose = args.verbose, encoding = args.encoding,
         gatherer = args.gatherer, for_forum = args.forum, for_mse = args.mse,
-         creativity = args.creativity, norarity = args.norarity, vdump = args.dump)
+         creativity = args.creativity, vdump = args.dump)
    exit(0)
--- a/encode.py
+++ b/encode.py
@ -19,51 +19,53 @@ def exclude_types(cardtype):
 def exclude_layouts(layout):
    return layout in ['token', 'plane', 'scheme', 'phenomenon', 'vanguard']

-def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stable = False):
+def main(fname, oname = None, verbose = True, encoding = 'std', 
+         nolinetrans = False, randomize = False, nolabel = False, stable = False):
    fmt_ordered = cardlib.fmt_ordered_default
-    fmt_labeled = None
+    fmt_labeled = None if nolabel else cardlib.fmt_labeled_default
    fieldsep = utils.fieldsep
+    line_transformations = not nolinetrans
    randomize_fields = False
-    randomize_mana = False
+    randomize_mana = randomize
    initial_sep = True
    final_sep = True

    # set the properties of the encoding
-    if encoding in ['vec']:
+
+    if encoding in ['std']:
        pass
-    elif encoding in ['std']:
-        if dupes == 0:
-            dupes = 1
-    elif encoding in ['rmana']:
-        if dupes == 0:
-            dupes = 1
-        randomize_mana = True
-    elif encoding in ['rmana_dual']:
-        if dupes == 0:
-            dupes = 1
-        fmt_ordered = fmt_ordered + [cardlib.field_cost]
-        randomize_mana = True
+    elif encoding in ['named']:
+        fmt_ordered = cardlib.fmt_ordered_named
+    elif encoding in ['noname']:
+        fmt_ordered = cardlib.fmt_ordered_noname
    elif encoding in ['rfields']:
-        if dupes == 0:
-            dupes = 1
-        fmt_labeled = cardlib.fmt_labeled_default
        randomize_fields = True
-        #randomize_mana = True
        final_sep = False
+    elif encoding in ['old']:
+        fmt_ordered = cardlib.fmt_ordered_old
+    elif encoding in ['norarity']:
+        fmt_ordered = cardlib.fmt_ordered_norarity
+    elif encoding in ['vec']:
+        pass
+    elif encoding in ['custom']:
+        ## put custom format decisions here ##########################
+        
+        ## end of custom format ######################################
+        pass
    else:
        raise ValueError('encode.py: unknown encoding: ' + encoding)

-    if dupes <= 0:
-        dupes = 1 
-
    if verbose:
        print 'Preparing to encode:'
        print '  Using encoding ' + repr(encoding)
-        if dupes > 1:
-            print '  Duplicating each card ' + str(dupes) + ' times.'
        if stable:
            print '  NOT randomizing order of cards.'
-            
+        if randomize_mana:  # set from the randomize argument
+            print '  Randomizing order of symbols in manacosts.'
+        if not fmt_labeled:  # fmt_labeled is None when nolabel is set
+            print '  NOT labeling fields for this run (may be harder to decode).'
+        if not line_transformations:  # line_transformations is the inverse of nolinetrans
+            print '  NOT using line reordering transformations'

    cards = []
    valid = 0
@ -109,7 +111,7 @@ def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stabl
                
                if card.valid:
                    valid += 1
-                    cards += [card] * dupes
+                    cards += [card]
                elif card.parsed:
                    invalid += 1
                else:
@ -126,7 +128,7 @@ def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stabl
                card = cardlib.Card(card_src)
                if card.valid:
                    valid += 1
-                    cards += [card] * dupes
+                    cards += [card]
                elif card.parsed:
                    invalid += 1
                else:
@ -174,17 +176,23 @@ if __name__ == '__main__':
                        help='encoded card file or json corpus to encode')
    parser.add_argument('outfile', nargs='?', default=None,
                        help='output file, defaults to stdout')
-    parser.add_argument('-d', '--duplicate', metavar='N', type=int, default=0,
-                        help='number of times to duplicate each card')
-    parser.add_argument('-e', '--encoding', default='std',
-                        choices=['std', 'rmana', 'rmana_dual', 'rfields', 'vec'])
+    parser.add_argument('-e', '--encoding', default='std', choices=utils.formats,
+                        # the formats are not repeated here: argparse lists the choices itself
+                        help='encoding format to use',
+    )
+    parser.add_argument('-r', '--randomize', action='store_true',
+                        help='randomize the order of symbols in mana costs')
+    parser.add_argument('--nolinetrans', action='store_true',
+                        help="don't reorder lines of card text")
+    parser.add_argument('--nolabel', action='store_true',
+                        help="don't label fields")
    parser.add_argument('-s', '--stable', action='store_true',
                        help="don't randomize the order of the cards")
    parser.add_argument('-v', '--verbose', action='store_true', 
                        help='verbose output')
    
    args = parser.parse_args()
-    main(args.infile, args.outfile, verbose = args.verbose, dupes = args.duplicate,
-         encoding = args.encoding, stable = args.stable)
+    main(args.infile, args.outfile, verbose = args.verbose, encoding = args.encoding, 
+         nolinetrans = args.nolinetrans, randomize = args.randomize, nolabel = args.nolabel, 
+         stable = args.stable)
    exit(0)
-
--- a/lib/cardlib.py
+++ b/lib/cardlib.py
@ -88,7 +88,8 @@ fieldnames = [
    field_text,
 ]

-fmt_ordered_default = [
+# legacy
+fmt_ordered_old = [
    field_name,
    field_supertypes,
    field_types,
@ -99,6 +100,52 @@ fmt_ordered_default = [
    field_cost,
    field_text,
 ]
+fmt_ordered_norarity = [  # legacy order for input that has no rarity field
+    field_name,
+    field_supertypes,
+    field_types,
+    field_loyalty,
+    field_subtypes,
+    field_pt,
+    field_cost,
+    field_text,
+]
+
+# standard field order: types come first, the name comes last
+fmt_ordered_default = [
+    field_types,
+    field_supertypes,
+    field_subtypes,
+    field_loyalty,
+    field_pt,
+    field_text,
+    field_cost,
+    field_rarity,
+    field_name,
+]
+
+# minor variations on the standard field order
+fmt_ordered_noname = [  # standard order with field_name left out
+    field_types,
+    field_supertypes,
+    field_subtypes,
+    field_loyalty,
+    field_pt,
+    field_text,
+    field_cost,
+    field_rarity,
+]
+fmt_ordered_named = [  # standard order with field_name moved to the front
+    field_name,
+    field_types,
+    field_supertypes,
+    field_subtypes,
+    field_loyalty,
+    field_pt,
+    field_text,
+    field_cost,
+    field_rarity,
+]

 fmt_labeled_default = {
    field_name : field_label_name,
--- a/lib/utils.py
+++ b/lib/utils.py
@ -9,6 +9,18 @@ import config
 # special chunk of text that Magic Set Editor 2 requires at the start of all set files.
 mse_prepend = 'mse version: 0.3.8\ngame: magic\nstylesheet: m15\nset info:\n\tsymbol:\nstyling:\n\tmagic-m15:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay:\n\tmagic-m15-clear:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-extra-improved:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\tpt box symbols: magic-pt-symbols-extra.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-planeswalker:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-planeswalker-promo-black:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-promo-dka:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-m15-token-clear:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker-4abil:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker-clear:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n\tmagic-new-planeswalker-promo-black:\n\t\ttext box mana symbols: magic-mana-small.mse-symbol-font\n\t\toverlay: \n'

+# encoding formats we know about; offered as the -e/--encoding choices in encode.py and decode.py
+formats = [
+    'std',
+    'named',
+    'noname',
+    'rfields',
+    'old',
+    'norarity',
+    'vec',
+    'custom',
+]
+
 # separators
 cardsep = config.cardsep
 fieldsep = config.fieldsep