added card sorting script, also added renaming of counter to uncast in encoding

2015-06-29 22:40:14 -07:00 · 2015-06-29 22:40:14 -07:00 · 6df5a4410e
commit 6df5a4410e
parent edfa7100b7
4 changed files with 561 additions and 371 deletions
--- a/encode.py
+++ b/encode.py
@ -75,13 +75,17 @@ this_marker = '@'
 counter_marker = '#'
 bsidesep = '\n'

-unary_max = 30
+unary_max = 20

 def to_unary(s):
    numbers = re.findall(r'[0123456789]+', s)
    for n in sorted(numbers, cmp = lambda x,y: cmp(int(x), int(y)) * -1):
        i = int(n)
-        if i == 40:
+        if i == 25:
+            s = s.replace(n, 'twenty~five')
+        elif i == 30:
+            s = s.replace(n, 'thirty')
+        elif i == 40:
            s = s.replace(n, 'forty')
        elif i == 50:
            s = s.replace(n, 'fifty')
@ -377,6 +381,56 @@ def replace_counters(s):
    return s


+# the word counter is confusing when used to refer to what we do to spells
+# and sometimes abilities to make them not happen. Let's rename that.
+# call this after doing the counter replacement to simplify the regexes
+counter_rename = 'uncast'
+def rename_uncast(s):
+    # Returns s with the verb "counter" swapped for counter_rename in each of
+    # the phrasings listed below. Only plain substring replacement is used,
+    # so any "counter" outside these phrasings is left exactly as it was.
+    verb = counter_rename
+
+    # (text to find, text to put in its place), applied from top to bottom
+    substitutions = (
+        ('counter target ', verb + ' target '),
+        ('counter a ', verb + ' a '),
+        ('counter all ', verb + ' all '),
+        ('counters a ', verb + 's a '),
+        # the past tense maps to the bare verb, which can read a little oddly
+        ('countered', verb),
+        ('counter that ', verb + ' that '),
+        ('counter @', verb + ' @'),
+        # "counter it" is tricky, so it is only matched right after a comma
+        (', counter it', ', ' + verb + ' it'),
+        # it happens at least once, thanks wizards!
+        ('counter the ', verb + ' the '),
+        ('counter up to ', verb + ' up to '),
+    )
+    for phrase, renamed in substitutions:
+        s = s.replace(phrase, renamed)
+
+    # To audit coverage, look for any 'counter' left in the result that is
+    # not part of '# counter', '^ counter', '& counter' or 'countertype'.
+    # By manual inspection of a few dozen texts, the list above about covers it.
+    return s
+    
+
 # run only after doing unary conversion
 def fix_dashes(s):
    s = s.replace('-' + unary_marker, reserved_indicator)
@ -470,6 +524,7 @@ def encode(card):
        text = fix_dashes(text)
        text = fix_x(text)
        text = replace_counters(text)
+        text = rename_uncast(text)
        text = relocate_equip(text)
        text = replace_newlines(text)
        encoding += text
--- a/output.txt
+++ b/output.txt
--- a/sortcards.py
+++ b/sortcards.py
@ -0,0 +1,112 @@
+import re
+import codecs
+import sys
+
+# returns back a dictionary mapping the names of classes of cards
+# to lists of cards in those classes
+def sortcards(cards):
+    # every class starts out empty, so callers always see all of the keys
+    class_names = (
+        'multicards',
+        'X cards', 'counter cards', 'choice cards',
+        'equipment', 'levelers', 'legendary',
+        'planeswalkers', 'lands', 'instants', 'sorceries', 'enchantments',
+        'noncreature artifacts', 'creatures', 'other',
+    )
+    classes = dict((label, []) for label in class_names)
+
+    # inclusive classes: a card joins every class for which it contains at
+    # least one of the listed substrings
+    inclusive_rules = (
+        ('X cards', ('X',)),
+        ('counter cards', ('#',)),
+        ('choice cards', ('choose one ~', 'choose two ~', '=')),
+        ('equipment', ('|equipment|', 'equip {')),
+        ('levelers', ('level up', 'level &')),
+        ('legendary', ('|legendary|',)),
+    )
+
+    # exclusive classes: checked in this order, and only the first hit counts
+    exclusive_rules = (
+        ('planeswalkers', ('|planeswalker|',)),
+        ('lands', ('|land|',)),
+        ('instants', ('|instant|',)),
+        ('sorceries', ('|sorcery|',)),
+        ('enchantments', ('|enchantment|',)),
+        ('noncreature artifacts', ('|artifact|',)),
+        ('creatures', ('|creature|', 'artifact creature')),
+    )
+
+    for card in cards:
+        # special class: multicards are filed here and nowhere else
+        if '|\n|' in card:
+            classes['multicards'].append(card)
+            continue
+
+        for label, needles in inclusive_rules:
+            if any(needle in card for needle in needles):
+                classes[label].append(card)
+
+        for label, needles in exclusive_rules:
+            if any(needle in card for needle in needles):
+                classes[label].append(card)
+                break
+        else:
+            # none of the exclusive rules matched
+            classes['other'].append(card)
+
+    return classes
+
+def main(fname, oname = None, verbose = True):
+    # Reads the encoded card file fname, groups its cards with sortcards(),
+    # prints a per-class count to stdout, then writes every class wrapped in
+    # [spoiler=<class>] ... [/spoiler] tags to oname (as utf-8), or to stdout
+    # when oname is None. verbose turns the progress messages on or off.
+    if verbose:
+        print 'Opening encoded card file: ' + fname
+
+    # 'with' closes the input file even if the read fails
+    with open(fname, 'r') as f:
+        text = f.read()
+
+    # cards are separated by blank lines; we get rid of the first and last
+    # chunks because they are probably partial
+    cards = text.split('\n\n')[1:-1]
+    classes = sortcards(cards)
+
+    if oname is None:
+        outputter = sys.stdout
+    else:
+        if verbose:
+            print 'Writing output to: ' + oname
+        outputter = codecs.open(oname, 'w', 'utf-8')
+
+    try:
+        for cardclass in classes:
+            print cardclass + ': ' + str(len(classes[cardclass]))
+
+        for cardclass in classes:
+            outputter.write('[spoiler=' + cardclass + ']\n')
+            for card in classes[cardclass]:
+                outputter.write(card + '\n\n')
+            outputter.write('[/spoiler]')
+    finally:
+        # only close what we opened ourselves; never close sys.stdout
+        if oname is not None:
+            outputter.close()
+
+    
+if __name__ == '__main__':
+    # command line entry point: <encoded file> [output filename]
+    if len(sys.argv) == 2:
+        main(sys.argv[1])
+    elif len(sys.argv) == 3:
+        main(sys.argv[1], oname = sys.argv[2])
+    else:
+        print 'Usage: ' + sys.argv[0] + ' ' + '<encoded file> [output filename]'
+        # sys.exit instead of the bare exit() helper, which is only
+        # available when the site module has been loaded
+        sys.exit(1)
+
--- a/unscramble.py
+++ b/unscramble.py
@ -11,7 +11,7 @@ def from_unary(s):
        s = s.replace(number, str(i))
    return s

-def cleanup_mana(s):
+def cleanup_mana(s, pretty = False):
    untranslations = {
        'WW' : '{W}',
        'UU' : '{U}',
@ -43,10 +43,28 @@ def cleanup_mana(s):
        'XX' : '{X}',
    }

+    untranslations_pretty = {
+        'WW' : 'W',
+        'UU' : 'U',
+        'BB' : 'B',
+        'RR' : 'R',
+        'GG' : 'G',
+        'PP' : 'P',
+        'SS' : 'S',
+        'XX' : 'X',
+    }
+
+    if pretty:
+        ldelim = ''
+        rdelim = ''
+    else:
+        ldelim = '{'
+        rdelim = '}'
+
    manacosts = re.findall(r'\{[WUBRGPVSX\^]*\}', s)
    for cost in manacosts:
        if cost == '{}':
-            s = s.replace(cost, '{0}')
+            s = s.replace(cost, ldelim + '0' + rdelim)
            continue

        innercost = cost[1:-1]
@ -59,18 +77,23 @@ def cleanup_mana(s):
            innercost = innercost.replace(count, '')
            colorless_total += len(count)            
        if colorless_total > 0:
-            newcost += '{' + str(colorless_total) + '}'
+            newcost += ldelim + str(colorless_total) + rdelim

        # now try to read the remaining characters in pairs
        success = True
        while len(innercost) > 1:
            fragment = innercost[0:2]
-            if fragment in untranslations:
+            if pretty and fragment in untranslations_pretty:
+                newcost += untranslations_pretty[fragment]
+            elif fragment in untranslations:
                newcost += untranslations[fragment]
            else:
                success = False
                break
            innercost = innercost[2:]
+
+        if pretty:
+            cost = '[mana]' + cost + '[/mana]'
        
        if len(innercost) == 0 and success:
            s = s.replace(cost, newcost)
@ -89,9 +112,11 @@ def forum_reorder(s):
    fields = s.split('|')
    # should see ten of em
    if not len(fields) == 10:
+        #print 'badlen ' + str(len(fields))
        return s
    # first and last should be empty, if we had | on the ends
-    if not (fields[0] == '' and fields [-1] == ''):
+    if not (fields[0] == '' and fields [-1] == '\n'):
+        #print 'badfields ' + repr(fields[0]) + ', ' + repr(fields[-1]) 
        return s
    name = fields[1]
    supertypes = fields[2]
@ -103,41 +128,41 @@ def forum_reorder(s):
    text = fields[8]

    new_s = ''
+    if not name == '':
+        new_s += name + '\n'
    if not cost == '':
        new_s += cost + '\n'
-    #if not name == '':
-    new_s += name + '\n'
+
    if not supertypes == '':
        new_s += supertypes + ' '
-    #if not types == '':
-    new_s += types
+    if not types == '':
+        new_s += types
    if not subtypes == '':
-        new_s += ' - ' + subtypes + '\n'
+        new_s += ' ~ ' + subtypes + '\n'
    else:
        new_s += '\n'
+    # super special case, doubt it will come up
+    if types == '' and subtypes == '':
+        new_s += '\n'
+    
    if not text == '':
        new_s += text + '\n'
    if not pt == '':
-        new_s += pt
+        new_s += pt + '\n'
    if not loyalty == '':
-        new_s += loyalty
+        new_s += '(' + loyalty + ')\n'

-    new_s = new_s.replace('{', '[mana]')
-    new_s = new_s.replace('}', '[/mana]')
-    new_s = new_s.replace('T', '[mana]T[/mana]')
-    new_s = new_s.replace('Q', '[mana]Q[/mana]')
+    return new_s

-    return s
-
-def unscramble(s):
+def unscramble(s, pretty = False):
    s = from_unary(s)
-    s = cleanup_mana(s)
+    s = cleanup_mana(s, pretty)
    s = unreplace_newlines(s)
    s = forum_reorder(s)
    return s
    

-def main(fname, oname = None, verbose = True):
+def main(fname, oname = None, verbose = True, pretty = False):
    if verbose:
        print 'Opening encoded card file: ' + fname

@ -151,15 +176,11 @@ def main(fname, oname = None, verbose = True):
        ofile = codecs.open(oname, 'w', 'utf-8')

    for line in lines:
-        val = unscramble(line)
+        val = unscramble(line, pretty)
        if oname == None:
            sys.stdout.write(val)
        else:
            ofile.write(val)
-        
-    # print len(badwords)
-    # for word in badwords:
-    #     print word

    if not oname == None:
        ofile.close()
@ -171,7 +192,9 @@ if __name__ == '__main__':
        main(sys.argv[1])
    elif len(sys.argv) == 3:
        main(sys.argv[1], oname = sys.argv[2])
+    elif len(sys.argv) == 4 and sys.argv[3] in ['p', '-p', 'pretty', '-pretty', '--pretty']:
+        main(sys.argv[1], oname = sys.argv[2], pretty = True)
    else:
-        print 'Usage: ' + sys.argv[0] + ' ' + '<encoded file> [output filename]'
+        print 'Usage: ' + sys.argv[0] + ' ' + '<encoded file> [output filename [p]]'
        exit(1)