split most of the code from config into utils, encode now uses utils as a lib

fixed MANY MANY ERRORS in the original output file by using this superior code
This commit is contained in:
Bill Zorn 2015-07-06 22:26:25 -07:00
parent 110ac8473c
commit db0b9a9f9a
3 changed files with 460 additions and 505 deletions

371
config.py
View file

@ -1,14 +1,8 @@
import re
# Don't be intimidated by the massive size of this file. It provides both the
# raw character decisions made about the encoding scheme as variables, and
# a bunch of tables and functions to make dealing with mana costs and unary
# numbers easier. For the most part the functions should adapt if you change
# the specific delimiters and markers used.
# The decoding from mtgjson format is dependent on the specific structure of
# the internally used mana symbol strings, so if you want to change that you'll
# also have to change the json decoding functions.
# Raw character decisions for the encoding scheme. The utilities for handling
# unicode, unary numbers, mana costs, and special symbols live in utils, which
# redefines everything from config so that it can all be accessed from the utils module.
# separators
cardsep = '\n\n'
@ -29,29 +23,6 @@ untap_marker = 'Q'
# unambiguous synonyms
counter_rename = 'uncast'
# unicode / ascii conversion
unicode_trans = {
u'\u2014' : dash_marker, # unicode long dash
u'\u2022' : bullet_marker, # unicode bullet
u'\u2019' : '"', # single quote
u'\u2018' : '"', # single quote
u'\u2212' : '-', # minus sign
u'\xe6' : 'ae', # ae symbol
u'\xfb' : 'u', # u with caret
u'\xfa' : 'u', # u with accent
u'\xe9' : 'e', # e with accent
u'\xe1' : 'a', # a with accent
u'\xe0' : 'a', # a with accent going the other way
u'\xe2' : 'a', # a with caret
u'\xf6' : 'o', # o with umlaut
u'\xed' : 'i', # i with accent
}
# this one is one-way only
def to_ascii(s):
    """Replace every character listed in unicode_trans with its ascii stand-in.

    The conversion is one-way only: several characters share the same stand-in.
    """
    for uchar in unicode_trans:
        # unicode_trans is a dict, so it has to be indexed rather than called
        s = s.replace(uchar, unicode_trans[uchar])
    return s
# unary numbers
unary_marker = '&'
unary_counter = '^'
@ -64,339 +35,3 @@ unary_exceptions = {
100: 'one hundred',
200: 'two hundred',
}
def to_unary(s, warn = False):
    """Rewrite each decimal number in s as a unary number or a spelled-out exception.

    Numbers found in unary_exceptions are replaced by that table's text. Every
    other number becomes unary_marker followed by one unary_counter per unit,
    capped at unary_max. If warn is set, s is printed whenever a number is capped.
    """
    numbers = re.findall(r'[0123456789]+', s)
    # replace largest first to avoid accidentally replacing shared substrings
    for n in sorted(numbers, key = int, reverse = True):
        i = int(n)
        if i in unary_exceptions:
            # the table is keyed by the integer value, not by the matched text
            s = s.replace(n, unary_exceptions[i])
            continue
        if i > unary_max:
            i = unary_max
            if warn:
                print(s)
        s = s.replace(n, unary_marker + unary_counter * i)
    return s
def from_unary(s):
    """Convert every unary number in s back to decimal digits.

    A unary number is unary_marker followed by zero or more repetitions of
    unary_counter; a bare marker decodes to 0.
    """
    # group the counter so that a multi-character counter repeats as a whole
    pattern = re.escape(unary_marker) + '(?:' + re.escape(unary_counter) + ')*'

    def decode(match):
        # floor division keeps the count an int on every Python version
        count = (len(match.group(0)) - len(unary_marker)) // len(unary_counter)
        return str(count)

    # substituting match by match means a short number can never clobber
    # part of a longer one
    return re.sub(pattern, decode, s)
# mana syntax
mana_open_delimiter = '{'
mana_close_delimiter = '}'
mana_json_open_delimiter = mana_open_delimiter
mana_json_close_delimiter = mana_close_delimiter
mana_json_hybrid_delimiter = '/'
mana_forum_open_delimiter = '[mana]'
mana_forum_close_delimiter = '[/mana]'
mana_unary_marker = '' # if the same as unary_marker, from_unary WILL replace numbers in mana costs
mana_unary_counter = unary_counter
# individual mana symbols
mana_W = 'W' # single color
mana_U = 'U'
mana_B = 'B'
mana_R = 'R'
mana_G = 'G'
mana_P = 'P' # colorless phyrexian
mana_S = 'S' # snow
mana_X = 'X' # colorless X
mana_WP = 'WP' # single color phyrexian
mana_UP = 'UP'
mana_BP = 'BP'
mana_RP = 'RP'
mana_GP = 'GP'
mana_2W = '2W' # single color hybrid
mana_2U = '2U'
mana_2B = '2B'
mana_2R = '2R'
mana_2G = '2G'
mana_WU = 'WU' # dual color hybrid
mana_WB = 'WB'
mana_RW = 'RW'
mana_GW = 'GW'
mana_UB = 'UB'
mana_UR = 'UR'
mana_GU = 'GU'
mana_BR = 'BR'
mana_BG = 'BG'
mana_RG = 'RG'
# alternative order symbols
mana_WP_alt = 'PW' # single color phyrexian
mana_UP_alt = 'PU'
mana_BP_alt = 'PB'
mana_RP_alt = 'PR'
mana_GP_alt = 'PG'
mana_2W_alt = 'W2' # single color hybrid
mana_2U_alt = 'U2'
mana_2B_alt = 'B2'
mana_2R_alt = 'R2'
mana_2G_alt = 'G2'
mana_WU_alt = 'UW' # dual color hybrid
mana_WB_alt = 'BW'
mana_RW_alt = 'WR'
mana_GW_alt = 'WG'
mana_UB_alt = 'BU'
mana_UR_alt = 'RU'
mana_GU_alt = 'UG'
mana_BR_alt = 'RB'
mana_BG_alt = 'GB'
mana_RG_alt = 'GR'
# special
mana_2 = '2' # use with 'in' to identify single color hybrid
# master symbol lists
mana_syms = [
mana_W,
mana_U,
mana_B,
mana_R,
mana_G,
mana_P,
mana_S,
mana_X,
mana_WP,
mana_UP,
mana_BP,
mana_RP,
mana_GP,
mana_2W,
mana_2U,
mana_2B,
mana_2R,
mana_2G,
mana_WU,
mana_WB,
mana_RW,
mana_GW,
mana_UB,
mana_UR,
mana_GU,
mana_BR,
mana_BG,
mana_RG,
]
mana_symalt = [
mana_WP_alt,
mana_UP_alt,
mana_BP_alt,
mana_RP_alt,
mana_GP_alt,
mana_2W_alt,
mana_2U_alt,
mana_2B_alt,
mana_2R_alt,
mana_2G_alt,
mana_WU_alt,
mana_WB_alt,
mana_RW_alt,
mana_GW_alt,
mana_UB_alt,
mana_UR_alt,
mana_GU_alt,
mana_BR_alt,
mana_BG_alt,
mana_RG_alt,
]
mana_symall = mana_syms + mana_symalt
# alt symbol conversion
def mana_alt(sym):
    """Return the alternative-order spelling of a mana symbol.

    Single-character symbols come back unchanged; longer symbols are reversed.
    Raises ValueError if sym is not in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_alt(): ' + repr(sym))
    return sym if len(sym) < 2 else ''.join(reversed(sym))
# produce intended neural net output format
def mana_sym_to_encoding(sym):
    """Return the encoded form of a mana symbol.

    Single-character symbols are doubled; longer symbols are used as they are.
    Raises ValueError if sym is not in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_encoding(): ' + repr(sym))
    if len(sym) >= 2:
        return sym
    return sym + sym
# produce json formatting used in mtgjson
def mana_sym_to_json(sym):
    """Return the json-style spelling of a mana symbol.

    Single-character symbols are wrapped in the json delimiters; for longer
    symbols the first two characters are joined by the hybrid delimiter first.
    Raises ValueError if sym is not in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_json(): ' + repr(sym))
    if len(sym) < 2:
        inner = sym
    else:
        inner = sym[0] + mana_json_hybrid_delimiter + sym[1]
    return mana_json_open_delimiter + inner + mana_json_close_delimiter
# produce pretty formatting that renders on mtgsalvation forum
# converts individual symbols; surrounding [mana][/mana] tags are added elsewhere
def mana_sym_to_forum(sym):
    """Return the forum spelling of a single mana symbol.

    Alternative-order symbols are flipped with mana_alt() first. Single
    characters are returned bare; longer symbols are wrapped in the json
    delimiters. Raises ValueError if sym is not in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_forum(): ' + repr(sym))
    canonical = mana_alt(sym) if sym in mana_symalt else sym
    if len(canonical) >= 2:
        return mana_json_open_delimiter + canonical + mana_json_close_delimiter
    return canonical
# forward symbol tables for encoding
mana_syms_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_syms}
mana_symalt_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_symalt}
mana_symall_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_symall}
mana_syms_jencode = {sym : mana_sym_to_json(sym) for sym in mana_syms}
mana_symalt_jencode = {sym : mana_sym_to_json(sym) for sym in mana_symalt}
mana_symall_jencode = {sym : mana_sym_to_json(sym) for sym in mana_symall}
# reverse symbol tables for decoding
mana_syms_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_syms}
mana_symalt_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_symalt}
mana_symall_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_symall}
mana_syms_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_syms}
mana_symalt_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_symalt}
mana_symall_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_symall}
# going straight from json to encoding and vice versa
def mana_encode_direct(jsym):
    """Map a json mana symbol straight to its encoded form.

    Raises ValueError if jsym is not a key of mana_symall_jdecode.
    """
    if jsym in mana_symall_jdecode:
        return mana_symall_encode[mana_symall_jdecode[jsym]]
    raise ValueError('json string not found in decode table for mana_encode_direct(): '
                     + repr(jsym))
def mana_decode_direct(sym):
    """Map an encoded mana symbol straight to its json form.

    Raises ValueError if sym is not a key of mana_symall_decode.
    """
    if sym in mana_symall_decode:
        return mana_symall_jencode[mana_symall_decode[sym]]
    raise ValueError('mana symbol not found in decode table for mana_decode_direct(): '
                     + repr(sym))
# hacked in support for mtgsalvation forum
def mana_decode_direct_forum(sym):
    """Map an encoded mana symbol straight to its forum form.

    Raises ValueError if sym is not a key of mana_symall_decode.
    """
    if sym in mana_symall_decode:
        return mana_sym_to_forum(mana_symall_decode[sym])
    raise ValueError('mana symbol not found in decode table for mana_decode_direct_forum(): '
                     + repr(sym))
# processing entire strings
def unique_string(s):
    """Return the distinct characters of s joined into one string.

    The characters are sorted so the result is the same on every run, instead
    of following set iteration order.
    """
    return ''.join(sorted(set(s)))
mana_charset_special = mana_unary_marker + mana_unary_counter
mana_charset_strict = unique_string(''.join(mana_symall) + mana_charset_special)
mana_charset = unique_string(mana_charset_strict + mana_charset_strict.lower())
mana_regex_strict = (re.escape(mana_open_delimiter) + '['
+ re.escape(mana_charset_strict)
+ ']*' + re.escape(mana_close_delimiter))
mana_regex = (re.escape(mana_open_delimiter) + '['
+ re.escape(mana_charset)
+ ']*' + re.escape(mana_close_delimiter))
# as a special case, we let unary or decimal numbers exist in json mana strings
mana_jcharset_special = '0123456789' + mana_unary_marker + mana_unary_counter
mana_jcharset_strict = unique_string(''.join(mana_symall_jdecode) + mana_jcharset_special)
mana_jcharset = unique_string(mana_jcharset_strict + mana_jcharset_strict.lower())
# note that json mana strings can't be empty between the delimiters
mana_jregex_strict = (re.escape(mana_json_open_delimiter) + '['
+ re.escape(mana_jcharset_strict)
+ ']+' + re.escape(mana_json_close_delimiter))
mana_jregex = (re.escape(mana_json_open_delimiter) + '['
+ re.escape(mana_jcharset)
+ ']+' + re.escape(mana_json_close_delimiter))
number_decimal_regex = r'[0123456789]+'
number_unary_regex = re.escape(unary_marker) + re.escape(unary_counter) + '*'
mana_decimal_regex = (re.escape(mana_json_open_delimiter) + number_decimal_regex
+ re.escape(mana_json_close_delimiter))
mana_unary_regex = (re.escape(mana_json_open_delimiter) + number_unary_regex
+ re.escape(mana_json_close_delimiter))
# convert a json mana string to the proper encoding
def mana_translate(jmanastr):
    """Convert a json mana string to the delimited internal encoding.

    Braced unary and decimal numbers become runs of mana_unary_counter, each
    known json symbol becomes its encoded symbol, and the result is wrapped in
    the mana delimiters.
    """
    manastr = jmanastr
    for n in sorted(re.findall(mana_unary_regex, manastr), key = len, reverse = True):
        ns = re.findall(number_unary_regex, n)
        # number_unary_regex is built from unary_marker / unary_counter, so the
        # count has to be measured with those, not with the mana_* variants
        i = (len(ns[0]) - len(unary_marker)) // len(unary_counter)
        manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i)
    for n in sorted(re.findall(mana_decimal_regex, manastr), key = len, reverse = True):
        ns = re.findall(number_decimal_regex, n)
        i = int(ns[0])
        manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i)
    for jsym in sorted(mana_symall_jdecode, key = len, reverse = True):
        if jsym in manastr:
            manastr = manastr.replace(jsym, mana_encode_direct(jsym))
    return mana_open_delimiter + manastr + mana_close_delimiter
# convert an encoded mana string back to json
mana_symlen_min = min([len(sym) for sym in mana_symall_decode])
mana_symlen_max = max([len(sym) for sym in mana_symall_decode])
def mana_untranslate(manastr, for_forum = False):
    """Convert one encoded mana string back to json, or to forum markup.

    The first and last characters of manastr are dropped as delimiters. Each
    mana_unary_counter adds one to the colorless total, each known encoded
    symbol is decoded, and anything unrecognized is skipped one character at a
    time. The colorless total comes first in the output; it is left out when
    it is zero, unless no symbols were decoded at all.
    """
    body = manastr[1:-1]
    decode_sym = mana_decode_direct_forum if for_forum else mana_decode_direct
    marker_len = len(mana_unary_marker)
    counter_len = len(mana_unary_counter)
    pieces = []
    colorless = 0
    pos = 0
    while pos < len(body):
        # an empty marker must never match, or the loop would never advance
        if marker_len > 0 and body[pos:pos + marker_len] == mana_unary_marker:
            pos += marker_len
            continue
        if body[pos:pos + counter_len] == mana_unary_counter:
            pos += counter_len
            colorless += 1
            continue
        for width in range(mana_symlen_min, mana_symlen_max + 1):
            candidate = body[pos:pos + width]
            if candidate in mana_symall_decode:
                pieces.append(decode_sym(candidate))
                pos += width
                break
        else:
            # unknown character: step over it so the loop always terminates
            pos += 1
    symbols = ''.join(pieces)
    show_count = symbols == '' or colorless != 0
    if for_forum:
        count = str(colorless) if show_count else ''
        return mana_forum_open_delimiter + count + symbols + mana_forum_close_delimiter
    if show_count:
        prefix = mana_json_open_delimiter + str(colorless) + mana_json_close_delimiter
    else:
        prefix = ''
    return prefix + symbols
# finally, replacing all instances in a string
# notice the calls to .upper(), this way we recognize lowercase symbols as well just in case
def to_mana(s):
    """Translate every json mana string found in s into the internal encoding.

    Each match is upper-cased before translation so that lowercase symbols are
    recognized as well.
    """
    def translate(match):
        return mana_translate(match.group(0).upper())

    # substitute match by match: replacing by text in order of appearance lets
    # a short cost clobber part of a longer cost that contains it
    return re.sub(mana_jregex, translate, s)
def from_mana(s, for_forum = False):
    """Translate every encoded mana string found in s back to json or forum markup.

    Each match is upper-cased before translation so that lowercase symbols are
    recognized as well. for_forum is passed through to mana_untranslate().
    """
    def untranslate(match):
        return mana_untranslate(match.group(0).upper(), for_forum = for_forum)

    # substitute match by match so that text which has already been translated
    # is never matched and translated a second time
    return re.sub(mana_regex, untranslate, s)
# Translation could also be accomplished using the datamine.Manacost object's
# display methods, but these direct string transformations are retained for
# quick scripting and convenience (and used under the hood by that class to
# do its formatting).

168
encode.py
View file

@ -3,30 +3,27 @@ import re
import codecs
import sys
import utils
#badwords = []
valid_encoded_char = r'[abcdefghijklmnopqrstuvwxyz\'+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]'
dash_marker = '~'
bullet_marker = '='
reserved_marker = '\r'
def to_ascii(s):
    """Replace a fixed set of unicode characters in s with ascii stand-ins."""
    replacements = (
        (u'\u2014', dash_marker),    # unicode long dash
        (u'\u2022', bullet_marker),  # unicode bullet
        (u'\u2019', '"'),            # single quote
        (u'\u2018', '"'),            # single quote
        (u'\u2212', '-'),            # minus sign
        (u'\xe6', 'ae'),             # ae symbol
        (u'\xfb', 'u'),              # u with caret
        (u'\xfa', 'u'),              # u with accent
        (u'\xe9', 'e'),              # e with accent
        (u'\xe1', 'a'),              # a with accent
        (u'\xe0', 'a'),              # a with accent going the other way
        (u'\xe2', 'a'),              # a with caret
        (u'\xf6', 'o'),              # o with umlaut
        (u'\xed', 'i'),              # i with accent
    )
    for uchar, stand_in in replacements:
        s = s.replace(uchar, stand_in)
    return s
cardsep = utils.cardsep
fieldsep = utils.fieldsep
bsidesep = utils.bsidesep
newline = utils.newline
dash_marker = utils.dash_marker
bullet_marker = utils.bullet_marker
this_marker = utils.this_marker
counter_marker = utils.counter_marker
reserved_marker = utils.reserved_marker
x_marker = utils.x_marker
tap_marker = utils.tap_marker
untap_marker = utils.untap_marker
counter_rename = utils.counter_rename
unary_marker = utils.unary_marker
unary_counter = utils.unary_counter
# This whole things assumes the json format of mtgjson.com.
@ -63,107 +60,11 @@ def to_ascii(s):
# releaseDate - string
# starter - boolean
fieldsep = '|'
newline = '\\'
unary_marker = '&'
unary_counter = '^'
mana_open_delimiter = '{'
mana_close_delimiter = '}'
x_marker = 'X'
tap_marker = 'T'
untap_marker = 'Q'
this_marker = '@'
counter_marker = '%'
bsidesep = '\n'
unary_max = 20
def to_unary(s):
    """Rewrite each decimal number in s as unary, or as words for a few values.

    25, 30, 40, 50, 100 and 200 are spelled out. Every other number becomes
    unary_marker followed by one unary_counter per unit, truncated to
    unary_max; s is printed whenever a number is truncated.
    """
    spelled_out = {
        25: 'twenty~five',
        30: 'thirty',
        40: 'forty',
        50: 'fifty',
        100: 'one hundred',
        200: 'two hundred',
    }
    numbers = re.findall(r'[0123456789]+', s)
    # largest value first, so shorter numbers cannot clobber longer ones
    for n in sorted(numbers, key = int, reverse = True):
        i = int(n)
        if i in spelled_out:
            s = s.replace(n, spelled_out[i])
            continue
        if i > unary_max:
            # truncate to unary_max
            i = unary_max
            # warn, because we probably don't want this to happen
            print(s)
        s = s.replace(n, unary_marker + unary_counter * i)
    return s
# also handles the tap and untap symbols
def compress_mana(manastring):
    """Compress a lowercase mtgjson mana string such as '{3}{w}{2/b}'.

    Each known braced symbol is replaced by its code, each braced decimal
    number by that many unary_counter characters, and the result is wrapped in
    braces. A lone tap or untap symbol is returned without braces.
    """
    symbol_codes = (
        ('{w}', 'WW'),
        ('{u}', 'UU'),
        ('{b}', 'BB'),
        ('{r}', 'RR'),
        ('{g}', 'GG'),
        ('{p}', 'PP'),
        ('{w/p}', 'WP'),
        ('{u/p}', 'UP'),
        ('{b/p}', 'BP'),
        ('{r/p}', 'RP'),
        ('{g/p}', 'GP'),
        ('{2/w}', 'VW'),
        ('{2/u}', 'VU'),
        ('{2/b}', 'VB'),
        ('{2/r}', 'VR'),
        ('{2/g}', 'VG'),
        ('{w/u}', 'WU'),
        ('{w/b}', 'WB'),
        ('{r/w}', 'RW'),
        ('{g/w}', 'GW'),
        ('{u/b}', 'UB'),
        ('{u/r}', 'UR'),
        ('{g/u}', 'GU'),
        ('{b/r}', 'BR'),
        ('{b/g}', 'BG'),
        ('{r/g}', 'RG'),
        ('{s}', 'SS'),
        ('{x}', x_marker * 2),
        ('{t}', tap_marker),
        ('{q}', untap_marker),
    )
    for json_sym, code in symbol_codes:
        manastring = manastring.replace(json_sym, code)

    for braced in re.findall(r'\{[0123456789]+\}', manastring):
        # strip the surrounding braces to get at the digits
        amount = int(braced[1:-1])
        manastring = manastring.replace(braced, unary_counter * amount)

    # we don't really need delimiters for tap, it's a unique symbol anyways
    if manastring == tap_marker or manastring == untap_marker:
        return manastring
    return '{' + manastring + '}'
def replace_mana(s):
    """Compress every run of braced mana symbols in s with compress_mana()."""
    # NOTE(review): the character class has no 's', so '{s}' is never matched
    # even though compress_mana() has an entry for it - confirm whether that is intended
    pattern = r'\{[\{\}wubrgp/xtq0123456789]+\}'

    def compress(match):
        return compress_mana(match.group(0))

    # substitute match by match: replacing by text in order of appearance lets
    # a short cost clobber part of a longer cost that contains it
    return re.sub(pattern, compress, s)
def strip_reminder_text(s):
    """Remove every parenthesised span (reminder text) from s.

    The match is non-greedy, so two separate reminders on one line are removed
    individually and the text between them is kept.
    """
    return re.sub(r'\(.*?\)', '', s)
def replace_newlines(s):
    """Return s with every newline character replaced by a single backslash."""
    return '\\'.join(s.split('\n'))
@ -213,7 +114,7 @@ def replace_cardname(s, name):
for override in overrides:
s = s.replace(override, this_marker)
# some detection code when the overrides need to be fixed...
# some detection code for when the overrides need to be fixed...
# global badwords
# bad = False
# for word in name.replace(',', '').split():
@ -539,32 +440,35 @@ def encode(card):
return
encoding = fieldsep
name = card['name'].lower()
encoding += sanitize_name(name)
if 'name' in card:
name = card['name'].lower()
encoding += sanitize_name(name)
encoding += fieldsep
if 'supertypes' in card:
encoding += ' '.join(card['supertypes']).lower()
encoding += fieldsep
encoding += ' '.join(card['types']).lower()
if 'types' in card:
encoding += ' '.join(card['types']).lower()
encoding += fieldsep
if 'loyalty' in card:
encoding += to_unary(str(card['loyalty']))
encoding += utils.to_unary(str(card['loyalty']))
encoding += fieldsep
if 'subtypes' in card:
encoding += ' '.join(card['subtypes']).lower()
encoding += fieldsep
if 'power' in card and 'toughness' in card:
encoding += to_unary(card['power']) + '/' + to_unary(card['toughness'])
encoding += utils.to_unary(card['power']) + '/' + utils.to_unary(card['toughness'])
encoding += fieldsep
if 'manaCost' in card:
encoding += replace_mana(card['manaCost'].lower())
encoding += utils.to_mana(card['manaCost'].lower())
encoding += fieldsep
if 'text' in card:
text = card['text'].lower()
text = strip_reminder_text(text)
text = replace_cardname(text, name)
text = replace_mana(text)
text = to_unary(text)
text = utils.to_mana(text)
text = utils.to_symbols(text)
text = utils.to_unary(text)
text = fix_dashes(text)
text = fix_x(text)
text = replace_counters(text)
@ -575,24 +479,16 @@ def encode(card):
encoding += text.strip()
encoding += fieldsep
# HACK: put the cost again after the text
# if 'manaCost' in card:
# encoding += replace_mana(card['manaCost'].lower())
# encoding += fieldsep
# if 'flavor' in card:
# encoding += card['flavor'].lower()
# encoding += fieldsep
# now output the bside if there is one
if 'bside' in card:
encoding += bsidesep
encoding += encode(card['bside'])
encoding = to_ascii(encoding)
encoding = utils.to_ascii(encoding)
# encoding = re.sub(valid_encoded_char, '', encoding)
# if not encoding == '':
# print card
return encoding
def encode_duplicated(cards):

424
utils.py Normal file
View file

@ -0,0 +1,424 @@
import re
# Utilities for handling unicode, unary numbers, mana costs, and special symbols.
# For convenience we redefine everything from config so that it can all be accessed
# from the utils module.
import config
# separators
cardsep = config.cardsep
fieldsep = config.fieldsep
bsidesep = config.bsidesep
newline = config.newline
# special indicators
dash_marker = config.dash_marker
bullet_marker = config.bullet_marker
this_marker = config.this_marker
counter_marker = config.counter_marker
reserved_marker = config.reserved_marker
x_marker = config.x_marker
tap_marker = config.tap_marker
untap_marker = config.untap_marker
# unambiguous synonyms
counter_rename = config.counter_rename
# unicode / ascii conversion
unicode_trans = {
u'\u2014' : dash_marker, # unicode long dash
u'\u2022' : bullet_marker, # unicode bullet
u'\u2019' : '"', # single quote
u'\u2018' : '"', # single quote
u'\u2212' : '-', # minus sign
u'\xe6' : 'ae', # ae symbol
u'\xfb' : 'u', # u with caret
u'\xfa' : 'u', # u with accent
u'\xe9' : 'e', # e with accent
u'\xe1' : 'a', # a with accent
u'\xe0' : 'a', # a with accent going the other way
u'\xe2' : 'a', # a with caret
u'\xf6' : 'o', # o with umlaut
u'\xed' : 'i', # i with accent
}
# this one is one-way only
def to_ascii(s):
    """Replace every character listed in unicode_trans with its ascii stand-in.

    The conversion is one-way only: several characters share the same stand-in.
    """
    for uchar, stand_in in unicode_trans.items():
        s = s.replace(uchar, stand_in)
    return s
# unary numbers
unary_marker = config.unary_marker
unary_counter = config.unary_counter
unary_max = config.unary_max
unary_exceptions = config.unary_exceptions
def to_unary(s, warn = False):
    """Rewrite each decimal number in s as a unary number or a spelled-out exception.

    Numbers found in unary_exceptions are replaced by that table's text. Every
    other number becomes unary_marker followed by one unary_counter per unit,
    capped at unary_max. If warn is set, s is printed whenever a number is capped.
    """
    found = re.findall(r'[0123456789]+', s)
    # largest value first, so shorter numbers cannot clobber longer ones
    for digits in sorted(found, key = int, reverse = True):
        value = int(digits)
        if value in unary_exceptions:
            s = s.replace(digits, unary_exceptions[value])
            continue
        if value > unary_max:
            value = unary_max
            if warn:
                print(s)
        s = s.replace(digits, unary_marker + unary_counter * value)
    return s
def from_unary(s):
    """Convert every unary number in s back to decimal digits.

    A unary number is unary_marker followed by zero or more repetitions of
    unary_counter; a bare marker decodes to 0.
    """
    # group the counter so that a multi-character counter repeats as a whole
    pattern = re.escape(unary_marker) + '(?:' + re.escape(unary_counter) + ')*'

    def decode(match):
        # floor division keeps the count an int on every Python version
        count = (len(match.group(0)) - len(unary_marker)) // len(unary_counter)
        return str(count)

    # substituting match by match means a short number can never clobber
    # part of a longer one
    return re.sub(pattern, decode, s)
# mana syntax
mana_open_delimiter = '{'
mana_close_delimiter = '}'
mana_json_open_delimiter = mana_open_delimiter
mana_json_close_delimiter = mana_close_delimiter
mana_json_hybrid_delimiter = '/'
mana_forum_open_delimiter = '[mana]'
mana_forum_close_delimiter = '[/mana]'
mana_unary_marker = '' # if the same as unary_marker, from_unary WILL replace numbers in mana costs
mana_unary_counter = unary_counter
# The decoding from mtgjson format is dependent on the specific structure of
# these internally used mana symbol strings, so if you want to change them you'll
# also have to change the json decoding functions.
# standard mana symbol set
mana_W = 'W' # single color
mana_U = 'U'
mana_B = 'B'
mana_R = 'R'
mana_G = 'G'
mana_P = 'P' # colorless phyrexian
mana_S = 'S' # snow
mana_X = 'X' # colorless X
mana_WP = 'WP' # single color phyrexian
mana_UP = 'UP'
mana_BP = 'BP'
mana_RP = 'RP'
mana_GP = 'GP'
mana_2W = '2W' # single color hybrid
mana_2U = '2U'
mana_2B = '2B'
mana_2R = '2R'
mana_2G = '2G'
mana_WU = 'WU' # dual color hybrid
mana_WB = 'WB'
mana_RW = 'RW'
mana_GW = 'GW'
mana_UB = 'UB'
mana_UR = 'UR'
mana_GU = 'GU'
mana_BR = 'BR'
mana_BG = 'BG'
mana_RG = 'RG'
# alternative order symbols
mana_WP_alt = 'PW' # single color phyrexian
mana_UP_alt = 'PU'
mana_BP_alt = 'PB'
mana_RP_alt = 'PR'
mana_GP_alt = 'PG'
mana_2W_alt = 'W2' # single color hybrid
mana_2U_alt = 'U2'
mana_2B_alt = 'B2'
mana_2R_alt = 'R2'
mana_2G_alt = 'G2'
mana_WU_alt = 'UW' # dual color hybrid
mana_WB_alt = 'BW'
mana_RW_alt = 'WR'
mana_GW_alt = 'WG'
mana_UB_alt = 'BU'
mana_UR_alt = 'RU'
mana_GU_alt = 'UG'
mana_BR_alt = 'RB'
mana_BG_alt = 'GB'
mana_RG_alt = 'GR'
# special
mana_2 = '2' # use with 'in' to identify single color hybrid
# master symbol lists
mana_syms = [
mana_W,
mana_U,
mana_B,
mana_R,
mana_G,
mana_P,
mana_S,
mana_X,
mana_WP,
mana_UP,
mana_BP,
mana_RP,
mana_GP,
mana_2W,
mana_2U,
mana_2B,
mana_2R,
mana_2G,
mana_WU,
mana_WB,
mana_RW,
mana_GW,
mana_UB,
mana_UR,
mana_GU,
mana_BR,
mana_BG,
mana_RG,
]
mana_symalt = [
mana_WP_alt,
mana_UP_alt,
mana_BP_alt,
mana_RP_alt,
mana_GP_alt,
mana_2W_alt,
mana_2U_alt,
mana_2B_alt,
mana_2R_alt,
mana_2G_alt,
mana_WU_alt,
mana_WB_alt,
mana_RW_alt,
mana_GW_alt,
mana_UB_alt,
mana_UR_alt,
mana_GU_alt,
mana_BR_alt,
mana_BG_alt,
mana_RG_alt,
]
mana_symall = mana_syms + mana_symalt
# alt symbol conversion
def mana_alt(sym):
    """Return the alternative-order spelling of a mana symbol.

    Single-character symbols come back unchanged; longer symbols are reversed.
    Raises ValueError if sym is not in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_alt(): ' + repr(sym))
    return sym if len(sym) < 2 else ''.join(reversed(sym))
# produce intended neural net output format
def mana_sym_to_encoding(sym):
    """Return the encoded form of a mana symbol.

    Single-character symbols are doubled; longer symbols are used as they are.
    Raises ValueError if sym is not in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_encoding(): ' + repr(sym))
    if len(sym) >= 2:
        return sym
    return sym + sym
# produce json formatting used in mtgjson
def mana_sym_to_json(sym):
    """Return the json-style spelling of a mana symbol.

    Single-character symbols are wrapped in the json delimiters; for longer
    symbols the first two characters are joined by the hybrid delimiter first.
    Raises ValueError if sym is not in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_json(): ' + repr(sym))
    if len(sym) < 2:
        inner = sym
    else:
        inner = sym[0] + mana_json_hybrid_delimiter + sym[1]
    return mana_json_open_delimiter + inner + mana_json_close_delimiter
# produce pretty formatting that renders on mtgsalvation forum
# converts individual symbols; surrounding [mana][/mana] tags are added elsewhere
def mana_sym_to_forum(sym):
    """Return the forum spelling of a single mana symbol.

    Alternative-order symbols are flipped with mana_alt() first. Single
    characters are returned bare; longer symbols are wrapped in the json
    delimiters. Raises ValueError if sym is not in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_forum(): ' + repr(sym))
    canonical = mana_alt(sym) if sym in mana_symalt else sym
    if len(canonical) >= 2:
        return mana_json_open_delimiter + canonical + mana_json_close_delimiter
    return canonical
# forward symbol tables for encoding
mana_syms_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_syms}
mana_symalt_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_symalt}
mana_symall_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_symall}
mana_syms_jencode = {sym : mana_sym_to_json(sym) for sym in mana_syms}
mana_symalt_jencode = {sym : mana_sym_to_json(sym) for sym in mana_symalt}
mana_symall_jencode = {sym : mana_sym_to_json(sym) for sym in mana_symall}
# reverse symbol tables for decoding
mana_syms_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_syms}
mana_symalt_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_symalt}
mana_symall_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_symall}
mana_syms_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_syms}
mana_symalt_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_symalt}
mana_symall_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_symall}
# going straight from json to encoding and vice versa
def mana_encode_direct(jsym):
    """Map a json mana symbol straight to its encoded form.

    Raises ValueError if jsym is not a key of mana_symall_jdecode.
    """
    if jsym in mana_symall_jdecode:
        return mana_symall_encode[mana_symall_jdecode[jsym]]
    raise ValueError('json string not found in decode table for mana_encode_direct(): '
                     + repr(jsym))
def mana_decode_direct(sym):
    """Map an encoded mana symbol straight to its json form.

    Raises ValueError if sym is not a key of mana_symall_decode.
    """
    if sym in mana_symall_decode:
        return mana_symall_jencode[mana_symall_decode[sym]]
    raise ValueError('mana symbol not found in decode table for mana_decode_direct(): '
                     + repr(sym))
# hacked in support for mtgsalvation forum
def mana_decode_direct_forum(sym):
    """Map an encoded mana symbol straight to its forum form.

    Raises ValueError if sym is not a key of mana_symall_decode.
    """
    if sym in mana_symall_decode:
        return mana_sym_to_forum(mana_symall_decode[sym])
    raise ValueError('mana symbol not found in decode table for mana_decode_direct_forum(): '
                     + repr(sym))
# processing entire strings
def unique_string(s):
    """Return the distinct characters of s joined into one string.

    The characters are sorted so the result is the same on every run, instead
    of following set iteration order.
    """
    return ''.join(sorted(set(s)))
mana_charset_special = mana_unary_marker + mana_unary_counter
mana_charset_strict = unique_string(''.join(mana_symall) + mana_charset_special)
mana_charset = unique_string(mana_charset_strict + mana_charset_strict.lower())
mana_regex_strict = (re.escape(mana_open_delimiter) + '['
+ re.escape(mana_charset_strict)
+ ']*' + re.escape(mana_close_delimiter))
mana_regex = (re.escape(mana_open_delimiter) + '['
+ re.escape(mana_charset)
+ ']*' + re.escape(mana_close_delimiter))
# as a special case, we let unary or decimal numbers exist in json mana strings
mana_jcharset_special = '0123456789' + mana_unary_marker + mana_unary_counter
mana_jcharset_strict = unique_string(''.join(mana_symall_jdecode) + mana_jcharset_special)
mana_jcharset = unique_string(mana_jcharset_strict + mana_jcharset_strict.lower())
# note that json mana strings can't be empty between the delimiters
mana_jregex_strict = (re.escape(mana_json_open_delimiter) + '['
+ re.escape(mana_jcharset_strict)
+ ']+' + re.escape(mana_json_close_delimiter))
mana_jregex = (re.escape(mana_json_open_delimiter) + '['
+ re.escape(mana_jcharset)
+ ']+' + re.escape(mana_json_close_delimiter))
number_decimal_regex = r'[0123456789]+'
number_unary_regex = re.escape(unary_marker) + re.escape(unary_counter) + '*'
mana_decimal_regex = (re.escape(mana_json_open_delimiter) + number_decimal_regex
+ re.escape(mana_json_close_delimiter))
mana_unary_regex = (re.escape(mana_json_open_delimiter) + number_unary_regex
+ re.escape(mana_json_close_delimiter))
# convert a json mana string to the proper encoding
def mana_translate(jmanastr):
    """Convert a json mana string to the delimited internal encoding.

    Braced unary and decimal numbers become runs of mana_unary_counter, each
    known json symbol becomes its encoded symbol, and the result is wrapped in
    the mana delimiters. Within each step the longest matches are replaced first.
    """
    manastr = jmanastr
    for n in sorted(re.findall(mana_unary_regex, manastr), key = len, reverse = True):
        ns = re.findall(number_unary_regex, n)
        # number_unary_regex is built from unary_marker / unary_counter, so the
        # count has to be measured with those, not with the mana_* variants
        i = (len(ns[0]) - len(unary_marker)) // len(unary_counter)
        manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i)
    for n in sorted(re.findall(mana_decimal_regex, manastr), key = len, reverse = True):
        ns = re.findall(number_decimal_regex, n)
        i = int(ns[0])
        manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i)
    for jsym in sorted(mana_symall_jdecode, key = len, reverse = True):
        if jsym in manastr:
            manastr = manastr.replace(jsym, mana_encode_direct(jsym))
    return mana_open_delimiter + manastr + mana_close_delimiter
# convert an encoded mana string back to json
mana_symlen_min = min([len(sym) for sym in mana_symall_decode])
mana_symlen_max = max([len(sym) for sym in mana_symall_decode])
def mana_untranslate(manastr, for_forum = False):
    """Convert one encoded mana string back to json, or to forum markup.

    The first and last characters of manastr are dropped as delimiters. Each
    mana_unary_counter adds one to the colorless total, each known encoded
    symbol is decoded, and anything unrecognized is skipped one character at a
    time. The colorless total comes first in the output; it is left out when
    it is zero, unless no symbols were decoded at all.
    """
    body = manastr[1:-1]
    decode_sym = mana_decode_direct_forum if for_forum else mana_decode_direct
    marker_len = len(mana_unary_marker)
    counter_len = len(mana_unary_counter)
    pieces = []
    colorless = 0
    pos = 0
    while pos < len(body):
        # an empty marker must never match, or the loop would never advance
        if marker_len > 0 and body[pos:pos + marker_len] == mana_unary_marker:
            pos += marker_len
            continue
        if body[pos:pos + counter_len] == mana_unary_counter:
            pos += counter_len
            colorless += 1
            continue
        for width in range(mana_symlen_min, mana_symlen_max + 1):
            candidate = body[pos:pos + width]
            if candidate in mana_symall_decode:
                pieces.append(decode_sym(candidate))
                pos += width
                break
        else:
            # unknown character: step over it so the loop always terminates
            pos += 1
    symbols = ''.join(pieces)
    show_count = symbols == '' or colorless != 0
    if for_forum:
        count = str(colorless) if show_count else ''
        return mana_forum_open_delimiter + count + symbols + mana_forum_close_delimiter
    if show_count:
        prefix = mana_json_open_delimiter + str(colorless) + mana_json_close_delimiter
    else:
        prefix = ''
    return prefix + symbols
# finally, replacing all instances in a string
# notice the calls to .upper(), this way we recognize lowercase symbols as well just in case
def to_mana(s):
    """Translate every json mana string found in s into the internal encoding.

    Each match is upper-cased before translation so that lowercase symbols are
    recognized as well.
    """
    def translate(match):
        return mana_translate(match.group(0).upper())

    # substitute match by match so that a repeated or previously translated
    # cost is never picked up and replaced a second time
    return re.sub(mana_jregex, translate, s)
def from_mana(s, for_forum = False):
    """Translate every encoded mana string found in s back to json or forum markup.

    Each match is upper-cased before translation so that lowercase symbols are
    recognized as well. for_forum is passed through to mana_untranslate().
    """
    def untranslate(match):
        return mana_untranslate(match.group(0).upper(), for_forum = for_forum)

    # substitute match by match so that text which has already been translated
    # is never matched and translated a second time
    return re.sub(mana_regex, untranslate, s)
# Translation could also be accomplished using the datamine.Manacost object's
# display methods, but these direct string transformations are retained for
# quick scripting and convenience (and used under the hood by that class to
# do its formatting).
# more convenience features for formatting tap / untap symbols
json_symbol_tap = tap_marker
json_symbol_untap = untap_marker
json_symbol_trans = {
mana_json_open_delimiter + json_symbol_tap + mana_json_close_delimiter : tap_marker,
mana_json_open_delimiter + json_symbol_tap.lower() + mana_json_close_delimiter : tap_marker,
mana_json_open_delimiter + json_symbol_untap + mana_json_close_delimiter : untap_marker,
mana_json_open_delimiter + json_symbol_untap.lower() + mana_json_close_delimiter : untap_marker,
}
symbol_trans = {
tap_marker : mana_json_open_delimiter + json_symbol_tap + mana_json_close_delimiter,
untap_marker : mana_json_open_delimiter + json_symbol_untap + mana_json_close_delimiter,
}
json_symbol_regex = r'\{[TtQq]\}'
def to_symbols(s):
    """Replace each json tap / untap symbol in s with its single-character marker."""
    found = re.findall(json_symbol_regex, s)
    found.sort(key = len, reverse = True)
    for json_sym in found:
        s = s.replace(json_sym, json_symbol_trans[json_sym])
    return s
def from_symbols(s):
symstrs