Split most of the code from config into utils; encode now uses utils as a library.
Fixed many errors in the original output file by using this improved code.
This commit is contained in:
parent
110ac8473c
commit
db0b9a9f9a
3 changed files with 460 additions and 505 deletions
371
config.py
371
config.py
|
@ -1,14 +1,8 @@
|
||||||
import re
|
import re
|
||||||
|
|
||||||
# Don't be intimidated by the massive size of this file. It provides both the
|
# Utilities for handling unicode, unary numbers, mana costs, and special symbols.
|
||||||
# raw character decisions made about the encoding scheme as variables, and
|
# For convenience we redefine everything from utils so that it can all be accessed
|
||||||
# a bunch of tables and functions to make dealing with mana costs and unary
|
# from the utils module.
|
||||||
# numbers easier. For the most part the functions should adapt if you change
|
|
||||||
# the specific delimiters and markers used.
|
|
||||||
|
|
||||||
# The decoding from mtgjson format is dependent on the specific structure of
|
|
||||||
# the internally used mana symbol strings, so if you want to change that you'll
|
|
||||||
# also have to change the json decoding functions.
|
|
||||||
|
|
||||||
# separators
|
# separators
|
||||||
cardsep = '\n\n'
|
cardsep = '\n\n'
|
||||||
|
@ -29,29 +23,6 @@ untap_marker = 'Q'
|
||||||
# unambiguous synonyms
|
# unambiguous synonyms
|
||||||
counter_rename = 'uncast'
|
counter_rename = 'uncast'
|
||||||
|
|
||||||
# unicode / ascii conversion
|
|
||||||
unicode_trans = {
|
|
||||||
u'\u2014' : dash_marker, # unicode long dash
|
|
||||||
u'\u2022' : bullet_marker, # unicode bullet
|
|
||||||
u'\u2019' : '"', # single quote
|
|
||||||
u'\u2018' : '"', # single quote
|
|
||||||
u'\u2212' : '-', # minus sign
|
|
||||||
u'\xe6' : 'ae', # ae symbol
|
|
||||||
u'\xfb' : 'u', # u with caret
|
|
||||||
u'\xfa' : 'u', # u with accent
|
|
||||||
u'\xe9' : 'e', # e with accent
|
|
||||||
u'\xe1' : 'a', # a with accent
|
|
||||||
u'\xe0' : 'a', # a with accent going the other way
|
|
||||||
u'\xe2' : 'a', # a with caret
|
|
||||||
u'\xf6' : 'o', # o with umlaut
|
|
||||||
u'\xed' : 'i', # i with accent
|
|
||||||
}
|
|
||||||
# this one is one-way only
|
|
||||||
def to_ascii(s):
    """Return s with every character listed in unicode_trans replaced.

    Each key of unicode_trans that occurs in s is substituted with its
    mapped replacement string; all other characters are left untouched.
    The conversion is one-way only.
    """
    for uchar in unicode_trans:
        # unicode_trans is a dict, so it has to be indexed rather than called
        s = s.replace(uchar, unicode_trans[uchar])
    return s
|
|
||||||
|
|
||||||
# unary numbers
|
# unary numbers
|
||||||
unary_marker = '&'
|
unary_marker = '&'
|
||||||
unary_counter = '^'
|
unary_counter = '^'
|
||||||
|
@ -64,339 +35,3 @@ unary_exceptions = {
|
||||||
100: 'one hundred',
|
100: 'one hundred',
|
||||||
200: 'two hundred',
|
200: 'two hundred',
|
||||||
}
|
}
|
||||||
|
|
||||||
def to_unary(s, warn = False):
    """Rewrite every decimal number found in s in unary notation.

    A number whose value is a key of unary_exceptions is replaced by the
    text stored there. Any other number becomes unary_marker followed by
    one unary_counter per unit, clamped to unary_max.

    s    -- the string to convert
    warn -- if true, print the string whenever a number gets clamped

    Returns the converted string.
    """
    numbers = re.findall(r'[0123456789]+', s)
    # replace largest first to avoid accidentally replacing shared substrings
    for n in sorted(numbers, key = int, reverse = True):
        i = int(n)
        if i in unary_exceptions:
            # the table is keyed by the integer value, not by the matched text
            s = s.replace(n, unary_exceptions[i])
            continue
        if i > unary_max:
            i = unary_max
            if warn:
                print(s)
        s = s.replace(n, unary_marker + unary_counter * i)
    return s
|
|
||||||
|
|
||||||
def from_unary(s):
    """Replace every unary number in s with its decimal representation.

    A unary number is unary_marker followed by zero or more repetitions
    of unary_counter. Returns the converted string.
    """
    # group the counter so the repetition applies to the whole counter
    # string, not just to its last character
    pattern = re.escape(unary_marker) + '(?:' + re.escape(unary_counter) + ')*'
    numbers = re.findall(pattern, s)
    # again, largest first so we don't replace substrings and break everything
    for n in sorted(numbers, key = len, reverse = True):
        # floor division keeps the count an int, so str() never yields '3.0'
        i = (len(n) - len(unary_marker)) // len(unary_counter)
        s = s.replace(n, str(i))
    return s
|
|
||||||
|
|
||||||
# mana syntax
|
|
||||||
mana_open_delimiter = '{'
|
|
||||||
mana_close_delimiter = '}'
|
|
||||||
mana_json_open_delimiter = mana_open_delimiter
|
|
||||||
mana_json_close_delimiter = mana_close_delimiter
|
|
||||||
mana_json_hybrid_delimiter = '/'
|
|
||||||
mana_forum_open_delimiter = '[mana]'
|
|
||||||
mana_forum_close_delimiter = '[/mana]'
|
|
||||||
mana_unary_marker = '' # if the same as unary_marker, from_unary WILL replace numbers in mana costs
|
|
||||||
mana_unary_counter = unary_counter
|
|
||||||
|
|
||||||
# individual mana symbols
|
|
||||||
mana_W = 'W' # single color
|
|
||||||
mana_U = 'U'
|
|
||||||
mana_B = 'B'
|
|
||||||
mana_R = 'R'
|
|
||||||
mana_G = 'G'
|
|
||||||
mana_P = 'P' # colorless phyrexian
|
|
||||||
mana_S = 'S' # snow
|
|
||||||
mana_X = 'X' # colorless X
|
|
||||||
mana_WP = 'WP' # single color phyrexian
|
|
||||||
mana_UP = 'UP'
|
|
||||||
mana_BP = 'BP'
|
|
||||||
mana_RP = 'RP'
|
|
||||||
mana_GP = 'GP'
|
|
||||||
mana_2W = '2W' # single color hybrid
|
|
||||||
mana_2U = '2U'
|
|
||||||
mana_2B = '2B'
|
|
||||||
mana_2R = '2R'
|
|
||||||
mana_2G = '2G'
|
|
||||||
mana_WU = 'WU' # dual color hybrid
|
|
||||||
mana_WB = 'WB'
|
|
||||||
mana_RW = 'RW'
|
|
||||||
mana_GW = 'GW'
|
|
||||||
mana_UB = 'UB'
|
|
||||||
mana_UR = 'UR'
|
|
||||||
mana_GU = 'GU'
|
|
||||||
mana_BR = 'BR'
|
|
||||||
mana_BG = 'BG'
|
|
||||||
mana_RG = 'RG'
|
|
||||||
# alternative order symbols
|
|
||||||
mana_WP_alt = 'PW' # single color phyrexian
|
|
||||||
mana_UP_alt = 'PU'
|
|
||||||
mana_BP_alt = 'PB'
|
|
||||||
mana_RP_alt = 'PR'
|
|
||||||
mana_GP_alt = 'PG'
|
|
||||||
mana_2W_alt = 'W2' # single color hybrid
|
|
||||||
mana_2U_alt = 'U2'
|
|
||||||
mana_2B_alt = 'B2'
|
|
||||||
mana_2R_alt = 'R2'
|
|
||||||
mana_2G_alt = 'G2'
|
|
||||||
mana_WU_alt = 'UW' # dual color hybrid
|
|
||||||
mana_WB_alt = 'BW'
|
|
||||||
mana_RW_alt = 'WR'
|
|
||||||
mana_GW_alt = 'WG'
|
|
||||||
mana_UB_alt = 'BU'
|
|
||||||
mana_UR_alt = 'RU'
|
|
||||||
mana_GU_alt = 'UG'
|
|
||||||
mana_BR_alt = 'RB'
|
|
||||||
mana_BG_alt = 'GB'
|
|
||||||
mana_RG_alt = 'GR'
|
|
||||||
# special
|
|
||||||
mana_2 = '2' # use with 'in' to identify single color hybrid
|
|
||||||
|
|
||||||
# master symbol lists
|
|
||||||
mana_syms = [
|
|
||||||
mana_W,
|
|
||||||
mana_U,
|
|
||||||
mana_B,
|
|
||||||
mana_R,
|
|
||||||
mana_G,
|
|
||||||
mana_P,
|
|
||||||
mana_S,
|
|
||||||
mana_X,
|
|
||||||
mana_WP,
|
|
||||||
mana_UP,
|
|
||||||
mana_BP,
|
|
||||||
mana_RP,
|
|
||||||
mana_GP,
|
|
||||||
mana_2W,
|
|
||||||
mana_2U,
|
|
||||||
mana_2B,
|
|
||||||
mana_2R,
|
|
||||||
mana_2G,
|
|
||||||
mana_WU,
|
|
||||||
mana_WB,
|
|
||||||
mana_RW,
|
|
||||||
mana_GW,
|
|
||||||
mana_UB,
|
|
||||||
mana_UR,
|
|
||||||
mana_GU,
|
|
||||||
mana_BR,
|
|
||||||
mana_BG,
|
|
||||||
mana_RG,
|
|
||||||
]
|
|
||||||
mana_symalt = [
|
|
||||||
mana_WP_alt,
|
|
||||||
mana_UP_alt,
|
|
||||||
mana_BP_alt,
|
|
||||||
mana_RP_alt,
|
|
||||||
mana_GP_alt,
|
|
||||||
mana_2W_alt,
|
|
||||||
mana_2U_alt,
|
|
||||||
mana_2B_alt,
|
|
||||||
mana_2R_alt,
|
|
||||||
mana_2G_alt,
|
|
||||||
mana_WU_alt,
|
|
||||||
mana_WB_alt,
|
|
||||||
mana_RW_alt,
|
|
||||||
mana_GW_alt,
|
|
||||||
mana_UB_alt,
|
|
||||||
mana_UR_alt,
|
|
||||||
mana_GU_alt,
|
|
||||||
mana_BR_alt,
|
|
||||||
mana_BG_alt,
|
|
||||||
mana_RG_alt,
|
|
||||||
]
|
|
||||||
mana_symall = mana_syms + mana_symalt
|
|
||||||
|
|
||||||
# alt symbol conversion
|
|
||||||
def mana_alt(sym):
    """Return the alternative spelling of a mana symbol (characters reversed).

    Raises ValueError if sym is not listed in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_alt(): ' + repr(sym))
    # a one-character symbol has no alternative order
    return sym if len(sym) < 2 else sym[::-1]
|
|
||||||
|
|
||||||
# produce intended neural net output format
|
|
||||||
def mana_sym_to_encoding(sym):
    """Return the encoded form of a mana symbol.

    One-character symbols are doubled; longer symbols are returned as is.
    Raises ValueError if sym is not listed in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_encoding(): ' + repr(sym))
    if len(sym) >= 2:
        return sym
    return sym * 2
|
|
||||||
|
|
||||||
# produce json formatting used in mtgjson
|
|
||||||
def mana_sym_to_json(sym):
    """Return the mtgjson-style text for a mana symbol.

    One-character symbols are wrapped in the json delimiters; for longer
    symbols the first two characters are joined with the hybrid delimiter
    before wrapping. Raises ValueError if sym is not listed in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_json(): ' + repr(sym))
    if len(sym) >= 2:
        inner = sym[0] + mana_json_hybrid_delimiter + sym[1]
    else:
        inner = sym
    return mana_json_open_delimiter + inner + mana_json_close_delimiter
|
|
||||||
|
|
||||||
# produce pretty formatting that renders on mtgsalvation forum
|
|
||||||
# converts individual symbols; surrounding [mana][/mana] tags are added elsewhere
|
|
||||||
def mana_sym_to_forum(sym):
    """Return the forum-style text for a single mana symbol.

    Alternative-order symbols are first flipped with mana_alt().
    One-character symbols are returned bare; longer ones are wrapped in
    the json delimiters. The surrounding forum tags are not added here.
    Raises ValueError if sym is not listed in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_forum(): ' + repr(sym))
    canonical = mana_alt(sym) if sym in mana_symalt else sym
    if len(canonical) >= 2:
        return mana_json_open_delimiter + canonical + mana_json_close_delimiter
    return canonical
|
|
||||||
|
|
||||||
# forward symbol tables for encoding
|
|
||||||
mana_syms_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_syms}
|
|
||||||
mana_symalt_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_symalt}
|
|
||||||
mana_symall_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_symall}
|
|
||||||
mana_syms_jencode = {sym : mana_sym_to_json(sym) for sym in mana_syms}
|
|
||||||
mana_symalt_jencode = {sym : mana_sym_to_json(sym) for sym in mana_symalt}
|
|
||||||
mana_symall_jencode = {sym : mana_sym_to_json(sym) for sym in mana_symall}
|
|
||||||
|
|
||||||
# reverse symbol tables for decoding
|
|
||||||
mana_syms_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_syms}
|
|
||||||
mana_symalt_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_symalt}
|
|
||||||
mana_symall_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_symall}
|
|
||||||
mana_syms_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_syms}
|
|
||||||
mana_symalt_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_symalt}
|
|
||||||
mana_symall_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_symall}
|
|
||||||
|
|
||||||
# going straight from json to encoding and vice versa
|
|
||||||
def mana_encode_direct(jsym):
    """Translate one json mana symbol straight to its encoded form.

    Raises ValueError if jsym is not a key of mana_symall_jdecode.
    """
    if jsym in mana_symall_jdecode:
        internal = mana_symall_jdecode[jsym]
        return mana_symall_encode[internal]
    raise ValueError('json string not found in decode table for mana_encode_direct(): '
                     + repr(jsym))
|
|
||||||
|
|
||||||
def mana_decode_direct(sym):
    """Translate one encoded mana symbol straight to its json form.

    Raises ValueError if sym is not a key of mana_symall_decode.
    """
    if sym in mana_symall_decode:
        internal = mana_symall_decode[sym]
        return mana_symall_jencode[internal]
    raise ValueError('mana symbol not found in decode table for mana_decode_direct(): '
                     + repr(sym))
|
|
||||||
|
|
||||||
# hacked in support for mtgsalvation forum
|
|
||||||
def mana_decode_direct_forum(sym):
    """Translate one encoded mana symbol straight to its forum form.

    Raises ValueError if sym is not a key of mana_symall_decode.
    """
    if sym in mana_symall_decode:
        internal = mana_symall_decode[sym]
        return mana_sym_to_forum(internal)
    raise ValueError('mana symbol not found in decode table for mana_decode_direct_forum(): '
                     + repr(sym))
|
|
||||||
|
|
||||||
# processing entire strings
|
|
||||||
def unique_string(s):
    """Return the distinct characters of s, in order of first appearance.

    Keeping first-occurrence order (instead of relying on set iteration
    order) makes the result reproducible from run to run.
    """
    seen = set()
    kept = []
    for ch in s:
        if ch not in seen:
            seen.add(ch)
            kept.append(ch)
    return ''.join(kept)
|
|
||||||
|
|
||||||
mana_charset_special = mana_unary_marker + mana_unary_counter
|
|
||||||
mana_charset_strict = unique_string(''.join(mana_symall) + mana_charset_special)
|
|
||||||
mana_charset = unique_string(mana_charset_strict + mana_charset_strict.lower())
|
|
||||||
|
|
||||||
mana_regex_strict = (re.escape(mana_open_delimiter) + '['
|
|
||||||
+ re.escape(mana_charset_strict)
|
|
||||||
+ ']*' + re.escape(mana_close_delimiter))
|
|
||||||
mana_regex = (re.escape(mana_open_delimiter) + '['
|
|
||||||
+ re.escape(mana_charset)
|
|
||||||
+ ']*' + re.escape(mana_close_delimiter))
|
|
||||||
|
|
||||||
# as a special case, we let unary or decimal numbers exist in json mana strings
|
|
||||||
mana_jcharset_special = '0123456789' + mana_unary_marker + mana_unary_counter
|
|
||||||
mana_jcharset_strict = unique_string(''.join(mana_symall_jdecode) + mana_jcharset_special)
|
|
||||||
mana_jcharset = unique_string(mana_jcharset_strict + mana_jcharset_strict.lower())
|
|
||||||
|
|
||||||
# note that json mana strings can't be empty between the delimiters
|
|
||||||
mana_jregex_strict = (re.escape(mana_json_open_delimiter) + '['
|
|
||||||
+ re.escape(mana_jcharset_strict)
|
|
||||||
+ ']+' + re.escape(mana_json_close_delimiter))
|
|
||||||
mana_jregex = (re.escape(mana_json_open_delimiter) + '['
|
|
||||||
+ re.escape(mana_jcharset)
|
|
||||||
+ ']+' + re.escape(mana_json_close_delimiter))
|
|
||||||
|
|
||||||
number_decimal_regex = r'[0123456789]+'
|
|
||||||
number_unary_regex = re.escape(unary_marker) + re.escape(unary_counter) + '*'
|
|
||||||
mana_decimal_regex = (re.escape(mana_json_open_delimiter) + number_decimal_regex
|
|
||||||
+ re.escape(mana_json_close_delimiter))
|
|
||||||
mana_unary_regex = (re.escape(mana_json_open_delimiter) + number_unary_regex
|
|
||||||
+ re.escape(mana_json_close_delimiter))
|
|
||||||
|
|
||||||
# convert a json mana string to the proper encoding
|
|
||||||
def mana_translate(jmanastr):
    """Convert a json mana string to the internal mana encoding.

    Delimited unary numbers and delimited decimal numbers are rewritten
    as mana_unary_marker followed by one mana_unary_counter per unit.
    Every json symbol listed in mana_symall_jdecode is then replaced by
    its encoded form. The result is wrapped in the mana delimiters.
    """
    manastr = jmanastr
    for n in re.findall(mana_unary_regex, manastr):
        ns = re.findall(number_unary_regex, n)
        # number_unary_regex is built from unary_marker / unary_counter, so
        # the count has to be recovered with those same strings; floor
        # division keeps it an int so it can be used to repeat the counter
        i = (len(ns[0]) - len(unary_marker)) // len(unary_counter)
        manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i)
    for n in re.findall(mana_decimal_regex, manastr):
        ns = re.findall(number_decimal_regex, n)
        i = int(ns[0])
        manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i)
    for jsym in mana_symall_jdecode:
        if jsym in manastr:
            manastr = manastr.replace(jsym, mana_encode_direct(jsym))
    return mana_open_delimiter + manastr + mana_close_delimiter
|
|
||||||
|
|
||||||
# convert an encoded mana string back to json
|
|
||||||
mana_symlen_min = min([len(sym) for sym in mana_symall_decode])
|
|
||||||
mana_symlen_max = max([len(sym) for sym in mana_symall_decode])
|
|
||||||
def mana_untranslate(manastr, for_forum = False):
    """Convert one encoded mana string back to json or forum notation.

    The first and last characters of manastr are dropped and the rest is
    scanned left to right: unary counters are tallied into a colorless
    total, known encoded symbols are decoded, and anything unrecognized
    is skipped one character at a time.

    manastr   -- the encoded mana string
    for_forum -- if true, produce forum markup instead of json
    """
    inner = manastr[1:-1]
    decode = mana_decode_direct_forum if for_forum else mana_decode_direct
    marker_len = len(mana_unary_marker)
    counter_len = len(mana_unary_counter)
    decoded = ''
    colorless_total = 0
    idx = 0
    while idx < len(inner):
        # an empty marker would match everywhere without advancing idx,
        # so that case must never take this branch
        if marker_len > 0 and inner[idx:idx+marker_len] == mana_unary_marker:
            idx += marker_len
            continue
        if inner[idx:idx+counter_len] == mana_unary_counter:
            idx += counter_len
            colorless_total += 1
            continue
        start = idx
        for symlen in range(mana_symlen_min, mana_symlen_max + 1):
            sym = inner[idx:idx+symlen]
            if sym in mana_symall_decode:
                idx += symlen
                decoded = decoded + decode(sym)
                break
        # step over a character we don't know, otherwise we'd loop forever
        if idx == start:
            idx += 1

    if for_forum:
        if decoded == '':
            body = str(colorless_total)
        else:
            body = ('' if colorless_total == 0 else str(colorless_total)) + decoded
        return mana_forum_open_delimiter + body + mana_forum_close_delimiter

    if decoded == '':
        return mana_json_open_delimiter + str(colorless_total) + mana_json_close_delimiter
    if colorless_total == 0:
        prefix = ''
    else:
        prefix = mana_json_open_delimiter + str(colorless_total) + mana_json_close_delimiter
    return prefix + decoded
|
|
||||||
|
|
||||||
# finally, replacing all instances in a string
|
|
||||||
# notice the calls to .upper(), this way we recognize lowercase symbols as well just in case
|
|
||||||
def to_mana(s):
    """Replace every json mana string found in s with its encoded form.

    Each match is upper-cased before translation so that lowercase
    symbols are recognized as well.
    """
    for found in re.findall(mana_jregex, s):
        s = s.replace(found, mana_translate(found.upper()))
    return s
|
|
||||||
|
|
||||||
def from_mana(s, for_forum = False):
    """Replace every encoded mana string found in s with its decoded form.

    Each match is upper-cased before decoding; for_forum is passed
    through to mana_untranslate().
    """
    for found in re.findall(mana_regex, s):
        s = s.replace(found, mana_untranslate(found.upper(), for_forum = for_forum))
    return s
|
|
||||||
|
|
||||||
# Translation could also be accomplished using the datamine.Manacost object's
|
|
||||||
# display methods, but these direct string transformations are retained for
|
|
||||||
# quick scripting and convenience (and used under the hood by that class to
|
|
||||||
# do its formatting).
|
|
||||||
|
|
168
encode.py
168
encode.py
|
@ -3,30 +3,27 @@ import re
|
||||||
import codecs
|
import codecs
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
import utils
|
||||||
|
|
||||||
#badwords = []
|
#badwords = []
|
||||||
|
|
||||||
valid_encoded_char = r'[abcdefghijklmnopqrstuvwxyz\'+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]'
|
valid_encoded_char = r'[abcdefghijklmnopqrstuvwxyz\'+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]'
|
||||||
|
|
||||||
dash_marker = '~'
|
cardsep = utils.cardsep
|
||||||
bullet_marker = '='
|
fieldsep = utils.fieldsep
|
||||||
reserved_marker = '\r'
|
bsidesep = utils.bsidesep
|
||||||
|
newline = utils.newline
|
||||||
def to_ascii(s):
|
dash_marker = utils.dash_marker
|
||||||
s = s.replace(u'\u2014', dash_marker) # unicode long dash
|
bullet_marker = utils.bullet_marker
|
||||||
s = s.replace(u'\u2022', bullet_marker) # unicode bullet
|
this_marker = utils.this_marker
|
||||||
s = s.replace(u'\u2019', '"') # single quote
|
counter_marker = utils.counter_marker
|
||||||
s = s.replace(u'\u2018', '"') # single quote
|
reserved_marker = utils.reserved_marker
|
||||||
s = s.replace(u'\u2212', '-') # minus sign
|
x_marker = utils.x_marker
|
||||||
s = s.replace(u'\xe6', 'ae') # ae symbol
|
tap_marker = utils.tap_marker
|
||||||
s = s.replace(u'\xfb', 'u') # u with caret
|
untap_marker = utils.untap_marker
|
||||||
s = s.replace(u'\xfa', 'u') # u with accent
|
counter_rename = utils.counter_rename
|
||||||
s = s.replace(u'\xe9', 'e') # e with accent
|
unary_marker = utils.unary_marker
|
||||||
s = s.replace(u'\xe1', 'a') # a with accent
|
unary_counter = utils.unary_counter
|
||||||
s = s.replace(u'\xe0', 'a') # a with accent going the other way
|
|
||||||
s = s.replace(u'\xe2', 'a') # a with caret
|
|
||||||
s = s.replace(u'\xf6', 'o') # o with umlaut
|
|
||||||
s = s.replace(u'\xed', 'i') # i with accent
|
|
||||||
return s
|
|
||||||
|
|
||||||
# This whole thing assumes the json format of mtgjson.com.
|
# This whole thing assumes the json format of mtgjson.com.
|
||||||
|
|
||||||
|
@ -63,107 +60,11 @@ def to_ascii(s):
|
||||||
# releaseDate - string
|
# releaseDate - string
|
||||||
# starter - boolean
|
# starter - boolean
|
||||||
|
|
||||||
fieldsep = '|'
|
|
||||||
newline = '\\'
|
|
||||||
unary_marker = '&'
|
|
||||||
unary_counter = '^'
|
|
||||||
mana_open_delimiter = '{'
|
|
||||||
mana_close_delimiter = '}'
|
|
||||||
x_marker = 'X'
|
|
||||||
tap_marker = 'T'
|
|
||||||
untap_marker = 'Q'
|
|
||||||
this_marker = '@'
|
|
||||||
counter_marker = '%'
|
|
||||||
bsidesep = '\n'
|
|
||||||
|
|
||||||
unary_max = 20
|
|
||||||
|
|
||||||
def to_unary(s):
    """Rewrite every decimal number found in s in unary notation.

    A handful of specific values are spelled out as words instead. Every
    other number becomes unary_marker followed by one unary_counter per
    unit, clamped to unary_max (the string is printed when that happens).
    """
    spelled_out = {
        25: 'twenty~five',
        30: 'thirty',
        40: 'forty',
        50: 'fifty',
        100: 'one hundred',
        200: 'two hundred',
    }
    numbers = re.findall(r'[0123456789]+', s)
    # largest first, so a short number never rewrites part of a longer one
    for n in sorted(numbers, key = int, reverse = True):
        i = int(n)
        if i in spelled_out:
            s = s.replace(n, spelled_out[i])
            continue
        if i > unary_max:
            # truncate to unary_max
            i = unary_max
            # warn, because we probably don't want this to happen
            print(s)
        s = s.replace(n, unary_marker + unary_counter * i)
    return s
|
|
||||||
|
|
||||||
|
|
||||||
# also handles the tap and untap symbols
|
|
||||||
def compress_mana(manastring):
    """Compress one json mana string into the encoded form.

    Known symbols are replaced using a fixed table and delimited decimal
    numbers become runs of unary_counter. A lone tap or untap marker is
    returned without delimiters; anything else is wrapped in braces.
    """
    # mana string is of the form '{3}{W}{2/B}', as specified by mtgjson
    translations = {
        '{w}' : 'WW',
        '{u}' : 'UU',
        '{b}' : 'BB',
        '{r}' : 'RR',
        '{g}' : 'GG',
        '{p}' : 'PP',
        '{w/p}' : 'WP',
        '{u/p}' : 'UP',
        '{b/p}' : 'BP',
        '{r/p}' : 'RP',
        '{g/p}' : 'GP',
        '{2/w}' : 'VW',
        '{2/u}' : 'VU',
        '{2/b}' : 'VB',
        '{2/r}' : 'VR',
        '{2/g}' : 'VG',
        '{w/u}' : 'WU',
        '{w/b}' : 'WB',
        '{r/w}' : 'RW',
        '{g/w}' : 'GW',
        '{u/b}' : 'UB',
        '{u/r}' : 'UR',
        '{g/u}' : 'GU',
        '{b/r}' : 'BR',
        '{b/g}' : 'BG',
        '{r/g}' : 'RG',
        '{s}' : 'SS',
        '{x}' : x_marker * 2,
        '{t}' : tap_marker,
        '{q}' : untap_marker,
    }
    for json_sym, encoded in translations.items():
        manastring = manastring.replace(json_sym, encoded)

    for n in re.findall(r'\{[0123456789]+\}', manastring):
        # n is the digits plus the two braces; strip them to read the count
        manastring = manastring.replace(n, unary_counter * int(n[1:-1]))

    # we don't really need delimiters for tap, it's a unique symbol anyways
    if manastring in [tap_marker, untap_marker]:
        return manastring
    return '{' + manastring + '}'
|
|
||||||
|
|
||||||
def replace_mana(s):
    """Replace every json mana string found in s with its compressed form."""
    for found in re.findall(r'\{[\{\}wubrgp/xtq0123456789]+\}', s):
        s = s.replace(found, compress_mana(found))
    return s
|
|
||||||
|
|
||||||
|
|
||||||
def strip_reminder_text(s):
|
def strip_reminder_text(s):
|
||||||
return re.sub(r'\(.*\)', '', s)
|
return re.sub(r'\(.*\)', '', s)
|
||||||
|
|
||||||
|
|
||||||
def replace_newlines(s):
|
def replace_newlines(s):
|
||||||
return s.replace('\n', '\\')
|
return s.replace('\n', '\\')
|
||||||
|
|
||||||
|
@ -213,7 +114,7 @@ def replace_cardname(s, name):
|
||||||
for override in overrides:
|
for override in overrides:
|
||||||
s = s.replace(override, this_marker)
|
s = s.replace(override, this_marker)
|
||||||
|
|
||||||
# some detection code when the overrides need to be fixed...
|
# some detection code for when the overrides need to be fixed...
|
||||||
# global badwords
|
# global badwords
|
||||||
# bad = False
|
# bad = False
|
||||||
# for word in name.replace(',', '').split():
|
# for word in name.replace(',', '').split():
|
||||||
|
@ -539,32 +440,35 @@ def encode(card):
|
||||||
return
|
return
|
||||||
|
|
||||||
encoding = fieldsep
|
encoding = fieldsep
|
||||||
name = card['name'].lower()
|
if 'name' in card:
|
||||||
encoding += sanitize_name(name)
|
name = card['name'].lower()
|
||||||
|
encoding += sanitize_name(name)
|
||||||
encoding += fieldsep
|
encoding += fieldsep
|
||||||
if 'supertypes' in card:
|
if 'supertypes' in card:
|
||||||
encoding += ' '.join(card['supertypes']).lower()
|
encoding += ' '.join(card['supertypes']).lower()
|
||||||
encoding += fieldsep
|
encoding += fieldsep
|
||||||
encoding += ' '.join(card['types']).lower()
|
if 'types' in card:
|
||||||
|
encoding += ' '.join(card['types']).lower()
|
||||||
encoding += fieldsep
|
encoding += fieldsep
|
||||||
if 'loyalty' in card:
|
if 'loyalty' in card:
|
||||||
encoding += to_unary(str(card['loyalty']))
|
encoding += utils.to_unary(str(card['loyalty']))
|
||||||
encoding += fieldsep
|
encoding += fieldsep
|
||||||
if 'subtypes' in card:
|
if 'subtypes' in card:
|
||||||
encoding += ' '.join(card['subtypes']).lower()
|
encoding += ' '.join(card['subtypes']).lower()
|
||||||
encoding += fieldsep
|
encoding += fieldsep
|
||||||
if 'power' in card and 'toughness' in card:
|
if 'power' in card and 'toughness' in card:
|
||||||
encoding += to_unary(card['power']) + '/' + to_unary(card['toughness'])
|
encoding += utils.to_unary(card['power']) + '/' + utils.to_unary(card['toughness'])
|
||||||
encoding += fieldsep
|
encoding += fieldsep
|
||||||
if 'manaCost' in card:
|
if 'manaCost' in card:
|
||||||
encoding += replace_mana(card['manaCost'].lower())
|
encoding += utils.to_mana(card['manaCost'].lower())
|
||||||
encoding += fieldsep
|
encoding += fieldsep
|
||||||
if 'text' in card:
|
if 'text' in card:
|
||||||
text = card['text'].lower()
|
text = card['text'].lower()
|
||||||
text = strip_reminder_text(text)
|
text = strip_reminder_text(text)
|
||||||
text = replace_cardname(text, name)
|
text = replace_cardname(text, name)
|
||||||
text = replace_mana(text)
|
text = utils.to_mana(text)
|
||||||
text = to_unary(text)
|
text = utils.to_symbols(text)
|
||||||
|
text = utils.to_unary(text)
|
||||||
text = fix_dashes(text)
|
text = fix_dashes(text)
|
||||||
text = fix_x(text)
|
text = fix_x(text)
|
||||||
text = replace_counters(text)
|
text = replace_counters(text)
|
||||||
|
@ -575,24 +479,16 @@ def encode(card):
|
||||||
encoding += text.strip()
|
encoding += text.strip()
|
||||||
encoding += fieldsep
|
encoding += fieldsep
|
||||||
|
|
||||||
# HACK: put the cost again after the text
|
|
||||||
# if 'manaCost' in card:
|
|
||||||
# encoding += replace_mana(card['manaCost'].lower())
|
|
||||||
# encoding += fieldsep
|
|
||||||
|
|
||||||
# if 'flavor' in card:
|
|
||||||
# encoding += card['flavor'].lower()
|
|
||||||
# encoding += fieldsep
|
|
||||||
|
|
||||||
# now output the bside if there is one
|
# now output the bside if there is one
|
||||||
if 'bside' in card:
|
if 'bside' in card:
|
||||||
encoding += bsidesep
|
encoding += bsidesep
|
||||||
encoding += encode(card['bside'])
|
encoding += encode(card['bside'])
|
||||||
|
|
||||||
encoding = to_ascii(encoding)
|
encoding = utils.to_ascii(encoding)
|
||||||
# encoding = re.sub(valid_encoded_char, '', encoding)
|
# encoding = re.sub(valid_encoded_char, '', encoding)
|
||||||
# if not encoding == '':
|
# if not encoding == '':
|
||||||
# print card
|
# print card
|
||||||
|
|
||||||
return encoding
|
return encoding
|
||||||
|
|
||||||
def encode_duplicated(cards):
|
def encode_duplicated(cards):
|
||||||
|
|
424
utils.py
Normal file
424
utils.py
Normal file
|
@ -0,0 +1,424 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Utilities for handling unicode, unary numbers, mana costs, and special symbols.
|
||||||
|
# For convenience we redefine everything from config so that it can all be accessed
|
||||||
|
# from the utils module.
|
||||||
|
|
||||||
|
import config
|
||||||
|
|
||||||
|
# separators
|
||||||
|
cardsep = config.cardsep
|
||||||
|
fieldsep = config.fieldsep
|
||||||
|
bsidesep = config.bsidesep
|
||||||
|
newline = config.newline
|
||||||
|
|
||||||
|
# special indicators
|
||||||
|
dash_marker = config.dash_marker
|
||||||
|
bullet_marker = config.bullet_marker
|
||||||
|
this_marker = config.this_marker
|
||||||
|
counter_marker = config.counter_marker
|
||||||
|
reserved_marker = config.reserved_marker
|
||||||
|
x_marker = config.x_marker
|
||||||
|
tap_marker = config.tap_marker
|
||||||
|
untap_marker = config.untap_marker
|
||||||
|
|
||||||
|
# unambiguous synonyms
|
||||||
|
counter_rename = config.counter_rename
|
||||||
|
|
||||||
|
# unicode / ascii conversion
|
||||||
|
unicode_trans = {
|
||||||
|
u'\u2014' : dash_marker, # unicode long dash
|
||||||
|
u'\u2022' : bullet_marker, # unicode bullet
|
||||||
|
u'\u2019' : '"', # single quote
|
||||||
|
u'\u2018' : '"', # single quote
|
||||||
|
u'\u2212' : '-', # minus sign
|
||||||
|
u'\xe6' : 'ae', # ae symbol
|
||||||
|
u'\xfb' : 'u', # u with caret
|
||||||
|
u'\xfa' : 'u', # u with accent
|
||||||
|
u'\xe9' : 'e', # e with accent
|
||||||
|
u'\xe1' : 'a', # a with accent
|
||||||
|
u'\xe0' : 'a', # a with accent going the other way
|
||||||
|
u'\xe2' : 'a', # a with caret
|
||||||
|
u'\xf6' : 'o', # o with umlaut
|
||||||
|
u'\xed' : 'i', # i with accent
|
||||||
|
}
|
||||||
|
|
||||||
|
# this one is one-way only
|
||||||
|
def to_ascii(s):
    """Return s with every character listed in unicode_trans replaced.

    Each key of unicode_trans that occurs in s is substituted with its
    mapped replacement string. The conversion is one-way only.
    """
    for uchar, replacement in unicode_trans.items():
        s = s.replace(uchar, replacement)
    return s
|
||||||
|
|
||||||
|
# unary numbers
|
||||||
|
unary_marker = config.unary_marker
|
||||||
|
unary_counter = config.unary_counter
|
||||||
|
unary_max = config.unary_max
|
||||||
|
unary_exceptions = config.unary_exceptions
|
||||||
|
|
||||||
|
def to_unary(s, warn = False):
    """Rewrite every decimal number found in s in unary notation.

    A number whose value is a key of unary_exceptions is replaced by the
    text stored there. Any other number becomes unary_marker followed by
    one unary_counter per unit, clamped to unary_max.

    s    -- the string to convert
    warn -- if true, print the string whenever a number gets clamped

    Returns the converted string.
    """
    numbers = re.findall(r'[0123456789]+', s)
    # replace largest first to avoid accidentally replacing shared substrings
    for n in sorted(numbers, key = int, reverse = True):
        i = int(n)
        if i in unary_exceptions:
            s = s.replace(n, unary_exceptions[i])
            continue
        if i > unary_max:
            i = unary_max
            if warn:
                print(s)
        s = s.replace(n, unary_marker + unary_counter * i)
    return s
|
||||||
|
|
||||||
|
def from_unary(s):
    """Replace every unary number in s with its decimal representation.

    A unary number is unary_marker followed by zero or more repetitions
    of unary_counter. Returns the converted string.
    """
    # group the counter so the repetition applies to the whole counter
    # string, not just to its last character
    pattern = re.escape(unary_marker) + '(?:' + re.escape(unary_counter) + ')*'
    numbers = re.findall(pattern, s)
    # again, largest first so we don't replace substrings and break everything
    for n in sorted(numbers, key = len, reverse = True):
        # floor division keeps the count an int, so str() never yields '3.0'
        i = (len(n) - len(unary_marker)) // len(unary_counter)
        s = s.replace(n, str(i))
    return s
|
||||||
|
|
||||||
|
# mana syntax
|
||||||
|
mana_open_delimiter = '{'
|
||||||
|
mana_close_delimiter = '}'
|
||||||
|
mana_json_open_delimiter = mana_open_delimiter
|
||||||
|
mana_json_close_delimiter = mana_close_delimiter
|
||||||
|
mana_json_hybrid_delimiter = '/'
|
||||||
|
mana_forum_open_delimiter = '[mana]'
|
||||||
|
mana_forum_close_delimiter = '[/mana]'
|
||||||
|
mana_unary_marker = '' # if the same as unary_marker, from_unary WILL replace numbers in mana costs
|
||||||
|
mana_unary_counter = unary_counter
|
||||||
|
|
||||||
|
# The decoding from mtgjson format is dependent on the specific structure of
|
||||||
|
# these internally used mana symbol strings, so if you want to change them you'll
|
||||||
|
# also have to change the json decoding functions.
|
||||||
|
|
||||||
|
# standard mana symbol set
|
||||||
|
mana_W = 'W' # single color
|
||||||
|
mana_U = 'U'
|
||||||
|
mana_B = 'B'
|
||||||
|
mana_R = 'R'
|
||||||
|
mana_G = 'G'
|
||||||
|
mana_P = 'P' # colorless phyrexian
|
||||||
|
mana_S = 'S' # snow
|
||||||
|
mana_X = 'X' # colorless X
|
||||||
|
mana_WP = 'WP' # single color phyrexian
|
||||||
|
mana_UP = 'UP'
|
||||||
|
mana_BP = 'BP'
|
||||||
|
mana_RP = 'RP'
|
||||||
|
mana_GP = 'GP'
|
||||||
|
mana_2W = '2W' # single color hybrid
|
||||||
|
mana_2U = '2U'
|
||||||
|
mana_2B = '2B'
|
||||||
|
mana_2R = '2R'
|
||||||
|
mana_2G = '2G'
|
||||||
|
mana_WU = 'WU' # dual color hybrid
|
||||||
|
mana_WB = 'WB'
|
||||||
|
mana_RW = 'RW'
|
||||||
|
mana_GW = 'GW'
|
||||||
|
mana_UB = 'UB'
|
||||||
|
mana_UR = 'UR'
|
||||||
|
mana_GU = 'GU'
|
||||||
|
mana_BR = 'BR'
|
||||||
|
mana_BG = 'BG'
|
||||||
|
mana_RG = 'RG'
|
||||||
|
# alternative order symbols
|
||||||
|
mana_WP_alt = 'PW' # single color phyrexian
|
||||||
|
mana_UP_alt = 'PU'
|
||||||
|
mana_BP_alt = 'PB'
|
||||||
|
mana_RP_alt = 'PR'
|
||||||
|
mana_GP_alt = 'PG'
|
||||||
|
mana_2W_alt = 'W2' # single color hybrid
|
||||||
|
mana_2U_alt = 'U2'
|
||||||
|
mana_2B_alt = 'B2'
|
||||||
|
mana_2R_alt = 'R2'
|
||||||
|
mana_2G_alt = 'G2'
|
||||||
|
mana_WU_alt = 'UW' # dual color hybrid
|
||||||
|
mana_WB_alt = 'BW'
|
||||||
|
mana_RW_alt = 'WR'
|
||||||
|
mana_GW_alt = 'WG'
|
||||||
|
mana_UB_alt = 'BU'
|
||||||
|
mana_UR_alt = 'RU'
|
||||||
|
mana_GU_alt = 'UG'
|
||||||
|
mana_BR_alt = 'RB'
|
||||||
|
mana_BG_alt = 'GB'
|
||||||
|
mana_RG_alt = 'GR'
|
||||||
|
# special
|
||||||
|
mana_2 = '2' # use with 'in' to identify single color hybrid
|
||||||
|
|
||||||
|
# master symbol lists
|
||||||
|
mana_syms = [
|
||||||
|
mana_W,
|
||||||
|
mana_U,
|
||||||
|
mana_B,
|
||||||
|
mana_R,
|
||||||
|
mana_G,
|
||||||
|
mana_P,
|
||||||
|
mana_S,
|
||||||
|
mana_X,
|
||||||
|
mana_WP,
|
||||||
|
mana_UP,
|
||||||
|
mana_BP,
|
||||||
|
mana_RP,
|
||||||
|
mana_GP,
|
||||||
|
mana_2W,
|
||||||
|
mana_2U,
|
||||||
|
mana_2B,
|
||||||
|
mana_2R,
|
||||||
|
mana_2G,
|
||||||
|
mana_WU,
|
||||||
|
mana_WB,
|
||||||
|
mana_RW,
|
||||||
|
mana_GW,
|
||||||
|
mana_UB,
|
||||||
|
mana_UR,
|
||||||
|
mana_GU,
|
||||||
|
mana_BR,
|
||||||
|
mana_BG,
|
||||||
|
mana_RG,
|
||||||
|
]
|
||||||
|
mana_symalt = [
|
||||||
|
mana_WP_alt,
|
||||||
|
mana_UP_alt,
|
||||||
|
mana_BP_alt,
|
||||||
|
mana_RP_alt,
|
||||||
|
mana_GP_alt,
|
||||||
|
mana_2W_alt,
|
||||||
|
mana_2U_alt,
|
||||||
|
mana_2B_alt,
|
||||||
|
mana_2R_alt,
|
||||||
|
mana_2G_alt,
|
||||||
|
mana_WU_alt,
|
||||||
|
mana_WB_alt,
|
||||||
|
mana_RW_alt,
|
||||||
|
mana_GW_alt,
|
||||||
|
mana_UB_alt,
|
||||||
|
mana_UR_alt,
|
||||||
|
mana_GU_alt,
|
||||||
|
mana_BR_alt,
|
||||||
|
mana_BG_alt,
|
||||||
|
mana_RG_alt,
|
||||||
|
]
|
||||||
|
mana_symall = mana_syms + mana_symalt
|
||||||
|
|
||||||
|
# alt symbol conversion
|
||||||
|
def mana_alt(sym):
    """Return the other spelling of a mana symbol.

    Symbols shorter than two characters are returned unchanged; longer
    symbols are returned reversed.

    Raises ValueError if sym is not in mana_symall.
    """
    if sym in mana_symall:
        return sym[::-1] if len(sym) >= 2 else sym
    raise ValueError('invalid mana symbol for mana_alt(): ' + repr(sym))
|
||||||
|
|
||||||
|
# produce intended neural net output format
|
||||||
|
def mana_sym_to_encoding(sym):
    """Return the encoded form of a mana symbol.

    Symbols shorter than two characters are doubled (sym * 2); longer
    symbols are returned as they are.

    Raises ValueError if sym is not in mana_symall.
    """
    if sym in mana_symall:
        return sym if len(sym) >= 2 else sym * 2
    raise ValueError('invalid mana symbol for mana_sym_to_encoding(): ' + repr(sym))
|
||||||
|
|
||||||
|
# produce json formatting used in mtgjson
|
||||||
|
def mana_sym_to_json(sym):
    """Return the json-style spelling of a mana symbol.

    The symbol is wrapped in the json open/close delimiters. For symbols
    of two or more characters, the first two characters are joined with
    mana_json_hybrid_delimiter before wrapping.

    Raises ValueError if sym is not in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_json(): ' + repr(sym))
    if len(sym) >= 2:
        payload = sym[0] + mana_json_hybrid_delimiter + sym[1]
    else:
        payload = sym
    return mana_json_open_delimiter + payload + mana_json_close_delimiter
|
||||||
|
|
||||||
|
# produce pretty formatting that renders on mtgsalvation forum
|
||||||
|
# converts individual symbols; surrounding [mana][/mana] tags are added elsewhere
|
||||||
|
def mana_sym_to_forum(sym):
    """Return the forum spelling of a single mana symbol.

    Alternate spellings are first converted with mana_alt(). Symbols
    shorter than two characters are returned bare; longer ones are wrapped
    in the json open/close delimiters. The surrounding [mana][/mana] tags
    are not added here.

    Raises ValueError if sym is not in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_forum(): ' + repr(sym))
    canonical = mana_alt(sym) if sym in mana_symalt else sym
    if len(canonical) >= 2:
        return mana_json_open_delimiter + canonical + mana_json_close_delimiter
    return canonical
|
||||||
|
|
||||||
|
# forward symbol tables for encoding
|
||||||
|
# symbol -> encoded form, for the primary list, the alternates, and both
mana_syms_encode = dict((sym, mana_sym_to_encoding(sym)) for sym in mana_syms)
mana_symalt_encode = dict((sym, mana_sym_to_encoding(sym)) for sym in mana_symalt)
mana_symall_encode = dict((sym, mana_sym_to_encoding(sym)) for sym in mana_symall)

# symbol -> json form, for the same three lists
mana_syms_jencode = dict((sym, mana_sym_to_json(sym)) for sym in mana_syms)
mana_symalt_jencode = dict((sym, mana_sym_to_json(sym)) for sym in mana_symalt)
mana_symall_jencode = dict((sym, mana_sym_to_json(sym)) for sym in mana_symall)
|
||||||
|
|
||||||
|
# reverse symbol tables for decoding
|
||||||
|
# encoded form -> symbol, built by walking each list in order
mana_syms_decode = dict((mana_sym_to_encoding(sym), sym) for sym in mana_syms)
mana_symalt_decode = dict((mana_sym_to_encoding(sym), sym) for sym in mana_symalt)
mana_symall_decode = dict((mana_sym_to_encoding(sym), sym) for sym in mana_symall)

# json form -> symbol, built the same way
mana_syms_jdecode = dict((mana_sym_to_json(sym), sym) for sym in mana_syms)
mana_symalt_jdecode = dict((mana_sym_to_json(sym), sym) for sym in mana_symalt)
mana_symall_jdecode = dict((mana_sym_to_json(sym), sym) for sym in mana_symall)
|
||||||
|
|
||||||
|
# going straight from json to encoding and vice versa
|
||||||
|
def mana_encode_direct(jsym):
    """Map a json-form mana symbol straight to its encoded form.

    Raises ValueError if jsym is not a key of mana_symall_jdecode.
    """
    if jsym not in mana_symall_jdecode:
        raise ValueError('json string not found in decode table for mana_encode_direct(): '
                         + repr(jsym))
    symbol = mana_symall_jdecode[jsym]
    return mana_symall_encode[symbol]
|
||||||
|
|
||||||
|
def mana_decode_direct(sym):
    """Map an encoded mana symbol straight to its json form.

    Raises ValueError if sym is not a key of mana_symall_decode.
    """
    if sym not in mana_symall_decode:
        raise ValueError('mana symbol not found in decode table for mana_decode_direct(): '
                         + repr(sym))
    symbol = mana_symall_decode[sym]
    return mana_symall_jencode[symbol]
|
||||||
|
|
||||||
|
# hacked in support for mtgsalvation forum
|
||||||
|
def mana_decode_direct_forum(sym):
    """Map an encoded mana symbol straight to its forum spelling.

    Raises ValueError if sym is not a key of mana_symall_decode.
    """
    if sym not in mana_symall_decode:
        raise ValueError('mana symbol not found in decode table for mana_decode_direct_forum(): '
                         + repr(sym))
    symbol = mana_symall_decode[sym]
    return mana_sym_to_forum(symbol)
|
||||||
|
|
||||||
|
# processing entire strings
|
||||||
|
def unique_string(s):
    """Return the distinct characters of s as a string, in sorted order.

    Sorting makes the result reproducible; joining a bare set would yield
    the characters in an arbitrary order that can change between runs.
    """
    return ''.join(sorted(set(s)))
|
||||||
|
|
||||||
|
# characters that may appear between the delimiters of an encoded mana string
mana_charset_special = ''.join([mana_unary_marker, mana_unary_counter])
mana_charset_strict = unique_string(''.join(mana_symall) + mana_charset_special)
# the non-strict set additionally admits the lowercase forms
mana_charset = unique_string(mana_charset_strict + mana_charset_strict.lower())

# an encoded mana string: open delimiter, zero or more charset characters, close delimiter
mana_regex_strict = ''.join([
    re.escape(mana_open_delimiter),
    '[', re.escape(mana_charset_strict), ']*',
    re.escape(mana_close_delimiter),
])
mana_regex = ''.join([
    re.escape(mana_open_delimiter),
    '[', re.escape(mana_charset), ']*',
    re.escape(mana_close_delimiter),
])
|
||||||
|
|
||||||
|
# as a special case, we let unary or decimal numbers exist in json mana strings
|
||||||
|
# as a special case, unary or decimal numbers may appear in json mana strings
mana_jcharset_special = ''.join(['0123456789', mana_unary_marker, mana_unary_counter])
mana_jcharset_strict = unique_string(''.join(mana_symall_jdecode) + mana_jcharset_special)
# the non-strict set additionally admits the lowercase forms
mana_jcharset = unique_string(mana_jcharset_strict + mana_jcharset_strict.lower())

# a json mana string needs at least one character between the delimiters
mana_jregex_strict = ''.join([
    re.escape(mana_json_open_delimiter),
    '[', re.escape(mana_jcharset_strict), ']+',
    re.escape(mana_json_close_delimiter),
])
mana_jregex = ''.join([
    re.escape(mana_json_open_delimiter),
    '[', re.escape(mana_jcharset), ']+',
    re.escape(mana_json_close_delimiter),
])
|
||||||
|
|
||||||
|
# one or more decimal digits
number_decimal_regex = r'[0123456789]+'
# the unary marker followed by zero or more counters; the counter is wrapped
# in a non-capturing group so the '*' repeats the whole counter even when it
# is longer than one character (findall results are unaffected by the group)
number_unary_regex = re.escape(unary_marker) + '(?:' + re.escape(unary_counter) + ')*'

# the same numbers wrapped in the json mana delimiters
mana_decimal_regex = (re.escape(mana_json_open_delimiter) + number_decimal_regex
                      + re.escape(mana_json_close_delimiter))
mana_unary_regex = (re.escape(mana_json_open_delimiter) + number_unary_regex
                    + re.escape(mana_json_close_delimiter))
|
||||||
|
|
||||||
|
# convert a json mana string to the proper encoding
|
||||||
|
def mana_translate(jmanastr):
    """Convert a json mana string to the encoded format.

    Three passes are made over the string, each handling matches from
    longest to shortest:
      1. delimited unary numbers become mana_unary_marker followed by one
         mana_unary_counter per counted unit;
      2. delimited decimal numbers are rewritten the same way;
      3. every known json symbol is replaced by its encoded form.
    The result is wrapped in mana_open_delimiter / mana_close_delimiter.
    """
    manastr = jmanastr
    # key = len with reverse = True gives the same stable longest-first order
    # as a cmp function on lengths, and works on both Python 2 and 3
    for n in sorted(re.findall(mana_unary_regex, manastr), key = len, reverse = True):
        ns = re.findall(number_unary_regex, n)
        # the matched text is built from unary_marker / unary_counter, so the
        # count must be derived from those lengths; '//' keeps it an integer
        i = (len(ns[0]) - len(unary_marker)) // len(unary_counter)
        manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i)
    for n in sorted(re.findall(mana_decimal_regex, manastr), key = len, reverse = True):
        ns = re.findall(number_decimal_regex, n)
        i = int(ns[0])
        manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i)
    for jsym in sorted(mana_symall_jdecode, key = len, reverse = True):
        if jsym in manastr:
            manastr = manastr.replace(jsym, mana_encode_direct(jsym))
    return mana_open_delimiter + manastr + mana_close_delimiter
|
||||||
|
|
||||||
|
# convert an encoded mana string back to json
|
||||||
|
# shortest and longest encoded symbol lengths present in the decode table
mana_symlen_min = min(len(encoded) for encoded in mana_symall_decode)
mana_symlen_max = max(len(encoded) for encoded in mana_symall_decode)
|
||||||
|
def mana_untranslate(manastr, for_forum = False):
    """Convert an encoded mana string back to json (or forum) formatting.

    The first and last characters of manastr are dropped, then the rest is
    scanned left to right: unary markers are skipped, each unary counter
    adds one to a running total, and each encoded symbol found in
    mana_symall_decode is decoded. Unrecognized characters are skipped.

    With for_forum false, the result is the decoded symbols, preceded by
    the delimited total when it is nonzero; if no symbol was decoded the
    delimited total alone is returned. With for_forum true, everything is
    placed inside a single pair of forum delimiters instead.
    """
    body = manastr[1:-1]
    decode = mana_decode_direct_forum if for_forum else mana_decode_direct
    marker_len = len(mana_unary_marker)
    counter_len = len(mana_unary_counter)
    pieces = []
    colorless_total = 0
    pos = 0
    while pos < len(body):
        # an empty marker must never take this branch: pos would not advance
        if marker_len > 0 and body[pos:pos + marker_len] == mana_unary_marker:
            pos += marker_len
            continue
        if body[pos:pos + counter_len] == mana_unary_counter:
            pos += counter_len
            colorless_total += 1
            continue
        start = pos
        for width in range(mana_symlen_min, mana_symlen_max + 1):
            candidate = body[pos:pos + width]
            if candidate in mana_symall_decode:
                pos += width
                pieces.append(decode(candidate))
                break
        # nothing consumed: step over the unknown character so the scan terminates
        if pos == start:
            pos += 1

    jmanastr = ''.join(pieces)
    total_text = str(colorless_total)
    if for_forum:
        if jmanastr == '':
            return mana_forum_open_delimiter + total_text + mana_forum_close_delimiter
        prefix = total_text if colorless_total != 0 else ''
        return mana_forum_open_delimiter + prefix + jmanastr + mana_forum_close_delimiter

    wrapped_total = mana_json_open_delimiter + total_text + mana_json_close_delimiter
    if jmanastr == '':
        return wrapped_total
    return (wrapped_total if colorless_total != 0 else '') + jmanastr
|
||||||
|
|
||||||
|
# finally, replacing all instances in a string
|
||||||
|
# notice the calls to .upper(), this way we recognize lowercase symbols as well just in case
|
||||||
|
def to_mana(s):
    """Replace every json mana string found in s with its encoded form.

    Matches of mana_jregex are processed longest first; each one is
    uppercased before translation so lowercase symbols are recognized too.
    """
    jmanastrs = re.findall(mana_jregex, s)
    # key = len replaces the Python 2-only cmp argument; the order is the same
    for jmanastr in sorted(jmanastrs, key = len, reverse = True):
        s = s.replace(jmanastr, mana_translate(jmanastr.upper()))
    return s
|
||||||
|
|
||||||
|
def from_mana(s, for_forum = False):
    """Replace every encoded mana string found in s with its json form.

    Matches of mana_regex are processed longest first; each one is
    uppercased before being passed to mana_untranslate(). for_forum is
    forwarded unchanged and selects forum formatting.
    """
    manastrs = re.findall(mana_regex, s)
    # key = len replaces the Python 2-only cmp argument; the order is the same
    for manastr in sorted(manastrs, key = len, reverse = True):
        s = s.replace(manastr, mana_untranslate(manastr.upper(), for_forum = for_forum))
    return s
|
||||||
|
|
||||||
|
# Translation could also be accomplished using the datamine.Manacost object's
|
||||||
|
# display methods, but these direct string transformations are retained for
|
||||||
|
# quick scripting and convenience (and used under the hood by that class to
|
||||||
|
# do its formatting).
|
||||||
|
|
||||||
|
# more convenience features for formatting tap / untap symbols
|
||||||
|
# the json spellings of tap / untap reuse the internal markers
json_symbol_tap = tap_marker
json_symbol_untap = untap_marker

# delimited json symbol (as written and lowercased) -> internal marker
json_symbol_trans = {
    mana_json_open_delimiter + variant + mana_json_close_delimiter : marker
    for json_sym, marker in ((json_symbol_tap, tap_marker),
                             (json_symbol_untap, untap_marker))
    for variant in (json_sym, json_sym.lower())
}

# internal marker -> delimited json symbol
symbol_trans = dict([
    (tap_marker, mana_json_open_delimiter + json_symbol_tap + mana_json_close_delimiter),
    (untap_marker, mana_json_open_delimiter + json_symbol_untap + mana_json_close_delimiter),
])

json_symbol_regex = r'\{[TtQq]\}'
|
||||||
|
|
||||||
|
def to_symbols(s):
    """Replace json tap / untap symbols in s with the internal markers.

    Every match of json_symbol_regex is looked up in json_symbol_trans and
    substituted throughout the string.
    """
    symstrs = re.findall(json_symbol_regex, s)
    # key = len replaces the Python 2-only cmp argument; the order is the same
    for symstr in sorted(symstrs, key = len, reverse = True):
        s = s.replace(symstr, json_symbol_trans[symstr])
    return s
|
||||||
|
|
||||||
|
def from_symbols(s):
|
||||||
|
symstrs
|
Loading…
Reference in a new issue