1a4965fd83
Added lib and script subdirs to organize things; the biggest change is that we now have a really powerful Card class that can handle all of the decoding and encoding for us. encode.py has been written to take advantage of this; other things have not yet. Coming soon! As a side note, the changes to output.txt are purely cosmetic, though the order should be stable now.
455 lines
16 KiB
Python
455 lines
16 KiB
Python
import re
|
|
|
|
# Utilities for handling unicode, unary numbers, mana costs, and special symbols.
# For convenience we redefine everything from config so that it can all be accessed
# from the utils module.
|
|
|
|
import config
|
|
|
|
# Everything below is re-exported from config so that callers can reach it
# through this module.

# separators
cardsep = config.cardsep
fieldsep = config.fieldsep
bsidesep = config.bsidesep
newline = config.newline

# special indicators
dash_marker = config.dash_marker
bullet_marker = config.bullet_marker
this_marker = config.this_marker
counter_marker = config.counter_marker
reserved_marker = config.reserved_marker
reserved_mana_marker = config.reserved_mana_marker
x_marker = config.x_marker
tap_marker = config.tap_marker
untap_marker = config.untap_marker

# unambiguous synonyms
counter_rename = config.counter_rename

# field labels
field_label_name = config.field_label_name
field_label_rarity = config.field_label_rarity
field_label_cost = config.field_label_cost
field_label_supertypes = config.field_label_supertypes
field_label_types = config.field_label_types
field_label_subtypes = config.field_label_subtypes
field_label_loyalty = config.field_label_loyalty
field_label_pt = config.field_label_pt
field_label_text = config.field_label_text

# additional fields we add to the json cards
json_field_bside = config.json_field_bside
json_field_set_name = config.json_field_set_name
|
|
|
|
# unicode / ascii conversion
# Maps each unicode character we expect to see onto the ascii text that
# to_ascii() substitutes for it.
unicode_trans = {
    # punctuation
    u'\u2014' : dash_marker,   # em dash
    u'\u2022' : bullet_marker, # bullet
    u'\u2019' : '"',           # right single quotation mark
    u'\u2018' : '"',           # left single quotation mark
    u'\u2212' : '-',           # minus sign
    # accented letters and ligatures
    u'\xe6' : 'ae',            # ae ligature
    u'\xfb' : 'u',             # u with circumflex
    u'\xfa' : 'u',             # u with acute accent
    u'\xe9' : 'e',             # e with acute accent
    u'\xe1' : 'a',             # a with acute accent
    u'\xe0' : 'a',             # a with grave accent
    u'\xe2' : 'a',             # a with circumflex
    u'\xf6' : 'o',             # o with umlaut
    u'\xed' : 'i',             # i with acute accent
}
|
|
|
|
# Replace every character listed in unicode_trans with its ascii stand-in and
# return the result. Several characters share the same replacement, so this
# conversion is one-way only.
def to_ascii(s):
    for uchar, replacement in unicode_trans.items():
        s = s.replace(uchar, replacement)
    return s
|
|
|
|
# unary numbers
# The unary encoding parameters, re-exported from config.
unary_marker = config.unary_marker
unary_counter = config.unary_counter
unary_max = config.unary_max
unary_exceptions = config.unary_exceptions
|
|
|
|
# Rewrite every decimal number in s in unary notation and return the result.
#
# Each maximal run of digits is converted on its own:
#   - a value found in unary_exceptions becomes the replacement stored there;
#   - a value above unary_max is clamped to unary_max (if warn is true, the
#     input string is printed so the lossy conversion can be tracked down);
#   - any other value becomes unary_marker followed by one unary_counter per
#     unit.
def to_unary(s, warn = False):
    def convert(match):
        value = int(match.group(0))
        if value in unary_exceptions:
            return unary_exceptions[value]
        if value > unary_max:
            if warn:
                print(s)
            value = unary_max
        return unary_marker + unary_counter * value

    # One re.sub pass replaces each number exactly where it was found. Chained
    # str.replace calls could corrupt a number that is a substring of another
    # one (for example '7' and '007' appearing in the same string).
    return re.sub(r'[0123456789]+', convert, s)
|
|
|
|
# Inverse of the unary encoding: replace every unary number in s (unary_marker
# followed by zero or more repetitions of unary_counter) with its decimal
# value, and return the result.
def from_unary(s):
    # The counter is wrapped in a group so that '*' repeats the whole counter
    # string rather than only its final character.
    pattern = re.escape(unary_marker) + '(?:' + re.escape(unary_counter) + ')*'

    def convert(match):
        # floor division keeps the count an integer on python 3 as well
        count = (len(match.group(0)) - len(unary_marker)) // len(unary_counter)
        return str(count)

    # a single pass replaces each number in place, so shorter numbers can
    # never clobber part of a longer one
    return re.sub(pattern, convert, s)
|
|
|
|
# mana syntax

# delimiters of the internal encoding
mana_open_delimiter = '{'
mana_close_delimiter = '}'

# delimiters of the json format; the hybrid delimiter separates the two
# halves of a two-character symbol
mana_json_open_delimiter = mana_open_delimiter
mana_json_close_delimiter = mana_close_delimiter
mana_json_hybrid_delimiter = '/'

# tags that surround a mana cost in forum output
mana_forum_open_delimiter = '[mana]'
mana_forum_close_delimiter = '[/mana]'

# Unary numbers inside mana costs.
# If the marker is the same as unary_marker, from_unary WILL replace numbers
# in mana costs.
mana_unary_marker = ''
mana_unary_counter = unary_counter
|
|
|
|
# The decoding from mtgjson format is dependent on the specific structure of
# these internally used mana symbol strings, so if you want to change them you'll
# also have to change the json decoding functions.

# standard mana symbol set

# single color
mana_W = 'W'
mana_U = 'U'
mana_B = 'B'
mana_R = 'R'
mana_G = 'G'

# colorless phyrexian, snow, colorless X
mana_P = 'P'
mana_S = 'S'
mana_X = 'X'

# single color phyrexian
mana_WP = 'WP'
mana_UP = 'UP'
mana_BP = 'BP'
mana_RP = 'RP'
mana_GP = 'GP'

# single color hybrid
mana_2W = '2W'
mana_2U = '2U'
mana_2B = '2B'
mana_2R = '2R'
mana_2G = '2G'

# dual color hybrid
mana_WU = 'WU'
mana_WB = 'WB'
mana_RW = 'RW'
mana_GW = 'GW'
mana_UB = 'UB'
mana_UR = 'UR'
mana_GU = 'GU'
mana_BR = 'BR'
mana_BG = 'BG'
mana_RG = 'RG'

# alternative order symbols: each is the matching symbol above spelled backwards

# single color phyrexian
mana_WP_alt = 'PW'
mana_UP_alt = 'PU'
mana_BP_alt = 'PB'
mana_RP_alt = 'PR'
mana_GP_alt = 'PG'

# single color hybrid
mana_2W_alt = 'W2'
mana_2U_alt = 'U2'
mana_2B_alt = 'B2'
mana_2R_alt = 'R2'
mana_2G_alt = 'G2'

# dual color hybrid
mana_WU_alt = 'UW'
mana_WB_alt = 'BW'
mana_RW_alt = 'WR'
mana_GW_alt = 'WG'
mana_UB_alt = 'BU'
mana_UR_alt = 'RU'
mana_GU_alt = 'UG'
mana_BR_alt = 'RB'
mana_BG_alt = 'GB'
mana_RG_alt = 'GR'

# special: use with 'in' to identify single color hybrid
mana_2 = '2'
|
|
|
|
# master symbol lists

# every symbol in its standard spelling
mana_syms = [
    mana_W, mana_U, mana_B, mana_R, mana_G,
    mana_P, mana_S, mana_X,
    mana_WP, mana_UP, mana_BP, mana_RP, mana_GP,
    mana_2W, mana_2U, mana_2B, mana_2R, mana_2G,
    mana_WU, mana_WB, mana_RW, mana_GW, mana_UB,
    mana_UR, mana_GU, mana_BR, mana_BG, mana_RG,
]

# the alternative spelling of every two-character symbol
mana_symalt = [
    mana_WP_alt, mana_UP_alt, mana_BP_alt, mana_RP_alt, mana_GP_alt,
    mana_2W_alt, mana_2U_alt, mana_2B_alt, mana_2R_alt, mana_2G_alt,
    mana_WU_alt, mana_WB_alt, mana_RW_alt, mana_GW_alt, mana_UB_alt,
    mana_UR_alt, mana_GU_alt, mana_BR_alt, mana_BG_alt, mana_RG_alt,
]

# both spellings together, standard ones first
mana_symall = mana_syms + mana_symalt
|
|
|
|
# alt symbol conversion
# Returns the other spelling of a mana symbol: symbols of two or more
# characters are reversed, shorter ones are returned unchanged.
# Raises ValueError if sym is not in mana_symall.
def mana_alt(sym):
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_alt(): ' + repr(sym))
    return sym[::-1] if len(sym) >= 2 else sym
|
|
|
|
# produce intended neural net output format
# Single-character symbols are written twice; longer symbols are kept as is.
# Raises ValueError if sym is not in mana_symall.
def mana_sym_to_encoding(sym):
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_encoding(): ' + repr(sym))
    return sym if len(sym) >= 2 else sym * 2
|
|
|
|
# produce json formatting used in mtgjson
# The symbol is wrapped in the json delimiters; for a symbol of two or more
# characters the first two are joined by the hybrid delimiter.
# Raises ValueError if sym is not in mana_symall.
def mana_sym_to_json(sym):
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_json(): ' + repr(sym))
    if len(sym) >= 2:
        inner = sym[0] + mana_json_hybrid_delimiter + sym[1]
    else:
        inner = sym
    return mana_json_open_delimiter + inner + mana_json_close_delimiter
|
|
|
|
# produce pretty formatting that renders on mtgsalvation forum
# converts individual symbols; surrounding [mana][/mana] tags are added elsewhere
# Alternative spellings are first converted back with mana_alt(). Symbols of
# two or more characters are then wrapped in the json delimiters; shorter
# ones are returned bare.
# Raises ValueError if sym is not in mana_symall.
def mana_sym_to_forum(sym):
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_forum(): ' + repr(sym))
    canonical = mana_alt(sym) if sym in mana_symalt else sym
    if len(canonical) >= 2:
        return mana_json_open_delimiter + canonical + mana_json_close_delimiter
    return canonical
|
|
|
|
# forward symbol tables for encoding
# symbol -> internal encoding
mana_syms_encode = {s : mana_sym_to_encoding(s) for s in mana_syms}
mana_symalt_encode = {s : mana_sym_to_encoding(s) for s in mana_symalt}
mana_symall_encode = {s : mana_sym_to_encoding(s) for s in mana_symall}
# symbol -> json string
mana_syms_jencode = {s : mana_sym_to_json(s) for s in mana_syms}
mana_symalt_jencode = {s : mana_sym_to_json(s) for s in mana_symalt}
mana_symall_jencode = {s : mana_sym_to_json(s) for s in mana_symall}

# reverse symbol tables for decoding
# internal encoding -> symbol
mana_syms_decode = {mana_sym_to_encoding(s) : s for s in mana_syms}
mana_symalt_decode = {mana_sym_to_encoding(s) : s for s in mana_symalt}
mana_symall_decode = {mana_sym_to_encoding(s) : s for s in mana_symall}
# json string -> symbol
mana_syms_jdecode = {mana_sym_to_json(s) : s for s in mana_syms}
mana_symalt_jdecode = {mana_sym_to_json(s) : s for s in mana_symalt}
mana_symall_jdecode = {mana_sym_to_json(s) : s for s in mana_symall}
|
|
|
|
# going straight from json to encoding and vice versa
# Converts one json symbol string into its internal encoding.
# Raises ValueError if jsym is not a key of mana_symall_jdecode.
def mana_encode_direct(jsym):
    if jsym in mana_symall_jdecode:
        return mana_symall_encode[mana_symall_jdecode[jsym]]
    raise ValueError('json string not found in decode table for mana_encode_direct(): '
                     + repr(jsym))
|
|
|
|
# Converts one internally encoded symbol into its json string.
# Raises ValueError if sym is not a key of mana_symall_decode.
def mana_decode_direct(sym):
    if sym in mana_symall_decode:
        return mana_symall_jencode[mana_symall_decode[sym]]
    raise ValueError('mana symbol not found in decode table for mana_decode_direct(): '
                     + repr(sym))
|
|
|
|
# hacked in support for mtgsalvation forum
# Converts one internally encoded symbol into its forum rendering.
# Raises ValueError if sym is not a key of mana_symall_decode.
def mana_decode_direct_forum(sym):
    if sym in mana_symall_decode:
        return mana_sym_to_forum(mana_symall_decode[sym])
    raise ValueError('mana symbol not found in decode table for mana_decode_direct_forum(): '
                     + repr(sym))
|
|
|
|
# processing entire strings
|
|
# Return the distinct characters of s as a string, each exactly once.
# Characters are kept in order of first appearance, so the result is the same
# on every run instead of depending on set iteration order.
def unique_string(s):
    seen = set()
    ordered = []
    for ch in s:
        if ch not in seen:
            seen.add(ch)
            ordered.append(ch)
    return ''.join(ordered)
|
|
|
|
# Characters that may appear between the delimiters of an encoded mana string:
# every symbol character plus the mana unary characters. The non-strict
# variant also admits the lowercase forms.
mana_charset_special = mana_unary_marker + mana_unary_counter
mana_charset_strict = unique_string(''.join(mana_symall) + mana_charset_special)
mana_charset = unique_string(mana_charset_strict + mana_charset_strict.lower())

# an encoded mana string: delimiters around zero or more charset characters
mana_regex_strict = ''.join([
    re.escape(mana_open_delimiter),
    '[', re.escape(mana_charset_strict), ']*',
    re.escape(mana_close_delimiter),
])
mana_regex = ''.join([
    re.escape(mana_open_delimiter),
    '[', re.escape(mana_charset), ']*',
    re.escape(mana_close_delimiter),
])

# as a special case, we let unary or decimal numbers exist in json mana strings
mana_json_charset_special = ('0123456789' + unary_marker + unary_counter)
mana_json_charset_strict = unique_string(''.join(mana_symall_jdecode) + mana_json_charset_special)
mana_json_charset = unique_string(mana_json_charset_strict + mana_json_charset_strict.lower())

# note that json mana strings can't be empty between the delimiters
mana_json_regex_strict = ''.join([
    re.escape(mana_json_open_delimiter),
    '[', re.escape(mana_json_charset_strict), ']+',
    re.escape(mana_json_close_delimiter),
])
mana_json_regex = ''.join([
    re.escape(mana_json_open_delimiter),
    '[', re.escape(mana_json_charset), ']+',
    re.escape(mana_json_close_delimiter),
])

# bare numbers, and numbers enclosed in the json delimiters
number_decimal_regex = r'[0123456789]+'
number_unary_regex = re.escape(unary_marker) + re.escape(unary_counter) + '*'
mana_decimal_regex = ''.join([
    re.escape(mana_json_open_delimiter),
    number_decimal_regex,
    re.escape(mana_json_close_delimiter),
])
mana_unary_regex = ''.join([
    re.escape(mana_json_open_delimiter),
    number_unary_regex,
    re.escape(mana_json_close_delimiter),
])
|
|
|
|
# convert a json mana string to the proper encoding
#
# Three rewriting passes run over jmanastr, each replacing longer matches
# before shorter ones:
#   1. brace-enclosed unary numbers become a run of mana_unary_counter;
#   2. brace-enclosed decimal numbers become a run of mana_unary_counter;
#   3. json symbols become their internal encoding.
# The result is returned wrapped in the mana delimiters.
def mana_translate(jmanastr):
    manastr = jmanastr

    # sorted(..., key = len) replaces the python-2-only cmp function and
    # gives the same ordering
    for n in sorted(re.findall(mana_unary_regex, manastr), key = len, reverse = True):
        ns = re.findall(number_unary_regex, n)
        # floor division keeps the count an integer on python 3 as well
        i = (len(ns[0]) - len(unary_marker)) // len(unary_counter)
        manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i)

    for n in sorted(re.findall(mana_decimal_regex, manastr), key = len, reverse = True):
        ns = re.findall(number_decimal_regex, n)
        i = int(ns[0])
        manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i)

    for jsym in sorted(mana_symall_jdecode, key = len, reverse = True):
        if jsym in manastr:
            manastr = manastr.replace(jsym, mana_encode_direct(jsym))

    return mana_open_delimiter + manastr + mana_close_delimiter
|
|
|
|
# convert an encoded mana string back to json

# shortest and longest encoded symbol, which bound the search in mana_untranslate
mana_symlen_min = min(len(sym) for sym in mana_symall_decode)
mana_symlen_max = max(len(sym) for sym in mana_symall_decode)

# Decodes one encoded mana string (delimiters included) and returns it in json
# format, or in forum format when for_forum is true.
#
# The text between the delimiters is scanned left to right: unary counters
# are tallied into a colorless total, known symbols are decoded, and anything
# unrecognized is skipped one character at a time. The colorless total is
# emitted in front of the symbols, and is left out when it is zero unless
# there are no symbols at all.
def mana_untranslate(manastr, for_forum = False):
    decode_sym = mana_decode_direct_forum if for_forum else mana_decode_direct

    # drop the one-character delimiter at each end
    inner = manastr[1:-1]
    pieces = []
    colorless_total = 0
    idx = 0
    while idx < len(inner):
        # an empty marker must never match, or we would loop here forever
        if mana_unary_marker and inner.startswith(mana_unary_marker, idx):
            idx += len(mana_unary_marker)
            continue
        if inner.startswith(mana_unary_counter, idx):
            idx += len(mana_unary_counter)
            colorless_total += 1
            continue

        start = idx
        for symlen in range(mana_symlen_min, mana_symlen_max + 1):
            candidate = inner[idx:idx + symlen]
            if candidate in mana_symall_decode:
                pieces.append(decode_sym(candidate))
                idx += symlen
                break
        # step over a character we don't recognize so the loop always advances
        if idx == start:
            idx += 1

    body = ''.join(pieces)
    # the count is shown when it is nonzero, or when it is all there is
    show_count = colorless_total != 0 or body == ''

    if for_forum:
        count = str(colorless_total) if show_count else ''
        return mana_forum_open_delimiter + count + body + mana_forum_close_delimiter

    if show_count:
        count = mana_json_open_delimiter + str(colorless_total) + mana_json_close_delimiter
    else:
        count = ''
    return count + body
|
|
|
|
# finally, replacing all instances in a string
# notice the calls to .upper(), this way we recognize lowercase symbols as well just in case
#
# Finds every json mana string in s and replaces it with its internal
# encoding, longest strings first. Returns the converted string.
def to_mana(s):
    jmanastrs = re.findall(mana_json_regex, s)
    # key = len replaces the python-2-only cmp function and orders identically
    for jmanastr in sorted(jmanastrs, key = len, reverse = True):
        s = s.replace(jmanastr, mana_translate(jmanastr.upper()))
    return s
|
|
|
|
# Finds every encoded mana string in s and replaces it with its json form, or
# with its forum form when for_forum is true, longest strings first. Matches
# are uppercased before decoding so lowercase symbols are recognized too.
# Returns the converted string.
def from_mana(s, for_forum = False):
    manastrs = re.findall(mana_regex, s)
    # key = len replaces the python-2-only cmp function and orders identically
    for manastr in sorted(manastrs, key = len, reverse = True):
        s = s.replace(manastr, mana_untranslate(manastr.upper(), for_forum = for_forum))
    return s
|
|
|
|
# Translation could also be accomplished using the datamine.Manacost object's
# display methods, but these direct string transformations are retained for
# quick scripting and convenience (and used under the hood by that class to
# do its formatting).
|
|
|
|
# more convenience features for formatting tap / untap symbols
json_symbol_tap = tap_marker
json_symbol_untap = untap_marker

# json spelling (either case) -> marker
json_symbol_trans = {
    mana_json_open_delimiter + json_symbol_tap + mana_json_close_delimiter
        : tap_marker,
    mana_json_open_delimiter + json_symbol_tap.lower() + mana_json_close_delimiter
        : tap_marker,
    mana_json_open_delimiter + json_symbol_untap + mana_json_close_delimiter
        : untap_marker,
    mana_json_open_delimiter + json_symbol_untap.lower() + mana_json_close_delimiter
        : untap_marker,
}

# forum spelling (either case) -> marker
json_forum_trans = {
    mana_forum_open_delimiter + json_symbol_tap + mana_forum_close_delimiter
        : tap_marker,
    mana_forum_open_delimiter + json_symbol_tap.lower() + mana_forum_close_delimiter
        : tap_marker,
    mana_forum_open_delimiter + json_symbol_untap + mana_forum_close_delimiter
        : untap_marker,
    mana_forum_open_delimiter + json_symbol_untap.lower() + mana_forum_close_delimiter
        : untap_marker,
}

# marker -> json spelling
symbol_trans = {
    tap_marker
        : mana_json_open_delimiter + json_symbol_tap + mana_json_close_delimiter,
    untap_marker
        : mana_json_open_delimiter + json_symbol_untap + mana_json_close_delimiter,
}

# The markers are placed inside a character class without escaping.
# NOTE(review): this presumably relies on each marker being a single ordinary
# character -- confirm against config.
json_symbol_regex = ''.join([
    re.escape(mana_json_open_delimiter),
    '[',
    json_symbol_tap, json_symbol_tap.lower(),
    json_symbol_untap, json_symbol_untap.lower(),
    ']',
    re.escape(mana_json_close_delimiter),
])
symbol_regex = '[' + tap_marker + untap_marker + ']'
|
|
|
|
# Replace every json tap / untap symbol in s (either case) with the matching
# marker and return the result.
def to_symbols(s):
    # One re.sub pass substitutes each match in place. This avoids both the
    # python-2-only cmp sort and re-running str.replace for repeated matches.
    return re.sub(json_symbol_regex, lambda m: json_symbol_trans[m.group(0)], s)
|
|
|
|
# Replace every tap / untap marker in s with its json spelling and return the
# result. for_forum is accepted but currently ignored: the json spelling is
# always produced.
def from_symbols(s, for_forum = False):
    # Each replacement contains the marker it replaces, so running str.replace
    # once per match would wrap a marker that occurs twice a second time
    # (doubling its delimiters). A single re.sub pass rewrites every
    # occurrence exactly once.
    return re.sub(symbol_regex, lambda m: symbol_trans[m.group(0)], s)
|
|
|
|
# matches any single character that is not a lowercase ascii letter or an apostrophe
unletters_regex = r"[^abcdefghijklmnopqrstuvwxyz']"
|
|
|