mtgencode/lib/utils.py
Bill Zorn 1a4965fd83 EVERYTHING HAS CHANGED
Added lib and script subdirs to organize things; the biggest change is that now
we have a really powerful Card class that can handle all of the decoding and
encoding for us. encode.py has been written to take advantage of this, other
things have not yet. Coming soon! As a side note the changes to output.txt
are purely cosmetic, though the order should be stable now.
2015-07-14 00:07:25 -07:00

456 lines
16 KiB
Python

import re
# Utilities for handling unicode, unary numbers, mana costs, and special symbols.
# For convenience we redefine everything from config so that it can all be accessed
# from the utils module.
import config
# Everything below is a plain alias of the attribute with the same name on the
# config module; no value is transformed here.
# separators
cardsep = config.cardsep
fieldsep = config.fieldsep
bsidesep = config.bsidesep
newline = config.newline
# special indicators
dash_marker = config.dash_marker
bullet_marker = config.bullet_marker
this_marker = config.this_marker
counter_marker = config.counter_marker
reserved_marker = config.reserved_marker
reserved_mana_marker = config.reserved_mana_marker
x_marker = config.x_marker
tap_marker = config.tap_marker
untap_marker = config.untap_marker
# unambiguous synonyms
counter_rename = config.counter_rename
# field labels
field_label_name = config.field_label_name
field_label_rarity = config.field_label_rarity
field_label_cost = config.field_label_cost
field_label_supertypes = config.field_label_supertypes
field_label_types = config.field_label_types
field_label_subtypes = config.field_label_subtypes
field_label_loyalty = config.field_label_loyalty
field_label_pt = config.field_label_pt
field_label_text = config.field_label_text
# additional fields we add to the json cards
json_field_bside = config.json_field_bside
json_field_set_name = config.json_field_set_name
# unicode / ascii conversion
# Maps individual unicode characters to the ascii text that replaces them.
# NOTE(review): both single quotation marks below map to a double quote
# character, not an apostrophe -- confirm this is intentional.
unicode_trans = {
u'\u2014' : dash_marker, # em dash
u'\u2022' : bullet_marker, # bullet
u'\u2019' : '"', # right single quotation mark
u'\u2018' : '"', # left single quotation mark
u'\u2212' : '-', # minus sign
u'\xe6' : 'ae', # ae ligature
u'\xfb' : 'u', # u with circumflex
u'\xfa' : 'u', # u with acute accent
u'\xe9' : 'e', # e with acute accent
u'\xe1' : 'a', # a with acute accent
u'\xe0' : 'a', # a with grave accent
u'\xe2' : 'a', # a with circumflex
u'\xf6' : 'o', # o with umlaut
u'\xed' : 'i', # i with acute accent
}
# this one is one-way only
def to_ascii(s):
    """Return s with every key of unicode_trans replaced by its mapped text.

    There is no inverse function: several distinct characters share the same
    replacement, so the original text cannot be recovered.
    """
    for fancy, plain in unicode_trans.items():
        s = s.replace(fancy, plain)
    return s
# unary numbers
# An encoded number is unary_marker followed by unary_counter repeated once
# per unit (see to_unary / from_unary).
unary_marker = config.unary_marker
unary_counter = config.unary_counter
# values above unary_max are clamped to it by to_unary
unary_max = config.unary_max
# to_unary looks numbers up here first and, on a hit, substitutes the mapped
# string instead of a unary encoding
unary_exceptions = config.unary_exceptions
def to_unary(s, warn = False):
    """Replace every run of decimal digits in s with its unary form.

    Each number found is rewritten as follows:
      * a value that is a key of unary_exceptions becomes the mapped string;
      * a value above unary_max is clamped to unary_max, and if warn is true
        the original input string is printed so the offender can be located;
      * any other value becomes unary_marker followed by one unary_counter
        per unit.

    The rewrite happens in one left-to-right pass over the matches, so a
    number is never touched just because its digits are a substring of
    another number (for example '05' appearing alongside '5').

    Returns the converted string.
    """
    def encode(match):
        value = int(match.group(0))
        if value in unary_exceptions:
            return unary_exceptions[value]
        if value > unary_max:
            if warn:
                # parenthesised single argument prints the same way on
                # python 2 and python 3
                print(s)
            value = unary_max
        return unary_marker + unary_counter * value

    return re.sub(r'[0123456789]+', encode, s)
def from_unary(s):
    """Replace every unary number in s with its decimal representation.

    A unary number is unary_marker followed by zero or more repetitions of
    unary_counter; its value is the number of repetitions. The counter is
    wrapped in a group so that the repetition applies to the whole counter
    string rather than only to its final character.

    All matches are rewritten in a single left-to-right pass, each maximal
    run being replaced by its own count.

    Returns the converted string.
    """
    marker_len = len(unary_marker)
    counter_len = len(unary_counter)
    pattern = re.escape(unary_marker) + '(?:' + re.escape(unary_counter) + ')*'

    def decode(match):
        # floor division keeps the count an integer on python 3 as well
        return str((len(match.group(0)) - marker_len) // counter_len)

    return re.sub(pattern, decode, s)
# mana syntax
# delimiters that wrap an encoded mana string
mana_open_delimiter = '{'
mana_close_delimiter = '}'
# the json form uses the same delimiters, plus a separator between the two
# halves of a two-character symbol (see mana_sym_to_json)
mana_json_open_delimiter = mana_open_delimiter
mana_json_close_delimiter = mana_close_delimiter
mana_json_hybrid_delimiter = '/'
# tags that wrap a whole mana string in forum output (see mana_untranslate)
mana_forum_open_delimiter = '[mana]'
mana_forum_close_delimiter = '[/mana]'
mana_unary_marker = '' # if the same as unary_marker, from_unary WILL replace numbers in mana costs
# each occurrence of this counter inside an encoded mana string adds one to
# the colorless total (see mana_untranslate)
mana_unary_counter = unary_counter
# The decoding from mtgjson format is dependent on the specific structure of
# these internally used mana symbol strings, so if you want to change them you'll
# also have to change the json decoding functions.
# Every symbol is either one or two characters long; the helper functions
# below branch on len(sym) < 2.
# standard mana symbol set
mana_W = 'W' # single color
mana_U = 'U'
mana_B = 'B'
mana_R = 'R'
mana_G = 'G'
mana_P = 'P' # colorless phyrexian
mana_S = 'S' # snow
mana_X = 'X' # colorless X
mana_WP = 'WP' # single color phyrexian
mana_UP = 'UP'
mana_BP = 'BP'
mana_RP = 'RP'
mana_GP = 'GP'
mana_2W = '2W' # single color hybrid
mana_2U = '2U'
mana_2B = '2B'
mana_2R = '2R'
mana_2G = '2G'
mana_WU = 'WU' # dual color hybrid
mana_WB = 'WB'
mana_RW = 'RW'
mana_GW = 'GW'
mana_UB = 'UB'
mana_UR = 'UR'
mana_GU = 'GU'
mana_BR = 'BR'
mana_BG = 'BG'
mana_RG = 'RG'
# alternative order symbols: each is the matching two-character symbol above
# with its characters reversed
mana_WP_alt = 'PW' # single color phyrexian
mana_UP_alt = 'PU'
mana_BP_alt = 'PB'
mana_RP_alt = 'PR'
mana_GP_alt = 'PG'
mana_2W_alt = 'W2' # single color hybrid
mana_2U_alt = 'U2'
mana_2B_alt = 'B2'
mana_2R_alt = 'R2'
mana_2G_alt = 'G2'
mana_WU_alt = 'UW' # dual color hybrid
mana_WB_alt = 'BW'
mana_RW_alt = 'WR'
mana_GW_alt = 'WG'
mana_UB_alt = 'BU'
mana_UR_alt = 'RU'
mana_GU_alt = 'UG'
mana_BR_alt = 'RB'
mana_BG_alt = 'GB'
mana_RG_alt = 'GR'
# special
mana_2 = '2' # use with 'in' to identify single color hybrid
# master symbol lists
# mana_syms holds every symbol in its standard character order
mana_syms = [
    # one-character symbols
    mana_W, mana_U, mana_B, mana_R, mana_G,
    mana_P, mana_S, mana_X,
    # single color phyrexian
    mana_WP, mana_UP, mana_BP, mana_RP, mana_GP,
    # single color hybrid
    mana_2W, mana_2U, mana_2B, mana_2R, mana_2G,
    # dual color hybrid
    mana_WU, mana_WB, mana_RW, mana_GW, mana_UB,
    mana_UR, mana_GU, mana_BR, mana_BG, mana_RG,
]
# mana_symalt holds the alternative-order spelling of every two-character symbol
mana_symalt = [
    # single color phyrexian
    mana_WP_alt, mana_UP_alt, mana_BP_alt, mana_RP_alt, mana_GP_alt,
    # single color hybrid
    mana_2W_alt, mana_2U_alt, mana_2B_alt, mana_2R_alt, mana_2G_alt,
    # dual color hybrid
    mana_WU_alt, mana_WB_alt, mana_RW_alt, mana_GW_alt, mana_UB_alt,
    mana_UR_alt, mana_GU_alt, mana_BR_alt, mana_BG_alt, mana_RG_alt,
]
# every recognized symbol: standard spellings first, then the alternatives
mana_symall = mana_syms + mana_symalt
# alt symbol conversion
def mana_alt(sym):
    """Return the other spelling of sym.

    One-character symbols are returned unchanged; longer symbols are
    returned with their characters reversed.

    Raises ValueError if sym is not in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_alt(): ' + repr(sym))
    return sym if len(sym) < 2 else sym[::-1]
# produce intended neural net output format
def mana_sym_to_encoding(sym):
    """Return the encoded form of sym.

    One-character symbols are doubled; longer symbols are returned unchanged.

    Raises ValueError if sym is not in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_encoding(): ' + repr(sym))
    return sym * 2 if len(sym) < 2 else sym
# produce json formatting used in mtgjson
def mana_sym_to_json(sym):
    """Return sym wrapped in the json mana delimiters.

    For a symbol of two characters, the hybrid delimiter is placed between
    the first and second character.

    Raises ValueError if sym is not in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_json(): ' + repr(sym))
    if len(sym) < 2:
        body = sym
    else:
        body = sym[0] + mana_json_hybrid_delimiter + sym[1]
    return mana_json_open_delimiter + body + mana_json_close_delimiter
# produce pretty formatting that renders on mtgsalvation forum
# converts individual symbols; surrounding [mana][/mana] tags are added elsewhere
def mana_sym_to_forum(sym):
    """Return the forum form of a single symbol.

    Alternative-order symbols are first converted back to their standard
    spelling. One-character symbols are returned bare; longer symbols are
    wrapped in the json mana delimiters.

    Raises ValueError if sym is not in mana_symall.
    """
    if sym not in mana_symall:
        raise ValueError('invalid mana symbol for mana_sym_to_forum(): ' + repr(sym))
    canonical = mana_alt(sym) if sym in mana_symalt else sym
    if len(canonical) >= 2:
        return mana_json_open_delimiter + canonical + mana_json_close_delimiter
    return canonical
# forward symbol tables for encoding
mana_syms_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_syms}
mana_symalt_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_symalt}
mana_symall_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_symall}
mana_syms_jencode = {sym : mana_sym_to_json(sym) for sym in mana_syms}
mana_symalt_jencode = {sym : mana_sym_to_json(sym) for sym in mana_symalt}
mana_symall_jencode = {sym : mana_sym_to_json(sym) for sym in mana_symall}
# reverse symbol tables for decoding
mana_syms_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_syms}
mana_symalt_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_symalt}
mana_symall_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_symall}
mana_syms_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_syms}
mana_symalt_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_symalt}
mana_symall_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_symall}
# going straight from json to encoding and vice versa
def mana_encode_direct(jsym):
    """Return the encoded text for a json-formatted symbol.

    Raises ValueError if jsym is not a key of mana_symall_jdecode.
    """
    if jsym in mana_symall_jdecode:
        return mana_symall_encode[mana_symall_jdecode[jsym]]
    raise ValueError('json string not found in decode table for mana_encode_direct(): '
                     + repr(jsym))
def mana_decode_direct(sym):
    """Return the json-formatted text for an encoded symbol.

    Raises ValueError if sym is not a key of mana_symall_decode.
    """
    if sym in mana_symall_decode:
        return mana_symall_jencode[mana_symall_decode[sym]]
    raise ValueError('mana symbol not found in decode table for mana_decode_direct(): '
                     + repr(sym))
# hacked in support for mtgsalvation forum
def mana_decode_direct_forum(sym):
    """Return the forum-formatted text for an encoded symbol.

    Raises ValueError if sym is not a key of mana_symall_decode.
    """
    if sym in mana_symall_decode:
        return mana_sym_to_forum(mana_symall_decode[sym])
    raise ValueError('mana symbol not found in decode table for mana_decode_direct_forum(): '
                     + repr(sym))
# processing entire strings
def unique_string(s):
    """Return the distinct characters of s, in order of first appearance.

    The result is the same from one run to the next, so strings built from
    it (such as the character classes in this module) are reproducible.
    """
    seen = set()
    kept = []
    for ch in s:
        if ch not in seen:
            seen.add(ch)
            kept.append(ch)
    return ''.join(kept)
# Character sets and regexes for encoded mana strings. The "strict" variants
# contain the characters exactly as they appear in the symbol tables; the
# non-strict variants also include the lowercased form of each character.
mana_charset_special = mana_unary_marker + mana_unary_counter
mana_charset_strict = unique_string(''.join(mana_symall) + mana_charset_special)
mana_charset = unique_string(mana_charset_strict + mana_charset_strict.lower())
# an encoded mana string may be empty between the delimiters (note the '*')
mana_regex_strict = (re.escape(mana_open_delimiter) + '['
+ re.escape(mana_charset_strict)
+ ']*' + re.escape(mana_close_delimiter))
mana_regex = (re.escape(mana_open_delimiter) + '['
+ re.escape(mana_charset)
+ ']*' + re.escape(mana_close_delimiter))
# as a special case, we let unary or decimal numbers exist in json mana strings
mana_json_charset_special = ('0123456789' + unary_marker + unary_counter)
mana_json_charset_strict = unique_string(''.join(mana_symall_jdecode) + mana_json_charset_special)
mana_json_charset = unique_string(mana_json_charset_strict + mana_json_charset_strict.lower())
# note that json mana strings can't be empty between the delimiters
mana_json_regex_strict = (re.escape(mana_json_open_delimiter) + '['
+ re.escape(mana_json_charset_strict)
+ ']+' + re.escape(mana_json_close_delimiter))
mana_json_regex = (re.escape(mana_json_open_delimiter) + '['
+ re.escape(mana_json_charset)
+ ']+' + re.escape(mana_json_close_delimiter))
# bare numbers, and the same numbers wrapped in the json mana delimiters
number_decimal_regex = r'[0123456789]+'
number_unary_regex = re.escape(unary_marker) + re.escape(unary_counter) + '*'
mana_decimal_regex = (re.escape(mana_json_open_delimiter) + number_decimal_regex
+ re.escape(mana_json_close_delimiter))
mana_unary_regex = (re.escape(mana_json_open_delimiter) + number_unary_regex
+ re.escape(mana_json_close_delimiter))
# convert a json mana string to the proper encoding
def mana_translate(jmanastr):
    """Convert a json mana string into a single encoded mana string.

    Three passes are made over the text, each replacing the longest
    candidates first:
      1. delimiter-wrapped unary numbers become that many mana_unary_counter;
      2. delimiter-wrapped decimal numbers become that many mana_unary_counter;
      3. every json symbol in mana_symall_jdecode becomes its encoded form.

    Returns the rewritten text wrapped in mana_open_delimiter and
    mana_close_delimiter.
    """
    manastr = jmanastr
    for n in sorted(re.findall(mana_unary_regex, manastr), key = len, reverse = True):
        ns = re.findall(number_unary_regex, n)
        # floor division: the result is used as a string repeat count
        i = (len(ns[0]) - len(unary_marker)) // len(unary_counter)
        manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i)
    for n in sorted(re.findall(mana_decimal_regex, manastr), key = len, reverse = True):
        ns = re.findall(number_decimal_regex, n)
        i = int(ns[0])
        manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i)
    for jsym in sorted(mana_symall_jdecode, key = len, reverse = True):
        if jsym in manastr:
            manastr = manastr.replace(jsym, mana_encode_direct(jsym))
    return mana_open_delimiter + manastr + mana_close_delimiter
# convert an encoded mana string back to json
# shortest and longest key lengths in the decode table; these bound the
# lookahead used when scanning for a symbol
mana_symlen_min = min([len(sym) for sym in mana_symall_decode])
mana_symlen_max = max([len(sym) for sym in mana_symall_decode])
def mana_untranslate(manastr, for_forum = False):
    """Convert one encoded mana string into json (or forum) text.

    The first and last characters of manastr are dropped as delimiters and
    the remainder is scanned left to right: unary markers are skipped, each
    unary counter adds one to the colorless total, and each recognized
    encoded symbol is decoded. Characters that match nothing are skipped.

    With for_forum false the result is the colorless total in json
    delimiters (omitted when it is zero and symbols are present) followed by
    the decoded symbols. With for_forum true the whole result is wrapped in
    the forum tags instead. When no symbols were decoded, the total is
    always emitted, even if it is zero.
    """
    inner = manastr[1:-1]
    decode_sym = mana_decode_direct_forum if for_forum else mana_decode_direct
    marker_len = len(mana_unary_marker)
    counter_len = len(mana_unary_counter)
    pieces = []
    colorless_total = 0
    idx = 0
    while idx < len(inner):
        # the marker test must be skipped for an empty marker, otherwise it
        # would always match without advancing and loop forever
        if marker_len > 0 and inner[idx:idx + marker_len] == mana_unary_marker:
            idx += marker_len
            continue
        if inner[idx:idx + counter_len] == mana_unary_counter:
            idx += counter_len
            colorless_total += 1
            continue
        for symlen in range(mana_symlen_min, mana_symlen_max + 1):
            candidate = inner[idx:idx + symlen]
            if candidate in mana_symall_decode:
                pieces.append(decode_sym(candidate))
                idx += symlen
                break
        else:
            # nothing recognized here: step over one character so that the
            # scan always makes progress
            idx += 1
    jmanastr = ''.join(pieces)
    if for_forum:
        if jmanastr == '':
            return mana_forum_open_delimiter + str(colorless_total) + mana_forum_close_delimiter
        prefix = '' if colorless_total == 0 else str(colorless_total)
        return mana_forum_open_delimiter + prefix + jmanastr + mana_forum_close_delimiter
    if jmanastr == '':
        return mana_json_open_delimiter + str(colorless_total) + mana_json_close_delimiter
    if colorless_total == 0:
        return jmanastr
    return (mana_json_open_delimiter + str(colorless_total) + mana_json_close_delimiter
            + jmanastr)
# finally, replacing all instances in a string
# notice the calls to .upper(), this way we recognize lowercase symbols as well just in case
def to_mana(s):
    """Replace every json mana string found in s with its encoded form.

    Matches of mana_json_regex are processed longest first; each one is
    uppercased and passed through mana_translate, and every occurrence of
    the matched text in s is replaced by the result.

    Returns the converted string.
    """
    jmanastrs = re.findall(mana_json_regex, s)
    for jmanastr in sorted(jmanastrs, key = len, reverse = True):
        s = s.replace(jmanastr, mana_translate(jmanastr.upper()))
    return s
def from_mana(s, for_forum = False):
    """Replace every encoded mana string found in s with json or forum text.

    Matches of mana_regex are processed longest first; each one is
    uppercased and passed through mana_untranslate (with for_forum forwarded
    unchanged), and every occurrence of the matched text in s is replaced by
    the result.

    Returns the converted string.
    """
    manastrs = re.findall(mana_regex, s)
    for manastr in sorted(manastrs, key = len, reverse = True):
        s = s.replace(manastr, mana_untranslate(manastr.upper(), for_forum = for_forum))
    return s
# Translation could also be accomplished using the datamine.Manacost object's
# display methods, but these direct string transformations are retained for
# quick scripting and convenience (and used under the hood by that class to
# do its formatting).
# more convenience features for formatting tap / untap symbols
# the json spelling of each symbol is the marker itself, wrapped in the json
# mana delimiters by the tables below
json_symbol_tap = tap_marker
json_symbol_untap = untap_marker
# json text -> marker; both the marker as given and its lowercase form are accepted
json_symbol_trans = {
mana_json_open_delimiter + json_symbol_tap + mana_json_close_delimiter : tap_marker,
mana_json_open_delimiter + json_symbol_tap.lower() + mana_json_close_delimiter : tap_marker,
mana_json_open_delimiter + json_symbol_untap + mana_json_close_delimiter : untap_marker,
mana_json_open_delimiter + json_symbol_untap.lower() + mana_json_close_delimiter : untap_marker,
}
# forum-tagged text -> marker
# NOTE(review): nothing in the visible part of this file reads this table
json_forum_trans = {
mana_forum_open_delimiter + json_symbol_tap + mana_forum_close_delimiter : tap_marker,
mana_forum_open_delimiter + json_symbol_tap.lower() + mana_forum_close_delimiter : tap_marker,
mana_forum_open_delimiter + json_symbol_untap + mana_forum_close_delimiter : untap_marker,
mana_forum_open_delimiter + json_symbol_untap.lower() + mana_forum_close_delimiter : untap_marker,
}
# marker -> json text
symbol_trans = {
tap_marker : mana_json_open_delimiter + json_symbol_tap + mana_json_close_delimiter,
untap_marker : mana_json_open_delimiter + json_symbol_untap + mana_json_close_delimiter,
}
# Regexes matching a tap / untap symbol in json form and in marker form.
# The markers are escaped before being placed inside the character classes so
# that a marker which is special there (for example '^', ']' or '-') is still
# matched literally; for alphanumeric markers the pattern is unchanged.
json_symbol_regex = (re.escape(mana_json_open_delimiter) + '['
                     + re.escape(json_symbol_tap) + re.escape(json_symbol_tap.lower())
                     + re.escape(json_symbol_untap) + re.escape(json_symbol_untap.lower())
                     + ']' + re.escape(mana_json_close_delimiter))
symbol_regex = '[' + re.escape(tap_marker) + re.escape(untap_marker) + ']'
def to_symbols(s):
    """Replace every json tap / untap symbol in s with its marker.

    Matches of json_symbol_regex are looked up in json_symbol_trans and
    every occurrence of the matched text in s is replaced by the marker.

    Returns the converted string.
    """
    jsymstrs = re.findall(json_symbol_regex, s)
    for jsymstr in sorted(jsymstrs, key = len, reverse = True):
        s = s.replace(jsymstr, json_symbol_trans[jsymstr])
    return s
def from_symbols(s, for_forum = False):
    """Replace every tap / untap marker in s with its json text.

    Each match of symbol_regex is looked up in symbol_trans. The substitution
    is done in a single pass over the matches: the replacement text contains
    the marker it replaces, so replacing by repeated global search would wrap
    a marker once for every time it occurs in s.

    for_forum is accepted but has no effect on the result.

    Returns the converted string.
    """
    return re.sub(symbol_regex, lambda match: symbol_trans[match.group(0)], s)
# matches any single character that is not a lowercase ascii letter or an apostrophe
unletters_regex = r"[^abcdefghijklmnopqrstuvwxyz']"