From db0b9a9f9aef5400dc1b2594b6a7c98f2483dea0 Mon Sep 17 00:00:00 2001 From: Bill Zorn Date: Mon, 6 Jul 2015 22:26:25 -0700 Subject: [PATCH] split most of the code from config into utils, encode now uses utils as a lib fixed MANY MANY ERRORS in the original output file by using this superior code --- config.py | 371 +---------------------------------------------- encode.py | 170 +++++----------------- utils.py | 424 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 460 insertions(+), 505 deletions(-) create mode 100644 utils.py diff --git a/config.py b/config.py index 9551d4e..a9e831d 100644 --- a/config.py +++ b/config.py @@ -1,14 +1,8 @@ import re -# Don't be intimidated by the massive size of this file. It provides both the -# raw character decisions made about the encoding scheme as variables, and -# a bunch of tables and functions to make dealing with mana costs and unary -# numbers easier. For the most part the functions should adapt if you change -# the specific delimiters and markers used. - -# The decoding from mtgjson format is dependent on the specific structure of -# the internally used mana symbol strings, so if you want to change that you'll -# also have to change the json decoding functions. +# Utilities for handling unicode, unary numbers, mana costs, and special symbols. +# For convenience we redefine everything from utils so that it can all be accessed +# from the utils module. # separators cardsep = '\n\n' @@ -29,29 +23,6 @@ untap_marker = 'Q' # unambiguous synonyms counter_rename = 'uncast' -# unicode / ascii conversion -unicode_trans = { - u'\u2014' : dash_marker, # unicode long dash - u'\u2022' : bullet_marker, # unicode bullet - u'\u2019' : '"', # single quote - u'\u2018' : '"', # single quote - u'\u2212' : '-', # minus sign - u'\xe6' : 'ae', # ae symbol - u'\xfb' : 'u', # u with caret - u'\xfa' : 'u', # u with accent - u'\xe9' : 'e', # e with accent - u'\xe1' : 'a', # a with accent - u'\xe0' : 'a', # a with accent going the other way - u'\xe2' : 'a', # a with caret - u'\xf6' : 'o', # o with umlaut - u'\xed' : 'i', # i with accent -} -# this one is one-way only -def to_ascii(s): - for uchar in unicode_trans: - s = s.replace(uchar, unicode_trans(uchar)) - return s - # unary numbers unary_marker = '&' unary_counter = '^' @@ -64,339 +35,3 @@ unary_exceptions = { 100: 'one hundred', 200: 'two hundred', } - -def to_unary(s, warn = False): - numbers = re.findall(r'[0123456789]+', s) - # replace largest first to avoid accidentally replacing shared substrings - for n in sorted(numbers, cmp = lambda x,y: cmp(int(x), int(y)), reverse = True): - i = int(n) - if i in unary_exceptions: - s = s.replace(n, unary_exceptions[n]) - elif i > unary_max: - i = unary_max - if warn: - print s - s = s.replace(n, unary_marker + unary_counter * i) - else: - s = s.replace(n, unary_marker + unary_counter * i) - return s - -def from_unary(s): - numbers = re.findall(re.escape(unary_marker + unary_counter) + '*', s) - # again, largest first so we don't replace substrings and break everything - for n in sorted(numbers, cmp = lambda x,y: cmp(len(x), len(y)), reverse = True): - i = (len(n) - len(unary_marker)) / len(unary_counter) - s = s.replace(n, str(i)) - return s - -# mana syntax -mana_open_delimiter = '{' -mana_close_delimiter = '}' -mana_json_open_delimiter = mana_open_delimiter -mana_json_close_delimiter = mana_close_delimiter -mana_json_hybrid_delimiter = '/' -mana_forum_open_delimiter = '[mana]' -mana_forum_close_delimiter = '[/mana]' -mana_unary_marker = '' # if the same as unary_marker, from_unary WILL replace numbers in mana costs -mana_unary_counter = unary_counter - -# individual mana symbols -mana_W = 'W' # single color -mana_U = 'U' -mana_B = 'B' -mana_R = 'R' -mana_G = 'G' -mana_P = 'P' # colorless phyrexian -mana_S = 'S' # snow -mana_X = 'X' # colorless X -mana_WP = 'WP' # single color phyrexian -mana_UP = 'UP' -mana_BP = 'BP' -mana_RP = 'RP' -mana_GP = 'GP' -mana_2W = '2W' # single color hybrid -mana_2U = '2U' -mana_2B = '2B' -mana_2R = '2R' -mana_2G = '2G' -mana_WU = 'WU' # dual color hybrid -mana_WB = 'WB' -mana_RW = 'RW' -mana_GW = 'GW' -mana_UB = 'UB' -mana_UR = 'UR' -mana_GU = 'GU' -mana_BR = 'BR' -mana_BG = 'BG' -mana_RG = 'RG' -# alternative order symbols -mana_WP_alt = 'PW' # single color phyrexian -mana_UP_alt = 'PU' -mana_BP_alt = 'PB' -mana_RP_alt = 'PR' -mana_GP_alt = 'PG' -mana_2W_alt = 'W2' # single color hybrid -mana_2U_alt = 'U2' -mana_2B_alt = 'B2' -mana_2R_alt = 'R2' -mana_2G_alt = 'G2' -mana_WU_alt = 'UW' # dual color hybrid -mana_WB_alt = 'BW' -mana_RW_alt = 'WR' -mana_GW_alt = 'WG' -mana_UB_alt = 'BU' -mana_UR_alt = 'RU' -mana_GU_alt = 'UG' -mana_BR_alt = 'RB' -mana_BG_alt = 'GB' -mana_RG_alt = 'GR' -# special -mana_2 = '2' # use with 'in' to identify single color hybrid - -# master symbol lists -mana_syms = [ - mana_W, - mana_U, - mana_B, - mana_R, - mana_G, - mana_P, - mana_S, - mana_X, - mana_WP, - mana_UP, - mana_BP, - mana_RP, - mana_GP, - mana_2W, - mana_2U, - mana_2B, - mana_2R, - mana_2G, - mana_WU, - mana_WB, - mana_RW, - mana_GW, - mana_UB, - mana_UR, - mana_GU, - mana_BR, - mana_BG, - mana_RG, -] -mana_symalt = [ - mana_WP_alt, - mana_UP_alt, - mana_BP_alt, - mana_RP_alt, - mana_GP_alt, - mana_2W_alt, - mana_2U_alt, - mana_2B_alt, - mana_2R_alt, - mana_2G_alt, - mana_WU_alt, - mana_WB_alt, - mana_RW_alt, - mana_GW_alt, - mana_UB_alt, - mana_UR_alt, - mana_GU_alt, - mana_BR_alt, - mana_BG_alt, - mana_RG_alt, -] -mana_symall = mana_syms + mana_symalt - -# alt symbol conversion -def mana_alt(sym): - if not sym in mana_symall: - raise ValueError('invalid mana symbol for mana_alt(): ' + repr(sym)) - if len(sym) < 2: - return sym - else: - return sym[::-1] - -# produce intended neural net output format -def mana_sym_to_encoding(sym): - if not sym in mana_symall: - raise ValueError('invalid mana symbol for mana_sym_to_encoding(): ' + repr(sym)) - if len(sym) < 2: - return sym * 2 - else: - return sym - -# produce json formatting used in mtgjson -def mana_sym_to_json(sym): - if not sym in mana_symall: - raise ValueError('invalid mana symbol for mana_sym_to_json(): ' + repr(sym)) - if len(sym) < 2: - return mana_json_open_delimiter + sym + mana_json_close_delimiter - else: - return (mana_json_open_delimiter + sym[0] + mana_json_hybrid_delimiter - + sym[1] + mana_json_close_delimiter) - -# produce pretty formatting that renders on mtgsalvation forum -# converts individual symbols; surrounding [mana][/mana] tags are added elsewhere -def mana_sym_to_forum(sym): - if not sym in mana_symall: - raise ValueError('invalid mana symbol for mana_sym_to_forum(): ' + repr(sym)) - if sym in mana_symalt: - sym = mana_alt(sym) - if len(sym) < 2: - return sym - else: - return mana_json_open_delimiter + sym + mana_json_close_delimiter - -# forward symbol tables for encoding -mana_syms_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_syms} -mana_symalt_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_symalt} -mana_symall_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_symall} -mana_syms_jencode = {sym : mana_sym_to_json(sym) for sym in mana_syms} -mana_symalt_jencode = {sym : mana_sym_to_json(sym) for sym in mana_symalt} -mana_symall_jencode = {sym : mana_sym_to_json(sym) for sym in mana_symall} - -# reverse symbol tables for decoding -mana_syms_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_syms} -mana_symalt_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_symalt} -mana_symall_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_symall} -mana_syms_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_syms} -mana_symalt_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_symalt} -mana_symall_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_symall} - -# going straight from json to encoding and vice versa -def mana_encode_direct(jsym): - if not jsym in mana_symall_jdecode: - raise ValueError('json string not found in decode table for mana_encode_direct(): ' - + repr(jsym)) - else: - return mana_symall_encode[mana_symall_jdecode[jsym]] - -def mana_decode_direct(sym): - if not sym in mana_symall_decode: - raise ValueError('mana symbol not found in decode table for mana_decode_direct(): ' - + repr(sym)) - else: - return mana_symall_jencode[mana_symall_decode[sym]] - -# hacked in support for mtgsalvation forum -def mana_decode_direct_forum(sym): - if not sym in mana_symall_decode: - raise ValueError('mana symbol not found in decode table for mana_decode_direct_forum(): ' - + repr(sym)) - else: - return mana_sym_to_forum(mana_symall_decode[sym]) - -# processing entire strings -def unique_string(s): - return ''.join(set(s)) - -mana_charset_special = mana_unary_marker + mana_unary_counter -mana_charset_strict = unique_string(''.join(mana_symall) + mana_charset_special) -mana_charset = unique_string(mana_charset_strict + mana_charset_strict.lower()) - -mana_regex_strict = (re.escape(mana_open_delimiter) + '[' - + re.escape(mana_charset_strict) - + ']*' + re.escape(mana_close_delimiter)) -mana_regex = (re.escape(mana_open_delimiter) + '[' - + re.escape(mana_charset) - + ']*' + re.escape(mana_close_delimiter)) - -# as a special case, we let unary or decimal numbers exist in json mana strings -mana_jcharset_special = '0123456789' + mana_unary_marker + mana_unary_counter -mana_jcharset_strict = unique_string(''.join(mana_symall_jdecode) + mana_jcharset_special) -mana_jcharset = unique_string(mana_jcharset_strict + mana_jcharset_strict.lower()) - -# note that json mana strings can't be empty between the delimiters -mana_jregex_strict = (re.escape(mana_json_open_delimiter) + '[' - + re.escape(mana_jcharset_strict) - + ']+' + re.escape(mana_json_close_delimiter)) -mana_jregex = (re.escape(mana_json_open_delimiter) + '[' - + re.escape(mana_jcharset) - + ']+' + re.escape(mana_json_close_delimiter)) - -number_decimal_regex = r'[0123456789]+' -number_unary_regex = re.escape(unary_marker) + re.escape(unary_counter) + '*' -mana_decimal_regex = (re.escape(mana_json_open_delimiter) + number_decimal_regex - + re.escape(mana_json_close_delimiter)) -mana_unary_regex = (re.escape(mana_json_open_delimiter) + number_unary_regex - + re.escape(mana_json_close_delimiter)) - -# convert a json mana string to the proper encoding -def mana_translate(jmanastr): - manastr = jmanastr - for n in re.findall(mana_unary_regex, manastr): - ns = re.findall(number_unary_regex, n) - i = (len(ns[0]) - len(mana_unary_marker)) / len(mana_unary_counter) - manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i) - for n in re.findall(mana_decimal_regex, manastr): - ns = re.findall(number_decimal_regex, n) - i = int(ns[0]) - manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i) - for jsym in mana_symall_jdecode: - if jsym in manastr: - manastr = manastr.replace(jsym, mana_encode_direct(jsym)) - return mana_open_delimiter + manastr + mana_close_delimiter - -# convert an encoded mana string back to json -mana_symlen_min = min([len(sym) for sym in mana_symall_decode]) -mana_symlen_max = max([len(sym) for sym in mana_symall_decode]) -def mana_untranslate(manastr, for_forum = False): - inner = manastr[1:-1] - jmanastr = '' - colorless_total = 0 - idx = 0 - while idx < len(inner): - # taking this branch is an infinite loop if unary_marker is empty - if len(mana_unary_marker) > 0 and inner[idx:idx+len(mana_unary_marker)] == mana_unary_marker: - idx += len(mana_unary_marker) - elif inner[idx:idx+len(mana_unary_counter)] == mana_unary_counter: - idx += len(mana_unary_counter) - colorless_total += 1 - else: - old_idx = idx - for symlen in range(mana_symlen_min, mana_symlen_max + 1): - sym = inner[idx:idx+symlen] - if sym in mana_symall_decode: - idx += symlen - if for_forum: - jmanastr = jmanastr + mana_decode_direct_forum(sym) - else: - jmanastr = jmanastr + mana_decode_direct(sym) - break - # otherwise we'll go into an infinite loop if we see a symbol we don't know - if idx == old_idx: - idx += 1 - if for_forum: - if jmanastr == '': - return mana_forum_open_delimiter + str(colorless_total) + mana_forum_close_delimiter - else: - return (mana_forum_open_delimiter + ('' if colorless_total == 0 - else str(colorless_total)) - + jmanastr + mana_forum_close_delimiter) - else: - if jmanastr == '': - return mana_json_open_delimiter + str(colorless_total) + mana_json_close_delimiter - else: - return (('' if colorless_total == 0 else - mana_json_open_delimiter + str(colorless_total) + mana_json_close_delimiter) - + jmanastr) - -# finally, replacing all instances in a string -# notice the calls to .upper(), this way we recognize lowercase symbols as well just in case -def to_mana(s): - jmanastrs = re.findall(mana_jregex, s) - for jmanastr in jmanastrs: - s = s.replace(jmanastr, mana_translate(jmanastr.upper())) - return s - -def from_mana(s, for_forum = False): - manastrs = re.findall(mana_regex, s) - for manastr in manastrs: - s = s.replace(manastr, mana_untranslate(manastr.upper(), for_forum = for_forum)) - return s - -# Translation could also be accomplished using the datamine.Manacost object's -# display methods, but these direct string transformations are retained for -# quick scripting and convenience (and used under the hood by that class to -# do its formatting). diff --git a/encode.py b/encode.py index 17eee57..0435eab 100644 --- a/encode.py +++ b/encode.py @@ -3,30 +3,27 @@ import re import codecs import sys +import utils + #badwords = [] valid_encoded_char = r'[abcdefghijklmnopqrstuvwxyz\'+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]' -dash_marker = '~' -bullet_marker = '=' -reserved_marker = '\r' - -def to_ascii(s): - s = s.replace(u'\u2014', dash_marker) # unicode long dash - s = s.replace(u'\u2022', bullet_marker) # unicode bullet - s = s.replace(u'\u2019', '"') # single quote - s = s.replace(u'\u2018', '"') # single quote - s = s.replace(u'\u2212', '-') # minus sign - s = s.replace(u'\xe6', 'ae') # ae symbol - s = s.replace(u'\xfb', 'u') # u with caret - s = s.replace(u'\xfa', 'u') # u with accent - s = s.replace(u'\xe9', 'e') # e with accent - s = s.replace(u'\xe1', 'a') # a with accent - s = s.replace(u'\xe0', 'a') # a with accent going the other way - s = s.replace(u'\xe2', 'a') # a with caret - s = s.replace(u'\xf6', 'o') # o with umlaut - s = s.replace(u'\xed', 'i') # i with accent - return s +cardsep = utils.cardsep +fieldsep = utils.fieldsep +bsidesep = utils.bsidesep +newline = utils.newline +dash_marker = utils.dash_marker +bullet_marker = utils.bullet_marker +this_marker = utils.this_marker +counter_marker = utils.counter_marker +reserved_marker = utils.reserved_marker +x_marker = utils.x_marker +tap_marker = utils.tap_marker +untap_marker = utils.untap_marker +counter_rename = utils.counter_rename +unary_marker = utils.unary_marker +unary_counter = utils.unary_counter # This whole things assumes the json format of mtgjson.com. @@ -63,107 +60,11 @@ def to_ascii(s): # releaseDate - string # starter - boolean -fieldsep = '|' -newline = '\\' -unary_marker = '&' -unary_counter = '^' -mana_open_delimiter = '{' -mana_close_delimiter = '}' -x_marker = 'X' -tap_marker = 'T' -untap_marker = 'Q' -this_marker = '@' -counter_marker = '%' -bsidesep = '\n' - -unary_max = 20 - -def to_unary(s): - numbers = re.findall(r'[0123456789]+', s) - for n in sorted(numbers, cmp = lambda x,y: cmp(int(x), int(y)) * -1): - i = int(n) - if i == 25: - s = s.replace(n, 'twenty~five') - elif i == 30: - s = s.replace(n, 'thirty') - elif i == 40: - s = s.replace(n, 'forty') - elif i == 50: - s = s.replace(n, 'fifty') - elif i == 100: - s = s.replace(n, 'one hundred') - elif i == 200: - s = s.replace(n, 'two hundred') - else: - if i > unary_max: - # truncate to unary_max - i = unary_max - # warn, because we probably don't want this to happen - print s - s = s.replace(n, unary_marker + unary_counter * i) - - return s - - -# also handles the tap and untap symbols -def compress_mana(manastring): - # mana string is of the form '{3}{W}{2/B}', as specified by mtgjson - translations = { - '{w}' : 'WW', - '{u}' : 'UU', - '{b}' : 'BB', - '{r}' : 'RR', - '{g}' : 'GG', - '{p}' : 'PP', - '{w/p}' : 'WP', - '{u/p}' : 'UP', - '{b/p}' : 'BP', - '{r/p}' : 'RP', - '{g/p}' : 'GP', - '{2/w}' : 'VW', - '{2/u}' : 'VU', - '{2/b}' : 'VB', - '{2/r}' : 'VR', - '{2/g}' : 'VG', - '{w/u}' : 'WU', - '{w/b}' : 'WB', - '{r/w}' : 'RW', - '{g/w}' : 'GW', - '{u/b}' : 'UB', - '{u/r}' : 'UR', - '{g/u}' : 'GU', - '{b/r}' : 'BR', - '{b/g}' : 'BG', - '{r/g}' : 'RG', - '{s}' : 'SS', - '{x}' : x_marker * 2, - '{t}' : tap_marker, - '{q}' : untap_marker, - } - for t in translations: - manastring = manastring.replace(t, translations[t]) - - numbers = re.findall(r'\{[0123456789]+\}', manastring) - for n in numbers: - i = int(re.findall(r'[0123456789]+', n)[0]) - manastring = manastring.replace(n, unary_counter * i) - - # we don't really need delimiters for tap, it's a unique symbol anyways - if manastring in [tap_marker, untap_marker]: - return manastring - else: - return '{' + manastring + '}' - -def replace_mana(s): - manastrings = re.findall(r'\{[\{\}wubrgp/xtq0123456789]+\}', s) - for manastring in manastrings: - s = s.replace(manastring, compress_mana(manastring)) - return s - def strip_reminder_text(s): return re.sub(r'\(.*\)', '', s) - + + def replace_newlines(s): return s.replace('\n', '\\') @@ -213,7 +114,7 @@ def replace_cardname(s, name): for override in overrides: s = s.replace(override, this_marker) - # some detection code when the overrides need to be fixed... + # some detection code for when the overrides need to be fixed... # global badwords # bad = False # for word in name.replace(',', '').split(): @@ -539,32 +440,35 @@ def encode(card): return encoding = fieldsep - name = card['name'].lower() - encoding += sanitize_name(name) + if 'name' in card: + name = card['name'].lower() + encoding += sanitize_name(name) encoding += fieldsep if 'supertypes' in card: encoding += ' '.join(card['supertypes']).lower() encoding += fieldsep - encoding += ' '.join(card['types']).lower() + if 'types' in card: + encoding += ' '.join(card['types']).lower() encoding += fieldsep if 'loyalty' in card: - encoding += to_unary(str(card['loyalty'])) + encoding += utils.to_unary(str(card['loyalty'])) encoding += fieldsep if 'subtypes' in card: encoding += ' '.join(card['subtypes']).lower() encoding += fieldsep if 'power' in card and 'toughness' in card: - encoding += to_unary(card['power']) + '/' + to_unary(card['toughness']) + encoding += utils.to_unary(card['power']) + '/' + utils.to_unary(card['toughness']) encoding += fieldsep if 'manaCost' in card: - encoding += replace_mana(card['manaCost'].lower()) + encoding += utils.to_mana(card['manaCost'].lower()) encoding += fieldsep if 'text' in card: text = card['text'].lower() text = strip_reminder_text(text) text = replace_cardname(text, name) - text = replace_mana(text) - text = to_unary(text) + text = utils.to_mana(text) + text = utils.to_symbols(text) + text = utils.to_unary(text) text = fix_dashes(text) text = fix_x(text) text = replace_counters(text) @@ -575,24 +479,16 @@ def encode(card): encoding += text.strip() encoding += fieldsep - # HACK: put the cost again after the text - # if 'manaCost' in card: - # encoding += replace_mana(card['manaCost'].lower()) - # encoding += fieldsep - - # if 'flavor' in card: - # encoding += card['flavor'].lower() - # encoding += fieldsep - # now output the bside if there is one if 'bside' in card: encoding += bsidesep encoding += encode(card['bside']) - encoding = to_ascii(encoding) + encoding = utils.to_ascii(encoding) # encoding = re.sub(valid_encoded_char, '', encoding) # if not encoding == '': # print card + return encoding def encode_duplicated(cards): diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..fd720d7 --- /dev/null +++ b/utils.py @@ -0,0 +1,424 @@ +import re + +# Utilities for handling unicode, unary numbers, mana costs, and special symbols. +# For convenience we redefine everything from config so that it can all be accessed +# from the utils module. + +import config + +# separators +cardsep = config.cardsep +fieldsep = config.fieldsep +bsidesep = config.bsidesep +newline = config.newline + +# special indicators +dash_marker = config.dash_marker +bullet_marker = config.bullet_marker +this_marker = config.this_marker +counter_marker = config.counter_marker +reserved_marker = config.reserved_marker +x_marker = config.x_marker +tap_marker = config.tap_marker +untap_marker = config.untap_marker + +# unambiguous synonyms +counter_rename = config.counter_rename + +# unicode / ascii conversion +unicode_trans = { + u'\u2014' : dash_marker, # unicode long dash + u'\u2022' : bullet_marker, # unicode bullet + u'\u2019' : '"', # single quote + u'\u2018' : '"', # single quote + u'\u2212' : '-', # minus sign + u'\xe6' : 'ae', # ae symbol + u'\xfb' : 'u', # u with caret + u'\xfa' : 'u', # u with accent + u'\xe9' : 'e', # e with accent + u'\xe1' : 'a', # a with accent + u'\xe0' : 'a', # a with accent going the other way + u'\xe2' : 'a', # a with caret + u'\xf6' : 'o', # o with umlaut + u'\xed' : 'i', # i with accent +} + +# this one is one-way only +def to_ascii(s): + for uchar in unicode_trans: + s = s.replace(uchar, unicode_trans[uchar]) + return s + +# unary numbers +unary_marker = config.unary_marker +unary_counter = config.unary_counter +unary_max = config.unary_max +unary_exceptions = config.unary_exceptions + +def to_unary(s, warn = False): + numbers = re.findall(r'[0123456789]+', s) + # replace largest first to avoid accidentally replacing shared substrings + for n in sorted(numbers, cmp = lambda x,y: cmp(int(x), int(y)), reverse = True): + i = int(n) + if i in unary_exceptions: + s = s.replace(n, unary_exceptions[i]) + elif i > unary_max: + i = unary_max + if warn: + print s + s = s.replace(n, unary_marker + unary_counter * i) + else: + s = s.replace(n, unary_marker + unary_counter * i) + return s + +def from_unary(s): + numbers = re.findall(re.escape(unary_marker + unary_counter) + '*', s) + # again, largest first so we don't replace substrings and break everything + for n in sorted(numbers, cmp = lambda x,y: cmp(len(x), len(y)), reverse = True): + i = (len(n) - len(unary_marker)) / len(unary_counter) + s = s.replace(n, str(i)) + return s + +# mana syntax +mana_open_delimiter = '{' +mana_close_delimiter = '}' +mana_json_open_delimiter = mana_open_delimiter +mana_json_close_delimiter = mana_close_delimiter +mana_json_hybrid_delimiter = '/' +mana_forum_open_delimiter = '[mana]' +mana_forum_close_delimiter = '[/mana]' +mana_unary_marker = '' # if the same as unary_marker, from_unary WILL replace numbers in mana costs +mana_unary_counter = unary_counter + +# The decoding from mtgjson format is dependent on the specific structure of +# these internally used mana symbol strings, so if you want to change them you'll +# also have to change the json decoding functions. + +# standard mana symbol set +mana_W = 'W' # single color +mana_U = 'U' +mana_B = 'B' +mana_R = 'R' +mana_G = 'G' +mana_P = 'P' # colorless phyrexian +mana_S = 'S' # snow +mana_X = 'X' # colorless X +mana_WP = 'WP' # single color phyrexian +mana_UP = 'UP' +mana_BP = 'BP' +mana_RP = 'RP' +mana_GP = 'GP' +mana_2W = '2W' # single color hybrid +mana_2U = '2U' +mana_2B = '2B' +mana_2R = '2R' +mana_2G = '2G' +mana_WU = 'WU' # dual color hybrid +mana_WB = 'WB' +mana_RW = 'RW' +mana_GW = 'GW' +mana_UB = 'UB' +mana_UR = 'UR' +mana_GU = 'GU' +mana_BR = 'BR' +mana_BG = 'BG' +mana_RG = 'RG' +# alternative order symbols +mana_WP_alt = 'PW' # single color phyrexian +mana_UP_alt = 'PU' +mana_BP_alt = 'PB' +mana_RP_alt = 'PR' +mana_GP_alt = 'PG' +mana_2W_alt = 'W2' # single color hybrid +mana_2U_alt = 'U2' +mana_2B_alt = 'B2' +mana_2R_alt = 'R2' +mana_2G_alt = 'G2' +mana_WU_alt = 'UW' # dual color hybrid +mana_WB_alt = 'BW' +mana_RW_alt = 'WR' +mana_GW_alt = 'WG' +mana_UB_alt = 'BU' +mana_UR_alt = 'RU' +mana_GU_alt = 'UG' +mana_BR_alt = 'RB' +mana_BG_alt = 'GB' +mana_RG_alt = 'GR' +# special +mana_2 = '2' # use with 'in' to identify single color hybrid + +# master symbol lists +mana_syms = [ + mana_W, + mana_U, + mana_B, + mana_R, + mana_G, + mana_P, + mana_S, + mana_X, + mana_WP, + mana_UP, + mana_BP, + mana_RP, + mana_GP, + mana_2W, + mana_2U, + mana_2B, + mana_2R, + mana_2G, + mana_WU, + mana_WB, + mana_RW, + mana_GW, + mana_UB, + mana_UR, + mana_GU, + mana_BR, + mana_BG, + mana_RG, +] +mana_symalt = [ + mana_WP_alt, + mana_UP_alt, + mana_BP_alt, + mana_RP_alt, + mana_GP_alt, + mana_2W_alt, + mana_2U_alt, + mana_2B_alt, + mana_2R_alt, + mana_2G_alt, + mana_WU_alt, + mana_WB_alt, + mana_RW_alt, + mana_GW_alt, + mana_UB_alt, + mana_UR_alt, + mana_GU_alt, + mana_BR_alt, + mana_BG_alt, + mana_RG_alt, +] +mana_symall = mana_syms + mana_symalt + +# alt symbol conversion +def mana_alt(sym): + if not sym in mana_symall: + raise ValueError('invalid mana symbol for mana_alt(): ' + repr(sym)) + if len(sym) < 2: + return sym + else: + return sym[::-1] + +# produce intended neural net output format +def mana_sym_to_encoding(sym): + if not sym in mana_symall: + raise ValueError('invalid mana symbol for mana_sym_to_encoding(): ' + repr(sym)) + if len(sym) < 2: + return sym * 2 + else: + return sym + +# produce json formatting used in mtgjson +def mana_sym_to_json(sym): + if not sym in mana_symall: + raise ValueError('invalid mana symbol for mana_sym_to_json(): ' + repr(sym)) + if len(sym) < 2: + return mana_json_open_delimiter + sym + mana_json_close_delimiter + else: + return (mana_json_open_delimiter + sym[0] + mana_json_hybrid_delimiter + + sym[1] + mana_json_close_delimiter) + +# produce pretty formatting that renders on mtgsalvation forum +# converts individual symbols; surrounding [mana][/mana] tags are added elsewhere +def mana_sym_to_forum(sym): + if not sym in mana_symall: + raise ValueError('invalid mana symbol for mana_sym_to_forum(): ' + repr(sym)) + if sym in mana_symalt: + sym = mana_alt(sym) + if len(sym) < 2: + return sym + else: + return mana_json_open_delimiter + sym + mana_json_close_delimiter + +# forward symbol tables for encoding +mana_syms_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_syms} +mana_symalt_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_symalt} +mana_symall_encode = {sym : mana_sym_to_encoding(sym) for sym in mana_symall} +mana_syms_jencode = {sym : mana_sym_to_json(sym) for sym in mana_syms} +mana_symalt_jencode = {sym : mana_sym_to_json(sym) for sym in mana_symalt} +mana_symall_jencode = {sym : mana_sym_to_json(sym) for sym in mana_symall} + +# reverse symbol tables for decoding +mana_syms_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_syms} +mana_symalt_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_symalt} +mana_symall_decode = {mana_sym_to_encoding(sym) : sym for sym in mana_symall} +mana_syms_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_syms} +mana_symalt_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_symalt} +mana_symall_jdecode = {mana_sym_to_json(sym) : sym for sym in mana_symall} + +# going straight from json to encoding and vice versa +def mana_encode_direct(jsym): + if not jsym in mana_symall_jdecode: + raise ValueError('json string not found in decode table for mana_encode_direct(): ' + + repr(jsym)) + else: + return mana_symall_encode[mana_symall_jdecode[jsym]] + +def mana_decode_direct(sym): + if not sym in mana_symall_decode: + raise ValueError('mana symbol not found in decode table for mana_decode_direct(): ' + + repr(sym)) + else: + return mana_symall_jencode[mana_symall_decode[sym]] + +# hacked in support for mtgsalvation forum +def mana_decode_direct_forum(sym): + if not sym in mana_symall_decode: + raise ValueError('mana symbol not found in decode table for mana_decode_direct_forum(): ' + + repr(sym)) + else: + return mana_sym_to_forum(mana_symall_decode[sym]) + +# processing entire strings +def unique_string(s): + return ''.join(set(s)) + +mana_charset_special = mana_unary_marker + mana_unary_counter +mana_charset_strict = unique_string(''.join(mana_symall) + mana_charset_special) +mana_charset = unique_string(mana_charset_strict + mana_charset_strict.lower()) + +mana_regex_strict = (re.escape(mana_open_delimiter) + '[' + + re.escape(mana_charset_strict) + + ']*' + re.escape(mana_close_delimiter)) +mana_regex = (re.escape(mana_open_delimiter) + '[' + + re.escape(mana_charset) + + ']*' + re.escape(mana_close_delimiter)) + +# as a special case, we let unary or decimal numbers exist in json mana strings +mana_jcharset_special = '0123456789' + mana_unary_marker + mana_unary_counter +mana_jcharset_strict = unique_string(''.join(mana_symall_jdecode) + mana_jcharset_special) +mana_jcharset = unique_string(mana_jcharset_strict + mana_jcharset_strict.lower()) + +# note that json mana strings can't be empty between the delimiters +mana_jregex_strict = (re.escape(mana_json_open_delimiter) + '[' + + re.escape(mana_jcharset_strict) + + ']+' + re.escape(mana_json_close_delimiter)) +mana_jregex = (re.escape(mana_json_open_delimiter) + '[' + + re.escape(mana_jcharset) + + ']+' + re.escape(mana_json_close_delimiter)) + +number_decimal_regex = r'[0123456789]+' +number_unary_regex = re.escape(unary_marker) + re.escape(unary_counter) + '*' +mana_decimal_regex = (re.escape(mana_json_open_delimiter) + number_decimal_regex + + re.escape(mana_json_close_delimiter)) +mana_unary_regex = (re.escape(mana_json_open_delimiter) + number_unary_regex + + re.escape(mana_json_close_delimiter)) + +# convert a json mana string to the proper encoding +def mana_translate(jmanastr): + manastr = jmanastr + for n in sorted(re.findall(mana_unary_regex, manastr), + lambda x,y: cmp(len(x), len(y)), reverse = True): + ns = re.findall(number_unary_regex, n) + i = (len(ns[0]) - len(mana_unary_marker)) / len(mana_unary_counter) + manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i) + for n in sorted(re.findall(mana_decimal_regex, manastr), + lambda x,y: cmp(len(x), len(y)), reverse = True): + ns = re.findall(number_decimal_regex, n) + i = int(ns[0]) + manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i) + for jsym in sorted(mana_symall_jdecode, lambda x,y: cmp(len(x), len(y)), reverse = True): + if jsym in manastr: + manastr = manastr.replace(jsym, mana_encode_direct(jsym)) + return mana_open_delimiter + manastr + mana_close_delimiter + +# convert an encoded mana string back to json +mana_symlen_min = min([len(sym) for sym in mana_symall_decode]) +mana_symlen_max = max([len(sym) for sym in mana_symall_decode]) +def mana_untranslate(manastr, for_forum = False): + inner = manastr[1:-1] + jmanastr = '' + colorless_total = 0 + idx = 0 + while idx < len(inner): + # taking this branch is an infinite loop if unary_marker is empty + if len(mana_unary_marker) > 0 and inner[idx:idx+len(mana_unary_marker)] == mana_unary_marker: + idx += len(mana_unary_marker) + elif inner[idx:idx+len(mana_unary_counter)] == mana_unary_counter: + idx += len(mana_unary_counter) + colorless_total += 1 + else: + old_idx = idx + for symlen in range(mana_symlen_min, mana_symlen_max + 1): + sym = inner[idx:idx+symlen] + if sym in mana_symall_decode: + idx += symlen + if for_forum: + jmanastr = jmanastr + mana_decode_direct_forum(sym) + else: + jmanastr = jmanastr + mana_decode_direct(sym) + break + # otherwise we'll go into an infinite loop if we see a symbol we don't know + if idx == old_idx: + idx += 1 + if for_forum: + if jmanastr == '': + return mana_forum_open_delimiter + str(colorless_total) + mana_forum_close_delimiter + else: + return (mana_forum_open_delimiter + ('' if colorless_total == 0 + else str(colorless_total)) + + jmanastr + mana_forum_close_delimiter) + else: + if jmanastr == '': + return mana_json_open_delimiter + str(colorless_total) + mana_json_close_delimiter + else: + return (('' if colorless_total == 0 else + mana_json_open_delimiter + str(colorless_total) + mana_json_close_delimiter) + + jmanastr) + +# finally, replacing all instances in a string +# notice the calls to .upper(), this way we recognize lowercase symbols as well just in case +def to_mana(s): + jmanastrs = re.findall(mana_jregex, s) + for jmanastr in sorted(jmanastrs, lambda x,y: cmp(len(x), len(y)), reverse = True): + s = s.replace(jmanastr, mana_translate(jmanastr.upper())) + return s + +def from_mana(s, for_forum = False): + manastrs = re.findall(mana_regex, s) + for manastr in sorted(manastrs, lambda x,y: cmp(len(x), len(y)), reverse = True): + s = s.replace(manastr, mana_untranslate(manastr.upper(), for_forum = for_forum)) + return s + +# Translation could also be accomplished using the datamine.Manacost object's +# display methods, but these direct string transformations are retained for +# quick scripting and convenience (and used under the hood by that class to +# do its formatting). + +# more convenience features for formatting tap / untap symbols +json_symbol_tap = tap_marker +json_symbol_untap = untap_marker + +json_symbol_trans = { + mana_json_open_delimiter + json_symbol_tap + mana_json_close_delimiter : tap_marker, + mana_json_open_delimiter + json_symbol_tap.lower() + mana_json_close_delimiter : tap_marker, + mana_json_open_delimiter + json_symbol_untap + mana_json_close_delimiter : untap_marker, + mana_json_open_delimiter + json_symbol_untap.lower() + mana_json_close_delimiter : untap_marker, +} +symbol_trans = { + tap_marker : mana_json_open_delimiter + json_symbol_tap + mana_json_close_delimiter, + untap_marker : mana_json_open_delimiter + json_symbol_untap + mana_json_close_delimiter, +} + +json_symbol_regex = r'\{[TtQq]\}' + +def to_symbols(s): + symstrs = re.findall(json_symbol_regex, s) + for symstr in sorted(symstrs, lambda x,y: cmp(len(x), len(y)), reverse = True): + s = s.replace(symstr, json_symbol_trans[symstr]) + return s + +def from_symbols(s): + symstrs