EVERYTHING HAS CHANGED

Added lib and script subdirs to organize things; the biggest change is that now
we have a really powerful Card class that can handle all of the decoding and
encoding for us. encode.py has been written to take advantage of this; other
things have not been updated yet. Coming soon! As a side note, the changes to
output.txt are purely cosmetic, though the order should be stable now.
This commit is contained in:
Bill Zorn 2015-07-14 00:07:25 -07:00
parent cbf8ac34e5
commit 1a4965fd83
12 changed files with 28751 additions and 28543 deletions

1
.gitignore vendored
View file

@ -2,3 +2,4 @@
*.pyc
AllSets.json
AllSets-x.json
lib/__init__.py

File diff suppressed because it is too large Load diff

View file

@ -3,7 +3,9 @@ import codecs
import sys
import random
import utils
import lib.utils as utils
from lib.card import Card
from lib.mana import Manacost
# Format a list of rows of data into nice columns.
# Note that it's the columns that are nice, not this code.
@ -29,351 +31,6 @@ def printrows(l):
for row in l:
print row
def randomize_all_mana(text):
    '''Return text with the symbols inside each mana cost randomly reordered.

    Every substring matching utils.mana_regex is first masked with
    utils.reserved_marker, then the markers are filled back in one at a time
    with a shuffled re-encoding of the corresponding cost.
    '''
    manastrs = re.findall(utils.mana_regex, text)
    newtext = text
    # Mask the longest costs first, so a short cost that also appears inside a
    # longer one cannot break the longer one apart. key = len replaces the old
    # cmp-style comparator; the sort is stable either way.
    for manastr in sorted(manastrs, key = len, reverse = True):
        newtext = newtext.replace(manastr, utils.reserved_marker)
    # Fill the markers back in, one per matched cost, in match order.
    for manastr in manastrs:
        newtext = newtext.replace(utils.reserved_marker,
                                  Manacost(manastr).reencode(randomize = True),
                                  1)
    return newtext
# so this stuff still needs to be cleaned up
punctuation_chars = r'[+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]'
creature_keywords = [
# evergreen
'deathtouch',
'defender',
'double strike',
'first strike',
'flash',
'flying',
'haste',
'hexproof',
'indestructible',
'lifelink',
'menace',
'prowess',
'reach',
'trample',
'vigilance',
# no longer evergreen
'banding',
'fear',
'shroud',
'intimidate',
# expert level keywords
'absorb',
'amplify',
'annihilator',
'battle cry',
'bolster',
'bloodthirst',
'bushido',
'changeling',
'convoke',
'devour',
'evolve',
'exalted',
'extort',
'fading',
'flanking',
'frenzy',
'graft',
'haunt',
'horsemanship',
'infect',
'modular',
#'morph',
#'ninjutsu',
'persist',
'poisonous',
'provoke',
#'prowl',
'rampage',
'ripple',
#'scavenge',
'shadow',
'soulbond',
'soulshift',
'split second',
'sunburst',
'undying',
#'unearth',
'unleash',
'vanishing',
'wither',
] # there are other keywords out there, these are just easy to detect
# data aggregating classes
class Manacost:
    '''mana cost representation with data'''

    # hardcoded to be dependent on the symbol structure... ah well
    def get_colors(self):
        '''Return the sorted, de-duplicated color characters of the symbols present.'''
        colors = ''
        for sym in self.symbols:
            if self.symbols[sym] > 0:
                # the characters 2, P, S and X are not counted as colors here
                symcolors = re.sub(r'2|P|S|X', '', sym)
                for symcolor in symcolors:
                    if symcolor not in colors:
                        colors += symcolor
        # sort so the order is always consistent
        return ''.join(sorted(colors))

    def check_colors(self, symbolstring):
        '''Return True if every character of symbolstring is one of self.colors.'''
        return all(sym in self.colors for sym in symbolstring)

    def __init__(self, text):
        '''Parse an encoded mana cost such as '{...}'; '' means "no cost".

        Sets _parsed / _valid flags, cmc, colorless, the encoded symbol
        sequence, per-symbol counts and the derived colors string.
        '''
        self.raw = text
        self.cmc = 0
        self.colorless = 0
        self.sequence = []
        self.symbols = {sym : 0 for sym in utils.mana_syms}
        self.allsymbols = {sym : 0 for sym in utils.mana_symall}
        # always defined, even for malformed input, so nothing trips over a
        # missing attribute later
        self.inner = ''

        if text == '':
            self._parsed = True
            self._valid = True
            self.none = True
        elif not (len(self.raw) >= 2 and self.raw[0] == '{' and self.raw[-1] == '}'):
            self._parsed = False
            self._valid = False
            self.none = False
        else:
            self._parsed = True
            self._valid = True
            self.none = False
            self.inner = self.raw[1:-1]

            # structure mirrors the decoding in utils, but we pull out different data here
            marker = utils.mana_unary_marker
            counter = utils.mana_unary_counter
            idx = 0
            while idx < len(self.inner):
                # an empty marker or counter would match everywhere without
                # advancing idx, i.e. an infinite loop, so both are guarded
                if len(marker) > 0 and self.inner.startswith(marker, idx):
                    idx += len(marker)
                    self.sequence += [marker]
                elif len(counter) > 0 and self.inner.startswith(counter, idx):
                    idx += len(counter)
                    self.sequence += [counter]
                    self.colorless += 1
                    self.cmc += 1
                else:
                    old_idx = idx
                    for symlen in range(utils.mana_symlen_min, utils.mana_symlen_max + 1):
                        encoded_sym = self.inner[idx:idx+symlen]
                        if encoded_sym in utils.mana_symall_decode:
                            idx += symlen
                            # leave the sequence encoded for convenience
                            self.sequence += [encoded_sym]
                            sym = utils.mana_symall_decode[encoded_sym]
                            self.allsymbols[sym] += 1
                            if sym in utils.mana_symalt:
                                self.symbols[utils.mana_alt(sym)] += 1
                            else:
                                self.symbols[sym] += 1
                            # X adds nothing to cmc, symbols containing
                            # mana_2 add 2, everything else adds 1
                            if sym == utils.mana_X:
                                self.cmc += 0
                            elif utils.mana_2 in sym:
                                self.cmc += 2
                            else:
                                self.cmc += 1
                            break
                    # otherwise we'll go into an infinite loop if we see a symbol we don't know
                    if idx == old_idx:
                        idx += 1
                        self._valid = False

        self.colors = self.get_colors()

    def __str__(self):
        return utils.mana_untranslate(utils.mana_open_delimiter + ''.join(self.sequence)
                                      + utils.mana_close_delimiter)

    def format(self, for_forum):
        '''Return the untranslated cost, forwarding for_forum to utils.mana_untranslate.'''
        # for_forum used to be passed to str.join by mistake, which raises
        # TypeError; it is an option for mana_untranslate.
        # NOTE(review): assumes mana_untranslate takes it as its second
        # positional argument -- confirm against utils.
        return utils.mana_untranslate(utils.mana_open_delimiter + ''.join(self.sequence)
                                      + utils.mana_close_delimiter, for_forum)

    def reencode(self, randomize = False):
        '''Return the encoded cost, optionally with its symbols shuffled.'''
        if self.none:
            return ''
        elif randomize:
            # so this won't work very well if mana_unary_marker isn't empty
            return (utils.mana_open_delimiter
                    + ''.join(random.sample(self.sequence, len(self.sequence)))
                    + utils.mana_close_delimiter)
        else:
            return utils.mana_open_delimiter + ''.join(self.sequence) + utils.mana_close_delimiter
class Card:
'''card representation with data'''
# Parses one encoded card. The text is split on utils.fieldsep and needs at
# least 10 pieces; pieces 1..8 are read as name, supertypes, types, loyalty,
# subtypes, p/t, mana cost and rules text. A second line, if present, is
# parsed recursively as the b-side.
# _parsed is cleared when the overall shape is wrong (in that case the raw
# pieces are kept in self.fields and none of the named attributes are set);
# _valid is cleared when an individual field looks wrong.
def __init__(self, text):
self.raw = text
self._parsed = True
self._valid = True
# a newline separates the a-side from the b-side; only exactly two lines are accepted
if '\n' in self.raw:
halves = self.raw.split('\n')
if not len(halves) == 2:
self._parsed = False
self._valid = False
self.fields = halves
# NOTE(review): this early return leaves self.bside and every named field unset
return
else:
self.raw = halves[0]
self.bside = Card(halves[1])
# an invalid b-side makes the whole card invalid
if not self.bside._valid:
self._valid = False
else:
self.bside = None
fields = self.raw.split(utils.fieldsep)
if not len(fields) >= 10:
self._parsed = False
self._valid = False
self.fields = fields
else:
# name is mandatory
if not fields[1] == '':
self.name = fields[1]
else:
self.name = ''
self._valid = False
if not fields[2] == '':
self.supertypes = fields[2].split(' ')
else:
self.supertypes = []
# at least one type is mandatory
if not fields[3] == '':
self.types = fields[3].split(' ')
else:
self.types = []
self._valid = False
# loyalty is kept as text; loyalty_value is its int value when it has one
if not fields[4] == '':
self.loyalty = fields[4]
try:
self.loyalty_value = int(self.loyalty)
except ValueError:
self.loyalty_value = None
# strictly speaking, '* where * is something' is valid...
# self._valid = False
else:
self.loyalty = None
self.loyalty_value = None
# subtypes count as creature types only when 'creature' is one of the types
if not fields[5] == '':
self.subtypes = fields[5].split(' ')
if 'creature' in self.types:
self.creaturetypes = self.subtypes
else:
self.creaturetypes = []
else:
self.subtypes = []
self.creaturetypes = []
# p/t must contain exactly one '/'; either half may be non-numeric,
# in which case its *_value stays None
if not fields[6] == '':
self.pt = fields[6]
self.power = None
self.power_value = None
self.toughness = None
self.toughness_value = None
p_t = self.pt.split('/')
if len(p_t) == 2:
self.power = p_t[0]
try:
self.power_value = int(self.power)
except ValueError:
self.power_value = None
self.toughness = p_t[1]
try:
self.toughness_value = int(self.toughness)
except ValueError:
self.toughness_value = None
else:
self._valid = False
else:
self.pt = None
self.power = None
self.power_value = None
self.toughness = None
self.toughness_value = None
# if there's no cost (lands) then cost.none will be True
self.cost = Manacost(fields[7])
if not fields[8] == '':
self.text = fields[8]
self.text_lines = self.text.split(utils.newline)
self.text_words = re.sub(punctuation_chars, ' ', self.text).split()
self.creature_words = []
# SUPER HACK
# For creatures, a keyword found on a line is recorded when nothing but
# punctuation is left after removing the known keywords, or when the
# line contains one of a few trigger phrases.
if 'creature' in self.types:
for line in self.text_lines:
orig_line = line
guess = []
for keyword in creature_keywords:
if keyword in line:
guess += [keyword]
line = line.replace(keyword, '')
# yeah, I said it was a hack
if re.sub(punctuation_chars, ' ', line).split() == [] or 'protect' in line or 'walk' in line or 'sliver creatures' in line or 'you control have' in line:
for word in guess:
if word not in self.creature_words:
self.creature_words += [word]
# elif len(guess) > 0 and len(line) < 30:
# print orig_line
else:
self.text = ''
self.text_lines = []
self.text_words = []
self.creature_words = []
# Short summary: name, "supertypes types <dash> subtypes", then the cmc
# followed by the cost's colors when it has any.
def __str__(self):
return ''.join([
utils.fieldsep,
self.name,
utils.fieldsep,
(' ' + utils.dash_marker + ' ').join([' '.join(self.supertypes + self.types),
' '.join(self.subtypes)]),
utils.fieldsep,
str(self.cost.cmc) if self.cost.colors == ''
else str(self.cost.cmc) + ', ' + self.cost.colors,
utils.fieldsep,
])
# Rebuild the encoded text in the same field order __init__ reads it.
# With randomize set, the mana cost and every mana cost inside the rules
# text are shuffled; the b-side, if any, is appended after utils.bsidesep.
def reencode(self, randomize = False):
return ''.join([
utils.fieldsep,
self.name,
utils.fieldsep,
' '.join(self.supertypes),
utils.fieldsep,
' '.join(self.types),
utils.fieldsep,
self.loyalty if self.loyalty else '',
utils.fieldsep,
' '.join(self.subtypes),
utils.fieldsep,
self.pt if self.pt else '',
utils.fieldsep,
self.cost.reencode(randomize) if not self.cost.none else '',
utils.fieldsep,
self.text if not randomize else randomize_all_mana(self.text),
utils.fieldsep,
utils.bsidesep + self.bside.reencode(randomize) if self.bside else '',
])
# global card pools
unparsed_cards = []
invalid_cards = []

561
encode.py
View file

@ -1,526 +1,77 @@
import jdecode
import re
import codecs
import random
import sys
import utils
#badwords = []
import lib.utils as utils
from lib.cardlib import Card
import lib.jdecode as jdecode
valid_encoded_char = r'[abcdefghijklmnopqrstuvwxyz\'+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]'
cardsep = utils.cardsep
fieldsep = utils.fieldsep
bsidesep = utils.bsidesep
newline = utils.newline
dash_marker = utils.dash_marker
bullet_marker = utils.bullet_marker
this_marker = utils.this_marker
counter_marker = utils.counter_marker
reserved_marker = utils.reserved_marker
x_marker = utils.x_marker
tap_marker = utils.tap_marker
untap_marker = utils.untap_marker
counter_rename = utils.counter_rename
unary_marker = utils.unary_marker
unary_counter = utils.unary_counter
def exclude_sets(cardset):
    '''Return True if cards from the set named cardset should be skipped.'''
    return cardset in ('Unglued', 'Unhinged', 'Celebration')
# This whole things assumes the json format of mtgjson.com.
def exclude_types(cardtype):
    '''Return True if a card with the (lowercase) type cardtype should be skipped.'''
    return cardtype in ('conspiracy',)
# Here's a brief list of relevant fields:
# name - string
# names - list (used for split, flip, and double-faced)
# manaCost - string
# cmc - number
# colors - list
# type - string (the whole big long damn thing)
# supertypes - list
# types - list
# subtypes - list
# text - string
# power - string
# toughness - string
# loyalty - number
def exclude_layouts(layout):
    '''Return True if cards with this mtgjson layout should be skipped.'''
    return layout in ('token', 'plane', 'scheme', 'phenomenon', 'vanguard')
# And some less useful ones, in case they're wanted for something:
# layout - string
# rarity - string
# flavor - string
# artis - string
# number - string
# multiverseid - number
# variations - list
# imageName - string
# watermark - string
# border - string
# timeshifted - boolean
# hand - number
# life - number
# reserved - boolean
# releaseDate - string
# starter - boolean
def strip_reminder_text(s):
    '''Remove every parenthesised group (reminder text) from s.

    Each group is removed on its own, so text sitting between two groups on
    the same line is kept; the old greedy pattern deleted everything from the
    first '(' to the last ')' on a line. One level of nested parentheses is
    handled. Like the old pattern, a group never spans a newline.
    '''
    return re.sub(r'\((?:[^()\n]|\([^()\n]*\))*\)', '', s)
def replace_newlines(s):
    '''Return s with every newline replaced by a single backslash.'''
    return s.replace('\n', '\\')
def replace_cardname(s, name):
    '''Replace references to the card's own name in rules text s with this_marker.

    Both s and name are expected to be lowercased already. Besides the full
    name, the part of the name before the first comma and a fixed list of
    nicknames are replaced as well.
    '''
    # here are some fun edge cases, thanks to jml34 on the forum for
    # pointing them out
    if name == 'sacrifice':
        # only the first occurrence is treated as the card's name
        return s.replace(name, this_marker, 1)
    elif name == 'fear':
        return s

    # str.replace with an empty pattern inserts the replacement between every
    # character, so an empty name must never be used as a pattern
    if name:
        s = s.replace(name, this_marker)

    # so, some legends don't use the full cardname in their text box...
    # this check finds about 400 of them
    nameparts = name.split(',')
    if len(nameparts) > 1:
        mininame = nameparts[0]
        # same empty-pattern hazard for a name that starts with a comma
        if mininame:
            # on first inspection, the replacements all look good
            s = s.replace(mininame, this_marker)

    # a few others don't have a convenient comma to detect their nicknames,
    # so we override them here
    overrides = [
        # detectable by splitting on 'the', though that might cause other issues
        'crovax',
        'rashka',
        'phage',
        'shimatsu',
        # random and arbitrary: they have a last name, 1996 world champion, etc.
        'world champion',
        'axelrod',
        'hazezon',
        'rubinia',
        'rasputin',
        'hivis',
    ]
    # note that the overrides are applied to every card's text, whatever its name
    for override in overrides:
        s = s.replace(override, this_marker)

    return s
def sanitize_name(s):
    '''Return card name s with awkward punctuation and numbers rewritten.

    '!' and '?' are dropped, '-' becomes dash_marker, and the numbers that
    appear in a few names are spelled out.
    '''
    s = s.replace('!', '')
    s = s.replace('?', '')
    s = s.replace('-', dash_marker)
    s = s.replace('100,000', 'one hundred thousand')
    s = s.replace('1,000', 'one thousand')
    # NOTE(review): this runs after the dash replacement above, so the '-' in
    # 'ninety-six' stays a literal '-' -- confirm that is intended
    s = s.replace('1996', 'nineteen ninety-six')
    return s
# call this before replacing newlines
# this one ends up being really bad because of the confusion
# with 'counter target spell or ability'
def replace_counters(s):
# Replace every known '<kind> counter' phrase in s with
# counter_marker + ' counter'. When exactly one kind was found, a
# 'countertype <marker> <kind>' line is put in front of the text so the kind
# is not lost. Returns the rewritten text.
#so, big fat old dictionary time!!!!!!!!!
allcounters = [
'time counter',
'devotion counter',
'charge counter',
'ki counter',
'matrix counter',
'spore counter',
'poison counter',
'quest counter',
'hatchling counter',
'storage counter',
'growth counter',
'paralyzation counter',
'energy counter',
'study counter',
'glyph counter',
'depletion counter',
'sleight counter',
'loyalty counter',
'hoofprint counter',
'wage counter',
'echo counter',
'lore counter',
'page counter',
'divinity counter',
'mannequin counter',
'ice counter',
'fade counter',
'pain counter',
#'age counter',
'gold counter',
'muster counter',
'infection counter',
'plague counter',
'fate counter',
'slime counter',
'shell counter',
'credit counter',
'despair counter',
'globe counter',
'currency counter',
'blood counter',
'soot counter',
'carrion counter',
'fuse counter',
'filibuster counter',
'wind counter',
'hourglass counter',
'trap counter',
'corpse counter',
'awakening counter',
'verse counter',
'scream counter',
'doom counter',
'luck counter',
'intervention counter',
'eyeball counter',
'flood counter',
'eon counter',
'death counter',
'delay counter',
'blaze counter',
'magnet counter',
'feather counter',
'shield counter',
'wish counter',
'petal counter',
'music counter',
'pressure counter',
'manifestation counter',
#'net counter',
'velocity counter',
'vitality counter',
'treasure counter',
'pin counter',
'bounty counter',
'rust counter',
'mire counter',
'tower counter',
#'ore counter',
'cube counter',
'strife counter',
'elixir counter',
'hunger counter',
'level counter',
'winch counter',
'fungus counter',
'training counter',
'theft counter',
'arrowhead counter',
'sleep counter',
'healing counter',
'mining counter',
'dream counter',
'aim counter',
'arrow counter',
'javelin counter',
'gem counter',
'bribery counter',
'mine counter',
'omen counter',
'phylactery counter',
'tide counter',
'polyp counter',
'petrification counter',
'shred counter',
'pupa counter',
]
# first pass: the names that are safe to match as plain substrings
usedcounters = []
for countername in allcounters:
if countername in s:
usedcounters += [countername]
s = s.replace(countername, counter_marker + ' counter')
# oh god some of the counter names are suffixes of others...
# These three run only after the pass above, so longer names such as
# 'page counter' or 'wage counter' have already been rewritten by then.
shortcounters = [
'age counter',
'net counter',
'ore counter',
]
for countername in shortcounters:
# SUPER HACKY fix for doubling season
# ('ore counter' is a substring of 'more counter', so any text containing
# 'more counter' skips all three short names)
if countername in s and 'more counter' not in s:
usedcounters += [countername]
s = s.replace(countername, counter_marker + ' counter')
# miraculously this doesn't seem to happen
# if len(usedcounters) > 1:
# print usedcounters
# we haven't done newline replacement yet, so use actual newlines
if len(usedcounters) == 1:
# and yeah, this line of code can blow up in all kinds of different ways
# (the kind is the first word of the single counter name that was found)
s = 'countertype ' + counter_marker + ' ' + usedcounters[0].split()[0] + '\n' + s
# random code for finding out all the counter names
# global badwords
# countertypes = re.findall(r'[| ][^ ]+ counter', s)
# for countertype in countertypes:
# minicounter = countertype[1:]
# if not minicounter in badwords:
# badwords += [minicounter]
return s
# the word counter is confusing when used to refer to what we do to spells
# and sometimes abilities to make them not happen. Let's rename that.
# call this after doing the counter replacement to simplify the regexes
counter_rename = 'uncast'
def rename_uncast(s):
    '''Rename the verb "counter" (as in countering a spell) to counter_rename.

    Only the listed phrasings are rewritten, in the listed order; "counter"
    as a noun is left alone. Meant to run after the counter replacement pass.
    '''
    replacements = [
        # counter target
        ('counter target ', counter_rename + ' target '),
        # counter a
        ('counter a ', counter_rename + ' a '),
        # counter all
        ('counter all ', counter_rename + ' all '),
        # counters a
        ('counters a ', counter_rename + 's a '),
        # countered (this could get weird in terms of englishing the word)
        ('countered', counter_rename),
        # counter that
        ('counter that ', counter_rename + ' that '),
        # counter @
        ('counter @', counter_rename + ' @'),
        # counter it (this is tricky)
        (', counter it', ', ' + counter_rename + ' it'),
        # counter the (it happens at least once, thanks wizards!)
        ('counter the ', counter_rename + ' the '),
        # counter up to
        ('counter up to ', counter_rename + ' up to '),
    ]
    for old, new in replacements:
        s = s.replace(old, new)
    # whew! by manual inspection of a few dozen texts, it looks like this about covers it.
    return s
# run only after doing unary conversion
def fix_dashes(s):
    '''Turn every '-' in s into dash_marker, except a '-' right before a unary number.

    Level-up ranges are then patched so the separator in 'level N-M' and the
    '+' in 'level N+' become dash_marker as well.
    '''
    # protect '-<unary number>' with the reserved marker while all other
    # dashes are rewritten, then restore it
    s = s.replace('-' + unary_marker, reserved_marker)
    s = s.replace('-', dash_marker)
    s = s.replace(reserved_marker, '-' + unary_marker)

    # level up is annoying
    # NOTE(review): these patterns hardcode '&' and '^' -- presumably the
    # values of unary_marker / unary_counter; confirm against utils
    for level in re.findall(r'level &\^*\-&', s):
        s = s.replace(level, level.replace('-', dash_marker))
    for level in re.findall(r'level &\^*\+', s):
        s = s.replace(level, level.replace('+', dash_marker))

    # and we still have the ~x issue
    return s
# run this after fixing dashes, because this unbreaks the ~x issue
# also probably don't run this on names, there are a few names with x~ in them.
def fix_x(s):
    '''Replace the variable "x" in rules text with x_marker.

    Only an x in one of the listed contexts is touched. The first rule also
    turns dash_marker + 'x' back into '-' + x_marker.
    '''
    replacements = [
        (dash_marker + 'x', '-' + x_marker),
        ('+x', '+' + x_marker),
        (' x ', ' ' + x_marker + ' '),
        ('x:', x_marker + ':'),
        ('x~', x_marker + '~'),
        ('x.', x_marker + '.'),
        ('x,', x_marker + ','),
        ('x/x', x_marker + '/' + x_marker),
    ]
    # applied strictly in this order
    for old, new in replacements:
        s = s.replace(old, new)
    return s
# run after fixing dashes, it makes the regexes better, but before replacing newlines
def reformat_choice(s):
# the idea is to take 'choose n ~\n=ability\n=ability\n'
# to '[n = ability = ability]\n'
# Helper: rewrite every occurrence of prefix followed by one or more bullet
# (u2022) lines. The prefix becomes unary_marker followed by count unary
# counters, the newlines inside the block become spaces, and the whole
# block is wrapped in square brackets.
def choice_formatting_helper(s_helper, prefix, count):
# the pattern contains groups, so findall yields tuples; element 0 is the whole block
single_choices = re.findall(ur'(' + prefix + ur'\n?(\u2022.*(\n|$))+)', s_helper)
for choice in single_choices:
newchoice = choice[0]
newchoice = newchoice.replace(prefix, unary_marker + (unary_counter * count))
newchoice = newchoice.replace('\n', ' ')
# a trailing space means the block ended with a newline: put it back after the ']'
if newchoice[-1:] == ' ':
newchoice = '[' + newchoice[:-1] + ']\n'
else:
newchoice = '[' + newchoice + ']'
s_helper = s_helper.replace(choice[0], newchoice)
return s_helper
# the count passed for "one or both" / "one or more" is 0
s = choice_formatting_helper(s, ur'choose one \u2014', 1)
s = choice_formatting_helper(s, ur'choose one \u2014 ', 1) # ty Promise of Power
s = choice_formatting_helper(s, ur'choose two \u2014', 2)
s = choice_formatting_helper(s, ur'choose one or both \u2014', 0)
s = choice_formatting_helper(s, ur'choose one or more \u2014', 0)
return s
# do before removing newlines
# might as well do this after countertype because we probably care more about
# the location of the equip cost
def relocate_equip(s):
# Move an equip ability to the front of the rules text s and return the
# result. Mana-cost equips ('equip {..}') and non-mana equips
# ('equip' followed by u2014) are handled by two near-identical passes.
# NOTE(review): no re.MULTILINE flag is passed, so '$' only matches at the
# very end of s; a mana-cost equip is therefore only found when it is the
# last thing in the text -- confirm that is intended.
equips = re.findall(r'equip \{[WUBRGPV/XTQ&^]*\}.?$', s)
# there don't seem to be any cases with more than one
if len(equips) == 1:
equip = equips[0]
# cut the ability out, together with the newline in front of it if there is one
s = s.replace('\n' + equip, '')
s = s.replace(equip, '')
# the optional trailing character matched by '.?' may be a space; drop it
if equip[-1:] == ' ':
equip = equip[0:-1]
if s == '':
s = equip
else:
s = equip + '\n' + s
# the pattern has groups, so findall yields tuples; element 0 is the whole
# ability including its trailing newline, if any
nonmana = re.findall(ur'(equip\u2014.*(\n|$))', s)
if len(nonmana) == 1:
equip = nonmana[0][0]
s = s.replace('\n' + equip, '')
s = s.replace(equip, '')
if equip[-1:] == ' ':
equip = equip[0:-1]
if s == '':
s = equip
else:
s = equip + '\n' + s
return s
def encode(card):
    '''Encode one mtgjson card dict as a fieldsep-delimited string.

    Returns None for excluded layouts and for Conspiracy cards. A 'bside'
    entry, if present, is encoded recursively and appended after bsidesep.
    '''
    # filter out vanguard cards
    if card['layout'] in ['token', 'plane', 'scheme', 'phenomenon', 'vanguard']:
        return
    if card['type'] in ['Conspiracy']: # just for now?
        return

    # default, so a card that has text but no name does not hit an unbound
    # variable further down
    name = ''

    encoding = fieldsep
    if 'name' in card:
        name = card['name'].lower()
        encoding += sanitize_name(name)
    encoding += fieldsep
    if 'supertypes' in card:
        encoding += ' '.join(card['supertypes']).lower()
    encoding += fieldsep
    if 'types' in card:
        encoding += ' '.join(card['types']).lower()
    encoding += fieldsep
    if 'loyalty' in card:
        encoding += utils.to_unary(str(card['loyalty']))
    encoding += fieldsep
    if 'subtypes' in card:
        encoding += ' '.join(card['subtypes']).lower()
    encoding += fieldsep
    if 'power' in card and 'toughness' in card:
        encoding += utils.to_unary(card['power']) + '/' + utils.to_unary(card['toughness'])
    encoding += fieldsep
    if 'manaCost' in card:
        encoding += utils.to_mana(card['manaCost'].lower())
    encoding += fieldsep
    if 'text' in card:
        # the order of these passes matters; see the comments on each helper
        text = card['text'].lower()
        text = strip_reminder_text(text)
        # an empty name must not be used as a replacement pattern
        if name:
            text = replace_cardname(text, name)
        text = utils.to_mana(text)
        text = utils.to_symbols(text)
        text = utils.to_unary(text)
        text = fix_dashes(text)
        text = fix_x(text)
        text = replace_counters(text)
        text = rename_uncast(text)
        text = reformat_choice(text)
        text = relocate_equip(text)
        text = replace_newlines(text)
        encoding += text.strip()
    encoding += fieldsep

    # now output the bside if there is one
    if 'bside' in card:
        # the b-side can itself be excluded, in which case encode returns
        # None; skip it instead of concatenating None onto a string
        bside_encoding = encode(card['bside'])
        if bside_encoding:
            encoding += bsidesep
            encoding += bside_encoding

    encoding = utils.to_ascii(encoding)
    # debugging aid: strip valid_encoded_char from the result and look at
    # whatever is left over
    # encoding = re.sub(valid_encoded_char, '', encoding)
    return encoding
def encode_duplicated(cards):
    '''Encode a group of same-named json cards as text (older, string-based path).'''
    # Boring solution: only write out the first one...
    return encode(cards[0])

def compile_duplicated(jcards):
    '''Build a Card from a group of same-named json cards, or None if excluded.

    Cards from excluded sets or layouts, and cards with an excluded type,
    give None.
    '''
    # nothing to compile from an empty group
    if not jcards:
        return None
    # Boring solution: only use the first one...
    card = Card(jcards[0])
    if (exclude_sets(jcards[0][utils.json_field_set_name])
        or exclude_layouts(jcards[0]['layout'])):
        return None
    for cardtype in card.types:
        if exclude_types(cardtype):
            return None
    return card
def main(fname, oname = None, verbose = True):
    '''Read the mtgjson file fname and write every valid card in encoded form.

    Cards go to the file oname when it is given, otherwise to stdout, each
    followed by utils.cardsep. With verbose set, progress and a summary of
    valid / skipped / invalid / unparsed cards are printed.
    '''
    if verbose:
        print('Opening json file: ' + fname)

    jcards = jdecode.mtg_open_json(fname, verbose)
    cards = []

    valid = 0
    skipped = 0
    invalid = 0
    unparsed = 0

    for jcard_name in jcards:
        card = compile_duplicated(jcards[jcard_name])
        if card:
            if card.valid:
                valid += 1
                cards += [card]
            elif card.parsed:
                invalid += 1
            else:
                unparsed += 1
        else:
            skipped += 1

    if verbose:
        print(str(valid) + ' valid, ' + str(skipped) + ' skipped, '
              + str(invalid) + ' invalid, ' + str(unparsed) + ' failed to parse.')

    # This should give a random but consistent ordering, to make comparing changes
    # between the output of different versions easier.
    random.seed(1371367)
    random.shuffle(cards)

    if oname:
        if verbose:
            print('Writing output to: ' + oname)
        with open(oname, 'w') as ofile:
            for card in cards:
                ofile.write(card.encode() + utils.cardsep)
    else:
        for card in cards:
            sys.stdout.write(card.encode() + utils.cardsep)
        # was 'sts.stdout.flush()', a NameError on this path
        sys.stdout.flush()
if __name__ == '__main__':

View file

@ -1,65 +0,0 @@
import json
# to allow filtering of sets like un sets, etc...
def legal_set(set):
    '''Return True unless the set dict is an 'un' set or the Celebration set.'''
    # (the parameter name shadows the builtin, but is kept so existing
    # callers are unaffected)
    return not (set['type'] == 'un' or set['name'] == 'Celebration')
def mtg_open_json(fname, verbose = False):
    '''Load an mtgjson file and return its cards grouped by lowercased name.

    Returns a dict mapping each name to the list of card dicts carrying that
    name, over all legal sets. Every card gets a 'setName' entry. Cards
    whose uid ends in 'b' are not listed on their own; they are attached to
    the matching 'a' card as card['bside'].
    '''
    # the with block closes the file even if json.load raises
    with open(fname, 'r') as f:
        jobj = json.load(f)

    allcards = {}
    asides = {}
    bsides = {}

    for k_set in jobj:
        cardset = jobj[k_set]
        setname = cardset['name']
        if not legal_set(cardset):
            continue
        for card in cardset['cards']:
            card['setName'] = setname

            cardnumber = card['number'] if 'number' in card else None
            # the lower avoids duplication of at least one card (Will-o/O'-the-Wisp)
            cardname = card['name'].lower()

            # uid is '<set code>_<number>', or '<set code>_<name>_' when there
            # is no number; the trailing '_' keeps a name ending in 'a' or 'b'
            # from looking like a side marker
            uid = cardset['code']
            if cardnumber is None:
                uid = uid + '_' + cardname + '_'
            else:
                uid = uid + '_' + cardnumber

            # aggregate by name to avoid duplicates, not counting bsides
            if uid[-1:] != 'b':
                allcards.setdefault(cardname, []).append(card)

            # also aggregate aside cards by uid so we can add bsides later
            if uid[-1:] == 'a':
                asides[uid] = card
            if uid[-1:] == 'b':
                bsides[uid] = card

    for uid in bsides:
        aside_uid = uid[:-1] + 'a'
        if aside_uid in asides:
            # the second check handles the brothers yamazaki edge case
            if not asides[aside_uid]['name'] == bsides[uid]['name']:
                asides[aside_uid]['bside'] = bsides[uid]
        # a bside without a matching aside is ignored; this happens for some
        # coldsnap theme deck bsides that aren't really bsides, and
        # shouldn't matter too much

    if verbose:
        print('Opened ' + str(len(allcards)) + ' uniquely named cards.')
    return allcards

402
lib/cardlib.py Normal file
View file

@ -0,0 +1,402 @@
# card representation
import re
import utils
import transforms
from manalib import Manacost, Manatext
# These are used later to determine what the fields of the Card object are called.
# Define them here because they have nothing to do with the actual format.
field_name = 'name'
field_rarity = 'rarity'
field_cost = 'cost'
field_supertypes = 'supertypes'
field_types = 'types'
field_subtypes = 'subtypes'
field_loyalty = 'loyalty'
field_pt = 'pt'
field_text = 'text'
field_other = 'other' # it's kind of a pseudo-field
# Import the labels, because these do appear in the encoded text.
field_label_name = utils.field_label_name
field_label_rarity = utils.field_label_rarity
field_label_cost = utils.field_label_cost
field_label_supertypes = utils.field_label_supertypes
field_label_types = utils.field_label_types
field_label_subtypes = utils.field_label_subtypes
field_label_loyalty = utils.field_label_loyalty
field_label_pt = utils.field_label_pt
field_label_text = utils.field_label_text
fieldnames = [
field_name,
field_rarity,
field_cost,
field_supertypes,
field_types,
field_subtypes,
field_loyalty,
field_pt,
field_text,
]
fmt_ordered_default = [
field_name,
field_supertypes,
field_types,
field_loyalty,
field_subtypes,
field_pt,
field_cost,
field_text,
]
# Default mapping from each field name to the label that marks it in labeled
# text encodings. Every entry of fieldnames has a label here; subtypes used
# to be missing even though its label is imported above.
fmt_labeled_default = {
    field_name : field_label_name,
    field_rarity : field_label_rarity,
    field_cost : field_label_cost,
    field_supertypes : field_label_supertypes,
    field_types : field_label_types,
    field_subtypes : field_label_subtypes,
    field_loyalty : field_label_loyalty,
    field_pt : field_label_pt,
    field_text : field_label_text,
}
# sanity test if a card's fields look plausible
def fields_check_valid(fields):
    '''Sanity test: return True if a card's parsed fields look plausible.

    fields maps field names to lists of (idx, value) pairs. A card needs a
    name and a types field, and it must have a p/t field exactly when one of
    its types values contains 'creature'.
    '''
    # all cards must have a name and a type
    if field_name not in fields or field_types not in fields:
        return False
    # creatures have p/t, other things don't
    iscreature = any('creature' in value for idx, value in fields[field_types])
    return (field_pt in fields) == iscreature
# These functions take a bunch of source data in some format and turn
# it into nicely labeled fields that we know how to initialize a card from.
# Both return a dict that maps field names to lists of possible values,
# paired with the index that we read that particular field value from.
# So, {fieldname : [(idx, value), (idx, value)...].
# Usually we want these lists to be length 1, but you never know.
# Of course to make things nice and simple, that dict is the third element
# of a triple that reports parsing success and valid success as its
# first two elements.
# This whole things assumes the json format of mtgjson.com.
# Here's a brief list of relevant fields:
# name - string
# names - list (used for split, flip, and double-faced)
# manaCost - string
# cmc - number
# colors - list
# type - string (the whole big long damn thing)
# supertypes - list
# types - list
# subtypes - list
# text - string
# power - string
# toughness - string
# loyalty - number
# And some less useful ones, in case they're wanted for something:
# layout - string
# rarity - string
# flavor - string
# artis - string
# number - string
# multiverseid - number
# variations - list
# imageName - string
# watermark - string
# border - string
# timeshifted - boolean
# hand - number
# life - number
# reserved - boolean
# releaseDate - string
# starter - boolean
def fields_from_json(src_json):
    '''Turn one mtgjson card dict into labeled fields.

    Returns (parsed, valid, fields), where fields maps each field name to a
    list of (idx, value) pairs; idx is always -1 here. Cost and text values
    are Manacost / Manatext objects rather than strings.
    '''
    parsed = True
    valid = True
    fields = {}

    # we hardcode in what the things are called in the mtgjson format
    if 'name' in src_json:
        name_val = src_json['name'].lower()
        # the unsanitized name is what the rules text refers to
        name_orig = name_val
        name_val = transforms.name_pass_1_sanitize(name_val)
        name_val = utils.to_ascii(name_val)
        fields[field_name] = [(-1, name_val)]
    else:
        name_orig = ''
        parsed = False

    # return the actual Manacost object
    if 'manaCost' in src_json:
        cost = Manacost(src_json['manaCost'], fmt = 'json')
        valid = valid and cost.valid
        parsed = parsed and cost.parsed
        fields[field_cost] = [(-1, cost)]

    if 'supertypes' in src_json:
        fields[field_supertypes] = [(-1, [utils.to_ascii(s.lower())
                                          for s in src_json['supertypes']])]

    if 'types' in src_json:
        fields[field_types] = [(-1, [utils.to_ascii(s.lower())
                                     for s in src_json['types']])]
    else:
        parsed = False

    if 'subtypes' in src_json:
        fields[field_subtypes] = [(-1, [utils.to_ascii(s.lower())
                                        for s in src_json['subtypes']])]

    if 'loyalty' in src_json:
        fields[field_loyalty] = [(-1, utils.to_unary(str(src_json['loyalty'])))]

    # Power and toughness must come as a pair. A lone half is still recorded
    # but makes the card invalid. valid is only ever cleared here, never set
    # back to True, so an invalid mana cost found above is not forgotten.
    p_t = ''
    if 'power' in src_json:
        p_t = utils.to_ascii(utils.to_unary(src_json['power'])) + '/' # hardcoded
        if 'toughness' in src_json:
            p_t = p_t + utils.to_ascii(utils.to_unary(src_json['toughness']))
        else:
            valid = False
    elif 'toughness' in src_json:
        p_t = '/' + utils.to_ascii(utils.to_unary(src_json['toughness'])) # hardcoded
        valid = False
    if p_t:
        fields[field_pt] = [(-1, p_t)]

    # similarly, return the actual Manatext object
    if 'text' in src_json:
        # the passes are numbered in the order they have to run
        text_val = src_json['text'].lower()
        text_val = transforms.text_pass_1_strip_rt(text_val)
        text_val = transforms.text_pass_2_cardname(text_val, name_orig)
        text_val = transforms.text_pass_3_unary(text_val)
        text_val = transforms.text_pass_4a_dashes(text_val)
        text_val = transforms.text_pass_4b_x(text_val)
        text_val = transforms.text_pass_5_counters(text_val)
        text_val = transforms.text_pass_6_uncast(text_val)
        text_val = transforms.text_pass_7_choice(text_val)
        text_val = transforms.text_pass_8_equip(text_val)
        text_val = transforms.text_pass_9_newlines(text_val)
        text_val = transforms.text_pass_10_symbols(text_val)
        text_val = utils.to_ascii(text_val)
        text_val = text_val.strip()
        mtext = Manatext(text_val, fmt = 'json')
        valid = valid and mtext.valid
        fields[field_text] = [(-1, mtext)]

    # we don't need to worry about bsides because we handle that in the constructor
    return parsed, valid and fields_check_valid(fields), fields
def fields_from_format(src_text, fmt_ordered, fmt_labeled, fieldsep):
    '''Parse an encoded text card into labeled fields (not implemented yet).

    Callers unpack a (parsed, valid, fields) triple, so until the parser is
    written this reports the text as unparsed and invalid with no fields,
    rather than returning None and breaking the unpacking.
    '''
    # TODO: implement parsing of the ordered / labeled text formats
    return False, False, {}
# Here's the actual Card class that other files should use.
class Card:
    '''card representation with data

    A Card is built either from a json-style dict (handled by
    fields_from_json) or from an encoded text string (handled by
    fields_from_format). Either way the parsed fields are copied onto the
    instance under the field_* names, and encode() turns them back into the
    encoded text representation.
    '''

    def __init__(self, src, fmt_ordered = fmt_ordered_default,
                 fmt_labeled = None,
                 fieldsep = utils.fieldsep):
        '''Build a card from src.

        src         -- a dict (json card) or an encoded text string
        fmt_ordered -- field order used when decoding text
        fmt_labeled -- optional dict of field name -> label prefix
        fieldsep    -- separator between encoded fields

        Raises ValueError if the parser hands back a field name that is
        neither handled by a _set_<field> method nor a known attribute.
        '''
        # source fields, exactly one will be set
        self.json = None
        self.raw = None
        # flags
        self.parsed = True
        self.valid = True # only records broken pt right now (broken as in, no /)
        # default values for all fields
        self.__dict__[field_name] = ''
        self.__dict__[field_rarity] = ''
        self.__dict__[field_cost] = Manacost('')
        self.__dict__[field_supertypes] = []
        self.__dict__[field_types] = []
        self.__dict__[field_subtypes] = []
        self.__dict__[field_loyalty] = ''
        self.__dict__[field_loyalty + '_value'] = None
        self.__dict__[field_pt] = ''
        self.__dict__[field_pt + '_p'] = None
        self.__dict__[field_pt + '_p_value'] = None
        self.__dict__[field_pt + '_t'] = None
        self.__dict__[field_pt + '_t_value'] = None
        self.__dict__[field_text] = Manatext('')
        self.__dict__[field_text + '_lines'] = []
        self.__dict__[field_text + '_words'] = []
        self.__dict__[field_other] = []
        self.bside = None
        # format-independent view of processed input
        self.fields = None # will be reset later

        # looks like a json object
        if isinstance(src, dict):
            self.json = src
            if utils.json_field_bside in src:
                self.bside = Card(src[utils.json_field_bside],
                                  fmt_ordered = fmt_ordered,
                                  fmt_labeled = fmt_labeled,
                                  fieldsep = fieldsep)
            p_success, v_success, parsed_fields = fields_from_json(src)
            self.parsed = p_success
            self.valid = v_success
            self.fields = parsed_fields
        # otherwise assume text encoding
        else:
            self.raw = src
            sides = src.split(utils.bsidesep)
            if len(sides) > 1:
                self.bside = Card(utils.bsidesep.join(sides[1:]),
                                  fmt_ordered = fmt_ordered,
                                  fmt_labeled = fmt_labeled,
                                  fieldsep = fieldsep)
            p_success, v_success, parsed_fields = fields_from_format(sides[0], fmt_ordered,
                                                                     fmt_labeled, fieldsep)
            self.parsed = p_success
            self.valid = v_success
            self.fields = parsed_fields
        # amusingly enough, both encodings allow infinitely deep nesting of bsides...

        # python name hackery
        if self.fields:
            for field in self.fields:
                # Look for a specialized set function. Methods are stored on
                # the class, never in the instance __dict__, so the lookup
                # has to go through getattr() to find them at all.
                setter = getattr(self, '_set_' + field, None)
                if setter is not None:
                    setter(self.fields[field])
                # otherwise use the default one
                elif field in self.__dict__:
                    self.set_field_default(field, self.fields[field])
                # If we don't recognize the field, fail. This is a totally artificial
                # limitation; if we just used the default handler for the else case,
                # we could set arbitrarily named fields.
                else:
                    raise ValueError('python name mangling failure: unknown field for Card(): '
                                     + field)
        else:
            # valid but not parsed indicates that the card was apparently empty
            self.parsed = False

    # These setters are looked up by name ('_set_' + field name), so they have
    # to match the field names specified above to be used. Otherwise we just
    # always fall back to the (uninteresting) default handler.
    # Also note that all fields come wrapped in pairs, with the first member
    # specifying the index the field was found at when parsing the card. These will
    # all be -1 if the card was parsed from (unordered) json.

    def set_field_default(self, field, values):
        '''Store the first value of values under the attribute named field.'''
        for idx, value in values:
            self.__dict__[field] = value
            break # only use the first one...

    def _set_loyalty(self, values):
        '''Store loyalty plus its integer value (None if not a plain int).'''
        for idx, value in values:
            self.__dict__[field_loyalty] = value
            try:
                self.__dict__[field_loyalty + '_value'] = int(value)
            except ValueError:
                self.__dict__[field_loyalty + '_value'] = None
                # Technically '*' could still be valid, but it's unlikely...
            break # only use the first one...

    def _set_pt(self, values):
        '''Store power/toughness and split it into its two halves.

        Marks the card invalid when the value does not contain exactly one
        '/' separator.
        '''
        for idx, value in values:
            self.__dict__[field_pt] = value
            p_t = value.split('/') # hardcoded
            if len(p_t) == 2:
                self.__dict__[field_pt + '_p'] = p_t[0]
                try:
                    self.__dict__[field_pt + '_p_value'] = int(p_t[0])
                except ValueError:
                    self.__dict__[field_pt + '_p_value'] = None
                self.__dict__[field_pt + '_t'] = p_t[1]
                try:
                    self.__dict__[field_pt + '_t_value'] = int(p_t[1])
                except ValueError:
                    self.__dict__[field_pt + '_t_value'] = None
            else:
                self.valid = False
            break # only use the first one...

    def _set_text(self, values):
        '''Store the Manatext body plus per-line and per-word views of it.'''
        for idx, value in values:
            self.__dict__[field_text] = value
            fulltext = value.encode()
            if fulltext:
                # a real list (not a lazy map object) so callers can index it
                self.__dict__[field_text + '_lines'] = [
                    Manatext(line) for line in fulltext.split(utils.newline)
                ]
                self.__dict__[field_text + '_words'] = re.sub(utils.unletters_regex,
                                                              ' ',
                                                              fulltext).split()
            break # only use the first one, like the other setters

    def _set_other(self, values):
        '''Record every unrecognized (index, value) pair.'''
        # just record these, we could do something to unset valid if we really wanted
        for idx, value in values:
            self.__dict__[field_other] += [(idx, value)]

    # Output functions that produce various formats. encode() is specific to
    # the NN representation, use str() or format() for output intended for human
    # readers.

    def encode(self, fmt_ordered = fmt_ordered_default,
               fmt_labeled = None, fieldsep = utils.fieldsep,
               randomize_fields = False, randomize_mana = False,
               initial_sep = True, final_sep = True):
        '''Return the encoded text form of this card (and its bside, if any).

        fmt_ordered      -- field names to emit, in order
        fmt_labeled      -- optional dict of field name -> label prefix
        fieldsep         -- separator placed between fields
        randomize_fields -- shuffle the emitted fields
        randomize_mana   -- passed on to the encode() of Manacost / Manatext fields
        initial_sep      -- emit a separator before the first field
        final_sep        -- emit a separator after the last field

        Raises ValueError if fmt_ordered names an attribute the card lacks.
        '''
        outfields = []

        for field in fmt_ordered:
            if field not in self.__dict__:
                raise ValueError('unknown field for Card.encode(): ' + str(field))

            outfield = self.__dict__[field]
            if outfield:
                # specialized field handling for the ones that aren't strings (sigh)
                if isinstance(outfield, list):
                    outfield_str = ' '.join(outfield)
                elif isinstance(outfield, (Manacost, Manatext)):
                    outfield_str = outfield.encode(randomize = randomize_mana)
                else:
                    outfield_str = outfield
                # labels are only attached to fields that have content
                if fmt_labeled and field in fmt_labeled:
                    outfield_str = fmt_labeled[field] + outfield_str
            else:
                outfield_str = ''

            outfields += [outfield_str]

        if randomize_fields:
            random.shuffle(outfields)
        if initial_sep:
            outfields = [''] + outfields
        if final_sep:
            outfields = outfields + ['']

        outstr = fieldsep.join(outfields)

        if self.bside:
            outstr = (outstr + utils.bsidesep
                      + self.bside.encode(fmt_ordered = fmt_ordered,
                                          fmt_labeled = fmt_labeled,
                                          fieldsep = fieldsep,
                                          randomize_fields = randomize_fields,
                                          randomize_mana = randomize_mana,
                                          initial_sep = initial_sep, final_sep = final_sep))

        return outstr

View file

@ -15,7 +15,8 @@ dash_marker = '~'
bullet_marker = '='
this_marker = '@'
counter_marker = '%'
reserved_marker = '\r'
reserved_marker = '\v'
reserved_mana_marker = '$'
x_marker = 'X'
tap_marker = 'T'
untap_marker = 'Q'
@ -35,3 +36,19 @@ unary_exceptions = {
100: 'one hundred',
200: 'two hundred',
}
# field labels, to allow potential reordering of card format
field_label_name = '1'
field_label_rarity = '2'
field_label_cost = '3'
field_label_supertypes = '4'
field_label_types = '5'
field_label_subtypes = '6'
field_label_loyalty = '7'
field_label_pt = '8'
field_label_text = '9'
# one left, could use for managing bsides
# additional fields we add to the json cards
json_field_bside = 'bside'
json_field_set_name = 'setName'

61
lib/jdecode.py Normal file
View file

@ -0,0 +1,61 @@
import json
import config
def mtg_open_json(fname, verbose = False):
    '''Load a json file of card sets and group its cards by lowercased name.

    fname   -- path of the json file; it is expected to map set keys to
               objects with 'name', 'code' and 'cards' entries
    verbose -- when true, print how many uniquely named cards were found

    Every card dict is tagged with its set name under
    config.json_field_set_name. Cards whose uid ends in 'b' are treated as
    bsides: they are left out of the result and attached to the matching
    'a' card under config.json_field_bside instead.

    Returns a dict mapping lowercased card name -> list of card dicts.
    '''
    with open(fname, 'r') as f:
        jobj = json.load(f)

    allcards = {}
    asides = {}
    bsides = {}

    for k_set in jobj:
        cardset = jobj[k_set]
        setname = cardset['name']

        for card in cardset['cards']:
            card[config.json_field_set_name] = setname

            cardnumber = card.get('number')
            # the lower avoids duplication of at least one card (Will-o/O'-the-Wisp)
            cardname = card['name'].lower()

            if cardnumber is None:
                uid = cardset['code'] + '_' + cardname + '_'
            else:
                uid = cardset['code'] + '_' + cardnumber

            side = uid[-1:]

            # aggregate by name to avoid duplicates, not counting bsides
            if side != 'b':
                allcards.setdefault(cardname, []).append(card)

            # also aggregate aside cards by uid so we can add bsides later
            if side == 'a':
                asides[uid] = card
            elif side == 'b':
                bsides[uid] = card

    for uid, bside in bsides.items():
        aside = asides.get(uid[:-1] + 'a')
        # A bside without an aside is silently ignored; this exposes some
        # coldsnap theme deck bsides that aren't really bsides, which
        # shouldn't matter too much.
        if aside is None:
            continue
        # the name check handles the brothers yamazaki edge case
        if aside['name'] != bside['name']:
            aside[config.json_field_bside] = bside

    if verbose:
        # single-argument call form: prints the same text on Python 2 and 3
        print('Opened ' + str(len(allcards)) + ' uniquely named cards.')
    return allcards

173
lib/manalib.py Normal file
View file

@ -0,0 +1,173 @@
# representation for mana costs and text with embedded mana costs
# data aggregating classes
import random
import re
import utils
class Manacost:
    '''mana cost representation with data

    Parses an encoded mana cost (or a json one when fmt == 'json') into a
    symbol sequence, per-symbol counts, a converted mana cost and a color
    string. self.parsed / self.valid record whether the input had the
    expected delimiters and only known symbols; self.none marks an empty cost.
    '''

    # hardcoded to be dependent on the symbol structure... ah well
    def get_colors(self):
        '''Return the sorted, de-duplicated color letters of all used symbols.'''
        colors = ''
        for sym in self.symbols:
            if self.symbols[sym] > 0:
                # strip the non-color parts of hybrid / phyrexian / snow / X symbols
                symcolors = re.sub(r'2|P|S|X', '', sym)
                for symcolor in symcolors:
                    if symcolor not in colors:
                        colors += symcolor
        # sort so the order is always consistent
        return ''.join(sorted(colors))

    def check_colors(self, symbolstring):
        '''Return True if every character of symbolstring is one of our colors.'''
        return all(sym in self.colors for sym in symbolstring)

    def __init__(self, src, fmt = ''):
        '''Parse src; fmt == 'json' means src is a json mana string.'''
        # source fields, exactly one will be set
        self.raw = None
        self.json = None
        # flags
        self.parsed = True
        self.valid = True
        self.none = False
        # default values for all fields
        self.inner = None
        self.cmc = 0
        self.colorless = 0
        self.sequence = []
        self.symbols = {sym : 0 for sym in utils.mana_syms}
        self.allsymbols = {sym : 0 for sym in utils.mana_symall}
        self.colors = ''

        if fmt == 'json':
            self.json = src
            text = utils.mana_translate(self.json.upper())
        else:
            self.raw = src
            text = self.raw

        if text == '':
            self.inner = ''
            self.none = True

        elif not (len(text) >= 2 and text[0] == '{' and text[-1] == '}'):
            self.parsed = False
            self.valid = False

        else:
            self.inner = text[1:-1]

            # structure mirrors the decoding in utils, but we pull out different data here
            idx = 0
            while idx < len(self.inner):
                # taking this branch is an infinite loop if unary_marker is empty
                if (len(utils.mana_unary_marker) > 0 and
                    self.inner[idx:idx+len(utils.mana_unary_marker)] == utils.mana_unary_marker):
                    idx += len(utils.mana_unary_marker)
                    self.sequence += [utils.mana_unary_marker]
                elif self.inner[idx:idx+len(utils.mana_unary_counter)] == utils.mana_unary_counter:
                    idx += len(utils.mana_unary_counter)
                    self.sequence += [utils.mana_unary_counter]
                    self.colorless += 1
                    self.cmc += 1
                else:
                    old_idx = idx
                    for symlen in range(utils.mana_symlen_min, utils.mana_symlen_max + 1):
                        encoded_sym = self.inner[idx:idx+symlen]
                        if encoded_sym in utils.mana_symall_decode:
                            idx += symlen
                            # leave the sequence encoded for convenience
                            self.sequence += [encoded_sym]
                            sym = utils.mana_symall_decode[encoded_sym]
                            self.allsymbols[sym] += 1
                            if sym in utils.mana_symalt:
                                self.symbols[utils.mana_alt(sym)] += 1
                            else:
                                self.symbols[sym] += 1
                            if sym == utils.mana_X:
                                self.cmc += 0
                            elif utils.mana_2 in sym:
                                self.cmc += 2
                            else:
                                self.cmc += 1
                            break
                    # otherwise we'll go into an infinite loop if we see a symbol we don't know
                    if idx == old_idx:
                        idx += 1
                        self.valid = False

        self.colors = self.get_colors()

    def __str__(self):
        return utils.mana_untranslate(utils.mana_open_delimiter + ''.join(self.sequence)
                                      + utils.mana_close_delimiter)

    def format(self, for_forum = False):
        '''Return the untranslated cost; for_forum is passed to mana_untranslate.'''
        # for_forum belongs to mana_untranslate(); handing it to ''.join()
        # (which takes exactly one argument) raised TypeError on every call.
        return utils.mana_untranslate(utils.mana_open_delimiter + ''.join(self.sequence)
                                      + utils.mana_close_delimiter, for_forum)

    def encode(self, randomize = False):
        '''Return the encoded cost, optionally with its symbols shuffled.'''
        if self.none:
            return ''
        elif randomize:
            # so this won't work very well if mana_unary_marker isn't empty
            return (utils.mana_open_delimiter
                    + ''.join(random.sample(self.sequence, len(self.sequence)))
                    + utils.mana_close_delimiter)
        else:
            return utils.mana_open_delimiter + ''.join(self.sequence) + utils.mana_close_delimiter
class Manatext:
    '''text representation with embedded mana costs

    Every mana cost found in the source text is pulled out into self.costs
    (as Manacost objects, in order of appearance) and replaced in self.text
    by utils.reserved_mana_marker. The output methods put the costs back,
    each rendered in the requested style.
    '''

    def __init__(self, src, fmt = ''):
        # source fields
        self.raw = None
        self.json = None
        # flags
        self.valid = True
        # default values for all fields
        self.text = src
        self.costs = []

        if fmt == 'json':
            self.json = src
            found = re.findall(utils.mana_json_regex, src)
        else:
            self.raw = src
            found = re.findall(utils.mana_regex, src)

        for manastr in found:
            parsed_cost = Manacost(manastr, fmt)
            if not parsed_cost.valid:
                self.valid = False
            self.costs += [parsed_cost]
            self.text = self.text.replace(manastr, utils.reserved_mana_marker, 1)

        # any delimiter still left in the text belongs to a cost we failed to extract
        leftovers = (utils.mana_open_delimiter,
                     utils.mana_close_delimiter,
                     utils.mana_json_open_delimiter,
                     utils.mana_json_close_delimiter)
        if any(delimiter in self.text for delimiter in leftovers):
            self.valid = False

    def _fill_in_costs(self, render):
        # Replace the markers one at a time, left to right, with render(cost).
        filled = self.text
        for cost in self.costs:
            filled = filled.replace(utils.reserved_mana_marker, render(cost), 1)
        return filled

    def __str__(self):
        return self._fill_in_costs(str)

    def format(self, for_forum = False):
        return self._fill_in_costs(lambda cost: cost.format(for_forum = for_forum))

    def encode(self, randomize = False):
        return self._fill_in_costs(lambda cost: cost.encode(randomize = randomize))

390
lib/transforms.py Normal file
View file

@ -0,0 +1,390 @@
# transform passes used to encode / decode cards
import re
# These could probably use a little love... They tend to hardcode in lots
# of things very specific to the mtgjson format.
import utils
# Module-level aliases for the markers and separators defined in utils, so
# the passes below can refer to them without the utils. prefix.
cardsep = utils.cardsep
fieldsep = utils.fieldsep
bsidesep = utils.bsidesep
newline = utils.newline
dash_marker = utils.dash_marker
bullet_marker = utils.bullet_marker
this_marker = utils.this_marker
counter_marker = utils.counter_marker
reserved_marker = utils.reserved_marker
x_marker = utils.x_marker
tap_marker = utils.tap_marker
untap_marker = utils.untap_marker
# NOTE(review): counter_rename is assigned again, to a string literal, just
# before text_pass_6_uncast further down in this module.
counter_rename = utils.counter_rename
unary_marker = utils.unary_marker
unary_counter = utils.unary_counter
# Name Passes.
def name_pass_1_sanitize(s):
    '''Drop '!' and '?', mark dashes and spell out the numbers found in names.'''
    # applied strictly in this order: '100,000' has to go before '1,000'
    substitutions = (
        ('!', ''),
        ('?', ''),
        ('-', dash_marker),
        ('100,000', 'one hundred thousand'),
        ('1,000', 'one thousand'),
        ('1996', 'nineteen ninety-six'),
    )
    for old, new in substitutions:
        s = s.replace(old, new)
    return s
# Text Passes.
def text_pass_1_strip_rt(s):
    '''Remove parenthesized (reminder) text from s.

    The match is greedy and never crosses a newline, so on any single line
    everything from the first '(' to the last ')' is removed.
    '''
    reminder_text = re.compile(r'\(.*\)')
    return reminder_text.sub('', s)
def text_pass_2_cardname(s, name):
    '''Replace references to the card's own name in s with this_marker.'''
    # Here are some fun edge cases, thanks to jml34 on the forum for
    # pointing them out: 'fear' is left alone entirely and for 'sacrifice'
    # only the first occurrence is replaced.
    if name == 'fear':
        return s
    if name == 'sacrifice':
        return s.replace(name, this_marker, 1)

    s = s.replace(name, this_marker)

    # So, some legends don't use the full cardname in their text box...
    # this check finds about 400 of them. Whatever comes before the first
    # comma of the name is treated as the short name.
    shortname, comma, _ = name.partition(',')
    if comma:
        s = s.replace(shortname, this_marker)

    # A few others don't have a convenient comma to detect their nicknames,
    # so we override them here.
    nicknames = (
        # detectable by splitting on 'the', though that might cause other issues
        'crovax',
        'rashka',
        'phage',
        'shimatsu',
        # random and arbitrary: they have a last name, 1996 world champion, etc.
        'world champion',
        'axelrod',
        'hazezon',
        'rubinia',
        'rasputin',
        'hivis',
    )
    for nickname in nicknames:
        s = s.replace(nickname, this_marker)

    return s
def text_pass_3_unary(s):
    '''Return s as converted by utils.to_unary.'''
    converted = utils.to_unary(s)
    return converted
# Run only after doing unary conversion.
def text_pass_4a_dashes(s):
    '''Turn '-' into dash_marker, except where it directly precedes unary_marker.'''
    negative_number = '-' + unary_marker
    # park the negative-number dashes on the reserved marker, convert every
    # remaining dash, then put the parked ones back
    s = s.replace(negative_number, reserved_marker)
    s = s.replace('-', dash_marker)
    s = s.replace(reserved_marker, negative_number)

    # level up is annoying: the sign inside these 'level' expressions gets
    # the dash marker as well
    for pattern, sign in ((r'level &\^*\-&', '-'), (r'level &\^*\+', '+')):
        for found in re.findall(pattern, s):
            s = s.replace(found, found.replace(sign, dash_marker))

    # and we still have the ~x issue
    return s
# Run this after fixing dashes, because this unbreaks the ~x issue.
# Also probably don't run this on names, there are a few names with x~ in them.
def text_pass_4b_x(s):
    '''Replace a lowercase x used as a variable with x_marker.'''
    # (old, new) pairs, applied in this order
    rewrites = (
        (dash_marker + 'x', '-' + x_marker),
        ('+x', '+' + x_marker),
        (' x ', ' ' + x_marker + ' '),
        ('x:', x_marker + ':'),
        ('x~', x_marker + '~'),
        ('x.', x_marker + '.'),
        ('x,', x_marker + ','),
        ('x/x', x_marker + '/' + x_marker),
    )
    for old, new in rewrites:
        s = s.replace(old, new)
    return s
# Call this before replacing newlines.
# This one ends up being really bad because of the confusion
# with 'counter target spell or ability'.
def text_pass_5_counters(s):
    '''Replace named counters in s with counter_marker + ' counter'.

    When exactly one kind of counter was replaced, a 'countertype' line
    naming it is put in front of the text.
    '''
    # so, big fat old dictionary time!!!!!!!!!
    # (the order of this list is significant, keep it as it is)
    allcounters = [
        'time counter',
        'devotion counter',
        'charge counter',
        'ki counter',
        'matrix counter',
        'spore counter',
        'poison counter',
        'quest counter',
        'hatchling counter',
        'storage counter',
        'growth counter',
        'paralyzation counter',
        'energy counter',
        'study counter',
        'glyph counter',
        'depletion counter',
        'sleight counter',
        'loyalty counter',
        'hoofprint counter',
        'wage counter',
        'echo counter',
        'lore counter',
        'page counter',
        'divinity counter',
        'mannequin counter',
        'ice counter',
        'fade counter',
        'pain counter',
        #'age counter',
        'gold counter',
        'muster counter',
        'infection counter',
        'plague counter',
        'fate counter',
        'slime counter',
        'shell counter',
        'credit counter',
        'despair counter',
        'globe counter',
        'currency counter',
        'blood counter',
        'soot counter',
        'carrion counter',
        'fuse counter',
        'filibuster counter',
        'wind counter',
        'hourglass counter',
        'trap counter',
        'corpse counter',
        'awakening counter',
        'verse counter',
        'scream counter',
        'doom counter',
        'luck counter',
        'intervention counter',
        'eyeball counter',
        'flood counter',
        'eon counter',
        'death counter',
        'delay counter',
        'blaze counter',
        'magnet counter',
        'feather counter',
        'shield counter',
        'wish counter',
        'petal counter',
        'music counter',
        'pressure counter',
        'manifestation counter',
        #'net counter',
        'velocity counter',
        'vitality counter',
        'treasure counter',
        'pin counter',
        'bounty counter',
        'rust counter',
        'mire counter',
        'tower counter',
        #'ore counter',
        'cube counter',
        'strife counter',
        'elixir counter',
        'hunger counter',
        'level counter',
        'winch counter',
        'fungus counter',
        'training counter',
        'theft counter',
        'arrowhead counter',
        'sleep counter',
        'healing counter',
        'mining counter',
        'dream counter',
        'aim counter',
        'arrow counter',
        'javelin counter',
        'gem counter',
        'bribery counter',
        'mine counter',
        'omen counter',
        'phylactery counter',
        'tide counter',
        'polyp counter',
        'petrification counter',
        'shred counter',
        'pupa counter',
    ]
    # oh god some of the counter names are suffixes of others...
    # these are only tried after every longer name has been replaced
    shortcounters = [
        'age counter',
        'net counter',
        'ore counter',
    ]

    usedcounters = []
    for candidates, is_short in ((allcounters, False), (shortcounters, True)):
        for countername in candidates:
            if countername not in s:
                continue
            # SUPER HACKY fix for doubling season
            if is_short and 'more counter' in s:
                continue
            usedcounters.append(countername)
            s = s.replace(countername, counter_marker + ' counter')

    # miraculously more than one kind of counter on a card doesn't seem to happen

    # we haven't done newline replacement yet, so use actual newlines
    if len(usedcounters) == 1:
        # and yeah, this line of code can blow up in all kinds of different ways
        kind = usedcounters[0].split()[0]
        s = 'countertype ' + counter_marker + ' ' + kind + '\n' + s

    return s
# The word 'counter' is confusing when used to refer to what we do to spells
# and sometimes abilities to make them not happen. Let's rename that.
# Call this after doing the counter replacement to simplify the regexes.
counter_rename = 'uncast'
def text_pass_6_uncast(s):
    '''Rename the verb 'counter' (as done to spells) to counter_rename.'''
    # (old, new) pairs, applied in this order. By manual inspection of a few
    # dozen texts it looks like this about covers every use of the verb;
    # any other 'counter' left in the text should be the noun.
    renames = (
        # counter target
        ('counter target ', counter_rename + ' target '),
        # counter a
        ('counter a ', counter_rename + ' a '),
        # counter all
        ('counter all ', counter_rename + ' all '),
        # counters a
        ('counters a ', counter_rename + 's a '),
        # countered (this could get weird in terms of englishing the word)
        ('countered', counter_rename),
        # counter that
        ('counter that ', counter_rename + ' that '),
        # counter @
        ('counter @', counter_rename + ' @'),
        # counter it (this is tricky, so only after a comma)
        (', counter it', ', ' + counter_rename + ' it'),
        # counter the (it happens at least once, thanks wizards!)
        ('counter the ', counter_rename + ' the '),
        # counter up to
        ('counter up to ', counter_rename + ' up to '),
    )
    for old, new in renames:
        s = s.replace(old, new)
    return s
# Run after fixing dashes, it makes the regexes better, but before replacing newlines.
def text_pass_7_choice(s):
    '''Collapse "choose ..." bullet lists in s into single bracketed lines.

    Handles the prefixes "choose one", "choose two", "choose one or both"
    and "choose one or more", each followed by a long dash. Returns the
    rewritten text.
    '''
    # the idea is to take 'choose n ~\n=ability\n=ability\n'
    # to '[n = ability = ability]\n'
    def choice_formatting_helper(s_helper, prefix, count):
        # Find every run of: prefix, an optional newline, then one or more
        # lines starting with a bullet (u2022). findall returns one tuple per
        # match because the pattern has groups; element 0 is the whole run.
        single_choices = re.findall(ur'(' + prefix + ur'\n?(\u2022.*(\n|$))+)', s_helper)
        for choice in single_choices:
            newchoice = choice[0]
            # the prefix becomes the unary marker followed by `count` unary
            # counters (so just the marker when count is 0)
            newchoice = newchoice.replace(prefix, unary_marker + (unary_counter * count))
            newchoice = newchoice.replace('\n', ' ')
            # a trailing space here was the run's final newline: keep that
            # newline, but outside the closing bracket
            if newchoice[-1:] == ' ':
                newchoice = '[' + newchoice[:-1] + ']\n'
            else:
                newchoice = '[' + newchoice + ']'
            s_helper = s_helper.replace(choice[0], newchoice)
        return s_helper
    s = choice_formatting_helper(s, ur'choose one \u2014', 1)
    # same prefix again, but with a space after the dash
    s = choice_formatting_helper(s, ur'choose one \u2014 ', 1) # ty Promise of Power
    s = choice_formatting_helper(s, ur'choose two \u2014', 2)
    s = choice_formatting_helper(s, ur'choose one or both \u2014', 0)
    s = choice_formatting_helper(s, ur'choose one or more \u2014', 0)
    return s
# do before removing newlines
# might as well do this after countertype because we probably care more about
# the location of the equip cost
def text_pass_8_equip(s):
equips = re.findall(r'equip ' + utils.mana_json_regex + r'.?$', s)
# there don't seem to be any cases with more than one
if len(equips) == 1:
equip = equips[0]
s = s.replace('\n' + equip, '')
s = s.replace(equip, '')
if equip[-1:] == ' ':
equip = equip[0:-1]
if s == '':
s = equip
else:
s = equip + '\n' + s
nonmana = re.findall(ur'(equip\u2014.*(\n|$))', s)
if len(nonmana) == 1:
equip = nonmana[0][0]
s = s.replace('\n' + equip, '')
s = s.replace(equip, '')
if equip[-1:] == ' ':
equip = equip[0:-1]
if s == '':
s = equip
else:
s = equip + '\n' + s
return s
def text_pass_9_newlines(s):
    '''Replace every newline in s with a single backslash.'''
    lines = s.split('\n')
    return '\\'.join(lines)
def text_pass_10_symbols(s):
    '''Return s as converted by utils.to_symbols.'''
    converted = utils.to_symbols(s)
    return converted

View file

@ -18,6 +18,7 @@ bullet_marker = config.bullet_marker
this_marker = config.this_marker
counter_marker = config.counter_marker
reserved_marker = config.reserved_marker
reserved_mana_marker = config.reserved_mana_marker
x_marker = config.x_marker
tap_marker = config.tap_marker
untap_marker = config.untap_marker
@ -25,6 +26,21 @@ untap_marker = config.untap_marker
# unambiguous synonyms
counter_rename = config.counter_rename
# field labels
field_label_name = config.field_label_name
field_label_rarity = config.field_label_rarity
field_label_cost = config.field_label_cost
field_label_supertypes = config.field_label_supertypes
field_label_types = config.field_label_types
field_label_subtypes = config.field_label_subtypes
field_label_loyalty = config.field_label_loyalty
field_label_pt = config.field_label_pt
field_label_text = config.field_label_text
# additional fields we add to the json cards
json_field_bside = config.json_field_bside
json_field_set_name = config.json_field_set_name
# unicode / ascii conversion
unicode_trans = {
u'\u2014' : dash_marker, # unicode long dash
@ -297,16 +313,16 @@ mana_regex = (re.escape(mana_open_delimiter) + '['
+ ']*' + re.escape(mana_close_delimiter))
# as a special case, we let unary or decimal numbers exist in json mana strings
mana_jcharset_special = '0123456789' + mana_unary_marker + mana_unary_counter
mana_jcharset_strict = unique_string(''.join(mana_symall_jdecode) + mana_jcharset_special)
mana_jcharset = unique_string(mana_jcharset_strict + mana_jcharset_strict.lower())
mana_json_charset_special = ('0123456789' + unary_marker + unary_counter)
mana_json_charset_strict = unique_string(''.join(mana_symall_jdecode) + mana_json_charset_special)
mana_json_charset = unique_string(mana_json_charset_strict + mana_json_charset_strict.lower())
# note that json mana strings can't be empty between the delimiters
mana_jregex_strict = (re.escape(mana_json_open_delimiter) + '['
+ re.escape(mana_jcharset_strict)
mana_json_regex_strict = (re.escape(mana_json_open_delimiter) + '['
+ re.escape(mana_json_charset_strict)
+ ']+' + re.escape(mana_json_close_delimiter))
mana_jregex = (re.escape(mana_json_open_delimiter) + '['
+ re.escape(mana_jcharset)
mana_json_regex = (re.escape(mana_json_open_delimiter) + '['
+ re.escape(mana_json_charset)
+ ']+' + re.escape(mana_json_close_delimiter))
number_decimal_regex = r'[0123456789]+'
@ -322,7 +338,7 @@ def mana_translate(jmanastr):
for n in sorted(re.findall(mana_unary_regex, manastr),
lambda x,y: cmp(len(x), len(y)), reverse = True):
ns = re.findall(number_unary_regex, n)
i = (len(ns[0]) - len(mana_unary_marker)) / len(mana_unary_counter)
i = (len(ns[0]) - len(unary_marker)) / len(unary_counter)
manastr = manastr.replace(n, mana_unary_marker + mana_unary_counter * i)
for n in sorted(re.findall(mana_decimal_regex, manastr),
lambda x,y: cmp(len(x), len(y)), reverse = True):
@ -381,7 +397,7 @@ def mana_untranslate(manastr, for_forum = False):
# finally, replacing all instances in a string
# notice the calls to .upper(), this way we recognize lowercase symbols as well just in case
def to_mana(s):
jmanastrs = re.findall(mana_jregex, s)
jmanastrs = re.findall(mana_json_regex, s)
for jmanastr in sorted(jmanastrs, lambda x,y: cmp(len(x), len(y)), reverse = True):
s = s.replace(jmanastr, mana_translate(jmanastr.upper()))
return s
@ -434,3 +450,6 @@ def from_symbols(s, for_forum = False):
for symstr in sorted(symstrs, lambda x,y: cmp(len(x), len(y)), reverse = True):
s = s.replace(symstr, symbol_trans[symstr])
return s
unletters_regex = r"[^abcdefghijklmnopqrstuvwxyz']"

View file

@ -15,11 +15,13 @@ def main(fname, oname = None, verbose = True):
datamine.analyze(cardtexts)
multicards = []
reps = 5
reps = 10
for card in datamine.cards:
for i in range(reps):
multicards += [card.reencode(randomize = True)]
# multicards += [card.reencode(randomize = True)
# + card.cost.reencode(randomize = True) + utils.fieldsep]
random.shuffle(multicards)