def uppercaseNewLineAndFullstop(string):
    """Capitalise the start of the text, of every line and of every sentence.

    A character is upper-cased when it is:

    * the very first character of ``string``;
    * the character right after a newline; or
    * the character that follows a ``'.'`` (full stop) or ``'='`` (the
      choice bullet marker) *and* the single space after it.

    Characters without an upper-case form (digits, symbols, spaces) are
    left untouched because ``upper()`` is a no-op on them.

    :param string: rules text to capitalise.
    :returns: a new string of the same length with the selected
        characters upper-cased.
    """
    # Index 0 always qualifies: it is the first letter of the first line.
    indices = {0}
    for i, char in enumerate(string):
        if char == '\n':
            # The letter we want is the one right after the newline.
            indices.add(i + 1)
        elif char in '.=' and string[i + 1:i + 2] == ' ':
            # Skip the separating space.  Only do so when the space is
            # really there; otherwise "ab.cd" would wrongly become "ab.cD".
            # A full stop directly followed by a newline is already covered
            # by the newline branch above.
            indices.add(i + 2)
    return "".join(c.upper() if i in indices else c
                   for i, c in enumerate(string))
' + cardname - outstr += '\tstylesheet: m15-planeswalker\n' - if self.__dict__[field_loyalty]: - outstr += '\tloyalty: ' + utils.from_unary(self.__dict__[field_loyalty]) + '\n' + if "land" not in self.__dict__[field_types]: + outstr += '\tcasting cost: ' + self.__dict__[field_cost].format(for_forum = for_forum).replace('{','').replace('}','') + outstr += '\n' outstr += '\tsuper type: ' + ' '.join(self.__dict__[field_supertypes] + self.__dict__[field_types]).title() + '\n' #outstr += 'sub type: ' + ' '.join(self.__dict__[field_types]) @@ -691,7 +703,7 @@ class Card: if self.__dict__[field_text].text: mtext = self.__dict__[field_text].text - mtext = transforms.text_unpass_1_choice(mtext, delimit = True) + mtext = transforms.text_unpass_1_choice(mtext, delimit = False) mtext = transforms.text_unpass_2_counters(mtext) mtext = transforms.text_unpass_3_unary(mtext) mtext = transforms.text_unpass_4_symbols(mtext, for_forum) @@ -701,18 +713,52 @@ class Card: newtext.text = mtext newtext.costs = self.__dict__[field_text].costs newtext = newtext.format(for_forum = for_forum) - newtext = newtext.replace('@',cardname) # first let's put the cardname where all the @s are. - newtext = newtext.replace("uncast","counter") # now replace 'uncast' with 'counter'. + newtext = newtext.replace(utils.this_marker, cardname) # first let's put the cardname where all the @s are. + newtext = newtext.replace(utils.counter_rename + ".", "countered.") # then replace any 'uncast' at the end of a sentence with 'countered'. + newtext = newtext.replace(utils.dash_marker, "—") # also replace the ~ with a — for choices. + newtext = newtext.replace(utils.counter_rename, "counter") # then replace all the mid-sentence 'uncast' with 'counter'. newtext = newtext.replace('{','').replace('}','') # now we encase mana/tap symbols with the correct tags for mse. linecount = newtext.count('\n') + 1 # adding 1 because no newlines means 1 line, 1 newline means 2 lines etc. 
- # ok, let's capitalize every letter after a \n... - # first let's find all indices of \n. - indices = [0] # initialise with 0, since we always want to capitalise the first letter. + + newtext = uppercaseNewLineAndFullstop(newtext) # make all the things uppercase! + + # done after uppercasing everything because string[i] == • doesn't work apparently. + newtext = newtext.replace(utils.bullet_marker, "•") # replace the = with a •. + + newlineIndices = [0] # also need to keep track of pure newlines (for planeswalkers). for i in range (len(newtext)): - if newtext[i] == '\n': - indices.append(i + 1) # we want the index of the letter after the \n, so add one. - indexSet = set(indices) # convert it to a set for the next part; the capitalisation. - newtext = "".join(c.upper() if i in indexSet else c for i, c in enumerate(newtext)) + if newtext[i] == '\n': + newlineIndices.append(i + 1) + + # need to do Special Things if it's a planeswalker. + if "planeswalker" in str(self.__dict__[field_types]): # for some reason this is in types, not supertypes... + # can we rely on newlines being the sole indicator of walker ability number? + # I think yes, because all existing WotC walkers have no newlines within abilities. + outstr += '\tstylesheet: m15-planeswalker\n' # set the proper card style for a 3-line walker. + + # set up the loyalty cost fields. + # also, remove the costs from the rules text... damn immutable strings means newtext has to be a list for now. + newtextList = list(newtext) + outstr += '\tloyalty cost 1: ' + newtext[newlineIndices[0]:newlineIndices[0]+2] + '\n' + # use regex to find all loyalty costs. + + newtextList[newlineIndices[0]:newlineIndices[0]+4] = '' # dang thing won't work with double-wide costs (above 9)... + # check that we won't have out of range indices; this handles partially-built walkers. 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
titlecase.py v0.2
Original Perl version by: John Gruber http://daringfireball.net/ 10 May 2008
Python version by Stuart Colville http://muffinresearch.co.uk
License: http://www.opensource.org/licenses/mit-license.php
"""

import unittest
import sys
import re


# Alternation of "small" words that stay lower-case inside a title.
# Raw string: the value contains regex escapes (``\.``), not Python ones.
SMALL = r'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\.?|via|vs\.?'
# Character class matching a single punctuation character.
PUNCT = "[!\"#$%&'‘()*+,-./:;?@[\\\\\\]_`{|}~]"

# A word that consists of nothing but a small word.
SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
# A letter, a period and a letter in a row, as in "example.com".
INLINE_PERIOD = re.compile(r'[a-zA-Z][.][a-zA-Z]')
# An upper-case letter somewhere after the first letter, as in "iTunes".
UC_ELSEWHERE = re.compile(r'%s*?[a-zA-Z]+[A-Z]+?' % PUNCT)
# Optional leading punctuation followed by the first letter of a word.
CAPFIRST = re.compile(r"^%s*?([A-Za-z])" % PUNCT)
# A small word at the very start of the title (after optional punctuation).
SMALL_FIRST = re.compile(r'^(%s*)(%s)\b' % (PUNCT, SMALL), re.I)
# A small word at the very end of the title (before optional punctuation).
SMALL_LAST = re.compile(r'\b(%s)%s?$' % (SMALL, PUNCT), re.I)
# A lower-case small word that opens a sub-phrase, e.g. after ": ".
SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL)


def titlecase(text):

    """
    Titlecases input text

    This filter changes all words to Title Caps, and attempts to be clever
    about *un*capitalizing SMALL words like a/an/the in the input.

    The list of "SMALL words" which are not capped comes from
    the New York Times Manual of Style, plus 'vs' and 'v'.

    The text is split on every single whitespace character and re-joined
    with single spaces, so tabs and newlines in the input come back as
    spaces.

    :param text: the string to convert.
    :returns: the title-cased string.
    """

    words = re.split(r'\s', text)
    line = []
    for word in words:
        # Words with an inline period or with interior capitals are
        # assumed to be deliberately cased and are left alone.
        if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word):
            line.append(word)
            continue
        if SMALL_WORDS.match(word):
            line.append(word.lower())
            continue
        line.append(CAPFIRST.sub(lambda m: m.group(0).upper(), word))

    line = " ".join(line)

    # A small word is still capitalised when it opens the title...
    line = SMALL_FIRST.sub(lambda m: '%s%s' % (
        m.group(1),
        m.group(2).capitalize()
    ), line)

    # ...when it closes the title...
    line = SMALL_LAST.sub(lambda m: m.group(0).capitalize(), line)

    # ...and when it opens a sub-phrase.
    line = SUBPHRASE.sub(lambda m: '%s%s' % (
        m.group(1),
        m.group(2).capitalize()
    ), line)

    return line


class TitlecaseTests(unittest.TestCase):

    """Tests to ensure titlecase follows all of the rules"""

    def test_q_and_a(self):
        """Testing: Q&A With Steve Jobs: 'That's What Happens In Technology' """
        text = titlecase(
            "Q&A with steve jobs: 'that's what happens in technology'"
        )
        result = "Q&A With Steve Jobs: 'That's What Happens in Technology'"
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_at_and_t(self):
        """Testing: What Is AT&T's Problem?"""

        text = titlecase("What is AT&T's problem?")
        result = "What Is AT&T's Problem?"
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_apple_deal(self):
        """Testing: Apple Deal With AT&T Falls Through"""

        text = titlecase("Apple deal with AT&T falls through")
        result = "Apple Deal With AT&T Falls Through"
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_this_v_that(self):
        """Testing: this v that"""
        text = titlecase("this v that")
        result = "This v That"
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_this_v_that2(self):
        """Testing: this v. that"""

        text = titlecase("this v. that")
        result = "This v. That"
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_this_vs_that(self):
        """Testing: this vs that"""

        text = titlecase("this vs that")
        result = "This vs That"
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_this_vs_that2(self):
        """Testing: this vs. that"""

        text = titlecase("this vs. that")
        result = "This vs. That"
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_apple_sec(self):
        """Testing: The SEC's Apple Probe: What You Need to Know"""

        text = titlecase("The SEC's Apple Probe: What You Need to Know")
        result = "The SEC's Apple Probe: What You Need to Know"
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_small_word_quoted(self):
        """Testing: 'by the Way, Small word at the start but within quotes.'"""

        text = titlecase(
            "'by the Way, small word at the start but within quotes.'"
        )
        result = "'By the Way, Small Word at the Start but Within Quotes.'"
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_small_word_end(self):
        """Testing: Small word at end is nothing to be afraid of"""

        text = titlecase("Small word at end is nothing to be afraid of")
        result = "Small Word at End Is Nothing to Be Afraid Of"
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_sub_phrase_small_word(self):
        """Testing: Starting Sub-Phrase With a Small Word: a Trick, Perhaps?"""

        text = titlecase(
            "Starting Sub-Phrase With a Small Word: a Trick, Perhaps?"
        )
        result = "Starting Sub-Phrase With a Small Word: A Trick, Perhaps?"
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_small_word_quotes(self):
        """Testing: Sub-Phrase With a Small Word in Quotes: 'a Trick..."""

        text = titlecase(
            "Sub-Phrase With a Small Word in Quotes: 'a Trick, Perhaps?'"
        )
        result = "Sub-Phrase With a Small Word in Quotes: 'A Trick, Perhaps?'"
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_small_word_double_quotes(self):
        """Testing: Sub-Phrase With a Small Word in Quotes: \"a Trick..."""
        text = titlecase(
            'Sub-Phrase With a Small Word in Quotes: "a Trick, Perhaps?"'
        )
        result = 'Sub-Phrase With a Small Word in Quotes: "A Trick, Perhaps?"'
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_nothing_to_be_afraid_of(self):
        """Testing: \"Nothing to Be Afraid of?\""""
        text = titlecase('"Nothing to Be Afraid of?"')
        result = '"Nothing to Be Afraid Of?"'
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_nothing_to_be_afraid_of2(self):
        """Testing: \"Nothing to Be Afraid Of?\""""

        text = titlecase('"Nothing to be Afraid Of?"')
        result = '"Nothing to Be Afraid Of?"'
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_a_thing(self):
        """Testing: a thing"""

        text = titlecase('a thing')
        result = 'A Thing'
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_vapourware(self):
        """Testing: 2lmc Spool: 'Gruber on OmniFocus and Vapo(u)rware'"""
        text = titlecase(
            "2lmc Spool: 'gruber on OmniFocus and vapo(u)rware'"
        )
        result = "2lmc Spool: 'Gruber on OmniFocus and Vapo(u)rware'"
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_domains(self):
        """Testing: this is just an example.com"""
        text = titlecase('this is just an example.com')
        result = 'This Is Just an example.com'
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_domains2(self):
        """Testing: this is something listed on an del.icio.us"""

        text = titlecase('this is something listed on del.icio.us')
        result = 'This Is Something Listed on del.icio.us'
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_itunes(self):
        """Testing: iTunes should be unmolested"""

        text = titlecase('iTunes should be unmolested')
        result = 'iTunes Should Be Unmolested'
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_thoughts_on_music(self):
        """Testing: Reading Between the Lines of Steve Jobs’s..."""

        text = titlecase(
            'Reading between the lines of steve jobs’s ‘thoughts on music’'
        )
        result = 'Reading Between the Lines of Steve Jobs’s ‘Thoughts on '\
                 'Music’'
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_repair_perms(self):
        """Testing: Seriously, ‘Repair Permissions’ Is Voodoo"""

        text = titlecase('seriously, ‘repair permissions’ is voodoo')
        result = 'Seriously, ‘Repair Permissions’ Is Voodoo'
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))

    def test_generalissimo(self):
        """Testing: Generalissimo Francisco Franco..."""

        text = titlecase(
            'generalissimo francisco franco: still dead; kieren McCarthy: '\
            'still a jackass'
        )
        result = 'Generalissimo Francisco Franco: Still Dead; Kieren '\
                 'McCarthy: Still a Jackass'
        self.assertEqual(text, result, "%s should be: %s" % (text, result, ))


if __name__ == '__main__':
    # Piped input: title-case each line.  Interactive run: execute the tests.
    if not sys.stdin.isatty():
        for line in sys.stdin:
            # Drop the line terminator first: left in place it becomes a
            # trailing space in the output and hides a final small word
            # from SMALL_LAST, which is anchored at the end of the string.
            # print(...) with a single argument behaves the same on
            # Python 2 and Python 3.
            print(titlecase(line.rstrip('\r\n')))

    else:
        suite = unittest.TestLoader().loadTestsFromTestCase(TitlecaseTests)
        unittest.TextTestRunner(verbosity=2).run(suite)