Integration and unbreaking of MSE support changes.
There are still several major issues: MSE cards look decent, but there is virtually no text prettification; split cards (bsides) are not supported at all; and the notes field is not yet being (ab)used to dump additional data.
This commit is contained in:
parent
cda98f6209
commit
438bc86be9
3 changed files with 161 additions and 388 deletions
46
decode.py
46
decode.py
|
@ -1,12 +1,9 @@
|
||||||
#!c:/Python27/python.exe -u
|
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import zipfile
|
import zipfile
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
#to use: py decode.py homebrew.txt homepretty.txt --norarity -v -mse in mtgencode folder.
|
|
||||||
|
|
||||||
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'lib')
|
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'lib')
|
||||||
sys.path.append(libdir)
|
sys.path.append(libdir)
|
||||||
import utils
|
import utils
|
||||||
|
@ -19,7 +16,8 @@ def exclude_sets(cardset):
|
||||||
return cardset == 'Unglued' or cardset == 'Unhinged' or cardset == 'Celebration'
|
return cardset == 'Unglued' or cardset == 'Unhinged' or cardset == 'Celebration'
|
||||||
|
|
||||||
def main(fname, oname = None, verbose = True,
|
def main(fname, oname = None, verbose = True,
|
||||||
gatherer = False, for_forum = False, creativity = False, norarity = False, for_mse = False):
|
gatherer = False, for_forum = False, for_mse = False,
|
||||||
|
creativity = False, norarity = False):
|
||||||
cards = []
|
cards = []
|
||||||
valid = 0
|
valid = 0
|
||||||
invalid = 0
|
invalid = 0
|
||||||
|
@ -113,11 +111,18 @@ def main(fname, oname = None, verbose = True,
|
||||||
|
|
||||||
def writecards(writer):
|
def writecards(writer):
|
||||||
if for_mse:
|
if for_mse:
|
||||||
# have to prepend a massive chunk.
|
# have to prepend a massive chunk of formatting info
|
||||||
writer.write(utils.mse_prepend)
|
writer.write(utils.mse_prepend)
|
||||||
for card in cards:
|
for card in cards:
|
||||||
writer.write((card.format(gatherer = gatherer, for_forum = for_forum, for_mse = for_mse)))
|
if for_mse:
|
||||||
if creativity and not for_mse: # this won't end well if mse mode is enabled.
|
writer.write(card.to_mse().encode('utf-8'))
|
||||||
|
else:
|
||||||
|
writer.write(card.format(gatherer = gatherer,
|
||||||
|
for_forum = for_forum).encode('utf-8'))
|
||||||
|
|
||||||
|
if creativity:
|
||||||
|
if for_mse:
|
||||||
|
writer.write('\tnotes:\n\t\t'.encode('utf-8'))
|
||||||
writer.write('~~ closest cards ~~\n'.encode('utf-8'))
|
writer.write('~~ closest cards ~~\n'.encode('utf-8'))
|
||||||
nearest = cbow.nearest(card)
|
nearest = cbow.nearest(card)
|
||||||
for dist, cardname in nearest:
|
for dist, cardname in nearest:
|
||||||
|
@ -133,8 +138,10 @@ def main(fname, oname = None, verbose = True,
|
||||||
cardname = '[card]' + cardname + '[/card]'
|
cardname = '[card]' + cardname + '[/card]'
|
||||||
writer.write((cardname + ': ' + str(dist) + '\n').encode('utf-8'))
|
writer.write((cardname + ': ' + str(dist) + '\n').encode('utf-8'))
|
||||||
writer.write('\n'.encode('utf-8'))
|
writer.write('\n'.encode('utf-8'))
|
||||||
|
|
||||||
if for_mse:
|
if for_mse:
|
||||||
writer.write('version control:\n\ttype: none\napprentice code: ') # have to append some junk at the end of file.
|
# more formatting info
|
||||||
|
writer.write('version control:\n\ttype: none\napprentice code: ')
|
||||||
|
|
||||||
if oname:
|
if oname:
|
||||||
if verbose:
|
if verbose:
|
||||||
|
@ -142,16 +149,21 @@ def main(fname, oname = None, verbose = True,
|
||||||
with open(oname, 'w') as ofile:
|
with open(oname, 'w') as ofile:
|
||||||
writecards(ofile)
|
writecards(ofile)
|
||||||
if for_mse:
|
if for_mse:
|
||||||
shutil.copyfile(oname, 'set') # copy whatever output file is produced, name the copy 'set' (yes, no extension).
|
# Copy whatever output file is produced, name the copy 'set' (yes, no extension).
|
||||||
zf = zipfile.ZipFile(oname+'.mse-set', mode='w') # use the freaky mse extension instead of zip.
|
if os.path.isfile('set'):
|
||||||
|
print 'ERROR: tried to overwrite existing file "set" - aborting.'
|
||||||
|
return
|
||||||
|
shutil.copyfile(oname, 'set')
|
||||||
|
# Use the freaky mse extension instead of zip.
|
||||||
|
with zipfile.ZipFile(oname+'.mse-set', mode='w') as zf:
|
||||||
try:
|
try:
|
||||||
zf.write('set') # zip up the set file into oname.mse-set.
|
# Zip up the set file into oname.mse-set.
|
||||||
|
zf.write('set')
|
||||||
finally:
|
finally:
|
||||||
|
if verbose:
|
||||||
print 'Made an MSE set file called ' + oname + '.mse-set.'
|
print 'Made an MSE set file called ' + oname + '.mse-set.'
|
||||||
zf.close()
|
# The set file is useless outside the .mse-set, delete it.
|
||||||
os.remove('set') # the set file is useless outside the .mse-set, delete it.
|
os.remove('set')
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
writecards(sys.stdout)
|
writecards(sys.stdout)
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
@ -179,6 +191,6 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
main(args.infile, args.outfile, verbose = args.verbose,
|
main(args.infile, args.outfile, verbose = args.verbose,
|
||||||
gatherer = args.gatherer, for_forum = args.forum, creativity = args.creativity,
|
gatherer = args.gatherer, for_forum = args.forum, for_mse = args.mse,
|
||||||
norarity = args.norarity, for_mse = args.mse)
|
creativity = args.creativity, norarity = args.norarity)
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
119
lib/cardlib.py
119
lib/cardlib.py
|
@ -1,12 +1,10 @@
|
||||||
# card representation
|
# card representation
|
||||||
# -*- coding: utf-8
|
|
||||||
import re
|
import re
|
||||||
import random
|
import random
|
||||||
|
|
||||||
import utils
|
import utils
|
||||||
import transforms
|
import transforms
|
||||||
from manalib import Manacost, Manatext
|
from manalib import Manacost, Manatext
|
||||||
from titlecase import titlecase
|
|
||||||
|
|
||||||
# Some text prettification stuff that people may not have installed
|
# Some text prettification stuff that people may not have installed
|
||||||
try:
|
try:
|
||||||
|
@ -19,6 +17,11 @@ try:
|
||||||
import textwrap
|
import textwrap
|
||||||
import nltk.data
|
import nltk.data
|
||||||
sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
|
sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
|
||||||
|
# This could be made smarter - MSE will capitalize for us after :,
|
||||||
|
# but we still need to capitalize the first english component of an activation
|
||||||
|
# cost that starts with symbols, such as {2U}, *R*emove a +1/+1 counter from @: etc.
|
||||||
|
def cap(s):
|
||||||
|
return s[:1].capitalize() + s[1:]
|
||||||
# This crazy thing is actually invoked as an unpass, so newlines are still
|
# This crazy thing is actually invoked as an unpass, so newlines are still
|
||||||
# encoded.
|
# encoded.
|
||||||
def sentencecase(s):
|
def sentencecase(s):
|
||||||
|
@ -28,11 +31,26 @@ try:
|
||||||
for line in lines:
|
for line in lines:
|
||||||
if line:
|
if line:
|
||||||
sentences = sent_tokenizer.tokenize(line)
|
sentences = sent_tokenizer.tokenize(line)
|
||||||
clines += [' '.join([sent.capitalize() for sent in sentences])]
|
clines += [' '.join([cap(sent) for sent in sentences])]
|
||||||
return utils.newline.join(clines).replace(utils.reserved_marker, utils.x_marker)
|
return utils.newline.join(clines).replace(utils.reserved_marker, utils.x_marker)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
# non-nltk implementation provided by PAK90
|
||||||
|
def uppercaseNewLineAndFullstop(string):
|
||||||
|
# ok, let's capitalize every letter after a full stop and newline.
|
||||||
|
# first let's find all indices of '.' and '\n'
|
||||||
|
indices = [0] # initialise with 0, since we always want to capitalise the first letter.
|
||||||
|
newlineIndices = [0] # also need to keep track of pure newlines (for planeswalkers).
|
||||||
|
for i in range (len(string)):
|
||||||
|
if string[i] == '\\':
|
||||||
|
indices.append(i + 1) # we want the index of the letter after the \n, so add one.
|
||||||
|
newlineIndices.append(i + 1)
|
||||||
|
if string[i] == '.' or string[i] == "=": # also handle the choice bullets.
|
||||||
|
indices.append(i + 2) # we want the index of the letter after the ., so we need to count the space as well.
|
||||||
|
indexSet = set(indices) # convert it to a set for the next part; the capitalisation.
|
||||||
|
return "".join(c.upper() if i in indexSet else c for i, c in enumerate(string))
|
||||||
|
|
||||||
def sentencecase(s):
|
def sentencecase(s):
|
||||||
return s
|
return uppercaseNewLineAndFullstop(s)
|
||||||
|
|
||||||
# These are used later to determine what the fields of the Card object are called.
|
# These are used later to determine what the fields of the Card object are called.
|
||||||
# Define them here because they have nothing to do with the actual format.
|
# Define them here because they have nothing to do with the actual format.
|
||||||
|
@ -112,20 +130,6 @@ def fields_check_valid(fields):
|
||||||
return not field_pt in fields
|
return not field_pt in fields
|
||||||
|
|
||||||
|
|
||||||
def uppercaseNewLineAndFullstop(string):
|
|
||||||
# ok, let's capitalize every letter after a full stop and newline.
|
|
||||||
# first let's find all indices of '.' and '\n'
|
|
||||||
indices = [0] # initialise with 0, since we always want to capitalise the first letter.
|
|
||||||
newlineIndices = [0] # also need to keep track of pure newlines (for planeswalkers).
|
|
||||||
for i in range (len(string)):
|
|
||||||
if string[i] == '\n':
|
|
||||||
indices.append(i + 1) # we want the index of the letter after the \n, so add one.
|
|
||||||
newlineIndices.append(i + 1)
|
|
||||||
if string[i] == '.' or string[i] == "=": # also handle the choice bullets.
|
|
||||||
indices.append(i + 2) # we want the index of the letter after the ., so we need to count the space as well.
|
|
||||||
indexSet = set(indices) # convert it to a set for the next part; the capitalisation.
|
|
||||||
return "".join(c.upper() if i in indexSet else c for i, c in enumerate(string))
|
|
||||||
|
|
||||||
# These functions take a bunch of source data in some format and turn
|
# These functions take a bunch of source data in some format and turn
|
||||||
# it into nicely labeled fields that we know how to initialize a card from.
|
# it into nicely labeled fields that we know how to initialize a card from.
|
||||||
# Both return a dict that maps field names to lists of possible values,
|
# Both return a dict that maps field names to lists of possible values,
|
||||||
|
@ -623,7 +627,7 @@ class Card:
|
||||||
outstr += '[/i]'
|
outstr += '[/i]'
|
||||||
outstr += '\n'
|
outstr += '\n'
|
||||||
|
|
||||||
elif for_forum:
|
else:
|
||||||
cardname = self.__dict__[field_name]
|
cardname = self.__dict__[field_name]
|
||||||
outstr += cardname
|
outstr += cardname
|
||||||
if self.__dict__[field_rarity]:
|
if self.__dict__[field_rarity]:
|
||||||
|
@ -674,12 +678,21 @@ class Card:
|
||||||
outstr += '<' + str(idx) + '> ' + str(value)
|
outstr += '<' + str(idx) + '> ' + str(value)
|
||||||
outstr += '\n'
|
outstr += '\n'
|
||||||
|
|
||||||
|
if self.bside:
|
||||||
|
outstr += utils.dash_marker * 8 + '\n'
|
||||||
|
outstr += self.bside.format(gatherer = gatherer, for_forum = for_forum)
|
||||||
|
|
||||||
|
return outstr
|
||||||
|
|
||||||
|
def to_mse(self):
|
||||||
|
outstr = ''
|
||||||
|
|
||||||
elif for_mse:
|
|
||||||
# need a 'card' string first
|
# need a 'card' string first
|
||||||
outstr += 'card:\n'
|
outstr += 'card:\n'
|
||||||
|
|
||||||
cardname = titlecase(self.__dict__[field_name])
|
cardname = titlecase(self.__dict__[field_name])
|
||||||
outstr += '\tname: ' + cardname + '\n'
|
outstr += '\tname: ' + cardname + '\n'
|
||||||
|
|
||||||
if self.__dict__[field_rarity]:
|
if self.__dict__[field_rarity]:
|
||||||
if self.__dict__[field_rarity] in utils.json_rarity_unmap:
|
if self.__dict__[field_rarity] in utils.json_rarity_unmap:
|
||||||
rarity = utils.json_rarity_unmap[self.__dict__[field_rarity]]
|
rarity = utils.json_rarity_unmap[self.__dict__[field_rarity]]
|
||||||
|
@ -691,39 +704,52 @@ class Card:
|
||||||
#if not self.valid:
|
#if not self.valid:
|
||||||
# outstr += ' _INVALID_'
|
# outstr += ' _INVALID_'
|
||||||
|
|
||||||
if "land" not in self.__dict__[field_types]:
|
if not self.__dict__[field_cost].none:
|
||||||
outstr += '\tcasting cost: ' + self.__dict__[field_cost].format(for_forum = for_forum).replace('{','').replace('}','')
|
outstr += '\tcasting cost: ' + self.__dict__[field_cost].format().replace('{','').replace('}','')
|
||||||
outstr += '\n'
|
outstr += '\n'
|
||||||
|
|
||||||
outstr += '\tsuper type: ' + ' '.join(self.__dict__[field_supertypes] + self.__dict__[field_types]).title() + '\n'
|
outstr += '\tsuper type: ' + ' '.join(self.__dict__[field_supertypes]
|
||||||
#outstr += 'sub type: ' + ' '.join(self.__dict__[field_types])
|
+ self.__dict__[field_types]).title() + '\n'
|
||||||
if self.__dict__[field_subtypes]:
|
if self.__dict__[field_subtypes]:
|
||||||
outstr += '\tsub type: ' + ' '.join(self.__dict__[field_subtypes]).title()
|
outstr += '\tsub type: ' + ' '.join(self.__dict__[field_subtypes]).title() + '\n'
|
||||||
outstr += '\n'
|
|
||||||
|
|
||||||
if self.__dict__[field_text].text:
|
if self.__dict__[field_text].text:
|
||||||
mtext = self.__dict__[field_text].text
|
mtext = self.__dict__[field_text].text
|
||||||
mtext = transforms.text_unpass_1_choice(mtext, delimit = False)
|
mtext = transforms.text_unpass_1_choice(mtext, delimit = False)
|
||||||
mtext = transforms.text_unpass_2_counters(mtext)
|
mtext = transforms.text_unpass_2_counters(mtext)
|
||||||
mtext = transforms.text_unpass_3_unary(mtext)
|
mtext = transforms.text_unpass_3_unary(mtext)
|
||||||
mtext = transforms.text_unpass_4_symbols(mtext, for_forum)
|
mtext = transforms.text_unpass_4_symbols(mtext, False)
|
||||||
|
mtext = sentencecase(mtext)
|
||||||
|
# I don't really want these MSE specific passes in transforms,
|
||||||
|
# but they could be pulled out separately somewhere else in here.
|
||||||
|
mtext = mtext.replace(utils.this_marker, '<atom-cardname><nospellcheck>'
|
||||||
|
+ utils.this_marker + '</nospellcheck></atom-cardname>')
|
||||||
mtext = transforms.text_unpass_5_cardname(mtext, cardname)
|
mtext = transforms.text_unpass_5_cardname(mtext, cardname)
|
||||||
mtext = transforms.text_unpass_6_newlines(mtext)
|
mtext = transforms.text_unpass_6_newlines(mtext)
|
||||||
newtext = Manatext('')
|
newtext = Manatext('')
|
||||||
newtext.text = mtext
|
newtext.text = mtext
|
||||||
newtext.costs = self.__dict__[field_text].costs
|
newtext.costs = self.__dict__[field_text].costs
|
||||||
newtext = newtext.format(for_forum = for_forum)
|
newtext = newtext.format()
|
||||||
newtext = newtext.replace(utils.this_marker, cardname) # first let's put the cardname where all the @s are.
|
|
||||||
newtext = newtext.replace(utils.counter_rename + ".", "countered.") # then replace any 'uncast' at the end of a sentence with 'countered'.
|
#NOT NEEDED newtext = newtext.replace(utils.this_marker, cardname) # first let's put the cardname where all the @s are.
|
||||||
newtext = newtext.replace(utils.dash_marker, "—") # also replace the ~ with a — for choices.
|
|
||||||
newtext = newtext.replace(utils.counter_rename, "counter") # then replace all the mid-sentence 'uncast' with 'counter'.
|
|
||||||
newtext = newtext.replace('{','<sym-auto>').replace('}','</sym-auto>') # now we encase mana/tap symbols with the correct tags for mse.
|
# newtext = newtext.replace(utils.counter_rename + ".", "countered.") # then replace any 'uncast' at the end of a sentence with 'countered'.
|
||||||
|
# newtext = newtext.replace(utils.dash_marker, u'\u2014') # also replace the ~ with a u2014 for choices.
|
||||||
|
# newtext = newtext.replace(utils.counter_rename, "counter") # then replace all the mid-sentence 'uncast' with 'counter'.
|
||||||
|
# newtext = newtext.replace('{','<sym-auto>').replace('}','</sym-auto>') # now we encase mana/tap symbols with the correct tags for mse.
|
||||||
|
# linecount = newtext.count('\n') + 1 # adding 1 because no newlines means 1 line, 1 newline means 2 lines etc.
|
||||||
|
|
||||||
|
# newtext = sentencecase(newtext) # make all the things uppercase!
|
||||||
|
|
||||||
|
# # done after uppercasing everything because string[i] == u2022 doesn't work apparently.
|
||||||
|
# newtext = newtext.replace(utils.bullet_marker, u'\u2022') # replace the = with a u2022.
|
||||||
|
|
||||||
|
# used later
|
||||||
linecount = newtext.count('\n') + 1 # adding 1 because no newlines means 1 line, 1 newline means 2 lines etc.
|
linecount = newtext.count('\n') + 1 # adding 1 because no newlines means 1 line, 1 newline means 2 lines etc.
|
||||||
|
|
||||||
newtext = uppercaseNewLineAndFullstop(newtext) # make all the things uppercase!
|
# actually really important
|
||||||
|
newtext = newtext.replace('{','<sym-auto>').replace('}','</sym-auto>') # now we encase mana/tap symbols with the correct tags for mse.
|
||||||
# done after uppercasing everything because string[i] == • doesn't work apparently.
|
|
||||||
newtext = newtext.replace(utils.bullet_marker, "•") # replace the = with a •.
|
|
||||||
|
|
||||||
newlineIndices = [0] # also need to keep track of pure newlines (for planeswalkers).
|
newlineIndices = [0] # also need to keep track of pure newlines (for planeswalkers).
|
||||||
for i in range (len(newtext)):
|
for i in range (len(newtext)):
|
||||||
|
@ -736,26 +762,21 @@ class Card:
|
||||||
|
|
||||||
# set up the loyalty cost fields using regex to find how many there are.
|
# set up the loyalty cost fields using regex to find how many there are.
|
||||||
i = 0
|
i = 0
|
||||||
for costs in re.findall('[-+]\d?\d: ', newtext): # regex handles 2-figure loyalty costs.
|
lcost_regex = r'[-+]?\d+: ' # 1+ figures, might be 0.
|
||||||
|
for costs in re.findall(lcost_regex, newtext):
|
||||||
i += 1
|
i += 1
|
||||||
outstr += '\tloyalty cost ' + str(i) + ': ' + costs + '\n'
|
outstr += '\tloyalty cost ' + str(i) + ': ' + costs + '\n'
|
||||||
# sub out the loyalty costs.
|
# sub out the loyalty costs.
|
||||||
newtext = re.sub('[-+]\d?\d: ', '', newtext)
|
newtext = re.sub(lcost_regex, '', newtext)
|
||||||
|
|
||||||
newtext = uppercaseNewLineAndFullstop(newtext) # we need to uppercase again; previous uppercase call didn't work due to loyalty costs being there.
|
#newtext = sentencecase(newtext) # we need to uppercase again; previous uppercase call didn't work due to loyalty costs being there.
|
||||||
|
|
||||||
if self.__dict__[field_loyalty]:
|
if self.__dict__[field_loyalty]:
|
||||||
outstr += '\tloyalty: ' + utils.from_unary(self.__dict__[field_loyalty]) + '\n'
|
outstr += '\tloyalty: ' + utils.from_unary(self.__dict__[field_loyalty]) + '\n'
|
||||||
|
|
||||||
# have to do special snowflake stuff for rule text with more than 1 line. 2 or more lines need to be double-indented...
|
|
||||||
if linecount == 1:
|
|
||||||
outstr += '\trule text: ' + newtext + '\n'
|
|
||||||
elif linecount > 1:
|
|
||||||
newtext = newtext.replace('\n','\n\t\t')
|
newtext = newtext.replace('\n','\n\t\t')
|
||||||
outstr += '\trule text:\n\t\t' + newtext + '\n'
|
outstr += '\trule text:\n\t\t' + newtext + '\n'
|
||||||
|
|
||||||
# also uncast still exists at this point? weird. should be 'unpassed' apparently. until then, did a manual replace.
|
|
||||||
|
|
||||||
if self.__dict__[field_pt]:
|
if self.__dict__[field_pt]:
|
||||||
ptstring = utils.from_unary(self.__dict__[field_pt]).split('/')
|
ptstring = utils.from_unary(self.__dict__[field_pt]).split('/')
|
||||||
if (len(ptstring) > 1): #really don't want to be accessing anything nonexistent.
|
if (len(ptstring) > 1): #really don't want to be accessing anything nonexistent.
|
||||||
|
@ -766,12 +787,6 @@ class Card:
|
||||||
# now append all the other useless fields that the setfile expects.
|
# now append all the other useless fields that the setfile expects.
|
||||||
outstr += '\thas styling: false\n\tnotes:\n\ttime created:2015-07-20 22:53:07\n\ttime modified:2015-07-20 22:53:08\n\textra data:\n\timage:\n\tcard code text:\n\tcopyright:\n\timage 2:\n\tcopyright 2: '
|
outstr += '\thas styling: false\n\tnotes:\n\ttime created:2015-07-20 22:53:07\n\ttime modified:2015-07-20 22:53:08\n\textra data:\n\timage:\n\tcard code text:\n\tcopyright:\n\timage 2:\n\tcopyright 2: '
|
||||||
|
|
||||||
#print outstr
|
|
||||||
|
|
||||||
if self.bside and not for_mse:
|
|
||||||
outstr += utils.dash_marker * 8 + '\n'
|
|
||||||
outstr += self.bside.format(gatherer = gatherer, for_forum = for_forum)
|
|
||||||
|
|
||||||
return outstr
|
return outstr
|
||||||
|
|
||||||
def vectorize(self):
|
def vectorize(self):
|
||||||
|
|
254
lib/titlecase.py
254
lib/titlecase.py
|
@ -1,254 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
"""
|
|
||||||
titlecase.py v0.2
|
|
||||||
Original Perl version by: John Gruber http://daringfireball.net/ 10 May 2008
|
|
||||||
Python version by Stuart Colville http://muffinresearch.co.uk
|
|
||||||
License: http://www.opensource.org/licenses/mit-license.php
|
|
||||||
"""
|
|
||||||
|
|
||||||
import unittest
|
|
||||||
import sys
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\.?|via|vs\.?'
|
|
||||||
PUNCT = "[!\"#$%&'‘()*+,-./:;?@[\\\\\\]_`{|}~]"
|
|
||||||
|
|
||||||
SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
|
|
||||||
INLINE_PERIOD = re.compile(r'[a-zA-Z][.][a-zA-Z]')
|
|
||||||
UC_ELSEWHERE = re.compile(r'%s*?[a-zA-Z]+[A-Z]+?' % PUNCT)
|
|
||||||
CAPFIRST = re.compile(r"^%s*?([A-Za-z])" % PUNCT)
|
|
||||||
SMALL_FIRST = re.compile(r'^(%s*)(%s)\b' % (PUNCT, SMALL), re.I)
|
|
||||||
SMALL_LAST = re.compile(r'\b(%s)%s?$' % (SMALL, PUNCT), re.I)
|
|
||||||
SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL)
|
|
||||||
|
|
||||||
def titlecase(text):
|
|
||||||
|
|
||||||
"""
|
|
||||||
Titlecases input text
|
|
||||||
|
|
||||||
This filter changes all words to Title Caps, and attempts to be clever
|
|
||||||
about *un*capitalizing SMALL words like a/an/the in the input.
|
|
||||||
|
|
||||||
The list of "SMALL words" which are not capped comes from
|
|
||||||
the New York Times Manual of Style, plus 'vs' and 'v'.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
words = re.split('\s', text)
|
|
||||||
line = []
|
|
||||||
for word in words:
|
|
||||||
if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word):
|
|
||||||
line.append(word)
|
|
||||||
continue
|
|
||||||
if SMALL_WORDS.match(word):
|
|
||||||
line.append(word.lower())
|
|
||||||
continue
|
|
||||||
line.append(CAPFIRST.sub(lambda m: m.group(0).upper(), word))
|
|
||||||
|
|
||||||
line = " ".join(line)
|
|
||||||
|
|
||||||
line = SMALL_FIRST.sub(lambda m: '%s%s' % (
|
|
||||||
m.group(1),
|
|
||||||
m.group(2).capitalize()
|
|
||||||
), line)
|
|
||||||
|
|
||||||
line = SMALL_LAST.sub(lambda m: m.group(0).capitalize(), line)
|
|
||||||
|
|
||||||
line = SUBPHRASE.sub(lambda m: '%s%s' % (
|
|
||||||
m.group(1),
|
|
||||||
m.group(2).capitalize()
|
|
||||||
), line)
|
|
||||||
|
|
||||||
return line
|
|
||||||
|
|
||||||
class TitlecaseTests(unittest.TestCase):
|
|
||||||
|
|
||||||
"""Tests to ensure titlecase follows all of the rules"""
|
|
||||||
|
|
||||||
def test_q_and_a(self):
|
|
||||||
"""Testing: Q&A With Steve Jobs: 'That's What Happens In Technology' """
|
|
||||||
text = titlecase(
|
|
||||||
"Q&A with steve jobs: 'that's what happens in technology'"
|
|
||||||
)
|
|
||||||
result = "Q&A With Steve Jobs: 'That's What Happens in Technology'"
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_at_and_t(self):
|
|
||||||
"""Testing: What Is AT&T's Problem?"""
|
|
||||||
|
|
||||||
text = titlecase("What is AT&T's problem?")
|
|
||||||
result = "What Is AT&T's Problem?"
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_apple_deal(self):
|
|
||||||
"""Testing: Apple Deal With AT&T Falls Through"""
|
|
||||||
|
|
||||||
text = titlecase("Apple deal with AT&T falls through")
|
|
||||||
result = "Apple Deal With AT&T Falls Through"
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_this_v_that(self):
|
|
||||||
"""Testing: this v that"""
|
|
||||||
text = titlecase("this v that")
|
|
||||||
result = "This v That"
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_this_v_that2(self):
|
|
||||||
"""Testing: this v. that"""
|
|
||||||
|
|
||||||
text = titlecase("this v. that")
|
|
||||||
result = "This v. That"
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_this_vs_that(self):
|
|
||||||
"""Testing: this vs that"""
|
|
||||||
|
|
||||||
text = titlecase("this vs that")
|
|
||||||
result = "This vs That"
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_this_vs_that2(self):
|
|
||||||
"""Testing: this vs. that"""
|
|
||||||
|
|
||||||
text = titlecase("this vs. that")
|
|
||||||
result = "This vs. That"
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_apple_sec(self):
|
|
||||||
"""Testing: The SEC's Apple Probe: What You Need to Know"""
|
|
||||||
|
|
||||||
text = titlecase("The SEC's Apple Probe: What You Need to Know")
|
|
||||||
result = "The SEC's Apple Probe: What You Need to Know"
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_small_word_quoted(self):
|
|
||||||
"""Testing: 'by the Way, Small word at the start but within quotes.'"""
|
|
||||||
|
|
||||||
text = titlecase(
|
|
||||||
"'by the Way, small word at the start but within quotes.'"
|
|
||||||
)
|
|
||||||
result = "'By the Way, Small Word at the Start but Within Quotes.'"
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_small_word_end(self):
|
|
||||||
"""Testing: Small word at end is nothing to be afraid of"""
|
|
||||||
|
|
||||||
text = titlecase("Small word at end is nothing to be afraid of")
|
|
||||||
result = "Small Word at End Is Nothing to Be Afraid Of"
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_sub_phrase_small_word(self):
|
|
||||||
"""Testing: Starting Sub-Phrase With a Small Word: a Trick, Perhaps?"""
|
|
||||||
|
|
||||||
text = titlecase(
|
|
||||||
"Starting Sub-Phrase With a Small Word: a Trick, Perhaps?"
|
|
||||||
)
|
|
||||||
result = "Starting Sub-Phrase With a Small Word: A Trick, Perhaps?"
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_small_word_quotes(self):
|
|
||||||
"""Testing: Sub-Phrase With a Small Word in Quotes: 'a Trick..."""
|
|
||||||
|
|
||||||
text = titlecase(
|
|
||||||
"Sub-Phrase With a Small Word in Quotes: 'a Trick, Perhaps?'"
|
|
||||||
)
|
|
||||||
result = "Sub-Phrase With a Small Word in Quotes: 'A Trick, Perhaps?'"
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_small_word_double_quotes(self):
|
|
||||||
"""Testing: Sub-Phrase With a Small Word in Quotes: \"a Trick..."""
|
|
||||||
text = titlecase(
|
|
||||||
'Sub-Phrase With a Small Word in Quotes: "a Trick, Perhaps?"'
|
|
||||||
)
|
|
||||||
result = 'Sub-Phrase With a Small Word in Quotes: "A Trick, Perhaps?"'
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_nothing_to_be_afraid_of(self):
|
|
||||||
"""Testing: \"Nothing to Be Afraid of?\""""
|
|
||||||
text = titlecase('"Nothing to Be Afraid of?"')
|
|
||||||
result = '"Nothing to Be Afraid Of?"'
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_nothing_to_be_afraid_of2(self):
|
|
||||||
"""Testing: \"Nothing to Be Afraid Of?\""""
|
|
||||||
|
|
||||||
text = titlecase('"Nothing to be Afraid Of?"')
|
|
||||||
result = '"Nothing to Be Afraid Of?"'
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_a_thing(self):
|
|
||||||
"""Testing: a thing"""
|
|
||||||
|
|
||||||
text = titlecase('a thing')
|
|
||||||
result = 'A Thing'
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_vapourware(self):
|
|
||||||
"""Testing: 2lmc Spool: 'Gruber on OmniFocus and Vapo(u)rware'"""
|
|
||||||
text = titlecase(
|
|
||||||
"2lmc Spool: 'gruber on OmniFocus and vapo(u)rware'"
|
|
||||||
)
|
|
||||||
result = "2lmc Spool: 'Gruber on OmniFocus and Vapo(u)rware'"
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_domains(self):
|
|
||||||
"""Testing: this is just an example.com"""
|
|
||||||
text = titlecase('this is just an example.com')
|
|
||||||
result = 'This Is Just an example.com'
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_domains2(self):
|
|
||||||
"""Testing: this is something listed on an del.icio.us"""
|
|
||||||
|
|
||||||
text = titlecase('this is something listed on del.icio.us')
|
|
||||||
result = 'This Is Something Listed on del.icio.us'
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_itunes(self):
|
|
||||||
"""Testing: iTunes should be unmolested"""
|
|
||||||
|
|
||||||
text = titlecase('iTunes should be unmolested')
|
|
||||||
result = 'iTunes Should Be Unmolested'
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_thoughts_on_music(self):
|
|
||||||
"""Testing: Reading Between the Lines of Steve Jobs’s..."""
|
|
||||||
|
|
||||||
text = titlecase(
|
|
||||||
'Reading between the lines of steve jobs’s ‘thoughts on music’'
|
|
||||||
)
|
|
||||||
result = 'Reading Between the Lines of Steve Jobs’s ‘Thoughts on '\
|
|
||||||
'Music’'
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_repair_perms(self):
|
|
||||||
"""Testing: Seriously, ‘Repair Permissions’ Is Voodoo"""
|
|
||||||
|
|
||||||
text = titlecase('seriously, ‘repair permissions’ is voodoo')
|
|
||||||
result = 'Seriously, ‘Repair Permissions’ Is Voodoo'
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
def test_generalissimo(self):
|
|
||||||
"""Testing: Generalissimo Francisco Franco..."""
|
|
||||||
|
|
||||||
text = titlecase(
|
|
||||||
'generalissimo francisco franco: still dead; kieren McCarthy: '\
|
|
||||||
'still a jackass'
|
|
||||||
)
|
|
||||||
result = 'Generalissimo Francisco Franco: Still Dead; Kieren '\
|
|
||||||
'McCarthy: Still a Jackass'
|
|
||||||
self.assertEqual(text, result, "%s should be: %s" % (text, result, ))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
if not sys.stdin.isatty():
|
|
||||||
for line in sys.stdin:
|
|
||||||
print titlecase(line)
|
|
||||||
|
|
||||||
else:
|
|
||||||
suite = unittest.TestLoader().loadTestsFromTestCase(TitlecaseTests)
|
|
||||||
unittest.TextTestRunner(verbosity=2).run(suite)
|
|
||||||
|
|
Loading…
Reference in a new issue