Merge remote-tracking branch 'refs/remotes/billzorn/master'
This commit is contained in:
commit
9b1e607330
29 changed files with 50350 additions and 44332 deletions
43
README.md
43
README.md
|
@ -4,7 +4,7 @@ Utilities to assist in the process of generating Magic the Gathering cards with
|
||||||
|
|
||||||
http://www.mtgsalvation.com/forums/creativity/custom-card-creation/612057-generating-magic-cards-using-deep-recurrent-neural
|
http://www.mtgsalvation.com/forums/creativity/custom-card-creation/612057-generating-magic-cards-using-deep-recurrent-neural
|
||||||
|
|
||||||
The purpose of this code is mostly to wrangle text between various human and machine readable formats. The original input comes from [mtgjson](http://mtgjson.com); this is filtered and reduced to one of several input formats intended for neural network training, such as the standard encoded format used in [data/output.txt](https://github.com/billzorn/mtgencode/blob/master/data/output.txt). Any json or encoded data, including output from appropriately trained neural nets, can then be interpreted as cards and decoded to a human readable format, such as a text spoiler or [Magic Set Editor 2](http://magicseteditor.sourceforge.net) set file.
|
The purpose of this code is mostly to wrangle text between various human and machine readable formats. The original input comes from [mtgjson](http://mtgjson.com); this is filtered and reduced to one of several input formats intended for neural network training, such as the standard encoded format used in [data/output.txt](https://github.com/billzorn/mtgencode/blob/master/data/output.txt). Any json or encoded data, including output from appropriately trained neural nets, can then be interpreted as cards and decoded to a human readable format, such as a text spoiler, [Magic Set Editor 2](http://magicseteditor.sourceforge.net) set file, or a pretty, portable html file that can be viewed in any browser.
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
|
@ -21,7 +21,8 @@ Functionality is provided by two main driver scripts: encode.py and decode.py. L
|
||||||
### encode.py
|
### encode.py
|
||||||
|
|
||||||
```
|
```
|
||||||
usage: encode.py [-h] [-d N] [-e {std,rmana,rmana_dual,rfields,vec}] [-s] [-v]
|
usage: encode.py [-h] [-e {std,named,noname,rfields,old,norarity,vec,custom}]
|
||||||
|
[-r] [--nolinetrans] [--nolabel] [-s] [-v]
|
||||||
infile [outfile]
|
infile [outfile]
|
||||||
|
|
||||||
positional arguments:
|
positional arguments:
|
||||||
|
@ -30,27 +31,33 @@ positional arguments:
|
||||||
|
|
||||||
optional arguments:
|
optional arguments:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
-d N, --duplicate N number of times to duplicate each card
|
-e {std,named,noname,rfields,old,norarity,vec,custom}, --encoding {std,named,noname,rfields,old,norarity,vec,custom}
|
||||||
-e {std,rmana,rmana_dual,rfields,vec}, --encoding {std,rmana,rmana_dual,rfields,vec}
|
encoding format to use
|
||||||
|
-r, --randomize randomize the order of symbols in mana costs
|
||||||
|
--nolinetrans don't reorder lines of card text
|
||||||
|
--nolabel don't label fields
|
||||||
-s, --stable don't randomize the order of the cards
|
-s, --stable don't randomize the order of the cards
|
||||||
-v, --verbose verbose output
|
-v, --verbose verbose output
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
The supported encodings are:
|
The supported encodings are:
|
||||||
|
|
||||||
Argument | Description
|
Argument | Description
|
||||||
-----------|------------
|
-----------|------------
|
||||||
std | standard format: |name|supertypes|types|loyalty|subtypes|rarity|pt|cost|text|
|
std | Standard format: `|type|supertype|subtype|loyalty|pt|text|cost|rarity|name|`.
|
||||||
rmana | randomized mana: as standard, but symbols in mana costs will be mixed: {^^UUUU} -> {UU^^UU}
|
named | Name first: `|name|type|supertype|subtype|loyalty|pt|text|cost|rarity|`.
|
||||||
rmana_dual | as rmana, but with a second mana cost field after the text field
|
noname | No name field at all: `|type|supertype|subtype|loyalty|pt|text|cost|rarity|`.
|
||||||
rfields | randomize the order of the fields, and use a label to distinguish which field is which
|
rfields | Randomize the order of the fields, using only the label to distinguish which field is which.
|
||||||
vec | produce a content vector for each card; used with [word2vec](https://code.google.com/p/word2vec/)
|
old | Legacy format: `|name|supertype|type|loyalty|subtype|rarity|pt|cost|text|`. No field labels.
|
||||||
|
norarity | Older legacy format: `|name|supertype|type|loyalty|subtype|pt|cost|text|`. No field labels.
|
||||||
|
vec | Produce a content vector for each card; used with [word2vec](https://code.google.com/p/word2vec/).
|
||||||
|
custom | Blank format slot, intended to help users add their own formats to the python source.
|
||||||
|
|
||||||
### decode.py
|
### decode.py
|
||||||
|
|
||||||
```
|
```
|
||||||
usage: decode.py [-h] [-g] [-f] [-c] [-d] [--norarity] [-v] [-mse]
|
usage: decode.py [-h] [-e {std,named,noname,rfields,old,norarity,vec,custom}]
|
||||||
|
[-g] [-f] [-c] [-d] [-v] [-mse] [-html]
|
||||||
infile [outfile]
|
infile [outfile]
|
||||||
|
|
||||||
positional arguments:
|
positional arguments:
|
||||||
|
@ -59,21 +66,23 @@ positional arguments:
|
||||||
|
|
||||||
optional arguments:
|
optional arguments:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
|
-e {std,named,noname,rfields,old,norarity,vec,custom}, --encoding {std,named,noname,rfields,old,norarity,vec,custom}
|
||||||
|
encoding format to use
|
||||||
-g, --gatherer emulate Gatherer visual spoiler
|
-g, --gatherer emulate Gatherer visual spoiler
|
||||||
-f, --forum use pretty mana encoding for mtgsalvation forum
|
-f, --forum use pretty mana encoding for mtgsalvation forum
|
||||||
-c, --creativity use CBOW fuzzy matching to check creativity of cards
|
-c, --creativity use CBOW fuzzy matching to check creativity of cards
|
||||||
-d, --dump dump out lots of information about invalid cards
|
-d, --dump dump out lots of information about invalid cards
|
||||||
--norarity the card format has no rarity field; use for legacy input
|
|
||||||
-v, --verbose verbose output
|
-v, --verbose verbose output
|
||||||
-mse, --mse use Magic Set Editor 2 encoding; will output as .mse-set
|
-mse, --mse use Magic Set Editor 2 encoding; will output as .mse-
|
||||||
file
|
set file
|
||||||
|
-html, --html create a .html file with pretty forum formatting
|
||||||
```
|
```
|
||||||
|
|
||||||
The default output is a text spoiler which modifies the output of the neural net as little as possible while making it human readable. Specifying the -g option will produce a prettier, Gatherer-inspired text spoiler with heavier-weight transformations applied to the text, such as capitalization. The -f option encodes mana symbols in the format used by the mtgsalvation forum; this is useful if you want to cut and paste your spoiler into a post to share it.
|
The default output is a text spoiler which modifies the output of the neural net as little as possible while making it human readable. Specifying the -g option will produce a prettier, Gatherer-inspired text spoiler with heavier-weight transformations applied to the text, such as capitalization. The -f option encodes mana symbols in the format used by the mtgsalvation forum; this is useful if you want to cut and paste your spoiler into a post to share it.
|
||||||
|
|
||||||
Passing the -mse option will cause decode.py to produce both the hilarious internal MSE text format as well as an actual mse set file, which is really just a renamed zip archive. The -f and -g flags will be respected in the text that is dumped to each card's notes field.
|
Passing the -mse option will cause decode.py to produce both the hilarious internal MSE text format as well as an actual mse set file, which is really just a renamed zip archive. The -f and -g flags will be respected in the text that is dumped to each card's notes field.
|
||||||
|
|
||||||
Finally, the -c and -d options will print out additional data about the quality of the cards. Running with -c is extremely slow due to the massive amount of computation involved; -d is probably a good idea to use in general unless you're trying to produce pretty output to show off.
|
Finally, the -c and -d options will print out additional data about the quality of the cards. Running with -c is extremely slow due to the massive amount of computation involved, though at least we can do it in parallel over all of your processor cores; -d is probably a good idea to use in general unless you're trying to produce pretty output to show off. Using html mode is especially useful with -c as we can link to visual spoilers from magiccards.info.
|
||||||
|
|
||||||
### Examples
|
### Examples
|
||||||
|
|
||||||
|
@ -287,6 +296,8 @@ Several ambiguous words are resolved. Most directly, the word 'counter' as in 'c
|
||||||
|
|
||||||
I also reformat cards that choose between multiple things by removing the choice clause itself and instead having a delimited list of options prefixed by a number. If you could choose different numbers of things (one or both, one or more - turns out the latter is valid in all existing cases) then the number is 0, otherwise it's however many things you'd get to choose. So, 'choose one -\= effect x\= effect y' (the \ is a newline) becomes [&^ = effect x = effect y].
|
I also reformat cards that choose between multiple things by removing the choice clause itself and instead having a delimited list of options prefixed by a number. If you could choose different numbers of things (one or both, one or more - turns out the latter is valid in all existing cases) then the number is 0, otherwise it's however many things you'd get to choose. So, 'choose one -\= effect x\= effect y' (the \ is a newline) becomes [&^ = effect x = effect y].
|
||||||
|
|
||||||
|
Finally, some postprocessing is done to put the lines of a card's ability text into a standardized, canonical form. Lines with multiple keywords are split, and then we put all of the simple keywords first, followed by things like static or activated abilities. A few things always go first (such as equip and enchant) and a few other things always go last (such as kicker and countertype). There are various reasons for doing this transformation, and some proper science could probably come up with a better specific procedure. One of the primary motivations for putting abilities onto individual lines is that it should simplify the process of adding back in reminder text. It should be noted somewhere that the definition of a simple keyword ability vs. some other line of text is that a simple keyword won't contain a period, and we can split a line with multiple of them by looking for commas and semicolons.
|
||||||
|
|
||||||
======
|
======
|
||||||
|
|
||||||
Here's an attempt at a list of all the things I do:
|
Here's an attempt at a list of all the things I do:
|
||||||
|
@ -318,3 +329,5 @@ Here's an attempt at a list of all the things I do:
|
||||||
* Replace actual newline characters with \ so that we can use those to separate cards
|
* Replace actual newline characters with \ so that we can use those to separate cards
|
||||||
|
|
||||||
* Clean all the unicode junk like accents and unicode minus signs out of the text so there are fewer characters
|
* Clean all the unicode junk like accents and unicode minus signs out of the text so there are fewer characters
|
||||||
|
|
||||||
|
* Split composite text lines (i.e. "flying, first strike" -> "flying\first strike") and put the lines into canonical order
|
||||||
|
|
BIN
data/cbow.bin
BIN
data/cbow.bin
Binary file not shown.
59902
data/cbow.txt
59902
data/cbow.txt
File diff suppressed because it is too large
Load diff
1
data/mtgvocab.json
Normal file
1
data/mtgvocab.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
{"idx_to_token": {"1": "\n", "2": " ", "3": "\"", "4": "%", "5": "&", "6": "'", "7": "*", "8": "+", "9": ",", "10": "-", "11": ".", "12": "/", "13": "0", "14": "1", "15": "2", "16": "3", "17": "4", "18": "5", "19": "6", "20": "7", "21": "8", "22": "9", "23": ":", "24": "=", "25": "@", "26": "A", "27": "B", "28": "C", "29": "E", "30": "G", "31": "L", "32": "N", "33": "O", "34": "P", "35": "Q", "36": "R", "37": "S", "38": "T", "39": "U", "40": "W", "41": "X", "42": "Y", "43": "[", "44": "\\", "45": "]", "46": "^", "47": "a", "48": "b", "49": "c", "50": "d", "51": "e", "52": "f", "53": "g", "54": "h", "55": "i", "56": "j", "57": "k", "58": "l", "59": "m", "60": "n", "61": "o", "62": "p", "63": "q", "64": "r", "65": "s", "66": "t", "67": "u", "68": "v", "69": "w", "70": "x", "71": "y", "72": "z", "73": "{", "74": "|", "75": "}", "76": "~"}, "token_to_idx": {"\n": 1, " ": 2, "\"": 3, "%": 4, "'": 6, "&": 5, "+": 8, "*": 7, "-": 10, ",": 9, "/": 12, ".": 11, "1": 14, "0": 13, "3": 16, "2": 15, "5": 18, "4": 17, "7": 20, "6": 19, "9": 22, "8": 21, ":": 23, "=": 24, "A": 26, "@": 25, "C": 28, "B": 27, "E": 29, "G": 30, "L": 31, "O": 33, "N": 32, "Q": 35, "P": 34, "S": 37, "R": 36, "U": 39, "T": 38, "W": 40, "Y": 42, "X": 41, "[": 43, "]": 45, "\\": 44, "^": 46, "a": 47, "c": 49, "b": 48, "e": 51, "d": 50, "g": 53, "f": 52, "i": 55, "h": 54, "k": 57, "j": 56, "m": 59, "l": 58, "o": 61, "n": 60, "q": 63, "p": 62, "s": 65, "r": 64, "u": 67, "t": 66, "w": 69, "v": 68, "y": 71, "x": 70, "{": 73, "z": 72, "}": 75, "|": 74, "~": 76}}
|
31355
data/output.txt
31355
data/output.txt
File diff suppressed because it is too large
Load diff
71
decode.py
71
decode.py
|
@ -12,13 +12,16 @@ import cardlib
|
||||||
from cbow import CBOW
|
from cbow import CBOW
|
||||||
from namediff import Namediff
|
from namediff import Namediff
|
||||||
|
|
||||||
def exclude_sets(cardset):
|
|
||||||
return cardset == 'Unglued' or cardset == 'Unhinged' or cardset == 'Celebration'
|
|
||||||
|
|
||||||
def main(fname, oname = None, verbose = True, encoding = 'std',
|
def main(fname, oname = None, verbose = True, encoding = 'std',
|
||||||
gatherer = False, for_forum = False, for_mse = False,
|
gatherer = False, for_forum = False, for_mse = False,
|
||||||
creativity = False, vdump = False, for_html = False):
|
creativity = False, vdump = False, for_html = False):
|
||||||
|
|
||||||
|
# there is a sane thing to do here (namely, produce both at the same time)
|
||||||
|
# but we don't support it yet.
|
||||||
|
if for_mse and for_html:
|
||||||
|
print 'ERROR - decode.py - incompatible formats "mse" and "html"'
|
||||||
|
return
|
||||||
|
|
||||||
fmt_ordered = cardlib.fmt_ordered_default
|
fmt_ordered = cardlib.fmt_ordered_default
|
||||||
|
|
||||||
if encoding in ['std']:
|
if encoding in ['std']:
|
||||||
|
@ -46,8 +49,36 @@ def main(fname, oname = None, verbose = True, encoding = 'std',
|
||||||
cards = jdecode.mtg_open_file(fname, verbose=verbose, fmt_ordered=fmt_ordered)
|
cards = jdecode.mtg_open_file(fname, verbose=verbose, fmt_ordered=fmt_ordered)
|
||||||
|
|
||||||
if creativity:
|
if creativity:
|
||||||
cbow = CBOW()
|
|
||||||
namediff = Namediff()
|
namediff = Namediff()
|
||||||
|
cbow = CBOW()
|
||||||
|
if verbose:
|
||||||
|
print 'Computing nearest names...'
|
||||||
|
nearest_names = namediff.nearest_par(map(lambda c: c.name, cards), n=3)
|
||||||
|
if verbose:
|
||||||
|
print 'Computing nearest cards...'
|
||||||
|
nearest_cards = cbow.nearest_par(cards)
|
||||||
|
for i in range(0, len(cards)):
|
||||||
|
cards[i].nearest_names = nearest_names[i]
|
||||||
|
cards[i].nearest_cards = nearest_cards[i]
|
||||||
|
if verbose:
|
||||||
|
print '...Done.'
|
||||||
|
|
||||||
|
def hoverimg(cardname, dist, nd):
|
||||||
|
truename = nd.names[cardname]
|
||||||
|
code = nd.codes[cardname]
|
||||||
|
namestr = ''
|
||||||
|
if for_html:
|
||||||
|
if code:
|
||||||
|
namestr = ('<div class="hover_img"><a href="#">' + truename
|
||||||
|
+ '<span><img src="http://magiccards.info/scans/en/' + code
|
||||||
|
+ '" alt="image"/></span></a>' + ': ' + str(dist) + '</div>')
|
||||||
|
else:
|
||||||
|
namestr = '<div>' + truename + ': ' + str(dist) + '</div>'
|
||||||
|
elif for_forum:
|
||||||
|
namestr = '[card]' + truename + '[/card]' + ': ' + str(dist) + '\n'
|
||||||
|
else:
|
||||||
|
namestr = truename + ': ' + str(dist) + '\n'
|
||||||
|
return namestr
|
||||||
|
|
||||||
def writecards(writer):
|
def writecards(writer):
|
||||||
if for_mse:
|
if for_mse:
|
||||||
|
@ -68,31 +99,30 @@ def main(fname, oname = None, verbose = True, encoding = 'std',
|
||||||
fstring += 'raw:\n' + card.raw + '\n'
|
fstring += 'raw:\n' + card.raw + '\n'
|
||||||
fstring += '\n'
|
fstring += '\n'
|
||||||
fstring += card.format(gatherer = gatherer, for_forum = for_forum,
|
fstring += card.format(gatherer = gatherer, for_forum = for_forum,
|
||||||
vdump = vdump)
|
vdump = vdump) + '\n'
|
||||||
fstring = fstring.replace('<', '(').replace('>', ')')
|
fstring = fstring.replace('<', '(').replace('>', ')')
|
||||||
writer.write(('\n' + fstring[:-1]).replace('\n', '\n\t\t'))
|
writer.write(('\n' + fstring[:-1]).replace('\n', '\n\t\t'))
|
||||||
else:
|
else:
|
||||||
writer.write(card.format(gatherer = gatherer, for_forum = for_forum,
|
fstring = card.format(gatherer = gatherer, for_forum = for_forum,
|
||||||
vdump = vdump, for_html = for_html).encode('utf-8'))
|
vdump = vdump, for_html = for_html)
|
||||||
|
if creativity and for_html:
|
||||||
|
fstring = fstring[:-6] # chop off the closing </div> to stick stuff in
|
||||||
|
writer.write((fstring + '\n').encode('utf-8'))
|
||||||
|
|
||||||
if creativity:
|
if creativity:
|
||||||
cstring = '~~ closest cards ~~\n'
|
cstring = '~~ closest cards ~~\n'
|
||||||
nearest = cbow.nearest(card)
|
nearest = card.nearest_cards
|
||||||
for dist, cardname in nearest:
|
for dist, cardname in nearest:
|
||||||
cardname = namediff.names[cardname]
|
cstring += hoverimg(cardname, dist, namediff)
|
||||||
if for_forum:
|
|
||||||
cardname = '[card]' + cardname + '[/card]'
|
|
||||||
cstring += cardname + ': ' + str(dist) + '\n'
|
|
||||||
cstring += '~~ closest names ~~\n'
|
cstring += '~~ closest names ~~\n'
|
||||||
nearest = namediff.nearest(card.name)
|
nearest = card.nearest_names
|
||||||
for dist, cardname in nearest:
|
for dist, cardname in nearest:
|
||||||
cardname = namediff.names[cardname]
|
cstring += hoverimg(cardname, dist, namediff)
|
||||||
if for_forum:
|
if for_html:
|
||||||
cardname = '[card]' + cardname + '[/card]'
|
cstring = '<hr><div>' + cstring.replace('\n', '<br>\n') + '</div>\n</div>'
|
||||||
cstring += cardname + ': ' + str(dist) + '\n'
|
elif for_mse:
|
||||||
if for_mse:
|
|
||||||
cstring = cstring.replace('<', '(').replace('>', ')')
|
|
||||||
cstring = ('\n\n' + cstring[:-1]).replace('\n', '\n\t\t')
|
cstring = ('\n\n' + cstring[:-1]).replace('\n', '\n\t\t')
|
||||||
|
|
||||||
writer.write(cstring.encode('utf-8'))
|
writer.write(cstring.encode('utf-8'))
|
||||||
|
|
||||||
writer.write('\n'.encode('utf-8'))
|
writer.write('\n'.encode('utf-8'))
|
||||||
|
@ -159,8 +189,11 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('-mse', '--mse', action='store_true',
|
parser.add_argument('-mse', '--mse', action='store_true',
|
||||||
help='use Magic Set Editor 2 encoding; will output as .mse-set file')
|
help='use Magic Set Editor 2 encoding; will output as .mse-set file')
|
||||||
parser.add_argument('-html', '--html', action='store_true', help='create a .html file with pretty forum formatting')
|
parser.add_argument('-html', '--html', action='store_true', help='create a .html file with pretty forum formatting')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
main(args.infile, args.outfile, verbose = args.verbose, encoding = args.encoding,
|
main(args.infile, args.outfile, verbose = args.verbose, encoding = args.encoding,
|
||||||
gatherer = args.gatherer, for_forum = args.forum, for_mse = args.mse,
|
gatherer = args.gatherer, for_forum = args.forum, for_mse = args.mse,
|
||||||
creativity = args.creativity, vdump = args.dump, for_html = args.html)
|
creativity = args.creativity, vdump = args.dump, for_html = args.html)
|
||||||
|
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
213
lib/cardlib.py
213
lib/cardlib.py
|
@ -257,9 +257,12 @@ def fields_from_json(src_json, linetrans = True):
|
||||||
parsed = False
|
parsed = False
|
||||||
|
|
||||||
if 'subtypes' in src_json:
|
if 'subtypes' in src_json:
|
||||||
fields[field_subtypes] = [(-1, map(lambda s: utils.to_ascii(s.lower()),
|
fields[field_subtypes] = [(-1, map(lambda s: utils.to_ascii(s.lower())
|
||||||
|
# urza's lands...
|
||||||
|
.replace('"', "'").replace('-', utils.dash_marker),
|
||||||
src_json['subtypes']))]
|
src_json['subtypes']))]
|
||||||
|
|
||||||
|
|
||||||
if 'rarity' in src_json:
|
if 'rarity' in src_json:
|
||||||
if src_json['rarity'] in utils.json_rarity_map:
|
if src_json['rarity'] in utils.json_rarity_map:
|
||||||
fields[field_rarity] = [(-1, utils.json_rarity_map[src_json['rarity']])]
|
fields[field_rarity] = [(-1, utils.json_rarity_map[src_json['rarity']])]
|
||||||
|
@ -389,13 +392,19 @@ def fields_from_format(src_text, fmt_ordered, fmt_labeled, fieldsep):
|
||||||
class Card:
|
class Card:
|
||||||
'''card representation with data'''
|
'''card representation with data'''
|
||||||
|
|
||||||
def __init__(self, src, fmt_ordered = fmt_ordered_default, fmt_labeled = fmt_labeled_default, fieldsep = utils.fieldsep, linetrans = True):
|
def __init__(self, src, fmt_ordered = fmt_ordered_default,
|
||||||
|
fmt_labeled = fmt_labeled_default,
|
||||||
|
fieldsep = utils.fieldsep, linetrans = True):
|
||||||
|
|
||||||
# source fields, exactly one will be set
|
# source fields, exactly one will be set
|
||||||
self.json = None
|
self.json = None
|
||||||
self.raw = None
|
self.raw = None
|
||||||
# flags
|
# flags
|
||||||
self.parsed = True
|
self.parsed = True
|
||||||
self.valid = True # doesn't record that much
|
self.valid = True # doesn't record that much
|
||||||
|
# placeholders to fill in with expensive distance metrics
|
||||||
|
self.nearest_names = []
|
||||||
|
self.nearest_cards = []
|
||||||
# default values for all fields
|
# default values for all fields
|
||||||
self.__dict__[field_name] = ''
|
self.__dict__[field_name] = ''
|
||||||
self.__dict__[field_rarity] = ''
|
self.__dict__[field_rarity] = ''
|
||||||
|
@ -413,6 +422,7 @@ class Card:
|
||||||
self.__dict__[field_text] = Manatext('')
|
self.__dict__[field_text] = Manatext('')
|
||||||
self.__dict__[field_text + '_lines'] = []
|
self.__dict__[field_text + '_lines'] = []
|
||||||
self.__dict__[field_text + '_words'] = []
|
self.__dict__[field_text + '_words'] = []
|
||||||
|
self.__dict__[field_text + '_lines_words'] = []
|
||||||
self.__dict__[field_other] = []
|
self.__dict__[field_other] = []
|
||||||
self.bside = None
|
self.bside = None
|
||||||
# format-independent view of processed input
|
# format-independent view of processed input
|
||||||
|
@ -539,6 +549,9 @@ class Card:
|
||||||
self.__dict__[field_text + '_words'] = re.sub(utils.unletters_regex,
|
self.__dict__[field_text + '_words'] = re.sub(utils.unletters_regex,
|
||||||
' ',
|
' ',
|
||||||
fulltext).split()
|
fulltext).split()
|
||||||
|
self.__dict__[field_text + '_lines_words'] = map(
|
||||||
|
lambda line: re.sub(utils.unletters_regex, ' ', line).split(),
|
||||||
|
fulltext.split(utils.newline))
|
||||||
else:
|
else:
|
||||||
self.valid = False
|
self.valid = False
|
||||||
self.__dict__[field_other] += [(idx, '<text> ' + str(value))]
|
self.__dict__[field_other] += [(idx, '<text> ' + str(value))]
|
||||||
|
@ -552,7 +565,9 @@ class Card:
|
||||||
# the NN representation, use str() or format() for output intended for human
|
# the NN representation, use str() or format() for output intended for human
|
||||||
# readers.
|
# readers.
|
||||||
|
|
||||||
def encode(self, fmt_ordered = fmt_ordered_default, fmt_labeled = None, fieldsep = utils.fieldsep, randomize_fields = False, randomize_mana = False, initial_sep = True, final_sep = True):
|
def encode(self, fmt_ordered = fmt_ordered_default, fmt_labeled = fmt_labeled_default,
|
||||||
|
fieldsep = utils.fieldsep, initial_sep = True, final_sep = True,
|
||||||
|
randomize_fields = False, randomize_mana = False, randomize_lines = False):
|
||||||
outfields = []
|
outfields = []
|
||||||
|
|
||||||
for field in fmt_ordered:
|
for field in fmt_ordered:
|
||||||
|
@ -566,6 +581,8 @@ class Card:
|
||||||
outfield_str = outfield.encode(randomize = randomize_mana)
|
outfield_str = outfield.encode(randomize = randomize_mana)
|
||||||
elif isinstance(outfield, Manatext):
|
elif isinstance(outfield, Manatext):
|
||||||
outfield_str = outfield.encode(randomize = randomize_mana)
|
outfield_str = outfield.encode(randomize = randomize_mana)
|
||||||
|
if randomize_lines:
|
||||||
|
outfield_str = transforms.randomize_lines(outfield_str)
|
||||||
else:
|
else:
|
||||||
outfield_str = outfield
|
outfield_str = outfield
|
||||||
else:
|
else:
|
||||||
|
@ -599,19 +616,31 @@ class Card:
|
||||||
|
|
||||||
return outstr
|
return outstr
|
||||||
|
|
||||||
def format(self, gatherer = False, for_forum = False, for_mse = False, vdump = False, for_html = False):
|
def format(self, gatherer = False, for_forum = False, vdump = False, for_html = False):
|
||||||
|
linebreak = '\n'
|
||||||
|
if for_html:
|
||||||
|
linebreak = '<hr>' + linebreak
|
||||||
|
|
||||||
outstr = ''
|
outstr = ''
|
||||||
|
if for_html:
|
||||||
|
outstr += '<div class="card-text">\n'
|
||||||
|
|
||||||
if gatherer:
|
if gatherer:
|
||||||
cardname = titlecase(transforms.name_unpass_1_dashes(self.__dict__[field_name]))
|
cardname = titlecase(transforms.name_unpass_1_dashes(self.__dict__[field_name]))
|
||||||
if vdump and not cardname:
|
if vdump and not cardname:
|
||||||
cardname = '_NONAME_'
|
cardname = '_NONAME_'
|
||||||
if for_forum:
|
# in general, for_html overrides for_forum
|
||||||
|
if for_html:
|
||||||
|
outstr += '<b>'
|
||||||
|
elif for_forum:
|
||||||
outstr += '[b]'
|
outstr += '[b]'
|
||||||
outstr += cardname
|
outstr += cardname
|
||||||
if for_forum:
|
if for_html:
|
||||||
|
outstr += '</b>'
|
||||||
|
elif for_forum:
|
||||||
outstr += '[/b]'
|
outstr += '[/b]'
|
||||||
|
|
||||||
coststr = self.__dict__[field_cost].format(for_forum = for_forum)
|
coststr = self.__dict__[field_cost].format(for_forum=for_forum, for_html=for_html)
|
||||||
if vdump or not coststr == '_NOCOST_':
|
if vdump or not coststr == '_NOCOST_':
|
||||||
outstr += ' ' + coststr
|
outstr += ' ' + coststr
|
||||||
|
|
||||||
|
@ -628,7 +657,7 @@ class Card:
|
||||||
if not self.valid:
|
if not self.valid:
|
||||||
outstr += ' _INVALID_'
|
outstr += ' _INVALID_'
|
||||||
|
|
||||||
outstr += '\n'
|
outstr += linebreak
|
||||||
|
|
||||||
basetypes = map(str.capitalize, self.__dict__[field_types])
|
basetypes = map(str.capitalize, self.__dict__[field_types])
|
||||||
if vdump and len(basetypes) < 1:
|
if vdump and len(basetypes) < 1:
|
||||||
|
@ -646,9 +675,9 @@ class Card:
|
||||||
if self.__dict__[field_loyalty]:
|
if self.__dict__[field_loyalty]:
|
||||||
outstr += ' ((' + utils.from_unary(self.__dict__[field_loyalty]) + '))'
|
outstr += ' ((' + utils.from_unary(self.__dict__[field_loyalty]) + '))'
|
||||||
|
|
||||||
outstr += '\n'
|
|
||||||
|
|
||||||
if self.__dict__[field_text].text:
|
if self.__dict__[field_text].text:
|
||||||
|
outstr += linebreak
|
||||||
|
|
||||||
mtext = self.__dict__[field_text].text
|
mtext = self.__dict__[field_text].text
|
||||||
mtext = transforms.text_unpass_1_choice(mtext, delimit = False)
|
mtext = transforms.text_unpass_1_choice(mtext, delimit = False)
|
||||||
mtext = transforms.text_unpass_2_counters(mtext)
|
mtext = transforms.text_unpass_2_counters(mtext)
|
||||||
|
@ -662,109 +691,68 @@ class Card:
|
||||||
newtext = Manatext('')
|
newtext = Manatext('')
|
||||||
newtext.text = mtext
|
newtext.text = mtext
|
||||||
newtext.costs = self.__dict__[field_text].costs
|
newtext.costs = self.__dict__[field_text].costs
|
||||||
outstr += newtext.format(for_forum = for_forum)
|
|
||||||
|
|
||||||
outstr += '\n'
|
outstr += newtext.format(for_forum = for_forum, for_html = for_html)
|
||||||
|
|
||||||
if vdump and self.__dict__[field_other]:
|
if vdump and self.__dict__[field_other]:
|
||||||
if for_forum:
|
outstr += linebreak
|
||||||
|
|
||||||
|
if for_html:
|
||||||
|
outstr += '<i>'
|
||||||
|
elif for_forum:
|
||||||
outstr += '[i]'
|
outstr += '[i]'
|
||||||
else:
|
else:
|
||||||
outstr += utils.dash_marker * 2
|
outstr += utils.dash_marker * 2
|
||||||
outstr += '\n'
|
|
||||||
|
first = True
|
||||||
for idx, value in self.__dict__[field_other]:
|
for idx, value in self.__dict__[field_other]:
|
||||||
outstr += '<' + str(idx) + '> ' + str(value)
|
if for_html:
|
||||||
outstr += '\n'
|
if not first:
|
||||||
if for_forum:
|
outstr += '<br>\n'
|
||||||
outstr = outstr[:-1] # hack off the last newline
|
|
||||||
outstr += '[/i]'
|
|
||||||
outstr += '\n'
|
|
||||||
|
|
||||||
elif for_html:
|
|
||||||
outstr += '<div class="card-text">'
|
|
||||||
cardname = self.__dict__[field_name]
|
|
||||||
#cardname = transforms.name_unpass_1_dashes(self.__dict__[field_name])
|
|
||||||
if vdump and not cardname:
|
|
||||||
cardname = '_NONAME_'
|
|
||||||
outstr += cardname + ' '
|
|
||||||
|
|
||||||
coststr = self.__dict__[field_cost].format(for_html = for_html)
|
|
||||||
if vdump or not coststr == '_NOCOST_':
|
|
||||||
outstr += coststr
|
|
||||||
outstr += '<br>'
|
|
||||||
|
|
||||||
if self.__dict__[field_rarity]:
|
|
||||||
if self.__dict__[field_rarity] in utils.json_rarity_unmap:
|
|
||||||
rarity = utils.json_rarity_unmap[self.__dict__[field_rarity]]
|
|
||||||
else:
|
else:
|
||||||
rarity = self.__dict__[field_rarity]
|
first = False
|
||||||
outstr += ' (' + rarity.lower() + ') '
|
else:
|
||||||
outstr += '\n<hr><b>'
|
outstr += linebreak
|
||||||
|
outstr += '(' + str(idx) + ') ' + str(value)
|
||||||
|
|
||||||
outstr += ' '.join(self.__dict__[field_supertypes] + self.__dict__[field_types])
|
if for_html:
|
||||||
if self.__dict__[field_subtypes]:
|
outstr += '</i>'
|
||||||
outstr += ' ' + utils.dash_marker + ' ' + ' '.join(self.__dict__[field_subtypes])
|
if for_forum:
|
||||||
outstr += '</b><hr>\n'
|
outstr += '[/i]'
|
||||||
|
|
||||||
if self.__dict__[field_text].text:
|
|
||||||
mtext = self.__dict__[field_text].text
|
|
||||||
mtext = transforms.text_unpass_1_choice(mtext, delimit = True)
|
|
||||||
#mtext = transforms.text_unpass_2_counters(mtext)
|
|
||||||
#mtext = transforms.text_unpass_3_uncast(mtext)
|
|
||||||
mtext = transforms.text_unpass_4_unary(mtext)
|
|
||||||
mtext = transforms.text_unpass_5_symbols(mtext,for_forum, for_html)
|
|
||||||
#mtext = transforms.text_unpass_6_cardname(mtext, cardname)
|
|
||||||
mtext = transforms.text_unpass_7_newlines(mtext).replace("\n", "<br>")
|
|
||||||
#mtext = transforms.text_unpass_8_unicode(mtext)
|
|
||||||
newtext = Manatext('')
|
|
||||||
newtext.text = mtext
|
|
||||||
newtext.costs = self.__dict__[field_text].costs
|
|
||||||
outstr += newtext.format(for_html = for_html) + '\n'
|
|
||||||
|
|
||||||
if self.__dict__[field_pt]:
|
|
||||||
outstr += '<br>(' + utils.from_unary(self.__dict__[field_pt]) + ')<br>'
|
|
||||||
outstr += '\n'
|
|
||||||
|
|
||||||
if self.__dict__[field_loyalty]:
|
|
||||||
outstr += '((' + utils.from_unary(self.__dict__[field_loyalty]) + '))'
|
|
||||||
outstr += '\n'
|
|
||||||
|
|
||||||
if vdump and self.__dict__[field_other]:
|
|
||||||
outstr += utils.dash_marker * 2
|
|
||||||
outstr += '\n'
|
|
||||||
for idx, value in self.__dict__[field_other]:
|
|
||||||
outstr += '<' + str(idx) + '> ' + str(value)
|
|
||||||
outstr += '\n'
|
|
||||||
else:
|
else:
|
||||||
cardname = self.__dict__[field_name]
|
cardname = self.__dict__[field_name]
|
||||||
#cardname = transforms.name_unpass_1_dashes(self.__dict__[field_name])
|
#cardname = transforms.name_unpass_1_dashes(self.__dict__[field_name])
|
||||||
if vdump and not cardname:
|
if vdump and not cardname:
|
||||||
cardname = '_NONAME_'
|
cardname = '_NONAME_'
|
||||||
outstr += cardname
|
outstr += cardname
|
||||||
|
|
||||||
|
coststr = self.__dict__[field_cost].format(for_forum=for_forum, for_html=for_html)
|
||||||
|
if vdump or not coststr == '_NOCOST_':
|
||||||
|
outstr += ' ' + coststr
|
||||||
|
|
||||||
|
if vdump:
|
||||||
|
if not self.parsed:
|
||||||
|
outstr += ' _UNPARSED_'
|
||||||
|
if not self.valid:
|
||||||
|
outstr += ' _INVALID_'
|
||||||
|
|
||||||
|
outstr += linebreak
|
||||||
|
|
||||||
|
outstr += ' '.join(self.__dict__[field_supertypes] + self.__dict__[field_types])
|
||||||
|
if self.__dict__[field_subtypes]:
|
||||||
|
outstr += ' ' + utils.dash_marker + ' ' + ' '.join(self.__dict__[field_subtypes])
|
||||||
|
|
||||||
if self.__dict__[field_rarity]:
|
if self.__dict__[field_rarity]:
|
||||||
if self.__dict__[field_rarity] in utils.json_rarity_unmap:
|
if self.__dict__[field_rarity] in utils.json_rarity_unmap:
|
||||||
rarity = utils.json_rarity_unmap[self.__dict__[field_rarity]]
|
rarity = utils.json_rarity_unmap[self.__dict__[field_rarity]]
|
||||||
else:
|
else:
|
||||||
rarity = self.__dict__[field_rarity]
|
rarity = self.__dict__[field_rarity]
|
||||||
outstr += ' (' + rarity.lower() + ')'
|
outstr += ' (' + rarity.lower() + ')'
|
||||||
if vdump:
|
|
||||||
if not self.parsed:
|
|
||||||
outstr += ' _UNPARSED_'
|
|
||||||
if not self.valid:
|
|
||||||
outstr += ' _INVALID_'
|
|
||||||
outstr += '\n'
|
|
||||||
|
|
||||||
coststr = self.__dict__[field_cost].format(for_forum = for_forum)
|
|
||||||
if vdump or not coststr == '_NOCOST_':
|
|
||||||
outstr += coststr
|
|
||||||
outstr += '\n'
|
|
||||||
|
|
||||||
outstr += ' '.join(self.__dict__[field_supertypes] + self.__dict__[field_types])
|
|
||||||
if self.__dict__[field_subtypes]:
|
|
||||||
outstr += ' ' + utils.dash_marker + ' ' + ' '.join(self.__dict__[field_subtypes])
|
|
||||||
outstr += '\n'
|
|
||||||
|
|
||||||
if self.__dict__[field_text].text:
|
if self.__dict__[field_text].text:
|
||||||
|
outstr += linebreak
|
||||||
|
|
||||||
mtext = self.__dict__[field_text].text
|
mtext = self.__dict__[field_text].text
|
||||||
mtext = transforms.text_unpass_1_choice(mtext, delimit = True)
|
mtext = transforms.text_unpass_1_choice(mtext, delimit = True)
|
||||||
#mtext = transforms.text_unpass_2_counters(mtext)
|
#mtext = transforms.text_unpass_2_counters(mtext)
|
||||||
|
@ -777,31 +765,60 @@ class Card:
|
||||||
newtext = Manatext('')
|
newtext = Manatext('')
|
||||||
newtext.text = mtext
|
newtext.text = mtext
|
||||||
newtext.costs = self.__dict__[field_text].costs
|
newtext.costs = self.__dict__[field_text].costs
|
||||||
outstr += newtext.format(for_forum = for_forum) + '\n'
|
|
||||||
|
outstr += newtext.format(for_forum=for_forum, for_html=for_html)
|
||||||
|
|
||||||
if self.__dict__[field_pt]:
|
if self.__dict__[field_pt]:
|
||||||
|
outstr += linebreak
|
||||||
outstr += '(' + utils.from_unary(self.__dict__[field_pt]) + ')'
|
outstr += '(' + utils.from_unary(self.__dict__[field_pt]) + ')'
|
||||||
outstr += '\n'
|
|
||||||
|
|
||||||
if self.__dict__[field_loyalty]:
|
if self.__dict__[field_loyalty]:
|
||||||
|
outstr += linebreak
|
||||||
outstr += '((' + utils.from_unary(self.__dict__[field_loyalty]) + '))'
|
outstr += '((' + utils.from_unary(self.__dict__[field_loyalty]) + '))'
|
||||||
outstr += '\n'
|
|
||||||
|
|
||||||
if vdump and self.__dict__[field_other]:
|
if vdump and self.__dict__[field_other]:
|
||||||
|
outstr += linebreak
|
||||||
|
|
||||||
|
if for_html:
|
||||||
|
outstr += '<i>'
|
||||||
|
else:
|
||||||
outstr += utils.dash_marker * 2
|
outstr += utils.dash_marker * 2
|
||||||
outstr += '\n'
|
|
||||||
|
first = True
|
||||||
for idx, value in self.__dict__[field_other]:
|
for idx, value in self.__dict__[field_other]:
|
||||||
outstr += '<' + str(idx) + '> ' + str(value)
|
if for_html:
|
||||||
outstr += '\n'
|
if not first:
|
||||||
|
outstr += '<br>\n'
|
||||||
|
else:
|
||||||
|
first = False
|
||||||
|
else:
|
||||||
|
outstr += linebreak
|
||||||
|
outstr += '(' + str(idx) + ') ' + str(value)
|
||||||
|
|
||||||
|
if for_html:
|
||||||
|
outstr += '</i>'
|
||||||
|
|
||||||
if self.bside:
|
if self.bside:
|
||||||
if for_html:
|
if for_html:
|
||||||
outstr += "<hr><hr>\n"
|
outstr += '\n'
|
||||||
|
# force for_forum to false so that the inner div doesn't duplicate the forum
|
||||||
|
# spoiler of the bside
|
||||||
|
outstr += self.bside.format(gatherer=gatherer, for_forum=False, for_html=for_html, vdump=vdump)
|
||||||
else:
|
else:
|
||||||
outstr += utils.dash_marker * 8 + '\n'
|
outstr += linebreak
|
||||||
outstr += self.bside.format(gatherer = gatherer, for_forum = for_forum, for_html = for_html)
|
outstr += utils.dash_marker * 8
|
||||||
|
outstr += linebreak
|
||||||
|
outstr += self.bside.format(gatherer=gatherer, for_forum=for_forum, for_html=for_html, vdump=vdump)
|
||||||
|
|
||||||
if for_html:
|
if for_html:
|
||||||
|
if for_forum:
|
||||||
|
outstr += linebreak
|
||||||
|
# force for_html to false to create a copyable forum spoiler div
|
||||||
|
outstr += ('<div>'
|
||||||
|
+ self.format(gatherer=gatherer, for_forum=for_forum, for_html=False, vdump=vdump).replace('\n', '<br>')
|
||||||
|
+ '</div>')
|
||||||
outstr += "</div>"
|
outstr += "</div>"
|
||||||
|
|
||||||
return outstr
|
return outstr
|
||||||
|
|
||||||
def to_mse(self, print_raw = False, vdump = False):
|
def to_mse(self, print_raw = False, vdump = False):
|
||||||
|
|
74
lib/cbow.py
74
lib/cbow.py
|
@ -8,20 +8,18 @@ import subprocess
|
||||||
import os
|
import os
|
||||||
import struct
|
import struct
|
||||||
import math
|
import math
|
||||||
|
import multiprocessing
|
||||||
|
|
||||||
import utils
|
import utils
|
||||||
import cardlib
|
import cardlib
|
||||||
import transforms
|
import transforms
|
||||||
|
import namediff
|
||||||
# # this would be nice, but doing it naively makes things worse
|
|
||||||
# from joblib import Parallel, delayed
|
|
||||||
# import multiprocessing
|
|
||||||
|
|
||||||
libdir = os.path.dirname(os.path.realpath(__file__))
|
libdir = os.path.dirname(os.path.realpath(__file__))
|
||||||
datadir = os.path.realpath(os.path.join(libdir, '../data'))
|
datadir = os.path.realpath(os.path.join(libdir, '../data'))
|
||||||
|
|
||||||
# # multithreading control parameters
|
# multithreading control parameters
|
||||||
# cores = multiprocessing.cpu_count()
|
cores = multiprocessing.cpu_count()
|
||||||
# segments = cores / 2 if cores / 2 > 0 else 1
|
|
||||||
|
|
||||||
# max length of vocabulary entries
|
# max length of vocabulary entries
|
||||||
max_w = 50
|
max_w = 50
|
||||||
|
@ -67,6 +65,11 @@ def makevector(vocabulary,vecs,sequence):
|
||||||
res = v
|
res = v
|
||||||
else:
|
else:
|
||||||
res = [x + y for x, y in zip(res,v)]
|
res = [x + y for x, y in zip(res,v)]
|
||||||
|
|
||||||
|
# bad things happen if we have a vector of only unknown words
|
||||||
|
if res is None:
|
||||||
|
return [0.0]*len(vecs[0])
|
||||||
|
|
||||||
length = math.sqrt(sum([res[i] * res[i] for i in range(0,len(res))]))
|
length = math.sqrt(sum([res[i] * res[i] for i in range(0,len(res))]))
|
||||||
for i in range(0,len(res)):
|
for i in range(0,len(res)):
|
||||||
res[i] /= length
|
res[i] /= length
|
||||||
|
@ -118,6 +121,33 @@ except ImportError:
|
||||||
def cosine_similarity_name(cardvec, v, name):
|
def cosine_similarity_name(cardvec, v, name):
|
||||||
return (cosine_similarity(cardvec, v), name)
|
return (cosine_similarity(cardvec, v), name)
|
||||||
|
|
||||||
|
# we need to put the logic in a regular function (as opposed to a method of an object)
|
||||||
|
# so that we can pass the function to multiprocessing
|
||||||
|
def f_nearest(card, vocab, vecs, cardvecs, n):
    """Find the entries of cardvecs most similar to card.

    card is either a cardlib.Card or a string; a string is assumed to be an
    already-vectorized card.  vocab and vecs are handed straight to
    makevector to build the query vector, and cardvecs is an iterable of
    (name, vector) pairs to compare it against.

    Returns the n highest-sorting results of cosine_similarity_name, best
    first.  If card is a Card with a bside, the bside's own n results are
    appended after them.  Returns an empty list when there are no words to
    vectorize.
    """
    is_card = isinstance(card, cardlib.Card)

    # a Card contributes only the first blank-line-separated section of its
    # vectorized form; anything else is taken as-is
    words = card.vectorize().split('\n\n')[0] if is_card else card
    if not words:
        return []

    cardvec = makevector(vocab, vecs, words)

    ranked = sorted((cosine_similarity_name(cardvec, v, name)
                     for (name, v) in cardvecs),
                    reverse=True)
    nearest = ranked[:n]

    # split / double-faced cards: report matches for the other side as well
    if is_card and card.bside:
        nearest += f_nearest(card.bside, vocab, vecs, cardvecs, n=n)

    return nearest
|
||||||
|
|
||||||
|
def f_nearest_per_thread(workitem):
    """Run f_nearest over one worker's share of the cards.

    workitem is a single tuple (workcards, vocab, vecs, cardvecs, n), packed
    that way so this function takes exactly one argument and can be handed
    to a multiprocessing pool's map.

    Returns a list holding one f_nearest result per entry of workcards, in
    the same order.
    """
    (workcards, vocab, vecs, cardvecs, n) = workitem
    # build a real list instead of returning map(...): on Python 2 the two
    # are identical, but on Python 3 map() is a lazy iterator that cannot be
    # pickled back to the parent process
    return [f_nearest(card, vocab, vecs, cardvecs, n) for card in workcards]
|
||||||
|
|
||||||
class CBOW:
|
class CBOW:
|
||||||
def __init__(self, verbose = True,
|
def __init__(self, verbose = True,
|
||||||
|
@ -148,8 +178,6 @@ class CBOW:
|
||||||
self.vecs,
|
self.vecs,
|
||||||
card.vectorize()))]
|
card.vectorize()))]
|
||||||
|
|
||||||
# self.par = Parallel(n_jobs=segments)
|
|
||||||
|
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
print '... Done.'
|
print '... Done.'
|
||||||
print ' vocab size: ' + str(len(self.vocab))
|
print ' vocab size: ' + str(len(self.vocab))
|
||||||
|
@ -157,25 +185,11 @@ class CBOW:
|
||||||
print ' card vecs: ' + str(len(self.cardvecs))
|
print ' card vecs: ' + str(len(self.cardvecs))
|
||||||
|
|
||||||
def nearest(self, card, n=5):
|
def nearest(self, card, n=5):
|
||||||
if isinstance(card, cardlib.Card):
|
return f_nearest(card, self.vocab, self.vecs, self.cardvecs, n)
|
||||||
words = card.vectorize().split('\n\n')[0]
|
|
||||||
else:
|
|
||||||
# assume it's a string (that's already a vector)
|
|
||||||
words = card
|
|
||||||
|
|
||||||
if not words:
|
def nearest_par(self, cards, n=5, threads=cores):
|
||||||
return []
|
workpool = multiprocessing.Pool(threads)
|
||||||
|
proto_worklist = namediff.list_split(cards, threads)
|
||||||
cardvec = makevector(self.vocab, self.vecs, words)
|
worklist = map(lambda x: (x, self.vocab, self.vecs, self.cardvecs, n), proto_worklist)
|
||||||
|
donelist = workpool.map(f_nearest_per_thread, worklist)
|
||||||
comparisons = [cosine_similarity_name(cardvec, v, name) for (name, v) in self.cardvecs]
|
return namediff.list_flatten(donelist)
|
||||||
# comparisons = self.par(delayed(cosine_similarity_name)(cardvec, v, name)
|
|
||||||
# for (name, v) in self.cardvecs)
|
|
||||||
|
|
||||||
comparisons.sort(reverse = True)
|
|
||||||
comp_n = comparisons[:n]
|
|
||||||
|
|
||||||
if isinstance(card, cardlib.Card) and card.bside:
|
|
||||||
comp_n += self.nearest(card.bside)
|
|
||||||
|
|
||||||
return comp_n
|
|
||||||
|
|
|
@ -61,3 +61,4 @@ field_label_text = '9'
|
||||||
# additional fields we add to the json cards
|
# additional fields we add to the json cards
|
||||||
json_field_bside = 'bside'
|
json_field_bside = 'bside'
|
||||||
json_field_set_name = 'setName'
|
json_field_set_name = 'setName'
|
||||||
|
json_field_info_code = 'magicCardsInfoCode'
|
||||||
|
|
371
lib/html_extra_data.py
Normal file
371
lib/html_extra_data.py
Normal file
|
@ -0,0 +1,371 @@
|
||||||
|
box_width = 350
|
||||||
|
html_prepend = """<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<style>
|
||||||
|
.card-text{ display:inline-block;
|
||||||
|
max-width:350px;
|
||||||
|
margin: 2px;
|
||||||
|
padding:3px;
|
||||||
|
border: 3px solid #000000;}
|
||||||
|
.hover_img a { position:relative; }
|
||||||
|
.hover_img a span { position:absolute; display:none; z-index:99; }
|
||||||
|
.hover_img a:hover span { display:block; }
|
||||||
|
.mana-0 {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px; ;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAEvSURBVChTY/gPBn///nn57PGNy2cvnj4CREAGkAsUhMiCFP348f36pTMXTh2eOXWin6+Pk6NDZ2sjkAsUBEqBFAGVQ1QsXTDbxsb69LH9u7dtkJGV625vhqgDKmAAGgvkAJG9vV1bcz2EnZGWoqqqdv7kISAbqIABaD2QdWTfDj4+gWUL50AUTZnQw8PLv2ntciAbqIAB6Ewga/H8WXBRIFq1bAGQO3/2NCAbqACqCMgHiq5ethCiaO3KxUDu4nkzoYog1m1etwIoOn1yP0QR0BO8fAIH92wDskHWwR2uqamVnZkOYQN9YGNtDWGDHA4Pggk9HfIKikAfnDtx0N7ODmIXNAiQA7OrvdnXxzsiLHTapD6ICmhgAjEQAJUDjQVaD3QmEAEZQC5QECT3/z8A05MwgYZeWs0AAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-1 {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAADjSURBVChTjZGxDgFBFEXHE7MrthFClBrNRvyTSET4CBIaCo3GT4jCT5AoNRoJdtcK0bFjY9dlZnXErd579+TNzJ1YGIaMsSB4nFzncj4K74ZW05PpTC6bLxDF0b4gIbzNeuXdrs7BnS+W9sFt1qsSLZZMTdMJO0Bgge/7k+lsOBrvLRsEhCEsAIRT5BGc81ajVimbkpCCBYBwDzV4yzBSqooEgOSaHwJAqvwpwhNU+UUACHmoLpJM7iMAhMQ+y2BbtrPdWUHEwXpHSvFXYnpSiHunN0DPeaLd7aPFEBaAP76FsSe9yn/QI2WYAgAAAABJRU5ErkJggg==') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-2{
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAFPSURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpTed+Dwuk1bf/z4aWttERsZysnFraimzc7OwQQ0A6Ji+669y1at8/FwMzLQmz1v8ez5i4GCQCmgAhagLUDOr1+/tu3YM6W/g4OdHWj+3Xv39+w7lJ4cD5QCKmACugMo+uPnz9ysFIgKIBAVEeHl4YGwgQqYgGqBLD5eXjUVZYjo589fDh89HhbsD+ECFTBBWHDw+8+f+pbOAD9vDzcnqBADAxPQt1AmKCD+dfRMcnN2TE2MgQqBg4MJGB4QDjAspsyY6+3hAjHj3IVLDx89ATKAChj//v1z6+qFH9+/9U6cBnQ+0P9AiffvP6zftG3JvGn8AoJq2gYswDAFhtiGNSuvXLsBlL5z9z6QBAIzE0OgCqAUUAER0cLAAAAj1rLKRkR9sgAAAABJRU5ErkJggg==') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-3{
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAFUSURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpQ+efrc2g1b3r57Z6ink5oUx8fPr6imzc7OwQQ0A6Jiw+btG7dsj48OC/L3WbFmw5btu4CCQCmgAiagLRAreLi5WhuqtLU0PN2cODk5RISFgIJAKaACxhuXz/74/g3Ih4Dfv39Pmjabj48vNTEGIsLBycUCMQYC7t5/0Nk7+dWr11+/fRMU4A8J9AUKAhUwXjx9BOJBIHj1+g0vDw8HB3v3hKnbd+zZvHYpDw83IyMjE9C3EBVAICYqAnQNUNTN2eH3nz9v3r4FCgIVMAHDA6ICGdy//1BQQEBGWgrIBipgAoYYUC3QxucvXkJUnDh1Zt6i5dXlBSwsLEApoAJoYF69eGb6jDmfv3z58vUbHy9vZFignKwMUAUkMImIFgYGALORsD4EQl2jAAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-4{
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAE3SURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpEGcpvbe9nY2CpK8oBKFdW02dk5mIBmwFUAwfpNW48cP/nk2TMgGygIlAIqYALaAlfx5Omzu/ceqqkqQ7hAAJQCKmACugPC//fv38w5CzNTEyBcOAAqYIIbs3TFWk93Fx4ebggXDoAKmCCsO3fvP3/50srCFMIFgrPnL756/QbCZrxx+eyP79/aeyZ+/vyFm4sLKHTy9Nl/////+/tv2YIZAgL8HJxcjC+fPX7+5MHHj5++fYfaW9/SBSQba8okJcSBDEkZBSZgiAHDg5+fDygERBLiYj9+/Pj06TOQAVQBlAIHKRMzKMQ4OEGGMDC0dPYDhTg42IFBCglMoAIiooWBAQBwdqfO6tnExwAAAABJRU5ErkJggg==') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-5{
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAEpSURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpRu6ez7+vUbUBQIfv36NWVCt6KaNjs7BxPQDIiKV6/fvHz5WkJcDIIszEyAgkApoALGl88eP3/yAKh1z/5D/Hy8psaGEJPgQFJGgQXoDgjn0uVrFy5dfvT4qZCgQHCAT1R4MCMjI1AcqIAF4lIIiIsOFxURPnDo6PTZC/7++xcXFQYUBCpgvHj6CMSDyKCyruXajVsbVy0CsoHmMQF9C5FABloa6h8/foKwgQqYgOEB4SCDazduaqipQNhABUzAEEM27PefPwuXrjx3/lJeViqQC5QCKkAEZntX/7fv396//yArIx0eEiApIQ5UAQlMIqKFgQEAnxOfpbljyQoAAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-6{
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAFgSURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpF+9frN2vWb33/4GBcdrqyspKimzc7Owfj3759bVy9AVBw5dnLuwqWlhTlaGmpALhAAjVTTNmB8+ezx8ycPgPx79x9m5pXOnzVJSlICogICJGUUGG9cPvvj+zcgp7KuhYWZWV9P58q1GypKCuGhgawsLEBxDk4uJohFv//8OXHyzI1bd2SkJYP8vVeu2dg3cTrIHAYGoAImCOvdu/dAdSkJ0RZmJno6WoH+Xtt27vnz5w9ElgnoNCDFxQkiGRgZQSQDg4Kc7N+/f9+9/wBkAxUwAcMDyOLl5VFRVrxx8zZYDcOXr9+4ubiEhQSBbKACJmCIQQzLSInfs+/Qm7fv/v37t2vv/qjwIGZmZqAUUAFKYB47cWrL9t2MjIzamuqRYUFAf0EDk3C0MDAAACK6pimZEg74AAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-7 {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAEVSURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+62tM/+fGTZ8zMTEBRCCgpyLF2cGFn52ACmgFUATTgwKGjHBzsEuJiEHT7zj0ODjagFFABC9AWoIr3Hz64OjtkpMRDzDh4+Ji4mCg3FxdQCqiA8cblsz++f4PIQcCfP39SsgqnTeji4uIEcjk4uZggLkUGew8cVlNRhqgAAqAChDPhYNnKtc6OdlAOGDABfQtlgsGlK9cePXqir6cN5YODgwkYHlAeGOzcvV9bS4ODnR3KZ2AAKmAChhiyYWfPX1RVUYJywMaAg5SJWVFNG66Oh4dbSVEewgYKAqWACoiIFgYGAIXReK4bpsD6AAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-8 {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAFrSURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6+v3b1+Wr1t24defnz5/aWhqRoYG8fPyKatrs7ByMf//+uXX1AtCAvkkz/vz9U1KQ/e/v396J03/9/l1bUQQ0Uk3bgAloC8SKo8dPamuoMzEysrCwuDrbX712HSgIlAIqYAK6A8gBAg111RlzFu7Zf+jv37+79x709nCDiAMVMF48fQTi9i9fv7Z09B05dpKTk8PJwbayJB+iiJGRkQnCAoKHDx///v27oaZMR0tz6/bd8xYtg0oA1d24fPbH929//vwJDE/obKnV0lQHigK9OXPuoh2bVnIA/cbJxQQMD6Dop0+f33/4ICYmCtbJEB4aCPTjt6/fgGygAiZgiAEVCwkJ6mprrt+0DaLoxMnTBvq6QEGgFFABIjDfvXu7aOmqN2/f8XBzcXBwxEWHCQkJQwOTcLQwMAAAqCC9xuBqHPkAAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-9 {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAFeSURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6+uP7t+Wr1l24dIWZmdnfx8PCzASoVFFNm52dg+Hv3z/XL525cOpwUUGulaXl2eMHdmxeKyUlM2vaJKAgUAqogAloC9CKf//+AY1xc3EEGiMhLubqbD9jzgKgRUApoAImoDuAnDdv333+/IWPlwfIBgItDbVbt+9+/fYNyAYqYIK4VEhQAGjGw8dPwGqATmUEEl+/ghQBFTCBxRhYWFi83F02bN5+/ebt02fPz5m/BCgiwM8HkWUuyMv+8+c3kGVuZvz71++Dh4+xsrL+//dPTFTU090ZKM7BycX48tnj508egDVAwes3b+NSsid2t6qpKgO5kjIKTMAQA4YHRBoIbt25W1nXUlaUC1EBlAIqQATm3bv3Fi9fxcfLG+TvLSkhDlEBCUwiooWBAQCs87JoDmJq4gAAAABJRU5ErkJggg==') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-10 {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAE9SURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpFGBkClimra7OwcLEAzgCqOHT+x/+ARH083DXXV6zdv79i19/PnL24uDkClatoGLEBbgGZ8//5j6449zo52P378mDhl5tQJnUCR6KRMFWUlIRFxJqA7gMptrMylJSWAjMNHT8jJyjAzM/PwcGuoqezasx+ogAnuFBZWFiB56849bm4uiIikhPiTp8+ACpggfDj49u37v3//IGw2NjZ2dnYggwnoBYgQBCjIyz599gLC/vLli6qKElABEzA8IEKfPn3+8uWrq5P9rTt3P33+/O///3v3H9rbWgEVMP79++fW1QsTJk07eeacmKhIamLsx0+fdu87KMDHB/SNiYkxMAiICkwiooWBAQCBlamF9Pj2KwAAAABJRU5ErkJggg==') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-11 {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAD0SURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpFGBkClimra7OwcTEAzgCoePXq0ccv2tq4JQLmnz56vXLNh+qz5QG1AKaACJqAtQA4jI+OVqzdu3bkLVMTExHT2/KVHT54C2UApoAImoDuAHDFRETsbSyADCCQlxE2M9CFsIAAqYMJ0ChoAKmCCMvECJqAXoEwcAKiACRgeEM73HyAAYf/+/fvr128QNlABc2dX98f3b69du7F2w+afv359+fL19+8/23bu/fL1Kwszi66ujqyiKlGBSUS0MDAAAJsNl49choAcAAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-12 {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAEySURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpFGBkClimra7OwcjH///rl19cLly1cOHz1hamKop6P16PGTNeu3MDMzhYcEyMvLq2kbMAFtAZrx+s3bFWvWf/z46c3bd9NnL7C3tfz582decRVQCqiABegOoMnWlmbSUpJAxsnT58qLcgUE+BUV5Ddt3fnp82eOt6+Y0Jzi7eECVAFk3Lh5S1dHi4+XF6iACSKHBp48fb5y7cbGmjIIlwnoBQgLDl69frNm/ab2xhphIUGgK4EKmIDhAZH79u3716/fPnz42DtxmpWF2c3bd6bNmn/vwUOgAmgQzFu45MrV61xcXCzMTE+fv4BoY2VhndTXoaVvQlxgEo4WBgYAuEeubJyWa+YAAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-13 {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAE1SURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpFGBkClimra7OwcLEAzgCrevH594dKVr1+/ebg5ff/+Y+2GzU+ePnd2tAUqVdM2YALaAjTjxctXC5euvHn7zq9fv3omTNXUUDM21KtpaAdKARUwAd0BVK6qomRlbgpksLKyFudnGRvqc3FxOdrbAEWACpjQnMLIyMjBybF42eqOnkl///4FWg1UwASVRAJMjIye7s7rVi54+PjJlu27QCJAL0DkkIGIsBArC4ucrDTQ70AFTMDwgEh8+/bt7bv3v3//BloB5J4+e/7uvQde7i5ABYx///65dfXCgYOHt+7YDXSEpbnp5y9fPn36zMfHG+DrKSQkDAwCogKTiGhhYAAAExGx/k7dMTQAAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-14 {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAEuSURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpFGBkClimra7OwcTEAzgCoePHiwet2ms+cvQuUZGLbt3LNl63agFFABE9AWoBlPnj5buHTly1evISru3L2/eevOazduAaWACpiA7gCKmhobammoQVT8/v17zfrNYcH+EC5QARPcKczMIDcCweJlq4EqWFhYIFygAiYICw6uXrvx9+9fMVGR7z9+/PnzB+gyoCAL0As/vn+DqACCd+8/vHr9ZuLUWddv3AJyjQz1gQpYgOHx/MkDIP/Dx08fP37ycnextbYAcls7+5mYmRztrIEKmIAhBlS7YfN2AX6+W3fu3b0P0nD56nWgCiDj5u17QAVEBSYR0cLAAAAOyLDdh7oV9AAAAABJRU5ErkJggg==') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-15 {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAEySURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpFGBkClimra7OwcjH///rl28cyuPfv27DuYnhynIC83efqcZ89fABV9/PhpYm+Hkbk1C9CW379+srOxHTl2Miku6tevX3/+/AkL9gcqYmJiYmVhAipgAroDyLGxMufm4gJKXL95++fPX0ANL1++1tXWBIqAFKA55fGTp/z8fGKiIrPnL+7smwwUASpggcjBgY+nG4Shq6OVnlOcm5HCy8vDBPQCRBQNSIiLAT3+7/8/oAImYHgAhf7+/fvr9+/vP35AVAClFy9bZWttwcfLC1QACoIr50939k64dv2mtJSkiaH+i5evXr15KyMlGRMZysfPr6ZtQFxgEo4WBgYA5PenFhKZDwsAAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-16 {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAE7SURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpFGBkClimra7OwcTEAzbl+7uGv33prG9jdv30GkT54+O2XG3EOHjwI1AxUwAW359fMHEBw4dPT3799AFQuXrjx34XJWepK5qRHQeKACJqA7mJmZ3V2dIGbcuXt/5+79Lo52p06f/fXrF1AEqIAJzSk79+wHyl25duPQ0ROFZbUgF//4zgSVhIHXb94CTQ308yrITrt2/ea9+w+BgkxAL0CkIUBURPj7d5DZbGxs3Dzcf/78ASpgAoYHUOj3nz9A8vOXLx6uTsdPngFyX7x8xcHOrqSkAFTA+Pfvn/Onjk2cMv3y1esKcrIFOenXbtw8cerc379/Q4N81dXV1bQNiApMIqKFgQEAeKDC4O/kvu0AAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-17{
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAEqSURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpFGBkClimra7OwcjH///rl+6eyZs+d27NqXn5124tSZ3fsOcnNxQdQVFWSbWdkzAW358f3btes3t2zf9ffv3xOnzxrq69rZWALR23fv2FiYgQoYb1w+C1T0798/O1e/HRtXvnn3TkFOFmjG4aMnfvz44erswMHJxQRxChMTE9h4BogKoENXr9vkYG8DZAMVQOXQwOmzFyQlxVlZWCBcJqAXICxksHHLdlMjAwgbqIAJGB5A1sePn4DkBzAJdN/xk2c01FVBShgYgAqYgCH25+//hUtX2lpbrF638ffv358/f9HSVBMTFQGqABoDVEBcYBKOFgYGALrIn9uhneDtAAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-18{
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAFCSURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpFGBkClimra7OwcLEAzgCquX79x+NgJDTUVc1Pjw0dPXLx89efPny5O9kClatoGTEBbgGa8ev16+ap1b9+9v3LtxqatO3IykjNTE9u7J3788B6ogAnoDqByCzMTFWVFIOPdu/fPn78Ekhwc7Dw83AyMjEAFLGhOsbEyP3HqTEh0soKcbE5mMgc7O1ABE1QSBj5++vT9x8+m2nIhIcGuvsk/fv4ECjIBvQCRhoD1m7Zpa6oDzetuq5eSlLh46QpQARMwPCDS3759//nzl7iY2M1bd4BcRkZGFhYWGWkpoALGv3//3Lp6YdbcBafPnufh5k6Kj7px4/bzly9ZWVj19bStLC2AQUBUYBIRLQwMAOBQr7aPizrUAAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-19 {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAE8SURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpFGBkClimra7OwcLEAzgCouX7ly7PhpQUF+fx/Pq9dubN25h+H/fx8vd6BSNW0DJqAtQDOYGJl27tn/9u37799/1DZ1ZKTEpyTG1ja2v3zxAqiACegOoHIVZUUdLQ0g4/zFS/z8fHy8vEKCAqoqSgcOHwMqYEJzChMT0/v3HyBsbi6uV69eg+yB8OHA2MiAk5Nz1txFq9ZuPH7yjJCQIFCQCegFiDQEsLKwzJ7Wp6GuaqCnw8LCbGdjCVTABAwPiPSPHz++//gBZPBwc1tamK7ftC0jNUFMVASogAUYYu/evNy0eeunz1++ff9x5tyFz5+/XLtxy9vTFegVoDFABUQFJhHRwsAAAEKMo0bborQ8AAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-20 {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAFjSURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpFGBkClimra7OwcjH///rl19cKxYydmz1/87///1MQYU2PDHbv23Xvw8PmLlxmpiR4+AQwvnz3evmlNcGDAvp2bm+qqRUTFN6xe6uvtdeHU4RWL5xkYGAAVMAHd8fjJs8LcDCFBATdXxx8/fmzYvF1DQw1onbqayqtXb86ePc0EdIqZiSEPDzdQFGiLu4vj+/cfODnYgVxGRkagzmdPnzGBXAgGFy5duX7zVkVpPhMz848fPyGCTMxMvDw8TEAvADnXrt+8eOlKRXHer1+/uDg579y9DxQE+gPI1dbRYgKGx937Dzr7Jv/9+2/ewmUllQ3enq537z149PjJvv2HAvy8RcUkQUGwYsmiq9eugY1nYGVlDfTz+vjx04HDRyXExexsbdS0DYgLTMLRwsAAABnzwHeRF3aFAAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-2-B {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAF8SURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/Th8/vHrtej0dLRMjA6AoBACVyitrcHJxMwHNuH/r6vqNm3083abPXvD792+oEqDxf/9ePXPs79+/TEBbgFZEhwcLCvCLi4mysLBAVLCysr19/vT71y/PHtxlAroDKMTIyLhq7cbC3Ix///7BVYAVM7x+/pgJ4tLO3kk/f/2au2Dpq9dvgCpWLF+5Yt3mG7fvAqW+ffnCePH0EaDbb9+5B+QzMTFpqKvVN7YeO3X20+cv3FycRVkpVmYmTEAvAKVVVZSACKjixaOHv379CvLxADouPz3pyo1bXDw8TMDwACoCAog7WFlZJMXFzl28oqelceLseWUFeVFJWSZgiAENQ3ZpdGgAMzPT12/fgCq8PVylFJRBgfnj29crZ44BfQtRBATXbt7m5ub6x8AUGh0PDExotABDDBgeQN8C/QLkAt0BtAVoBjMzMwMDAwArqbIv/yKO/gAAAABJRU5ErkJggg==') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-2-G {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAF8SURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/Th8/vHrtej0dLRMjA6AoBACVyqio8XDyMgHNuH/r6vqNm3083abPXvD792+oEgaGz4zfdtzY8ufvHyagLUArosODBQX4xcVEWVhYICp+cf6/9Onqx+8frz6/zAR0B1CIkZFx1dqNhbkZ//79g6sAK2a4++Y2E8Slnb2Tfv76NXfB0lev33xj+b1i77ot83c8uvUEKPXx+wfGi6ePAN1++849IJ+JiUlWW76lr3vfmkNALjMLc35fpoK6HBPQC0C+qooSEAFVAG05seM0UJqbjxsovn/tIX5OASZgeAA5QABxB9BUeU05EyfDH99+RBQGG9rrK4uoMgFDDGgY3KVAH6Q2xIvJirKysQgI89s6W2lL6oIC8+uPL9uvbwb6FmIkEPz6+fvLhy+S4lJ+RgF8HPzQaAGGGDA8gL4F+gXIBboDaAvQDBZmFgYGBgCzOLYQy2aUMQAAAABJRU5ErkJggg==') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-2-R {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAF2SURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/Th8/vHrtej0dLRMjA6AoBACVqigqs/AIMAHNuH/r6vqNm3083abPXvD792+oEgYG3u/v/u2ZzfDnFxPQFqAV0eHBggL84mKiLCwsEBUi/7+L3Njz/9Obv9ePMgHdARRiZGRctXZjYW7Gv3//4CrAihn+3r/ABHFpZ++kn79+zV2w9NXrNyL/vr3at2rbpQcfv/8CSv3/9Jrx4ukjQLffvnMPyGdiYjJXFL++aeGn7792X3n05P2XhgALHTkxxhuXz/74/g1kLpItzz58ffD605S9F8X5uSZkhjEBwwNNxdxDV/VqlrRvPX375QcjOVFmRQMmYIgBwwPZpa8/gwy+9PiNpqSgpooSs6Y1KDD/fP34b/csoG8hin79+bv5wn0mRgZLA225kHxGXmFotABDDBgeQN8C/QLkMfKBbAGawcDCxsDAAACkY8S0HdMPswAAAABJRU5ErkJggg==') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-2-U {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAFmSURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/Th8/vHrtej0dLRMjA6AoBACVyqvpcrKzMQHNuH/r6vqNm3083abPXvD792+oEgaGP2x8F158+/vvPxPQFqAV0eHBggL84mKiLCwsEBXMfOIvmYS+/fr7+MMPJqA7gEKMjIyr1m4szM349+8fXAVYMcPLzz+ZIC7t7J3089evuQuWvnr9Bqji2utfR3ZthSj6+vsv1PCQQD8gycTEJKOm9/Qvb0thkLquoY2bN0SWCegFIKWqogREagbmQFv2bFx9/+a1PRtXPbh1HSjFzcrMBAwPsGqEOy6ePAIkJaTlLp85AWSI87IzAUMMaBiySyVk5YHk04f3gB7nYmOWFeAABeb3n7+A4QH0LUQR0CvbVi0BMkJi4s2VxDhZmaHRAgwxYHgAfQv0C5ALdAfQFqAZzEyMDAwMAPOdnxQjjxd1AAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-2-W {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAF9SURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/Th8/vHrtej0dLRMjA6AoBACVqmhos7ByMAHNuH/r6vqNm3083abPXvD792+oEgYGXn7O//9fAq1hAtoCtCI6PFhQgF9cTJSFhQWiQkRcSFSc7f//33//fmQCugMoxMjIuGrtxsLcjH///sFVgBUz/Pv7mQni0s7eST9//Zq7YOmr12+ERQUZmL7s33/i1y+Q1UDDGC+ePgJ0++0794B8JiYmEzP97z/flJZ3vn37IS013M7WTFpagvHG5bM/vn8DmQu2hYPrd1JyxbHj54DG2NmZzp/byc8vxAQMD7gKoDs4ONjNzfUFBfnZ2dnkZKUuXLjGxMzLAgyxd29eAn0LcSkrK0t0lN/37z9kZaX+/vljb2/NzMwPCsw/v3/+//8C6ECIkUDw6dMXPj6eP38YObnkGBlZodEC8unfj0DfQpQCJYC2AM0AeoaBgQEAtzarPAuf1fsAAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-B-G {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAGHSURBVChTY/z//z8DA8Pfv38f3L7+7uXzn9+/AblcPDyikrJSCsrMzMxALhMQf//2tb6y1NDc1j0ocvXGrf////v6+dODW1fPH933+dtHkCKgGQtnT9+x5wAHOxsjA+ORE6dv3L4LlACCH+x/d97c9ufvH+aUuOgjhw9rqiozMjFKiIt5uzq+ffdBWUGOUYjz5t8HP//8ZGVmZXr9/LGjjQUvL8+9B4+ePn/By8PtYm8NUQEx7+6b20zfvnxhZGRUkJORkZKUk5GSl5X5x8++/sTOLfN3PLr1BKjo4/cPjIe3bwC6FKIJCIBmdM+ctm/NISCbmYU5vy9TQV2OCehbiDQQQGw5seM0UJqbjxsosn/tIX5OASZgeCCrAAabvKaciZPhj28/IgqDDe31lUVUmYAhxsnNA3cp0H2pDfFisqKsbCwCwvy2zlbakrqgEP/y7dOOm1s/fgeFGwT8+vn7y4cvkuJSfkYBfBz80GgBhtjV55eBvgX6BcgFugNoC9AMFmYWBgYGAMLJqhxVea9iAAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-B {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAGPSURBVChTY/z//z8DA8Pfv3+fPbj7+vnjFy9eMDMziwKBpKyUgjKQDZRlACr69vXLqQM7i3IyxCUkeXj5gUhRQbEwK/30wV1AKaACJqAZV04frWpsf/r8xdev30D6GBhev3137PTZSTPmXD1zDKiABWjLhs3bTpw5z8fLw8bK+uv3b4i6qzduA5GOhpq4tDwT0B17Dx011NOOiwgWFRGGqBATFc5MjDE11Nu2ez9QAdO3L1+AhtcU5Xi5OLRUFzMyMgIVNZYX+no4VxfnPH/5CqiABaRPRGTJmg1y0lIfP31WVpR/9frN+ctXb929/+79ByEBAaACxrOH9zx/9uz46XNAdOrcxdjwIBZm5vnLVluYGFqZGQNtlJKWZgGGx9fPn9yd7ORkpICK3rx9x8nBAdSdnhAlLioCZAAVMAFDjJObB8j58vWbrLTksVNnDx47KSUh9uPnT6AgUAqogAkYptomVkAOJwe7oa62tZlxbmq8orzc71+/gYJAKaAC9GgB+gXIZWJllVVQgUYLAwMAd3S7M57KFFAAAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-B-P {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAFxSURBVChTY/z//z8DA8Pfv3+fPbj7+vnjL58+Xbhy3cbKXFRSVkpBmZmZGSgLUvT929erZ459//oFyL/38PH1m3e83RyBbE5uHm0TK04ubiagGXAVv3//OXDkxItXbyDGAwWBUkAFjI/u3Hxw6ypQ6O79RxNmLuDkYH/15i0HO0dxdrKqkjxQXEFNmwnoDiDr379/vdPmZiZGBXi7WpkaVRdlTpq18MfPn0ApoAKmb19AFl2+dktCTFRLXYWNlfXNu/ey0pL6Oho3bt8DSgEVMAEpIPjz5w8LC8gjnz9/FRUWAjKAqn///g2WZGDi4uEBUprqKrfvPrj34BEPDzfQ1R8/fT574YqyghxQCqgA4fCbd+73TZsrIyX59PkLoBNjwgLsLE2B4kCHMwItOn90HyQIvv/4sWbTDmAQpMaGCfDzAUWAQWVo7cQEDFNQiHGDLOXk4HCxtxIXFYarAEoBFaBHC9Avl67esDA3QUQLAwMA8Y7AP7Vm5bMAAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-B-R {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAGKSURBVChTY/z//z8DA8Pfv38f3L7+7uXzn9+/AblcPDyikrJSCsrMzMxALhMQf//2tb6y1NDc1j0ocvXGrf////v6+dODW1fPH933+9M7kCKgGQtnT9+x5wAHOxsjA+ORE6dv3L4LlAACwb9f/u+by/DnF3NKXPSRw4c1VZWZmJgkxEW9XR3fvvugrCAnxcEg8/gEw89vjCzsTK+fP3a0seDl5bn74OHT5y94ebhd7K2hKsDg7/0LTN++fGFkZFSQk5GRkpSTkZKXlZHiZPh0ctu2Sw8+fv8FVPT/02vGw9s3AF0K0QQEQDPu71378duvPVcfPXn/pSHAQkdOjAnoW6g8WAXQFls1aSMFsTAzNTYW5rmHrzLyiTIBwwNZBZAx99BVvZol7VtP3375wUhOlFnRgAkYYpzcPMguff0ZFJ6XHr/RlBTUVFFi1rQGhfjvz+//753z/9MbiKJff/5uvnCfiZHB0kBbLiSfkVcYGi3AEPt7/SjQt0C/AHlAdwBtAZrBwMLGwMAAABgmq3sFycQFAAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-G {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAF7SURBVChTY/z//z8DA8Ofv3+uPr9868WNy5euiEgIy0jJKIuoakvqsjCzAGVBij79+LjhzLqP3z7Ma1x85/I9NnZWK29zIXEh71APDx1vPg5+ht9/fs8/MEdUUkJASJiHlx8ZReSFrT63DKiACWjLs+dPv3/5/uf3H6DJyGDL/J2XL1wBKmC6+eL62umboMIMDEJigkBSw1gVwl3ev+b2y5tMz14+u3PxHpDPyMgoISf27tV7IPvLx29gNQwvHr568/EN05+ff5iYmYB8Ll5OMVkxiBzQdn5hPgj7zbM3LIL8QswsTP//M1u4m146dhVoHtC/cuoy9oE2QNef2XdeSUmJ8cLjc7vP7GRkYhQQ4b97+f7k0plA3dVzS0SlRSAmmciZMwFDTEFeHqgC6Luf339CJP7++Qth8HPyAxUwAA3/+P3D4qMLQrND9K0MRMTFhERERCTETRxMFh2eB5QCKkCJlkv3Lzx99pRXkIfhB5O1obWOlD4oWhgYALFwqFkPjXZZAAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-G-P {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAF4SURBVChTY/z//z8DA8Ofv3+uPr98983t91/f3bpw19zaVFlEVVtSl4WZBSgLUvTpx8dd17d9/P4RyH967/mDa4+sfcyBbH5OfjdNLz4OfiagGXAVf37/Obf/4tsX7yHGAwWBUkAFjBcenzvz6CRQ6MndZyv617NzsL1/9YGdky2qJERWVRoobiJnzuwa5/Tj9/d///7NrF4Qku0npSTBysbqm+KxrHetiZMBMwvz99/fmT5+/wBUfvfSA2FJIUVteRZWlo9vPonLiqrqKz248Rhs6QcmIAUEf//8YWZmBjK+ff4mIMYPZLCysfz59QcsycDEzykApBS05B/dfvL07nMuXs7///5/+fD1xpnbMipSQCmgAoTDH918srRnDdCiV0/eAJ3oGediaKcLFAc6nPH3n98bLq2GBMHP7z/3rT4MDAL/NE9eAR6wMfwBeqFMwDAFhhiQAxRi52Q3dTESEheAqwBKARWgRwvQL7cv3jOxMEJECwMDAOLavZeFAemOAAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-G-U {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAF6SURBVChTY/z//z8DA8Ofv3+uPr98983tj98/ALn8nALKIqrakroszCxALkjRpx8f1xxbuXb+Blk1GT0rHXZONqAEWCm/u6Y/LwcX4+8/vxcfWpDnXwKRMHUxii4Jg7BlBLX5uHTNZPmZgLbs2rCbkZGRmYWZi5fr9J5zH99+gqhgYdH69uvv4w8/mIDu4BXgtfQ05ebj0jJVjywMYWFlhqiAmPfy808moEvN3U1s/aw5uDjYONj0rLXV5c0+vhU6smsrRNHX33+Z/ZN9/zP85xXg0bfRVdZVVFcwZ2RQrc2I+ff3r4mtI1ARIyMDE9C3EB18QrzaKpasrFp7Nq6+f/Pano2rHty6DhTnZmVmAoYHRBHcHRdPHgGSEtJyl8+cADLEedmZgCEGDA9kl0rIygPJpw/vvXn5nIuNWVaAAxSYn39+u/LiF9C3EEU/f3zftmoJkBESE2+uJMbJygyNlr///gPDA+hboF+AXKA7gLYAzWBmYmRgYAAAmW+MFdMXf4cAAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-G-W {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAGASURBVChTLZHBS8JQHMff9jadm9ucRaSyHTIzULMws9LwFBGUXuqQHbsXUtQ/0Km/ontJHRUPQhcjiIQsIuncIYNmhaJb67de7/Qevw/f3/f7fZRlWQghwzQeXu9f2i29+wFP2eUJDocivhiDGXjaUKenl+pnpdNLLRSILUYcnAMGf6i8Fs1xrMCABhC7+QMymFtOFPY3yV3zjGKrjZCLhi2ViypFUZjBvMjfVG/19w4QMV942q9a1sA0dRp8SIq4sJp0y8JkIrRV3ACaEETvx/ykwWlqZXYplwYrLoGLZ6LzEzMBVqjVrvv9AUAghvM76xayRI8bxsHYWHJ8ymvgveJxuXLF85xX8UiSm4a0RFYekjLRZFgZOTw6qdfvms3n81JZFAWKYmnog0DEB8c5U6m4oshOp0NT/Y3GI41FBhprvT1BWuKUZZntQq7b7amq3zSMbDaNsWyX2Rt8Qx9gkEjC6XS+wIphUC5eg3X/32InNXVIS1DbBxZBAyEaIfQLv3eOqz+RRXkAAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-Q {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAEPSURBVChTY/gPBl+/fu3p6TUztxAUEgEiIAPIBQpCZEGK7t69a2BozMPLj4aAgkApoAJGoHJrG7s7d+4wgIGri4u9gz2Qe//+/cuXrwgJCR09cohZSEh43fr1EBVA4OnhXlNd5eXp+frNm4MHD7148YKHm5tp1erVUHlUADTsy5cvQAZQAdPt21CL4KClte3b9++ZGRkQLkgB0C9AN5pbWObm5QMZRsamQBJoEdC9EpLSQDZQAQPQtwKCwjdv3gSK7ty1KzAwGOgpoJP//fsnLiEFVARUwAAMDyBLTV1j27btQHVwsHXrNqA4EAEVoASBkaGhr6+PlLQ00OAZ02cAXaaiogIMAqICk4ho+f8fAP4Nu28tFPTdAAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-R-G {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAGBSURBVChTY/z//z8DEPz59ff60b/3L/z/9BrIY+QTZVY0YNa0ZmBhA3GBiv5/frult+rF8+cXHr0ucDOUE+YFaQMpFfnrksTJI8wENOP33nmizL81pYT0ZEUCJm1+8v4LRNFzPv6tt3f9+fuHCWjL/09vfvz+c/jW03uvPjIxMr748BWo4oWM8h6Ofx+/f7z6/DIT0B1AoYQ5u9o2n37w5tPff/9FeTkhKsDGMdx9c5sJ4lJbNWkg+fzj14nR9kw6epMuX98yf8ejW0+Agh+/f2D8sbiS4d9fIOff//8gu2SU85Zs2bfmEFCEmYU5vy9TQV2OCehbIB8IICqAtpzYcRoozc3HDRTcv/YQP6cAEzA8IIogKoABIq8pZ+Jk+OPbj4jCYEN7fWURVSZgiAHDA+5SRkbG1IZ4MVlRVjYWAWF+W2crbUldUGD++PJ2y+1dQN9CjASCXz9/f/nwRVJcys8ogI+DHxotwBADhgfQt0C/ALlAdwBtAZrBwszCwMAAABU4tE5ME7m/AAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-R {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAGQSURBVChTY/z//z8DEPz59ff60cdnD7198YSTlUVZRZVFyZBZ05qBhQ0oCVL0//PbN5unda3cZawgxszENGHX+ZefvlX6mMa627A6JzHyCjMBzfi9d17Ngi0P3342kBMF6rNRk1qc5t619czMTQfOz28HKmCuDXX69/CSg4aMj4GSCC/nhnN3l5+4ZaEsef/Np43n7vEw/bXRUmT8ubHv/4cXIGfBwL///3/8/ps8b/eeq49YmZiuTyth+f/pNUTu/uuPN56/lxbkufv644azdyPN1V9+/Hbp8Ztf71+xQFQsP3GzYNnBv//APmVgqPY16995/vKTN0B7Rfm4mBj5RP/8+1ez9hhcRYGboa+BIlCFlAB3ta/pm/+cTMyKBt9//fn4/RdEBRBsuXjfZ8ImIIOPkw1onriBNRMwxPjEpLSkhCAqgODOyw/ywryRFurRlhoLCiNYNG2YgGEKDLFZOSFAw6GqGBg0JYXirTWzApwEvDOAChDR8vPKocdnD///8laQm4NPTBroDGi0MDAAAN4or3qfztnlAAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-R-P {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAFzSURBVChTY/z//z8DEPz59ff60b/3L/z58OrQzadO5obMigbMmtYMLGxASZCi/5/f/t477/+nN0D+lafvTt17mWSrCZLjE2F1TmLkFWYCmgFX8evP3zWn7zx8+/kf2HigIFAKqIDxz6V9f87vAApdevI2f+lhbnaWJ+++cLOzTo21N5ATAYqzGHowAd0BZP399z9r0cGOUMtMRx1vfYUFKc6Fy498+/UHJHX/AtP/T6+BrGN3niuI8JoribOzsjz78FVVXMBWTfLM/VdAKaACJiAFBL/+/GNlBrHff/0hLcgDZHCwsvz88xcsycDEyCcKpMyUxC88enP5yVsBLnagd998/rH32mNdGWGgFFABwuHnHr7OXnxQRVzg7quPf//9q/Q2DjBSAooDHc74//fPX1smQoLgy8/fk/dcAgZBc6C5KC8n2BgRNp98JmCYgkKMD+RbHnbWCHNVOSFeuAqgFFABerQA/XLk1jNbE31EtDAwAADyV7v/4UsLTAAAAABJRU5ErkJggg==') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-R-W {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAGDSURBVChTY/z//z8DEPz59ff60b/3L/z/9BrIY+QTZVY0YNa0ZmBhA3GBiv5/frult+rF8+cXHr0ucDOUE+YFaQMpFWH1SGfk4GMCmvF77zxR5t+aUkJ6siIBkzY/ef8FoohBRukP03sGhn9MQFv+f3rz4/efw7ee3nv1kYmR8cWHr0AFjFpmDLr6/////vv3IxPQHUChhDm72jaffvDm099//0V5OSEqwKYx/Pv7mQniUls1aSD5/OPXidH28rbOryVk9+8/8evXb6Ag0DDGH4srgYpBOv7/B9r1V8P4MZ9oaXnn27cf0lLD7WzNpKUlmIC+BaoAAqAKoC3fldTLyruOHz9/5cqtNWt38PJyMzKyMgHDA6II4g4ODnZzc31BQX52djY5WakLF64xMfOyAEPs790zQN9CXMrKyhId5ff9+w9ZWam/f/7Y21szM/ODA/PH5z9M74AOhBgJBJ8+feHj4/nzh5GTSw5oHSxagI7/+xHoW4hSsDt4gWYAncrAwAAAt/eqrK7r7XkAAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-S {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAE4SURBVChTY/z//z8DA8OvX78e3r/z59eP3Xv2ArmuLs4sbBzyiipsbGxALkjRzx/fT5882tzeJyrE//HLN6AoPw/X63cfayuLTM2t2Tk4Gf/+/XPlwunde/a9//T96bNnsbExQEWLFi2WkZYW5ON0dXHSMTBlevbkUVh00ut3n+/cvRsaErJ/334gCgsNBXKBguExyUAFTJ8/vre1Nreyslq8aOHmzZulpKUlpaSADCAXKGhjZQZUwNhYX/Ps5dtv374JCAgwMjCUlZUBrevq6gJ658OHD1xcXFLiwkxAITyAkRGokYEpNDjw7+/vIcHBvT3dwIDYtHkzEAEZQC5Q8M+vb0AFTLz8gkeOnTp27FhsXLyvr++zp0+BCMgAcoGCQCmgAuxBsHjxEmkpKXgQEBeYhKOFgQEA/P7IQI/HNAMAAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-T {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAEySURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRz58/7t+6CpFGBkClimra7OwcTEAzsKoAAqAgUAqogAloC0TFjx8/NmzePn3WfCD57t17oAiQAZQCKmC8cfnsj+/f/v79m5pddOv2XbARDCwsLM6OdidPn92ydikHJxcLxJgLF68AVehqa0aEBj549HjVmg07d+8Din/5+pWRkZEJrJPh67dvQFJfT8fe1kqQn//nz18Q8WfPXwBJFqAXgNYpKSoAOZcuXwXaa2piuHjeNLAaBmEhQaACJmB4ADky0pJ6OlqXrlw7e/6SpIQ4HLGxsQEVMAFDDKgWqC4zLRFofWtX/69fULuAACgFVIASmHMXLJWVlXZzdoCrgAQmEdHCwAAAflyrAjDm4ZIAAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-U-B {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAGDSURBVChTY/z//z8DA8Pff/8ff/jx5N2XXwxMQC43K7M4L7usAAczEyOQCxL6/vvvqccfm1tar9+4AdQCRF9+/b379htQ8Nv3HyBFQDMuPPt85MD+VXOmXD9/BigEByzfP1w7ffjv379MQFu+/fq7YuZEoOjaBTPevHwOUcH39/OP++e/f/3y7MFdppeffwKFHty+ASQ19I3OHN4PVwFSy8Dw+vljpq+//wJZJraOQPL0ob1ahiZAFZvnT16xbvON23eBgt++fGHce/sN0KV/fv++cemchIyckghPV2X+sVNnP33+ws3FWZSVYmVmwgT0LVA5CyurjrE5UMXnW6d//foV5OPBwsKSn5505cYtLh4eJmB4ABUBAcQdrKwskuJi5y5e0dPSOHH2vLKCvKikLBMwxLjYmJFdGh0awMzM9PXbN6AKbw9XKQVlUIh/+/Hz2qlDQN9CFAHBtZu3ubm5/jEwhUbHc3Jxw6Ll719geAB9C/QLkAt0B9AWoBnMzMwMDAwA1ATEGG+vZB0AAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-U {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAE0SURBVChTY/z//z8DA8Pff/8ff/jx8vPPr7//ArncrMzivOyyAhzMTIxALhMQf//999Tjj3fffvvy6y9Qy+GdW4EMIBcoCJQCKQKaceHZ52+/QBwgWDFz4r7NayBsoCBQCqiACWgLXMXJA3tWzJr06tlTCBcIgFJABUxAd0D4H9+9ndxYBmQ8eXD31ME9EEEgACpggrgUCNYtnPnl00cIe3pb7eePHyBsoAKQwyFg/9b1UBYDg4yCEtBxUA7Q4UDfAqlvXz5/ev8OIgQEzn4hL589gbCBCpiA4QFksXNwsrKxQUSBYPvqpY/u3oKwgQqYgCHGxcbMzMLi4BUIEQWCm5fPW7t4AhlAKaACUIgDQwwYHu8/fZnb23L64F4GRkYH74DozCI+bg4DKV5OVmYiooWBAQBSlKd9GixjLgAAAABJRU5ErkJggg==') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-U-P {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAF6SURBVChTY/z//z8DA8Pff/8ff/jx8vPPzz9/XTp53NrOXpyXXVaAg5mJESjLBMTff/899fjj3bffvvz6++D2rWePHwIZQC5QECgFUgQ048Kzz99+gTi/f/06tH3Ty6dP/v/7B+QCBYFSQAWM999+A2oCCt27cW1KUyUHF9fr58+AZEFTt7KmDlBcWZiL8cSD90DD//37Wxjhl1HV9PH9u6tnT7qHRPZVFbXPXcHOycnDxsz0FWzr1bOnJGRkNQ2M2djY3r56KaOgrGdmefPyeaAUUAHI4UDw+/dvZhZWIOPzxw8iEpJABisb269fv8CSDEzcrMxASlPf6M61y/dvXuPh5we6+uP7t+ePHVZS1wJKARUgHH77ysWJ9WXSCkrPHt7/9+9fZEa+jZs3UBzk8D9//wHDAxIE3799Xb9w9stnjxMLqwSEhIEiXGzMZrL8TMAwNZDiBXKAQpxc3E4+QWKS0nAVQCmgAvRoAfrlyukT5lbWiGhhYAAAbkHMEz2QWQgAAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-U-R {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAF3SURBVChTY/z//z8DA8Pff/8ff/jx5N2XXwxMQC43K7M4L7usAAczEyOQCxL6/vvvqccfm1tar9+4AdQCRF9+/b379htQ8NfXzyBFQDMuPPt85MD+VXOmXD9/BigEB8KfHjDsnsHw5xcT0JZvv/6umDkRKLp2wYw3L59DVMj+eCx/fvn/T2/+Xj/K9PLzT6DQg9s3gKSGvtGZw/vhKkBqgc69f4Hp6++/QJaJrSOQPH1or5ahicz3x1+3Tt126cHH77+Agv8/vWbce/sN0KV/fv++cemchIycIf+fRyt7Pn77tefqoyfvvzQEWOjIiTGeePAe6BeQuUi2PPvw9cHrT1P2XhTn55qQGcYEDA80FXMPXdWrWdK+9fTtlx+M5ESZFQ2YgCHGxcaM7NLXn78ByUuP32hKCmqqKDFrWoNC/Ne3Lwy7pgN9C1H068/fzRfuA4Pa0kBbLiSfkVcYGi3AEAOGB9C3QL8AeYx8IFuAZjCwsDEwMAAA6yrNGOnSx68AAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-W-B {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAF9SURBVChTY/z//z8DCPz7+/fjv7+f////DeQwMrIyMfMyM/MzMDABuSAMlPjy+e6d2xcWLlr569cvkMD/X3//vP396/Hvn99Bev7///vx4+0njx8nJJUB+T7eTnGxgdLS4kD2p/e/H9x6YmjtxAS0hZnpX0PjpDt3Ht64ce/o0bMQBwBVXD9/4/vXL88e3GUCuoORkVFaRkJRUVZAgM/cXP/duw8QFUClQPD6+WPGnz/uAB0B5EyavMjIUOvN2/dOdg6T+ue8fffeQFdLQ1WZkZEJoQhoC9BIoBmZmRXHTp399PkLNxdnUVaKlZkJE9C3YFOB3gapuHTqCtCDQT4eLCws+elJV27c4uLhYQKGB0QRxB2srCyS4mLnLl7R09I4cfa8soK8qKQsEzDEgIYhuzQ6NICZmenrt29AFd4erlIKyqAQ//3r+8UTR4C+hSgCgms3b3Nzc/1jYAqNjufk4oZGy9+/f4HhAfTtty8gpUB3AG0BmsHMzMzAwAAAzY/G/vJiHAIAAAAASUVORK5CYII=') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-W {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAGLSURBVChTY/gPBX///Hn36+fDT5+ubVg/B8gAcoGCEDkmBgagut+3bx3/++ftj59fAgMz3n/4+P//LyD396/HQCmgAqCif39+P5swcc6lyze/ff3x6/fvvv75r1+/Ky5p6+icevPGCaACRqCxQE129pFXr922sTHh5uL8/Pmrnp7GjJnLtm6ZY2FuwMwizPTv72eggRvWz4iI8Pn08fPOXYePHT8HRMrKcjw8XF+/fgMqYNy5YwkjI2Nbx3QuLs5Dh04BNUCAk6Plrdv3RUWFtm6Zy2RsrFta3vHo0TM2VhaoPBh8+fKVn483Oytm756jjEDfvn37+vOXrwWFLR/efwQ6H2iwvr6mpYVBWWkaHx8PIyMbExMzr5AQ/7ev34F2mZnrA6WBFrGwME+fsSw5pRJoJFABEzMz/717z+cvWOPhbldZnvH27fvXr98unN+1f99SKSkxRkZWoAJQiP/79+vXzwc/f9wGookTG0PD/L5+uQFk37p1FCgFVIAeLevXzf7+7RZKtPz/DwBg2QLLp+LstgAAAABJRU5ErkJggg==') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-W-P {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAFpSURBVChTY/gPBX///Hn36+fDb19v7di+BMgAcoGCEDkmBgagut+/fz3+++ft//+/rl+/c//BYyADyAUKAqWACoCK/v35/QzC+fXr95q1Ox4+fPbv3z+IZqAUUAHT378fISouXb7p5pF0+szlDRv32NhGXrh4HaIOqIABaP3PH7e/fb1pampy6OCadWtnFxdnXr6829LK4v27S0ApoAImiDHHjp1TUJA2N9dn52B79vyVmqqCna3JmTNXIIaBHA4Ev37/ZmFlATI+vP8kLSUOZLCzs/34+QssycDEyMgKpMxM9S6cv3758i1+AV6gn9+8eb9333E9XXWgFFABIzA8gL4Fcs6du5qV06iqIn/n7qO/f/9WVqQHBrgCxZlZhBmBIQYPjy9fvk2esvjhw6fNTQWiokIQY1jZZIFuYmJhlYJYysPDFRnhIyMjCVcBlAIqADkcohxkLCObgoKMg70ZkAHkAgVBmhkYAPMk1o1WW4JjAAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-W-U {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAFwSURBVChTRVFNSwJRFH3vTZN9ma5SsiFsEwQRtshNi1YRtDJaVkar/kBBu+on2C9I2lQrN61ctEgoSMqEtBq1UosBv1KrmdGZ6UwTduE97rnvcO6951HDMIgZuqZ96FrDMFoAlPKMs3OcgxAGaB48NBsZ8en2MHysqqpZMFStXW6pebXdBqELGvV6tpDPr29sAxeL0tpqwONxIa/Ktky1OSM4GLpwTN/dC4niSzqdjcXi1gA1ZSAp8V+qlq/JDHNQSj0jbq9XcDoH/f6pSqVmMUBFSA2FKrKIIQBCB+Fp30SpXJ1bCJzflVOJ+Oz8IuqUkn8SukASGjcFshVcGp/0be7sWySGbZH9ApOBLtHIae7hPho5eX5Mod7Pcwx+WKTOHImrC9yuYSF5fWkmdhuDYxDrMBBuYRT322uuJL33dXOCs8d0HMbFC01sa5EU+fvs5AjJ8krQPzbUy3N/36LpBvzAtp8tk4o50AUaHKOEkB8/eLy7BZycwQAAAABJRU5ErkJggg==') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-X {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url('data:image/png; base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAFdSURBVChTY/z//z8DA8O/f3/fvHz+/u2rnz++A7nsHJyCwmIi4pJMTMxALkjRt69fTh49+PPnDyCfl4eHi5vr5ctXQDYXF7e5jQM7OwfD379/zp48nBgfy8PLr6SkvGzhnNPH9nt7eYqIiFWVl1y/dAaogAVoCzMjQ352moS42Oz5Szg5OVlZWORkpYP8vY0N9YG2gxSkJMT8+fMbaLiOlsbzFy8XLVvFwswsLiZqY2UBFASCP79/MUFcCgFF+ZlCggIr1mxwtLeBCjEwABUwQZlg8O7deyVF+d+/f0+cNhsqBAZMQN9CWF++fp27YGledlp7U83W7bs3bd0JEQcqYAKGB0jFl69Vda0OdlYc7OzqaiqRYUF9k6YfOXYSKAVUwAQMMaDaWfMWAx177MRpoF0vXr56//69q5P9wcPHbt65B1QACkxgMN6/dRXZBxAA1Kyopg0MTCKihYEBAH+uo0BN0hOvAAAAAElFTkSuQmCC') no-repeat;
|
||||||
|
}
|
||||||
|
.mana-C {
|
||||||
|
display:inline-block;
|
||||||
|
width:12px;
|
||||||
|
height:12px;
|
||||||
|
\#padding-left:12px;
|
||||||
|
background: url() no-repeat;
|
||||||
|
}
|
||||||
|
</style></head><body>"""
|
|
@ -15,9 +15,14 @@ def mtg_open_json(fname, verbose = False):
|
||||||
for k_set in jobj:
|
for k_set in jobj:
|
||||||
set = jobj[k_set]
|
set = jobj[k_set]
|
||||||
setname = set['name']
|
setname = set['name']
|
||||||
|
if 'magicCardsInfoCode' in set:
|
||||||
|
codename = set['magicCardsInfoCode']
|
||||||
|
else:
|
||||||
|
codename = ''
|
||||||
|
|
||||||
for card in set['cards']:
|
for card in set['cards']:
|
||||||
card[utils.json_field_set_name] = setname
|
card[utils.json_field_set_name] = setname
|
||||||
|
card[utils.json_field_info_code] = codename
|
||||||
|
|
||||||
cardnumber = None
|
cardnumber = None
|
||||||
if 'number' in card:
|
if 'number' in card:
|
||||||
|
@ -137,9 +142,10 @@ def mtg_open_file(fname, verbose = False,
|
||||||
for card_src in text.split(utils.cardsep):
|
for card_src in text.split(utils.cardsep):
|
||||||
if card_src:
|
if card_src:
|
||||||
card = cardlib.Card(card_src, fmt_ordered=fmt_ordered)
|
card = cardlib.Card(card_src, fmt_ordered=fmt_ordered)
|
||||||
|
# unlike opening from json, we still want to return invalid cards
|
||||||
|
cards += [card]
|
||||||
if card.valid:
|
if card.valid:
|
||||||
valid += 1
|
valid += 1
|
||||||
cards += [card]
|
|
||||||
elif card.parsed:
|
elif card.parsed:
|
||||||
invalid += 1
|
invalid += 1
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -13,7 +13,7 @@ class Manacost:
|
||||||
colors = ''
|
colors = ''
|
||||||
for sym in self.symbols:
|
for sym in self.symbols:
|
||||||
if self.symbols[sym] > 0:
|
if self.symbols[sym] > 0:
|
||||||
symcolors = re.sub(r'2|P|S|X', '', sym)
|
symcolors = re.sub(r'2|P|S|X|C', '', sym)
|
||||||
for symcolor in symcolors:
|
for symcolor in symcolors:
|
||||||
if symcolor not in colors:
|
if symcolor not in colors:
|
||||||
colors += symcolor
|
colors += symcolor
|
||||||
|
@ -136,7 +136,7 @@ class Manacost:
|
||||||
else:
|
else:
|
||||||
ld = ''
|
ld = ''
|
||||||
rd = ''
|
rd = ''
|
||||||
return ' '.join(map(lambda s: ld + s + rd, self.sequence))
|
return ' '.join(map(lambda s: ld + s + rd, sorted(self.sequence)))
|
||||||
|
|
||||||
|
|
||||||
class Manatext:
|
class Manatext:
|
||||||
|
@ -181,7 +181,9 @@ class Manatext:
|
||||||
def format(self, for_forum = False, for_html = False):
|
def format(self, for_forum = False, for_html = False):
|
||||||
text = self.text
|
text = self.text
|
||||||
for cost in self.costs:
|
for cost in self.costs:
|
||||||
text = text.replace(utils.reserved_mana_marker, cost.format(for_forum = for_forum, for_html = for_html), 1)
|
text = text.replace(utils.reserved_mana_marker, cost.format(for_forum=for_forum, for_html=for_html), 1)
|
||||||
|
if for_html:
|
||||||
|
text = text.replace('\n', '<br>\n')
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def encode(self, randomize = False):
|
def encode(self, randomize = False):
|
||||||
|
|
|
@ -1,16 +1,62 @@
|
||||||
|
# This module is misleadingly named, as it has other utilities as well
|
||||||
|
# that are generally necessary when trying to postprocess output by
|
||||||
|
# comparing it against existing cards.
|
||||||
|
|
||||||
import difflib
|
import difflib
|
||||||
import os
|
import os
|
||||||
|
import multiprocessing
|
||||||
|
|
||||||
|
import utils
|
||||||
import jdecode
|
import jdecode
|
||||||
import cardlib
|
import cardlib
|
||||||
|
|
||||||
libdir = os.path.dirname(os.path.realpath(__file__))
|
libdir = os.path.dirname(os.path.realpath(__file__))
|
||||||
datadir = os.path.realpath(os.path.join(libdir, '../data'))
|
datadir = os.path.realpath(os.path.join(libdir, '../data'))
|
||||||
|
|
||||||
|
# multithreading control parameters
|
||||||
|
cores = multiprocessing.cpu_count()
|
||||||
|
|
||||||
|
# split a list into n pieces; return a list of these lists
|
||||||
|
# has slightly interesting behavior, in that if n is large, it can
|
||||||
|
# run out of elements early and return fewer than n lists
|
||||||
|
def list_split(l, n):
    """Split l into at most n contiguous chunks of (nearly) equal size.

    The chunk size is ceil(len(l) / n), so when n is large relative to
    len(l) the elements run out early and fewer than n chunks come back.

    l -- the list (or other sliceable sequence) to split
    n -- the desired number of chunks

    Returns a list of slices of l, in order. An empty l yields [].
    For n <= 0 the input l itself is returned unsplit (historical
    behavior, kept for backward compatibility).
    """
    if n <= 0:
        return l
    # ceiling division; '//' keeps the size an int even where '/' is
    # true division (Python 3 or 'from __future__ import division')
    split_size = (len(l) + n - 1) // n
    if split_size == 0:
        # empty input: range() rejects a step of zero, so stop here
        return []
    return [l[i:i + split_size] for i in range(0, len(l), split_size)]
|
||||||
|
|
||||||
|
# flatten a list of lists into a single list of all their contents, in order
|
||||||
|
def list_flatten(l):
    """Concatenate the sublists of l, in order, into one new flat list."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat
|
||||||
|
|
||||||
|
|
||||||
|
# isolated logic for multiprocessing
|
||||||
|
def f_nearest(name, matchers, n):
    """Rank the matchers' candidate strings by similarity to name.

    name     -- the query string; installed as seq1 of every matcher
    matchers -- difflib.SequenceMatcher objects whose second sequence
                (m.b) is the candidate string; they are mutated here
    n        -- maximum number of results when there is no exact match

    Returns a list of (ratio, candidate) tuples sorted best first. If
    the best ratio is >= 1 (an exact match) only that single entry is
    returned; otherwise the top n entries. With no matchers the result
    is an empty list.
    """
    ratios = []
    for m in matchers:
        m.set_seq1(name)
        ratios.append((m.ratio(), m.b))
    ratios.sort(reverse=True)

    # nothing to compare against: previously ratios[0] raised IndexError
    if not ratios:
        return []
    if ratios[0][0] >= 1:
        return ratios[:1]
    return ratios[:n]
|
||||||
|
|
||||||
|
def f_nearest_per_thread(workitem):
    """Worker entry point: run f_nearest for one chunk of query strings.

    workitem -- a (worknames, names, n) tuple: the query strings for
                this worker, the candidate strings to match against,
                and the result limit passed through to f_nearest

    Returns a list holding one f_nearest result per entry of worknames,
    in the same order.
    """
    (worknames, names, n) = workitem
    # each thread (well, process) needs to generate its own matchers
    matchers = [difflib.SequenceMatcher(b=name, autojunk=False) for name in names]
    # build a concrete list rather than a map object: on Python 3 map()
    # is lazy and cannot be pickled back to the parent process
    return [f_nearest(name, matchers, n) for name in worknames]
|
||||||
|
|
||||||
class Namediff:
|
class Namediff:
|
||||||
def __init__(self, verbose = True,
|
def __init__(self, verbose = True,
|
||||||
json_fname = os.path.join(datadir, 'AllSets.json')):
|
json_fname = os.path.join(datadir, 'AllSets.json')):
|
||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
self.names = {}
|
self.names = {}
|
||||||
|
self.codes = {}
|
||||||
|
self.cardstrings = {}
|
||||||
|
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
print 'Setting up namediff...'
|
print 'Setting up namediff...'
|
||||||
|
@ -28,30 +74,47 @@ class Namediff:
|
||||||
card = cardlib.Card(jcards[idx])
|
card = cardlib.Card(jcards[idx])
|
||||||
name = card.name
|
name = card.name
|
||||||
jname = jcards[idx]['name']
|
jname = jcards[idx]['name']
|
||||||
|
jcode = jcards[idx][utils.json_field_info_code]
|
||||||
|
if 'number' in jcards[idx]:
|
||||||
|
jnum = jcards[idx]['number']
|
||||||
|
else:
|
||||||
|
jnum = ''
|
||||||
|
|
||||||
if name in self.names:
|
if name in self.names:
|
||||||
print ' Duplicate name ' + name + ', ignoring.'
|
print ' Duplicate name ' + name + ', ignoring.'
|
||||||
else:
|
else:
|
||||||
self.names[name] = jname
|
self.names[name] = jname
|
||||||
|
self.cardstrings[name] = card.encode()
|
||||||
|
if jcode and jnum:
|
||||||
|
self.codes[name] = jcode + '/' + jnum + '.jpg'
|
||||||
|
else:
|
||||||
|
self.codes[name] = ''
|
||||||
namecount += 1
|
namecount += 1
|
||||||
|
|
||||||
print ' Read ' + str(namecount) + ' unique cardnames'
|
print ' Read ' + str(namecount) + ' unique cardnames'
|
||||||
print ' Building SequenceMatcher objects.'
|
print ' Building SequenceMatcher objects.'
|
||||||
|
|
||||||
self.matchers = [difflib.SequenceMatcher(b=n, autojunk=False) for n in self.names]
|
self.matchers = [difflib.SequenceMatcher(b=n, autojunk=False) for n in self.names]
|
||||||
|
self.card_matchers = [difflib.SequenceMatcher(b=self.cardstrings[n], autojunk=False) for n in self.cardstrings]
|
||||||
|
|
||||||
print '... Done.'
|
print '... Done.'
|
||||||
|
|
||||||
def nearest(self, name, n=3):
|
def nearest(self, name, n=3):
|
||||||
for m in self.matchers:
|
return f_nearest(name, self.matchers, n)
|
||||||
m.set_seq1(name)
|
|
||||||
ratios = [(m.ratio(), m.b) for m in self.matchers]
|
|
||||||
ratios.sort(reverse = True)
|
|
||||||
|
|
||||||
if ratios[0][0] >= 1:
|
|
||||||
return ratios[:1]
|
|
||||||
else:
|
|
||||||
return ratios[:n]
|
|
||||||
|
|
||||||
|
def nearest_par(self, names, n=3, threads=cores):
    """Parallel counterpart of nearest() for a whole list of names.

    names   -- the query strings to look up
    n       -- result limit per query, passed through to f_nearest
    threads -- number of worker processes (defaults to the CPU count)

    The names are split into up to `threads` chunks, each chunk is
    matched in its own worker process, and the per-chunk results are
    concatenated. Returns one f_nearest result list per input name,
    in input order.
    """
    workpool = multiprocessing.Pool(threads)
    try:
        proto_worklist = list_split(names, threads)
        worklist = [(chunk, self.names, n) for chunk in proto_worklist]
        donelist = workpool.map(f_nearest_per_thread, worklist)
    finally:
        # shut the workers down; the pool was previously never closed,
        # leaking its processes on every call
        workpool.close()
        workpool.join()
    return list_flatten(donelist)
|
||||||
|
|
||||||
|
def nearest_card(self, card, n=5):
    """Find the stored card encodings most similar to card's encoding.

    card is encoded with card.encode() and compared against
    self.card_matchers; the result is f_nearest's list of
    (ratio, encoded card string) tuples, limited to n entries.
    """
    encoded = card.encode()
    return f_nearest(encoded, self.card_matchers, n)
|
||||||
|
|
||||||
|
def nearest_card_par(self, cards, n=5, threads=cores):
    """Parallel counterpart of nearest_card() for a whole list of cards.

    cards   -- card objects; each is encoded with its encode() method
    n       -- result limit per card, passed through to f_nearest
    threads -- number of worker processes (defaults to the CPU count)

    The cards are split into up to `threads` chunks, each chunk is
    matched against every stored card encoding in its own worker
    process, and the per-chunk results are concatenated. Returns one
    f_nearest result list per input card, in input order.
    """
    workpool = multiprocessing.Pool(threads)
    try:
        proto_worklist = list_split(cards, threads)
        # materialize the candidates once; every work item shares them
        cardstrings = list(self.cardstrings.values())
        worklist = [([c.encode() for c in chunk], cardstrings, n)
                    for chunk in proto_worklist]
        donelist = workpool.map(f_nearest_per_thread, worklist)
    finally:
        # shut the workers down; the pool was previously never closed,
        # leaking its processes on every call
        workpool.close()
        workpool.join()
    return list_flatten(donelist)
|
||||||
|
|
305
lib/nltk_model.py
Normal file
305
lib/nltk_model.py
Normal file
|
@ -0,0 +1,305 @@
|
||||||
|
# Natural Language Toolkit: Language Models
|
||||||
|
#
|
||||||
|
# Copyright (C) 2001-2014 NLTK Project
|
||||||
|
# Authors: Steven Bird <stevenbird1@gmail.com>
|
||||||
|
# Daniel Blanchard <dblanchard@ets.org>
|
||||||
|
# Ilia Kurenkov <ilia.kurenkov@gmail.com>
|
||||||
|
# URL: <http://nltk.org/>
|
||||||
|
# For license information, see LICENSE.TXT
|
||||||
|
#
|
||||||
|
# adapted for mtgencode Nov. 2015
|
||||||
|
# an attempt was made to preserve the exact functionality of this code,
|
||||||
|
# hampered somewhat by its brokenness
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from math import log
|
||||||
|
|
||||||
|
from nltk.probability import ConditionalProbDist, ConditionalFreqDist, LidstoneProbDist
|
||||||
|
from nltk.util import ngrams
|
||||||
|
from nltk_model_api import ModelI
|
||||||
|
|
||||||
|
from nltk import compat
|
||||||
|
|
||||||
|
|
||||||
|
def _estimator(fdist, **estimator_kwargs):
    """
    Default estimator: wrap fdist in a LidstoneProbDist whose second
    argument is fixed at 0.001, forwarding any extra keyword arguments.
    """
    # lives at module level because instance methods of NgramModel
    # can't be pickled either.
    gamma = 0.001
    return LidstoneProbDist(fdist, gamma, **estimator_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
@compat.python_2_unicode_compatible
class NgramModel(ModelI):
    """
    A processing interface for assigning a probability to the next word.

    Implemented as an n-gram model with Katz backoff: every model of order
    n > 1 owns a recursively constructed model of order n - 1.
    """

    def __init__(self, n, train, pad_left=True, pad_right=False,
                 estimator=None, **estimator_kwargs):
        """
        Create an ngram language model to capture patterns in n consecutive
        words of training text.  An estimator smooths the probabilities derived
        from the text and may allow generation of ngrams not seen during
        training. See model.doctest for more detailed testing

            >>> from nltk.corpus import brown
            >>> lm = NgramModel(3, brown.words(categories='news'))
            >>> lm
            <NgramModel with 91603 3-grams>
            >>> lm._backoff
            <NgramModel with 62888 2-grams>
            >>> lm.entropy(brown.words(categories='humor'))
            ... # doctest: +ELLIPSIS
            12.0399...

        :param n: the order of the language model (ngram size)
        :type n: int
        :param train: the training text
        :type train: list(str) or list(list(str))
        :param pad_left: whether to pad the left of each sentence with an (n-1)-gram of empty strings
        :type pad_left: bool
        :param pad_right: whether to pad the right of each sentence with an (n-1)-gram of empty strings
        :type pad_right: bool
        :param estimator: a function for generating a probability distribution
        :type estimator: a function that takes a ConditionalFreqDist and
            returns a ConditionalProbDist
        :param estimator_kwargs: Extra keyword arguments for the estimator
        :type estimator_kwargs: (any)
        """

        # protection from cryptic behavior for calling programs
        # that use the pre-2.0.2 interface
        assert(isinstance(pad_left, bool))
        assert(isinstance(pad_right, bool))

        # make sure n is greater than zero, otherwise print it
        assert (n > 0), n

        # For explicitness save the check whether this is a unigram model
        self.is_unigram_model = (n == 1)
        # save the ngram order number
        self._n = n
        # save left and right padding
        self._lpad = ('',) * (n - 1) if pad_left else ()
        self._rpad = ('',) * (n - 1) if pad_right else ()

        if estimator is None:
            estimator = _estimator

        cfd = ConditionalFreqDist()

        # set read-only ngrams set (see property declaration below to reconfigure)
        self._ngrams = set()

        # If given a list of strings instead of a list of lists, create enclosing list
        if (train is not None) and isinstance(train[0], compat.string_types):
            train = [train]

        # we need to keep track of the number of word types we encounter
        vocabulary = set()
        for sent in train:
            raw_ngrams = ngrams(sent, n, pad_left, pad_right, pad_symbol='')
            for ngram in raw_ngrams:
                self._ngrams.add(ngram)
                context = tuple(ngram[:-1])
                token = ngram[-1]
                cfd[context][token] += 1
                vocabulary.add(token)

        # Unless number of bins is explicitly passed, we should use the number
        # of word types encountered during training as the bins value.
        # If right padding is on, this includes the padding symbol.
        # (the value is stored in estimator_kwargs, so the lower-order models
        # built below receive the same bins)
        if 'bins' not in estimator_kwargs:
            estimator_kwargs['bins'] = len(vocabulary)

        self._model = ConditionalProbDist(cfd, estimator, **estimator_kwargs)

        # A unigram model has nothing to back off to; define the attributes
        # anyway so that the ``backoff`` property never raises AttributeError.
        self._backoff = None
        self._backoff_alphas = dict()

        # recursively construct the lower-order models
        if not self.is_unigram_model:
            self._backoff = NgramModel(n-1, train,
                                       pad_left, pad_right,
                                       estimator,
                                       **estimator_kwargs)

            # For each condition (or context)
            for ctxt in cfd.conditions():
                backoff_ctxt = ctxt[1:]
                backoff_total_pr = 0.0
                total_observed_pr = 0.0

                # this is the subset of words that we OBSERVED following
                # this context.
                # i.e. Count(word | context) > 0
                # (_words_following yields whole collections of words)
                for words in self._words_following(ctxt, cfd):
                    for word in words:
                        total_observed_pr += self.prob(word, ctxt)
                        # we also need the total (n-1)-gram probability of
                        # words observed in this n-gram context
                        backoff_total_pr += self._backoff.prob(word, backoff_ctxt)

                assert (0 <= total_observed_pr <= 1), total_observed_pr
                # beta is the remaining probability weight after we factor out
                # the probability of observed words.
                # As a sanity check, both total_observed_pr and backoff_total_pr
                # must be GE 0, since probabilities are never negative
                beta = 1.0 - total_observed_pr

                # backoff total has to be less than one, otherwise we get
                # an error when we try subtracting it from 1 in the denominator
                assert (0 <= backoff_total_pr < 1), backoff_total_pr
                alpha_ctxt = beta / (1.0 - backoff_total_pr)

                self._backoff_alphas[ctxt] = alpha_ctxt

    def _words_following(self, context, cond_freq_dist):
        """
        Yield the collection of words observed after ``context``.

        Yields at most one item: the keys of ``cond_freq_dist[context]`` when
        ``context`` is a known condition, nothing otherwise.
        """
        # Direct lookup instead of scanning every condition for an equal key:
        # same result, constant time per context, and no dependence on the
        # Python-2-only dict.iterkeys().  The membership test does not insert
        # a new entry into the mapping.
        if context in cond_freq_dist:
            yield cond_freq_dist[context].keys()

    def prob(self, word, context):
        """
        Evaluate the probability of this word in this context using Katz Backoff.

        :param word: the word to get the probability of
        :type word: str
        :param context: the context the word is in
        :type context: list(str)
        """
        context = tuple(context)
        if (context + (word,) in self._ngrams) or (self.is_unigram_model):
            return self._model[context].prob(word)
        else:
            return self._alpha(context) * self._backoff.prob(word, context[1:])

    def _alpha(self, context):
        """Get the backoff alpha value for the given context

        Contexts that were never seen in training get a weight of 1.
        """
        error_message = "Alphas and backoff are not defined for unigram models"
        assert not self.is_unigram_model, error_message

        if context in self._backoff_alphas:
            return self._backoff_alphas[context]
        else:
            return 1

    def logprob(self, word, context):
        """
        Evaluate the (negative) log probability of this word in this context.

        :param word: the word to get the probability of
        :type word: str
        :param context: the context the word is in
        :type context: list(str)
        """
        return -log(self.prob(word, context), 2)

    @property
    def ngrams(self):
        """The set of n-gram tuples seen during training."""
        return self._ngrams

    @property
    def backoff(self):
        """The order n-1 model, or None for a unigram model."""
        return self._backoff

    @property
    def model(self):
        """The underlying ConditionalProbDist."""
        return self._model

    def choose_random_word(self, context):
        '''
        Randomly select a word that is likely to appear in this context.

        :param context: the context the word is in
        :type context: list(str)
        '''

        return self.generate(1, context)[-1]

    # NB, this will always start with same word if the model
    # was trained on a single text
    def generate(self, num_words, context=()):
        '''
        Generate random text based on the language model.

        The returned list starts with the words of ``context`` followed by
        ``num_words`` generated words.

        :param num_words: number of words to generate
        :type num_words: int
        :param context: initial words in generated string
        :type context: list(str)
        '''

        text = list(context)
        for _ in range(num_words):
            text.append(self._generate_one(text))
        return text

    def _generate_one(self, context):
        """Generate one word following ``context``, backing off as needed."""
        context = self._lpad + tuple(context)
        # Keep only the last n-1 tokens.  A unigram model always conditions on
        # the empty context: slicing with [-self._n + 1:] is [0:] when n == 1,
        # which kept the whole history and made every word after the first
        # fall through to the '.' fallback below.
        if self.is_unigram_model:
            context = ()
        else:
            context = context[-(self._n - 1):]
        if context in self:
            return self[context].generate()
        elif self._n > 1:
            return self._backoff._generate_one(context[1:])
        else:
            return '.'

    def entropy(self, text):
        """
        Calculate the approximate cross-entropy of the n-gram model for a
        given evaluation text.
        This is the average log probability of each word in the text.

        :param text: words to use for evaluation
        :type text: list(str)
        """

        H = 0.0     # entropy is conventionally denoted by "H"
        # list(text) lets callers pass any sequence (e.g. a tuple), not only
        # a list
        text = list(self._lpad) + list(text) + list(self._rpad)
        for i in range(self._n - 1, len(text)):
            context = tuple(text[(i - self._n + 1):i])
            token = text[i]
            H += self.logprob(token, context)
        return H / float(len(text) - (self._n - 1))

    def perplexity(self, text):
        """
        Calculates the perplexity of the given text.
        This is simply 2 ** cross-entropy for the text.

        :param text: words to calculate perplexity of
        :type text: list(str)
        """

        return pow(2.0, self.entropy(text))

    def __contains__(self, item):
        # a bare token is treated as a one-element context
        if not isinstance(item, tuple):
            item = (item,)
        return item in self._model

    def __getitem__(self, item):
        # a bare token is treated as a one-element context
        if not isinstance(item, tuple):
            item = (item,)
        return self._model[item]

    def __repr__(self):
        return '<NgramModel with %d %d-grams>' % (len(self._ngrams), self._n)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # run the doctests embedded in this module's docstrings
    from doctest import NORMALIZE_WHITESPACE, testmod
    testmod(optionflags=NORMALIZE_WHITESPACE)
|
42
lib/nltk_model_api.py
Normal file
42
lib/nltk_model_api.py
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
# Natural Language Toolkit: API for Language Models
|
||||||
|
#
|
||||||
|
# Copyright (C) 2001-2014 NLTK Project
|
||||||
|
# Author: Steven Bird <stevenbird1@gmail.com>
|
||||||
|
# URL: <http://nltk.org/>
|
||||||
|
# For license information, see LICENSE.TXT
|
||||||
|
#
|
||||||
|
# imported for use in mtgencode Nov. 2015
|
||||||
|
|
||||||
|
|
||||||
|
# should this be a subclass of ConditionalProbDistI?
|
||||||
|
|
||||||
|
class ModelI(object):
    """
    Interface for language models that assign a probability to the next word.

    Every method defined here, including the constructor, raises
    NotImplementedError.
    """

    def __init__(self):
        """Create a new language model."""
        raise NotImplementedError

    def prob(self, word, context):
        """Evaluate the probability of this word in this context."""
        raise NotImplementedError

    def logprob(self, word, context):
        """Evaluate the (negative) log probability of this word in this context."""
        raise NotImplementedError

    def choose_random_word(self, context):
        """Randomly select a word that is likely to appear in this context."""
        raise NotImplementedError

    def generate(self, n):
        """Generate n words of text from the language model."""
        raise NotImplementedError

    def entropy(self, text):
        """Evaluate the total entropy of a message with respect to the model.

        This is the sum of the log probability of each word in the message.
        """
        raise NotImplementedError
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# transform passes used to encode / decode cards
|
# transform passes used to encode / decode cards
|
||||||
import re
|
import re
|
||||||
|
import random
|
||||||
|
|
||||||
# These could probably use a little love... They tend to hardcode in lots
|
# These could probably use a little love... They tend to hardcode in lots
|
||||||
# of things very specific to the mtgjson format.
|
# of things very specific to the mtgjson format.
|
||||||
|
@ -94,6 +95,19 @@ def text_pass_2_cardname(s, name):
|
||||||
for override in overrides:
|
for override in overrides:
|
||||||
s = s.replace(override, this_marker)
|
s = s.replace(override, this_marker)
|
||||||
|
|
||||||
|
# stupid planeswalker abilities
|
||||||
|
s = s.replace('to him.', 'to ' + this_marker + '.')
|
||||||
|
s = s.replace('to him this', 'to ' + this_marker + ' this')
|
||||||
|
s = s.replace('to himself', 'to itself')
|
||||||
|
s = s.replace("he's", this_marker + ' is')
|
||||||
|
|
||||||
|
# sometimes we actually don't want to do this replacement
|
||||||
|
s = s.replace('named ' + this_marker, 'named ' + name)
|
||||||
|
s = s.replace('name is still ' + this_marker, 'name is still ' + name)
|
||||||
|
s = s.replace('named keeper of ' + this_marker, 'named keeper of ' + name)
|
||||||
|
s = s.replace('named kobolds of ' + this_marker, 'named kobolds of ' + name)
|
||||||
|
s = s.replace('named sword of kaldra, ' + this_marker, 'named sword of kaldra, ' + name)
|
||||||
|
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
|
@ -133,7 +147,14 @@ def text_pass_4b_x(s):
|
||||||
s = s.replace(u'x\u2014', x_marker + u'\u2014')
|
s = s.replace(u'x\u2014', x_marker + u'\u2014')
|
||||||
s = s.replace('x.', x_marker + '.')
|
s = s.replace('x.', x_marker + '.')
|
||||||
s = s.replace('x,', x_marker + ',')
|
s = s.replace('x,', x_marker + ',')
|
||||||
|
s = s.replace('x is', x_marker + ' is')
|
||||||
|
s = s.replace('x can\'t', x_marker + ' can\'t')
|
||||||
s = s.replace('x/x', x_marker + '/' + x_marker)
|
s = s.replace('x/x', x_marker + '/' + x_marker)
|
||||||
|
s = s.replace('x target', x_marker + ' target')
|
||||||
|
s = s.replace('si' + x_marker + ' target', 'six target')
|
||||||
|
s = s.replace('avara' + x_marker, 'avarax')
|
||||||
|
# there's also some stupid ice age card that wants -x/-y
|
||||||
|
s = s.replace('/~', '/-')
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
|
@ -249,6 +270,7 @@ def text_pass_5_counters(s):
|
||||||
'petrification counter',
|
'petrification counter',
|
||||||
'shred counter',
|
'shred counter',
|
||||||
'pupa counter',
|
'pupa counter',
|
||||||
|
'crystal counter',
|
||||||
]
|
]
|
||||||
usedcounters = []
|
usedcounters = []
|
||||||
for countername in allcounters:
|
for countername in allcounters:
|
||||||
|
@ -336,11 +358,11 @@ def text_pass_7_choice(s):
|
||||||
# the idea is to take 'choose n ~\n=ability\n=ability\n'
|
# the idea is to take 'choose n ~\n=ability\n=ability\n'
|
||||||
# to '[n = ability = ability]\n'
|
# to '[n = ability = ability]\n'
|
||||||
|
|
||||||
def choice_formatting_helper(s_helper, prefix, count):
|
def choice_formatting_helper(s_helper, prefix, count, suffix = ''):
|
||||||
single_choices = re.findall(ur'(' + prefix + ur'\n?(\u2022.*(\n|$))+)', s_helper)
|
single_choices = re.findall(ur'(' + prefix + ur'\n?(\u2022.*(\n|$))+)', s_helper)
|
||||||
for choice in single_choices:
|
for choice in single_choices:
|
||||||
newchoice = choice[0]
|
newchoice = choice[0]
|
||||||
newchoice = newchoice.replace(prefix, unary_marker + (unary_counter * count))
|
newchoice = newchoice.replace(prefix, unary_marker + (unary_counter * count) + suffix)
|
||||||
newchoice = newchoice.replace('\n', ' ')
|
newchoice = newchoice.replace('\n', ' ')
|
||||||
if newchoice[-1:] == ' ':
|
if newchoice[-1:] == ' ':
|
||||||
newchoice = choice_open_delimiter + newchoice[:-1] + choice_close_delimiter + '\n'
|
newchoice = choice_open_delimiter + newchoice[:-1] + choice_close_delimiter + '\n'
|
||||||
|
@ -358,6 +380,12 @@ def text_pass_7_choice(s):
|
||||||
s = choice_formatting_helper(s, ur'choose khans or dragons.', 1)
|
s = choice_formatting_helper(s, ur'choose khans or dragons.', 1)
|
||||||
# this is for 'an opponent chooses one', which will be a bit weird but still work out
|
# this is for 'an opponent chooses one', which will be a bit weird but still work out
|
||||||
s = choice_formatting_helper(s, ur'chooses one \u2014', 1)
|
s = choice_formatting_helper(s, ur'chooses one \u2014', 1)
|
||||||
|
# Demonic Pact has 'choose one that hasn't been chosen'...
|
||||||
|
s = choice_formatting_helper(s, ur"choose one that hasn't been chosen \u2014", 1,
|
||||||
|
suffix=" that hasn't been chosen")
|
||||||
|
# 'choose n. you may choose the same mode more than once.'
|
||||||
|
s = choice_formatting_helper(s, ur'choose three. you may choose the same mode more than once.', 3,
|
||||||
|
suffix='. you may choose the same mode more than once.')
|
||||||
|
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
@ -423,31 +451,138 @@ def text_pass_11_linetrans(s):
|
||||||
|
|
||||||
lines = s.split(utils.newline)
|
lines = s.split(utils.newline)
|
||||||
for line in lines:
|
for line in lines:
|
||||||
|
line = line.strip()
|
||||||
|
if line == '':
|
||||||
|
continue
|
||||||
if not '.' in line:
|
if not '.' in line:
|
||||||
# because this is inconsistent
|
# because this is inconsistent
|
||||||
line = line.replace(';', ',')
|
line = line.replace(',', ';')
|
||||||
sublines = line.split(',')
|
line = line.replace('; where', ', where') # Thromok the Insatiable
|
||||||
|
line = line.replace('; and', ', and') # wonky protection
|
||||||
|
line = line.replace('; from', ', from') # wonky protection
|
||||||
|
line = line.replace('upkeep;', 'upkeep,') # wonky protection
|
||||||
|
sublines = line.split(';')
|
||||||
for subline in sublines:
|
for subline in sublines:
|
||||||
|
subline = subline.strip()
|
||||||
if 'equip' in subline or 'enchant' in subline:
|
if 'equip' in subline or 'enchant' in subline:
|
||||||
prelines += [subline.strip()]
|
prelines += [subline]
|
||||||
elif 'countertype' in subline or 'kicker' in subline:
|
elif 'countertype' in subline or 'kicker' in subline:
|
||||||
postlines += [subline.strip()]
|
postlines += [subline]
|
||||||
else:
|
else:
|
||||||
keylines += [subline.strip()]
|
keylines += [subline]
|
||||||
elif u'\u2014' in line and not u' \u2014 ' in line:
|
elif u'\u2014' in line and not u' \u2014 ' in line:
|
||||||
if 'equip' in line or 'enchant' in line:
|
if 'equip' in line or 'enchant' in line:
|
||||||
prelines += [line.strip()]
|
prelines += [line]
|
||||||
elif 'countertype' in line or 'kicker' in line:
|
elif 'countertype' in line or 'kicker' in line:
|
||||||
postlines += [line.strip()]
|
postlines += [line]
|
||||||
else:
|
else:
|
||||||
keylines += [line.strip()]
|
keylines += [line]
|
||||||
else:
|
else:
|
||||||
mainlines += [line.strip()]
|
mainlines += [line]
|
||||||
|
|
||||||
alllines = prelines + keylines + mainlines + postlines
|
alllines = prelines + keylines + mainlines + postlines
|
||||||
return utils.newline.join(alllines)
|
return utils.newline.join(alllines)
|
||||||
|
|
||||||
|
|
||||||
|
# randomize the order of the lines
|
||||||
|
# not a text pass, intended to be invoked dynamically when encoding a card
|
||||||
|
# call this on fully encoded text, with mana symbols expanded
|
||||||
|
def separate_lines(text):
    """
    Sort the lines of encoded card text into five buckets.

    Returns a tuple ``(prelines, keylines, mainlines, costlines, postlines)``
    of lists of lines.  Empty text, or any text containing 'level up',
    produces five empty lists.
    """
    prelines, keylines, mainlines, costlines, postlines = [], [], [], [], []

    # forget about level up, ignore empty text too while we're at it
    if text == '' or 'level up' in text:
        return prelines, keylines, mainlines, costlines, postlines

    pre_prefixes = ('equip', 'fortify', 'enchant ', 'bestow')
    # probably could use optimization with a regex
    cost_prefixes = (
        'multikicker', 'kicker', 'suspend', 'echo', 'awaken',
        'buyback', 'dash', 'entwine', 'evoke', 'flashback',
        'madness', 'megamorph', 'morph', 'miracle', 'ninjutsu', 'overload',
        'prowl', 'recover', 'reinforce', 'replicate', 'scavenge', 'splice',
        'surge', 'unearth', 'transmute', 'transfigure',
    )
    post_prefixes = ('countertype',)
    key_prefixes = ('cumulative',)

    dash = utils.dash_marker

    # we've already done linetrans once, so some of the irregularities have been simplified
    for line in text.split(utils.newline):
        # cycling is a special case to handle the variants
        looks_like_cost = line.startswith(cost_prefixes) or 'cycling' in line

        if '.' not in line:
            # lines without a period
            if line.startswith(pre_prefixes):
                bucket = prelines
            elif line.startswith(post_prefixes):
                bucket = postlines
            elif looks_like_cost:
                bucket = costlines
            else:
                bucket = keylines
        elif (dash in line
              and ' ' + dash + ' ' not in line
              and 'non' + dash not in line):
            # a dash that is neither space-delimited nor part of 'non<dash>'
            if line.startswith(pre_prefixes):
                bucket = prelines
            elif looks_like_cost:
                bucket = costlines
            elif line.startswith(key_prefixes):
                bucket = keylines
            else:
                bucket = mainlines
        elif ': monstrosity' in line:
            bucket = costlines
        else:
            bucket = mainlines

        bucket.append(line)

    return prelines, keylines, mainlines, costlines, postlines
|
||||||
|
|
||||||
|
# Matches a choice group: the open delimiter, any run of characters, then the
# close delimiter.
# NOTE(review): '.*' is greedy, so two groups on one line would be matched as
# a single span -- confirm encoded lines never hold more than one group.
choice_re = re.compile(re.escape(utils.choice_open_delimiter) + r'.*' +
                       re.escape(utils.choice_close_delimiter))
# The bullet marker padded with one space on each side; randomize_choice
# splits and re-joins the parts of a choice group on this string.
choice_divider = ' ' + utils.bullet_marker + ' '
|
||||||
|
def randomize_choice(line):
    """
    Shuffle the options inside each choice group found in ``line``.

    A group is split on ``choice_divider``; the first part stays in place and
    the remaining parts are shuffled.  Groups that split into fewer than three
    parts are left untouched.  Returns the rewritten line, or the line itself
    when it contains no group.
    """
    shuffled_line = line
    for group in choice_re.findall(line):
        # drop the first and last character (the surrounding delimiters)
        pieces = group[1:-1].split(choice_divider)
        if len(pieces) < 3:
            continue
        head, options = pieces[:1], pieces[1:]
        random.shuffle(options)
        rebuilt = (utils.choice_open_delimiter
                   + choice_divider.join(head + options)
                   + utils.choice_close_delimiter)
        # replace only the first occurrence of this group
        shuffled_line = shuffled_line.replace(group, rebuilt, 1)
    return shuffled_line
|
||||||
|
|
||||||
|
|
||||||
|
def randomize_lines(text):
    """
    Return ``text`` with its lines shuffled within their categories.

    The text is split with separate_lines; pre, key, main and cost lines are
    each shuffled among themselves and the categories are re-joined in that
    order, followed by the post lines unshuffled.  Main lines ending in the
    choice close delimiter also get their options shuffled.  Empty text and
    text containing 'level up' are returned unchanged.
    """
    if text == '' or 'level up' in text:
        return text

    prelines, keylines, mainlines, costlines, postlines = separate_lines(text)

    random.shuffle(prelines)
    random.shuffle(keylines)

    closer = utils.choice_close_delimiter
    new_mainlines = [randomize_choice(line) if line.endswith(closer) else line
                     for line in mainlines]
    random.shuffle(new_mainlines)

    random.shuffle(costlines)
    # postlines are not shuffled: only one kind ever (countertype)

    ordered = prelines + keylines + new_mainlines + costlines + postlines
    return utils.newline.join(ordered)
|
||||||
|
|
||||||
|
|
||||||
# Text unpasses, for decoding. All assume the text inside a Manatext, so don't do anything
|
# Text unpasses, for decoding. All assume the text inside a Manatext, so don't do anything
|
||||||
# weird with the mana cost symbol.
|
# weird with the mana cost symbol.
|
||||||
|
|
||||||
|
|
38
lib/utils.py
38
lib/utils.py
File diff suppressed because one or more lines are too long
351
mtg_sweep1.ipynb
Normal file
351
mtg_sweep1.ipynb
Normal file
File diff suppressed because one or more lines are too long
167
scripts/analysis.py
Executable file
167
scripts/analysis.py
Executable file
|
@ -0,0 +1,167 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
# scipy is kinda necessary
|
||||||
|
import scipy
|
||||||
|
import scipy.stats
|
||||||
|
import numpy as np
|
||||||
|
import math
|
||||||
|
|
||||||
|
def mean_nonan(l):
    """Return the arithmetic mean (np.mean) of the non-NaN values in l."""
    kept = []
    for value in l:
        if math.isnan(value):
            continue
        kept.append(value)
    return np.mean(kept)
|
||||||
|
|
||||||
|
def gmean_nonzero(l):
    """Return the geometric mean (scipy.stats.gmean) of l, skipping zeros and NaNs."""
    kept = []
    for value in l:
        if value == 0 or math.isnan(value):
            continue
        kept.append(value)
    return scipy.stats.gmean(kept)
|
||||||
|
|
||||||
|
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
|
||||||
|
sys.path.append(libdir)
|
||||||
|
datadir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data')
|
||||||
|
import jdecode
|
||||||
|
|
||||||
|
import mtg_validate
|
||||||
|
import ngrams
|
||||||
|
|
||||||
|
def annotate_values(values):
    """
    Replace each (total, good, bad) triple in ``values`` with a labelled
    OrderedDict, in place, and return the same mapping.
    """
    labels = ('total', 'good', 'bad')
    for key in list(values):
        total, good, bad = values[key]
        values[key] = OrderedDict(zip(labels, (total, good, bad)))
    return values
|
||||||
|
|
||||||
|
def print_statistics(stats, ident = 0):
    """
    Print a nested statistics mapping, one entry per line.

    OrderedDict values are printed recursively with two more spaces of
    indentation; other dicts and lists are summarized by their length;
    anything else is printed with str().

    ident -- number of leading spaces for this nesting level
    """
    pad = ' ' * ident
    for key in stats:
        value = stats[key]
        label = pad + str(key)
        if isinstance(value, OrderedDict):
            print(label + ':')
            print_statistics(value, ident=ident+2)
            continue
        if isinstance(value, dict):
            summary = '<dict with ' + str(len(value)) + ' entries>'
        elif isinstance(value, list):
            summary = '<list with ' + str(len(value)) + ' entries>'
        else:
            summary = str(value)
        print(label + ': ' + summary)
|
||||||
|
|
||||||
|
def _parse_checkpoint_name(fname):
    """
    Unpack checkpoint metadata encoded in a sample file name (hacky).

    The base name is split on '_epoch'; the text after it, minus its last
    four characters (presumably an extension such as '.txt'), is read as
    '<epoch>_<vloss>.<ident...>.<temp>'.  Returns an OrderedDict with keys
    name, epoch, vloss, temp and ident, or None when the name does not have
    that shape.
    """
    try:
        final_name = os.path.basename(fname)
        halves = final_name.split('_epoch')
        cp_name = halves[0]
        cp_info = halves[1][:-4]
        info_halves = cp_info.split('_')
        cp_epoch = float(info_halves[0])
        fragments = info_halves[1].split('.')
        # vloss and temp each span two dot-separated fragments ('0' + '1870')
        cp_vloss = float('.'.join(fragments[:2]))
        cp_temp = float('.'.join(fragments[-2:]))
        cp_ident = '.'.join(fragments[2:-2])
    except (IndexError, ValueError):
        # missing pieces or non-numeric fields: not a checkpoint sample name
        return None
    return OrderedDict([('name', cp_name),
                        ('epoch', cp_epoch),
                        ('vloss', cp_vloss),
                        ('temp', cp_temp),
                        ('ident', cp_ident)])


def _card_perplexities(card, lm, sep):
    """
    Return (perp, perp_per, perp_max, perp_per_max) for one card.

    With ``sep`` each non-empty text line is scored on its own and the
    results are combined (geometric mean and maximum); otherwise the whole
    text is scored once.  Cards with nothing to score yield all zeros.
    """
    nothing = (0.0, 0.0, 0.0, 0.0)
    if len(card.text.text) == 0:
        return nothing

    if sep:
        # vectorize each line once, dropping lines that produce no tokens
        vtexts = [vtext for vtext in
                  (line.vectorize().split() for line in card.text_lines)
                  if len(vtext) > 0]
        if not vtexts:
            # max() of an empty list would raise
            return nothing
        perps = [lm.perplexity(vtext) for vtext in vtexts]
        perps_per = [perp / float(len(vtext))
                     for perp, vtext in zip(perps, vtexts)]
        return (gmean_nonzero(perps), gmean_nonzero(perps_per),
                max(perps), max(perps_per))

    vtext = card.text.vectorize().split()
    if not vtext:
        # avoid dividing by a zero token count
        return nothing
    perp = lm.perplexity(vtext)
    perp_per = perp / float(len(vtext))
    # a single text is its own maximum
    return perp, perp_per, perp, perp_per


def get_statistics(fname, lm = None, sep = False, verbose=False):
    """
    Collect statistics about the cards in ``fname``.

    fname   -- card file to open with jdecode.mtg_open_file
    lm      -- optional language model exposing perplexity(tokens); when
               given, per-card perplexities are added under 'ngram'
    sep     -- score each text line separately instead of the whole text
    verbose -- passed through to jdecode.mtg_open_file

    Returns an OrderedDict with 'cards', 'props', and, when available,
    'cp' (checkpoint info parsed from the file name), 'dists' (read from
    ``fname + '.dist'`` if that file exists) and 'ngram'.
    """
    stats = OrderedDict()
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    stats['cards'] = cards

    # unpack the name of the checkpoint - terrible and hacky
    cp = _parse_checkpoint_name(fname)
    if cp is not None:
        stats['cp'] = cp

    # validate
    ((total_all, total_good, total_bad, total_uncovered),
     values) = mtg_validate.process_props(cards)

    stats['props'] = annotate_values(values)
    stats['props']['overall'] = OrderedDict([('total', total_all),
                                             ('good', total_good),
                                             ('bad', total_bad),
                                             ('uncovered', total_uncovered)])

    # distances
    distfname = fname + '.dist'
    if os.path.isfile(distfname):
        # NOTE(review): the duplicate counters are tallied but not reported
        # anywhere yet -- confirm whether they belong in stats['dists'].
        name_dupes = 0
        card_dupes = 0
        with open(distfname, 'rt') as f:
            distlines = f.read().split('\n')
        dists = OrderedDict([('name', []), ('cbow', [])])
        for line in distlines:
            # expected fields: index | name | name distance | cbow distance
            fields = line.split('|')
            if len(fields) < 4:
                continue
            int(fields[0])  # still reject lines whose index is not an integer
            ndist = float(fields[2])
            cdist = float(fields[3])
            dists['name'].append(ndist)
            dists['cbow'].append(cdist)
            if ndist == 1.0:
                name_dupes += 1
            if cdist == 1.0:
                card_dupes += 1

        dists['name_mean'] = mean_nonan(dists['name'])
        dists['cbow_mean'] = mean_nonan(dists['cbow'])
        dists['name_geomean'] = gmean_nonzero(dists['name'])
        dists['cbow_geomean'] = gmean_nonzero(dists['cbow'])
        stats['dists'] = dists

    # n-grams
    if lm is not None:
        ngram = OrderedDict([('perp', []), ('perp_per', []),
                             ('perp_max', []), ('perp_per_max', [])])
        for card in cards:
            (perp, perp_per,
             perp_max, perp_per_max) = _card_perplexities(card, lm, sep)
            ngram['perp'].append(perp)
            ngram['perp_per'].append(perp_per)
            ngram['perp_max'].append(perp_max)
            ngram['perp_per_max'].append(perp_per_max)

        ngram['perp_mean'] = mean_nonan(ngram['perp'])
        ngram['perp_per_mean'] = mean_nonan(ngram['perp_per'])
        ngram['perp_geomean'] = gmean_nonzero(ngram['perp'])
        ngram['perp_per_geomean'] = gmean_nonzero(ngram['perp_per'])
        stats['ngram'] = ngram

    return stats
|
||||||
|
|
||||||
|
|
||||||
|
def main(infile, verbose = False):
    """
    Build an n-gram model from data/output.txt, gather statistics for
    ``infile`` against it (scoring lines separately), and print them.

    verbose is passed to get_statistics only; the model build is always
    run with verbose=True.
    """
    corpus_path = str(os.path.join(datadir, 'output.txt'))
    corpus_cards = jdecode.mtg_open_file(corpus_path)
    lm = ngrams.build_ngram_model(corpus_cards, 3,
                                  separate_lines=True, verbose=True)
    print_statistics(get_statistics(infile, lm=lm, sep=True, verbose=verbose))
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # command-line entry point: analysis.py [-v] infile

    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument('infile',
                        help='encoded card file or json corpus to process')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='verbose output')

    args = parser.parse_args()
    main(args.infile, verbose=args.verbose)
    # sys.exit rather than the bare exit() helper, which the site module
    # provides for interactive use and is not guaranteed to exist
    sys.exit(0)
|
91
scripts/autosample.py
Executable file
91
scripts/autosample.py
Executable file
|
@ -0,0 +1,91 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import random
|
||||||
|
|
||||||
|
def extract_cp_name(name):
    """
    Parse a checkpoint file name such as "lm_lstm_epoch50.00_0.1870.t7".

    Returns (epoch, vloss) as floats, or None when the name is not of the
    form 'lm_lstm_epoch<epoch>_<vloss>.t7'.  Names with the right prefix and
    suffix but a malformed middle also give None instead of raising, so one
    stray file cannot abort a directory scan.
    """
    prefix, suffix = 'lm_lstm_epoch', '.t7'
    if not (name.startswith(prefix) and name.endswith(suffix)):
        return None
    fields = name[len(prefix):-len(suffix)].split('_')
    if len(fields) != 2:
        return None
    try:
        return (float(fields[0]), float(fields[1]))
    except ValueError:
        # epoch or vloss is not a number
        return None
|
||||||
|
|
||||||
|
def sample(cp, temp, count, seed = None, ident = 'output'):
    """
    Run 'th sample.lua' on checkpoint ``cp`` and store the result next to it.

    The output file is '<cp>.<ident>.<temp>.txt'.  Its first line is the
    equivalent shell command (also echoed to stdout), followed by whatever
    the sampler writes to its standard output.

    cp    -- path to the checkpoint file
    temp  -- sampling temperature (passed as -temperature)
    count -- passed as -length
    seed  -- passed as -seed; a random one is drawn when None
    ident -- tag embedded in the output file name

    Returns False without sampling if the output file already exists,
    True after running the sampler.
    """
    outfile = cp + '.' + ident + '.' + str(temp) + '.txt'
    if os.path.exists(outfile):
        print(outfile + ' already exists, skipping')
        return False

    if seed is None:
        seed = random.randint(-1000000000, 1000000000)

    # An argument list with stdout redirected to the file replaces the old
    # shell string: paths containing spaces or shell metacharacters no
    # longer break (or get interpreted by) the command.
    args = ['th', 'sample.lua', cp,
            '-temperature', str(temp),
            '-length', str(count),
            '-seed', str(seed)]
    cmd = ' '.join(args) + ' >> ' + outfile
    print(cmd)
    with open(outfile, 'w') as f:
        f.write(cmd + '\n')
        # make sure the header line lands before the child's output
        f.flush()
        subprocess.call(args, stdout=f)
    return True
|
||||||
|
|
||||||
|
def find_best_cp(cpdir):
    """Return the path of the checkpoint in `cpdir` with the lowest loss.

    Only regular files directly inside `cpdir` whose names extract_cp_name
    accepts are considered. Returns None when there is no such file.
    """
    lowest_loss = None
    winner = None
    for entry in os.listdir(cpdir):
        candidate = os.path.join(cpdir, entry)
        if not os.path.isfile(candidate):
            continue
        parsed = extract_cp_name(entry)
        if parsed is None:
            continue
        _, loss = parsed
        # strict comparison: the first checkpoint seen wins a tie
        if lowest_loss is None or loss < lowest_loss:
            lowest_loss = loss
            winner = candidate
    return winner
|
||||||
|
|
||||||
|
def process_dir(cpdir, temp, count, seed = None, ident = 'output', verbose = False):
    """Sample from the best checkpoint in `cpdir`, then recurse into subdirs.

    temp, count, seed and ident are handed to sample() unchanged; verbose
    prints each directory as it is visited.
    """
    if verbose:
        print('processing ' + cpdir)

    chosen = find_best_cp(cpdir)
    if chosen is not None:
        sample(chosen, temp, count, seed=seed, ident=ident)

    # Subdirectories are visited whether or not this level had a checkpoint.
    for entry in os.listdir(cpdir):
        child = os.path.join(cpdir, entry)
        if not os.path.isdir(child):
            continue
        process_dir(child, temp, count,
                    seed=seed, ident=ident, verbose=verbose)
|
||||||
|
|
||||||
|
def main(rnndir, cpdir, temp, count, seed = None, ident = 'output', verbose = False):
    """Sample from the best checkpoint of every directory under `cpdir`.

    rnndir -- directory to run the sampler from (changed into with chdir)
    cpdir  -- root of the checkpoint tree to process
    The remaining arguments are forwarded to process_dir().

    Raises ValueError when either directory does not exist.
    """
    if not os.path.isdir(rnndir):
        raise ValueError('bad rnndir: ' + rnndir)
    if not os.path.isdir(cpdir):
        raise ValueError('bad cpdir: ' + cpdir)
    # Resolve cpdir before changing directory: a relative path was validated
    # against the current directory and must keep meaning the same place.
    cpdir = os.path.abspath(cpdir)
    os.chdir(rnndir)
    process_dir(cpdir, temp, count, seed=seed, ident=ident, verbose=verbose)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point for the sampling script.
    import argparse

    parser = argparse.ArgumentParser()

    parser.add_argument('rnndir',
                        help='base rnn directory, must contain sample.lua')
    parser.add_argument('cpdir',
                        help='checkpoint directory, all subdirectories will be processed')
    # Numeric options are converted by argparse, so a bad value produces a
    # usage error instead of a traceback from float()/int().
    parser.add_argument('-t', '--temperature', action='store', type=float, default=1.0,
                        help='sampling temperature')
    parser.add_argument('-c', '--count', action='store', type=int, default=1000000,
                        help='number of characters to sample each time')
    parser.add_argument('-s', '--seed', action='store', type=int, default=None,
                        help='fixed seed; if not present, a random seed will be used')
    parser.add_argument('-i', '--ident', action='store', default='output',
                        help='identifier to include in the output filenames')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='verbose output')

    args = parser.parse_args()
    main(args.rnndir, args.cpdir, args.temperature, args.count,
         seed=args.seed, ident=args.ident, verbose=args.verbose)
    # sys.exit instead of the interactive-only exit() helper
    sys.exit(0)
|
99
scripts/collect_checkpoints.py
Executable file
99
scripts/collect_checkpoints.py
Executable file
|
@ -0,0 +1,99 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
def cleanup_dump(dumpstr):
    """Strip the leading and trailing fragments from a raw sample dump.

    The dump is split on blank lines; the first two fragments and the last
    one are dropped and the rest are re-joined, each followed by a blank
    line. Returns '' when there are fewer than four fragments.
    """
    fragments = dumpstr.split('\n\n')
    if len(fragments) >= 4:
        kept = fragments[2:-1]
        return '\n\n'.join(kept) + '\n\n'
    return ''
|
||||||
|
|
||||||
|
def identify_checkpoints(basedir, ident):
    """Find sample dump files in `basedir` that carry the tag `ident`.

    A dump is named like
        lm_lstm_epoch25.00_0.3859.t7.output.1.0.txt
    i.e. checkpoint name, tag, temperature, ".txt".

    Returns a list of (dump_path, checkpoint_path, (epoch, vloss, temp))
    tuples, where the three inner values are the strings taken from the
    file name. The checkpoint path is derived from the name and is not
    checked for existence.
    """
    prefix = 'lm_lstm_epoch'
    found = []
    for entry in os.listdir(basedir):
        dump_path = os.path.join(basedir, entry)
        if not os.path.isfile(dump_path):
            continue
        if not (entry.startswith(prefix) and entry.endswith('.txt')):
            continue
        if ident not in entry:
            continue

        # attempt super hacky parsing of the part between prefix and ".txt",
        # e.g. "25.00_0.3859.t7.output.1.0"
        halves = entry[len(prefix):-4].split('_')
        if len(halves) != 2:
            continue
        epoch, remainder = halves
        pieces = remainder.split('.')
        if len(pieces) != 6:
            continue
        if pieces[3] != ident:
            continue

        vloss = pieces[0] + '.' + pieces[1]
        temp = pieces[4] + '.' + pieces[5]
        cp_path = os.path.join(basedir, prefix + epoch + '_' + vloss + '.t7')
        found.append((dump_path, cp_path, (epoch, vloss, temp)))
    return found
|
||||||
|
|
||||||
|
def process_dir(basedir, targetdir, ident, copy_cp = False, verbose = False):
    """Collect the sample dumps under `basedir` into `targetdir`.

    Every dump found by identify_checkpoints() is cleaned with
    cleanup_dump() and written to `targetdir` under a name prefixed with
    the name of the directory it came from. With copy_cp set, the matching
    checkpoint and the directory's command.txt are copied as well when they
    exist. Subdirectories of `basedir` are then processed the same way.
    """
    # Name of the directory itself, tolerating a trailing path separator.
    parent, dirname = os.path.split(basedir)
    if dirname == '':
        parent, dirname = os.path.split(parent)

    infos = identify_checkpoints(basedir, ident)
    for dump_src, cp_src, (epoch, vloss, temp) in infos:
        if verbose:
            print('found dumpfile ' + dump_src)
        stem = dirname + '_epoch' + epoch + '_' + vloss
        dump_dst = os.path.join(targetdir, stem + '.' + ident + '.' + temp + '.txt')
        cp_dst = os.path.join(targetdir, stem + '.t7')

        if verbose:
            print(' cpx ' + dump_src + ' ' + dump_dst)
        with open(dump_src, 'rt') as infile:
            with open(dump_dst, 'wt') as outfile:
                outfile.write(cleanup_dump(infile.read()))

        if copy_cp and os.path.isfile(cp_src):
            if verbose:
                print(' cp ' + cp_src + ' ' + cp_dst)
            shutil.copy(cp_src, cp_dst)

    # The training command is only worth keeping when something was found.
    if copy_cp and len(infos) > 0:
        cmd_src = os.path.join(basedir, 'command.txt')
        cmd_dst = os.path.join(targetdir, dirname + '.command')
        if os.path.isfile(cmd_src):
            if verbose:
                print(' cp ' + cmd_src + ' ' + cmd_dst)
            shutil.copy(cmd_src, cmd_dst)

    for entry in os.listdir(basedir):
        child = os.path.join(basedir, entry)
        if os.path.isdir(child):
            process_dir(child, targetdir, ident, copy_cp=copy_cp, verbose=verbose)
|
||||||
|
|
||||||
|
def main(basedir, targetdir, ident = 'output', copy_cp = False, verbose = False):
    """Entry point: collect dumps from the tree at `basedir` into `targetdir`."""
    process_dir(
        basedir,
        targetdir,
        ident,
        copy_cp=copy_cp,
        verbose=verbose,
    )
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point for the dump collection script.
    import argparse

    parser = argparse.ArgumentParser()

    # The help texts describe what main() does with each directory; the
    # previous ones were carried over from the sampling script.
    parser.add_argument('basedir',
                        help='directory to search for sample dumps, all subdirectories will be processed')
    parser.add_argument('targetdir',
                        help='directory to write the cleaned dumps (and copied checkpoints) to')
    parser.add_argument('-c', '--copy_cp', action='store_true',
                        help='copy checkpoints used to generate the output files')
    parser.add_argument('-i', '--ident', action='store', default='output',
                        help='identifier to look for to determine checkpoints')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='verbose output')

    args = parser.parse_args()
    main(args.basedir, args.targetdir, ident=args.ident, copy_cp=args.copy_cp, verbose=args.verbose)
    # sys.exit instead of the interactive-only exit() helper
    sys.exit(0)
|
81
scripts/distances.py
Executable file
81
scripts/distances.py
Executable file
|
@ -0,0 +1,81 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
|
||||||
|
sys.path.append(libdir)
|
||||||
|
import utils
|
||||||
|
import jdecode
|
||||||
|
from namediff import Namediff
|
||||||
|
from cbow import CBOW
|
||||||
|
|
||||||
|
def main(fname, oname, verbose = True, parallel = True):
    """Compute nearest-name and nearest-card distances for a card file.

    The cards are loaded from `fname`; for each card the closest name
    (Namediff) and the closest card (CBOW) are looked up with n=1, stored
    on the card as `nearest_names` / `nearest_cards`, and one line
        index|name|name distance|card distance
    per card is written to `oname`. `parallel` selects the *_par variants
    of the lookups.
    """
    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)

    # this could reasonably be some separate function
    # might make sense to merge cbow and namediff and have this be the main interface
    namediff = Namediff()
    cbow = CBOW()

    if verbose:
        print('Computing nearest names...')
    if parallel:
        nearest_names = namediff.nearest_par([c.name for c in cards], n=1)
    else:
        nearest_names = [namediff.nearest(c.name, n=1) for c in cards]

    if verbose:
        print('Computing nearest cards...')
    if parallel:
        nearest_cards = cbow.nearest_par(cards, n=1)
    else:
        nearest_cards = [cbow.nearest(c, n=1) for c in cards]

    for idx, card in enumerate(cards):
        card.nearest_names = nearest_names[idx]
        card.nearest_cards = nearest_cards[idx]

    # A nearest-encoding search by text edit distance
    # (namediff.nearest_card / nearest_card_par) was tried here as well, but
    # unfortunately takes ~30 hours on 8 cores for a 10MB dump.

    if verbose:
        print('...Done.')

    # write to a file to store the data, this is a terribly long computation
    # we could also just store this same info in the cards themselves as more fields...
    sep = '|'
    with open(oname, 'w') as ofile:
        for idx, card in enumerate(cards):
            # each nearest_* entry is a list of (distance, match) pairs
            ndist, _ = card.nearest_names[0]
            cdist, _ = card.nearest_cards[0]
            record = (str(idx) + sep + card.name + sep
                      + str(ndist) + sep
                      + str(cdist) + '\n')
            ofile.write(record.encode('utf-8'))
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point for the distance computation script.
    import argparse

    parser = argparse.ArgumentParser()

    parser.add_argument('infile',
                        help='encoded card file or json corpus to process')
    parser.add_argument('outfile',
                        help='name of output file, will be overwritten')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='verbose output')
    parser.add_argument('-p', '--parallel', action='store_true',
                        help='run in parallel on all cores')

    args = parser.parse_args()
    main(args.infile, args.outfile, verbose=args.verbose, parallel=args.parallel)
    # sys.exit instead of the interactive-only exit() helper
    sys.exit(0)
|
81
scripts/keydiff.py
Executable file
81
scripts/keydiff.py
Executable file
|
@ -0,0 +1,81 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
def parse_keyfile(f, d, constructor = lambda x: x):
    """Read "key: value" lines from `f` into the dict `d`.

    f           -- iterable of text lines (e.g. an open file)
    d           -- dict updated in place; later lines overwrite earlier keys
    constructor -- callable applied to each stripped value before storing

    Lines that do not contain exactly one ':' are skipped. Whatever
    `constructor` raises for a bad value propagates to the caller.
    """
    for line in f:
        # a real list, so len() works even where map() is lazy
        kv = [s.strip() for s in line.split(':')]
        if len(kv) != 2:
            continue
        d[kv[0]] = constructor(kv[1])
|
||||||
|
|
||||||
|
def merge_dicts(d1, d2):
    """Combine two dicts into one mapping each key to a (v1, v2) pair.

    The side a key is missing from contributes None to the pair.
    """
    merged = {}
    for key in d1:
        right = d2[key] if key in d2 else None
        merged[key] = (d1[key], right)
    for key in d2:
        if key in merged:
            continue
        merged[key] = (None, d2[key])
    return merged
|
||||||
|
|
||||||
|
def main(fname1, fname2, verbose = True):
    """Print a comparison of two "key: count" files.

    fname1 is the baseline, fname2 the file compared against it. Three
    sections are printed, each ordered by descending count: keys present in
    both files (with the count ratio normalised by each file's total), keys
    only in the first file, and keys only in the second.
    """
    if verbose:
        print('opening ' + fname1 + ' as base key/value store')
        print('opening ' + fname2 + ' as target key/value store')

    d1 = {}
    d2 = {}
    with open(fname1, 'rt') as f1:
        parse_keyfile(f1, d1, int)
    with open(fname2, 'rt') as f2:
        parse_keyfile(f2, d2, int)

    tot1 = sum(d1.values())
    tot2 = sum(d2.values())

    if verbose:
        print(' ' + fname1 + ': ' + str(len(d1)) + ', total ' + str(tot1))
        print(' ' + fname2 + ': ' + str(len(d2)) + ', total ' + str(tot2))

    d_merged = merge_dicts(d1, d2)

    ratios = {}
    only_1 = {}
    only_2 = {}
    for k in d_merged:
        (v1, v2) = d_merged[k]
        if v1 is None:
            only_2[k] = v2
        elif v2 is None:
            only_1[k] = v1
        else:
            # relative frequency in file 2 over relative frequency in file 1
            ratios[k] = float(v2 * tot1) / float(v1 * tot2)

    # key= sorting replaces the cmp-function form, which newer Python
    # versions no longer accept; the resulting order is the same.
    print('shared: ' + str(len(ratios)))
    for k in sorted(ratios, key=lambda x: d2[x], reverse=True):
        print(' ' + k + ': ' + str(d2[k]) + '/' + str(d1[k]) + ' (' + str(ratios[k]) + ')')
    print('')

    print('1 only: ' + str(len(only_1)))
    for k in sorted(only_1, key=lambda x: d1[x], reverse=True):
        print(' ' + k + ': ' + str(d1[k]))
    print('')

    print('2 only: ' + str(len(only_2)))
    for k in sorted(only_2, key=lambda x: d2[x], reverse=True):
        print(' ' + k + ': ' + str(d2[k]))
    print('')
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point for the key file diff script.
    import argparse
    import sys

    parser = argparse.ArgumentParser()

    parser.add_argument('file1',
                        help='base key file to diff against')
    # Required: main() opens both files unconditionally, so leaving this
    # optional only turned a usage error into a crash inside open().
    parser.add_argument('file2',
                        help='other file to compare against the baseline')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='verbose output')

    args = parser.parse_args()
    main(args.file1, args.file2, verbose=args.verbose)
    # sys.exit instead of the interactive-only exit() helper
    sys.exit(0)
|
482
scripts/mtg_validate.py
Executable file
482
scripts/mtg_validate.py
Executable file
|
@ -0,0 +1,482 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
|
||||||
|
sys.path.append(libdir)
|
||||||
|
import utils
|
||||||
|
import jdecode
|
||||||
|
|
||||||
|
# Locate the data directory next to the library and, when an "ngrams"
# directory exists there, load every n-gram count table in it.
# gramdicts maps the gram size (taken from a ".<digits>g" file suffix) to a
# dict of "word word ..." -> count.
datadir = os.path.realpath(os.path.join(libdir, '../data'))
gramdir = os.path.join(datadir, 'ngrams')
compute_ngrams = False
gramdicts = {}
if os.path.isdir(gramdir):
    import keydiff
    compute_ngrams = True
    for fname in os.listdir(gramdir):
        # "+" rather than "*": a name ending in a bare ".g" carries no gram
        # size and would otherwise fail in int('').
        suffixes = re.findall(r'\.[0-9]+g$', fname)
        if suffixes:
            grams = int(suffixes[0][1:-1])
            d = {}
            with open(os.path.join(gramdir, fname), 'rt') as f:
                keydiff.parse_keyfile(f, d, int)
            gramdicts[grams] = d
|
||||||
|
|
||||||
|
def rare_grams(card, thresh = 2, grams = 2):
    """Count the rare n-grams in a card's text.

    An n-gram of `grams` consecutive words (taken line by line from
    card.text_lines_words) is rare when it is missing from the loaded count
    table for that size or its count there is below `thresh`.

    Returns the number of rare n-grams, or None when no table for `grams`
    has been loaded.
    """
    if grams not in gramdicts:
        return None
    table = gramdicts[grams]
    rare_total = 0
    for words in card.text_lines_words:
        for start in range(0, len(words) - (grams - 1)):
            key = ' '.join(words[start + k] for k in range(0, grams))
            if key not in table or table[key] < thresh:
                rare_total += 1
    return rare_total
|
||||||
|
|
||||||
|
def list_only(l, items):
    """Return True when every element of `l` is also in `items`."""
    return all(element in items for element in l)
|
||||||
|
|
||||||
|
def pct(x, total):
    """Format x as a percentage of total, e.g. pct(1, 4) -> '(25.0%)'.

    The number is cut to at most five characters. A zero total (for
    example an empty card file) is reported as '(0.0%)' instead of raising
    ZeroDivisionError.
    """
    if not total:
        return '(0.0%)'
    pctstr = 100.0 * float(x) / float(total)
    return '(' + str(pctstr)[:5] + '%)'
|
||||||
|
|
||||||
|
def check_types(card):
    """Check that the card's types form an allowed combination.

    The first of instant, sorcery, creature, planeswalker found in
    card.types decides which other types may accompany it; cards with none
    of those may only combine tribal, artifact, land and enchantment.
    """
    rules = [
        ('instant', ['tribal', 'instant']),
        ('sorcery', ['tribal', 'sorcery']),
        ('creature', ['tribal', 'creature', 'artifact', 'land', 'enchantment']),
        ('planeswalker', ['tribal', 'planeswalker', 'artifact', 'land', 'enchantment']),
    ]
    for primary, allowed in rules:
        if primary in card.types:
            return list_only(card.types, allowed)
    return list_only(card.types, ['tribal', 'artifact', 'land', 'enchantment'])
|
||||||
|
|
||||||
|
def check_pt(card):
    """Check that power/toughness and loyalty agree with the card's types.

    A creature (or anything with a pt value) must be a creature with
    exactly one '/' in its pt and no loyalty. Otherwise a planeswalker (or
    anything with a loyalty value) must be a planeswalker with loyalty and
    no pt. Returns None when neither rule applies.
    """
    is_creature = 'creature' in card.types
    if is_creature or card.pt:
        well_formed = (is_creature
                       and len(re.findall(re.escape('/'), card.pt)) == 1)
        return well_formed and not card.loyalty

    is_walker = 'planeswalker' in card.types
    if is_walker or card.loyalty:
        has_loyalty = is_walker and card.loyalty
        return has_loyalty and not card.pt

    return None
|
||||||
|
|
||||||
|
def check_lands(card):
    """Check that a land has no mana cost; None for non-land cards."""
    if 'land' not in card.types:
        return None
    return card.cost.format() == '_NOCOST_'
|
||||||
|
|
||||||
|
# doesn't handle granted activated abilities in ""
def check_X(card):
    """Check that every X on the card is both defined and used.

    Returns True when the uses of X look consistent, False when an X is
    defined twice or defined without being used, and None when the card
    does not involve X at all.
    """
    def mentions_x(side_text, side_costs):
        # X written literally, or inside one of the costs that the mana
        # placeholders on this side of the ability stand for
        return 'X' in side_text or (
            utils.reserved_mana_marker in side_text
            and 'X' in ''.join(map(lambda c: c.encode(), side_costs)))

    definition_phrases = ('X is', 'pay {X', 'pay X', 'reveal X', 'may tap X')

    verdict = None
    x_in_cost = 'X' in card.cost.encode()
    extra_cost_lines = 0
    cost_lines = 0
    use_lines = 0

    for mt in card.text_lines:
        sides = mt.text.split(':')
        if len(sides) == 2:
            activation, effect = sides
            # the first placeholders belong to the activation cost, the
            # rest to the effect
            n_activation = len(re.findall(re.escape(utils.reserved_mana_marker), activation))
            activation_costs = mt.costs[:n_activation]
            effect_costs = mt.costs[n_activation:]

            if mentions_x(activation, activation_costs):
                if x_in_cost:
                    return False # bad, duplicated Xs in costs

                if mentions_x(effect, effect_costs):
                    verdict = True # good, defined X is either specified or used
                    if 'monstrosity' in effect:
                        extra_cost_lines += 1
                    continue
                if 'remove X % counters' in activation and 'each counter removed' in effect:
                    verdict = True # Blademane Baku
                    continue
                if 'note' in effect:
                    verdict = True # Ice Cauldron
                    continue
                return False # bad, defined X is unused

        # we've checked all cases where an X occurs in an activation cost
        linetext = mt.encode()
        x_count = len(re.findall(r'X', linetext))
        defs = sum(len(re.findall(re.escape(phrase), linetext))
                   for phrase in definition_phrases)

        if x_in_cost:
            if x_count:
                verdict = True # defined and used or specified in some way
        elif x_count > 0:
            if ((x_count > 1 and defs > 0) # look for multiples
                    or 'suspend' in linetext or 'bloodthirst' in linetext # special case keywords
                    or ('reinforce' in linetext and x_count > 2) # this should work
                    or 'contain {X' in linetext or 'with {X' in linetext):
                verdict = True
            elif ('additional cost' in linetext
                  or 'morph' in linetext
                  or 'kicker' in linetext):
                cost_lines += 1
            else:
                use_lines += 1

    if x_in_cost and not verdict:
        if 'sunburst' in card.text.text or 'spent to cast' in card.text.text:
            return True # Engineered Explosives, Skyrider Elf
        return False # otherwise we should have seen X somewhere if it was in the cost

    if cost_lines > 0 or use_lines > 0:
        # dreams, etc.: exactly one line defining the cost plus at least
        # one line using it
        return (cost_lines + extra_cost_lines) == 1 and use_lines > 0

    return verdict
|
||||||
|
|
||||||
|
def check_kicker(card):
    """Check that a kicker cost and a "kicked" clause appear together.

    Returns None when the text mentions neither word.
    """
    # also lazy and simple
    body = card.text.text
    if not ('kicker' in body or 'kicked' in body):
        return None
    # could also check for costs, at least make 'it's $ kicker,' not count as a kicker ability
    stripped = body.replace(utils.reserved_mana_marker + ' kicker', '')
    return 'kicker' in stripped and 'kicked' in stripped
|
||||||
|
|
||||||
|
def check_counters(card):
    """Check that a counter marker is used more than once and declared.

    The declaration is the text 'countertype ' followed by the marker.
    Returns None when the marker does not occur in the card text.
    """
    body = card.text.text
    marker = utils.counter_marker
    occurrences = len(re.findall(re.escape(marker), body))
    if occurrences <= 0:
        return None
    return occurrences > 1 and 'countertype ' + marker in body
|
||||||
|
|
||||||
|
def check_choices(card):
    """Check that choice brackets and bullets are well formed.

    Brackets must be balanced with at least one bullet, and no bullet or
    bracket may be left over once every complete choice group has been
    removed. Returns None when the text contains none of these markers.
    """
    body = card.text.text

    def tally(token, haystack):
        return len(re.findall(re.escape(token), haystack))

    markers = (utils.bullet_marker,
               utils.choice_open_delimiter,
               utils.choice_close_delimiter)
    bullets, opened, closed = [tally(m, body) for m in markers]
    if bullets + opened + closed <= 0:
        return None
    if not (opened == closed and bullets > 0):
        return False

    # could compile ahead of time
    choice_regex = (re.escape(utils.choice_open_delimiter) + re.escape(utils.unary_marker)
                    + r'.*' + re.escape(utils.bullet_marker) + r'.*'
                    + re.escape(utils.choice_close_delimiter))
    remainder = re.sub(choice_regex, '', body)
    return sum(tally(m, remainder) for m in markers) == 0
|
||||||
|
|
||||||
|
def check_auras(card):
    """Report whether the card is enchantment-like.

    Returns True for a card that is an enchantment, has the aura subtype,
    or mentions 'enchant' in its text, and None otherwise.
    """
    # a bit loose
    # NOTE(review): the value returned is the same condition that selects
    # the card, so this check can never come out False - confirm whether a
    # stricter test was intended.
    enchantment_like = ('enchantment' in card.types
                        or 'aura' in card.subtypes
                        or 'enchant' in card.text.text)
    if not enchantment_like:
        return None
    return True
|
||||||
|
|
||||||
|
def check_equipment(card):
    """Check that an equipment card mentions 'equip'; None for other cards."""
    # probably even looser, could check for actual equip abilities and noncreatureness
    if 'equipment' not in card.subtypes:
        return None
    return 'equip' in card.text.text
|
||||||
|
|
||||||
|
def check_planeswalkers(card):
    """Check that a planeswalker's text consists of loyalty abilities.

    A line is good when it opens with a loyalty cost (an optional sign and
    a unary number, or a signed X) followed by ':'. Lines about being a
    commander, counter types or transforming are ignored; any other line
    is bad. The card passes with more than one good line and no bad one.
    Returns None for cards that are not planeswalkers.
    """
    if 'planeswalker' not in card.types:
        return None

    numeric_cost = r'^[+-]?' + re.escape(utils.unary_marker) + re.escape(utils.unary_counter) + '*:'
    x_cost = r'^[-+]' + re.escape(utils.x_marker) + '+:'

    good = 0
    bad = 0
    for line in card.text_lines:
        text = line.text
        if (len(re.findall(numeric_cost, text)) == 1
                or len(re.findall(x_cost, text)) == 1):
            good += 1
        elif ('can be your commander' in text
              or 'countertype' in text
              or 'transform' in text):
            continue
        else:
            bad += 1
    return good > 1 and bad == 0
|
||||||
|
|
||||||
|
def check_levelup(card):
    """Check the structure of a card that mentions levels.

    Requires exactly one line counted as a level-up line and at least one
    line counted as a level line. Returns None when the text does not
    contain 'level'.
    """
    if 'level' not in card.text.text:
        return None

    level_counter = 'countertype ' + utils.counter_marker + ' level'
    up_count = 0
    level_count = 0
    for line in card.text_lines:
        text = line.text
        if level_counter in text:
            # the counter declaration counts on both sides
            up_count += 1
            level_count += 1
        elif 'with level up' in text:
            level_count += 1
        elif 'level up' in text:
            up_count += 1
        elif 'level' in text:
            level_count += 1
    return up_count == 1 and level_count > 0
|
||||||
|
|
||||||
|
def check_activated(card):
    """Check that activated abilities only appear on permanent types.

    A line counts as an activated ability when it contains a '.' and, with
    quoted passages removed, contains a ':' without matching one of the
    exempt phrases. Returns None when no such line is found.
    """
    exempt = ('forecast',
              'return ' + utils.this_marker + ' from your graveyard',
              'on the stack')
    activated = 0
    for line in card.text_lines:
        if '.' not in line.text:
            continue
        # abilities granted inside quotes do not belong to this card
        unquoted = re.sub(r'"[^"]*"', '', line.text)
        if any(phrase in unquoted for phrase in exempt):
            continue
        if ':' in unquoted:
            activated += 1

    if activated > 0:
        return list_only(card.types, ['creature', 'land', 'artifact', 'enchantment', 'planeswalker', 'tribal'])
    return None
|
||||||
|
|
||||||
|
def check_triggered(card):
    """Check that triggered abilities only appear on permanent types.

    Enter/leave/die triggers of the card itself, and lines opening with
    'at the beginning' or 'when' that match none of the exempt phrases,
    require a permanent type. When only exempt triggers are present the
    result is True; with no trigger at all it is None.
    """
    this = utils.this_marker
    # phrases marking triggers that do not need the card on the battlefield
    exempt = (
        'from your graveyard',
        'in your graveyard',
        'if ' + this + ' is suspended',
        'if that card is exiled',
        'if ' + this + ' is exiled',
        'when the creature ' + this + ' haunts',
        'when you cycle ' + this,
        'when you cast ' + this,
        'this turn',
        'this combat',
        'your next upkeep',
        'from your library',
        'you discard ' + this,
        'you to discard ' + this,
    )

    triggered = 0
    triggered_2 = 0
    for line in card.text_lines:
        text = line.text
        if 'when ' + this + ' enters the battlefield' in text:
            triggered += 1
        if 'when ' + this + ' leaves the battlefield' in text:
            triggered += 1
        if 'when ' + this + ' dies' in text:
            triggered += 1
        elif text[:16] == 'at the beginning' or text[:4] == 'when':
            if any(phrase in text for phrase in exempt):
                triggered_2 += 1
            else:
                triggered += 1

    if triggered > 0:
        return list_only(card.types, ['creature', 'land', 'artifact', 'enchantment', 'planeswalker', 'tribal'])
    if triggered_2:
        return True
    return None
|
||||||
|
|
||||||
|
def check_chosen(card):
    """Check that text referring to something 'chosen' also explains the choice.

    Returns None when the text does not contain 'chosen'.
    """
    body = card.text.text
    if 'chosen' not in body:
        return None
    explanations = ('choose', 'chosen at random', 'name', 'is chosen', 'search')
    return any(phrase in body for phrase in explanations)
|
||||||
|
|
||||||
|
def check_shuffle(card):
    """Check that every library search is followed up appropriately.

    A line containing both 'search' and 'library' passes when it also
    contains one of the accepted follow-up words. Returns True when all
    such lines pass, False when any fails, and None when there is none.
    """
    # sadly, this does not detect spurious shuffling
    follow_ups = ('shuffle', 'searches', 'searched', 'searching', 'rest', 'instead')
    outcomes = []
    for line in card.text_lines:
        text = line.text
        if 'search' in text and 'library' in text:
            outcomes.append(any(word in text for word in follow_ups))
    if not outcomes:
        return None
    return all(outcomes)
|
||||||
|
|
||||||
|
def check_quotes(card):
    """Check that double quotes are balanced on every line that has any.

    Returns True when every quoted line has an even quote count, False
    when any has an odd count, and None when no line contains a quote.
    """
    outcomes = []
    for line in card.text_lines:
        text = line.text
        quote_total = len(re.findall(re.escape('"'), text))
        # HACK: the '" pattern in the training set is actually incorrect,
        # so each occurrence is counted once more
        quote_total += len(re.findall(re.escape('\'"'), text))
        if quote_total > 0:
            outcomes.append(quote_total % 2 == 0)
    if not outcomes:
        return None
    return all(outcomes)
|
||||||
|
|
||||||
|
# Ordered table of property name -> checker. Each checker takes a card and
# returns True/False, or None when the property does not apply to it.
# The property name is the checker's function name without its "check_"
# prefix.
props = OrderedDict(
    (checker.__name__[len('check_'):], checker)
    for checker in (
        check_types,
        check_pt,
        check_lands,
        check_X,
        check_kicker,
        check_counters,
        check_choices,
        check_quotes,
        check_auras,
        check_equipment,
        check_planeswalkers,
        check_levelup,
        check_chosen,
        check_shuffle,
        check_activated,
        check_triggered,
    )
)
|
||||||
|
|
||||||
|
def process_props(cards, dump = False, uncovered = False):
    """Run every checker in `props` over `cards` and tally the results.

    dump      -- print each card that fails a check (a few known-awkward
                 cards are never printed)
    uncovered -- print each card to which no check other than 'types'
                 applied

    Returns ((total, good, bad, uncovered), values), where `values` maps
    each property name to its own (total, good, bad) counts. A card is
    good overall when none of the applicable checks failed.
    """
    # cards that fail checks for reasons not worth printing
    never_dumped = ['demonic pact', 'lavaclaw reaches',
                    "ertai's trickery", 'rumbling aftershocks', # i hate these
                    ]

    n_all = 0
    n_good = 0
    n_bad = 0
    n_uncovered = 0
    values = OrderedDict((name, (0, 0, 0)) for name in props)

    for card in cards:
        n_all += 1
        passed_all = True
        covered = False
        for name in props:
            seen, passed, failed = values[name]
            outcome = props[name](card)
            if outcome is not None:
                seen += 1
                # 'types' applies to every card, so it does not count as coverage
                if name != 'types':
                    covered = True
                if outcome:
                    passed += 1
                else:
                    failed += 1
                    passed_all = False
                    if card.name not in never_dumped and dump:
                        print('---- ' + name + ' ----')
                        print(card.encode())
                        print(card.format())
            values[name] = (seen, passed, failed)

        if passed_all:
            n_good += 1
        else:
            n_bad += 1

        if not covered:
            n_uncovered += 1
            if uncovered:
                print('---- uncovered ----')
                print(card.encode())
                print(card.format())

    return ((n_all, n_good, n_bad, n_uncovered),
            values)
|
||||||
|
|
||||||
|
def main(fname, oname = None, verbose = False, dump = False):
    """Validate the cards in `fname` and print summary statistics.

    fname   -- encoded card file or json corpus to load
    oname   -- accepted for command-line compatibility; not used here
    verbose -- passed on to the card loader
    dump    -- print every card that fails a check

    The rare n-gram histogram branch is switched off by the hard-coded
    do_grams flag below.
    """
    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)

    do_grams = False

    if do_grams:
        # histogram of the share of rare bigrams per card, as a percentage
        # of the card's word count
        rg = {}
        for card in cards:
            g = rare_grams(card, thresh=2, grams=2)
            if len(card.text_words) > 0:
                g = int(1.0 + (float(g) * 100.0 / float(len(card.text_words))))
            if g in rg:
                rg[g] += 1
            else:
                rg[g] = 1
            if g >= 60:
                print(g)
                print(card.format())

        tot = 0
        vmax = sum(rg.values())
        pct90 = None
        pct95 = None
        pct99 = None
        for i in sorted(rg):
            print(str(i) + ' rare ngrams: ' + str(rg[i]))
            tot += rg[i]
            if pct90 is None and tot >= vmax * 0.90:
                pct90 = i
            if pct95 is None and tot >= vmax * 0.95:
                pct95 = i
            if pct99 is None and tot >= vmax * 0.99:
                pct99 = i

        print('90% - ' + str(pct90))
        print('95% - ' + str(pct95))
        print('99% - ' + str(pct99))

    else:
        ((total_all, total_good, total_bad, total_uncovered),
         values) = process_props(cards, dump=dump)

        # summary
        print('-- overall --')
        print(' total : ' + str(total_all))
        print(' good : ' + str(total_good) + ' ' + pct(total_good, total_all))
        print(' bad : ' + str(total_bad) + ' ' + pct(total_bad, total_all))
        print(' uncovered: ' + str(total_uncovered) + ' ' + pct(total_uncovered, total_all))
        print('----')

        # breakdown; all percentages are relative to the number of cards
        for prop in props:
            (total, good, bad) = values[prop]
            print(prop + ':')
            print(' total: ' + str(total) + ' ' + pct(total, total_all))
            print(' good : ' + str(good) + ' ' + pct(good, total_all))
            print(' bad : ' + str(bad) + ' ' + pct(bad, total_all))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point for the validation script.
    import argparse

    parser = argparse.ArgumentParser()

    parser.add_argument('infile',
                        help='encoded card file or json corpus to process')
    parser.add_argument('outfile', nargs='?', default=None,
                        help='name of output file, will be overwritten')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='verbose output')
    parser.add_argument('-d', '--dump', action='store_true',
                        help='print invalid cards')

    args = parser.parse_args()
    main(args.infile, args.outfile, verbose=args.verbose, dump=args.dump)
    # sys.exit instead of the interactive-only exit() helper
    sys.exit(0)
|
||||||
|
|
123
scripts/ngrams.py
Executable file
123
scripts/ngrams.py
Executable file
|
@ -0,0 +1,123 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
|
||||||
|
sys.path.append(libdir)
|
||||||
|
import jdecode
|
||||||
|
import nltk_model as model
|
||||||
|
|
||||||
|
def update_ngrams(lines, gramdict, grams):
    """Count every run of `grams` consecutive tokens in `lines`.

    lines    -- iterable of token sequences (each one indexable, with a length)
    gramdict -- dict mapping the space-joined n-gram to its count; updated
                in place
    grams    -- number of tokens per n-gram

    Sequences shorter than `grams` contribute nothing.  Returns None.
    """
    for tokens in lines:
        window_count = len(tokens) - grams + 1
        for start in range(window_count):
            key = ' '.join(tokens[start + offset] for offset in range(grams))
            gramdict[key] = gramdict.get(key, 0) + 1
|
||||||
|
|
||||||
|
def describe_bins(gramdict, bins):
    """Print how many n-grams fall into each frequency bin.

    gramdict -- dict mapping n-gram to its count
    bins     -- upper bounds of the bins (sorted here before use)

    An n-gram is counted in the first bin whose bound is >= its count; counts
    larger than every bound go into a final overflow slot, printed with the
    largest bound followed by '+'.  Empty bins are not printed.
    """
    edges = sorted(bins)
    overflow = len(edges)
    tallies = [0] * (overflow + 1)

    for ngram in gramdict:
        # first bin that admits this count, or the overflow slot on the end
        slot = next((k for k in range(overflow) if gramdict[ngram] <= edges[k]),
                    overflow)
        tallies[slot] += 1

    for slot, tally in enumerate(tallies):
        if tally > 0:
            if slot < overflow:
                label = str(edges[slot])
            else:
                label = str(edges[-1]) + '+'
            print(' ' + label + ': ' + str(tally))
|
||||||
|
|
||||||
|
def extract_language(cards, separate_lines = True):
    """Return the card text as a list of tokenized sentences.

    cards          -- iterable of cards exposing .text_lines and .text, whose
                      items provide a vectorize() method returning a string
    separate_lines -- if true, every text line of every card is one sentence;
                      otherwise each card's whole text is one sentence

    Returns a real list of word lists (each produced with str.split()), so
    callers can take len() of it regardless of how map() behaves.
    """
    if separate_lines:
        lang = [line.vectorize() for card in cards for line in card.text_lines]
    else:
        lang = [card.text.vectorize() for card in cards]
    return [sentence.split() for sentence in lang]
|
||||||
|
|
||||||
|
def build_ngram_model(cards, n, separate_lines = True, verbose = False):
    """Build a model.NgramModel of order n from the text of `cards`.

    cards          -- cards handed to extract_language()
    n              -- order of the n-gram model
    separate_lines -- forwarded to extract_language()
    verbose        -- print progress and the resulting model

    Returns the NgramModel, constructed with left and right padding enabled.
    """
    if verbose:
        print('generating ' + str(n) + '-gram model')

    sentences = extract_language(cards, separate_lines=separate_lines)
    if verbose:
        print('found ' + str(len(sentences)) + ' sentences')

    ngram_model = model.NgramModel(n, sentences, pad_left=True, pad_right=True)
    if verbose:
        print(ngram_model)

    return ngram_model
|
||||||
|
|
||||||
|
def main(fname, oname, gmin = 2, gmax = 8, nltk = False, sep = False, verbose = False):
    """Count n-grams in a card file, or build and pickle an nltk n-gram model.

    fname      -- encoded card file or json corpus (opened with
                  jdecode.mtg_open_file)
    oname      -- in nltk mode, the file the pickled model is written to;
                  otherwise the base name of the per-size output files,
                  which are named oname + '.<size>g'
    gmin, gmax -- smallest and largest gram size; anything int() accepts
    nltk       -- build one model with n = gmin instead of counting n-grams
    sep        -- forwarded to build_ngram_model as separate_lines
    verbose    -- print progress information

    In counting mode the process exits with status 1 if gmin < 2 or
    gmax < gmin.
    """
    # may need to set special arguments here
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    gmin = int(gmin)
    gmax = int(gmax)

    if nltk:
        lm = build_ngram_model(cards, gmin, separate_lines=sep, verbose=verbose)
        if verbose:
            teststr = 'when @ enters the battlefield'
            print('litmus test: perplexity of ' + repr(teststr))
            print(' ' + str(lm.perplexity(teststr.split())))
            print('pickling module to ' + oname)
        with open(oname, 'wb') as f:
            pickle.dump(lm, f)
        return

    bins = [1, 2, 3, 10, 30, 100, 300, 1000]
    if gmin < 2 or gmax < gmin:
        print('invalid gram sizes: ' + str(gmin) + '-' + str(gmax))
        sys.exit(1)

    for grams in range(gmin, gmax + 1):
        if verbose:
            print('generating ' + str(grams) + '-grams...')
        gramdict = {}
        for card in cards:
            update_ngrams(card.text_lines_words, gramdict, grams)

        oname_full = oname + '.' + str(grams) + 'g'
        if verbose:
            print(' writing ' + str(len(gramdict)) + ' unique ' + str(grams)
                  + '-grams to ' + oname_full)
            describe_bins(gramdict, bins)

        with open(oname_full, 'wt') as f:
            # most frequent first; a key function replaces the old cmp-style
            # comparator and keeps the same (stable) ordering
            for ngram in sorted(gramdict, key=gramdict.get, reverse=True):
                # NOTE(review): writing encoded bytes to a text-mode file only
                # works on Python 2, which the rest of this script targets.
                f.write((ngram + ': ' + str(gramdict[ngram]) + '\n').encode('utf-8'))
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and forward them to main().
    import argparse

    cli = argparse.ArgumentParser()

    # both positional arguments are required
    cli.add_argument('infile',
                     help='encoded card file or json corpus to process')
    cli.add_argument('outfile',
                     help='base name of output file, outputs ending in .2g, .3g etc. will be produced')
    # gram sizes arrive as strings; main() converts them with int()
    cli.add_argument('-min', '--min', action='store', default='2',
                     help='minimum gram size to compute')
    cli.add_argument('-max', '--max', action='store', default='8',
                     help='maximum gram size to compute')
    cli.add_argument('-nltk', '--nltk', action='store_true',
                     help='use nltk model.NgramModel, with n = min')
    cli.add_argument('-s', '--separate', action='store_true',
                     help='separate card text into lines when constructing nltk model')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='verbose output')

    options = cli.parse_args()
    main(options.infile, options.outfile,
         gmin=options.min, gmax=options.max, nltk=options.nltk,
         sep=options.separate, verbose=options.verbose)
    exit(0)
|
159
scripts/pairing.py
Executable file
159
scripts/pairing.py
Executable file
|
@ -0,0 +1,159 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import zipfile
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
|
||||||
|
sys.path.append(libdir)
|
||||||
|
datadir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data')
|
||||||
|
import utils
|
||||||
|
import jdecode
|
||||||
|
import ngrams
|
||||||
|
import analysis
|
||||||
|
import mtg_validate
|
||||||
|
|
||||||
|
from cbow import CBOW
|
||||||
|
|
||||||
|
separate_lines=True
|
||||||
|
|
||||||
|
def select_card(cards, stats, i):
    """Decide whether the card at index i is a usable pairing candidate.

    cards -- indexable collection of cards
    stats -- statistics dict; reads stats['dists']['cbow'][i] and the
             'perp_per' and 'perp_max' lists under stats['ngram']
    i     -- index of the card in `cards` and in the per-card stats lists

    Returns None if the card fails one of the distance / perplexity
    thresholds, False if mtg_validate.process_props() does not count it as
    good, and True otherwise.
    """
    card = cards[i]
    nearest = stats['dists']['cbow'][i]
    perp_per = stats['ngram']['perp_per'][i]
    perp_max = stats['ngram']['perp_max'][i]

    # cheap statistical filters first, before running validation
    if nearest > 0.9 or perp_per > 2.0 or perp_max > 10.0:
        return None

    # validate just this card; only the "good" total matters here
    ((_, total_good, _, _), _) = mtg_validate.process_props([card])
    if total_good != 1:
        return False

    return True
|
||||||
|
|
||||||
|
def compare_to_real(card, realcard):
    """Check whether a generated card matches a real card's types and colors.

    The two cards match when their sorted, space-joined type lists are equal
    and realcard.cost.check_colors() accepts the colors reported by
    card.cost.get_colors().  Returns False on a type mismatch, otherwise
    whatever check_colors() returns.
    """
    generated_types = ' '.join(sorted(card.types))
    real_types = ' '.join(sorted(realcard.types))
    if generated_types != real_types:
        return False
    return realcard.cost.check_colors(card.cost.get_colors())
|
||||||
|
|
||||||
|
def writecard(card, name, writer):
    """Write one card to `writer` as an MSE entry followed by a notes block.

    card   -- card providing format(), to_mse(), and the name / json / raw
              attributes
    name   -- name to use in the MSE entry instead of the card's own name
    writer -- object with a write() method; receives utf-8 encoded data

    The card's name is swapped only while the MSE entry is produced and is
    always restored, even if to_mse() or the write raises.
    """
    # the formatted text is captured before the name is altered
    fmt = card.format(gatherer = False, for_forum = True, vdump = True)

    oldname = card.name
    # alter name used in image
    card.name = name
    try:
        writer.write(card.to_mse().encode('utf-8'))
    finally:
        card.name = oldname

    fstring = ''
    if card.json:
        fstring += 'JSON:\n' + card.json + '\n'
    if card.raw:
        fstring += 'raw:\n' + card.raw + '\n'
    fstring += '\n'
    fstring += fmt + '\n'
    # angle brackets are replaced with parentheses in the notes text
    fstring = fstring.replace('<', '(').replace('>', ')')
    # drop the final newline and indent every line of the notes by two tabs
    writer.write(('\n' + fstring[:-1]).replace('\n', '\n\t\t').encode('utf-8'))
    writer.write('\n'.encode('utf-8'))
|
||||||
|
|
||||||
|
def main(fname, oname, n=20, verbose=False):
    """Pair generated cards with similar real cards and report the pairs.

    fname   -- encoded card file or json corpus with the generated cards
    oname   -- output file for the MSE set text, or None to skip writing;
               when given, oname + '.mse-set' is produced as well
    n       -- accepted for command-line compatibility; not used below
    verbose -- print progress information

    Generated cards are filtered with select_card(), shuffled, and matched
    against their nearest real cards (per CBOW.nearest_par); the first
    nearest card accepted by compare_to_real() forms the pair.
    """
    cbow = CBOW()
    realcards = jdecode.mtg_open_file(str(os.path.join(datadir, 'output.txt')), verbose=verbose)
    real_by_name = {c.name: c for c in realcards}
    lm = ngrams.build_ngram_model(realcards, 3, separate_lines=separate_lines, verbose=verbose)
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    stats = analysis.get_statistics(fname, lm=lm, sep=separate_lines, verbose=verbose)

    # keep (index, card) so the per-card statistics can be looked up later
    selected = [(i, cards[i]) for i in range(0, len(cards))
                if select_card(cards, stats, i)]
    random.shuffle(selected)

    if verbose:
        print('computing nearest cards for ' + str(len(selected)) + ' candindates...')
    cbow_nearest = cbow.nearest_par([card for (_, card) in selected])
    for pos, (j, card) in enumerate(selected):
        selected[pos] = (j, card, cbow_nearest[pos])
    if verbose:
        print('...done')

    final = []
    for (i, card, nearest) in selected:
        for dist, rname in nearest:
            realcard = real_by_name[rname]
            if compare_to_real(card, realcard):
                final += [(i, card, realcard, dist)]
                break

    for (i, card, realcard, dist) in final:
        print('-- real --')
        print(realcard.format())
        print('-- fake --')
        print(card.format())
        print('-- stats --')
        print(dist)
        print(stats['ngram']['perp_per'][i])
        print(stats['ngram']['perp_max'][i])
        print('----')

    if oname is None:
        return

    with open(oname, 'wt') as ofile:
        ofile.write(utils.mse_prepend)
        for (i, card, realcard, dist) in final:
            # both cards of a pair are written under the real card's name
            name = realcard.name
            writecard(realcard, name, ofile)
            writecard(card, name, ofile)
        ofile.write('version control:\n\ttype: none\napprentice code: ')

    # Copy whatever output file is produced, name the copy 'set' (yes, no extension).
    # This happens after the file above is closed so the copy is complete.
    if os.path.isfile('set'):
        print('ERROR: tried to overwrite existing file "set" - aborting.')
        return
    shutil.copyfile(oname, 'set')
    try:
        # Use the freaky mse extension instead of zip.
        with zipfile.ZipFile(oname+'.mse-set', mode='w') as zf:
            # Zip up the set file into oname.mse-set.
            zf.write('set')
        if verbose:
            print('Made an MSE set file called ' + oname + '.mse-set.')
    finally:
        # The set file is useless outside the .mse-set, delete it.
        os.remove('set')
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and forward them to main().
    import argparse

    cli = argparse.ArgumentParser()

    # infile is required; outfile may be omitted, in which case it is None
    cli.add_argument('infile',
                     help='encoded card file or json corpus to process')
    cli.add_argument('outfile', nargs='?', default=None,
                     help='output file, defaults to none')
    # no default is given, so n is None unless supplied on the command line
    cli.add_argument('-n', '--n', action='store',
                     help='number of cards to consider for each pairing')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='verbose output')

    options = cli.parse_args()
    main(options.infile, options.outfile, n=options.n, verbose=options.verbose)
    exit(0)
|
172
scripts/sanity.py
Executable file
172
scripts/sanity.py
Executable file
|
@ -0,0 +1,172 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
|
||||||
|
sys.path.append(libdir)
|
||||||
|
import utils
|
||||||
|
import jdecode
|
||||||
|
import cardlib
|
||||||
|
import transforms
|
||||||
|
|
||||||
|
def check_lines(fname):
    """Show how transforms.separate_lines() classifies the lines of each card.

    fname -- encoded card file or json corpus, opened with
             jdecode.mtg_open_file

    The five line groups returned by separate_lines() (for the card and, if
    present, its b-side) are gathered into sets of distinct lines.  A card's
    name and text are printed whenever one of its lines is blank.  Finally a
    count summary and the sorted contents of every set are printed.
    """
    cards = jdecode.mtg_open_file(fname, verbose=True, linetrans=True)

    prelines = set()
    keylines = set()
    mainlines = set()
    costlines = set()
    postlines = set()

    # Prefixes that can be collapsed into the single entry 'known'.  The list
    # is immediately replaced by an empty one, which disables the collapsing.
    known = ['enchant ', 'equip', 'countertype', 'multikicker', 'kicker',
             'suspend', 'echo', 'awaken', 'bestow', 'buyback',
             'cumulative', 'dash', 'entwine', 'evoke', 'fortify',
             'flashback', 'madness', 'morph', 'megamorph', 'miracle', 'ninjutsu',
             'overload', 'prowl', 'recover', 'reinforce', 'replicate', 'scavenge',
             'splice', 'surge', 'unearth', 'transfigure', 'transmute',
    ]
    known = []

    def collect(card, lines, bucket, collapse_known):
        # report blank lines, optionally collapse known prefixes, record line
        for line in lines:
            if line.strip() == '':
                print(card.name, card.text.text)
            if collapse_known and any(line.startswith(s) for s in known):
                line = 'known'
            bucket.add(line)

    for card in cards:
        prel, keyl, mainl, costl, postl = transforms.separate_lines(card.text.encode(randomize=False))
        if card.bside:
            prel2, keyl2, mainl2, costl2, postl2 = transforms.separate_lines(card.bside.text.encode(randomize=False))
            prel += prel2
            keyl += keyl2
            mainl += mainl2
            costl += costl2
            postl += postl2

        collect(card, prel, prelines, True)
        collect(card, postl, postlines, True)
        collect(card, keyl, keylines, True)
        # main and cost lines are always recorded verbatim
        collect(card, mainl, mainlines, False)
        collect(card, costl, costlines, False)

    print('prel: {:d}, keyl: {:d}, mainl: {:d}, postl {:d}'
          .format(len(prelines), len(keylines), len(mainlines), len(postlines)))

    report = (
        ('\nprelines', prelines),
        ('\npostlines', postlines),
        ('\ncostlines', costlines),
        ('\nkeylines', keylines),
        ('\nmainlines', mainlines),
    )
    for heading, bucket in report:
        print(heading)
        for line in sorted(bucket):
            print(line)
|
||||||
|
|
||||||
|
def check_vocab(fname):
    """Print word frequencies of the card text, then cards using rare words.

    fname -- encoded card file or json corpus, opened with
             jdecode.mtg_open_file

    First every distinct word is printed with its count, most frequent
    first.  Then, for each card that contains a word occurring at most 3
    times overall, the first such word and the encoded card are printed.
    """
    cards = jdecode.mtg_open_file(fname, verbose=True, linetrans=True)

    def card_words(card):
        # words of the card text, followed by those of the b-side if any
        words = card.text.vectorize().split()
        if card.bside:
            words += card.bside.text.vectorize().split()
        return words

    vocab = {}
    for card in cards:
        for word in card_words(card):
            vocab[word] = vocab.get(word, 0) + 1

    # a key function replaces the old cmp-style comparator; same ordering
    for word in sorted(vocab, key=vocab.get, reverse=True):
        print('{:8d} : {:s}'.format(vocab[word], word))

    # words seen at most this many times count as rare
    n = 3

    for card in cards:
        for word in card_words(card):
            if vocab[word] <= n:
                print('\n{:8d} : {:s}'.format(vocab[word], word))
                print(card.encode())
                # one report per card is enough
                break
|
||||||
|
|
||||||
|
def check_characters(fname, vname):
    """Print the character vocabulary of the encoded cards; optionally save it.

    fname -- encoded card file or json corpus, opened with
             jdecode.mtg_open_file
    vname -- if truthy, path of a json file that receives the
             'token_to_idx' and 'idx_to_token' mappings

    The vocabulary is every character of utils.cardsep plus every character
    of each card's encode() output, numbered from 1 in sorted order.
    """
    cards = jdecode.mtg_open_file(fname, verbose=True, linetrans=True)

    tokens = set(utils.cardsep)
    for card in cards:
        tokens.update(card.encode())

    ordered = sorted(tokens)
    token_to_idx = {}
    idx_to_token = {}
    for position, tok in enumerate(ordered, 1):
        token_to_idx[tok] = position
        idx_to_token[position] = tok

    print('Vocabulary: ({:d} symbols)'.format(len(token_to_idx)))
    for token in ordered:
        print('{:8s} : {:4d}'.format(repr(token), token_to_idx[token]))

    # compliant with torch-rnn
    if vname:
        json_data = {'token_to_idx':token_to_idx, 'idx_to_token':idx_to_token}
        print('writing vocabulary to {:s}'.format(vname))
        with open(vname, 'w') as f:
            json.dump(json_data, f)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: run whichever checks were requested.
    import argparse

    cli = argparse.ArgumentParser()

    # infile is optional and falls back to the data/output.txt next to lib
    cli.add_argument('infile', nargs='?', default=os.path.join(libdir, '../data/output.txt'),
                     help='encoded card file or json corpus to process')
    cli.add_argument('-lines', action='store_true',
                     help='show behavior of line separation')
    cli.add_argument('-vocab', action='store_true',
                     help='show vocabulary counts from encoded card text')
    cli.add_argument('-chars', action='store_true',
                     help='generate and display vocabulary of characters used in encoding')
    cli.add_argument('--vocab_name', default=None,
                     help='json file to write vocabulary to')
    options = cli.parse_args()

    # the checks are independent; any combination may be selected
    if options.lines:
        check_lines(options.infile)
    if options.vocab:
        check_vocab(options.infile)
    if options.chars:
        check_characters(options.infile, options.vocab_name)

    exit(0)
|
122
scripts/streamcards.py
Executable file
122
scripts/streamcards.py
Executable file
|
@ -0,0 +1,122 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# -- STOLEN FROM torch-rnn/scripts/streamfile.py -- #
|
||||||
|
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import signal
|
||||||
|
import traceback
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
# correctly setting up a stream that won't get orphaned and left cluttering the operating
|
||||||
|
# system proceeds in 3 parts:
|
||||||
|
# 1) invoke install_suicide_handlers() to ensure correct behavior on interrupt
|
||||||
|
# 2) get threads by invoking spawn_stream_threads
|
||||||
|
# 3) invoke wait_and_kill_self_noreturn(threads)
|
||||||
|
# or, use the handy wrapper that does it for you
|
||||||
|
|
||||||
|
def spawn_stream_threads(fds, runthread, mkargs):
    """Start one daemon thread per entry of `fds` and return the threads.

    fds       -- iterable of values (file descriptors in this script)
    runthread -- callable used as the target of every thread
    mkargs    -- callable taking (index, fd) and returning the argument
                 tuple passed to runthread

    Each thread is started before the next one is created; the returned
    list is in the same order as `fds`.
    """
    def launch(index, fd):
        worker = threading.Thread(target=runthread, args=mkargs(index, fd))
        worker.daemon = True
        worker.start()
        return worker

    return [launch(index, fd) for index, fd in enumerate(fds)]
|
||||||
|
|
||||||
|
def force_kill_self_noreturn():
    """Terminate every child process and then this process itself.

    A normal exit() or sys.exit() is not enough here: the stream threads are
    all blocked in write() calls on full pipes and refuse to die, so the
    simplest workaround is to ask the OS to terminate us.  Sending SIGTERM
    with os.kill(os.getpid(), ...) kinda works, but psutil is used instead
    because it might have useful features like checking if the pid has been
    reused before killing it, and because there may be child processes
    (like l2e luajits) to think about.
    """
    this_process = psutil.Process(os.getpid())
    for descendant in this_process.children(recursive=True):
        descendant.terminate()
    this_process.terminate()
|
||||||
|
|
||||||
|
def handler_kill_self(signum, frame):
    """Signal handler that reports the signal and terminates the process.

    signum -- number of the signal that was received
    frame  -- stack frame that was interrupted

    The interrupted stack is printed for every signal except SIGQUIT.
    """
    wants_traceback = signum != signal.SIGQUIT
    if wants_traceback:
        traceback.print_stack(frame)
    print('caught signal {:d} - streamer sending SIGTERM to self'.format(signum))
    force_kill_self_noreturn()
|
||||||
|
|
||||||
|
def install_suicide_handlers():
    """Route SIGHUP, SIGINT and SIGQUIT to handler_kill_self()."""
    for signum in (signal.SIGHUP, signal.SIGINT, signal.SIGQUIT):
        signal.signal(signum, handler_kill_self)
|
||||||
|
|
||||||
|
def wait_and_kill_self_noreturn(threads):
    """Poll until the stream threads finish or the parent dies, then exit.

    threads -- the thread objects to watch (as returned by
               spawn_stream_threads)

    Liveness is checked about once per second.  The loop ends when no
    thread is alive any more, or immediately once the parent pid is <= 1.
    Either way the process is then terminated with
    force_kill_self_noreturn().
    """
    still_running = True
    while still_running:
        still_running = any(worker.is_alive() for worker in threads)
        if os.getppid() <= 1:
            # exit if parent process died (and we were reparented to init)
            break
        time.sleep(1)
    force_kill_self_noreturn()
|
||||||
|
|
||||||
|
def streaming_noreturn(fds, write_stream, mkargs):
    """Convenience wrapper for the whole streaming setup; not meant to return.

    Installs the signal handlers, spawns one stream thread per entry of
    `fds` (running write_stream with the arguments built by mkargs), and
    waits on them until the process is terminated.
    """
    install_suicide_handlers()
    workers = spawn_stream_threads(fds, write_stream, mkargs)
    wait_and_kill_self_noreturn(workers)
    assert False, 'should not return from streaming'
|
||||||
|
|
||||||
|
# -- END STOLEN FROM torch-rnn/scripts/streamfile.py -- #
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import random
|
||||||
|
|
||||||
|
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
|
||||||
|
sys.path.append(libdir)
|
||||||
|
import utils
|
||||||
|
import jdecode
|
||||||
|
import transforms
|
||||||
|
|
||||||
|
def main(args):
    """Stream endlessly reshuffled, randomized card encodings to each fd.

    args -- parsed arguments providing fds, fname, block_size and seed

    One thread per file descriptor repeatedly shuffles its own copy of the
    card list and writes every card's encoding followed by utils.cardsep.
    """
    stream_fds = args.fds
    card_file = args.fname
    # read from the arguments, but nothing below uses it yet
    block_size = args.block_size
    # a seed of 0 is passed on as None
    main_seed = None if args.seed == 0 else args.seed

    # simple default encoding for now, will add more options with the curriculum
    # learning feature

    cards = jdecode.mtg_open_file(card_file, verbose=True, linetrans=True)

    def write_stream(i, fd):
        # every stream gets its own generator, advanced by its index
        # NOTE(review): Random.jumpahead exists only on Python 2.
        rng = random.Random(main_seed)
        rng.jumpahead(i)
        deck = list(cards)
        with open('/proc/self/fd/'+str(fd), 'wt') as out:
            while True:
                rng.shuffle(deck)
                for card in deck:
                    out.write(card.encode(randomize_mana=True, randomize_lines=True))
                    out.write(utils.cardsep)

    def mkargs(i, fd):
        return i, fd

    streaming_noreturn(stream_fds, write_stream, mkargs)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and forward them to main().
    import argparse

    cli = argparse.ArgumentParser()
    # at least one file descriptor is required
    cli.add_argument('fds', type=int, nargs='+',
                     help='file descriptors to write streams to')
    cli.add_argument('-f', '--fname', default=os.path.join(libdir, '../data/output.txt'),
                     help='file to read cards from')
    cli.add_argument('-n', '--block_size', type=int, default=10000,
                     help='number of characters each stream should read/write at a time')
    cli.add_argument('-s', '--seed', type=int, default=0,
                     help='random seed')
    options = cli.parse_args()

    main(options)
|
58
scripts/sum.py
Executable file
58
scripts/sum.py
Executable file
|
@ -0,0 +1,58 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
|
||||||
|
sys.path.append(libdir)
|
||||||
|
|
||||||
|
def main(fname):
    """Summarize a file of per-card distance statistics.

    fname -- text file with one record per line, fields separated by '|':
             index, name, name distance, card distance.  Lines with fewer
             than four fields are ignored.

    Prints the number of records, and for names and cards the average
    distance and the number of records whose distance is exactly 1.0.
    With no valid records the averages are reported as 0.0 rather than
    failing with a division by zero.

    Raises ValueError if the index or a distance field of a record does
    not parse as a number.
    """
    with open(fname, 'rt') as f:
        text = f.read()

    nonempty = 0
    name_total = 0.0
    name_dupes = 0
    card_total = 0.0
    card_dupes = 0

    for record in text.split('\n'):
        fields = record.split('|')
        if len(fields) < 4:
            continue
        nonempty += 1
        # the index is not used, but it must still parse as an integer
        int(fields[0])
        ndist = float(fields[2])
        cdist = float(fields[3])

        name_total += ndist
        if ndist == 1.0:
            name_dupes += 1
        card_total += cdist
        if cdist == 1.0:
            card_dupes += 1

    if nonempty > 0:
        name_avg = name_total / float(nonempty)
        card_avg = card_total / float(nonempty)
    else:
        # nothing to average over
        name_avg = 0.0
        card_avg = 0.0

    print(str(nonempty) + ' cards')
    print('-- names --')
    print('avg distance: ' + str(name_avg))
    print('num duplicates: ' + str(name_dupes))
    print('-- cards --')
    print('avg distance: ' + str(card_avg))
    print('num duplicates: ' + str(card_dupes))
    print('----')
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: the single required argument is the data file.
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('infile',
                     help='data file to process')

    options = cli.parse_args()
    main(options.infile)
    exit(0)
|
Loading…
Reference in a new issue