various improvements, added cbow
This commit is contained in:
parent
08dc3944f8
commit
49e386ac4f
9 changed files with 29818 additions and 34 deletions
BIN
data/cbow.bin
Normal file
BIN
data/cbow.bin
Normal file
Binary file not shown.
29488
data/cbow.txt
Normal file
29488
data/cbow.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -6529,7 +6529,7 @@
|
||||||
|
|
||||||
|sigil of the new dawn||enchantment||||{^^^WW}|whenever a creature is put into your graveyard from the battlefield, you may pay {^WW}. if you do, return that card to your hand.|
|
|sigil of the new dawn||enchantment||||{^^^WW}|whenever a creature is put into your graveyard from the battlefield, you may pay {^WW}. if you do, return that card to your hand.|
|
||||||
|
|
||||||
|benalish commander||creature||human soldier|*/*|{^^^WW}|countertype % time\@'s power and toughness are each equal to the number of soldiers you control.\suspend x~{XXWWWW}. X can't be &. \whenever a % counter is removed from @ while it's exiled, put a &^/&^ white soldier creature token onto the battlefield.|
|
|benalish commander||creature||human soldier|*/*|{^^^WW}|countertype % time\@'s power and toughness are each equal to the number of soldiers you control.\suspend X~{XXWWWW}. X can't be &. \whenever a % counter is removed from @ while it's exiled, put a &^/&^ white soldier creature token onto the battlefield.|
|
||||||
|
|
||||||
|deluge||instant||||{^^UU}|tap all creatures without flying.|
|
|deluge||instant||||{^^UU}|tap all creatures without flying.|
|
||||||
|
|
||||||
|
@ -8148,7 +8148,7 @@
|
||||||
|
|
||||||
|ventifact bottle||artifact||||{^^^}|countertype % charge\{XX^}, T: put X % counters on @. activate this ability only any time you could cast a sorcery.\at the beginning of your precombat main phase, if @ has a % counter on it, tap it and remove all % counters from it. add {^} to your mana pool for each % counter removed this way.|
|
|ventifact bottle||artifact||||{^^^}|countertype % charge\{XX^}, T: put X % counters on @. activate this ability only any time you could cast a sorcery.\at the beginning of your precombat main phase, if @ has a % counter on it, tap it and remove all % counters from it. add {^} to your mana pool for each % counter removed this way.|
|
||||||
|
|
||||||
|aeon chronicler||creature||avatar|*/*|{^^^UUUU}|countertype % time\@'s power and toughness are each equal to the number of cards in your hand.\suspend x~{XX^^^UU}. X can't be &. \whenever a % counter is removed from @ while it's exiled, draw a card.|
|
|aeon chronicler||creature||avatar|*/*|{^^^UUUU}|countertype % time\@'s power and toughness are each equal to the number of cards in your hand.\suspend X~{XX^^^UU}. X can't be &. \whenever a % counter is removed from @ while it's exiled, draw a card.|
|
||||||
|
|
||||||
|brine seer||creature||human wizard|&^/&^|{^^^UU}|{^^UU}, T: reveal any number of blue cards in your hand. uncast target spell unless its controller pays {^} for each card revealed this way.|
|
|brine seer||creature||human wizard|&^/&^|{^^^UU}|{^^UU}, T: reveal any number of blue cards in your hand. uncast target spell unless its controller pays {^} for each card revealed this way.|
|
||||||
|
|
||||||
|
@ -9290,7 +9290,7 @@
|
||||||
|
|
||||||
|gaea's liege||creature||avatar|*/*|{^^^GGGGGG}|as long as @ isn't attacking, its power and toughness are each equal to the number of forests you control. as long as @ is attacking, its power and toughness are each equal to the number of forests defending player controls.\T: target land becomes a forest until @ leaves the battlefield.|
|
|gaea's liege||creature||avatar|*/*|{^^^GGGGGG}|as long as @ isn't attacking, its power and toughness are each equal to the number of forests you control. as long as @ is attacking, its power and toughness are each equal to the number of forests defending player controls.\T: target land becomes a forest until @ leaves the battlefield.|
|
||||||
|
|
||||||
|swell of courage||instant||||{^^^WWWW}|creatures you control get +&^^/+&^^ until end of turn.\reinforce x~{XXWWWW}|
|
|swell of courage||instant||||{^^^WWWW}|creatures you control get +&^^/+&^^ until end of turn.\reinforce X~{XXWWWW}|
|
||||||
|
|
||||||
|battlegrace angel||creature||angel|&^^^^/&^^^^|{^^^WWWW}|flying\exalted \whenever a creature you control attacks alone, it gains lifelink until end of turn.|
|
|battlegrace angel||creature||angel|&^^^^/&^^^^|{^^^WWWW}|flying\exalted \whenever a creature you control attacks alone, it gains lifelink until end of turn.|
|
||||||
|
|
||||||
|
@ -9919,7 +9919,7 @@
|
||||||
|
|
||||||
|bellowing saddlebrute||creature||orc warrior|&^^^^/&^^^^^|{^^^BB}|raid ~ when @ enters the battlefield, you lose &^^^^ life unless you attacked with a creature this turn.|
|
|bellowing saddlebrute||creature||orc warrior|&^^^^/&^^^^^|{^^^BB}|raid ~ when @ enters the battlefield, you lose &^^^^ life unless you attacked with a creature this turn.|
|
||||||
|
|
||||||
|detritivore||creature||lhurgoyf|*/*|{^^RRRR}|countertype % time\@'s power and toughness are each equal to the number of nonbasic land cards in your opponents' graveyards.\suspend x~{XX^^^RR}. X can't be &. \whenever a % counter is removed from @ while it's exiled, destroy target nonbasic land.|
|
|detritivore||creature||lhurgoyf|*/*|{^^RRRR}|countertype % time\@'s power and toughness are each equal to the number of nonbasic land cards in your opponents' graveyards.\suspend X~{XX^^^RR}. X can't be &. \whenever a % counter is removed from @ while it's exiled, destroy target nonbasic land.|
|
||||||
|
|
||||||
|gruul signet||artifact||||{^^}|{^}, T: add {RRGG} to your mana pool.|
|
|gruul signet||artifact||||{^^}|{^}, T: add {RRGG} to your mana pool.|
|
||||||
|
|
||||||
|
@ -17008,7 +17008,7 @@
|
||||||
|
|
||||||
|viashino grappler||creature||viashino|&^^^/&^|{^^RR}|{GG}: @ gains trample until end of turn.|
|
|viashino grappler||creature||viashino|&^^^/&^|{^^RR}|{GG}: @ gains trample until end of turn.|
|
||||||
|
|
||||||
|fungal behemoth||creature||fungus|*/*|{^^^GG}|countertype % time\@'s power and toughness are each equal to the number of +&^/+&^ counters on creatures you control.\suspend x~{XXGGGG}. X can't be &. \whenever a % counter is removed from @ while it's exiled, you may put a +&^/+&^ counter on target creature.|
|
|fungal behemoth||creature||fungus|*/*|{^^^GG}|countertype % time\@'s power and toughness are each equal to the number of +&^/+&^ counters on creatures you control.\suspend X~{XXGGGG}. X can't be &. \whenever a % counter is removed from @ while it's exiled, you may put a +&^/+&^ counter on target creature.|
|
||||||
|
|
||||||
|defang||enchantment||aura||{^WW}|enchant creature\prevent all damage that would be dealt by enchanted creature.|
|
|defang||enchantment||aura||{^WW}|enchant creature\prevent all damage that would be dealt by enchanted creature.|
|
||||||
|
|
||||||
|
@ -18757,7 +18757,7 @@
|
||||||
|
|
||||||
|arcane lighthouse||land|||||T: add {^} to your mana pool.\{^}, T: until end of turn, creatures your opponents control lose hexproof and shroud and can't have hexproof or shroud.|
|
|arcane lighthouse||land|||||T: add {^} to your mana pool.\{^}, T: until end of turn, creatures your opponents control lose hexproof and shroud and can't have hexproof or shroud.|
|
||||||
|
|
||||||
|roiling horror||creature||horror|*/*|{^^^BBBB}|countertype % time\@'s power and toughness are each equal to your life total minus the life total of an opponent with the most life.\suspend x~{XXBBBBBB}. X can't be &. \whenever a % counter is removed from @ while it's exiled, target player loses &^ life and you gain &^ life.|
|
|roiling horror||creature||horror|*/*|{^^^BBBB}|countertype % time\@'s power and toughness are each equal to your life total minus the life total of an opponent with the most life.\suspend X~{XXBBBBBB}. X can't be &. \whenever a % counter is removed from @ while it's exiled, target player loses &^ life and you gain &^ life.|
|
||||||
|
|
||||||
|illusionary terrain||enchantment||||{UUUU}|cumulative upkeep {^^} \as @ enters the battlefield, choose two basic land types.\basic lands of the first chosen type are the second chosen type.|
|
|illusionary terrain||enchantment||||{UUUU}|cumulative upkeep {^^} \as @ enters the battlefield, choose two basic land types.\basic lands of the first chosen type are the second chosen type.|
|
||||||
|
|
||||||
|
|
31
decode.py
31
decode.py
|
@ -7,8 +7,10 @@ sys.path.append(libdir)
|
||||||
import utils
|
import utils
|
||||||
import jdecode
|
import jdecode
|
||||||
import cardlib
|
import cardlib
|
||||||
|
from cbow import CBOW
|
||||||
|
|
||||||
def main(fname, oname = None, verbose = True, gatherer = False, for_forum = False):
|
def main(fname, oname = None, verbose = True,
|
||||||
|
gatherer = False, for_forum = False, creativity = False):
|
||||||
cards = []
|
cards = []
|
||||||
valid = 0
|
valid = 0
|
||||||
invalid = 0
|
invalid = 0
|
||||||
|
@ -51,17 +53,28 @@ def main(fname, oname = None, verbose = True, gatherer = False, for_forum = Fals
|
||||||
print (str(valid) + ' valid, ' + str(invalid) + ' invalid, '
|
print (str(valid) + ' valid, ' + str(invalid) + ' invalid, '
|
||||||
+ str(unparsed) + ' failed to parse.')
|
+ str(unparsed) + ' failed to parse.')
|
||||||
|
|
||||||
|
if creativity:
|
||||||
|
cbow = CBOW()
|
||||||
|
|
||||||
|
def writecards(writer):
|
||||||
|
for card in cards:
|
||||||
|
writer.write((card.format(gatherer = gatherer, for_forum = for_forum)).encode('utf-8'))
|
||||||
|
if creativity:
|
||||||
|
writer.write('~~ closest cards ~~\n'.encode('utf-8'))
|
||||||
|
nearest = cbow.nearest(card)
|
||||||
|
for dist, cardname in nearest:
|
||||||
|
if for_forum:
|
||||||
|
cardname = '[card]' + cardname + '[/card]'
|
||||||
|
writer.write((cardname + ': ' + str(dist) + '\n').encode('utf-8'))
|
||||||
|
writer.write('\n'.encode('utf-8'))
|
||||||
|
|
||||||
if oname:
|
if oname:
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'Writing output to: ' + oname
|
print 'Writing output to: ' + oname
|
||||||
with open(oname, 'w') as ofile:
|
with open(oname, 'w') as ofile:
|
||||||
for card in cards:
|
writecards(ofile)
|
||||||
ofile.write((card.format(gatherer = gatherer, for_forum = for_forum)
|
|
||||||
+ '\n').encode('utf-8'))
|
|
||||||
else:
|
else:
|
||||||
for card in cards:
|
writecards(sys.stdout)
|
||||||
sys.stdout.write((card.format(gatherer = gatherer, for_forum = for_forum)
|
|
||||||
+ '\n').encode('utf-8'))
|
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
|
||||||
|
@ -77,10 +90,12 @@ if __name__ == '__main__':
|
||||||
help='emulate Gatherer visual spoiler')
|
help='emulate Gatherer visual spoiler')
|
||||||
parser.add_argument('-f', '--forum', action='store_true',
|
parser.add_argument('-f', '--forum', action='store_true',
|
||||||
help='use pretty mana encoding for mtgsalvation forum')
|
help='use pretty mana encoding for mtgsalvation forum')
|
||||||
|
parser.add_argument('-c', '--creativity', action='store_true',
|
||||||
|
help='use CBOW fuzzy matching to check creativity of cards')
|
||||||
parser.add_argument('-v', '--verbose', action='store_true',
|
parser.add_argument('-v', '--verbose', action='store_true',
|
||||||
help='verbose output')
|
help='verbose output')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
main(args.infile, args.outfile, verbose = args.verbose,
|
main(args.infile, args.outfile, verbose = args.verbose,
|
||||||
gatherer = args.gatherer, for_forum = args.forum)
|
gatherer = args.gatherer, for_forum = args.forum, creativity = args.creativity)
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
34
encode.py
34
encode.py
|
@ -29,7 +29,9 @@ def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stabl
|
||||||
final_sep = True
|
final_sep = True
|
||||||
|
|
||||||
# set the properties of the encoding
|
# set the properties of the encoding
|
||||||
if encoding in ['std']:
|
if encoding in ['vec']:
|
||||||
|
pass
|
||||||
|
elif encoding in ['std']:
|
||||||
if dupes == 0:
|
if dupes == 0:
|
||||||
dupes = 1
|
dupes = 1
|
||||||
elif encoding in ['rmana']:
|
elif encoding in ['rmana']:
|
||||||
|
@ -125,22 +127,12 @@ def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stabl
|
||||||
random.seed(1371367)
|
random.seed(1371367)
|
||||||
random.shuffle(cards)
|
random.shuffle(cards)
|
||||||
|
|
||||||
if oname:
|
def writecards(writer):
|
||||||
if verbose:
|
|
||||||
print 'Writing output to: ' + oname
|
|
||||||
with open(oname, 'w') as ofile:
|
|
||||||
for card in cards:
|
|
||||||
ofile.write(card.encode(fmt_ordered = fmt_ordered,
|
|
||||||
fmt_labeled = fmt_labeled,
|
|
||||||
fieldsep = fieldsep,
|
|
||||||
randomize_fields = randomize_fields,
|
|
||||||
randomize_mana = randomize_mana,
|
|
||||||
initial_sep = initial_sep,
|
|
||||||
final_sep = final_sep)
|
|
||||||
+ utils.cardsep)
|
|
||||||
else:
|
|
||||||
for card in cards:
|
for card in cards:
|
||||||
sys.stdout.write(card.encode(fmt_ordered = fmt_ordered,
|
if encoding in ['vec']:
|
||||||
|
writer.write(card.vectorize() + '\n\n')
|
||||||
|
else:
|
||||||
|
writer.write(card.encode(fmt_ordered = fmt_ordered,
|
||||||
fmt_labeled = fmt_labeled,
|
fmt_labeled = fmt_labeled,
|
||||||
fieldsep = fieldsep,
|
fieldsep = fieldsep,
|
||||||
randomize_fields = randomize_fields,
|
randomize_fields = randomize_fields,
|
||||||
|
@ -148,6 +140,14 @@ def main(fname, oname = None, verbose = True, dupes = 0, encoding = 'std', stabl
|
||||||
initial_sep = initial_sep,
|
initial_sep = initial_sep,
|
||||||
final_sep = final_sep)
|
final_sep = final_sep)
|
||||||
+ utils.cardsep)
|
+ utils.cardsep)
|
||||||
|
|
||||||
|
if oname:
|
||||||
|
if verbose:
|
||||||
|
print 'Writing output to: ' + oname
|
||||||
|
with open(oname, 'w') as ofile:
|
||||||
|
writecards(ofile)
|
||||||
|
else:
|
||||||
|
writecards(sys.stdout)
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
|
||||||
|
@ -162,7 +162,7 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('-d', '--duplicate', metavar='N', type=int, default=0,
|
parser.add_argument('-d', '--duplicate', metavar='N', type=int, default=0,
|
||||||
help='number of times to duplicate each card')
|
help='number of times to duplicate each card')
|
||||||
parser.add_argument('-e', '--encoding', default='std',
|
parser.add_argument('-e', '--encoding', default='std',
|
||||||
choices=['std', 'rmana', 'rmana_dual', 'rfields'])
|
choices=['std', 'rmana', 'rmana_dual', 'rfields', 'vec'])
|
||||||
parser.add_argument('-s', '--stable', action='store_true',
|
parser.add_argument('-s', '--stable', action='store_true',
|
||||||
help="don't randomize the order of the cards")
|
help="don't randomize the order of the cards")
|
||||||
parser.add_argument('-v', '--verbose', action='store_true',
|
parser.add_argument('-v', '--verbose', action='store_true',
|
||||||
|
|
|
@ -6,6 +6,32 @@ import utils
|
||||||
import transforms
|
import transforms
|
||||||
from manalib import Manacost, Manatext
|
from manalib import Manacost, Manatext
|
||||||
|
|
||||||
|
# Some text prettification stuff that people may not have installed.
# Each helper degrades to a simple fallback when its optional dependency
# (or, for nltk, its downloaded data) is unavailable.
try:
    from titlecase import titlecase
except ImportError:
    def titlecase(s):
        """Fallback title-casing: defer to the built-in str.title()."""
        return s.title()

try:
    import textwrap
    import nltk.data
    # nltk.data.load raises LookupError (not ImportError) when nltk itself is
    # installed but the punkt tokenizer data has not been downloaded.
    sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

    # This crazy thing is actually invoked as an unpass, so newlines are still
    # encoded.
    def sentencecase(s):
        """Capitalize the first letter of every sentence in encoded card text.

        The text is split on utils.newline; empty lines are dropped, and each
        remaining line is split into sentences by the punkt tokenizer. Every
        sentence is passed through str.capitalize() and the pieces are joined
        back together with single spaces / utils.newline.

        utils.x_marker is temporarily swapped for utils.reserved_marker so
        that capitalize() (which lowercases the rest of each sentence) cannot
        change the marker's case; it is swapped back before returning.
        """
        s = s.replace(utils.x_marker, utils.reserved_marker)
        clines = []
        for line in s.split(utils.newline):
            if line:
                sentences = sent_tokenizer.tokenize(line)
                clines.append(' '.join([sent.capitalize() for sent in sentences]))
        return utils.newline.join(clines).replace(utils.reserved_marker, utils.x_marker)
except (ImportError, LookupError):
    def sentencecase(s):
        """Fallback used when nltk or its punkt data is missing: no-op."""
        return s
|
||||||
|
|
||||||
# These are used later to determine what the fields of the Card object are called.
|
# These are used later to determine what the fields of the Card object are called.
|
||||||
# Define them here because they have nothing to do with the actual format.
|
# Define them here because they have nothing to do with the actual format.
|
||||||
field_name = 'name'
|
field_name = 'name'
|
||||||
|
@ -495,7 +521,7 @@ class Card:
|
||||||
def format(self, gatherer = False, for_forum = False):
|
def format(self, gatherer = False, for_forum = False):
|
||||||
outstr = ''
|
outstr = ''
|
||||||
if gatherer:
|
if gatherer:
|
||||||
cardname = self.__dict__[field_name].title()
|
cardname = titlecase(self.__dict__[field_name])
|
||||||
if not cardname:
|
if not cardname:
|
||||||
cardname = '_NONAME_'
|
cardname = '_NONAME_'
|
||||||
if for_forum:
|
if for_forum:
|
||||||
|
@ -516,11 +542,11 @@ class Card:
|
||||||
|
|
||||||
outstr += '\n'
|
outstr += '\n'
|
||||||
|
|
||||||
basetypes = self.__dict__[field_types]
|
basetypes = map(str.capitalize, self.__dict__[field_types])
|
||||||
if len(basetypes) < 1:
|
if len(basetypes) < 1:
|
||||||
basetypes = ['_NOTYPE_']
|
basetypes = ['_NOTYPE_']
|
||||||
|
|
||||||
outstr += ' '.join(self.__dict__[field_supertypes] + basetypes)
|
outstr += ' '.join(map(str.capitalize, self.__dict__[field_supertypes]) + basetypes)
|
||||||
|
|
||||||
if self.__dict__[field_subtypes]:
|
if self.__dict__[field_subtypes]:
|
||||||
outstr += (' ' + utils.dash_marker + ' ' +
|
outstr += (' ' + utils.dash_marker + ' ' +
|
||||||
|
@ -540,6 +566,7 @@ class Card:
|
||||||
mtext = transforms.text_unpass_2_counters(mtext)
|
mtext = transforms.text_unpass_2_counters(mtext)
|
||||||
mtext = transforms.text_unpass_3_unary(mtext)
|
mtext = transforms.text_unpass_3_unary(mtext)
|
||||||
mtext = transforms.text_unpass_4_symbols(mtext, for_forum)
|
mtext = transforms.text_unpass_4_symbols(mtext, for_forum)
|
||||||
|
mtext = sentencecase(mtext)
|
||||||
mtext = transforms.text_unpass_5_cardname(mtext, cardname)
|
mtext = transforms.text_unpass_5_cardname(mtext, cardname)
|
||||||
mtext = transforms.text_unpass_6_newlines(mtext)
|
mtext = transforms.text_unpass_6_newlines(mtext)
|
||||||
newtext = Manatext('')
|
newtext = Manatext('')
|
||||||
|
@ -615,3 +642,41 @@ class Card:
|
||||||
outstr += self.bside.format(gatherer = gatherer, for_forum = for_forum)
|
outstr += self.bside.format(gatherer = gatherer, for_forum = for_forum)
|
||||||
|
|
||||||
return outstr
|
return outstr
|
||||||
|
|
||||||
|
def vectorize(self):
    """Render this card as a whitespace-separated token string.

    The pieces are emitted in a fixed order, each followed by one space:
    rarity wrapped in parentheses, the delimited mana cost, supertypes and
    types (each wrapped in parentheses), subtypes (bare), power/toughness
    tokens (each wrapped in parentheses, with '/' expanded to '/ /'), and
    loyalty wrapped in double parentheses. The vectorized rules text comes
    last. Pieces whose field is empty are skipped.

    If the card has a b-side, the result is
    '_ASIDE_ <this side>\\n\\n_BSIDE_ <b-side vectorize()>'.
    """
    fields = self.__dict__
    wrap = lambda s: '(' + s + ')'
    chunks = []

    rarity = fields[field_rarity]
    if rarity:
        chunks.append(wrap(rarity))

    cost_tokens = fields[field_cost].vectorize(delimit = True)
    if cost_tokens:
        chunks.append(cost_tokens)

    type_tokens = ' '.join([wrap(t) for t in
                            fields[field_supertypes] + fields[field_types]])
    if type_tokens:
        chunks.append(type_tokens)

    subtypes = fields[field_subtypes]
    if subtypes:
        chunks.append(' '.join(subtypes))

    pt = fields[field_pt]
    if pt:
        # '/' becomes its own pair of tokens so that power and toughness are
        # separated when split on whitespace.
        chunks.append(' '.join([wrap(t) for t in pt.replace('/', '/ /').split()]))

    loyalty = fields[field_loyalty]
    if loyalty:
        chunks.append('((' + loyalty + '))')

    # Every chunk is followed by a single space, then the rules text.
    chunks.append(fields[field_text].vectorize())
    outstr = ' '.join(chunks)

    if self.bside:
        outstr = '_ASIDE_ ' + outstr + '\n\n_BSIDE_ ' + self.bside.vectorize()

    return outstr
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
181
lib/cbow.py
Normal file
181
lib/cbow.py
Normal file
|
@ -0,0 +1,181 @@
|
||||||
|
# Infinite thanks to Talcos from the mtgsalvation forums, who among
|
||||||
|
# many, many other things wrote the original version of this code.
|
||||||
|
# I have merely ported it to fit my needs.
|
||||||
|
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import subprocess
|
||||||
|
import os
|
||||||
|
import struct
|
||||||
|
import math
|
||||||
|
import utils
|
||||||
|
import cardlib
|
||||||
|
import transforms
|
||||||
|
|
||||||
|
# # this would be nice, but doing it naively makes things worse
|
||||||
|
# from joblib import Parallel, delayed
|
||||||
|
# import multiprocessing
|
||||||
|
|
||||||
|
libdir = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
datadir = os.path.realpath(os.path.join(libdir, '../data'))
|
||||||
|
|
||||||
|
# # multithreading control parameters
|
||||||
|
# cores = multiprocessing.cpu_count()
|
||||||
|
# segments = cores / 2 if cores / 2 > 0 else 1
|
||||||
|
|
||||||
|
# max length of vocabulary entries
|
||||||
|
max_w = 50
|
||||||
|
|
||||||
|
|
||||||
|
#### snip! ####
|
||||||
|
|
||||||
|
def read_vector_file(fname, max_word_len=None):
    """Read a binary word-vector file and return (vocabulary, vectors).

    Layout as consumed by this reader (it appears to follow the word2vec
    binary output format -- confirm against the tool that wrote the file):

      * one text header line: '<word count> <vector size>'
      * for each word: the word's bytes terminated by a single space,
        followed by <vector size> native-order C floats. Newline bytes in
        front of / inside a word are ignored.

    Arguments:
      fname        -- path of the vector file.
      max_word_len -- words longer than this are truncated; defaults to the
                      module-level max_w.

    Returns a tuple (vocab, M) where vocab[i] is the i-th word and M[i] is
    its vector as a list of floats scaled to unit length. All-zero vectors
    are left untouched instead of being divided by zero.

    Raises ValueError on a malformed header and struct.error if the file
    ends in the middle of a vector.
    """
    if max_word_len is None:
        max_word_len = max_w

    vocab = []
    M = []
    with open(fname, 'rb') as f:
        # Parse the header as a whole line instead of two fixed 4-byte
        # reads, which only worked for particular digit counts.
        header = f.readline().split()
        if len(header) < 2:
            raise ValueError('malformed vector file header in ' + fname)
        words = int(header[0])
        size = int(header[1])
        unpacker = struct.Struct('f' * size)

        for _ in range(words):
            chars = []
            while True:
                c = f.read(1)
                # b' ' equals ' ' on Python 2 and matches bytes on Python 3.
                if len(c) == 0 or c == b' ':
                    break
                if c != b'\n' and len(chars) < max_word_len:
                    chars.append(c)
            word = b''.join(chars)
            if not isinstance(word, str):
                # Python 3: hand back text so lookups with str tokens work.
                word = word.decode('utf-8', 'replace')

            vec = list(unpacker.unpack(f.read(unpacker.size)))
            length = math.sqrt(sum([x * x for x in vec]))
            if length > 0:
                vec = [x / length for x in vec]

            vocab.append(word)
            M.append(vec)
    return (vocab, M)
|
||||||
|
|
||||||
|
def makevector(vocabulary,vecs,sequence):
    """Build a unit-length vector for a whitespace-separated token sequence.

    Arguments:
      vocabulary -- list of words; vocabulary[i] corresponds to vecs[i].
      vecs       -- list of equal-length numeric vectors.
      sequence   -- string of tokens separated by whitespace.

    Tokens that are not in the vocabulary are skipped. The vectors of the
    remaining tokens (repeats included) are summed component-wise and the sum
    is scaled to unit length.

    Returns a new list of floats; entries of vecs are never modified. If no
    token is known, or the sum has zero magnitude, the unscaled (all-zero)
    vector is returned instead of raising.
    """
    # Map each word to its first position, matching list.index semantics,
    # so that each token costs one hash lookup instead of two linear scans.
    index_of = {}
    for position, known in enumerate(vocabulary):
        index_of.setdefault(known, position)

    indices = [index_of[word] for word in sequence.split() if word in index_of]

    if not indices:
        return [0.0] * len(vecs[0]) if vecs else []

    # Copy the first vector so the normalization below cannot write through
    # to the caller's vocabulary vectors.
    res = list(vecs[indices[0]])
    for i in indices[1:]:
        res = [x + y for x, y in zip(res, vecs[i])]

    length = math.sqrt(sum([x * x for x in res]))
    if length == 0:
        return res
    return [x / length for x in res]
|
||||||
|
|
||||||
|
#### !snip ####
|
||||||
|
|
||||||
|
|
||||||
|
# Two interchangeable implementations of cosine similarity: a numpy-backed
# one when numpy can be imported, and a pure-Python one otherwise. Both
# return a plain float and both return 0.0 when either vector has zero
# magnitude, so callers see the same behavior regardless of which is active.
try:
    import numpy

    def cosine_similarity(v1,v2):
        """Return the cosine similarity (v1 . v2) / (||v1|| * ||v2||).

        v1 and v2 are equal-length sequences of numbers. Returns 0.0 if
        either vector has zero magnitude.
        """
        a = numpy.asarray(v1, dtype=float)
        b = numpy.asarray(v2, dtype=float)
        denominator = math.sqrt(numpy.dot(a, a) * numpy.dot(b, b))
        if denominator == 0:
            return 0.0
        return float(numpy.dot(a, b) / denominator)

except ImportError:
    def cosine_similarity(v1,v2):
        """Return the cosine similarity (v1 . v2) / (||v1|| * ||v2||).

        v1 and v2 are equal-length sequences of numbers. Returns 0.0 if
        either vector has zero magnitude.
        """
        sumxx, sumxy, sumyy = 0, 0, 0
        for x, y in zip(v1, v2):
            sumxx += x*x
            sumyy += y*y
            sumxy += x*y
        denominator = math.sqrt(sumxx*sumyy)
        if denominator == 0:
            return 0.0
        return sumxy/denominator
|
||||||
|
|
||||||
|
def cosine_similarity_name(cardvec, v, name):
    """Pair the cosine similarity of cardvec and v with the given name.

    Returns the tuple (similarity, name); putting the score first lets a
    list of these tuples be sorted by similarity directly.
    """
    similarity = cosine_similarity(cardvec, v)
    return (similarity, name)
|
||||||
|
|
||||||
|
|
||||||
|
class CBOW:
    """Nearest-card lookup backed by precomputed word vectors.

    On construction the word vectors are read from vector_fname and every
    card in card_fname is turned into a single vector with makevector().
    nearest() then ranks those stored cards by cosine similarity to a query.
    """

    def __init__(self, verbose = True,
                 vector_fname = os.path.join(datadir, 'cbow.bin'),
                 card_fname = os.path.join(datadir, 'output.txt')):
        """Load the vector model and vectorize every card in card_fname.

        Arguments:
          verbose      -- print progress and summary information.
          vector_fname -- binary vector file, read with read_vector_file().
          card_fname   -- text file of encoded cards separated by
                          utils.cardsep; each non-empty entry is parsed with
                          cardlib.Card.
        """
        self.verbose = verbose
        # list of (card name, unit vector) pairs, in card_fname order
        self.cardvecs = []

        # Single-argument print(...) behaves the same under the Python 2
        # print statement and the Python 3 print function.
        if self.verbose:
            print('Building a cbow model...')
            print(' Reading binary vector data from: ' + vector_fname)
        (vocab, vecs) = read_vector_file(vector_fname)
        self.vocab = vocab
        self.vecs = vecs

        if self.verbose:
            print(' Reading encoded cards from: ' + card_fname)
            print(' They\'d better be in the same order as the file used to build the vector model!')
        with open(card_fname, 'rt') as f:
            text = f.read()
        for card_src in text.split(utils.cardsep):
            if card_src:
                card = cardlib.Card(card_src)
                name = card.name
                self.cardvecs.append((name, makevector(self.vocab,
                                                       self.vecs,
                                                       card.vectorize())))

        if self.verbose:
            print('... Done.')
            print(' vocab size: ' + str(len(self.vocab)))
            print(' raw vecs: ' + str(len(self.vecs)))
            print(' card vecs: ' + str(len(self.cardvecs)))

    def nearest(self, card, n=5):
        """Return the n stored cards most similar to card.

        card is either a cardlib.Card or a string that is already in
        vectorized (token) form. For a Card, only the text before the first
        blank line of card.vectorize() is used for this side.

        Returns a list of (similarity, name) tuples, best match first. An
        empty query yields []. If card is a Card with a b-side, the b-side's
        n nearest cards are appended after this side's results.
        """
        if isinstance(card, cardlib.Card):
            words = card.vectorize().split('\n\n')[0]
        else:
            # assume it's a string (that's already a vector)
            words = card

        if not words:
            return []

        cardvec = makevector(self.vocab, self.vecs, words)

        comparisons = [cosine_similarity_name(cardvec, v, name)
                       for (name, v) in self.cardvecs]
        comparisons.sort(reverse = True)
        comp_n = comparisons[:n]

        if isinstance(card, cardlib.Card) and card.bside:
            # Forward n so both sides honor the caller's requested count.
            comp_n += self.nearest(card.bside, n=n)

        return comp_n
|
|
@ -126,6 +126,18 @@ class Manacost:
|
||||||
else:
|
else:
|
||||||
return utils.mana_open_delimiter + ''.join(self.sequence) + utils.mana_close_delimiter
|
return utils.mana_open_delimiter + ''.join(self.sequence) + utils.mana_close_delimiter
|
||||||
|
|
||||||
|
def vectorize(self, delimit = False):
    """Return the cost's symbols as a space-separated token string.

    Returns '' when self.none is set. Otherwise every entry of
    self.sequence becomes one token; with delimit=True each token is
    wrapped in parentheses.
    """
    if self.none:
        return ''

    opener, closer = ('(', ')') if delimit else ('', '')
    return ' '.join([opener + symbol + closer for symbol in self.sequence])
|
||||||
|
|
||||||
|
|
||||||
class Manatext:
|
class Manatext:
|
||||||
'''text representation with embedded mana costs'''
|
'''text representation with embedded mana costs'''
|
||||||
|
|
||||||
|
@ -176,3 +188,25 @@ class Manatext:
|
||||||
for cost in self.costs:
|
for cost in self.costs:
|
||||||
text = text.replace(utils.reserved_mana_marker, cost.encode(randomize = randomize), 1)
|
text = text.replace(utils.reserved_mana_marker, cost.encode(randomize = randomize), 1)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
def vectorize(self):
    """Return the text as a single-space-separated token string.

    Every marker or punctuation character in special_chars is padded with
    spaces so that it becomes its own token, '/' is expanded to '/ /', and
    each utils.reserved_mana_marker placeholder is then replaced, in order,
    by the undelimited vectorize() of the corresponding entry in self.costs.
    Finally all runs of whitespace are collapsed to single spaces.
    """
    text = self.text
    # utils.newline needs to appear only once: padding it a second time just
    # adds spaces that the final split/join removes again.
    special_chars = [utils.reserved_mana_marker,
                     utils.dash_marker,
                     utils.bullet_marker,
                     utils.this_marker,
                     utils.counter_marker,
                     utils.choice_open_delimiter,
                     utils.choice_close_delimiter,
                     utils.newline,
                     #utils.x_marker,
                     utils.tap_marker,
                     utils.untap_marker,
                     ';', ':', '"', ',', '.']
    for char in special_chars:
        text = text.replace(char, ' ' + char + ' ')
    text = text.replace('/', '/ /')
    for cost in self.costs:
        # count=1: consume one placeholder per cost, left to right
        text = text.replace(utils.reserved_mana_marker, cost.vectorize(), 1)
    return ' '.join(text.split())
|
||||||
|
|
|
@ -121,6 +121,7 @@ def text_pass_4b_x(s):
|
||||||
s = s.replace(' x ', ' ' + x_marker + ' ')
|
s = s.replace(' x ', ' ' + x_marker + ' ')
|
||||||
s = s.replace('x:', x_marker + ':')
|
s = s.replace('x:', x_marker + ':')
|
||||||
s = s.replace('x~', x_marker + '~')
|
s = s.replace('x~', x_marker + '~')
|
||||||
|
s = s.replace(u'x\u2014', x_marker + u'\u2014')
|
||||||
s = s.replace('x.', x_marker + '.')
|
s = s.replace('x.', x_marker + '.')
|
||||||
s = s.replace('x,', x_marker + ',')
|
s = s.replace('x,', x_marker + ',')
|
||||||
s = s.replace('x/x', x_marker + '/' + x_marker)
|
s = s.replace('x/x', x_marker + '/' + x_marker)
|
||||||
|
|
Loading…
Reference in a new issue