Added support for checking card names

This commit is contained in:
Bill Zorn 2015-08-01 22:16:30 -07:00
parent 758f48b790
commit 70bec99138
2 changed files with 87 additions and 1 deletions

View file

@ -8,6 +8,10 @@ import utils
import jdecode import jdecode
import cardlib import cardlib
from cbow import CBOW from cbow import CBOW
from namediff import Namediff
def exclude_sets(cardset):
    """Return True if *cardset* is one of the set names to be skipped.

    The skipped set names are 'Unglued', 'Unhinged' and 'Celebration';
    the comparison is an exact, case-sensitive match.
    """
    return cardset in ('Unglued', 'Unhinged', 'Celebration')
def main(fname, oname = None, verbose = True, def main(fname, oname = None, verbose = True,
gatherer = False, for_forum = False, creativity = False, norarity = False): gatherer = False, for_forum = False, creativity = False, norarity = False):
@ -38,7 +42,23 @@ def main(fname, oname = None, verbose = True,
for json_cardname in sorted(json_srcs): for json_cardname in sorted(json_srcs):
if len(json_srcs[json_cardname]) > 0: if len(json_srcs[json_cardname]) > 0:
jcards = json_srcs[json_cardname] jcards = json_srcs[json_cardname]
card = cardlib.Card(json_srcs[json_cardname][0], fmt_ordered = decode_fields)
# look for a normal rarity version, in a set we can use
idx = 0
card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
while (idx < len(jcards)
and (card.rarity == utils.rarity_special_marker
or exclude_sets(jcards[idx][utils.json_field_set_name]))):
idx += 1
if idx < len(jcards):
card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
# if there isn't one, settle with index 0
if idx >= len(jcards):
idx = 0
card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
# we could go back and look for a card satisfying one of the criteria,
# but eh
if card.valid: if card.valid:
valid += 1 valid += 1
elif card.parsed: elif card.parsed:
@ -84,6 +104,7 @@ def main(fname, oname = None, verbose = True,
if creativity: if creativity:
cbow = CBOW() cbow = CBOW()
namediff = Namediff()
def writecards(writer): def writecards(writer):
for card in cards: for card in cards:
@ -92,6 +113,14 @@ def main(fname, oname = None, verbose = True,
writer.write('~~ closest cards ~~\n'.encode('utf-8')) writer.write('~~ closest cards ~~\n'.encode('utf-8'))
nearest = cbow.nearest(card) nearest = cbow.nearest(card)
for dist, cardname in nearest: for dist, cardname in nearest:
cardname = namediff.names[cardname]
if for_forum:
cardname = '[card]' + cardname + '[/card]'
writer.write((cardname + ': ' + str(dist) + '\n').encode('utf-8'))
writer.write('~~ closest names ~~\n'.encode('utf-8'))
nearest = namediff.nearest(card.name)
for dist, cardname in nearest:
cardname = namediff.names[cardname]
if for_forum: if for_forum:
cardname = '[card]' + cardname + '[/card]' cardname = '[card]' + cardname + '[/card]'
writer.write((cardname + ': ' + str(dist) + '\n').encode('utf-8')) writer.write((cardname + ': ' + str(dist) + '\n').encode('utf-8'))

57
lib/namediff.py Normal file
View file

@ -0,0 +1,57 @@
import difflib
import os
import jdecode
import cardlib
# Directory that holds this module (symlinks resolved), and the sibling
# 'data' directory from which the default card JSON file is read.
libdir = os.path.dirname(os.path.realpath(__file__))
datadir = os.path.realpath(os.path.join(libdir, '../data'))


class Namediff:
    """Find the known card names most similar to a given name.

    Attributes set by ``__init__``:
        verbose  -- whether progress messages are printed.
        names    -- dict mapping each ``cardlib.Card`` name to the 'name'
                    field of the JSON entry that card was built from.
        matchers -- one ``difflib.SequenceMatcher`` per key of ``names``,
                    with that key installed as the matcher's second sequence.
    """

    def __init__(self, verbose = True,
                 json_fname = os.path.join(datadir, 'AllSets.json')):
        """Read card names from *json_fname* and build the matchers.

        verbose    -- print progress messages; also forwarded to
                      ``jdecode.mtg_open_json``.
        json_fname -- path of the JSON card file to read.
        """
        self.verbose = verbose
        self.names = {}

        # NOTE: print is always called with one parenthesised string, which
        # prints the same text as a Python 2 statement and as a Python 3 call.
        if self.verbose:
            print('Setting up namediff...')
            print(' Reading names from: ' + json_fname)

        # Presumably a mapping from card name to a list of JSON card dicts;
        # verify against jdecode.mtg_open_json.
        json_srcs = jdecode.mtg_open_json(json_fname, verbose)
        namecount = 0
        for json_cardname in sorted(json_srcs):
            jcards = json_srcs[json_cardname]
            if len(jcards) == 0:
                continue
            # just use the first one
            idx = 0
            card = cardlib.Card(jcards[idx])
            name = card.name
            jname = jcards[idx]['name']
            if name in self.names:
                # Reported regardless of verbose: the later entry is dropped.
                print(' Duplicate name ' + name + ', ignoring.')
            else:
                self.names[name] = jname
                namecount += 1

        if self.verbose:
            print(' Read ' + str(namecount) + ' unique cardnames')
            print(' Building SequenceMatcher objects.')
        # SequenceMatcher caches its analysis of the second sequence, so one
        # matcher is kept per known name and only seq1 changes per query.
        self.matchers = [difflib.SequenceMatcher(b=n, autojunk=False)
                         for n in self.names]
        if self.verbose:
            print('... Done.')

    def nearest(self, name, n=3):
        """Return the known names closest to *name*, best match first.

        The result is a list of ``(ratio, known_name)`` tuples, where
        ``known_name`` is a key of ``self.names``, sorted in descending
        order. If the best ratio is at least 1 (an exact match) only that
        single entry is returned; otherwise the first *n* entries are.
        An empty list is returned when there are no names to compare with.
        """
        ratios = []
        for m in self.matchers:
            m.set_seq1(name)
            ratios.append((m.ratio(), m.b))
        ratios.sort(reverse = True)

        # Guard the index: with no matchers there is no best entry to test.
        if ratios and ratios[0][0] >= 1:
            return ratios[:1]
        return ratios[:n]