diff --git a/decode.py b/decode.py
index 94944bc..e6a1516 100755
--- a/decode.py
+++ b/decode.py
@@ -8,6 +8,11 @@ import utils
 import jdecode
 import cardlib
 from cbow import CBOW
+from namediff import Namediff
+
+def exclude_sets(cardset):
+    """Return True if cardset is a set whose printings are skipped when picking a card version."""
+    return cardset in ('Unglued', 'Unhinged', 'Celebration')
 
 def main(fname, oname = None, verbose = True,
          gatherer = False, for_forum = False, creativity = False, norarity = False):
@@ -38,7 +43,23 @@ def main(fname, oname = None, verbose = True,
         for json_cardname in sorted(json_srcs):
             if len(json_srcs[json_cardname]) > 0:
                 jcards = json_srcs[json_cardname]
-                card = cardlib.Card(json_srcs[json_cardname][0], fmt_ordered = decode_fields)
+
+                # look for a normal rarity version, in a set we can use
+                idx = 0
+                card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
+                while (idx < len(jcards)
+                       and (card.rarity == utils.rarity_special_marker
+                            or exclude_sets(jcards[idx][utils.json_field_set_name]))):
+                    idx += 1
+                    if idx < len(jcards):
+                        card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
+                # if there isn't one, settle with index 0
+                if idx >= len(jcards):
+                    idx = 0
+                    card = cardlib.Card(jcards[idx], fmt_ordered = decode_fields)
+                # we could go back and look for a card satisfying one of the criteria,
+                # but eh
+
                 if card.valid:
                     valid += 1
                 elif card.parsed:
@@ -84,6 +105,7 @@ def main(fname, oname = None, verbose = True,
 
     if creativity:
         cbow = CBOW()
+        namediff = Namediff()
 
     def writecards(writer):
         for card in cards:
@@ -92,6 +114,14 @@ def main(fname, oname = None, verbose = True,
                 writer.write('~~ closest cards ~~\n'.encode('utf-8'))
                 nearest = cbow.nearest(card)
                 for dist, cardname in nearest:
+                    cardname = namediff.names[cardname]
+                    if for_forum:
+                        cardname = '[card]' + cardname + '[/card]'
+                    writer.write((cardname + ': ' + str(dist) + '\n').encode('utf-8'))
+                writer.write('~~ closest names ~~\n'.encode('utf-8'))
+                nearest = namediff.nearest(card.name)
+                for dist, cardname in nearest:
+                    cardname = namediff.names[cardname]
                     if for_forum:
                         cardname = '[card]' + cardname + '[/card]'
                     writer.write((cardname + ': ' + str(dist) + '\n').encode('utf-8'))
diff --git a/lib/namediff.py b/lib/namediff.py
new file mode 100644
index 0000000..71cdf2e
--- /dev/null
+++ b/lib/namediff.py
@@ -0,0 +1,77 @@
+import difflib
+import os
+import jdecode
+import cardlib
+
+libdir = os.path.dirname(os.path.realpath(__file__))
+datadir = os.path.realpath(os.path.join(libdir, '../data'))
+
+class Namediff:
+    """Fuzzy lookup of card names using difflib.SequenceMatcher.
+
+    self.names maps each cardlib.Card name to the 'name' field of the json
+    entry it was built from; self.matchers holds one matcher per key.
+    """
+
+    def __init__(self, verbose = True,
+                 json_fname = os.path.join(datadir, 'AllSets.json')):
+        """Load card names from json_fname and build a matcher for each.
+
+        Progress messages are printed only when verbose is true; the
+        duplicate-name warning is always printed.
+        """
+        self.verbose = verbose
+        self.names = {}
+
+        if self.verbose:
+            print 'Setting up namediff...'
+            print ' Reading names from: ' + json_fname
+        json_srcs = jdecode.mtg_open_json(json_fname, verbose)
+        namecount = 0
+        for json_cardname in sorted(json_srcs):
+            if len(json_srcs[json_cardname]) > 0:
+                jcards = json_srcs[json_cardname]
+
+                # just use the first one
+                idx = 0
+                card = cardlib.Card(jcards[idx])
+                name = card.name
+                jname = jcards[idx]['name']
+
+                if name in self.names:
+                    print ' Duplicate name ' + name + ', ignoring.'
+                else:
+                    self.names[name] = jname
+                    namecount += 1
+
+        if self.verbose:
+            print ' Read ' + str(namecount) + ' unique cardnames'
+            print ' Building SequenceMatcher objects.'
+
+        # b (seq2) is the sequence SequenceMatcher caches details about, so
+        # each known name goes there and a query only has to set seq1
+        self.matchers = [difflib.SequenceMatcher(b=n, autojunk=False) for n in self.names]
+
+        if self.verbose:
+            print '... Done.'
+
+    def nearest(self, name, n=3):
+        """Return (ratio, known_name) pairs for the names most similar to name.
+
+        The result is sorted best first and has at most n entries; when the
+        best ratio is 1 or more only that single pair is returned. Returns
+        an empty list if no names were loaded.
+        """
+        for m in self.matchers:
+            m.set_seq1(name)
+        ratios = [(m.ratio(), m.b) for m in self.matchers]
+        ratios.sort(reverse = True)
+
+        # nothing loaded, so there is no best match to inspect
+        if not ratios:
+            return []
+
+        if ratios[0][0] >= 1:
+            return ratios[:1]
+        else:
+            return ratios[:n]