Updated to the new mtgjson version and added legacy support for formats
without a rarity field.
This commit is contained in:
parent
f0e631e015
commit
758f48b790
5 changed files with 42511 additions and 41737 deletions
BIN
data/cbow.bin
BIN
data/cbow.bin
Binary file not shown.
3
data/cbow.sh
Executable file
3
data/cbow.sh
Executable file
|
@ -0,0 +1,3 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
word2vec -train cbow.txt -output cbow.bin -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 8 -binary 1 -iter 15
|
29860
data/cbow.txt
29860
data/cbow.txt
File diff suppressed because it is too large
Load diff
54343
data/output.txt
54343
data/output.txt
File diff suppressed because it is too large
Load diff
40
decode.py
40
decode.py
|
@ -10,12 +10,27 @@ import cardlib
|
||||||
from cbow import CBOW
|
from cbow import CBOW
|
||||||
|
|
||||||
def main(fname, oname = None, verbose = True,
|
def main(fname, oname = None, verbose = True,
|
||||||
gatherer = False, for_forum = False, creativity = False):
|
gatherer = False, for_forum = False, creativity = False, norarity = False):
|
||||||
cards = []
|
cards = []
|
||||||
valid = 0
|
valid = 0
|
||||||
invalid = 0
|
invalid = 0
|
||||||
unparsed = 0
|
unparsed = 0
|
||||||
|
|
||||||
|
if norarity:
|
||||||
|
decode_fields = [
|
||||||
|
cardlib.field_name,
|
||||||
|
cardlib.field_supertypes,
|
||||||
|
cardlib.field_types,
|
||||||
|
cardlib.field_loyalty,
|
||||||
|
cardlib.field_subtypes,
|
||||||
|
#cardlib.field_rarity,
|
||||||
|
cardlib.field_pt,
|
||||||
|
cardlib.field_cost,
|
||||||
|
cardlib.field_text,
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
decode_fields = cardlib.fmt_ordered_default
|
||||||
|
|
||||||
if fname[-5:] == '.json':
|
if fname[-5:] == '.json':
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'This looks like a json file: ' + fname
|
print 'This looks like a json file: ' + fname
|
||||||
|
@ -23,7 +38,7 @@ def main(fname, oname = None, verbose = True,
|
||||||
for json_cardname in sorted(json_srcs):
|
for json_cardname in sorted(json_srcs):
|
||||||
if len(json_srcs[json_cardname]) > 0:
|
if len(json_srcs[json_cardname]) > 0:
|
||||||
jcards = json_srcs[json_cardname]
|
jcards = json_srcs[json_cardname]
|
||||||
card = cardlib.Card(json_srcs[json_cardname][0])
|
card = cardlib.Card(json_srcs[json_cardname][0], fmt_ordered = decode_fields)
|
||||||
if card.valid:
|
if card.valid:
|
||||||
valid += 1
|
valid += 1
|
||||||
elif card.parsed:
|
elif card.parsed:
|
||||||
|
@ -40,7 +55,7 @@ def main(fname, oname = None, verbose = True,
|
||||||
text = f.read()
|
text = f.read()
|
||||||
for card_src in text.split(utils.cardsep):
|
for card_src in text.split(utils.cardsep):
|
||||||
if card_src:
|
if card_src:
|
||||||
card = cardlib.Card(card_src)
|
card = cardlib.Card(card_src, fmt_ordered = decode_fields)
|
||||||
if card.valid:
|
if card.valid:
|
||||||
valid += 1
|
valid += 1
|
||||||
elif card.parsed:
|
elif card.parsed:
|
||||||
|
@ -53,6 +68,20 @@ def main(fname, oname = None, verbose = True,
|
||||||
print (str(valid) + ' valid, ' + str(invalid) + ' invalid, '
|
print (str(valid) + ' valid, ' + str(invalid) + ' invalid, '
|
||||||
+ str(unparsed) + ' failed to parse.')
|
+ str(unparsed) + ' failed to parse.')
|
||||||
|
|
||||||
|
good_count = 0
|
||||||
|
bad_count = 0
|
||||||
|
for card in cards:
|
||||||
|
if not card.parsed and not card.text.text:
|
||||||
|
bad_count += 1
|
||||||
|
else:
|
||||||
|
good_count += 1
|
||||||
|
if good_count + bad_count > 15:
|
||||||
|
break
|
||||||
|
# random heuristic
|
||||||
|
if bad_count > 10:
|
||||||
|
print 'Saw a bunch of unparsed cards with no text:'
|
||||||
|
print 'If this is a legacy format, try rerunning with --norarity'
|
||||||
|
|
||||||
if creativity:
|
if creativity:
|
||||||
cbow = CBOW()
|
cbow = CBOW()
|
||||||
|
|
||||||
|
@ -92,10 +121,13 @@ if __name__ == '__main__':
|
||||||
help='use pretty mana encoding for mtgsalvation forum')
|
help='use pretty mana encoding for mtgsalvation forum')
|
||||||
parser.add_argument('-c', '--creativity', action='store_true',
|
parser.add_argument('-c', '--creativity', action='store_true',
|
||||||
help='use CBOW fuzzy matching to check creativity of cards')
|
help='use CBOW fuzzy matching to check creativity of cards')
|
||||||
|
parser.add_argument('--norarity', action='store_true',
|
||||||
|
help='the card format has no rarity field; use for legacy input')
|
||||||
parser.add_argument('-v', '--verbose', action='store_true',
|
parser.add_argument('-v', '--verbose', action='store_true',
|
||||||
help='verbose output')
|
help='verbose output')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
main(args.infile, args.outfile, verbose = args.verbose,
|
main(args.infile, args.outfile, verbose = args.verbose,
|
||||||
gatherer = args.gatherer, for_forum = args.forum, creativity = args.creativity)
|
gatherer = args.gatherer, for_forum = args.forum, creativity = args.creativity,
|
||||||
|
norarity = args.norarity)
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
Loading…
Reference in a new issue