mtgencode/scripts/sanity.py

143 lines
4.6 KiB
Python
Executable file

#!/usr/bin/env python
import sys
import os
import re
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
sys.path.append(libdir)
import utils
import jdecode
import transforms
def check_lines(fname):
cards = jdecode.mtg_open_file(fname, verbose=True, linetrans=True)
prelines = set()
keylines = set()
mainlines = set()
costlines = set()
postlines = set()
known = ['enchant ', 'equip', 'countertype', 'multikicker', 'kicker',
'suspend', 'echo', 'awaken', 'bestow', 'buyback',
'cumulative', 'dash', 'entwine', 'evoke', 'fortify',
'flashback', 'madness', 'morph', 'megamorph', 'miracle', 'ninjutsu',
'overload', 'prowl', 'recover', 'reinforce', 'replicate', 'scavenge',
'splice', 'surge', 'unearth', 'transfigure', 'transmute',
]
known = []
for card in cards:
prel, keyl, mainl, costl, postl = transforms.separate_lines(card.text.encode(randomize=False))
if card.bside:
prel2, keyl2, mainl2, costl2, postl2 = transforms.separate_lines(card.bside.text.encode(randomize=False))
prel += prel2
keyl += keyl2
mainl += mainl2
costl += costl2
postl += postl2
for line in prel:
if line.strip() == '':
print(card.name, card.text.text)
if any(line.startswith(s) for s in known):
line = 'known'
prelines.add(line)
for line in postl:
if line.strip() == '':
print(card.name, card.text.text)
if any(line.startswith(s) for s in known):
line = 'known'
postlines.add(line)
for line in keyl:
if line.strip() == '':
print(card.name, card.text.text)
if any(line.startswith(s) for s in known):
line = 'known'
keylines.add(line)
for line in mainl:
if line.strip() == '':
print(card.name, card.text.text)
# if any(line.startswith(s) for s in known):
# line = 'known'
mainlines.add(line)
for line in costl:
if line.strip() == '':
print(card.name, card.text.text)
# if any(line.startswith(s) for s in known) or 'cycling' in line or 'monstrosity' in line:
# line = 'known'
costlines.add(line)
print('prel: {:d}, keyl: {:d}, mainl: {:d}, postl {:d}'
.format(len(prelines), len(keylines), len(mainlines), len(postlines)))
print('\nprelines')
for line in sorted(prelines):
print(line)
print('\npostlines')
for line in sorted(postlines):
print(line)
print('\ncostlines')
for line in sorted(costlines):
print(line)
print('\nkeylines')
for line in sorted(keylines):
print(line)
print('\nmainlines')
for line in sorted(mainlines):
#if any(s in line for s in ['champion', 'devour', 'tribute']):
print(line)
def check_vocab(fname):
cards = jdecode.mtg_open_file(fname, verbose=True, linetrans=True)
vocab = {}
for card in cards:
words = card.text.vectorize().split()
if card.bside:
words += card.bside.text.vectorize().split()
for word in words:
if not word in vocab:
vocab[word] = 1
else:
vocab[word] += 1
for word in sorted(vocab, lambda x,y: cmp(vocab[x], vocab[y]), reverse = True):
print('{:8d} : {:s}'.format(vocab[word], word))
n = 3
for card in cards:
words = card.text.vectorize().split()
if card.bside:
words += card.bside.text.vectorize().split()
for word in words:
if vocab[word] <= n:
#if 'name' in word:
print('\n{:8d} : {:s}'.format(vocab[word], word))
print(card.encode())
break
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('infile', nargs='?', default=os.path.join(libdir, '../data/output.txt'),
help='encoded card file or json corpus to process')
parser.add_argument('-lines', action='store_true',
help='show behavior of line separation')
parser.add_argument('-vocab', action='store_true',
help='show vocabulary counts from encoded card text')
args = parser.parse_args()
if args.lines:
check_lines(args.infile)
if args.vocab:
check_vocab(args.infile)
exit(0)