ipython and beginning of pairing
This commit is contained in:
parent
00159593bb
commit
d047ded658
4 changed files with 384 additions and 9 deletions
333
mtg_sweep1.ipynb
Normal file
333
mtg_sweep1.ipynb
Normal file
File diff suppressed because one or more lines are too long
|
@ -8,9 +8,14 @@ from collections import OrderedDict
|
|||
import scipy
|
||||
import scipy.stats
|
||||
import numpy as np
|
||||
import math
|
||||
|
||||
def mean_nonan(l):
    """Return the arithmetic mean of the values in l, ignoring NaN entries.

    l is an iterable of numbers. If nothing is left after dropping NaN
    values (including the case where l is empty), NaN is returned.
    """
    filtered = [x for x in l if not math.isnan(x)]
    if not filtered:
        # np.mean of an empty sequence also produces NaN, but only after
        # emitting a "Mean of empty slice" RuntimeWarning; be explicit.
        return float('nan')
    return np.mean(filtered)
|
||||
|
||||
def gmean_nonzero(l):
    """Return the geometric mean of the values in l, skipping zeros and NaNs.

    l is an iterable of numbers. Entries equal to 0 and NaN entries are
    dropped before the mean is taken. If nothing is left after filtering
    (including the case where l is empty), NaN is returned.
    """
    filtered = [x for x in l if x != 0 and not math.isnan(x)]
    if not filtered:
        # Do not hand scipy an empty sequence; report "no data" as NaN.
        return float('nan')
    return scipy.stats.gmean(filtered)
|
||||
|
||||
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
|
||||
|
@ -18,7 +23,7 @@ sys.path.append(libdir)
|
|||
datadir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data')
|
||||
import jdecode
|
||||
|
||||
import validate
|
||||
import mtg_validate
|
||||
import ngrams
|
||||
|
||||
def annotate_values(values):
|
||||
|
@ -66,7 +71,7 @@ def get_statistics(fname, lm = None, sep = False, verbose=False):
|
|||
|
||||
# validate
|
||||
((total_all, total_good, total_bad, total_uncovered),
|
||||
values) = validate.process_props(cards)
|
||||
values) = mtg_validate.process_props(cards)
|
||||
|
||||
stats['props'] = annotate_values(values)
|
||||
stats['props']['overall'] = OrderedDict([('total', total_all),
|
||||
|
@ -97,8 +102,8 @@ def get_statistics(fname, lm = None, sep = False, verbose=False):
|
|||
if cdist == 1.0:
|
||||
card_dupes += 1
|
||||
|
||||
dists['name_mean'] = np.mean(dists['name'])
|
||||
dists['cbow_mean'] = np.mean(dists['cbow'])
|
||||
dists['name_mean'] = mean_nonan(dists['name'])
|
||||
dists['cbow_mean'] = mean_nonan(dists['cbow'])
|
||||
dists['name_geomean'] = gmean_nonzero(dists['name'])
|
||||
dists['cbow_geomean'] = gmean_nonzero(dists['cbow'])
|
||||
stats['dists'] = dists
|
||||
|
@ -125,19 +130,20 @@ def get_statistics(fname, lm = None, sep = False, verbose=False):
|
|||
ngram['perp'] += [perp]
|
||||
ngram['perp_per'] += [perp_per]
|
||||
|
||||
ngram['perp_mean'] = np.mean(ngram['perp'])
|
||||
ngram['perp_per_mean'] = np.mean(ngram['perp_per'])
|
||||
ngram['perp_mean'] = mean_nonan(ngram['perp'])
|
||||
ngram['perp_per_mean'] = mean_nonan(ngram['perp_per'])
|
||||
ngram['perp_geomean'] = gmean_nonzero(ngram['perp'])
|
||||
ngram['perp_per_geomean'] = gmean_nonzero(ngram['perp_per'])
|
||||
stats['ngram'] = ngram
|
||||
|
||||
print_statistics(stats)
|
||||
return stats
|
||||
|
||||
|
||||
def main(infile, verbose = False):
    """Compute and print statistics for the card file infile.

    An n-gram model (built with order argument 3 and separate_lines=True)
    is constructed from 'output.txt' in datadir and passed to
    get_statistics as lm. The resulting stats are printed with
    print_statistics. verbose is forwarded to get_statistics only.
    """
    corpus_path = str(os.path.join(datadir, 'output.txt'))
    # NOTE(review): model building is always verbose here, regardless of the
    # verbose argument -- confirm whether that is intended.
    lm = ngrams.build_ngram_model(jdecode.mtg_open_file(corpus_path),
                                  3, separate_lines=True, verbose=True)
    # Call get_statistics exactly once and keep its result; a second call
    # whose return value is discarded only repeats the work.
    stats = get_statistics(infile, lm=lm, sep=True, verbose=verbose)
    print_statistics(stats)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
|
|
36
scripts/pairing.py
Executable file
36
scripts/pairing.py
Executable file
|
@ -0,0 +1,36 @@
|
|||
#!/usr/bin/env python
|
||||
import sys
|
||||
import os
|
||||
|
||||
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
|
||||
sys.path.append(libdir)
|
||||
datadir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data')
|
||||
import jdecode
|
||||
import ngrams
|
||||
import analysis
|
||||
|
||||
separate_lines=True
|
||||
|
||||
def main(fname, n=20, verbose=False):
    """Entry point for the (work-in-progress) pairing script.

    fname   -- path of the card file to process
    n       -- number of cards to consider for each pairing (not used yet)
    verbose -- forwarded to the file-opening, model-building and
               statistics calls

    Builds an n-gram model from 'output.txt' in datadir, opens fname and
    gathers statistics for it via analysis.get_statistics.
    """
    realcards = jdecode.mtg_open_file(str(os.path.join(datadir, 'output.txt')), verbose=verbose)
    lm = ngrams.build_ngram_model(realcards, 3, separate_lines=separate_lines, verbose=verbose)
    # cards and stats are computed but not consumed yet; presumably the
    # pairing logic will use them -- TODO confirm.
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    stats = analysis.get_statistics(fname, lm=lm, sep=separate_lines, verbose=verbose)

    # Parenthesized form prints the same text on Python 2 and, unlike the
    # print statement, is also valid syntax on Python 3.
    print('derp')
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: parse arguments and hand them to main().
    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument('infile',
                        help='encoded card file or json corpus to process')
    # type=int so main() receives a number rather than a string, and
    # default=20 so omitting -n does not pass None over main()'s own default.
    parser.add_argument('-n', '--n', action='store', type=int, default=20,
                        help='number of cards to consider for each pairing')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='verbose output')

    args = parser.parse_args()
    main(args.infile, n=args.n, verbose=args.verbose)
    # sys.exit is always available; the bare exit() helper is injected by
    # the site module and is meant for interactive use.
    sys.exit(0)
|
Loading…
Reference in a new issue