ipython and beginning of pairing
This commit is contained in:
parent
00159593bb
commit
d047ded658
4 changed files with 384 additions and 9 deletions
333
mtg_sweep1.ipynb
Normal file
333
mtg_sweep1.ipynb
Normal file
File diff suppressed because one or more lines are too long
|
@ -8,9 +8,14 @@ from collections import OrderedDict
|
||||||
import scipy
|
import scipy
|
||||||
import scipy.stats
|
import scipy.stats
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import math
|
||||||
|
|
||||||
|
def mean_nonan(l):
    """Return the arithmetic mean of the values in l, ignoring NaN entries.

    l is an iterable of numbers. If nothing is left after dropping NaN
    values (including when l is empty), NaN is returned.
    """
    filtered = [x for x in l if not math.isnan(x)]
    if not filtered:
        # np.mean([]) also evaluates to NaN but emits a RuntimeWarning;
        # return the same value quietly.
        return float('nan')
    return np.mean(filtered)
|
||||||
|
|
||||||
def gmean_nonzero(l):
    """Return the geometric mean of the values in l, skipping zeros and NaN.

    Zeros are dropped because a single zero would force the geometric mean
    to zero; NaN entries are dropped because they would make the result NaN.
    If nothing is left after filtering, NaN is returned.
    """
    filtered = [x for x in l if x != 0 and not math.isnan(x)]
    if not filtered:
        # Nothing usable to average; report NaN rather than handing an
        # empty sequence to scipy.
        return float('nan')
    return scipy.stats.gmean(filtered)
|
||||||
|
|
||||||
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
|
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
|
||||||
|
@ -18,7 +23,7 @@ sys.path.append(libdir)
|
||||||
datadir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data')
|
datadir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data')
|
||||||
import jdecode
|
import jdecode
|
||||||
|
|
||||||
import validate
|
import mtg_validate
|
||||||
import ngrams
|
import ngrams
|
||||||
|
|
||||||
def annotate_values(values):
|
def annotate_values(values):
|
||||||
|
@ -66,7 +71,7 @@ def get_statistics(fname, lm = None, sep = False, verbose=False):
|
||||||
|
|
||||||
# validate
|
# validate
|
||||||
((total_all, total_good, total_bad, total_uncovered),
|
((total_all, total_good, total_bad, total_uncovered),
|
||||||
values) = validate.process_props(cards)
|
values) = mtg_validate.process_props(cards)
|
||||||
|
|
||||||
stats['props'] = annotate_values(values)
|
stats['props'] = annotate_values(values)
|
||||||
stats['props']['overall'] = OrderedDict([('total', total_all),
|
stats['props']['overall'] = OrderedDict([('total', total_all),
|
||||||
|
@ -97,8 +102,8 @@ def get_statistics(fname, lm = None, sep = False, verbose=False):
|
||||||
if cdist == 1.0:
|
if cdist == 1.0:
|
||||||
card_dupes += 1
|
card_dupes += 1
|
||||||
|
|
||||||
dists['name_mean'] = np.mean(dists['name'])
|
dists['name_mean'] = mean_nonan(dists['name'])
|
||||||
dists['cbow_mean'] = np.mean(dists['cbow'])
|
dists['cbow_mean'] = mean_nonan(dists['cbow'])
|
||||||
dists['name_geomean'] = gmean_nonzero(dists['name'])
|
dists['name_geomean'] = gmean_nonzero(dists['name'])
|
||||||
dists['cbow_geomean'] = gmean_nonzero(dists['cbow'])
|
dists['cbow_geomean'] = gmean_nonzero(dists['cbow'])
|
||||||
stats['dists'] = dists
|
stats['dists'] = dists
|
||||||
|
@ -125,19 +130,20 @@ def get_statistics(fname, lm = None, sep = False, verbose=False):
|
||||||
ngram['perp'] += [perp]
|
ngram['perp'] += [perp]
|
||||||
ngram['perp_per'] += [perp_per]
|
ngram['perp_per'] += [perp_per]
|
||||||
|
|
||||||
ngram['perp_mean'] = np.mean(ngram['perp'])
|
ngram['perp_mean'] = mean_nonan(ngram['perp'])
|
||||||
ngram['perp_per_mean'] = np.mean(ngram['perp_per'])
|
ngram['perp_per_mean'] = mean_nonan(ngram['perp_per'])
|
||||||
ngram['perp_geomean'] = gmean_nonzero(ngram['perp'])
|
ngram['perp_geomean'] = gmean_nonzero(ngram['perp'])
|
||||||
ngram['perp_per_geomean'] = gmean_nonzero(ngram['perp_per'])
|
ngram['perp_per_geomean'] = gmean_nonzero(ngram['perp_per'])
|
||||||
stats['ngram'] = ngram
|
stats['ngram'] = ngram
|
||||||
|
|
||||||
print_statistics(stats)
|
return stats
|
||||||
|
|
||||||
|
|
||||||
def main(infile, verbose = False):
    """Compute statistics for the card file infile and print them.

    An n-gram model is first built from 'output.txt' in datadir (opened
    via jdecode.mtg_open_file); it is then passed to get_statistics along
    with infile, and the returned statistics are handed to
    print_statistics.

    verbose is forwarded to get_statistics only; the model build always
    runs with verbose=True.
    """
    lm = ngrams.build_ngram_model(jdecode.mtg_open_file(str(os.path.join(datadir, 'output.txt'))),
                                  3, separate_lines=True, verbose=True)
    # get_statistics returns the stats instead of printing them, so the
    # printing happens here.
    stats = get_statistics(infile, lm=lm, sep=True, verbose=verbose)
    print_statistics(stats)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
|
36
scripts/pairing.py
Executable file
36
scripts/pairing.py
Executable file
|
@ -0,0 +1,36 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../lib')
|
||||||
|
sys.path.append(libdir)
|
||||||
|
datadir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data')
|
||||||
|
import jdecode
|
||||||
|
import ngrams
|
||||||
|
import analysis
|
||||||
|
|
||||||
|
separate_lines=True
|
||||||
|
|
||||||
|
def main(fname, n=20, verbose=False):
    """Load the reference corpus and compute statistics for fname.

    Opens 'output.txt' from datadir with jdecode.mtg_open_file, builds an
    n-gram model from it (3 is passed as the second argument to
    ngrams.build_ngram_model), opens fname the same way, and calls
    analysis.get_statistics on fname with that model.

    n is accepted but not used yet by this function.
    NOTE(review): cards and stats are computed but not consumed here;
    presumably the pairing logic that uses them is still to be written.
    """
    realcards = jdecode.mtg_open_file(str(os.path.join(datadir, 'output.txt')), verbose=verbose)
    lm = ngrams.build_ngram_model(realcards, 3, separate_lines=separate_lines, verbose=verbose)
    cards = jdecode.mtg_open_file(fname, verbose=verbose)
    stats = analysis.get_statistics(fname, lm=lm, sep=separate_lines, verbose=verbose)

    # Call form prints the same text under both Python 2 and Python 3.
    print('derp')
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and hand them to main().

    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument('infile', #nargs='?'. default=None,
                        help='encoded card file or json corpus to process')
    # type/default keep n an int and preserve main()'s default of 20 when
    # the flag is omitted (otherwise None or a string would be passed).
    parser.add_argument('-n', '--n', action='store', type=int, default=20,
                        help='number of cards to consider for each pairing')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='verbose output')

    args = parser.parse_args()
    main(args.infile, n=args.n, verbose=args.verbose)
    # sys.exit rather than the site-provided exit() helper, which is not
    # guaranteed to exist in every interpreter configuration.
    sys.exit(0)
|
Loading…
Reference in a new issue