The encoding now emits two mana costs: one before and one after the text!
There are some other (temporary) changes too, and things are a mess. Let's worry about that later.
This commit is contained in:
parent
b4f8d26a20
commit
eef2e70e28
3 changed files with 120 additions and 22 deletions
119
datamine.py
119
datamine.py
|
@ -21,6 +21,26 @@ def prettymana(s, for_forum):
|
||||||
else:
|
else:
|
||||||
return '{' + s[0] + '/' + s[1] + '}'
|
return '{' + s[0] + '/' + s[1] + '}'
|
||||||
|
|
||||||
|
# format a list of rows of data into nice columns
|
||||||
|
def padrows(l):
    """Format rows of fields into aligned, space-separated columns.

    Every field is converted with ``str``. Column ``i`` is made as wide as
    the longest ``i``-th field found in any row, and each field is followed
    by a single space, so every returned line ends with trailing whitespace.
    Rows may have different numbers of fields; a short row simply stops
    after its last field.

    Args:
        l: an iterable of rows, each row an iterable of fields.

    Returns:
        A list with one padded string per input row, in input order.
    """
    # Materialize once: the data is walked twice (measure, then pad), so a
    # one-shot iterable would otherwise be exhausted after the first pass.
    rows = [[str(field) for field in row] for row in l]

    # First pass: widest cell seen in each column position.
    widths = []
    for row in rows:
        for i, cell in enumerate(row):
            if i < len(widths):
                widths[i] = max(widths[i], len(cell))
            else:
                widths.append(len(cell))

    # Second pass: left-justify each cell to its column width, then add the
    # one-space column separator (kept after the last column as well).
    return [
        ''.join(cell.ljust(widths[i]) + ' ' for i, cell in enumerate(row))
        for row in rows
    ]
|
||||||
|
|
||||||
punctuation_chars = r'[+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]'
|
punctuation_chars = r'[+\-*",.:;WUBRGPV/XTQ|\\&^\{\}@ \n=~%\[\]]'
|
||||||
creature_keywords = [
|
creature_keywords = [
|
||||||
# evergreen
|
# evergreen
|
||||||
|
@ -44,12 +64,47 @@ creature_keywords = [
|
||||||
'fear',
|
'fear',
|
||||||
'shroud',
|
'shroud',
|
||||||
'intimidate',
|
'intimidate',
|
||||||
# rare ones that work the same way and interfere
|
# expert level keywords
|
||||||
'rampage',
|
'absorb',
|
||||||
'infect',
|
'amplify',
|
||||||
|
'annihilator',
|
||||||
|
'battle cry',
|
||||||
|
'bolster',
|
||||||
|
'bloodthirst',
|
||||||
'bushido',
|
'bushido',
|
||||||
|
'changeling',
|
||||||
|
'convoke',
|
||||||
|
'devour',
|
||||||
|
'evolve',
|
||||||
'exalted',
|
'exalted',
|
||||||
|
'extort',
|
||||||
|
'fading',
|
||||||
|
'flanking',
|
||||||
|
'frenzy',
|
||||||
|
'graft',
|
||||||
|
'haunt',
|
||||||
|
'horsemanship',
|
||||||
|
'infect',
|
||||||
|
'modular',
|
||||||
|
#'morph',
|
||||||
|
#'ninjutsu',
|
||||||
|
'persist',
|
||||||
|
'poisonous',
|
||||||
|
'provoke',
|
||||||
|
#'prowl',
|
||||||
|
'rampage',
|
||||||
|
'ripple',
|
||||||
|
#'scavenge',
|
||||||
'shadow',
|
'shadow',
|
||||||
|
'soulbond',
|
||||||
|
'soulshift',
|
||||||
|
'split second',
|
||||||
|
'sunburst',
|
||||||
|
'undying',
|
||||||
|
#'unearth',
|
||||||
|
'unleash',
|
||||||
|
'vanishing',
|
||||||
|
'wither',
|
||||||
] # there are other keywords out there, these are just easy to detect
|
] # there are other keywords out there, these are just easy to detect
|
||||||
|
|
||||||
# data aggregating classes
|
# data aggregating classes
|
||||||
|
@ -233,7 +288,7 @@ class Card:
|
||||||
self.bside = None
|
self.bside = None
|
||||||
|
|
||||||
fields = self.raw.split(encode.fieldsep)
|
fields = self.raw.split(encode.fieldsep)
|
||||||
if not len(fields) == 10:
|
if not len(fields) >= 10:
|
||||||
self._parsed = False
|
self._parsed = False
|
||||||
self._valid = False
|
self._valid = False
|
||||||
self.fields = fields
|
self.fields = fields
|
||||||
|
@ -315,21 +370,30 @@ class Card:
|
||||||
# SUPER HACK
|
# SUPER HACK
|
||||||
if 'creature' in self.types:
|
if 'creature' in self.types:
|
||||||
for line in self.text_lines:
|
for line in self.text_lines:
|
||||||
|
orig_line = line
|
||||||
guess = []
|
guess = []
|
||||||
for keyword in creature_keywords:
|
for keyword in creature_keywords:
|
||||||
if keyword in line:
|
if keyword in line:
|
||||||
guess += [keyword]
|
guess += [keyword]
|
||||||
line = line.replace(keyword, '')
|
line = line.replace(keyword, '')
|
||||||
if re.sub(punctuation_chars, ' ', line).split() == [] or 'protect' in line:
|
# yeah, I said it was a hack
|
||||||
|
if re.sub(punctuation_chars, ' ', line).split() == [] or 'protect' in line or 'walk' in line or 'sliver creatures' in line or 'you control have' in line:
|
||||||
for word in guess:
|
for word in guess:
|
||||||
if word not in self.creature_words:
|
if word not in self.creature_words:
|
||||||
self.creature_words += [word]
|
self.creature_words += [word]
|
||||||
|
# elif len(guess) > 0 and len(line) < 30:
|
||||||
|
# print orig_line
|
||||||
else:
|
else:
|
||||||
self.text = None
|
self.text = None
|
||||||
self.text_lines = []
|
self.text_lines = []
|
||||||
self.text_words = []
|
self.text_words = []
|
||||||
self.creature_words = []
|
self.creature_words = []
|
||||||
|
|
||||||
|
if len(fields) > 10:
|
||||||
|
self.cost2 = Manacost(fields[9])
|
||||||
|
else:
|
||||||
|
self.cost2 = None
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return ''.join([
|
return ''.join([
|
||||||
encode.fieldsep,
|
encode.fieldsep,
|
||||||
|
@ -360,7 +424,10 @@ def main(fname, oname = None, verbose = False):
|
||||||
cwords = 0
|
cwords = 0
|
||||||
allwords = {}
|
allwords = {}
|
||||||
|
|
||||||
mcolor = 'G'
|
correct = 0
|
||||||
|
correct_len = 0
|
||||||
|
incorrect = 0
|
||||||
|
incorrect_len = 0
|
||||||
|
|
||||||
i = 0
|
i = 0
|
||||||
for cardtext in cardtexts:
|
for cardtext in cardtexts:
|
||||||
|
@ -371,30 +438,48 @@ def main(fname, oname = None, verbose = False):
|
||||||
continue
|
continue
|
||||||
cards += [card]
|
cards += [card]
|
||||||
|
|
||||||
|
if not str(card.cost) == str(card.cost2):
|
||||||
|
if not card.cost2.check_colors(card.cost.colors):
|
||||||
|
print card.raw + '\n'
|
||||||
|
incorrect += 1
|
||||||
|
if card.text:
|
||||||
|
incorrect_len += len(card.text)
|
||||||
|
else:
|
||||||
|
correct += 1
|
||||||
|
if card.text:
|
||||||
|
correct_len += len(card.text)
|
||||||
|
|
||||||
if 'creature' in card.types:
|
if 'creature' in card.types:
|
||||||
creatures += 1
|
creatures += 1
|
||||||
if card.creature_words:
|
if card.creature_words:
|
||||||
cwords += 1
|
cwords += 1
|
||||||
|
|
||||||
if card.cost.check_colors(mcolor):
|
|
||||||
print ' '.join(card.text_words)
|
|
||||||
|
|
||||||
for word in card.text_words:
|
for word in card.text_words:
|
||||||
if word in allwords:
|
if word in allwords:
|
||||||
allwords[word] += 1
|
allwords[word] += 1
|
||||||
else:
|
else:
|
||||||
allwords[word] = 1
|
allwords[word] = 1
|
||||||
|
|
||||||
|
print '\n====================\n'
|
||||||
|
|
||||||
# print str(creatures) + ' creatures, ' + str(cwords) + ' with keywords'
|
for card in cards:
|
||||||
# print str(len(allwords)) + ' unique words in card text'
|
if (not str(card.cost) == str(card.cost2)) and card.cost2.check_colors(card.cost.colors):
|
||||||
# i = 0
|
print card.raw + '\n'
|
||||||
# for word in sorted(allwords, key=allwords.get, reverse=True):
|
|
||||||
# i += 1
|
|
||||||
# if i > 0:
|
|
||||||
# break
|
|
||||||
# print word + ': ' + str(allwords[word])
|
|
||||||
|
|
||||||
|
print '\n====================\n'
|
||||||
|
|
||||||
|
for card in cards:
|
||||||
|
if str(card.cost) == str(card.cost2):
|
||||||
|
print card.raw + '\n'
|
||||||
|
|
||||||
|
print '\n====================\n'
|
||||||
|
|
||||||
|
print str(creatures) + ' creatures, ' + str(cwords) + ' with keywords'
|
||||||
|
print str(len(allwords)) + ' unique words in card text'
|
||||||
|
|
||||||
|
print str(incorrect) + ' cost mismatches, ' + str(correct) + ' cost matches.'
|
||||||
|
print str(incorrect_len / incorrect) + ' average length of cost mismatches.'
|
||||||
|
print str(correct_len / correct) + ' average length of cost matches.'
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import sys
|
import sys
|
||||||
|
|
|
@ -574,6 +574,12 @@ def encode(card):
|
||||||
text = replace_newlines(text)
|
text = replace_newlines(text)
|
||||||
encoding += text.strip()
|
encoding += text.strip()
|
||||||
encoding += fieldsep
|
encoding += fieldsep
|
||||||
|
|
||||||
|
# HACK: put the cost again after the text
|
||||||
|
if 'manaCost' in card:
|
||||||
|
encoding += replace_mana(card['manaCost'].lower())
|
||||||
|
encoding += fieldsep
|
||||||
|
|
||||||
# if 'flavor' in card:
|
# if 'flavor' in card:
|
||||||
# encoding += card['flavor'].lower()
|
# encoding += card['flavor'].lower()
|
||||||
# encoding += fieldsep
|
# encoding += fieldsep
|
||||||
|
|
|
@ -132,7 +132,7 @@ def cleanup_choice(s):
|
||||||
def forum_reorder(s):
|
def forum_reorder(s):
|
||||||
fields = s.split('|')
|
fields = s.split('|')
|
||||||
# should see ten of em
|
# should see ten of em
|
||||||
if not len(fields) == 10:
|
if not len(fields) >= 10:
|
||||||
#print 'badlen ' + str(len(fields))
|
#print 'badlen ' + str(len(fields))
|
||||||
return s
|
return s
|
||||||
# first and last should be empty, if we had | on the ends
|
# first and last should be empty, if we had | on the ends
|
||||||
|
@ -147,12 +147,19 @@ def forum_reorder(s):
|
||||||
pt = fields[6]
|
pt = fields[6]
|
||||||
cost = fields[7]
|
cost = fields[7]
|
||||||
text = fields[8]
|
text = fields[8]
|
||||||
|
if len(fields) > 10:
|
||||||
|
cost2 = fields[9]
|
||||||
|
else:
|
||||||
|
cost2 = None
|
||||||
|
|
||||||
new_s = ''
|
new_s = ''
|
||||||
if not name == '':
|
if not name == '':
|
||||||
new_s += name + '\n'
|
new_s += name + '\n'
|
||||||
if not cost == '':
|
if not cost == '':
|
||||||
new_s += cost + '\n'
|
new_s += cost
|
||||||
|
if cost2:
|
||||||
|
new_s += ' ~ ' + cost2
|
||||||
|
new_s += '\n'
|
||||||
|
|
||||||
if not supertypes == '':
|
if not supertypes == '':
|
||||||
new_s += supertypes + ' '
|
new_s += supertypes + ' '
|
||||||
|
|
Loading…
Reference in a new issue