-
Notifications
You must be signed in to change notification settings - Fork 15
/
gtp_scores.py
83 lines (65 loc) · 2.67 KB
/
gtp_scores.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# encoding: utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
from copy import deepcopy
import numpy as np
from utils import sample_from_list
class GTPScores(object):
    """Per ground truth pair (gtp) bookkeeping of the best precision so far.

    For every ground truth pair the highest precision reported by any graph
    pattern passed to `update_with_gps` is stored. All entries start at 0.
    The "remaining gain" of a gtp is `1 - max_precision`, which presumes
    precisions lie in [0, 1] (not enforced here).

    The insertion order of the ground truth pairs is preserved.
    """

    def __init__(self, ground_truth_pairs):
        """Initialise all given ground truth pairs with a max precision of 0.

        :param ground_truth_pairs: iterable of hashable ground truth pairs.
            Duplicates collapse into a single entry.
        """
        self.gtp_max_precisions = OrderedDict.fromkeys(ground_truth_pairs, 0)

    @property
    def ground_truth_pairs(self):
        """List of the tracked ground truth pairs in insertion order."""
        return list(self.gtp_max_precisions)

    @property
    def remaining_gain(self):
        """Sum of the remaining gains of all gtps (number of gtps - score)."""
        return len(self.gtp_max_precisions) - self.score

    @property
    def score(self):
        """Sum of the max precisions of all ground truth pairs."""
        return sum(self.gtp_max_precisions.values())

    def get_remaining_gain_for(self, gtp):
        """Return the remaining gain (1 - max precision) of a single gtp.

        :param gtp: a ground truth pair tracked by this object.
        :return: 1 - max precision of gtp.
        :raises KeyError: if gtp is not tracked by this object.
        """
        return 1 - self.gtp_max_precisions[gtp]

    def get_remaining_gains(self):
        """Return an OrderedDict mapping each gtp to its remaining gain."""
        return OrderedDict(
            (gtp, 1 - mp)
            for gtp, mp in self.gtp_max_precisions.items()
        )

    def copy(self):
        """Return an independent deep copy including the current scores."""
        return deepcopy(self)

    def copy_reset(self):
        """Return a new GTPScores over the same gtps with all scores at 0."""
        return GTPScores(self.ground_truth_pairs)

    def update_with_gps(self, gps):
        """Update with list of graph patterns and return precision gain.

        :param gps: iterable of objects with a `gtp_precisions` mapping of
            ground truth pair to precision.
        :return: summed increase of the max precisions caused by this update
            (0 if nothing improved).
        :raises KeyError: if a graph pattern reports a gtp that is not
            tracked by this object.
        """
        precision_gain = 0
        for gp in gps:
            for gtp, precision in gp.gtp_precisions.items():
                old = self.gtp_max_precisions[gtp]
                if precision > old:
                    precision_gain += precision - old
                    self.gtp_max_precisions[gtp] = precision
        return precision_gain

    def remaining_gain_sample_gtps(self, max_n=None):
        """Sample ground truth pairs according to remaining gains.

        This method draws up to max_n ground truth pairs using their remaining
        gains as sample probabilities. GTPs with remaining gain of 0 are never
        returned, so if less than n probabilities are > 0 it draws less gtps.

        :param max_n: Up to n items to sample.
        :return: list of ground truth pairs sampled according to their
            remaining gains in gtp_scores with max length of n.
        """
        remaining_gains = self.get_remaining_gains()
        if not remaining_gains:
            # zip(*[]) yields nothing to unpack; with no gtps there is
            # nothing to sample from.
            return []
        gtps, gains = zip(*remaining_gains.items())
        return sample_from_list(gtps, gains, max_n)

    def __len__(self):
        """Return the number of tracked ground truth pairs."""
        return len(self.gtp_max_precisions)

    def __sub__(self, other):
        """Return the summed per-gtp difference of max precisions.

        :param other: GTPScores over the same ground truth pairs in the same
            order.
        :return: numpy scalar, sum over all gtps of (own - other) precision.
        :raises TypeError: if other is not a GTPScores or tracks different
            ground truth pairs.
        """
        if not isinstance(other, GTPScores):
            raise TypeError('other should be GTPScore obj as well')
        if self.ground_truth_pairs != other.ground_truth_pairs:
            raise TypeError("can't compare GTPScores over different gtps")
        # On Python 3 dict.values() is a view; np.array() would wrap it as a
        # 0-d object array and the subtraction would fail, so materialise it.
        own = np.array(list(self.gtp_max_precisions.values()))
        theirs = np.array(list(other.gtp_max_precisions.values()))
        return np.sum(own - theirs)