| |
| |
| |
|
|
| import math |
| import copy |
| from collections import defaultdict |
|
|
| import numpy as np |
|
|
|
|
def precook(s, n=4, out=False):
    """
    Count every n-gram of order 1..n in a whitespace-tokenised sentence.

    :param s: string : sentence to be converted into n-grams
    :param n: int : highest n-gram order to count
    :param out: unused; accepted only so existing callers keep working
    :return: defaultdict(int) mapping each n-gram (a tuple of tokens) to the
             number of times it occurs in the sentence
    """
    tokens = s.split()
    ngram_counts = defaultdict(int)
    for order in range(1, n + 1):
        # Zipping `order` progressively shifted views of the token list
        # yields every contiguous window of that size, left to right.
        shifted_views = (tokens[offset:] for offset in range(order))
        for window in zip(*shifted_views):
            ngram_counts[window] += 1
    return ngram_counts
|
|
|
|
def cook_refs(refs, n=4):
    """Convert the reference sentences of one segment into n-gram counts.

    :param refs: list of string : reference sentences for a single segment
    :param n: int : highest n-gram order passed on to precook
    :return: list with one precook() result per reference, in input order
    """
    cooked = []
    for sentence in refs:
        cooked.append(precook(sentence, n))
    return cooked
|
|
|
|
def cook_test(test, n=4):
    """Convert one hypothesis sentence into n-gram counts.

    :param test: string : hypothesis sentence (it is handed to precook,
                 which splits it on whitespace)
    :param n: int : highest n-gram order passed on to precook
    :return: the precook() result for the hypothesis
    """
    hypothesis_counts = precook(test, n, True)
    return hypothesis_counts
|
|
|
|
class CiderScorer(object):
    """CIDEr scorer.

    Collects (hypothesis, references) pairs as n-gram count dictionaries and
    scores each hypothesis against its references with a clipped, TF-IDF
    weighted cosine similarity multiplied by a Gaussian length penalty.

    Attributes:
        n: highest n-gram order used.
        sigma: standard deviation of the Gaussian length penalty.
        crefs: one entry per segment; each entry is a list of n-gram count
            dicts (one per reference sentence).
        ctest: one entry per segment; the hypothesis n-gram count dict, or
            None when references were added without a hypothesis.
        document_frequency: n-gram -> number of segments whose references
            contain it; rebuilt by compute_doc_freq().
        ref_len: log of the number of segments (1 when there is exactly one
            segment); set by compute_cider().
    """

    def __init__(self, test=None, refs=None, n=4, sigma=6.0):
        """Create a scorer, optionally seeded with one (test, refs) pair.

        :param test: string or None : hypothesis sentence
        :param refs: list of string or None : reference sentences
        :param n: int : highest n-gram order
        :param sigma: float : standard deviation of the length penalty
        """
        self.n = n
        self.sigma = sigma
        self.crefs = []
        self.ctest = []
        self.document_frequency = defaultdict(float)
        self.cook_append(test, refs)
        self.ref_len = None

    def copy(self):
        """Return a new scorer with the same settings and shallow copies of
        the accumulated hypotheses and references."""
        # sigma is forwarded too; otherwise the clone would silently fall
        # back to the default length penalty.
        new = CiderScorer(n=self.n, sigma=self.sigma)
        new.ctest = copy.copy(self.ctest)
        new.crefs = copy.copy(self.crefs)
        return new

    def cook_append(self, test, refs):
        """Called by the constructor and __iadd__ to add one segment.

        Nothing is stored when refs is None. When refs is given but test is
        None, a None placeholder keeps ctest aligned with crefs.

        :param test: string or None : hypothesis sentence
        :param refs: list of string or None : reference sentences
        """
        if refs is None:
            return
        # Use this scorer's own n so the cooked n-grams match the vector
        # sizes allocated in compute_cider().
        self.crefs.append(cook_refs(refs, self.n))
        if test is not None:
            self.ctest.append(cook_test(test, self.n))
        else:
            self.ctest.append(None)

    def size(self):
        """Return the number of stored segments.

        :raises AssertionError: if hypotheses and references are misaligned
        """
        assert len(self.crefs) == len(self.ctest), "refs/test mismatch! %d<>%d" % (
            len(self.crefs),
            len(self.ctest),
        )
        return len(self.crefs)

    def __iadd__(self, other):
        """Add a (test, refs) tuple or merge another scorer's segments.

        :param other: tuple (test, refs), or an object exposing ctest/crefs
        :return: self
        """
        if isinstance(other, tuple):
            self.cook_append(other[0], other[1])
        else:
            self.ctest.extend(other.ctest)
            self.crefs.extend(other.crefs)
        return self

    def compute_doc_freq(self):
        """
        Compute document frequency for the reference data.
        This is used to compute idf (inverse document frequency) later.
        The result is stored in self.document_frequency, which is rebuilt
        from scratch so that repeated calls do not inflate the counts.
        :return: None
        """
        self.document_frequency = defaultdict(float)
        for refs in self.crefs:
            # An n-gram counts once per segment, however many of the
            # segment's references contain it.
            for ngram in {ngram for ref in refs for ngram in ref}:
                self.document_frequency[ngram] += 1

    def compute_cider(self):
        """Score every stored hypothesis against its references.

        Requires self.document_frequency to be populated (see
        compute_doc_freq). Sets self.ref_len as a side effect.

        :return: list of float, one score per segment
        """

        def counts2vec(cnts):
            """
            Map n-gram counts to vectors of tf-idf weights.
            :param cnts: dict mapping n-gram tuple -> count
            :return: vec (list of dict, index k holds (k+1)-grams),
                     norm (list of float, L2 norm per order),
                     length (int)
            """
            vec = [defaultdict(float) for _ in range(self.n)]
            norm = [0.0 for _ in range(self.n)]
            length = 0
            for ngram, term_freq in cnts.items():
                # n-grams absent from the reference corpus are treated as
                # having document frequency 1 (log -> 0).
                df = np.log(max(1.0, self.document_frequency.get(ngram, 0.0)))
                order = len(ngram) - 1
                weight = float(term_freq) * (self.ref_len - df)
                vec[order][ngram] = weight
                norm[order] += weight ** 2
                # Length is accumulated from the entries at index 1.
                if order == 1:
                    length += term_freq
            return vec, [np.sqrt(total) for total in norm], length

        def sim(vec_hyp, vec_ref, norm_hyp, norm_ref, length_hyp, length_ref):
            """
            Compute the clipped cosine similarity of two tf-idf vectors.
            :param vec_hyp: list of dict for the hypothesis vector
            :param vec_ref: list of dict for the reference vector
            :param norm_hyp: list of float, hypothesis norms per order
            :param norm_ref: list of float, reference norms per order
            :param length_hyp: int, length of the hypothesis
            :param length_ref: int, length of the reference
            :return: array with one similarity per n-gram order
            """
            delta = float(length_hyp - length_ref)
            # Gaussian penalty on the length difference; same for all orders.
            penalty = np.e ** (-(delta ** 2) / (2 * self.sigma ** 2))
            val = np.zeros(self.n)
            for order in range(self.n):
                ref_weights = vec_ref[order]
                for ngram, hyp_weight in vec_hyp[order].items():
                    ref_weight = ref_weights.get(ngram, 0.0)
                    # Clip the hypothesis weight at the reference weight.
                    val[order] += min(hyp_weight, ref_weight) * ref_weight
                if (norm_hyp[order] != 0) and (norm_ref[order] != 0):
                    val[order] /= norm_hyp[order] * norm_ref[order]
                assert not math.isnan(val[order])
                val[order] *= penalty
            return val

        self.ref_len = np.log(float(len(self.crefs)))
        if len(self.crefs) == 1:
            # log(1) == 0 would zero out every tf-idf weight.
            self.ref_len = 1
        scores = []
        for test, refs in zip(self.ctest, self.crefs):
            vec, norm, length = counts2vec(test)
            score = np.zeros(self.n)
            for ref in refs:
                vec_ref, norm_ref, length_ref = counts2vec(ref)
                score += sim(vec, vec_ref, norm, norm_ref, length, length_ref)
            # Average over n-gram orders, then over references.
            score_avg = np.mean(score)
            score_avg /= len(refs)
            scores.append(score_avg)
        return scores

    def compute_score(self, option=None, verbose=0):
        """Compute document frequencies and score all stored segments.

        :param option: unused; kept for interface compatibility
        :param verbose: unused; kept for interface compatibility
        :return: (mean score over segments, array of per-segment scores)
        """
        self.compute_doc_freq()
        # An n-gram cannot occur in more segments than there are hypotheses.
        assert len(self.ctest) >= max(self.document_frequency.values())
        score = self.compute_cider()
        return np.mean(np.array(score)), np.array(score)
|
|