File size: 10,021 Bytes
c6f92cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

# Part of the code is from
# `https://github.com/mwray/Joint-Part-of-Speech-Embeddings/tree/main/src/evaluation/NDCG.py`
# and
# `https://github.com/mwray/Joint-Part-of-Speech-Embeddings/tree/main/src/evaluation/mAP.py`
# Modified by Yue Zhao

import numpy as np


def calculate_DCG(similarity_matrix, relevancy_matrix, k_counts):
    """
    Calculates the Discounted Cumulative Gain (DCG) between two modalities for
    the first modality.
    DCG = \sum_{i=1}^k \frac{rel_i}{log_2(i + 1)}
    i.e. the sum of the k relevant retrievals which is calculated as the scaled
    relevancy for the ith item. The scale is designed such that early
    retrievals are more important than later retrievals.
    Params:
        - similarity_matrix: matrix of size n1 x n2 where n1 is the number of
          items in the first modality and n2 is the number of items in the
          second modality. The [ith,jth] element is the predicted similarity
          between the ith item from the first modality and the jth item from
          the second modality.
        - relevancy_matrix: matrix of size n1 x n2 (see similarity_matrix
          above). The [ith, jth] element is the semantic relevancy between the
          ith item from the first modality and the jth item from the second
          modality.
        - k_counts: matrix of size n1 x n2 (see similarity_matrix above) which
          includes information on which items to use to calculate the DCG for
          (see calculate_k_counts for more info on this matrix).
    Returns:
        - The DCG for each item in the first modality, a n1 length vector.
    """
    x_sz, y_sz = similarity_matrix.shape
    ranks = np.argsort(similarity_matrix)[:, ::-1]
    # Create vector of size (n,) where n is the length of the last dimension in
    # similarity matrix
    # This vector is of the form log(i+1)
    logs = np.log2(np.arange(y_sz) + 2)
    # Convert logs into the divisor for the DCG calculation, of size similarity
    # matrix
    divisors = np.repeat(np.expand_dims(logs, axis=0), x_sz, axis=0)

    # mask out the sorted relevancy matrix to only use the first k relevant
    # retrievals for each item.
    columns = np.repeat(np.expand_dims(np.arange(x_sz), axis=1), y_sz, axis=1)
    numerators = relevancy_matrix[columns, ranks] * k_counts
    # Calculate the final DCG score (note that this isn't expected to sum to 1)
    return np.sum(numerators / divisors, axis=1)


def calculate_k_counts(relevancy_matrix):
    """
    Works out the maximum number of allowed retrievals when working out the
    Discounted Cumulative Gain. For each query the DCG only uses the first k
    items retrieved which constitute the k relevant items for that query
    (otherwise the nDCG scores can be deceptively high for bad rankings).
    Params:
        - relevancy_matrix: matrix of size n1 x n2 where n1 is the number of
          items in the first modality and n2 is the number of items in the
          second modality.  The [ith, jth] element is the semantic relevancy
          between the ith item from the first modality and the jth item from
          the second modality.
    Returns:
        - Matrix of size n1 x n2 (see relevancy matrix for more info). This is
          created as a mask such that if the [ith, jth] element is 1 it
          represents a valid item to use for the calculation of DCG for the
          ith item after sorting. For example, if relevancy matrix of:
        [[1, 0.5, 0],
          [0, 0  , 1]]
          is given, then the k_counts matrix will be:
        [[1, 1, 0],
         [1, 0, 0]]
         i.e. the first row has 2 non-zero items, so the first two retrieved
         items should be used in the calculation. In the second row there is
         only 1 relevant item, therefore only the first retrieved item should
         be used for the DCG calculation.
    """
    return (np.sort(relevancy_matrix)[:, ::-1] > 0).astype(int)


def calculate_IDCG(relevancy_matrix, k_counts):
    """
    Calculates the Ideal Discounted Cumulative Gain (IDCG) which is the value
    of the Discounted Cumulative Gain (DCG) for a perfect retrieval, i.e. the
    items in the second modality were retrieved in order of their descending
    relevancy.
    Params:
        - relevancy_matrix: matrix of size n1 x n2 where n1 is the number of
          items in the first modality and n2 is the number of items in the
          second modality. The [ith, jth] element is the semantic relevancy
          between the ith item from the first modality and the jth item from
          the second modality.
        - k_counts: matrix of size n1 x n2 (see similarity_matrix above) which
          includes information on which items to use to calculate the DCG for
          (see calculate_k_counts for more info on this matrix).
    """
    return calculate_DCG(relevancy_matrix, relevancy_matrix, k_counts)


def calculate_nDCG(similarity_matrix, relevancy_matrix, k_counts=None, IDCG=None, reduction='mean'):
    """
    Calculates the normalised Discounted Cumulative Gain (nDCG) between two
    modalities for the first modality using the Discounted Cumulative Gain
    (DCG) and the Ideal Discounted Cumulative Gain (IDCG).
    nDCG = \frac{DCG}{IDCG}
    Params:
        - similarity_matrix: matrix of size n1 x n2 where n1 is the number of
          items in the first modality and n2 is the number of items in the second
          modality. The [ith,jth] element is the predicted similarity between
          the ith item from the first modality and the jth item from the second
          modality.
        - relevancy_matrix: matrix of size n1 x n2 (see similarity_matrix
          above). The [ith, jth] element is the semantic relevancy between the
          ith item from the first modality and the jth item from the second
          modality.
        - k_counts: optional parameter: matrix of size n1 x n2 (see
          similarity_matrix above) which includes information on which items to
          use to calculate the DCG for (see calculate_k_counts for more info on
          this matrix). This will be calculated using calculate_IDCG if not
          present, but should be pre-processed for efficiency.
        - IDCG: Optional parameter which includes the pre-processed Ideal
          Discounted Cumulative Gain (IDCG). This is a vector of size n1 (see
          similarity_matrix above) which contains the IDCG value for each item
          from the first modality. This will be calculated using calculate_IDCG
          if not present, but should be pre-processed for efficiency.
        - reduction: what to use to reduce the different nDCG scores. By
          default this applies np.mean across all different queries.
    Returns:
        - The nDCG values for the first modality.
    """
    if k_counts is None:
        k_counts = calculate_k_counts(relevancy_matrix)
    DCG = calculate_DCG(similarity_matrix, relevancy_matrix, k_counts)
    if IDCG is None:
        IDCG = calculate_IDCG(relevancy_matrix, k_counts)
    if reduction == 'mean':
        return np.mean(DCG / IDCG)
    elif reduction is None:
        return DCG / IDCG


def calculate_mAP(sim_mat, relevancy_matrix):
    """
    Computes the mean average precision according to the following formula of
    average precision:
    \frac{\sum_{k=1}^n p(k) x rel(k)}{num_rel_docs}
    where p(k) is the precision at k, rel(k) is an indicator function
    determining whether the kth returned item is relevant or not and
    num_rel_docs is the number of relevant items to find within the search.
    The mean average precision is the mean of the average precision for each
    query item (i.e row in the matrix)
    This function takes in two parameters:
        - sim_mat: a NxM matrix which represents the similarity between two
        modalities (with modality 1 being of size N and modality 2 of size M).
        - relevancy_matrix: an NxM matrix which represents the relevancy between two
        modalities of items (with modality 1 being of size N and modality 2 of
        size M).
    """
    # Find the order of the items in modality 2 according to modality 1
    ranked_order = (-sim_mat).argsort()
    ranked_sim_mat = sim_mat[np.arange(sim_mat.shape[0])[:, None], ranked_order]
    # re-order the relevancy matrix to accommodate the proposals
    ranked_rel_mat = relevancy_matrix[np.arange(relevancy_matrix.shape[0])[:, None], ranked_order]

    # find the number of relevant items found at each k
    cumulative_rel_mat = np.cumsum(ranked_rel_mat, axis=1)
    # Mask this ensuring that it is non zero if the kth term is 1 (rel(k) above)
    cumulative_rel_mat[ranked_rel_mat != 1] = 0
    # find the divisor for p(k)
    divisor = np.arange(ranked_rel_mat.shape[1]) + 1

    # find the number of relevant docs per query item
    number_rel_docs = np.sum(ranked_rel_mat == 1, axis=1)

    # find the average precision per query, within np.sum finds p(k) * rel(k)
    avg_precision = np.sum(cumulative_rel_mat / divisor, axis=1) / number_rel_docs
    mAP = np.mean(avg_precision)
    return mAP


def get_mAP(similarity_matrix, rel_matrix):
    vis_map = calculate_mAP(similarity_matrix, rel_matrix)
    txt_map = calculate_mAP(similarity_matrix.T, rel_matrix.T)
    return vis_map, txt_map, (vis_map + txt_map) / 2


def get_nDCG(similarity_matrix, rel_matrix):
    vis_k_counts = calculate_k_counts(rel_matrix)
    txt_k_counts = calculate_k_counts(rel_matrix.T)
    vis_IDCG = calculate_IDCG(rel_matrix, vis_k_counts)
    txt_IDCG = calculate_IDCG(rel_matrix.T, txt_k_counts)
    vis_nDCG = calculate_nDCG(similarity_matrix, rel_matrix, k_counts=vis_k_counts, IDCG=vis_IDCG)
    txt_nDCG = calculate_nDCG(similarity_matrix.T, rel_matrix.T, k_counts=txt_k_counts, IDCG=txt_IDCG)
    return vis_nDCG, txt_nDCG, (vis_nDCG + txt_nDCG) / 2