CharadesEgo / lavila /utils /evaluation_ek100mir.py
gina9726's picture
Upload demo files
c6f92cc verified
raw
history blame contribute delete
No virus
10 kB
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# Part of the code is from
# `https://github.com/mwray/Joint-Part-of-Speech-Embeddings/tree/main/src/evaluation/NDCG.py`
# and
# `https://github.com/mwray/Joint-Part-of-Speech-Embeddings/tree/main/src/evaluation/mAP.py`
# Modified by Yue Zhao
import numpy as np
def calculate_DCG(similarity_matrix, relevancy_matrix, k_counts):
"""
Calculates the Discounted Cumulative Gain (DCG) between two modalities for
the first modality.
DCG = \sum_{i=1}^k \frac{rel_i}{log_2(i + 1)}
i.e. the sum of the k relevant retrievals which is calculated as the scaled
relevancy for the ith item. The scale is designed such that early
retrievals are more important than later retrievals.
Params:
- similarity_matrix: matrix of size n1 x n2 where n1 is the number of
items in the first modality and n2 is the number of items in the
second modality. The [ith,jth] element is the predicted similarity
between the ith item from the first modality and the jth item from
the second modality.
- relevancy_matrix: matrix of size n1 x n2 (see similarity_matrix
above). The [ith, jth] element is the semantic relevancy between the
ith item from the first modality and the jth item from the second
modality.
- k_counts: matrix of size n1 x n2 (see similarity_matrix above) which
includes information on which items to use to calculate the DCG for
(see calculate_k_counts for more info on this matrix).
Returns:
- The DCG for each item in the first modality, a n1 length vector.
"""
x_sz, y_sz = similarity_matrix.shape
ranks = np.argsort(similarity_matrix)[:, ::-1]
# Create vector of size (n,) where n is the length of the last dimension in
# similarity matrix
# This vector is of the form log(i+1)
logs = np.log2(np.arange(y_sz) + 2)
# Convert logs into the divisor for the DCG calculation, of size similarity
# matrix
divisors = np.repeat(np.expand_dims(logs, axis=0), x_sz, axis=0)
# mask out the sorted relevancy matrix to only use the first k relevant
# retrievals for each item.
columns = np.repeat(np.expand_dims(np.arange(x_sz), axis=1), y_sz, axis=1)
numerators = relevancy_matrix[columns, ranks] * k_counts
# Calculate the final DCG score (note that this isn't expected to sum to 1)
return np.sum(numerators / divisors, axis=1)
def calculate_k_counts(relevancy_matrix):
"""
Works out the maximum number of allowed retrievals when working out the
Discounted Cumulative Gain. For each query the DCG only uses the first k
items retrieved which constitute the k relevant items for that query
(otherwise the nDCG scores can be deceptively high for bad rankings).
Params:
- relevancy_matrix: matrix of size n1 x n2 where n1 is the number of
items in the first modality and n2 is the number of items in the
second modality. The [ith, jth] element is the semantic relevancy
between the ith item from the first modality and the jth item from
the second modality.
Returns:
- Matrix of size n1 x n2 (see relevancy matrix for more info). This is
created as a mask such that if the [ith, jth] element is 1 it
represents a valid item to use for the calculation of DCG for the
ith item after sorting. For example, if relevancy matrix of:
[[1, 0.5, 0],
[0, 0 , 1]]
is given, then the k_counts matrix will be:
[[1, 1, 0],
[1, 0, 0]]
i.e. the first row has 2 non-zero items, so the first two retrieved
items should be used in the calculation. In the second row there is
only 1 relevant item, therefore only the first retrieved item should
be used for the DCG calculation.
"""
return (np.sort(relevancy_matrix)[:, ::-1] > 0).astype(int)
def calculate_IDCG(relevancy_matrix, k_counts):
"""
Calculates the Ideal Discounted Cumulative Gain (IDCG) which is the value
of the Discounted Cumulative Gain (DCG) for a perfect retrieval, i.e. the
items in the second modality were retrieved in order of their descending
relevancy.
Params:
- relevancy_matrix: matrix of size n1 x n2 where n1 is the number of
items in the first modality and n2 is the number of items in the
second modality. The [ith, jth] element is the semantic relevancy
between the ith item from the first modality and the jth item from
the second modality.
- k_counts: matrix of size n1 x n2 (see similarity_matrix above) which
includes information on which items to use to calculate the DCG for
(see calculate_k_counts for more info on this matrix).
"""
return calculate_DCG(relevancy_matrix, relevancy_matrix, k_counts)
def calculate_nDCG(similarity_matrix, relevancy_matrix, k_counts=None, IDCG=None, reduction='mean'):
"""
Calculates the normalised Discounted Cumulative Gain (nDCG) between two
modalities for the first modality using the Discounted Cumulative Gain
(DCG) and the Ideal Discounted Cumulative Gain (IDCG).
nDCG = \frac{DCG}{IDCG}
Params:
- similarity_matrix: matrix of size n1 x n2 where n1 is the number of
items in the first modality and n2 is the number of items in the second
modality. The [ith,jth] element is the predicted similarity between
the ith item from the first modality and the jth item from the second
modality.
- relevancy_matrix: matrix of size n1 x n2 (see similarity_matrix
above). The [ith, jth] element is the semantic relevancy between the
ith item from the first modality and the jth item from the second
modality.
- k_counts: optional parameter: matrix of size n1 x n2 (see
similarity_matrix above) which includes information on which items to
use to calculate the DCG for (see calculate_k_counts for more info on
this matrix). This will be calculated using calculate_IDCG if not
present, but should be pre-processed for efficiency.
- IDCG: Optional parameter which includes the pre-processed Ideal
Discounted Cumulative Gain (IDCG). This is a vector of size n1 (see
similarity_matrix above) which contains the IDCG value for each item
from the first modality. This will be calculated using calculate_IDCG
if not present, but should be pre-processed for efficiency.
- reduction: what to use to reduce the different nDCG scores. By
default this applies np.mean across all different queries.
Returns:
- The nDCG values for the first modality.
"""
if k_counts is None:
k_counts = calculate_k_counts(relevancy_matrix)
DCG = calculate_DCG(similarity_matrix, relevancy_matrix, k_counts)
if IDCG is None:
IDCG = calculate_IDCG(relevancy_matrix, k_counts)
if reduction == 'mean':
return np.mean(DCG / IDCG)
elif reduction is None:
return DCG / IDCG
def calculate_mAP(sim_mat, relevancy_matrix):
"""
Computes the mean average precision according to the following formula of
average precision:
\frac{\sum_{k=1}^n p(k) x rel(k)}{num_rel_docs}
where p(k) is the precision at k, rel(k) is an indicator function
determining whether the kth returned item is relevant or not and
num_rel_docs is the number of relevant items to find within the search.
The mean average precision is the mean of the average precision for each
query item (i.e row in the matrix)
This function takes in two parameters:
- sim_mat: a NxM matrix which represents the similarity between two
modalities (with modality 1 being of size N and modality 2 of size M).
- relevancy_matrix: an NxM matrix which represents the relevancy between two
modalities of items (with modality 1 being of size N and modality 2 of
size M).
"""
# Find the order of the items in modality 2 according to modality 1
ranked_order = (-sim_mat).argsort()
ranked_sim_mat = sim_mat[np.arange(sim_mat.shape[0])[:, None], ranked_order]
# re-order the relevancy matrix to accommodate the proposals
ranked_rel_mat = relevancy_matrix[np.arange(relevancy_matrix.shape[0])[:, None], ranked_order]
# find the number of relevant items found at each k
cumulative_rel_mat = np.cumsum(ranked_rel_mat, axis=1)
# Mask this ensuring that it is non zero if the kth term is 1 (rel(k) above)
cumulative_rel_mat[ranked_rel_mat != 1] = 0
# find the divisor for p(k)
divisor = np.arange(ranked_rel_mat.shape[1]) + 1
# find the number of relevant docs per query item
number_rel_docs = np.sum(ranked_rel_mat == 1, axis=1)
# find the average precision per query, within np.sum finds p(k) * rel(k)
avg_precision = np.sum(cumulative_rel_mat / divisor, axis=1) / number_rel_docs
mAP = np.mean(avg_precision)
return mAP
def get_mAP(similarity_matrix, rel_matrix):
vis_map = calculate_mAP(similarity_matrix, rel_matrix)
txt_map = calculate_mAP(similarity_matrix.T, rel_matrix.T)
return vis_map, txt_map, (vis_map + txt_map) / 2
def get_nDCG(similarity_matrix, rel_matrix):
vis_k_counts = calculate_k_counts(rel_matrix)
txt_k_counts = calculate_k_counts(rel_matrix.T)
vis_IDCG = calculate_IDCG(rel_matrix, vis_k_counts)
txt_IDCG = calculate_IDCG(rel_matrix.T, txt_k_counts)
vis_nDCG = calculate_nDCG(similarity_matrix, rel_matrix, k_counts=vis_k_counts, IDCG=vis_IDCG)
txt_nDCG = calculate_nDCG(similarity_matrix.T, rel_matrix.T, k_counts=txt_k_counts, IDCG=txt_IDCG)
return vis_nDCG, txt_nDCG, (vis_nDCG + txt_nDCG) / 2