# NOTE: this file was extracted from a Hugging Face Spaces page; the scraped
# page header ("Spaces: Running on CPU Upgrade") was replaced with this comment.
import logging

import pandas as pd

import src.envs as envs
import src.backend.util as util
from src.backend.model_operations import SummaryGenerator, EvaluationModel

# Configure root logging once at import time; all module log calls inherit this.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
class Evaluator:
    """Evaluate summaries generated by a language model.

    Attributes:
        model (str): The name or path of the model.
        revision (str): The model revision.
        precision (str): The precision setting of the model.
        batch_size (int): Batch size for processing.
        device (str): The device to run the model on.
        no_cache (bool): Flag to disable caching.
        limit (int): Limit on the number of items to process.
        write_out (bool): Whether to write results to a file.
        output_base_path (str): Base path for output files.
        summary_generator (SummaryGenerator): Instance for generating summaries.
        eval_model (EvaluationModel): Instance for evaluating summaries.
    """

    def __init__(self, model, revision, precision, batch_size,
                 device, no_cache, limit, write_out=True,
                 output_base_path='logs'):
        """Initializes the Evaluator with the given model and settings.

        Args:
            model (str): The name or path of the model.
            revision (str): The model revision.
            precision (str): The precision setting of the model.
            batch_size (int): Batch size for processing.
            device (str): The device to run the model on.
            no_cache (bool): Flag to disable caching.
            limit (int): Limit on the number of items to process.
            write_out (bool): Whether to write results to a file.
            output_base_path (str): Base path for output files.

        Raises:
            Exception: Re-raised if the summary generator or evaluation
                model cannot be constructed (after logging the error).
        """
        self.model = model
        self.revision = revision
        self.precision = precision
        self.batch_size = batch_size
        self.device = device
        self.no_cache = no_cache
        self.limit = limit
        self.write_out = write_out
        self.output_base_path = output_base_path
        try:
            self.summary_generator = SummaryGenerator(model, revision)
            self.eval_model = EvaluationModel(envs.HEM_PATH)
        except Exception:
            # logging.exception records the traceback; lazy %-args avoid
            # formatting work when the log level is disabled.
            logging.exception("Error initializing Evaluator")
            raise

    def evaluate(self):
        """Performs the evaluation process by generating summaries
        and computing metrics.

        Returns:
            dict: A dictionary containing evaluation results as produced
            by ``util.format_results``.

        Raises:
            FileNotFoundError: If the dataset at ``envs.DATASET_PATH``
                does not exist.
            Exception: Any other evaluation failure, logged and re-raised.
        """
        try:
            df = pd.read_csv(envs.DATASET_PATH)
            generated_summaries_df = self.summary_generator.generate_summaries(df)

            avg_summary_len = self.summary_generator.avg_length
            answer_rate = self.summary_generator.answer_rate

            # Called for its side effects on eval_model: it must run before
            # compute_factual_consistency_rate()/hallucination_rate are read.
            self.eval_model.evaluate_hallucination(generated_summaries_df)
            factual_consistency_rate = self.eval_model.compute_factual_consistency_rate()
            hallucination_rate = self.eval_model.hallucination_rate

            return util.format_results(model_name=self.model,
                                       revision=self.revision,
                                       precision=self.precision,
                                       factual_consistency_rate=factual_consistency_rate,
                                       hallucination_rate=hallucination_rate,
                                       answer_rate=answer_rate,
                                       avg_summary_len=avg_summary_len)
        except FileNotFoundError:
            logging.error("File not found: %s", envs.DATASET_PATH)
            raise
        except Exception:
            logging.exception("Error during evaluation")
            raise