Spaces:

burtenshaw
/

my_personas_generator

Running

App Files Files Community

my_personas_generator / personas.py

burtenshaw HF staff

first commit

7b3a105 2 days ago

raw

history blame contribute delete

No virus

2.92 kB

	from typing import TYPE_CHECKING, Any, Dict, List, Union

	from distilabel.llms import InferenceEndpointsLLM
	from distilabel.pipeline import Pipeline
	from distilabel.steps import LoadDataFromDicts
	from distilabel.steps.tasks.base import Task
	from distilabel.steps import KeepColumns
	from distilabel.steps.base import StepResources

	from distilabel.steps.tasks.typing import ChatType
	from distilabel.steps.tasks import TextGeneration


	# Default system message for `TextToPersona`: one line of guidance followed by
	# three example openers, one per line. The wording is reproduced verbatim
	# (spelling included) because it is handed to the model as-is.
	SYSTEM_PROMPT_TEXT_TO_PERSONA: str = "\n".join(
	    [
	        # Guidance: what to produce and how detailed / assertive it should be.
	        (
	            "You are an expert in analyzing the text content and assigning finding the general type of persona that could be associated with such a way of expressing. "
	            "Please use one or two sentences for the definition, but try to make it as fine-grained if input texts involve many detailed elements. "
	            "The persona definition must go straight to the point, be assertive. The following are starts of persona definitions:"
	        ),
	        # Example openers showing how a persona definition may start.
	        "A machine learning researcher...",
	        "A pedriatric nurse whose...",
	        "An urban planner focused on...",
	    ]
	)

	# User-message template: the question, a blank line, a "## Text" heading and
	# the `{text}` placeholder that is filled in through `str.format`.
	TEXT_TO_PERSONA_PROMPT: str = "\n".join(
	    [
	        "What is the likely profession, interest, or role of the person who would write or be interested in this text?",
	        "",
	        "## Text",
	        "{text}",
	    ]
	)


	class TextToPersona(Task):
	    """Task that asks an LLM which persona a piece of text is associated with.

	    Each input row supplies a `text`. The chat sent to the model pairs
	    `system_prompt` with `TEXT_TO_PERSONA_PROMPT` filled in with that text, and
	    the raw completion is returned under the `persona` key.

	    See Figure 3 in PersonaHub paper.

	    Attributes:
	        system_prompt: content of the system message; defaults to
	            `SYSTEM_PROMPT_TEXT_TO_PERSONA`.
	    """

	    system_prompt: str = SYSTEM_PROMPT_TEXT_TO_PERSONA

	    @property
	    def inputs(self) -> List[str]:
	        """Name of the single column this task reads: `text`."""
	        required_columns = ["text"]
	        return required_columns

	    def format_input(self, input: Dict[str, Any]) -> "ChatType":
	        """Build the two-message chat (system, then user) for one input row.

	        Args:
	            input: the row to process; its `text` entry is inserted into
	                `TEXT_TO_PERSONA_PROMPT`.

	        Returns:
	            A list with the system message carrying `system_prompt`, followed
	            by the user message carrying the filled-in template.
	        """
	        system_message = {"role": "system", "content": self.system_prompt}
	        user_message = {
	            "role": "user",
	            "content": TEXT_TO_PERSONA_PROMPT.format(text=input["text"]),  # type: ignore
	        }
	        return [system_message, user_message]

	    @property
	    def outputs(self) -> List[str]:
	        """Columns declared for this task: `persona` and `model_name`.

	        NOTE(review): `format_output` only returns `persona`; `model_name` is
	        presumably filled in by the base `Task` -- confirm against distilabel.
	        """
	        declared_columns = ["persona", "model_name"]
	        return declared_columns

	    def format_output(
	        self, output: Union[str, None], input: Dict[str, Any]
	    ) -> Dict[str, Any]:
	        """Wrap the raw completion as the `persona` value.

	        Args:
	            output: the raw output of the LLM; passed through unchanged, so a
	                `None` output yields a `None` persona.
	            input: the input to the task. Not used by this method.

	        Returns:
	            A dict with the persona definition under the `persona` key.
	        """
	        persona = output
	        return {"persona": persona}