Spaces:
Runtime error
Runtime error
from typing import TYPE_CHECKING, Any, Dict, List, Union | |
from distilabel.llms import InferenceEndpointsLLM | |
from distilabel.pipeline import Pipeline | |
from distilabel.steps import LoadDataFromDicts | |
from distilabel.steps.tasks.base import Task | |
from distilabel.steps import KeepColumns | |
from distilabel.steps.base import StepResources | |
from distilabel.steps.tasks.typing import ChatType | |
from distilabel.steps.tasks import TextGeneration | |
# System prompt used by `TextToPersona`: asks the model for a short, assertive
# persona definition and lists a few example openings. The wording is kept
# verbatim (typos included) because it is runtime text sent to the LLM.
SYSTEM_PROMPT_TEXT_TO_PERSONA: str = (
    "You are an expert in analyzing the text content and assigning finding the general type of persona that could be associated with such a way of expressing. "
    "Please use one or two sentences for the definition, but try to make it as fine-grained if input texts involve many detailed elements. "
    "The persona definition must go straight to the point, be assertive. The following are starts of persona definitions:\n"
    "A machine learning researcher...\n"
    "A pedriatric nurse whose...\n"
    "An urban planner focused on..."
)
# User-turn template for `TextToPersona`; the `{text}` placeholder is filled
# with `str.format(text=...)` when the chat input is built.
TEXT_TO_PERSONA_PROMPT: str = (
    "What is the likely profession, interest, or role of the person who would write or be interested in this text?\n\n"
    "## Text\n"
    "{text}"
)
class TextToPersona(Task):
    """Task that asks an LLM for a persona definition matching a given text.

    For every input row, a two-message chat is built: the system message is
    `system_prompt` and the user message is `TEXT_TO_PERSONA_PROMPT` filled
    with the row's `text` value. The raw LLM answer is returned under the
    `persona` key.

    See Figure 3 in PersonaHub paper.
    """

    # Defaults to the module-level system prompt for this task.
    system_prompt: str = SYSTEM_PROMPT_TEXT_TO_PERSONA

    # `inputs`/`outputs` are exposed as properties so that attribute access
    # (`task.inputs`) yields the list of column names rather than a bound
    # method, matching the distilabel `Task` contract.
    @property
    def inputs(self) -> List[str]:
        """The inputs for the task are the `text`."""
        return ["text"]

    def format_input(self, input: Dict[str, Any]) -> "ChatType":
        """Format a row as a `ChatType` conversation.

        Args:
            input: the row to process; its `text` entry is inserted into the
                user prompt.

        Returns:
            A list with the system message followed by the user message.
        """
        return [
            {"role": "system", "content": self.system_prompt},
            {
                "role": "user",
                "content": TEXT_TO_PERSONA_PROMPT.format(text=input["text"]),  # type: ignore
            },
        ]

    @property
    def outputs(self) -> List[str]:
        """The outputs for the task are the `persona` and the `model_name`.

        NOTE(review): `model_name` is not produced by `format_output`;
        presumably the base `Task` fills it in -- confirm.
        """
        return ["persona", "model_name"]

    def format_output(
        self, output: Union[str, None], input: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Wrap the raw LLM output as the persona definition.

        Args:
            output: the raw output of the LLM; passed through unchanged, so a
                `None` output yields a `None` persona.
            input: the input to the task. Not used here.

        Returns:
            A dict with the persona definition under the `persona` key.
        """
        return {"persona": output}