File size: 2,922 Bytes
7b3a105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from typing import TYPE_CHECKING, Any, Dict, List, Union

from distilabel.llms import InferenceEndpointsLLM
from distilabel.pipeline import Pipeline
from distilabel.steps import LoadDataFromDicts
from distilabel.steps.tasks.base import Task
from distilabel.steps import KeepColumns
from distilabel.steps.base import StepResources

from distilabel.steps.tasks.typing import ChatType
from distilabel.steps.tasks import TextGeneration


# System message for the text-to-persona task: three instruction sentences
# separated by single spaces, followed by three example openings of persona
# definitions, one per line.
# NOTE(review): "assigning finding" and "pedriatric" look like typos, but this
# text is sent to the model verbatim, so it is reproduced unchanged here.
SYSTEM_PROMPT_TEXT_TO_PERSONA: str = "\n".join(
    (
        " ".join(
            (
                "You are an expert in analyzing the text content and assigning finding the general type of persona that could be associated with such a way of expressing.",
                "Please use one or two sentences for the definition, but try to make it as fine-grained if input texts involve many detailed elements.",
                "The persona definition must go straight to the point, be assertive. The following are starts of persona definitions:",
            )
        ),
        "A machine learning researcher...",
        "A pedriatric nurse whose...",
        "An urban planner focused on...",
    )
)

# User-message template: the question, a blank line, a "## Text" heading and
# then the `{text}` placeholder that is filled in through `str.format`.
TEXT_TO_PERSONA_PROMPT: str = "\n".join(
    (
        "What is the likely profession, interest, or role of the person who would write or be interested in this text?",
        "",
        "## Text",
        "{text}",
    )
)


class TextToPersona(Task):
    """Task that asks an LLM which persona is likely behind a piece of text.

    Every input row must provide a `text` column. That text is inserted into
    `TEXT_TO_PERSONA_PROMPT` and sent, preceded by `system_prompt`, as a
    two-message conversation. The raw generation is returned under `persona`.

    See Figure 3 in PersonaHub paper.

    Attributes:
        system_prompt: content of the system message placed first in every
            conversation. Defaults to `SYSTEM_PROMPT_TEXT_TO_PERSONA`.
    """

    system_prompt: str = SYSTEM_PROMPT_TEXT_TO_PERSONA

    @property
    def inputs(self) -> List[str]:
        """The only column read by the task is `text`."""
        required_columns = ["text"]
        return required_columns

    def format_input(self, input: Dict[str, Any]) -> "ChatType":
        """Build the conversation sent to the LLM for one input row.

        Args:
            input: the input row; its `text` value is inserted into the
                user prompt template.

        Returns:
            A two-item list: the system message followed by the user message.
        """
        system_turn = {"role": "system", "content": self.system_prompt}
        user_turn = {
            "role": "user",
            "content": TEXT_TO_PERSONA_PROMPT.format(text=input["text"]),  # type: ignore
        }
        return [system_turn, user_turn]

    @property
    def outputs(self) -> List[str]:
        """The columns declared by the task: `persona` and `model_name`."""
        declared_columns = ["persona", "model_name"]
        return declared_columns

    def format_output(
        self, output: Union[str, None], input: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Wrap the raw generation under the `persona` key.

        Args:
            output: the raw output of the LLM. It is stored as-is, so a
                `None` output yields `{"persona": None}`.
            input: the input to the task. Not used by this method.

        Returns:
            A dict whose only key, `persona`, holds `output`.
        """
        # NOTE(review): `model_name` is declared in `outputs` but is not set
        # here - presumably the `Task` base class adds it; confirm.
        formatted: Dict[str, Any] = {}
        formatted["persona"] = output
        return formatted