File size: 2,433 Bytes
e027012 f5faae7 e027012 f5faae7 e027012 f5faae7 e027012 f5faae7 e027012 f5faae7 e027012 073db2c e027012 f5faae7 e027012 f5faae7 e027012 f5faae7 e027012 f5faae7 e027012 f5faae7 073db2c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
from api_wrappers import grazie_wrapper
def build_prompt_ref(prediction, reference):
    """Build the rating prompt that judges a commit message against a reference.

    Args:
        prediction: The commit message to be evaluated; inserted between the
            "COMMIT MESSAGE YOU HAVE TO EVALUATE" markers.
        reference: A reference commit message for the same commit; inserted
            between the "REFERENCE COMMIT MESSAGE" markers.

    Returns:
        The prompt text, ending with a newline, that asks for a single
        number from 1 to 10 and nothing else.
    """
    # Each entry is one line of the prompt; the trailing empty entry yields
    # the final newline when the lines are joined.
    prompt_lines = (
        "Evaluate the following commit message based on clarity, specificity, context, and conciseness without",
        "providing any additional feedback or commentary:",
        "START OF THE COMMIT MESSAGE YOU HAVE TO EVALUATE",
        f"{prediction}",
        "END OF THE COMMIT MESSAGE YOU HAVE TO EVALUATE",
        "For reference, consider this as an example of a good commit message for the same commit that is both concise and",
        "specific:",
        "START OF THE REFERENCE COMMIT MESSAGE",
        f"{reference}",
        "END OF THE REFERENCE COMMIT MESSAGE",
        "YOUR TASK: Provide a single number as a response, representing the rating on a scale from 1 to 10, where 1 is the",
        "lowest quality and 10 is the highest quality. Do not include any other text or explanation in your response.",
        "",
    )
    return "\n".join(prompt_lines)
def build_prompt_noref(prediction, diff):
    """Build the rating prompt that judges a commit message against its diff.

    Args:
        prediction: The commit message to be evaluated; inserted between the
            "COMMIT MESSAGE YOU HAVE TO EVALUATE" markers.
        diff: The code changes of the commit; inserted between the
            "CODE CHANGES" markers.

    Returns:
        The prompt text, ending with a newline, that asks for a single
        number from 1 to 10 and nothing else.
    """
    # Each entry is one line of the prompt; the trailing empty entry yields
    # the final newline when the lines are joined.
    prompt_lines = (
        "Evaluate the following commit message based on clarity, specificity, context, and conciseness without",
        "providing any additional feedback or commentary:",
        "START OF THE COMMIT MESSAGE YOU HAVE TO EVALUATE",
        f"{prediction}",
        "END OF THE COMMIT MESSAGE YOU HAVE TO EVALUATE",
        "These are the code changes included in the commit:",
        "START OF THE CODE CHANGES",
        f"{diff}",
        "END OF THE CODE CHANGES",
        "YOUR TASK: Provide a single number as a response, representing the rating on a scale from 1 to 10, where 1 is the",
        "lowest quality and 10 is the highest quality. Do not include any other text or explanation in your response.",
        "",
    )
    return "\n".join(prompt_lines)
# Maximum number of LLM calls made for one rating before giving up.
N_RETRIES = 3


def get_number_for_prompt(prompt):
    """Ask the LLM for a rating and return it as an integer.

    The prompt is sent through ``grazie_wrapper.generate_for_prompt`` up to
    ``N_RETRIES`` times. For each response, the last whitespace-separated
    token is parsed with ``int``; the first attempt that parses is returned.
    A response with no tokens (empty or whitespace-only) counts as a failed
    attempt and is retried like any other unparsable response.

    Args:
        prompt: Prompt text passed to the LLM.

    Returns:
        The integer parsed from the last token of the first usable response.

    Raises:
        RuntimeError: If none of the ``N_RETRIES`` attempts produced a token
            that ``int`` could parse. The message lists the token seen on
            each attempt (an empty string for blank responses).
    """
    outputs = []
    for _ in range(N_RETRIES):
        # split() with no arguments already ignores leading/trailing
        # whitespace and returns [] for a blank response.
        tokens = grazie_wrapper.generate_for_prompt(prompt).split()
        # A blank response maps to "" so that int() raises ValueError and the
        # attempt is retried, instead of an uncaught IndexError on tokens[-1].
        last_token = tokens[-1] if tokens else ""
        outputs.append(last_token)
        try:
            return int(last_token)
        except ValueError:
            continue
    raise RuntimeError(f"LLM cannot generate a number. Its outputs were: {outputs}")
def compute_ref(prediction, reference, n_requests):
    """Return the mean LLM rating of a commit message judged against a reference.

    The reference-based prompt is built once and submitted ``n_requests``
    times through ``get_number_for_prompt``; the ratings are averaged.

    Args:
        prediction: The commit message to be evaluated.
        reference: The reference commit message for the same commit.
        n_requests: How many ratings to collect.

    Returns:
        The arithmetic mean of the collected ratings.

    Raises:
        ZeroDivisionError: If no ratings were collected (``n_requests`` of 0
            or less).
    """
    prompt = build_prompt_ref(prediction, reference)
    ratings = []
    for _ in range(n_requests):
        ratings.append(get_number_for_prompt(prompt))
    total = sum(ratings)
    return total / len(ratings)
def compute_noref(prediction, diff, n_requests):
    """Return the mean LLM rating of a commit message judged against its diff.

    The reference-free prompt is built once and submitted ``n_requests``
    times through ``get_number_for_prompt``; the ratings are averaged.

    Args:
        prediction: The commit message to be evaluated.
        diff: The code changes included in the commit.
        n_requests: How many ratings to collect.

    Returns:
        The arithmetic mean of the collected ratings.

    Raises:
        ZeroDivisionError: If no ratings were collected (``n_requests`` of 0
            or less).
    """
    prompt = build_prompt_noref(prediction, diff)
    ratings = []
    for _ in range(n_requests):
        ratings.append(get_number_for_prompt(prompt))
    total = sum(ratings)
    return total / len(ratings)
|