Spaces:

JetBrains-Research
/

commit-message-editing-visualization

Sleeping

Petr Tsvetkov

Generate a dataset for the labeling app

6676c5a 7 months ago

1.45 kB

	import config
	from api_wrappers import hf_data_loader

	# Number of rows sampled from the rewriting dataset to build the example prompts below.
	N_EXAMPLES = 15


	def get_example_prompt_end_to_start(start_msg: str, end_msg: str) -> str:
	    """Render one example that maps an edited commit message back to its initial version.

	    Args:
	        start_msg: The initial commit message, shown as the expected output.
	        end_msg: The edited commit message, shown as the input.

	    Returns:
	        The example text wrapped in START/END OF THE EXAMPLE markers.
	    """
	    # The template lines are deliberately not indented with the function body:
	    # extra leading whitespace inside the literal would become part of the prompt.
	    return f"""START OF THE EXAMPLE

	For the following edited commit message:
	START OF THE EDITED COMMIT MESSAGE
	{end_msg}
	END OF THE EDITED COMMIT MESSAGE

	You would output the following initial commit message:
	START OF THE INITIAL COMMIT MESSAGE
	{start_msg}
	END OF THE INITIAL COMMIT MESSAGE

	END OF THE EXAMPLE"""


	def get_example_prompt_start_to_end(start_msg: str, end_msg: str) -> str:
	    """Render one example that maps a generated commit message to its improved version.

	    Args:
	        start_msg: The LLM-generated commit message, shown as the input.
	        end_msg: The improved commit message, shown as the expected output.

	    Returns:
	        The example text wrapped in START/END OF THE EXAMPLE markers.
	    """
	    # The template lines are deliberately not indented with the function body:
	    # extra leading whitespace inside the literal would become part of the prompt.
	    return f"""START OF THE EXAMPLE

	For the following LLM-generated commit message:
	START OF THE GENERATED COMMIT MESSAGE
	{start_msg}
	END OF THE GENERATED COMMIT MESSAGE

	You would output the following improved commit message:
	START OF THE IMPROVED COMMIT MESSAGE
	{end_msg}
	END OF THE IMPROVED COMMIT MESSAGE

	END OF THE EXAMPLE"""


	# Load the raw rewriting data, keep only the start/end message columns, and draw a
	# fixed-size sample seeded by config.RANDOM_STATE.
	manual_df = (
	    hf_data_loader.load_raw_rewriting_as_pandas()[['commit_msg_start', 'commit_msg_end']]
	    .sample(n=N_EXAMPLES, random_state=config.RANDOM_STATE)
	)


	def generate_examples(end_to_start: bool) -> str:
	    """Render every sampled row of ``manual_df`` as an example and join the results.

	    Args:
	        end_to_start: If true, use the edited-to-initial template; otherwise use
	            the generated-to-improved template.

	    Returns:
	        All rendered examples joined with a single newline.
	    """
	    prompt_fn = get_example_prompt_end_to_start if end_to_start else get_example_prompt_start_to_end
	    # Walk the two columns in lockstep instead of iterrows(), which builds a
	    # Series object for every row.
	    message_pairs = zip(manual_df['commit_msg_start'], manual_df['commit_msg_end'])
	    return "\n".join(prompt_fn(start_msg, end_msg) for start_msg, end_msg in message_pairs)


	# Example sections for both prompt directions, rendered once when the module is imported.
	EXAMPLES_END_TO_START, EXAMPLES_START_TO_END = (
	    generate_examples(end_to_start=True),
	    generate_examples(end_to_start=False),
	)