DockFormerPP

Running

App Files Files Community

DockFormerPP / run_on_seq.py

bshor

add code

0fdcb79 8 days ago

raw

history blame

3.2 kB

	import json
	import os
	import shutil
	import tempfile

	import Bio.PDB
	import Bio.SeqUtils
	from Bio import pairwise2

	from run_pretrained_model import run_on_folder


	def get_seq_based_on_template(seq: str, template_path: str, output_path: str) -> None:
	    """Write a PDB file for ``seq`` using coordinates from a template structure.

	    The first chain of the first model in ``template_path`` is read, ``seq``
	    is globally aligned to that chain's sequence, and a single-chain
	    structure (chain ID ``A``) is saved to ``output_path``:

	    * positions of ``seq`` aligned to a template residue receive a copy of
	      that template residue (its residue name and atoms are kept as-is);
	    * positions of ``seq`` aligned to a gap receive a placeholder residue
	      holding a single atom at the origin;
	    * template residues aligned to a gap in ``seq`` are dropped.

	    Residues are numbered 1..len(seq), following their position in ``seq``.

	    Args:
	        seq: Target amino-acid sequence in one-letter codes.
	        template_path: Path to the template PDB file.
	        output_path: Path the new PDB file is written to.

	    Raises:
	        ValueError: If ``seq`` is empty, the template's first model has no
	            chain, or no alignment could be produced (e.g. the template
	            chain has no usable residues).
	    """
	    # Fail early with a clear message instead of an IndexError further down.
	    if not seq:
	        raise ValueError("seq must be a non-empty amino-acid sequence")

	    # Collect the template residues that have a CA atom and a known
	    # one-letter code; only these take part in the alignment.
	    parser = Bio.PDB.PDBParser()
	    template_structure = parser.get_structure("template", template_path)
	    chain = next(template_structure[0].get_chains(), None)
	    if chain is None:
	        raise ValueError(f"no chain found in template {template_path!r}")
	    template_residues = [
	        res for res in chain.get_residues()
	        if "CA" in res and Bio.SeqUtils.seq1(res.get_resname()) not in ("X", "", " ")
	    ]
	    template_seq = "".join(Bio.SeqUtils.seq1(res.get_resname()) for res in template_residues)

	    # Align the sequence to the template.
	    alignments = pairwise2.align.globalxx(seq, template_seq, one_alignment_only=True)
	    if not alignments:
	        raise ValueError(
	            f"could not align seq to template {template_path!r} "
	            f"({len(template_residues)} usable template residues)"
	        )
	    alignment = alignments[0]
	    aligned_seq, aligned_template_seq = alignment.seqA, alignment.seqB

	    # Create a new single-model, single-chain structure for the aligned residues.
	    new_structure = Bio.PDB.Structure.Structure("new_structure")
	    new_model = Bio.PDB.Model.Model(0)
	    new_structure.add(new_model)
	    new_chain = Bio.PDB.Chain.Chain("A")  # Using chain ID 'A' for the output
	    new_model.add(new_chain)

	    template_ind = -1  # index into template_residues of the last consumed residue
	    seq_ind = 0  # 1-based position in seq of the last consumed residue
	    print(aligned_seq, aligned_template_seq, len(template_residues))
	    for seq_res, template_res in zip(aligned_seq, aligned_template_seq):
	        if template_res != "-":
	            template_ind += 1

	        if seq_res == "-":
	            # Template residue with no counterpart in seq: leave it out.
	            continue

	        seq_ind += 1
	        residue_id = (' ', seq_ind, ' ')

	        if template_res == "-":
	            # No template coordinates for this position: add a placeholder
	            # residue with one atom at the origin.
	            # NOTE(review): the atom is created with name "C" but full name
	            # "CA" -- confirm which of the two is intended.
	            seq_res_3_letter = Bio.SeqUtils.seq3(seq_res).upper()
	            residue = Bio.PDB.Residue.Residue(residue_id, seq_res_3_letter, '')
	            atom = Bio.PDB.Atom.Atom("C", (0.0, 0.0, 0.0), 1.0, 1.0, ' ', "CA", 0, element="C")
	            residue.add(atom)
	        else:
	            # Reuse the template residue, renumbered to its position in seq.
	            residue = template_residues[template_ind].copy()
	            residue.detach_parent()
	            residue.id = residue_id
	        new_chain.add(residue)

	    pdb_writer = Bio.PDB.PDBIO()
	    pdb_writer.set_structure(new_structure)
	    pdb_writer.save(output_path)


	def run_on_sample_seqs(seq1: str, template1_path: str, seq2: str, template_path2: str, output_path: str,
	                       run_config_path: str) -> None:
	    """Predict a joined structure for two sequences and move it to ``output_path``.

	    Each sequence is first turned into a PDB file by
	    ``get_seq_based_on_template`` using its template. Both files and a JSON
	    job description are staged in a temporary directory, ``run_on_folder`` is
	    run on it, and the resulting ``predictions/input_predicted_joined.pdb``
	    is moved to ``output_path``. The temporary directory is removed when the
	    function exits, including when an exception is raised.

	    Args:
	        seq1: Sequence written to ``prot_r.pdb`` (referenced by the
	            ``input_r_structure`` JSON key).
	        template1_path: Template PDB file for ``seq1``.
	        seq2: Sequence written to ``prot_l.pdb`` (referenced by the
	            ``input_l_structure`` JSON key).
	        template_path2: Template PDB file for ``seq2``.
	        output_path: Destination of the predicted PDB file.
	        run_config_path: Passed through to ``run_on_folder`` unchanged.
	    """
	    # The context manager guarantees cleanup even if a step below fails.
	    with tempfile.TemporaryDirectory() as temp_dir_path:
	        get_seq_based_on_template(seq1, template1_path, f"{temp_dir_path}/prot_r.pdb")
	        get_seq_based_on_template(seq2, template_path2, f"{temp_dir_path}/prot_l.pdb")

	        # The structures are referenced by bare file name.
	        # NOTE(review): presumably run_on_folder resolves them relative to
	        # the parent of the JSON folder -- confirm against run_on_folder.
	        json_data = {
	            "input_r_structure": "prot_r.pdb",
	            "input_l_structure": "prot_l.pdb",
	        }
	        tmp_json_folder = f"{temp_dir_path}/jsons"
	        os.makedirs(tmp_json_folder, exist_ok=True)
	        # Close (and thereby flush) the file before run_on_folder reads it.
	        with open(f"{tmp_json_folder}/input.json", "w") as json_file:
	            json.dump(json_data, json_file)
	        tmp_output_folder = f"{temp_dir_path}/output"

	        run_on_folder(tmp_json_folder, tmp_output_folder, run_config_path, skip_relaxation=True,
	                      long_sequence_inference=False, skip_exists=False)

	        # shutil.move also works when output_path is on a different filesystem
	        # than the temporary directory, where os.rename raises OSError.
	        shutil.move(f"{tmp_output_folder}/predictions/input_predicted_joined.pdb", output_path)
	        print("moved output to ", output_path)