Spaces:
Running
Running
File size: 3,204 Bytes
0fdcb79 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import json
import os
import shutil
import tempfile

import Bio.PDB
import Bio.SeqUtils
from Bio import pairwise2

from run_pretrained_model import run_on_folder
def get_seq_based_on_template(seq: str, template_path: str, output_path: str):
    """Write a PDB file for ``seq`` built from the residues of a template structure.

    The first chain of the first model in ``template_path`` is reduced to the
    residues that contain a CA atom and map to a known one-letter code.
    ``seq`` is globally aligned against that template sequence and a new
    single-chain structure (chain ``A``) is saved to ``output_path``:

    * columns where ``seq`` has a gap are skipped;
    * columns where the template has a gap get a placeholder residue holding
      a single CA atom at the origin;
    * every other column gets a copy of the aligned template residue.

    Residues are numbered by their 1-based position in ``seq``.

    Args:
        seq: One-letter amino-acid sequence to build.
        template_path: Path of the template PDB file.
        output_path: Path the new PDB file is written to.

    Raises:
        StopIteration: If the first model of the template has no chains.
        IndexError: If the aligner returns no alignment -- presumably when
            ``seq`` or the filtered template sequence is empty; confirm
            against the ``pairwise2`` documentation.
    """
    # Collect the usable residues of the template's first chain.
    parser = Bio.PDB.PDBParser()
    template_structure = parser.get_structure("template", template_path)
    chain = next(template_structure[0].get_chains())
    template_residues = [
        res for res in chain.get_residues()
        if "CA" in res and Bio.SeqUtils.seq1(res.get_resname()) not in ("X", "", " ")
    ]
    template_seq = "".join(Bio.SeqUtils.seq1(res.get_resname()) for res in template_residues)

    # Align the requested sequence to the template sequence.
    alignment = pairwise2.align.globalxx(seq, template_seq, one_alignment_only=True)[0]
    aligned_seq, aligned_template_seq = alignment.seqA, alignment.seqB

    # Build an empty structure -> model -> chain hierarchy to fill in.
    new_structure = Bio.PDB.Structure.Structure("new_structure")
    new_model = Bio.PDB.Model.Model(0)
    new_structure.add(new_model)
    new_chain = Bio.PDB.Chain.Chain("A")  # Using chain ID 'A' for the output
    new_model.add(new_chain)

    template_ind = -1  # index of the last consumed entry of template_residues
    seq_ind = 0  # 1-based position in seq of the last consumed residue
    print(aligned_seq, aligned_template_seq, len(template_residues))
    for seq_res, template_res in zip(aligned_seq, aligned_template_seq):
        if template_res != "-":
            template_ind += 1
        if seq_res == "-":
            # Template-only column: nothing to emit for the requested sequence.
            continue
        seq_ind += 1
        if template_res == "-":
            # No template residue for this position: emit a CA-only placeholder.
            seq_res_3_letter = Bio.SeqUtils.seq3(seq_res).upper()
            residue = Bio.PDB.Residue.Residue((' ', seq_ind, ' '), seq_res_3_letter, '')
            # The atom name must match its full name ("CA") so that the atom
            # is registered in the residue under the id it is written with.
            atom = Bio.PDB.Atom.Atom("CA", (0.0, 0.0, 0.0), 1.0, 1.0, ' ', "CA", 0, element="C")
            residue.add(atom)
        else:
            # NOTE(review): the template residue is copied as-is, so a column
            # where seq_res differs from template_res keeps the template's
            # residue name -- confirm this is intended.
            residue = template_residues[template_ind].copy()
            residue.detach_parent()
            residue.id = (' ', seq_ind, ' ')
        new_chain.add(residue)

    io = Bio.PDB.PDBIO()
    io.set_structure(new_structure)
    io.save(output_path)
def run_on_sample_seqs(seq1: str, template1_path: str, seq2: str, template_path2: str, output_path: str,
                       run_config_path: str):
    """Build two template-based input structures, run the model, and save the joined prediction.

    Inside a temporary directory this writes ``prot_r.pdb`` (from ``seq1`` /
    ``template1_path``) and ``prot_l.pdb`` (from ``seq2`` / ``template_path2``),
    writes a ``jsons/input.json`` naming both files, calls ``run_on_folder``
    on that JSON folder, and moves
    ``output/predictions/input_predicted_joined.pdb`` to ``output_path``.

    Args:
        seq1: Sequence used for ``prot_r.pdb``.
        template1_path: Template PDB path for ``seq1``.
        seq2: Sequence used for ``prot_l.pdb``.
        template_path2: Template PDB path for ``seq2``.
        output_path: Destination of the predicted joined PDB file.
        run_config_path: Configuration path forwarded to ``run_on_folder``.
    """
    # The context manager removes the scratch directory even if a step raises.
    with tempfile.TemporaryDirectory() as temp_dir_path:
        get_seq_based_on_template(seq1, template1_path, f"{temp_dir_path}/prot_r.pdb")
        get_seq_based_on_template(seq2, template_path2, f"{temp_dir_path}/prot_l.pdb")

        # NOTE(review): the JSON names the PDB files without a directory while
        # they live one level above the JSON folder -- confirm how
        # run_on_folder resolves these names.
        json_data = {
            "input_r_structure": "prot_r.pdb",
            "input_l_structure": "prot_l.pdb",
        }
        tmp_json_folder = f"{temp_dir_path}/jsons"
        os.makedirs(tmp_json_folder, exist_ok=True)
        # Close (and therefore flush) the file before the model reads it.
        with open(f"{tmp_json_folder}/input.json", "w", encoding="utf-8") as json_file:
            json.dump(json_data, json_file)

        tmp_output_folder = f"{temp_dir_path}/output"
        run_on_folder(tmp_json_folder, tmp_output_folder, run_config_path, skip_relaxation=True,
                      long_sequence_inference=False, skip_exists=False)

        # shutil.move falls back to copy-and-delete when the temporary
        # directory and output_path are on different filesystems, where
        # os.rename would raise OSError.
        shutil.move(tmp_output_folder + "/predictions/input_predicted_joined.pdb", output_path)
        print("moved output to ", output_path)
|