1 from ost
import conop, mol
def Cleanup(entity, strip_water=True, canonicalize=True, remove_ligands=True):
    """
    Return a cleaned-up (simplified) version of the protein structure.

    All edits are applied to the object returned by ``entity.Copy()``; that
    copy is what is returned. Each cleaning pass can be switched off through
    its keyword argument.

    :param entity: The structure to clean up
    :param strip_water: Whether to remove water from the structure
    :param canonicalize: Whether to strip off modifications of amino acids and
      map them back to their parent standard amino acid, e.g. selenium
      methionine to methionine. For more complex amino acids, where the
      relation between the modified and the standard parent amino acid is not
      known, sidechain atoms are removed. D-peptide-linking residues are
      completely removed as well.
    :param remove_ligands: Whether to remove ligands from the structure

    :return: a cleaned version of the entity
    :raises RuntimeError: if the default builder has no ``compound_lib``
      attribute
    """
    builder = conop.GetBuilder()
    if not hasattr(builder, "compound_lib"):
        raise RuntimeError(
            "Cannot cleanup structure, since the default builder doesn't use the compound library")
    compound_lib = builder.compound_lib

    clean_entity = entity.Copy()
    ed = clean_entity.EditXCS()

    if strip_water:
        _StripWater(clean_entity, ed)
    # Canonicalization runs before ligand removal: it clears the hetatom flag
    # on modified amino acids, and the ligand pass inspects that flag.
    if canonicalize:
        _CanonicalizeResidues(clean_entity, ed, compound_lib)
    if remove_ligands:
        _RemoveLigands(clean_entity, ed)
    return clean_entity
def _StripWater(clean_entity, ed):
    """
    Remove water residues from the structure.

    :param clean_entity: The structure whose ``residues`` are scanned
    :param ed: Editor used to delete the water residues
    """
    for res in clean_entity.residues:
        # Same validity guard as the canonicalization pass: never touch a
        # residue handle that is no longer valid.
        if not res.IsValid():
            continue
        if res.chem_class == mol.WATER:
            ed.DeleteResidue(res.handle)
def _RemoveLigands(clean_entity, ed):
    """
    Remove ligands from the structure.

    A residue is deleted when it is not peptide-linking, its first atom is
    flagged as a hetatom, and it is not water.

    :param clean_entity: The structure whose ``residues`` are scanned
    :param ed: Editor used to delete the ligand residues
    """
    for res in clean_entity.residues:
        # Same validity guard as the canonicalization pass: never touch a
        # residue handle that is no longer valid.
        if not res.IsValid():
            continue
        if res.IsPeptideLinking():
            continue
        # The hetatom flag of the first atom is taken as representative of
        # the whole residue.
        if res.atoms[0].is_hetatom and res.chem_class != mol.WATER:
            ed.DeleteResidue(res.handle)
def _HeavyAtomNames(atoms, skip_leaving=False):
    """Return the set of names of all atoms that are neither H nor D.

    With ``skip_leaving`` set, atoms whose ``is_leaving`` flag is true are
    left out as well (only meaningful for compound atom specs).
    """
    names = set()
    for atom in atoms:
        if atom.element == "H" or atom.element == "D":
            continue
        if skip_leaving and atom.is_leaving:
            continue
        names.add(atom.name)
    return names


def _CanonicalizeResidues(clean_entity, ed, compound_lib):
    """
    Strip off modifications of amino acids and map them back to their parent
    standard amino acid, e.g. selenium methionine to methionine. For more
    complex amino acids, where the relation between the modified and the
    standard parent amino acid is not known, sidechain atoms are removed.
    D-peptide-linking residues are completely removed as well.

    :param clean_entity: The structure whose ``residues`` are processed
    :param ed: Editor used to rename residues and delete/insert atoms
    :param compound_lib: Compound library queried with ``FindCompound`` for
      the parent residue definition
    """
    for res in clean_entity.residues:
        if not (res.IsValid() and res.IsPeptideLinking()):
            continue

        parent_olc = res.one_letter_code
        if parent_olc == "X":
            # No standard parent is known: keep only what _DeleteSidechain
            # leaves behind and treat the residue as a regular one.
            _DeleteSidechain(res, ed)
            for atom in res.atoms:
                atom.is_hetatom = False
            continue

        parent_tlc = conop.OneLetterCodeToResidueName(parent_olc)
        parent_res = compound_lib.FindCompound(parent_tlc)
        if not parent_res:
            # The parent cannot be compared against, so fall back to the same
            # treatment as an unknown parent and tell the user why.
            _DeleteSidechain(res, ed)
            for atom in res.atoms:
                atom.is_hetatom = False
            print("Removing sidechain of %s, beacuse it has not been found in the compound library"% parent_tlc)
            continue

        modif_atom_names = _HeavyAtomNames(res.atoms)
        # A residue carrying OXT is compared against the complete parent;
        # otherwise the parent's leaving atoms are not expected to be present.
        has_oxt = res.FindAtom("OXT").IsValid()
        parent_atom_names = _HeavyAtomNames(parent_res.atom_specs,
                                            skip_leaving=not has_oxt)

        additional_parent_atoms = parent_atom_names - modif_atom_names
        additional_modif_atoms = modif_atom_names - parent_atom_names

        if res.atoms[0].is_hetatom:
            # Remember the original name before renaming: _Replacement
            # decides what to do based on it.
            old_name = res.name
            ed.RenameResidue(res, parent_tlc)
            if additional_parent_atoms:
                if additional_modif_atoms:
                    # Atoms differ in both directions.
                    _Replacement(res, ed, old_name)
                else:
                    # The modified residue only lacks parent atoms.
                    _Deletion(res, ed)
            elif additional_modif_atoms:
                # The modified residue only has extra atoms.
                _Addition(res, ed, additional_modif_atoms)
            else:
                # Same heavy atoms as the parent.
                _Unchanged(res, ed)
        elif additional_parent_atoms:
            # Regular residue with an incomplete sidechain.
            _DeleteSidechain(res, ed)
def _Replacement(res, ed, old_name):
    """
    Handle a modified residue whose atoms differ from the parent in both
    directions (it lacks parent atoms and has atoms the parent lacks).

    Selenomethionine ("MSE") is converted by putting a sulfur atom "SD" at the
    position of the selenium atom "SE", copying its occupancy and B-factor,
    and removing "SE". Every other residue, and an MSE without a valid "SE"
    atom, has its sidechain removed instead.

    :param res: The residue to modify
    :param ed: Editor used to insert and delete atoms
    :param old_name: Residue name to test against "MSE"; the caller passes it
      separately from ``res``
    """
    if old_name != "MSE":
        _DeleteSidechain(res, ed)
        return

    for atom in res.atoms:
        atom.is_hetatom = False
    sel = res.FindAtom("SE")
    if sel.IsValid():
        ed.InsertAtom(res, "SD", sel.pos, "S", sel.occupancy, sel.b_factor)
        # SD now stands in for SE; drop the selenium atom.
        ed.DeleteAtom(sel)
    else:
        # Nothing to convert from, so fall back to stripping the sidechain.
        _DeleteSidechain(res, ed)
def _Deletion(res, ed):
    """
    Strip the sidechain of ``res`` and clear the hetatom flag on every atom
    that is left.

    :param res: The residue to modify
    :param ed: Editor handed on to ``_DeleteSidechain``
    """
    _DeleteSidechain(res, ed)
    remaining_atoms = res.atoms
    for remaining in remaining_atoms:
        remaining.is_hetatom = False
def _Addition(res, ed, additional_modif_atoms):
    """
    Delete the named extra atoms from ``res`` and clear the hetatom flag on
    the atoms that remain.

    :param res: The residue to modify
    :param ed: Editor used to delete atoms
    :param additional_modif_atoms: Iterable of atom names to remove; names
      for which ``res.FindAtom`` yields no valid atom are skipped
    """
    for extra_name in additional_modif_atoms:
        extra_atom = res.FindAtom(extra_name)
        if not extra_atom.IsValid():
            continue
        ed.DeleteAtom(extra_atom)

    for kept in res.atoms:
        kept.is_hetatom = False
def _Unchanged(res, ed):
    """
    Handle a residue that needs no atom-by-atom fix-up.

    D-peptide-linking residues are deleted as a whole. Any other residue has
    its sidechain stripped and the hetatom flag cleared on the remaining
    atoms.

    :param res: The residue to modify
    :param ed: Editor used to delete the residue or its sidechain atoms
    """
    if res.chem_class == mol.D_PEPTIDE_LINKING:
        ed.DeleteResidue(res)
        # The residue is gone; nothing further may be done with its handle.
        return

    _DeleteSidechain(res, ed)
    for atom in res.atoms:
        atom.is_hetatom = False
160 def _DeleteSidechain(res, ed) :
161 for atom
in res.atoms:
162 if not atom.name
in [
'CA',
'CB',
'C',
'N',
'O']: