OpenStructure
Loading...
Searching...
No Matches
cleanup.py
Go to the documentation of this file.
1from ost import conop, mol
2
def Cleanup(entity, strip_water=True, canonicalize=True, remove_ligands=True):
    """
    Return a cleaned-up (simplified) copy of a protein structure.

    The input entity is copied first; all edits are applied to the copy.

    :param entity: The structure to clean up. It must provide ``Copy()``.
    :param strip_water: Whether to remove water from the structure
    :param canonicalize: Whether to strip off modifications of amino acids and
        map them back to their parent standard amino acid, e.g. selenium
        methionine to methionine. For more complex amino acids, where the
        relation between the modified and the standard parent amino acid is
        not known, sidechain atoms are removed. D-peptide-linking residues are
        completely removed as well.
    :param remove_ligands: Whether to remove ligands from the structure

    :return: a cleaned version of the entity
    :raises: :class:`RuntimeError` if no default compound library is set
    """
    compound_lib = conop.GetDefaultLib()
    if not compound_lib:
        raise RuntimeError("Cleanup requires a compound library.")

    cleaned = entity.Copy()
    editor = cleaned.EditXCS()

    if strip_water:
        _StripWater(cleaned, editor)

    # Modified residues are mapped back to their parents *before* ligand
    # removal, otherwise MSE and similar residues would be removed as ligands.
    if canonicalize:
        _CanonicalizeResidues(cleaned, editor, compound_lib)

    # Finally drop every remaining hetatom residue that is not water.
    if remove_ligands:
        _RemoveLigands(cleaned, editor)

    return cleaned
34
35
def _StripWater(clean_entity, ed):
    """
    Delete every water residue of *clean_entity* through the editor *ed*.

    A residue is deleted when it is valid and its ``chem_class`` equals
    ``mol.WATER``. ``ed.UpdateICS()`` is called once after the scan.
    """
    for residue in clean_entity.residues:
        # Validity is checked first so that chem_class is never read from an
        # invalid residue.
        is_water = residue.IsValid() and residue.chem_class == mol.WATER
        if is_water:
            ed.DeleteResidue(residue.handle)
    ed.UpdateICS()
46
def _RemoveLigands(clean_entity, ed):
    """
    Remove ligand residues from the structure.

    A residue is deleted when it is valid, is not peptide-linking, its first
    atom is flagged as a hetatom and its ``chem_class`` is not ``mol.WATER``.
    Residues that contain no atoms are left untouched instead of failing with
    an ``IndexError`` on the first-atom lookup.

    :param clean_entity: entity whose ``residues`` are scanned
    :param ed: editor used to delete the residues; ``UpdateICS()`` is called
        on it once after the scan
    """
    for res in clean_entity.residues:
        # WHEN mmCIF WILL BE USED, CHANGE IsPeptideLinking() TO IsProtein()
        if not res.IsValid() or res.IsPeptideLinking():
            continue
        atoms = res.atoms
        # The hetatom flag is read from the first atom only, so an empty
        # residue has nothing to inspect and is skipped.
        if atoms and atoms[0].is_hetatom and res.chem_class != mol.WATER:
            ed.DeleteResidue(res.handle)
    ed.UpdateICS()
58
def _CanonicalizeResidues(clean_entity, ed, compound_lib):
    """
    Strip off modifications of amino acids and map them back to their parent
    standard amino acid, e.g. selenium methionine to methionine.

    Only valid, peptide-linking residues are processed. For each of them:

    * If the one letter code is ``"X"``, or the parent compound cannot be
      found in *compound_lib*, the sidechain is deleted and the hetatom flag
      of the remaining atoms is cleared.
    * Otherwise the heavy-atom names (everything except H and D) of the
      residue are compared with those of the parent compound:

      - residues whose first atom is a hetatom are renamed to the parent and
        handed to ``_Replacement``, ``_Deletion``, ``_Addition`` or
        ``_Unchanged`` depending on which side has extra atoms;
      - all other residues only lose their sidechain when parent atoms are
        missing, i.e. when the sidechain is incomplete.

    Residues without any atoms are treated as non-hetatom residues instead of
    failing with an ``IndexError`` on the first-atom lookup.

    :param clean_entity: entity whose ``residues`` are scanned
    :param ed: editor used for all modifications; ``UpdateICS()`` is called on
        it once after the scan
    :param compound_lib: compound library used to look up the parent compound
    """
    for res in clean_entity.residues:
        if not (res.IsValid() and res.IsPeptideLinking()):
            continue

        parent_olc = res.one_letter_code
        if parent_olc == "X":
            # No standard parent is known for this residue.
            _DeleteSidechain(res, ed)
            for atom in res.atoms:
                atom.is_hetatom = False
            continue

        parent_tlc = conop.OneLetterCodeToResidueName(parent_olc)
        parent_res = compound_lib.FindCompound(parent_tlc)
        if not parent_res:
            _DeleteSidechain(res, ed)
            for atom in res.atoms:
                atom.is_hetatom = False
            print("Removing sidechain of %s, beacuse it has not been found in the compound library"% parent_tlc)
            continue

        # Collect the heavy-atom names present in the residue.
        atoms = res.atoms
        modif_atom_names = {atom.name for atom in atoms
                            if atom.element not in ("H", "D")}

        # If the residue carries an OXT atom (first or last residue), take all
        # atoms of the parent compound; otherwise the parent's leaving atoms
        # are not expected to be present and are ignored.
        keep_leaving_atoms = res.FindAtom("OXT").IsValid()
        parent_atom_names = {
            spec.name for spec in parent_res.atom_specs
            if spec.element not in ("H", "D")
            and (keep_leaving_atoms or not spec.is_leaving)
        }

        additional_parent_atoms = parent_atom_names - modif_atom_names
        additional_modif_atoms = modif_atom_names - parent_atom_names

        # WHEN mmCIF WILL BE USED, CHANGE IsPeptideLinking() TO IsProtein(),
        # TO EXCLUDE LIGANDS FROM CANONICALISATION
        # The hetatom flag is read from the first atom only; a residue with no
        # atoms has none and is handled like a regular protein residue.
        is_hetatom_residue = bool(atoms) and atoms[0].is_hetatom
        if not is_hetatom_residue:
            # The residue is a peptide but not a ligand (a protein residue):
            # only an incomplete sidechain is removed.
            if additional_parent_atoms:
                _DeleteSidechain(res, ed)
            continue

        old_name = res.name
        ed.RenameResidue(res, parent_tlc)
        if additional_parent_atoms and additional_modif_atoms:
            # replacement
            _Replacement(res, ed, old_name)
        elif additional_parent_atoms:
            # deletion
            _Deletion(res, ed)
        elif additional_modif_atoms:
            # addition
            _Addition(res, ed, additional_modif_atoms)
        else:
            # unchanged, later check stereochemistry or H atoms
            _Unchanged(res, ed)
    ed.UpdateICS()
119
def _Replacement(res, ed, old_name):
    """
    Handle a renamed residue in which atoms of the parent were replaced.

    Only residues whose previous name was ``"MSE"`` are converted: the hetatom
    flag of all atoms is cleared and the ``SE`` atom is swapped for an ``SD``
    sulfur atom with the same position, occupancy and B-factor. In every other
    case, including an ``MSE`` residue without a valid ``SE`` atom, the
    sidechain is deleted.

    :param res: the residue to modify
    :param ed: editor used for the modifications
    :param old_name: name of the residue before it was renamed to its parent
    """
    # TEMP ONLY MSE
    if old_name != "MSE":
        _DeleteSidechain(res, ed)
        return

    for atom in res.atoms:
        atom.is_hetatom = False

    selenium = res.FindAtom("SE")
    if not selenium.IsValid():
        _DeleteSidechain(res, ed)
        return

    # S radius=~1;SE=~1.2
    ed.InsertAtom(res, "SD", selenium.pos, "S",
                  selenium.occupancy, selenium.b_factor)
    ed.DeleteAtom(selenium)
134
def _Deletion(res, ed):
    """
    Handle a renamed residue that lacks atoms of its parent compound.

    The sidechain is deleted and the hetatom flag is cleared on every atom
    that is left in the residue.

    :param res: the residue to modify
    :param ed: editor used to delete the sidechain atoms
    """
    _DeleteSidechain(res, ed)
    # The atom list is read after the deletion, so only the atoms that are
    # still present are touched.
    remaining_atoms = res.atoms
    for kept_atom in remaining_atoms:
        kept_atom.is_hetatom = False
140
def _Addition(res, ed, additional_modif_atoms):
    """
    Handle a renamed residue that carries atoms its parent does not have.

    Every atom named in *additional_modif_atoms* that can be found in the
    residue is deleted; afterwards the hetatom flag of the remaining atoms is
    cleared.

    :param res: the residue to modify
    :param ed: editor used to delete the atoms
    :param additional_modif_atoms: names of the atoms to remove
    """
    for extra_name in additional_modif_atoms:
        extra_atom = res.FindAtom(extra_name)
        if not extra_atom.IsValid():
            continue
        ed.DeleteAtom(extra_atom)

    for kept_atom in res.atoms:
        kept_atom.is_hetatom = False
149
def _Unchanged(res, ed):
    """
    Handle a renamed residue whose heavy atoms match those of its parent.

    Residues with chemical class ``mol.D_PEPTIDE_LINKING`` are deleted
    entirely. For all other residues the sidechain is deleted and the hetatom
    flag of the remaining atoms is cleared.

    :param res: the residue to modify
    :param ed: editor used for the deletions
    """
    if res.chem_class == mol.D_PEPTIDE_LINKING:
        ed.DeleteResidue(res)
        return

    _DeleteSidechain(res, ed)
    for kept_atom in res.atoms:
        kept_atom.is_hetatom = False
158
def _DeleteSidechain(res, ed):
    """
    Delete all atoms of *res* except those named CA, CB, C, N and O.

    :param res: the residue whose atoms are inspected
    :param ed: editor used to delete the atoms
    """
    kept_names = ['CA', 'CB', 'C', 'N', 'O']
    for candidate in res.atoms:
        if candidate.name not in kept_names:
            ed.DeleteAtom(candidate)
164
# Visible functions. __all__ must list names as strings: putting the function
# object itself in the list makes "from <module> import *" fail with a
# TypeError.
__all__ = ['Cleanup']
_DeleteSidechain(res, ed)
Definition cleanup.py:159
_Deletion(res, ed)
Definition cleanup.py:135
_StripWater(clean_entity, ed)
Definition cleanup.py:36
_RemoveLigands(clean_entity, ed)
Definition cleanup.py:47
Cleanup(entity, strip_water=True, canonicalize=True, remove_ligands=True)
Definition cleanup.py:3
_Unchanged(res, ed)
Definition cleanup.py:150
_Addition(res, ed, additional_modif_atoms)
Definition cleanup.py:141
_CanonicalizeResidues(clean_entity, ed, compound_lib)
Definition cleanup.py:59
_Replacement(res, ed, old_name)
Definition cleanup.py:120