1 from ost
import conop, mol
def Cleanup(entity, strip_water=True, canonicalize=True, remove_ligands=True):
    """
    Return a cleaned-up (simplified) version of the protein structure.

    All edits are applied to the object returned by ``entity.Copy()``; the
    individual clean-up steps are selected with the boolean parameters.

    :param entity: The structure to clean up.
    :param strip_water: Whether to remove water from the structure
    :param canonicalize: Whether to strip off modifications of amino acids and
      map them back to their parent standard amino acid, e.g. selenium
      methionine to methionine. For more complex amino acids, where the
      relation between the modified and the standard parent amino acid is not
      known, sidechain atoms are removed. D-peptide-linking residues are
      completely removed as well.
    :param remove_ligands: Whether to remove ligands from the structure

    :return: a cleaned version of the entity
    :raises RuntimeError: if ``conop.GetDefaultLib()`` yields no compound
      library
    """
    lib = conop.GetDefaultLib()
    # Only fail when there really is no library; an unconditional raise would
    # make the function unusable.
    if not lib:
        raise RuntimeError("Cleanup requires a compound library.")
    clean_entity = entity.Copy()
    ed = clean_entity.EditXCS()
    # Each step is optional and honours its flag. The order (water,
    # canonicalization, ligands) is the order of the original calls.
    if strip_water:
        _StripWater(clean_entity, ed)
    if canonicalize:
        _CanonicalizeResidues(clean_entity, ed, lib)
    if remove_ligands:
        _RemoveLigands(clean_entity, ed)
    return clean_entity
def _StripWater(clean_entity, ed):
    """
    Delete every water residue of *clean_entity* through the editor *ed*.

    :param clean_entity: Structure whose ``residues`` are scanned.
    :param ed: Editor used to delete the residues.
    """
    for residue in clean_entity.residues:
        # Anything whose chemical class is not water is left alone.
        if residue.chem_class != mol.WATER:
            continue
        ed.DeleteResidue(residue.handle)
def _RemoveLigands(clean_entity, ed):
    """
    Remove ligands from the structure.

    A residue is deleted when it is not peptide-linking, its first atom is
    flagged as hetero atom and it is not water.

    :param clean_entity: Structure whose ``residues`` are scanned.
    :param ed: Editor used to delete the residues.
    """
    for res in clean_entity.residues:
        # Same validity guard as _CanonicalizeResidues uses.
        if not res.IsValid():
            continue
        atoms = res.atoms
        # The ligand test looks at the first atom; a residue without atoms
        # cannot be classified that way, so skip it instead of failing on
        # atoms[0].
        if len(atoms) == 0:
            continue
        if (not res.IsPeptideLinking()
                and atoms[0].is_hetatom
                and res.chem_class != mol.WATER):
            ed.DeleteResidue(res.handle)
def _CanonicalizeResidues(clean_entity, ed, compound_lib):
    """
    Strip off modifications of amino acids and map them back to their parent
    standard amino acid, e.g. selenium methionine to methionine. For more
    complex amino acids, where the relation between the modified and the
    standard parent amino acid is not known, sidechain atoms are removed.
    D-peptide-linking residues are completely removed as well.

    :param clean_entity: Structure whose ``residues`` are processed.
    :param ed: Editor used for all modifications.
    :param compound_lib: Compound library queried with ``FindCompound`` for
      the parent residue.
    """
    for res in clean_entity.residues:
        if not (res.IsValid() and res.IsPeptideLinking()):
            continue

        parent_olc = res.one_letter_code
        if parent_olc == "X":
            # No standard parent is known: keep only what _DeleteSidechain
            # leaves and clear the hetatom flag.
            _DeleteSidechain(res, ed)
            for atom in res.atoms:
                atom.is_hetatom = False
            continue

        parent_tlc = conop.OneLetterCodeToResidueName(parent_olc)
        parent_res = compound_lib.FindCompound(parent_tlc)
        if not parent_res:
            # Without the parent compound no atom comparison is possible.
            _DeleteSidechain(res, ed)
            for atom in res.atoms:
                atom.is_hetatom = False
            # Parenthesised form is valid on both Python 2 and Python 3.
            print("Removing sidechain of %s, beacuse it has not been found in the compound library"% parent_tlc)
            continue

        # Heavy-atom names (hydrogen and deuterium ignored) of the residue.
        modif_atom_names = set(atom.name for atom in res.atoms
                               if atom.element != "H" and atom.element != "D")
        # With an OXT atom present, all heavy atoms of the parent are
        # expected; otherwise the parent's leaving atoms are excluded.
        if res.FindAtom("OXT").IsValid():
            parent_atom_names = set(atom.name for atom in parent_res.atom_specs
                                    if atom.element != "H"
                                    and atom.element != "D")
        else:
            parent_atom_names = set(atom.name for atom in parent_res.atom_specs
                                    if atom.element != "H"
                                    and atom.element != "D"
                                    and not atom.is_leaving)

        additional_parent_atoms = parent_atom_names - modif_atom_names
        additional_modif_atoms = modif_atom_names - parent_atom_names

        if res.atoms[0].is_hetatom:
            # Remember the name before renaming; _Replacement dispatches on
            # it. NOTE(review): assumes the residue exposes its name as
            # `res.name` - confirm against the ost API.
            old_name = res.name
            ed.RenameResidue(res, parent_tlc)
            if additional_parent_atoms:
                if additional_modif_atoms:
                    # Atoms both missing and extra.
                    _Replacement(res, ed, old_name)
                else:
                    # Only parent atoms are missing.
                    _Deletion(res, ed)
            elif additional_modif_atoms:
                # Only extra atoms on the modified residue.
                _Addition(res, ed, additional_modif_atoms)
            else:
                # Same heavy atoms as the parent.
                _Unchanged(res, ed)
        elif additional_parent_atoms:
            # Not flagged as hetero atom but lacking parent atoms.
            _DeleteSidechain(res, ed)
def _Replacement(res, ed, old_name):
    """
    Convert a renamed residue whose atoms differ from those of its parent.

    Only ``MSE`` is converted explicitly: the hetatom flag of its atoms is
    cleared and the ``SE`` atom is replaced by an ``SD`` sulfur at the same
    position with the same occupancy and B-factor. If no ``SE`` atom is
    found, or for any other residue name, ``_DeleteSidechain`` is applied
    instead.

    :param res: The residue to modify.
    :param ed: Editor used for the modifications.
    :param old_name: Name the residue had before it was renamed.
    """
    if old_name != "MSE":
        _DeleteSidechain(res, ed)
        return

    for atom in res.atoms:
        atom.is_hetatom = False
    sel = res.FindAtom("SE")
    # FindAtom may return an invalid handle; check it as the other helpers do
    # before reading position/occupancy/B-factor from it.
    if sel.IsValid():
        ed.InsertAtom(res, "SD", sel.pos, "S", sel.occupancy, sel.b_factor)
        # The sulfur takes the place of the selenium, so drop the latter.
        ed.DeleteAtom(sel)
    else:
        _DeleteSidechain(res, ed)
def _Deletion(res, ed):
    """
    Apply ``_DeleteSidechain`` to *res* and clear the hetatom flag on the
    atoms that ``res.atoms`` yields afterwards.

    :param res: The residue to modify.
    :param ed: Editor passed on to ``_DeleteSidechain``.
    """
    _DeleteSidechain(res, ed)
    remaining_atoms = res.atoms
    for kept_atom in remaining_atoms:
        kept_atom.is_hetatom = False
def _Addition(res, ed, additional_modif_atoms):
    """
    Delete the named extra atoms from *res*, then clear the hetatom flag on
    the atoms of the residue.

    :param res: The residue to modify.
    :param ed: Editor used to delete the atoms.
    :param additional_modif_atoms: Iterable of atom names to remove; names
      for which ``res.FindAtom`` returns an invalid handle are ignored.
    """
    for extra_name in additional_modif_atoms:
        extra_atom = res.FindAtom(extra_name)
        if not extra_atom.IsValid():
            continue
        ed.DeleteAtom(extra_atom)
    for kept_atom in res.atoms:
        kept_atom.is_hetatom = False
def _Unchanged(res, ed):
    """
    Handle a residue whose heavy atoms match those of its parent.

    D-peptide-linking residues are removed completely. For every other
    residue ``_DeleteSidechain`` is applied and the hetatom flag of its atoms
    is cleared.

    :param res: The residue to process.
    :param ed: Editor used for the modifications.
    """
    if res.chem_class == mol.D_PEPTIDE_LINKING:
        ed.DeleteResidue(res)
        # The residue is gone; it must not be touched any further.
        return
    _DeleteSidechain(res, ed)
    for atom in res.atoms:
        atom.is_hetatom = False
159 def _DeleteSidechain(res, ed) :
160 for atom
in res.atoms:
161 if not atom.name
in [
'CA',
'CB',
'C',
'N',
'O']: