00001 from ost import conop, mol
00002
def Cleanup(entity, strip_water=True, canonicalize=True, remove_ligands=True):
    """
    Return a cleaned-up (simplified) version of a protein structure.

    The work is done on ``entity.Copy()``; every enabled step edits that copy
    through a single editor obtained from ``EditXCS()``.

    :param entity: The structure to clean up.
    :param strip_water: Whether to remove water from the structure.
    :param canonicalize: Whether to strip off modifications of amino acids and
      map them back to their parent standard amino acid, e.g. selenium
      methionine to methionine. For more complex amino acids, where the
      relation between the modified and the standard parent amino acid is not
      known, sidechain atoms are removed. D-peptide-linking residues are
      completely removed as well.
    :param remove_ligands: Whether to remove ligands from the structure.

    :return: a cleaned version of the entity
    :raises RuntimeError: if ``conop.GetDefaultLib()`` yields no compound
      library. The check is made even when *canonicalize* is False.
    """
    compound_lib = conop.GetDefaultLib()
    if not compound_lib:
        raise RuntimeError("Cleanup requires a compound library.")

    cleaned = entity.Copy()
    editor = cleaned.EditXCS()

    # The steps always run in this order: water, canonicalization, ligands.
    steps = (
        (strip_water,
         lambda: _StripWater(cleaned, editor)),
        (canonicalize,
         lambda: _CanonicalizeResidues(cleaned, editor, compound_lib)),
        (remove_ligands,
         lambda: _RemoveLigands(cleaned, editor)),
    )
    for enabled, run_step in steps:
        if enabled:
            run_step()
    return cleaned
00034
00035
def _StripWater(clean_entity, ed):
    """
    Remove the water residues from *clean_entity*.

    Every valid residue whose ``chem_class`` equals ``mol.WATER`` is deleted
    through the editor *ed*; ``ed.UpdateICS()`` is called once afterwards.
    """
    for residue in clean_entity.residues:
        if not residue.IsValid():
            continue
        if residue.chem_class != mol.WATER:
            continue
        ed.DeleteResidue(residue.handle)
    ed.UpdateICS()
    return
00046
def _RemoveLigands(clean_entity, ed):
    """
    Remove the ligands from *clean_entity*.

    A valid residue is treated as a ligand when it is not peptide linking,
    its first atom is flagged as hetatom and it is not water. Such residues
    are deleted through the editor *ed*; ``ed.UpdateICS()`` is called once
    afterwards.
    """
    for residue in clean_entity.residues:
        if not residue.IsValid():
            continue
        if residue.IsPeptideLinking():
            continue
        # Only the first atom is inspected to decide on the hetatom status.
        if not residue.atoms[0].is_hetatom:
            continue
        if residue.chem_class == mol.WATER:
            continue
        ed.DeleteResidue(residue.handle)
    ed.UpdateICS()
    return
00058
def _CanonicalizeResidues(clean_entity, ed, compound_lib):
    """
    Strip off modifications of amino acids and map them back to their parent
    standard amino acid, e.g. selenium methionine to methionine. For more
    complex amino acids, where the relation between the modified and the
    standard parent amino acid is not known, sidechain atoms are removed.
    D-peptide-linking residues are completely removed as well.

    Only valid, peptide-linking residues are considered. ``ed.UpdateICS()``
    is called once after all residues have been processed.

    :param clean_entity: The entity whose residues are canonicalized in place.
    :param ed: The editor used to rename residues and delete atoms/residues.
    :param compound_lib: Compound library queried with ``FindCompound`` for
      the parent residue definition.
    """
    # Hydrogens and deuteriums are ignored when comparing atom names.
    hydrogens = ("H", "D")

    for res in clean_entity.residues:
        if not (res.IsValid() and res.IsPeptideLinking()):
            continue

        parent_olc = res.one_letter_code
        if parent_olc == "X":
            # Presumably "X" marks a residue without a known standard
            # parent: only the atoms kept by _DeleteSidechain remain.
            _DeleteSidechain(res, ed)
            for atom in res.atoms:
                atom.is_hetatom = False
            continue

        parent_tlc = conop.OneLetterCodeToResidueName(parent_olc)
        parent_res = compound_lib.FindCompound(parent_tlc)
        if not parent_res:
            _DeleteSidechain(res, ed)
            for atom in res.atoms:
                atom.is_hetatom = False
            # print() with one parenthesised argument behaves the same on
            # Python 2 and Python 3; the former print statement was a
            # SyntaxError on Python 3.
            print("Removing sidechain of %s, beacuse it has not been found in the compound library"% parent_tlc)
            continue

        # Heavy-atom names present in the (possibly modified) residue.
        modif_atom_names = set(atom.name for atom in res.atoms
                               if atom.element not in hydrogens)

        # When the residue carries an OXT atom, the parent's leaving atoms
        # are expected too; otherwise they are left out of the comparison.
        keep_leaving = res.FindAtom("OXT").IsValid()
        parent_atom_names = set(spec.name for spec in parent_res.atom_specs
                                if spec.element not in hydrogens
                                and (keep_leaving or not spec.is_leaving))

        additional_parent_atoms = parent_atom_names - modif_atom_names
        additional_modif_atoms = modif_atom_names - parent_atom_names

        if res.atoms[0].is_hetatom:
            # Modified residue: rename it to the parent, then fix the atoms.
            old_name = res.name
            ed.RenameResidue(res, parent_tlc)
            if additional_parent_atoms and additional_modif_atoms:
                _Replacement(res, ed, old_name)
            elif additional_parent_atoms:
                _Deletion(res, ed)
            elif additional_modif_atoms:
                _Addition(res, ed, additional_modif_atoms)
            else:
                _Unchanged(res, ed)
        elif additional_parent_atoms:
            # Not flagged as hetatom but missing parent atoms: drop the
            # incomplete sidechain.
            _DeleteSidechain(res, ed)
    ed.UpdateICS()
    return
00119
def _Replacement(res, ed, old_name):
    """
    Handle a residue that both lacks parent atoms and carries extra atoms.

    Only the former residue name "MSE" is handled specifically: the hetatom
    flag of all atoms is cleared and, when an "SE" atom is found, an "SD"
    sulfur atom is inserted with the same position, occupancy and B-factor
    before the "SE" atom is deleted. In every other case the sidechain is
    removed with _DeleteSidechain.

    :param res: The residue to modify.
    :param ed: The editor used to insert and delete atoms.
    :param old_name: The residue name before it was renamed to its parent.
    """
    if old_name != "MSE":
        # NOTE(review): unlike _Deletion/_Addition/_Unchanged, this branch
        # leaves the is_hetatom flags untouched -- confirm this is intended.
        _DeleteSidechain(res, ed)
        return

    for atom in res.atoms:
        atom.is_hetatom = False

    selenium = res.FindAtom("SE")
    if not selenium.IsValid():
        _DeleteSidechain(res, ed)
        return

    ed.InsertAtom(res, "SD", selenium.pos, "S",
                  selenium.occupancy, selenium.b_factor)
    ed.DeleteAtom(selenium)
    return
00134
def _Deletion(res, ed):
    """
    Handle a residue that only lacks atoms compared to its parent.

    The sidechain is removed with _DeleteSidechain, then the hetatom flag is
    cleared on every atom of *res*.
    """
    _DeleteSidechain(res, ed)
    # res.atoms is read again after the sidechain has been deleted.
    for remaining in res.atoms:
        remaining.is_hetatom = False
    return
00140
def _Addition(res, ed, additional_modif_atoms):
    """
    Handle a residue that only carries extra atoms compared to its parent.

    Each atom named in *additional_modif_atoms* that is found in *res* is
    deleted through *ed*; afterwards the hetatom flag is cleared on every
    atom of *res*.
    """
    for extra_name in additional_modif_atoms:
        extra_atom = res.FindAtom(extra_name)
        if not extra_atom.IsValid():
            continue
        ed.DeleteAtom(extra_atom)
    for remaining in res.atoms:
        remaining.is_hetatom = False
    return
00149
def _Unchanged(res, ed):
    """
    Handle a residue whose heavy-atom names match those of its parent.

    A residue whose ``chem_class`` equals ``mol.D_PEPTIDE_LINKING`` is deleted
    entirely. Any other residue has its sidechain removed with
    _DeleteSidechain and the hetatom flag cleared on its atoms.
    """
    if res.chem_class == mol.D_PEPTIDE_LINKING:
        ed.DeleteResidue(res)
        return

    _DeleteSidechain(res, ed)
    for remaining in res.atoms:
        remaining.is_hetatom = False
    return
00158
def _DeleteSidechain(res, ed):
    """
    Delete every atom of *res* except those named CA, CB, C, N and O.

    Atoms are deleted through the editor *ed* in the order in which
    ``res.atoms`` yields them.
    """
    kept_names = ['CA', 'CB', 'C', 'N', 'O']
    for candidate in res.atoms:
        if candidate.name in kept_names:
            continue
        ed.DeleteAtom(candidate)
    return
00164
00165
# Public API of this module. __all__ must list names as strings; listing the
# function object itself makes "from <module> import *" raise a TypeError.
__all__ = ['Cleanup']