00001 """
00002 Superposition of structures made simple.
00003
00004 Authors: Stefan Bienert
00005 """
00006
00007 import math
00008 import ost.mol.alg
00009 import ost.seq.alg
00010
00011 def ParseAtomNames(atoms):
00012 """
00013 Parses different representations of a list of atom names and returns a
00014 :class:`set`, understandable by :func:`~ost.mol.alg.MatchResidueByNum`. In
00015 essence, this function translates
00016
00017 * None to ``None``
00018
00019 * 'all' to ``None``
00020
00021 * 'backbone' to ``set(['N', 'CA', 'C', 'O'])``
00022
00023 * 'aname1, aname2' to ``set(['aname1', 'aname2'])``
00024
00025 * ``['aname1', 'aname2']`` to ``set(['aname1', 'aname2'])``
00026
00027 :param atoms: Identifier or list of atoms
00028 :type atoms: :class:`str`, :class:`list`, :class:`set`
00029 :returns: A :class:`set` of atoms.
00030 """
00031
00032 if atoms==None:
00033 return None
00034 if isinstance(atoms, str):
00035 if atoms.upper()=='ALL':
00036 return None
00037 if atoms.upper()=='BACKBONE':
00038 return set(['N', 'CA', 'C', 'O'])
00039
00040 return set([a.strip() for a in atoms.split(',')])
00041 return set(atoms)
00042
00043
00044 def _EmptyView(view):
00045 """
00046 for internal use, only
00047 """
00048 if isinstance(view, ost.mol.EntityHandle):
00049 return view.CreateEmptyView()
00050 return view.handle.CreateEmptyView()
00051
00052
00053 def _fetch_atoms(r_a, r_b, result_a, result_b, atmset):
00054 """
00055 for internal use, only
00056 """
00057
00058 for a_a in r_a.GetAtomList():
00059 if atmset==None or a_a.name in atmset:
00060 a_b = r_b.FindAtom(a_a.name)
00061 if a_b.IsValid():
00062 result_a.AddAtom(a_a)
00063 result_b.AddAtom(a_b)
00064 return result_a, result_b
00065
00066
00067 def _no_of_chains(ent_a, ent_b):
00068 """
00069 for internal use, only
00070 """
00071
00072 if ent_a.chain_count < ent_b.chain_count:
00073 return ent_a.chain_count
00074 return ent_b.chain_count
00075
00076
00077 def MatchResidueByNum(ent_a, ent_b, atoms='all'):
00078 """
00079 Returns a tuple of views containing exactly the same number of atoms.
00080 Residues are matched by residue number. A subset of atoms to be included in
00081 the views can be specified in the **atoms** argument. Regardless of what the
00082 list of **atoms** says, only those present in two matched residues will be
00083 included in the views. Chains are processed in the order they occur in the
00084 entities. If **ent_a** and **ent_b** contain a different number of chains,
00085 processing stops with the lower count.
00086
00087 :param ent_a: The first entity
00088 :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00089 :param ent_b: The second entity
00090 :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00091 :param atoms: The subset of atoms to be included in the two views.
00092 :type atoms: :class:`str`, :class:`list`, :class:`set`
00093 :returns: Two :class:`~ost.mol.EntityView` instances with the same amount of
00094 residues matched by number. Each residue will have the same number
00095 & type of atoms.
00096 """
00097
00098 result_a=_EmptyView(ent_a)
00099 result_b=_EmptyView(ent_b)
00100 n_chains=_no_of_chains(ent_a, ent_b)
00101 atmset=ParseAtomNames(atoms)
00102
00103 for i in range(0, n_chains):
00104 chain_a=ent_a.chains[i]
00105 chain_b=ent_b.chains[i]
00106 residues_a=iter(chain_a.residues)
00107
00108 if chain_a.InSequence() and chain_b.InSequence():
00109 residues_b=iter(chain_b.residues)
00110
00111 try:
00112 while True:
00113 r_a=residues_a.next()
00114 r_b=residues_b.next()
00115 while r_a.number!=r_b.number:
00116 while r_a.number<r_b.number:
00117 r_a=residues_a.next()
00118 while r_b.number<r_a.number:
00119 r_b=residues_b.next()
00120 assert r_a.number==r_b.number
00121 result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
00122 except StopIteration:
00123 pass
00124 else:
00125
00126 try:
00127 while True:
00128 r_a=residues_a.next()
00129 r_b=chain_b.FindResidue(r_a.number)
00130 if r_b.IsValid():
00131 result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
00132 except StopIteration:
00133 pass
00134 result_a.AddAllInclusiveBonds()
00135 result_b.AddAllInclusiveBonds()
00136 return result_a, result_b
00137
00138
00139 def MatchResidueByIdx(ent_a, ent_b, atoms='all'):
00140 """
00141 Returns a tuple of views containing exactly the same number of atoms.
00142 Residues are matched by position in the chains of an entity. A subset of
00143 atoms to be included in the views can be specified in the **atoms** argument.
00144 Regardless of what the list of **atoms** says, only those present in two
00145 matched residues will be included in the views. Chains are processed in order
00146 of appearance. If **ent_a** and **ent_b** contain a different number of
00147 chains, processing stops with the lower count. The number of residues per
00148 chain is supposed to be the same.
00149
00150 :param ent_a: The first entity
00151 :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00152 :param ent_b: The second entity
00153 :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00154 :param atoms: The subset of atoms to be included in the two views.
00155 :type atoms: :class:`str`, :class:`list`, :class:`set`
00156 :returns: Two :class:`~ost.mol.EntityView` instances with the same amount of
00157 residues matched by position. Each residue will have the same number
00158 & type of atoms.
00159 """
00160 not_supported="MatchResidueByIdx has no support for chains of different "\
00161 +"lengths"
00162
00163 result_a=_EmptyView(ent_a)
00164 result_b=_EmptyView(ent_b)
00165 n_chains=_no_of_chains(ent_a, ent_b)
00166 atmset=ParseAtomNames(atoms)
00167
00168 for i in range(0, n_chains):
00169 chain_a=ent_a.chains[i]
00170 chain_b=ent_b.chains[i]
00171
00172 if chain_a.residue_count!=chain_b.residue_count:
00173 raise RuntimeError(not_supported)
00174
00175 for r_a, r_b in zip(chain_a.residues, chain_b.residues):
00176 result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
00177 result_a.AddAllInclusiveBonds()
00178 result_b.AddAllInclusiveBonds()
00179 return result_a, result_b
00180
00181
00182 def _MatchResidueByAln(ent_a, ent_b, atoms, alnmethod):
00183 """
00184 For internal use, only
00185 """
00186
00187 result_a = _EmptyView(ent_a)
00188 result_b = _EmptyView(ent_b)
00189 n_chains = _no_of_chains(ent_a, ent_b)
00190 atmset = ParseAtomNames(atoms)
00191
00192 for i in range(0, n_chains):
00193
00194 chain_a = ent_a.chains[i]
00195 chain_b = ent_b.chains[i]
00196 chain_view_a = chain_a.Select('peptide=true')
00197 chain_view_b = chain_b.Select('peptide=true')
00198 if chain_view_a.chain_count == 0 or chain_view_b.chain_count == 0:
00199
00200 continue
00201
00202 s_a = ''.join([r.one_letter_code for r in chain_view_a.residues])
00203 s_b = ''.join([r.one_letter_code for r in chain_view_b.residues])
00204
00205 seq_a = ost.seq.CreateSequence(chain_a.name, s_a)
00206 seq_b = ost.seq.CreateSequence(chain_b.name, s_b)
00207 aln_a_b = alnmethod(seq_a, seq_b, ost.seq.alg.BLOSUM62)
00208 if not aln_a_b:
00209
00210 continue
00211
00212 max_aln_res = -1
00213 for a in range(0, len(aln_a_b)):
00214 aln = aln_a_b[a]
00215 aln_res_len = 0
00216 match_list = list()
00217 for i in range(0, aln.GetLength()):
00218 if aln.sequences[0][i]!='-' and aln.sequences[1][i]!='-':
00219 aln_res_len += 1
00220 match_list.append(i)
00221 if aln_res_len > max_aln_res:
00222 max_aln_res = aln_res_len
00223 max_aln_idx = a
00224 max_matches = match_list
00225
00226 aln = aln_a_b[max_aln_idx]
00227
00228 aln.AttachView(0, chain_view_a)
00229 aln.AttachView(1, chain_view_b)
00230
00231 for i in max_matches:
00232 r_a = aln.GetResidue(0,i)
00233 r_b = aln.GetResidue(1,i)
00234 result_a, result_b = _fetch_atoms(r_a, r_b, result_a, result_b, atmset)
00235 result_a.AddAllInclusiveBonds()
00236 result_b.AddAllInclusiveBonds()
00237 return result_a, result_b
00238
00239 def MatchResidueByLocalAln(ent_a, ent_b, atoms='all'):
00240 """
00241 Match residues by local alignment. Takes **ent_a** and **ent_b**, extracts the
00242 sequences chain-wise and aligns them in Smith/Waterman manner using the
00243 BLOSUM62 matrix for scoring. Only residues which are marked as :attr:`peptide
00244 linking <ost.mol.ResidueHandle.peptide_linking>` are considered for alignment.
00245 The residues of the entities are then matched based on this alignment. Only
00246 atoms present in both residues are included in the views. Chains are processed
00247 in order of appearance. If **ent_a** and **ent_b** contain a different number
00248 of chains, processing stops with the lower count.
00249
00250 :param ent_a: The first entity
00251 :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00252 :param ent_b: The second entity
00253 :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00254 :param atoms: The subset of atoms to be included in the two views.
00255 :type atoms: :class:`str`, :class:`list`, :class:`set`
00256 :returns: Two :class:`~ost.mol.EntityView` instances with the same number of
00257 residues. Each residue will have the same number & type of atoms.
00258 """
00259 return _MatchResidueByAln(ent_a, ent_b, atoms, ost.seq.alg.LocalAlign)
00260
00261 def MatchResidueByGlobalAln(ent_a, ent_b, atoms='all'):
00262 """
00263 Match residues by global alignment.
00264 Same as :func:`MatchResidueByLocalAln` but performs a global Needleman/Wunsch
00265 alignment of the sequences using the BLOSUM62 matrix for scoring.
00266
00267 :param ent_a: The first entity
00268 :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00269 :param ent_b: The second entity
00270 :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00271 :param atoms: The subset of atoms to be included in the two views.
00272 :type atoms: :class:`str`, :class:`list`, :class:`set`
00273 :returns: Two :class:`~ost.mol.EntityView` instances with the same number of
00274 residues. Each residue will have the same number & type of atoms.
00275 """
00276 return _MatchResidueByAln(ent_a, ent_b, atoms, ost.seq.alg.GlobalAlign)
00277
00278
00279 def Superpose(ent_a, ent_b, match='number', atoms='all', iterative=False,
00280 max_iterations=5, distance_threshold=3.0):
00281 """
00282 Superposes the model entity onto the reference. To do so, two views are
00283 created, returned with the result. *atoms* describes what goes into these
00284 views and *match* the selection method. For superposition,
00285 :func:`SuperposeSVD` or :func:`IterativeSuperposeSVD` are called (depending on
00286 *iterative*). For matching, the following methods are recognised:
00287
00288 * ``number`` - select residues by residue number, includes *atoms*, calls
00289 :func:`MatchResidueByNum`
00290
00291 * ``index`` - select residues by index in chain, includes *atoms*, calls
00292 :func:`MatchResidueByIdx`
00293
00294 * ``local-aln`` - select residues from a Smith/Waterman alignment, includes
00295 *atoms*, calls :func:`MatchResidueByLocalAln`
00296
00297 * ``global-aln`` - select residues from a Needleman/Wunsch alignment, includes
00298 *atoms*, calls :func:`MatchResidueByGlobalAln`
00299
00300 :param ent_a: The model entity (superposition transform is applied on full
00301 entity handle here)
00302 :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00303
00304 :param ent_b: The reference entity
00305 :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00306
00307 :param match: Method to gather residues/ atoms
00308 :type match: :class:`str`
00309
00310 :param atoms: The subset of atoms to be used in the superposition
00311 :type atoms: :class:`str`, :class:`list`, :class:`set`
00312
00313 :param iterative: Whether or not to use iterative superpositon.
00314 :type iterative: :class:`bool`
00315
00316 :param max_iterations: Max. number of iterations for
00317 :func:`IterativeSuperposeSVD`
00318 (only if *iterative* = True)
00319 :type max_iterations: :class:`int`
00320
00321 :param distance_threshold: Distance threshold for
00322 :func:`IterativeSuperposeSVD`
00323 (only if *iterative* = True)
00324 :type distance_threshold: :class:`float`
00325
00326 :returns: An instance of :class:`SuperpositionResult`.
00327 """
00328 not_supported = "Superpose called with unsupported matching request."
00329
00330 if match.upper() == 'NUMBER':
00331 view_a, view_b = MatchResidueByNum(ent_a, ent_b, atoms)
00332 elif match.upper() == 'INDEX':
00333 view_a, view_b = MatchResidueByIdx(ent_a, ent_b, atoms)
00334 elif match.upper() == 'LOCAL-ALN':
00335 view_a, view_b = MatchResidueByLocalAln(ent_a, ent_b, atoms)
00336 elif match.upper() == 'GLOBAL-ALN':
00337 view_a, view_b = MatchResidueByGlobalAln(ent_a, ent_b, atoms)
00338 else:
00339 raise ValueError(not_supported)
00340
00341 if iterative:
00342 res = ost.mol.alg.IterativeSuperposeSVD(view_a, view_b, max_iterations,
00343 distance_threshold)
00344 else:
00345 res = ost.mol.alg.SuperposeSVD(view_a, view_b)
00346 return res