00001 """
00002 Superposition of structures made simple.
00003
00004 Authors: Stefan Bienert
00005 """
00006
00007 import math
00008 import ost.mol.alg
00009 import ost.seq.alg
00010
00011 def ParseAtomNames(atoms):
00012 """
00013 Parses different representations of a list of atom names and returns a
00014 :class:`set`, understandable by :func:`~ost.mol.alg.MatchResidueByNum`. In
00015 essence, this function translates
00016
00017 * None to ``None``
00018
00019 * 'all' to ``None``
00020
00021 * 'backbone' to ``set(['N', 'CA', 'C', 'O'])``
00022
00023 * 'aname1, aname2' to ``set(['aname1', 'aname2'])``
00024
00025 * ``['aname1', 'aname2']`` to ``set(['aname1', 'aname2'])``
00026
00027 :param atoms: Identifier or list of atoms
00028 :type atoms: :class:`str`, :class:`list`, :class:`set`
00029 :returns: A :class:`set` of atoms.
00030 """
00031
00032 if atoms==None:
00033 return None
00034 if isinstance(atoms, str):
00035 if atoms.upper()=='ALL':
00036 return None
00037 if atoms.upper()=='BACKBONE':
00038 return set(['N', 'CA', 'C', 'O'])
00039
00040 return set([a.strip() for a in atoms.split(',')])
00041 return set(atoms)
00042
00043
00044 def _EmptyView(view):
00045 """
00046 for internal use, only
00047 """
00048 if isinstance(view, ost.mol.EntityHandle):
00049 return view.CreateEmptyView()
00050 return view.handle.CreateEmptyView()
00051
00052
00053 def _fetch_atoms(r_a, r_b, result_a, result_b, atmset):
00054 """
00055 for internal use, only
00056 """
00057
00058 for a_a in r_a.GetAtomList():
00059 if atmset==None or a_a.name in atmset:
00060 a_b = r_b.FindAtom(a_a.name)
00061 if a_b.IsValid():
00062 result_a.AddAtom(a_a)
00063 result_b.AddAtom(a_b)
00064 return result_a, result_b
00065
00066
00067 def _no_of_chains(ent_a, ent_b):
00068 """
00069 for internal use, only
00070 """
00071
00072 if ent_a.chain_count < ent_b.chain_count:
00073 return ent_a.chain_count
00074 return ent_b.chain_count
00075
00076
00077 def MatchResidueByNum(ent_a, ent_b, atoms='all'):
00078 """
00079 Returns a tuple of views containing exactly the same number of atoms.
00080 Residues are matched by residue number. A subset of atoms to be included in
00081 the views can be specified in the **atoms** argument. Regardless of what the
00082 list of **atoms** says, only those present in two matched residues will be
00083 included in the views. Chains are processed in the order they occur in the
00084 entities. If **ent_a** and **ent_b** contain a different number of chains,
00085 processing stops with the lower count.
00086
00087 :param ent_a: The first entity
00088 :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00089 :param ent_b: The second entity
00090 :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00091 :param atoms: The subset of atoms to be included in the two views.
00092 :type atoms: :class:`str`, :class:`list`, :class:`set`
00093 :returns: Two :class:`~ost.mol.EntityView` instances with the same amount of
00094 residues matched by number. Each residue will have the same number
00095 & type of atoms.
00096 """
00097
00098 result_a=_EmptyView(ent_a)
00099 result_b=_EmptyView(ent_b)
00100 n_chains=_no_of_chains(ent_a, ent_b)
00101 atmset=ParseAtomNames(atoms)
00102
00103 for i in range(0, n_chains):
00104 chain_a=ent_a.chains[i]
00105 chain_b=ent_b.chains[i]
00106 residues_a=iter(chain_a.residues)
00107
00108 if chain_a.InSequence() and chain_b.InSequence():
00109 residues_b=iter(chain_b.residues)
00110
00111 try:
00112 while True:
00113 r_a=residues_a.next()
00114 r_b=residues_b.next()
00115 while r_a.number!=r_b.number:
00116 while r_a.number<r_b.number:
00117 r_a=residues_a.next()
00118 while r_b.number<r_a.number:
00119 r_b=residues_b.next()
00120 assert r_a.number==r_b.number
00121 result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
00122 except StopIteration:
00123 pass
00124 else:
00125
00126 try:
00127 while True:
00128 r_a=residues_a.next()
00129 r_b=chain_b.FindResidue(r_a.number)
00130 if r_b.IsValid():
00131 result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
00132 except StopIteration:
00133 pass
00134 result_a.AddAllInclusiveBonds()
00135 result_b.AddAllInclusiveBonds()
00136 return result_a, result_b
00137
00138
00139 def MatchResidueByIdx(ent_a, ent_b, atoms='all'):
00140 """
00141 Returns a tuple of views containing exactly the same number of atoms.
00142 Residues are matched by position in the chains of an entity. A subset of
00143 atoms to be included in the views can be specified in the **atoms** argument.
00144 Regardless of what the list of **atoms** says, only those present in two
00145 matched residues will be included in the views. Chains are processed in order
00146 of appearance. If **ent_a** and **ent_b** contain a different number of
00147 chains, processing stops with the lower count. The number of residues per
00148 chain is supposed to be the same.
00149
00150 :param ent_a: The first entity
00151 :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00152 :param ent_b: The second entity
00153 :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00154 :param atoms: The subset of atoms to be included in the two views.
00155 :type atoms: :class:`str`, :class:`list`, :class:`set`
00156 :returns: Two :class:`~ost.mol.EntityView` instances with the same amount of
00157 residues matched by position. Each residue will have the same number
00158 & type of atoms.
00159 """
00160 not_supported="MatchResidueByIdx has no support for chains of different "\
00161 +"lengths"
00162
00163 result_a=_EmptyView(ent_a)
00164 result_b=_EmptyView(ent_b)
00165 n_chains=_no_of_chains(ent_a, ent_b)
00166 atmset=ParseAtomNames(atoms)
00167
00168 for i in range(0, n_chains):
00169 chain_a=ent_a.chains[i]
00170 chain_b=ent_b.chains[i]
00171
00172 if chain_a.residue_count!=chain_b.residue_count:
00173 raise RuntimeError(not_supported)
00174
00175 for r_a, r_b in zip(chain_a.residues, chain_b.residues):
00176 result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
00177 result_a.AddAllInclusiveBonds()
00178 result_b.AddAllInclusiveBonds()
00179 return result_a, result_b
00180
00181
00182 def _MatchResidueByAln(ent_a, ent_b, atoms, alnmethod):
00183 """
00184 For internal use, only
00185 """
00186
00187 result_a = _EmptyView(ent_a)
00188 result_b = _EmptyView(ent_b)
00189 n_chains = _no_of_chains(ent_a, ent_b)
00190 atmset = ParseAtomNames(atoms)
00191
00192 for i in range(0, n_chains):
00193 chain_a = ent_a.chains[i]
00194 chain_b = ent_b.chains[i]
00195
00196 s_a = ''.join([r.one_letter_code
00197 for r in chain_a.Select('protein=True').residues])
00198 s_b = ''.join([r.one_letter_code
00199 for r in chain_b.Select('protein=True').residues])
00200
00201 seq_a = ost.seq.CreateSequence(chain_a.name, s_a)
00202 seq_b = ost.seq.CreateSequence(chain_b.name, s_b)
00203 aln_a_b = alnmethod(seq_a, seq_b, ost.seq.alg.BLOSUM62)
00204
00205 max_aln_res = 0
00206 for a in range(0, len(aln_a_b)):
00207 aln = aln_a_b[a]
00208 aln_res_len = 0
00209 match_list = list()
00210 for i in range(0, aln.GetLength()):
00211 if aln.sequences[0][i]!='-' and aln.sequences[1][i]!='-':
00212 aln_res_len += 1
00213 match_list.append(i)
00214 if aln_res_len > max_aln_res:
00215 max_aln_res = aln_res_len
00216 max_aln_idx = a
00217 max_matches = match_list
00218
00219 aln = aln_a_b[max_aln_idx]
00220
00221 aln.AttachView(0, chain_a.Select('protein=True'))
00222 aln.AttachView(1, chain_b.Select('protein=True'))
00223
00224 for i in max_matches:
00225 r_a = aln.GetResidue(0,i)
00226 r_b = aln.GetResidue(1,i)
00227 result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
00228 result_a.AddAllInclusiveBonds()
00229 result_b.AddAllInclusiveBonds()
00230 return result_a, result_b
00231
00232 def MatchResidueByLocalAln(ent_a, ent_b, atoms='all'):
00233 """
00234 Match residues by local alignment. Takes **ent_a** and **ent_b**, extracts
00235 the sequences chain-wise and aligns them in Smith/Waterman manner using the
00236 BLOSUM62 matrix for scoring. The residues of the entities are then matched
00237 based on this alignment. Only atoms present in both residues are included in
00238 the views. Chains are processed in order of appearance. If **ent_a** and
00239 **ent_b** contain a different number of chains, processing stops with
00240 the lower count.
00241
00242 :param ent_a: The first entity
00243 :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00244 :param ent_b: The second entity
00245 :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00246 :param atoms: The subset of atoms to be included in the two views.
00247 :type atoms: :class:`str`, :class:`list`, :class:`set`
00248 :returns: Two :class:`~ost.mol.EntityView` instances with the same number of
00249 residues. Each residue will have the same number & type of atoms.
00250 """
00251 return _MatchResidueByAln(ent_a, ent_b, atoms, ost.seq.alg.LocalAlign)
00252
00253 def MatchResidueByGlobalAln(ent_a, ent_b, atoms='all'):
00254 """
00255 Match residues by global alignment. Takes **ent_a** and **ent_b**, extracts
00256 the sequences chain-wise and aligns them in Needleman/Wunsch manner using the
00257 BLOSUM62 matrix for scoring. The residues of the entities are then matched
00258 based on this alignment. Only atoms present in both residues are included in
00259 the views. Chains are processed in order of appearance. If **ent_a** and
00260 **ent_b** contain a different number of chains, processing stops with
00261 the lower count.
00262
00263 :param ent_a: The first entity
00264 :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00265 :param ent_b: The second entity
00266 :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00267 :param atoms: The subset of atoms to be included in the two views.
00268 :type atoms: :class:`str`, :class:`list`, :class:`set`
00269 :returns: Two :class:`~ost.mol.EntityView` instances with the same number of
00270 residues. Each residue will have the same number & type of atoms.
00271 """
00272 return _MatchResidueByAln(ent_a, ent_b, atoms, ost.seq.alg.GlobalAlign)
00273
00274
00275 def Superpose(ent_a, ent_b, match='number', atoms='all', iterative=False, max_iterations=5, distance_threshold=3.0):
00276 """
00277 Superposes the model entity onto the reference. To do so, two views are
00278 created, returned with the result. **atoms** describes what goes into these
00279 views and **match** the selection method. For superposition,
00280 :func:`~ost.mol.alg.SuperposeSVD` is called. For matching, the following methods
00281 are recognised:
00282
00283 * ``number`` - select residues by residue number, includes **atoms**, calls
00284 :func:`~ost.mol.alg.MatchResidueByNum`
00285
00286 * ``index`` - select residues by index in chain, includes **atoms**, calls
00287 :func:`~ost.mol.alg.MatchResidueByIdx`
00288
00289 * ``local-aln`` - select residues from a Smith/Waterman alignment, includes
00290 **atoms**, calls :func:`~ost.mol.alg.MatchResidueByLocalAln`
00291
00292 * ``global-aln`` - select residues from a Needleman/Wunsch alignment, includes
00293 **atoms**, calls :func:`~ost.mol.alg.MatchResidueByGlobalAln`
00294
00295 There is also an option to use **iterative** matching which allows for an
00296 iterative approach to superposing two structures. **iterative** takes two
00297 additional parameters, **max_iteration** and **distance_threshold**.
00298
00299 :param ent_a: The model entity
00300 :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00301
00302 :param ent_b: The reference entity
00303 :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
00304
00305 :param match: Method to gather residues/ atoms
00306 :type match: :class:`str`
00307
00308 :param atoms: The subset of atoms to be used in the superposition
00309 :type atoms: :class:`str`, :class:`list`, :class:`set`
00310
00311 :param max_iterations: They number of iterations that will be run during
00312 iterative superposition
00313 :type max_iterations: :class:`int`
00314
00315 :param distance_threshold: The distance threshold between which two atoms
00316 that will be used in the next superposition
00317 iteration
00318 :type distance_threshold: :class:`float`
00319
00320 :returns: An instance of :class:`SuperpositionResult`, containing members
00321
00322 * ``rmsd`` - RMSD of the superposed entities
00323
00324 * ``view1`` - First :class:`~ost.mol.EntityView` used
00325
00326 * ``view2`` - Second :class:`~ost.mol.EntityView` used
00327 """
00328 not_supported="Superpose called with unsupported matching request."
00329
00330 if match.upper() == 'NUMBER':
00331 view_a, view_b = MatchResidueByNum(ent_a, ent_b, atoms)
00332 elif match.upper() == 'INDEX':
00333 view_a, view_b=MatchResidueByIdx(ent_a, ent_b, atoms)
00334 elif match.upper() == 'LOCAL-ALN':
00335 view_a, view_b=_MatchResidueByAln(ent_a, ent_b, atoms,
00336 ost.seq.alg.LocalAlign)
00337 elif match.upper() == 'GLOBAL-ALN':
00338 view_a, view_b=_MatchResidueByAln(ent_a, ent_b, atoms,
00339 ost.seq.alg.GlobalAlign)
00340 else:
00341 raise ValueError(not_supported)
00342
00343 if iterative:
00344 res=ost.mol.alg.IterativeSuperposeSVD(view_a, view_b, max_iterations, distance_threshold)
00345 else:
00346 res=ost.mol.alg.SuperposeSVD(view_a, view_b)
00347 return res