OpenStructure
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
chain_mapping.py
Go to the documentation of this file.
1 """
2 Chain mapping aims to identify a one-to-one relationship between chains in a
3 reference structure and a model.
4 """
5 
6 import itertools
7 import copy
8 
9 import numpy as np
10 
11 from scipy.special import factorial
12 from scipy.special import binom # as of Python 3.8, the math module implements
13  # comb, i.e. n choose k
14 
15 import ost
16 from ost import seq
17 from ost import mol
18 from ost import geom
19 
20 from ost.mol.alg import lddt
21 from ost.mol.alg import bb_lddt
22 from ost.mol.alg import qsscore
23 
24 def _CSel(ent, cnames):
25  """ Returns view with specified chains
26 
27  Ensures that quotation marks are around chain names to not confuse
28  OST query language with weird special characters.
29  """
30  query = "cname=" + ','.join([mol.QueryQuoteName(cname) for cname in cnames])
31  return ent.Select(query)
32 
34  """ Result object for the chain mapping functions in :class:`ChainMapper`
35 
36  Constructor is directly called within the functions, no need to construct
37  such objects yourself.
38  """
39  def __init__(self, target, model, chem_groups, chem_mapping,
40  unmapped_mdl_chains, mapping, alns, opt_score=None):
41  self._target_target = target
42  self._model_model = model
43  self._chem_groups_chem_groups = chem_groups
44  self._chem_mapping_chem_mapping = chem_mapping
45  self._unmapped_mdl_chains_unmapped_mdl_chains = unmapped_mdl_chains
46  self._mapping_mapping = mapping
47  self._alns_alns = alns
48  self._opt_score_opt_score = opt_score
49 
50  @property
51  def target(self):
52  """ Target/reference structure, i.e. :attr:`ChainMapper.target`
53 
54  :type: :class:`ost.mol.EntityView`
55  """
56  return self._target_target
57 
58  @property
59  def model(self):
60  """ Model structure that gets mapped onto :attr:`~target`
61 
62  Underwent same processing as :attr:`ChainMapper.target`, i.e.
63  only contains peptide/nucleotide chains of sufficient size.
64 
65  :type: :class:`ost.mol.EntityView`
66  """
67  return self._model_model
68 
69  @property
70  def chem_groups(self):
71  """ Groups of chemically equivalent chains in :attr:`~target`
72 
73  Same as :attr:`ChainMapper.chem_group`
74 
75  :class:`list` of :class:`list` of :class:`str` (chain names)
76  """
77  return self._chem_groups_chem_groups
78 
79  @property
80  def chem_mapping(self):
81  """ Assigns chains in :attr:`~model` to :attr:`~chem_groups`.
82 
83  :class:`list` of :class:`list` of :class:`str` (chain names)
84  """
85  return self._chem_mapping_chem_mapping
86 
87  @property
89  """ Model chains that cannot be mapped to :attr:`chem_groups`
90 
91  Depends on parameterization of :class:`ChainMapper`
92 
93  :class:`list` of class:`str` (chain names)
94  """
95  return self._unmapped_mdl_chains_unmapped_mdl_chains
96 
97  @property
98  def mapping(self):
99  """ Mapping of :attr:`~model` chains onto :attr:`~target`
100 
101  Exact same shape as :attr:`~chem_groups` but containing the names of the
102  mapped chains in :attr:`~model`. May contain None for :attr:`~target`
103  chains that are not covered. No guarantee that all chains in
104  :attr:`~model` are mapped.
105 
106  :class:`list` of :class:`list` of :class:`str` (chain names)
107  """
108  return self._mapping_mapping
109 
110  @property
111  def alns(self):
112  """ Alignments of mapped chains in :attr:`~target` and :attr:`~model`
113 
114  Each alignment is accessible with ``alns[(t_chain,m_chain)]``. First
115  sequence is the sequence of :attr:`target` chain, second sequence the
116  one from :attr:`~model`. The respective :class:`ost.mol.EntityView` are
117  attached with :func:`ost.seq.ConstSequenceHandle.AttachView`.
118 
119  :type: :class:`dict` with key: :class:`tuple` of :class:`str`, value:
120  :class:`ost.seq.AlignmentHandle`
121  """
122  return self._alns_alns
123 
124  @property
125  def opt_score(self):
126  """ Placeholder property without any guarantee of being set
127 
128  Different scores get optimized in the various chain mapping algorithms.
129  Some of them may set their final optimal score in that property.
130  Consult the documentation of the respective chain mapping algorithm
131  for more information. Won't be in the return dict of
132  :func:`JSONSummary`.
133  """
134  return self._opt_score_opt_score
135 
136  def GetFlatMapping(self, mdl_as_key=False):
137  """ Returns flat mapping as :class:`dict` for all mapable chains
138 
139  :param mdl_as_key: Default is target chain name as key and model chain
140  name as value. This can be reversed with this flag.
141  :returns: :class:`dict` with :class:`str` as key/value that describe
142  one-to-one mapping
143  """
144  flat_mapping = dict()
145  for trg_chem_group, mdl_chem_group in zip(self.chem_groupschem_groups,
146  self.mappingmapping):
147  for a,b in zip(trg_chem_group, mdl_chem_group):
148  if a is not None and b is not None:
149  if mdl_as_key:
150  flat_mapping[b] = a
151  else:
152  flat_mapping[a] = b
153  return flat_mapping
154 
155  def JSONSummary(self):
156  """ Returns JSON serializable summary of results
157  """
158  json_dict = dict()
159  json_dict["chem_groups"] = self.chem_groupschem_groups
160  json_dict["mapping"] = self.mappingmapping
161  json_dict["flat_mapping"] = self.GetFlatMappingGetFlatMapping()
162  json_dict["alns"] = list()
163  for aln in self.alnsalns.values():
164  trg_seq = aln.GetSequence(0)
165  mdl_seq = aln.GetSequence(1)
166  aln_dict = {"trg_ch": trg_seq.GetName(), "trg_seq": str(trg_seq),
167  "mdl_ch": mdl_seq.GetName(), "mdl_seq": str(mdl_seq)}
168  json_dict["alns"].append(aln_dict)
169  return json_dict
170 
171 
173 
174  """ Result object for :func:`ChainMapper.GetRepr`
175 
176  Constructor is directly called within the function, no need to construct
177  such objects yourself.
178 
179  :param lDDT: lDDT for this mapping. Depends on how you call
180  :func:`ChainMapper.GetRepr` whether this is backbone only or
181  full atom lDDT.
182  :type lDDT: :class:`float`
183  :param substructure: The full substructure for which we searched for a
184  representation
185  :type substructure: :class:`ost.mol.EntityView`
186  :param ref_view: View pointing to the same underlying entity as
187  *substructure* but only contains the stuff that is mapped
188  :type ref_view: :class:`mol.EntityView`
189  :param mdl_view: The matching counterpart in model
190  :type mdl_view: :class:`mol.EntityView`
191  """
192  def __init__(self, lDDT, substructure, ref_view, mdl_view):
193  self._lDDT_lDDT = lDDT
194  self._substructure_substructure = substructure
195  assert(len(ref_view.residues) == len(mdl_view.residues))
196  self._ref_view_ref_view = ref_view
197  self._mdl_view_mdl_view = mdl_view
198 
199  # lazily evaluated attributes
200  self._ref_bb_pos_ref_bb_pos = None
201  self._mdl_bb_pos_mdl_bb_pos = None
202  self._ref_full_bb_pos_ref_full_bb_pos = None
203  self._mdl_full_bb_pos_mdl_full_bb_pos = None
204  self._superposition_superposition = None
205  self._superposed_mdl_bb_pos_superposed_mdl_bb_pos = None
206  self._ost_query_ost_query = None
207  self._flat_mapping_flat_mapping = None
208  self._inconsistent_residues_inconsistent_residues = None
209 
210  @property
211  def lDDT(self):
212  """ lDDT of representation result
213 
214  Depends on how you call :func:`ChainMapper.GetRepr` whether this is
215  backbone only or full atom lDDT.
216 
217  :type: :class:`float`
218  """
219  return self._lDDT_lDDT
220 
221  @property
222  def substructure(self):
223  """ The full substructure for which we searched for a
224  representation
225 
226  :type: :class:`ost.mol.EntityView`
227  """
228  return self._substructure_substructure
229 
230  @property
231  def ref_view(self):
232  """ View which contains the mapped subset of :attr:`substructure`
233 
234  :type: :class:`ost.mol.EntityView`
235  """
236  return self._ref_view_ref_view
237 
238  @property
239  def mdl_view(self):
240  """ The :attr:`ref_view` representation in the model
241 
242  :type: :class:`ost.mol.EntityView`
243  """
244  return self._mdl_view_mdl_view
245 
246  @property
247  def ref_residues(self):
248  """ The reference residues
249 
250  :type: class:`mol.ResidueViewList`
251  """
252  return self.ref_viewref_view.residues
253 
254  @property
255  def mdl_residues(self):
256  """ The model residues
257 
258  :type: :class:`mol.ResidueViewList`
259  """
260  return self.mdl_viewmdl_view.residues
261 
262  @property
264  """ A list of mapped residue whose names do not match (eg. ALA in the
265  reference and LEU in the model).
266 
267  The mismatches are reported as a tuple of :class:`~ost.mol.ResidueView`
268  (reference, model), or as an empty list if all the residue names match.
269 
270  :type: :class:`list`
271  """
272  if self._inconsistent_residues_inconsistent_residues is None:
273  self._inconsistent_residues_inconsistent_residues = self._GetInconsistentResidues_GetInconsistentResidues(
274  self.ref_residuesref_residues, self.mdl_residuesmdl_residues)
275  return self._inconsistent_residues_inconsistent_residues
276 
277  @property
278  def ref_bb_pos(self):
279  """ Representative backbone positions for reference residues.
280 
281  Thats CA positions for peptides and C3' positions for Nucleotides.
282 
283  :type: :class:`geom.Vec3List`
284  """
285  if self._ref_bb_pos_ref_bb_pos is None:
286  self._ref_bb_pos_ref_bb_pos = self._GetBBPos_GetBBPos(self.ref_residuesref_residues)
287  return self._ref_bb_pos_ref_bb_pos
288 
289  @property
290  def mdl_bb_pos(self):
291  """ Representative backbone positions for model residues.
292 
293  Thats CA positions for peptides and C3' positions for Nucleotides.
294 
295  :type: :class:`geom.Vec3List`
296  """
297  if self._mdl_bb_pos_mdl_bb_pos is None:
298  self._mdl_bb_pos_mdl_bb_pos = self._GetBBPos_GetBBPos(self.mdl_residuesmdl_residues)
299  return self._mdl_bb_pos_mdl_bb_pos
300 
301  @property
302  def ref_full_bb_pos(self):
303  """ Representative backbone positions for reference residues.
304 
305  Thats N, CA and C positions for peptides and O5', C5', C4', C3', O3'
306  positions for Nucleotides.
307 
308  :type: :class:`geom.Vec3List`
309  """
310  if self._ref_full_bb_pos_ref_full_bb_pos is None:
311  self._ref_full_bb_pos_ref_full_bb_pos = self._GetFullBBPos_GetFullBBPos(self.ref_residuesref_residues)
312  return self._ref_full_bb_pos_ref_full_bb_pos
313 
314  @property
315  def mdl_full_bb_pos(self):
316  """ Representative backbone positions for reference residues.
317 
318  Thats N, CA and C positions for peptides and O5', C5', C4', C3', O3'
319  positions for Nucleotides.
320 
321  :type: :class:`geom.Vec3List`
322  """
323  if self._mdl_full_bb_pos_mdl_full_bb_pos is None:
324  self._mdl_full_bb_pos_mdl_full_bb_pos = self._GetFullBBPos_GetFullBBPos(self.mdl_residuesmdl_residues)
325  return self._mdl_full_bb_pos_mdl_full_bb_pos
326 
327 
328  @property
329  def superposition(self):
330  """ Superposition of mdl residues onto ref residues
331 
332  Superposition computed as minimal RMSD superposition on
333  :attr:`ref_bb_pos` and :attr:`mdl_bb_pos`. If number of positions is
334  smaller 3, the full_bb_pos equivalents are used instead.
335 
336  :type: :class:`ost.mol.alg.SuperpositionResult`
337  """
338  if self._superposition_superposition is None:
339  if len(self.mdl_bb_posmdl_bb_pos) < 3:
340  self._superposition_superposition = _GetSuperposition(self.mdl_full_bb_posmdl_full_bb_pos,
341  self.ref_full_bb_posref_full_bb_pos, False)
342  else:
343  self._superposition_superposition = _GetSuperposition(self.mdl_bb_posmdl_bb_pos,
344  self.ref_bb_posref_bb_pos, False)
345  return self._superposition_superposition
346 
347  @property
348  def transform(self):
349  """ Transformation to superpose mdl residues onto ref residues
350 
351  :type: :class:`ost.geom.Mat4`
352  """
353  return self.superpositionsuperposition.transformation
354 
355  @property
357  """ :attr:`mdl_bb_pos` with :attr:`transform applied`
358 
359  :type: :class:`geom.Vec3List`
360  """
361  if self._superposed_mdl_bb_pos_superposed_mdl_bb_pos is None:
362  self._superposed_mdl_bb_pos_superposed_mdl_bb_pos = geom.Vec3List(self.mdl_bb_posmdl_bb_pos)
363  self._superposed_mdl_bb_pos_superposed_mdl_bb_pos.ApplyTransform(self.transformtransform)
364  return self._superposed_mdl_bb_pos_superposed_mdl_bb_pos
365 
366  @property
367  def bb_rmsd(self):
368  """ RMSD of the binding site backbone atoms after :attr:`superposition`
369 
370  :type: :class:`float`
371  """
372  return self.superpositionsuperposition.rmsd
373 
374 
375  @property
376  def ost_query(self):
377  """ query for mdl residues in OpenStructure query language
378 
379  Repr can be selected as ``full_mdl.Select(ost_query)``
380 
381  Returns invalid query if residue numbers have insertion codes.
382 
383  :type: :class:`str`
384  """
385  if self._ost_query_ost_query is None:
386  chain_rnums = dict()
387  for r in self.mdl_residuesmdl_residues:
388  chname = r.GetChain().GetName()
389  rnum = r.GetNumber().GetNum()
390  if chname not in chain_rnums:
391  chain_rnums[chname] = list()
392  chain_rnums[chname].append(str(rnum))
393  chain_queries = list()
394  for k,v in chain_rnums.items():
395  q = f"(cname={mol.QueryQuoteName(k)} and "
396  q += f"rnum={','.join(v)})"
397  chain_queries.append(q)
398  self._ost_query_ost_query = " or ".join(chain_queries)
399  return self._ost_query_ost_query
400 
401  def JSONSummary(self):
402  """ Returns JSON serializable summary of results
403  """
404  json_dict = dict()
405  json_dict["lDDT"] = self.lDDTlDDT
406  json_dict["ref_residues"] = [r.GetQualifiedName() for r in \
407  self.ref_residuesref_residues]
408  json_dict["mdl_residues"] = [r.GetQualifiedName() for r in \
409  self.mdl_residuesmdl_residues]
410  json_dict["transform"] = list(self.transformtransform.data)
411  json_dict["bb_rmsd"] = self.bb_rmsdbb_rmsd
412  json_dict["ost_query"] = self.ost_queryost_query
413  json_dict["flat_mapping"] = self.GetFlatChainMappingGetFlatChainMapping()
414  return json_dict
415 
416  def GetFlatChainMapping(self, mdl_as_key=False):
417  """ Returns flat mapping of all chains in the representation
418 
419  :param mdl_as_key: Default is target chain name as key and model chain
420  name as value. This can be reversed with this flag.
421  :returns: :class:`dict` with :class:`str` as key/value that describe
422  one-to-one mapping
423  """
424  flat_mapping = dict()
425  for trg_res, mdl_res in zip(self.ref_residuesref_residues, self.mdl_residuesmdl_residues):
426  if mdl_as_key:
427  flat_mapping[mdl_res.chain.name] = trg_res.chain.name
428  else:
429  flat_mapping[trg_res.chain.name] = mdl_res.chain.name
430  return flat_mapping
431 
432  def _GetFullBBPos(self, residues):
433  """ Helper to extract full backbone positions
434  """
435  exp_pep_atoms = ["N", "CA", "C"]
436  exp_nuc_atoms = ["\"O5'\"", "\"C5'\"", "\"C4'\"", "\"C3'\"", "\"O3'\""]
437  bb_pos = geom.Vec3List()
438  for r in residues:
439  if r.GetChemType() == mol.ChemType.NUCLEOTIDES:
440  exp_atoms = exp_nuc_atoms
441  elif r.GetChemType() == mol.ChemType.AMINOACIDS:
442  exp_atoms = exp_pep_atoms
443  else:
444  raise RuntimeError("Something terrible happened... RUN...")
445  for aname in exp_atoms:
446  a = r.FindAtom(aname)
447  if not a.IsValid():
448  raise RuntimeError("Something terrible happened... "
449  "RUN...")
450  bb_pos.append(a.GetPos())
451  return bb_pos
452 
453  def _GetBBPos(self, residues):
454  """ Helper to extract single representative position for each residue
455  """
456  bb_pos = geom.Vec3List()
457  for r in residues:
458  at = r.FindAtom("CA")
459  if not at.IsValid():
460  at = r.FindAtom("C3'")
461  if not at.IsValid():
462  raise RuntimeError("Something terrible happened... RUN...")
463  bb_pos.append(at.GetPos())
464  return bb_pos
465 
466  def _GetInconsistentResidues(self, ref_residues, mdl_residues):
467  """ Helper to extract a list of inconsistent residues.
468  """
469  if len(ref_residues) != len(mdl_residues):
470  raise ValueError("Something terrible happened... Reference and "
471  "model lengths differ... RUN...")
472  inconsistent_residues = list()
473  for ref_residue, mdl_residue in zip(ref_residues, mdl_residues):
474  if ref_residue.name != mdl_residue.name:
475  inconsistent_residues.append((ref_residue, mdl_residue))
476  return inconsistent_residues
477 
478 
480  """ Class to compute chain mappings
481 
482  All algorithms are performed on processed structures which fulfill
483  criteria as given in constructor arguments (*min_pep_length*,
484  "min_nuc_length") and only contain residues which have all required backbone
485  atoms. for peptide residues thats N, CA, C and CB (no CB for GLY), for
486  nucleotide residues thats O5', C5', C4', C3' and O3'.
487 
488  Chain mapping is a three step process:
489 
490  * Group chemically identical chains in *target* using pairwise
491  alignments that are either computed with Needleman-Wunsch (NW) or
492  simply derived from residue numbers (*resnum_alignments* flag).
493  In case of NW, *pep_subst_mat*, *pep_gap_open* and *pep_gap_ext*
494  and their nucleotide equivalents are relevant. Two chains are
495  considered identical if they fulfill the *pep_seqid_thr* and
496  have at least *min_pep_length* aligned residues. Same logic
497  is applied for nucleotides with respective thresholds.
498 
499  * Map chains in an input model to these groups. Parameters for generating
500  alignments are the same as above. Model chains are mapped to the chem
501  group with highest sequence identity. Optionally, you can set a minimum
502  sequence identity threshold with *mdl_map_pep_seqid_thr*/
503  *mdl_map_nuc_seqid_thr* to avoid nonsensical mappings. You can either
504  get the group mapping with :func:`GetChemMapping` or directly call
505  one of the full fletched one-to-one chain mapping functions which
506  execute that step internally.
507 
508  * Obtain one-to-one mapping for chains in an input model and
509  *target* with one of the available mapping functions. Just to get an
510  idea of complexity. If *target* and *model* are octamers, there are
511  ``8! = 40320`` possible chain mappings.
512 
513  :param target: Target structure onto which models are mapped.
514  Computations happen on a selection only containing
515  polypeptides and polynucleotides.
516  :type target: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
517  :param resnum_alignments: Use residue numbers instead of
518  Needleman-Wunsch to compute pairwise
519  alignments. Relevant for :attr:`~chem_groups`
520  and related attributes.
521  :type resnum_alignments: :class:`bool`
522  :param pep_seqid_thr: Threshold used to decide when two chains are
523  identical. 95 percent tolerates the few mutations
524  crystallographers like to do.
525  :type pep_seqid_thr: :class:`float`
526  :param nuc_seqid_thr: Nucleotide equivalent for *pep_seqid_thr*
527  :type nuc_seqid_thr: :class:`float`
528  :param pep_subst_mat: Substitution matrix to align peptide sequences,
529  irrelevant if *resnum_alignments* is True,
530  defaults to seq.alg.BLOSUM62
531  :type pep_subst_mat: :class:`ost.seq.alg.SubstWeightMatrix`
532  :param pep_gap_open: Gap open penalty to align peptide sequences,
533  irrelevant if *resnum_alignments* is True
534  :type pep_gap_open: :class:`int`
535  :param pep_gap_ext: Gap extension penalty to align peptide sequences,
536  irrelevant if *resnum_alignments* is True
537  :type pep_gap_ext: :class:`int`
538  :param nuc_subst_mat: Nucleotide equivalent for *pep_subst_mat*,
539  defaults to seq.alg.NUC44
540  :type nuc_subst_mat: :class:`ost.seq.alg.SubstWeightMatrix`
541  :param nuc_gap_open: Nucleotide equivalent for *pep_gap_open*
542  :type nuc_gap_open: :class:`int`
543  :param nuc_gap_ext: Nucleotide equivalent for *pep_gap_ext*
544  :type nuc_gap_ext: :class:`int`
545  :param min_pep_length: Minimal number of residues for a peptide chain to be
546  considered in target and in models.
547  :type min_pep_length: :class:`int`
548  :param min_nuc_length: Minimal number of residues for a nucleotide chain to be
549  considered in target and in models.
550  :type min_nuc_length: :class:`int`
551  :param n_max_naive: Max possible chain mappings that are enumerated in
552  :func:`~GetNaivelDDTMapping` /
553  :func:`~GetDecomposerlDDTMapping`. A
554  :class:`RuntimeError` is raised in case of bigger
555  complexity.
556  :type n_max_naive: :class:`int`
557  :param mdl_map_pep_seqid_thr: To avoid non-sensical mapping of model chains
558  to reference chem groups. If a value larger
559  than 0.0 is provided, minimal criteria for
560  assignment becomes a sequence identity of
561  *mdl_map_pep_seqid_thr* and at least
562  *min_pep_length* aligned residues. If set to
563  zero, it simply assigns a model chain to the
564  chem group with highest sequence identity.
565  :type mdl_map_pep_seqid_thr: :class:`float`
566  :param mdl_map_nuc_seqid_thr: Nucleotide equivalent of
567  *mdl_map_pep_seqid_thr*
568  :type mdl_map_nuc_seqid_thr: :class:`float`
569  """
570  def __init__(self, target, resnum_alignments=False,
571  pep_seqid_thr = 95., nuc_seqid_thr = 95.,
572  pep_subst_mat = seq.alg.BLOSUM62, pep_gap_open = -11,
573  pep_gap_ext = -1, nuc_subst_mat = seq.alg.NUC44,
574  nuc_gap_open = -4, nuc_gap_ext = -4,
575  min_pep_length = 6, min_nuc_length = 4,
576  n_max_naive = 1e8,
577  mdl_map_pep_seqid_thr = 0.,
578  mdl_map_nuc_seqid_thr = 0.):
579 
580  # attributes
581  self.resnum_alignmentsresnum_alignments = resnum_alignments
582  self.pep_seqid_thrpep_seqid_thr = pep_seqid_thr
583  self.nuc_seqid_thrnuc_seqid_thr = nuc_seqid_thr
584  self.min_pep_lengthmin_pep_length = min_pep_length
585  self.min_nuc_lengthmin_nuc_length = min_nuc_length
586  self.n_max_naiven_max_naive = n_max_naive
587  self.mdl_map_pep_seqid_thrmdl_map_pep_seqid_thr = mdl_map_pep_seqid_thr
588  self.mdl_map_nuc_seqid_thrmdl_map_nuc_seqid_thr = mdl_map_nuc_seqid_thr
589 
590  # lazy computed attributes
591  self._chem_groups_chem_groups = None
592  self._chem_group_alignments_chem_group_alignments = None
593  self._chem_group_ref_seqs_chem_group_ref_seqs = None
594  self._chem_group_types_chem_group_types = None
595 
596  # helper class to generate pairwise alignments
597  self.aligneraligner = _Aligner(resnum_aln = resnum_alignments,
598  pep_subst_mat = pep_subst_mat,
599  pep_gap_open = pep_gap_open,
600  pep_gap_ext = pep_gap_ext,
601  nuc_subst_mat = nuc_subst_mat,
602  nuc_gap_open = nuc_gap_open,
603  nuc_gap_ext = nuc_gap_ext)
604 
605  # target structure preprocessing
606  self._target, self._polypep_seqs, self._polynuc_seqs_polynuc_seqs = \
607  self.ProcessStructureProcessStructure(target)
608 
609  @property
610  def target(self):
611  """Target structure that only contains peptides/nucleotides
612 
613  Contains only residues that have the backbone representatives
614  (CA for peptide and C3' for nucleotides) to avoid ATOMSEQ alignment
615  inconsistencies when switching between all atom and backbone only
616  representations.
617 
618  :type: :class:`ost.mol.EntityView`
619  """
620  return self._target
621 
622  @property
623  def polypep_seqs(self):
624  """Sequences of peptide chains in :attr:`~target`
625 
626  Respective :class:`EntityView` from *target* for each sequence s are
627  available as ``s.GetAttachedView()``
628 
629  :type: :class:`ost.seq.SequenceList`
630  """
631  return self._polypep_seqs
632 
633  @property
634  def polynuc_seqs(self):
635  """Sequences of nucleotide chains in :attr:`~target`
636 
637  Respective :class:`EntityView` from *target* for each sequence s are
638  available as ``s.GetAttachedView()``
639 
640  :type: :class:`ost.seq.SequenceList`
641  """
642  return self._polynuc_seqs_polynuc_seqs
643 
644  @property
645  def chem_groups(self):
646  """Groups of chemically equivalent chains in :attr:`~target`
647 
648  First chain in group is the one with longest sequence.
649 
650  :getter: Computed on first use (cached)
651  :type: :class:`list` of :class:`list` of :class:`str` (chain names)
652  """
653  if self._chem_groups_chem_groups is None:
654  self._chem_groups_chem_groups = list()
655  for a in self.chem_group_alignmentschem_group_alignments:
656  self._chem_groups_chem_groups.append([s.GetName() for s in a.sequences])
657  return self._chem_groups_chem_groups
658 
659  @property
661  """MSA for each group in :attr:`~chem_groups`
662 
663  Sequences in MSAs exhibit same order as in :attr:`~chem_groups` and
664  have the respective :class:`ost.mol.EntityView` from *target* attached.
665 
666  :getter: Computed on first use (cached)
667  :type: :class:`ost.seq.AlignmentList`
668  """
669  if self._chem_group_alignments_chem_group_alignments is None:
670  self._chem_group_alignments_chem_group_alignments, self._chem_group_types_chem_group_types = \
671  _GetChemGroupAlignments(self.polypep_seqspolypep_seqs, self.polynuc_seqspolynuc_seqs,
672  self.aligneraligner,
673  pep_seqid_thr=self.pep_seqid_thrpep_seqid_thr,
674  min_pep_length=self.min_pep_lengthmin_pep_length,
675  nuc_seqid_thr=self.nuc_seqid_thrnuc_seqid_thr,
676  min_nuc_length=self.min_nuc_lengthmin_nuc_length)
677 
678  return self._chem_group_alignments_chem_group_alignments
679 
680  @property
682  """Reference (longest) sequence for each group in :attr:`~chem_groups`
683 
684  Respective :class:`EntityView` from *target* for each sequence s are
685  available as ``s.GetAttachedView()``
686 
687  :getter: Computed on first use (cached)
688  :type: :class:`ost.seq.SequenceList`
689  """
690  if self._chem_group_ref_seqs_chem_group_ref_seqs is None:
691  self._chem_group_ref_seqs_chem_group_ref_seqs = seq.CreateSequenceList()
692  for a in self.chem_group_alignmentschem_group_alignments:
693  s = seq.CreateSequence(a.GetSequence(0).GetName(),
694  a.GetSequence(0).GetGaplessString())
695  s.AttachView(a.GetSequence(0).GetAttachedView())
696  self._chem_group_ref_seqs_chem_group_ref_seqs.AddSequence(s)
697  return self._chem_group_ref_seqs_chem_group_ref_seqs
698 
699  @property
700  def chem_group_types(self):
701  """ChemType of each group in :attr:`~chem_groups`
702 
703  Specifying if groups are poly-peptides/nucleotides, i.e.
704  :class:`ost.mol.ChemType.AMINOACIDS` or
705  :class:`ost.mol.ChemType.NUCLEOTIDES`
706 
707  :getter: Computed on first use (cached)
708  :type: :class:`list` of :class:`ost.mol.ChemType`
709  """
710  if self._chem_group_types_chem_group_types is None:
711  self._chem_group_alignments_chem_group_alignments, self._chem_group_types_chem_group_types = \
712  _GetChemGroupAlignments(self.polypep_seqspolypep_seqs, self.polynuc_seqspolynuc_seqs,
713  self.aligneraligner,
714  pep_seqid_thr=self.pep_seqid_thrpep_seqid_thr,
715  min_pep_length=self.min_pep_lengthmin_pep_length,
716  nuc_seqid_thr=self.nuc_seqid_thrnuc_seqid_thr,
717  min_nuc_length=self.min_nuc_lengthmin_nuc_length)
718 
719  return self._chem_group_types_chem_group_types
720 
721  def GetChemMapping(self, model):
722  """Maps sequences in *model* to chem_groups of target
723 
724  :param model: Model from which to extract sequences, a
725  selection that only includes peptides and nucleotides
726  is performed and returned along other results.
727  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
728  :returns: Tuple with two lists of length `len(self.chem_groups)`, a list
729  and an :class:`ost.mol.EntityView` representing *model*:
730  1) Each element is a :class:`list` with mdl chain names that
731  map to the chem group at that position.
732  2) Each element is a :class:`ost.seq.AlignmentList` aligning
733  these mdl chain sequences to the chem group ref sequences.
734  3) The model chains that could not be mapped to the reference
735  4) A selection of *model* that only contains polypeptides and
736  polynucleotides whose ATOMSEQ exactly matches the sequence
737  info in the returned alignments.
738  """
739  mdl, mdl_pep_seqs, mdl_nuc_seqs = self.ProcessStructureProcessStructure(model)
740  mapping = [list() for x in self.chem_groupschem_groups]
741  unmapped_mdl_chains = list()
742  alns = [seq.AlignmentList() for x in self.chem_groupschem_groups]
743 
744  for s in mdl_pep_seqs:
745  idx, aln = _MapSequence(self.chem_group_ref_seqschem_group_ref_seqs,
746  self.chem_group_typeschem_group_types,
747  s, mol.ChemType.AMINOACIDS,
748  self.aligneraligner,
749  seq_id_thr = self.mdl_map_pep_seqid_thrmdl_map_pep_seqid_thr,
750  min_aln_length = self.min_pep_lengthmin_pep_length)
751  if idx is None:
752  unmapped_mdl_chains.append(s.GetName())
753  else:
754  mapping[idx].append(s.GetName())
755  alns[idx].append(aln)
756 
757  for s in mdl_nuc_seqs:
758  idx, aln = _MapSequence(self.chem_group_ref_seqschem_group_ref_seqs,
759  self.chem_group_typeschem_group_types,
760  s, mol.ChemType.NUCLEOTIDES,
761  self.aligneraligner,
762  seq_id_thr = self.mdl_map_nuc_seqid_thrmdl_map_nuc_seqid_thr,
763  min_aln_length = self.min_nuc_lengthmin_nuc_length)
764  if idx is None:
765  unmapped_mdl_chains.append(s.GetName())
766  else:
767  mapping[idx].append(s.GetName())
768  alns[idx].append(aln)
769 
770  return (mapping, alns, unmapped_mdl_chains, mdl)
771 
772 
773  def GetlDDTMapping(self, model, inclusion_radius=15.0,
774  thresholds=[0.5, 1.0, 2.0, 4.0], strategy="heuristic",
775  steep_opt_rate = None, block_seed_size = 5,
776  block_blocks_per_chem_group = 5,
777  chem_mapping_result = None,
778  heuristic_n_max_naive = 40320):
779  """ Identify chain mapping by optimizing lDDT score
780 
781  Maps *model* chain sequences to :attr:`~chem_groups` and find mapping
782  based on backbone only lDDT score (CA for amino acids C3' for
783  Nucleotides).
784 
785  Either performs a naive search, i.e. enumerate all possible mappings or
786  executes a greedy strategy that tries to identify a (close to) optimal
787  mapping in an iterative way by starting from a start mapping (seed). In
788  each iteration, the one-to-one mapping that leads to highest increase
789  in number of conserved contacts is added with the additional requirement
790  that this added mapping must have non-zero interface counts towards the
791  already mapped chains. So basically we're "growing" the mapped structure
792  by only adding connected stuff.
793 
794  The available strategies:
795 
796  * **naive**: Enumerates all possible mappings and returns best
797 
798  * **greedy_fast**: perform all vs. all single chain lDDTs within the
799  respective ref/mdl chem groups. The mapping with highest number of
800  conserved contacts is selected as seed for greedy extension
801 
802  * **greedy_full**: try multiple seeds for greedy extension, i.e. try
803  all ref/mdl chain combinations within the respective chem groups and
804  retain the mapping leading to the best lDDT.
805 
806  * **greedy_block**: try multiple seeds for greedy extension, i.e. try
807  all ref/mdl chain combinations within the respective chem groups and
808  extend them to *block_seed_size*. *block_blocks_per_chem_group*
809  for each chem group are selected for exhaustive extension.
810 
811  * **heuristic**: Uses *naive* strategy if number of possible mappings
812  is within *heuristic_n_max_naive*. The default of 40320 corresponds
813  to an octamer (8!=40320). A structure with stoichiometry A6B2 would be
814  6!*2!=1440 etc. If the number of possible mappings is larger,
815  *greedy_full* is used.
816 
817  Sets :attr:`MappingResult.opt_score` in case of no trivial one-to-one
818  mapping.
819 
820  :param model: Model to map
821  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
822  :param inclusion_radius: Inclusion radius for lDDT
823  :type inclusion_radius: :class:`float`
824  :param thresholds: Thresholds for lDDT
825  :type thresholds: :class:`list` of :class:`float`
826  :param strategy: Strategy to find mapping. Must be in ["naive",
827  "greedy_fast", "greedy_full", "greedy_block"]
828  :type strategy: :class:`str`
829  :param steep_opt_rate: Only relevant for greedy strategies.
830  If set, every *steep_opt_rate* mappings, a simple
831  optimization is executed with the goal of
832  avoiding local minima. The optimization
833  iteratively checks all possible swaps of mappings
834  within their respective chem groups and accepts
835  swaps that improve lDDT score. Iteration stops as
836  soon as no improvement can be achieved anymore.
837  :type steep_opt_rate: :class:`int`
838  :param block_seed_size: Param for *greedy_block* strategy - Initial seeds
839  are extended by that number of chains.
840  :type block_seed_size: :class:`int`
841  :param block_blocks_per_chem_group: Param for *greedy_block* strategy -
842  Number of blocks per chem group that
843  are extended in an initial search
844  for high scoring local solutions.
845  :type block_blocks_per_chem_group: :class:`int`
846  :param chem_mapping_result: Pro param. The result of
847  :func:`~GetChemMapping` where you provided
848  *model*. If set, *model* parameter is not
849  used.
850  :type chem_mapping_result: :class:`tuple`
851  :returns: A :class:`MappingResult`
852  """
853 
854  strategies = ["naive", "greedy_fast", "greedy_full", "greedy_block",
855  "heuristic"]
856  if strategy not in strategies:
857  raise RuntimeError(f"Strategy must be in {strategies}")
858 
859  if chem_mapping_result is None:
860  chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
861  self.GetChemMappingGetChemMapping(model)
862  else:
863  chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
864  chem_mapping_result
865 
866  ref_mdl_alns = _GetRefMdlAlns(self.chem_groupschem_groups,
867  self.chem_group_alignmentschem_group_alignments,
868  chem_mapping,
869  chem_group_alns)
870 
871  # check for the simplest case
872  one_to_one = _CheckOneToOneMapping(self.chem_groupschem_groups, chem_mapping)
873  if one_to_one is not None:
874  alns = dict()
875  for ref_group, mdl_group in zip(self.chem_groupschem_groups, one_to_one):
876  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
877  if ref_ch is not None and mdl_ch is not None:
878  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
879  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
880  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
881  alns[(ref_ch, mdl_ch)] = aln
882  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
883  unmapped_mdl_chains, one_to_one, alns)
884 
885  if strategy == "heuristic":
886  if _NMappingsWithin(self.chem_groupschem_groups, chem_mapping,
887  heuristic_n_max_naive):
888  strategy = "naive"
889  else:
890  strategy = "greedy_full"
891 
892  mapping = None
893  opt_lddt = None
894 
895  if strategy == "naive":
896  mapping, opt_lddt = _lDDTNaive(self.targettarget, mdl, inclusion_radius,
897  thresholds, self.chem_groupschem_groups,
898  chem_mapping, ref_mdl_alns,
899  self.n_max_naiven_max_naive)
900  else:
901  # its one of the greedy strategies - setup greedy searcher
902  the_greed = _lDDTGreedySearcher(self.targettarget, mdl, self.chem_groupschem_groups,
903  chem_mapping, ref_mdl_alns,
904  inclusion_radius=inclusion_radius,
905  thresholds=thresholds,
906  steep_opt_rate=steep_opt_rate)
907  if strategy == "greedy_fast":
908  mapping = _lDDTGreedyFast(the_greed)
909  elif strategy == "greedy_full":
910  mapping = _lDDTGreedyFull(the_greed)
911  elif strategy == "greedy_block":
912  mapping = _lDDTGreedyBlock(the_greed, block_seed_size,
913  block_blocks_per_chem_group)
914  # cached => lDDT computation is fast here
915  opt_lddt = the_greed.Score(mapping)
916 
917  alns = dict()
918  for ref_group, mdl_group in zip(self.chem_groupschem_groups, mapping):
919  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
920  if ref_ch is not None and mdl_ch is not None:
921  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
922  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
923  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
924  alns[(ref_ch, mdl_ch)] = aln
925 
926  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
927  unmapped_mdl_chains, mapping, alns,
928  opt_score = opt_lddt)
929 
930 
931  def GetQSScoreMapping(self, model, contact_d = 12.0, strategy = "heuristic",
932  block_seed_size = 5, block_blocks_per_chem_group = 5,
933  heuristic_n_max_naive = 40320, steep_opt_rate = None,
934  chem_mapping_result = None,
935  greedy_prune_contact_map = True):
936  """ Identify chain mapping based on QSScore
937 
938  Scoring is based on CA/C3' positions which are present in all chains of
939  a :attr:`chem_groups` as well as the *model* chains which are mapped to
940  that respective chem group.
941 
942  The following strategies are available:
943 
944  * **naive**: Naively iterate all possible mappings and return best based
945  on QS score.
946 
947  * **greedy_fast**: perform all vs. all single chain lDDTs within the
948  respective ref/mdl chem groups. The mapping with highest number of
949  conserved contacts is selected as seed for greedy extension.
950  Extension is based on QS-score.
951 
952  * **greedy_full**: try multiple seeds for greedy extension, i.e. try
953  all ref/mdl chain combinations within the respective chem groups and
954  retain the mapping leading to the best QS-score.
955 
956  * **greedy_block**: try multiple seeds for greedy extension, i.e. try
957  all ref/mdl chain combinations within the respective chem groups and
958  extend them to *block_seed_size*. *block_blocks_per_chem_group*
959  for each chem group are selected for exhaustive extension.
960 
961  * **heuristic**: Uses *naive* strategy if number of possible mappings
962  is within *heuristic_n_max_naive*. The default of 40320 corresponds
963  to an octamer (8!=40320). A structure with stoichiometry A6B2 would be
964  6!*2!=1440 etc. If the number of possible mappings is larger,
965  *greedy_full* is used.
966 
967  Sets :attr:`MappingResult.opt_score` in case of no trivial one-to-one
968  mapping.
969 
970  :param model: Model to map
971  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
972  :param contact_d: Max distance between two residues to be considered as
973  contact in qs scoring
974  :type contact_d: :class:`float`
975  :param strategy: Strategy for sampling, must be in ["naive",
976  "greedy_fast", "greedy_full", "greedy_block"]
977  :type strategy: :class:`str`
978  :param chem_mapping_result: Pro param. The result of
979  :func:`~GetChemMapping` where you provided
980  *model*. If set, *model* parameter is not
981  used.
982  :type chem_mapping_result: :class:`tuple`
983  :param greedy_prune_contact_map: Relevant for all strategies that use
984  greedy extensions. If True, only chains
985  with at least 3 contacts (8A CB
986  distance) towards already mapped chains
987  in trg/mdl are considered for
988  extension. All chains that give a
989  potential non-zero QS-score increase
990  are used otherwise (at least one
991  contact within 12A). The consequence
992  is reduced runtime and usually no
993  real reduction in accuracy.
994  :returns: A :class:`MappingResult`
995  """
996 
997  strategies = ["naive", "greedy_fast", "greedy_full", "greedy_block", "heuristic"]
998  if strategy not in strategies:
999  raise RuntimeError(f"strategy must be {strategies}")
1000 
1001  if chem_mapping_result is None:
1002  chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
1003  self.GetChemMappingGetChemMapping(model)
1004  else:
1005  chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
1006  chem_mapping_result
1007  ref_mdl_alns = _GetRefMdlAlns(self.chem_groupschem_groups,
1008  self.chem_group_alignmentschem_group_alignments,
1009  chem_mapping,
1010  chem_group_alns)
1011  # check for the simplest case
1012  one_to_one = _CheckOneToOneMapping(self.chem_groupschem_groups, chem_mapping)
1013  if one_to_one is not None:
1014  alns = dict()
1015  for ref_group, mdl_group in zip(self.chem_groupschem_groups, one_to_one):
1016  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
1017  if ref_ch is not None and mdl_ch is not None:
1018  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1019  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
1020  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
1021  alns[(ref_ch, mdl_ch)] = aln
1022  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
1023  unmapped_mdl_chains, one_to_one, alns)
1024 
1025  if strategy == "heuristic":
1026  if _NMappingsWithin(self.chem_groupschem_groups, chem_mapping,
1027  heuristic_n_max_naive):
1028  strategy = "naive"
1029  else:
1030  strategy = "greedy_full"
1031 
1032  mapping = None
1033  opt_qsscore = None
1034 
1035  if strategy == "naive":
1036  mapping, opt_qsscore = _QSScoreNaive(self.targettarget, mdl,
1037  self.chem_groupschem_groups,
1038  chem_mapping, ref_mdl_alns,
1039  contact_d, self.n_max_naiven_max_naive)
1040  else:
1041  # its one of the greedy strategies - setup greedy searcher
1042  the_greed = _QSScoreGreedySearcher(self.targettarget, mdl,
1043  self.chem_groupschem_groups,
1044  chem_mapping, ref_mdl_alns,
1045  contact_d = contact_d,
1046  steep_opt_rate=steep_opt_rate,
1047  greedy_prune_contact_map = greedy_prune_contact_map)
1048  if strategy == "greedy_fast":
1049  mapping = _QSScoreGreedyFast(the_greed)
1050  elif strategy == "greedy_full":
1051  mapping = _QSScoreGreedyFull(the_greed)
1052  elif strategy == "greedy_block":
1053  mapping = _QSScoreGreedyBlock(the_greed, block_seed_size,
1054  block_blocks_per_chem_group)
1055  # cached => QSScore computation is fast here
1056  opt_qsscore = the_greed.Score(mapping, check=False).QS_global
1057 
1058  alns = dict()
1059  for ref_group, mdl_group in zip(self.chem_groupschem_groups, mapping):
1060  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
1061  if ref_ch is not None and mdl_ch is not None:
1062  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1063  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
1064  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
1065  alns[(ref_ch, mdl_ch)] = aln
1066 
1067  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
1068  unmapped_mdl_chains, mapping, alns,
1069  opt_score = opt_qsscore)
1070 
1071  def GetRMSDMapping(self, model, strategy = "heuristic", subsampling=50,
1072  chem_mapping_result = None, heuristic_n_max_naive = 120):
1073  """Identify chain mapping based on minimal RMSD superposition
1074 
1075  Superposition and scoring is based on CA/C3' positions which are present
1076  in all chains of a :attr:`chem_groups` as well as the *model*
1077  chains which are mapped to that respective chem group.
1078 
1079  The following strategies are available:
1080 
1081  * **naive**: Naively iterate all possible mappings and return the one
1082  with lowes RMSD.
1083 
1084  * **greedy_single**: perform all vs. all single chain superpositions
1085  within the respective ref/mdl chem groups to use as starting points.
1086  For each starting point, iteratively add the model/target chain pair
1087  with lowest RMSD until a full mapping is achieved. The mapping with
1088  lowest RMSD is returned.
1089 
1090  * **greedy_iterative**: Same as greedy_single_rmsd exept that the
1091  transformation gets updated with each added chain pair.
1092 
1093  * **heuristic**: Uses *naive* strategy if number of possible mappings
1094  is within *heuristic_n_max_naive*. The default of 120 corresponds
1095  to a homo-pentamer (5!=120). If the number of possible mappings is
1096  larger, *greedy_iterative* is used.
1097 
1098  :param model: Model to map
1099  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
1100  :param strategy: Strategy for sampling. Must be in ["naive",
1101  "greedy_single", "greedy_iterative"]
1102  :type strategy: :class:`str`
1103  :param subsampling: If given, only an equally distributed subset
1104  of CA/C3' positions in each chain are used for
1105  superposition/scoring.
1106  :type subsampling: :class:`int`
1107  :param chem_mapping_result: Pro param. The result of
1108  :func:`~GetChemMapping` where you provided
1109  *model*. If set, *model* parameter is not
1110  used.
1111  :type chem_mapping_result: :class:`tuple`
1112  :returns: A :class:`MappingResult`
1113  """
1114 
1115  strategies = ["naive", "greedy_single", "greedy_iterative", "heuristic"]
1116 
1117  if strategy not in strategies:
1118  raise RuntimeError(f"strategy must be {strategies}")
1119 
1120  if chem_mapping_result is None:
1121  chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
1122  self.GetChemMappingGetChemMapping(model)
1123  else:
1124  chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
1125  chem_mapping_result
1126  ref_mdl_alns = _GetRefMdlAlns(self.chem_groupschem_groups,
1127  self.chem_group_alignmentschem_group_alignments,
1128  chem_mapping,
1129  chem_group_alns)
1130 
1131  # check for the simplest case
1132  one_to_one = _CheckOneToOneMapping(self.chem_groupschem_groups, chem_mapping)
1133  if one_to_one is not None:
1134  alns = dict()
1135  for ref_group, mdl_group in zip(self.chem_groupschem_groups, one_to_one):
1136  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
1137  if ref_ch is not None and mdl_ch is not None:
1138  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1139  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
1140  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
1141  alns[(ref_ch, mdl_ch)] = aln
1142  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
1143  unmapped_mdl_chains, one_to_one, alns)
1144 
1145  trg_group_pos, mdl_group_pos = _GetRefPos(self.targettarget, mdl,
1146  self.chem_group_alignmentschem_group_alignments,
1147  chem_group_alns,
1148  max_pos = subsampling)
1149 
1150  if strategy == "heuristic":
1151  if _NMappingsWithin(self.chem_groupschem_groups, chem_mapping,
1152  heuristic_n_max_naive):
1153  strategy = "naive"
1154  else:
1155  strategy = "greedy_iterative"
1156 
1157  mapping = None
1158 
1159  if strategy.startswith("greedy"):
1160  # get transforms of any mdl chain onto any trg chain in same chem
1161  # group that fulfills gdtts threshold
1162  initial_transforms = list()
1163  initial_mappings = list()
1164  for trg_pos, trg_chains, mdl_pos, mdl_chains in zip(trg_group_pos,
1165  self.chem_groupschem_groups,
1166  mdl_group_pos,
1167  chem_mapping):
1168  for t_pos, t in zip(trg_pos, trg_chains):
1169  for m_pos, m in zip(mdl_pos, mdl_chains):
1170  if len(t_pos) >= 3 and len(m_pos) >= 3:
1171  transform = _GetSuperposition(m_pos, t_pos,
1172  False).transformation
1173  initial_transforms.append(transform)
1174  initial_mappings.append((t,m))
1175 
1176  if strategy == "greedy_single":
1177  mapping = _SingleRigidRMSD(initial_transforms,
1178  initial_mappings,
1179  self.chem_groupschem_groups,
1180  chem_mapping,
1181  trg_group_pos,
1182  mdl_group_pos)
1183 
1184 
1185  elif strategy == "greedy_iterative":
1186  mapping = _IterativeRigidRMSD(initial_transforms,
1187  initial_mappings,
1188  self.chem_groupschem_groups, chem_mapping,
1189  trg_group_pos, mdl_group_pos)
1190  elif strategy == "naive":
1191  mapping = _NaiveRMSD(self.chem_groupschem_groups, chem_mapping,
1192  trg_group_pos, mdl_group_pos,
1193  self.n_max_naiven_max_naive)
1194 
1195  # translate mapping format and return
1196  final_mapping = list()
1197  for ref_chains in self.chem_groupschem_groups:
1198  mapped_mdl_chains = list()
1199  for ref_ch in ref_chains:
1200  if ref_ch in mapping:
1201  mapped_mdl_chains.append(mapping[ref_ch])
1202  else:
1203  mapped_mdl_chains.append(None)
1204  final_mapping.append(mapped_mdl_chains)
1205 
1206  alns = dict()
1207  for ref_group, mdl_group in zip(self.chem_groupschem_groups, final_mapping):
1208  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
1209  if ref_ch is not None and mdl_ch is not None:
1210  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1211  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
1212  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
1213  alns[(ref_ch, mdl_ch)] = aln
1214 
1215  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
1216  unmapped_mdl_chains, final_mapping, alns)
1217 
1218  def GetMapping(self, model, n_max_naive = 40320):
1219  """ Convenience function to get mapping with currently preferred method
1220 
1221  If number of possible chain mappings is <= *n_max_naive*, a naive
1222  QS-score mapping is performed and optimal QS-score is guaranteed.
1223  For anything else, a QS-score mapping with the greedy_full strategy is
1224  performed (greedy_prune_contact_map = True). The default for
1225  *n_max_naive* of 40320 corresponds to an octamer (8!=40320). A
1226  structure with stoichiometry A6B2 would be 6!*2!=1440 etc.
1227 
1228  If :attr:`~target` has nucleotide sequences, the QS-score target
1229  function is replaced with a backbone only lDDT score that has
1230  an inclusion radius of 30A.
1231  """
1232  if len(self.polynuc_seqspolynuc_seqs) > 0:
1233  return self.GetlDDTMappingGetlDDTMapping(model, strategy = "heuristic",
1234  inclusion_radius = 30.0,
1235  heuristic_n_max_naive = n_max_naive)
1236  else:
1237  return self.GetQSScoreMappingGetQSScoreMapping(model, strategy="heuristic",
1238  greedy_prune_contact_map=True,
1239  heuristic_n_max_naive = n_max_naive)
1240 
1241  def GetRepr(self, substructure, model, topn=1, inclusion_radius=15.0,
1242  thresholds=[0.5, 1.0, 2.0, 4.0], bb_only=False,
1243  only_interchain=False, chem_mapping_result = None,
1244  global_mapping = None):
1245  """ Identify *topn* representations of *substructure* in *model*
1246 
1247  *substructure* defines a subset of :attr:`~target` for which one
1248  wants the *topn* representations in *model*. Representations are scored
1249  and sorted by lDDT.
1250 
1251  :param substructure: A :class:`ost.mol.EntityView` which is a subset of
1252  :attr:`~target`. Should be selected with the
1253  OpenStructure query language. Example: if you're
1254  interested in residues with number 42,43 and 85 in
1255  chain A:
1256  ``substructure=mapper.target.Select("cname=A and rnum=42,43,85")``
1257  A :class:`RuntimeError` is raised if *substructure*
1258  does not refer to the same underlying
1259  :class:`ost.mol.EntityHandle` as :attr:`~target`.
1260  :type substructure: :class:`ost.mol.EntityView`
1261  :param model: Structure in which one wants to find representations for
1262  *substructure*
1263  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
1264  :param topn: Max number of representations that are returned
1265  :type topn: :class:`int`
1266  :param inclusion_radius: Inclusion radius for lDDT
1267  :type inclusion_radius: :class:`float`
1268  :param thresholds: Thresholds for lDDT
1269  :type thresholds: :class:`list` of :class:`float`
1270  :param bb_only: Only consider backbone atoms in lDDT computation
1271  :type bb_only: :class:`bool`
1272  :param only_interchain: Only score interchain contacts in lDDT. Useful
1273  if you want to identify interface patches.
1274  :type only_interchain: :class:`bool`
1275  :param chem_mapping_result: Pro param. The result of
1276  :func:`~GetChemMapping` where you provided
1277  *model*. If set, *model* parameter is not
1278  used.
1279  :type chem_mapping_result: :class:`tuple`
1280  :param global_mapping: Pro param. Specify a global mapping result. This
1281  fully defines the desired representation in the
1282  model but extracts it and enriches it with all
1283  the nice attributes of :class:`ReprResult`.
1284  The target attribute in *global_mapping* must be
1285  of the same entity as self.target and the model
1286  attribute of *global_mapping* must be of the same
1287  entity as *model*.
1288  :type global_mapping: :class:`MappingResult`
1289  :returns: :class:`list` of :class:`ReprResult`
1290  """
1291 
1292  if topn < 1:
1293  raise RuntimeError("topn must be >= 1")
1294 
1295  if global_mapping is not None:
1296  # ensure that this mapping is derived from the same structures
1297  if global_mapping.target.handle.GetHashCode() != \
1298  self.targettarget.handle.GetHashCode():
1299  raise RuntimeError("global_mapping.target must be the same "
1300  "entity as self.target")
1301  if global_mapping.model.handle.GetHashCode() != \
1302  model.handle.GetHashCode():
1303  raise RuntimeError("global_mapping.model must be the same "
1304  "entity as model param")
1305 
1306  # check whether substructure really is a subset of self.target
1307  for r in substructure.residues:
1308  ch_name = r.GetChain().GetName()
1309  rnum = r.GetNumber()
1310  target_r = self.targettarget.FindResidue(ch_name, rnum)
1311  if not target_r.IsValid():
1312  raise RuntimeError(f"substructure has residue "
1313  f"{r.GetQualifiedName()} which is not in "
1314  f"self.target")
1315  if target_r.handle.GetHashCode() != r.handle.GetHashCode():
1316  raise RuntimeError(f"substructure has residue "
1317  f"{r.GetQualifiedName()} which has an "
1318  f"equivalent in self.target but it does "
1319  f"not refer to the same underlying "
1320  f"EntityHandle")
1321  for a in r.atoms:
1322  target_a = target_r.FindAtom(a.GetName())
1323  if not target_a.IsValid():
1324  raise RuntimeError(f"substructure has atom "
1325  f"{a.GetQualifiedName()} which is not "
1326  f"in self.target")
1327  if a.handle.GetHashCode() != target_a.handle.GetHashCode():
1328  raise RuntimeError(f"substructure has atom "
1329  f"{a.GetQualifiedName()} which has an "
1330  f"equivalent in self.target but it does "
1331  f"not refer to the same underlying "
1332  f"EntityHandle")
1333 
1334  # check whether it contains either CA or C3'
1335  ca = r.FindAtom("CA")
1336  c3 = r.FindAtom("C3'") # FindAtom with prime in string is tested
1337  # and works
1338  if not ca.IsValid() and not c3.IsValid():
1339  raise RuntimeError("All residues in substructure must contain "
1340  "a backbone atom named CA or C3\'")
1341 
1342  # perform mapping and alignments on full structures
1343  if chem_mapping_result is None:
1344  chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
1345  self.GetChemMappingGetChemMapping(model)
1346  else:
1347  chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
1348  chem_mapping_result
1349  ref_mdl_alns = _GetRefMdlAlns(self.chem_groupschem_groups,
1350  self.chem_group_alignmentschem_group_alignments,
1351  chem_mapping,
1352  chem_group_alns)
1353 
1354  # Get residue indices relative to full target chain
1355  substructure_res_indices = dict()
1356  for ch in substructure.chains:
1357  full_ch = self.targettarget.FindChain(ch.GetName())
1358  idx = [full_ch.GetResidueIndex(r.GetNumber()) for r in ch.residues]
1359  substructure_res_indices[ch.GetName()] = idx
1360 
1361  # strip down variables to make them specific to substructure
1362  # keep only chem_groups which are present in substructure
1363  substructure_chem_groups = list()
1364  substructure_chem_mapping = list()
1365 
1366  chnames = set([ch.GetName() for ch in substructure.chains])
1367  for chem_group, mapping in zip(self.chem_groupschem_groups, chem_mapping):
1368  substructure_chem_group = [ch for ch in chem_group if ch in chnames]
1369  if len(substructure_chem_group) > 0:
1370  substructure_chem_groups.append(substructure_chem_group)
1371  substructure_chem_mapping.append(mapping)
1372 
1373  # early stopping if no mdl chain can be mapped to substructure
1374  n_mapped_mdl_chains = sum([len(m) for m in substructure_chem_mapping])
1375  if n_mapped_mdl_chains == 0:
1376  return list()
1377 
1378  # strip the reference sequence in alignments to only contain
1379  # sequence from substructure
1380  substructure_ref_mdl_alns = dict()
1381  mdl_views = dict()
1382  for ch in mdl.chains:
1383  mdl_views[ch.GetName()] = _CSel(mdl, [ch.GetName()])
1384  for chem_group, mapping in zip(substructure_chem_groups,
1385  substructure_chem_mapping):
1386  for ref_ch in chem_group:
1387  for mdl_ch in mapping:
1388  full_aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1389  ref_seq = full_aln.GetSequence(0)
1390  # the ref sequence is tricky... we start with a gap only
1391  # sequence and only add olcs as defined by the residue
1392  # indices that we extracted before...
1393  tmp = ['-'] * len(full_aln)
1394  for idx in substructure_res_indices[ref_ch]:
1395  idx_in_seq = ref_seq.GetPos(idx)
1396  tmp[idx_in_seq] = ref_seq[idx_in_seq]
1397  ref_seq = seq.CreateSequence(ref_ch, ''.join(tmp))
1398  ref_seq.AttachView(_CSel(substructure, [ref_ch]))
1399  mdl_seq = full_aln.GetSequence(1)
1400  mdl_seq = seq.CreateSequence(mdl_seq.GetName(),
1401  mdl_seq.GetString())
1402  mdl_seq.AttachView(mdl_views[mdl_ch])
1403  aln = seq.CreateAlignment()
1404  aln.AddSequence(ref_seq)
1405  aln.AddSequence(mdl_seq)
1406  substructure_ref_mdl_alns[(ref_ch, mdl_ch)] = aln
1407 
1408  lddt_scorer = lddt.lDDTScorer(substructure,
1409  inclusion_radius = inclusion_radius,
1410  bb_only = bb_only)
1411  scored_mappings = list()
1412 
1413  if global_mapping:
1414  # construct mapping of substructure from global mapping
1415  flat_mapping = global_mapping.GetFlatMapping()
1416  mapping = list()
1417  for chem_group, chem_mapping in zip(substructure_chem_groups,
1418  substructure_chem_mapping):
1419  chem_group_mapping = list()
1420  for ch in chem_group:
1421  if ch in flat_mapping:
1422  mdl_ch = flat_mapping[ch]
1423  if mdl_ch in chem_mapping:
1424  chem_group_mapping.append(mdl_ch)
1425  else:
1426  chem_group_mapping.append(None)
1427  else:
1428  chem_group_mapping.append(None)
1429  mapping.append(chem_group_mapping)
1430  mappings = [mapping]
1431  else:
1432  mappings = list(_ChainMappings(substructure_chem_groups,
1433  substructure_chem_mapping,
1434  self.n_max_naiven_max_naive))
1435 
1436  # This step can be slow so give some hints in logs
1437  msg = "Computing initial ranking of %d chain mappings" % len(mappings)
1438  (ost.LogWarning if len(mappings) > 10000 else ost.LogInfo)(msg)
1439 
1440  for mapping in mappings:
1441  # chain_mapping and alns as input for lDDT computation
1442  lddt_chain_mapping = dict()
1443  lddt_alns = dict()
1444  n_res_aln = 0
1445  for ref_chem_group, mdl_chem_group in zip(substructure_chem_groups,
1446  mapping):
1447  for ref_ch, mdl_ch in zip(ref_chem_group, mdl_chem_group):
1448  # some mdl chains can be None
1449  if mdl_ch is not None:
1450  lddt_chain_mapping[mdl_ch] = ref_ch
1451  aln = substructure_ref_mdl_alns[(ref_ch, mdl_ch)]
1452  lddt_alns[mdl_ch] = aln
1453  tmp = [int(c[0] != '-' and c[1] != '-') for c in aln]
1454  n_res_aln += sum(tmp)
1455  # don't compute lDDT if no single residue in mdl and ref is aligned
1456  if n_res_aln == 0:
1457  continue
1458 
1459  lDDT, _ = lddt_scorer.lDDT(mdl, thresholds=thresholds,
1460  chain_mapping=lddt_chain_mapping,
1461  residue_mapping = lddt_alns,
1462  check_resnames = False,
1463  no_intrachain = only_interchain)
1464 
1465  if lDDT is None:
1466  ost.LogVerbose("No valid contacts in the reference")
1467  lDDT = 0.0 # that means, that we have not a single valid contact
1468  # in lDDT. For the code below to work, we just set it
1469  # to a terrible score => 0.0
1470 
1471  if len(scored_mappings) == 0:
1472  scored_mappings.append((lDDT, mapping))
1473  elif len(scored_mappings) < topn:
1474  scored_mappings.append((lDDT, mapping))
1475  scored_mappings.sort(reverse=True, key=lambda x: x[0])
1476  elif lDDT > scored_mappings[-1][0]:
1477  scored_mappings.append((lDDT, mapping))
1478  scored_mappings.sort(reverse=True, key=lambda x: x[0])
1479  scored_mappings = scored_mappings[:topn]
1480 
1481  # finalize and return
1482  results = list()
1483  for scored_mapping in scored_mappings:
1484  ref_view = substructure.handle.CreateEmptyView()
1485  mdl_view = mdl.handle.CreateEmptyView()
1486  for ref_ch_group, mdl_ch_group in zip(substructure_chem_groups,
1487  scored_mapping[1]):
1488  for ref_ch, mdl_ch in zip(ref_ch_group, mdl_ch_group):
1489  if ref_ch is not None and mdl_ch is not None:
1490  aln = substructure_ref_mdl_alns[(ref_ch, mdl_ch)]
1491  for col in aln:
1492  if col[0] != '-' and col[1] != '-':
1493  ref_view.AddResidue(col.GetResidue(0),
1494  mol.ViewAddFlag.INCLUDE_ALL)
1495  mdl_view.AddResidue(col.GetResidue(1),
1496  mol.ViewAddFlag.INCLUDE_ALL)
1497  results.append(ReprResult(scored_mapping[0], substructure,
1498  ref_view, mdl_view))
1499  return results
1500 
1501  def GetNMappings(self, model):
1502  """ Returns number of possible mappings
1503 
1504  :param model: Model with chains that are mapped onto
1505  :attr:`chem_groups`
1506  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
1507  """
1508  chem_mapping, chem_group_alns, unmapped_mdl_chains, mdl = \
1509  self.GetChemMappingGetChemMapping(model)
1510  return _NMappings(self.chem_groupschem_groups, chem_mapping)
1511 
1512  def ProcessStructure(self, ent):
1513  """ Entity processing for chain mapping
1514 
1515  * Selects view containing peptide and nucleotide residues which have
1516  required backbone atoms present - for peptide residues thats
1517  N, CA, C and CB (no CB for GLY), for nucleotide residues thats
1518  O5', C5', C4', C3' and O3'.
1519  * filters view by chain lengths, see *min_pep_length* and
1520  *min_nuc_length* in constructor
1521  * Extracts atom sequences for each chain in that view
1522  * Attaches corresponding :class:`ost.mol.EntityView` to each sequence
1523  * If residue number alignments are used, strictly increasing residue
1524  numbers without insertion codes are ensured in each chain
1525 
1526  :param ent: Entity to process
1527  :type ent: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
1528  :returns: Tuple with 3 elements: 1) :class:`ost.mol.EntityView`
1529  containing peptide and nucleotide residues 2)
1530  :class:`ost.seq.SequenceList` containing ATOMSEQ sequences
1531  for each polypeptide chain in returned view, sequences have
1532  :class:`ost.mol.EntityView` of according chains attached
1533  3) same for polynucleotide chains
1534  """
1535  view = ent.CreateEmptyView()
1536  exp_pep_atoms = ["N", "CA", "C", "CB"]
1537  exp_nuc_atoms = ["\"O5'\"", "\"C5'\"", "\"C4'\"", "\"C3'\"", "\"O3'\""]
1538  pep_query = "peptide=true and aname=" + ','.join(exp_pep_atoms)
1539  nuc_query = "nucleotide=true and aname=" + ','.join(exp_nuc_atoms)
1540 
1541  pep_sel = ent.Select(pep_query)
1542  for r in pep_sel.residues:
1543  if len(r.atoms) == 4:
1544  view.AddResidue(r.handle, mol.INCLUDE_ALL)
1545  elif r.name == "GLY" and len(r.atoms) == 3:
1546  atom_names = [a.GetName() for a in r.atoms]
1547  if sorted(atom_names) == ["C", "CA", "N"]:
1548  view.AddResidue(r.handle, mol.INCLUDE_ALL)
1549 
1550  nuc_sel = ent.Select(nuc_query)
1551  for r in nuc_sel.residues:
1552  if len(r.atoms) == 5:
1553  view.AddResidue(r.handle, mol.INCLUDE_ALL)
1554 
1555  polypep_seqs = seq.CreateSequenceList()
1556  polynuc_seqs = seq.CreateSequenceList()
1557 
1558  if len(view.residues) == 0:
1559  # no residues survived => return
1560  return (view, polypep_seqs, polynuc_seqs)
1561 
1562  for ch in view.chains:
1563  n_res = len(ch.residues)
1564  n_pep = sum([r.IsPeptideLinking() for r in ch.residues])
1565  n_nuc = sum([r.IsNucleotideLinking() for r in ch.residues])
1566 
1567  # guarantee that we have either pep or nuc (no mix of the two)
1568  if n_pep > 0 and n_nuc > 0:
1569  raise RuntimeError(f"Must not mix peptide and nucleotide linking "
1570  f"residues in same chain ({ch.GetName()})")
1571 
1572  if (n_pep + n_nuc) != n_res:
1573  raise RuntimeError("All residues must either be peptide_linking "
1574  "or nucleotide_linking")
1575 
1576  # filter out short chains
1577  if n_pep > 0 and n_pep < self.min_pep_lengthmin_pep_length:
1578  continue
1579 
1580  if n_nuc > 0 and n_nuc < self.min_nuc_lengthmin_nuc_length:
1581  continue
1582 
1583  # the superfast residue number based alignment adds some
1584  # restrictions on the numbers themselves:
1585  # 1) no insertion codes 2) strictly increasing
1586  if self.resnum_alignmentsresnum_alignments:
1587  # check if no insertion codes are present in residue numbers
1588  ins_codes = [r.GetNumber().GetInsCode() for r in ch.residues]
1589  if len(set(ins_codes)) != 1 or ins_codes[0] != '\0':
1590  raise RuntimeError("Residue numbers in input structures must not "
1591  "contain insertion codes")
1592 
1593  # check if residue numbers are strictly increasing
1594  nums = [r.GetNumber().GetNum() for r in ch.residues]
1595  if not all(i < j for i, j in zip(nums, nums[1:])):
1596  raise RuntimeError("Residue numbers in input structures must be "
1597  "strictly increasing for each chain")
1598 
1599  s = ''.join([r.one_letter_code for r in ch.residues])
1600  s = seq.CreateSequence(ch.GetName(), s)
1601  s.AttachView(_CSel(view, [ch.GetName()]))
1602  if n_pep == n_res:
1603  polypep_seqs.AddSequence(s)
1604  elif n_nuc == n_res:
1605  polynuc_seqs.AddSequence(s)
1606  else:
1607  raise RuntimeError("This shouldnt happen")
1608 
1609  if len(polypep_seqs) == 0 and len(polynuc_seqs) == 0:
1610  raise RuntimeError(f"No chain fulfilled minimum length requirement "
1611  f"to be considered in chain mapping "
1612  f"({self.min_pep_length} for peptide chains, "
1613  f"{self.min_nuc_length} for nucleotide chains) "
1614  f"- mapping failed")
1615 
1616  # select for chains for which we actually extracted the sequence
1617  chain_names = [s.GetAttachedView().chains[0].name for s in polypep_seqs]
1618  chain_names += [s.GetAttachedView().chains[0].name for s in polynuc_seqs]
1619  view = _CSel(view, chain_names)
1620 
1621  return (view, polypep_seqs, polynuc_seqs)
1622 
1623  def Align(self, s1, s2, stype):
1624  """ Access to internal sequence alignment functionality
1625 
1626  Alignment parameterization is setup at ChainMapper construction
1627 
1628  :param s1: First sequence to align - must have view attached in case
1629  of resnum_alignments
1630  :type s1: :class:`ost.seq.SequenceHandle`
1631  :param s2: Second sequence to align - must have view attached in case
1632  of resnum_alignments
1633  :type s2: :class:`ost.seq.SequenceHandle`
1634  :param stype: Type of sequences to align, must be in
1635  [:class:`ost.mol.ChemType.AMINOACIDS`,
1636  :class:`ost.mol.ChemType.NUCLEOTIDES`]
1637  :returns: Pairwise alignment of s1 and s2
1638  """
1639  if stype not in [mol.ChemType.AMINOACIDS, mol.ChemType.NUCLEOTIDES]:
1640  raise RuntimeError("stype must be ost.mol.ChemType.AMINOACIDS or "
1641  "ost.mol.ChemType.NUCLEOTIDES")
1642  return self.aligneraligner.Align(s1, s2, chem_type = stype)
1643 
1644 
1645 # INTERNAL HELPERS
1646 
1647 class _Aligner:
1648  def __init__(self, pep_subst_mat = seq.alg.BLOSUM62, pep_gap_open = -5,
1649  pep_gap_ext = -2, nuc_subst_mat = seq.alg.NUC44,
1650  nuc_gap_open = -4, nuc_gap_ext = -4, resnum_aln = False):
1651  """ Helper class to compute alignments
1652 
1653  Sets default values for substitution matrix, gap open and gap extension
1654  penalties. They are only used in default mode (Needleman-Wunsch aln).
1655  If *resnum_aln* is True, only residue numbers of views that are attached
1656  to input sequences are considered.
1657  """
1658  self.pep_subst_matpep_subst_mat = pep_subst_mat
1659  self.pep_gap_openpep_gap_open = pep_gap_open
1660  self.pep_gap_extpep_gap_ext = pep_gap_ext
1661  self.nuc_subst_matnuc_subst_mat = nuc_subst_mat
1662  self.nuc_gap_opennuc_gap_open = nuc_gap_open
1663  self.nuc_gap_extnuc_gap_ext = nuc_gap_ext
1664  self.resnum_alnresnum_aln = resnum_aln
1665 
1666  def Align(self, s1, s2, chem_type=None):
1667  if self.resnum_alnresnum_aln:
1668  return self.ResNumAlignResNumAlign(s1, s2)
1669  else:
1670  if chem_type is None:
1671  raise RuntimeError("Must specify chem_type for NW alignment")
1672  return self.NWAlignNWAlign(s1, s2, chem_type)
1673 
1674  def NWAlign(self, s1, s2, chem_type):
1675  """ Returns pairwise alignment using Needleman-Wunsch algorithm
1676 
1677  :param s1: First sequence to align
1678  :type s1: :class:`ost.seq.SequenceHandle`
1679  :param s2: Second sequence to align
1680  :type s2: :class:`ost.seq.SequenceHandle`
1681  :param chem_type: Must be in [:class:`ost.mol.ChemType.AMINOACIDS`,
1682  :class:`ost.mol.ChemType.NUCLEOTIDES`], determines
1683  substitution matrix and gap open/extension penalties
1684  :type chem_type: :class:`ost.mol.ChemType`
1685  :returns: Alignment with s1 as first and s2 as second sequence
1686  """
1687  if chem_type == mol.ChemType.AMINOACIDS:
1688  return seq.alg.SemiGlobalAlign(s1, s2, self.pep_subst_matpep_subst_mat,
1689  gap_open=self.pep_gap_openpep_gap_open,
1690  gap_ext=self.pep_gap_extpep_gap_ext)[0]
1691  elif chem_type == mol.ChemType.NUCLEOTIDES:
1692  return seq.alg.SemiGlobalAlign(s1, s2, self.nuc_subst_matnuc_subst_mat,
1693  gap_open=self.nuc_gap_opennuc_gap_open,
1694  gap_ext=self.nuc_gap_extnuc_gap_ext)[0]
1695  else:
1696  raise RuntimeError("Invalid ChemType")
1697  return aln
1698 
1699  def ResNumAlign(self, s1, s2):
1700  """ Returns pairwise alignment using residue numbers of attached views
1701 
1702  Assumes that there are no insertion codes (alignment only on numerical
1703  component) and that resnums are strictly increasing (fast min/max
1704  identification). These requirements are assured if a structure has been
1705  processed by :class:`ChainMapper`.
1706 
1707  :param s1: First sequence to align, must have :class:`ost.mol.EntityView`
1708  attached
1709  :type s1: :class:`ost.seq.SequenceHandle`
1710  :param s2: Second sequence to align, must have :class:`ost.mol.EntityView`
1711  attached
1712  :type s2: :class:`ost.seq.SequenceHandle`
1713  """
1714  assert(s1.HasAttachedView())
1715  assert(s2.HasAttachedView())
1716  v1 = s1.GetAttachedView()
1717  rnums1 = [r.GetNumber().GetNum() for r in v1.residues]
1718  v2 = s2.GetAttachedView()
1719  rnums2 = [r.GetNumber().GetNum() for r in v2.residues]
1720 
1721  min_num = min(rnums1[0], rnums2[0])
1722  max_num = max(rnums1[-1], rnums2[-1])
1723  aln_length = max_num - min_num + 1
1724 
1725  aln_s1 = ['-'] * aln_length
1726  for r, rnum in zip(v1.residues, rnums1):
1727  aln_s1[rnum-min_num] = r.one_letter_code
1728 
1729  aln_s2 = ['-'] * aln_length
1730  for r, rnum in zip(v2.residues, rnums2):
1731  aln_s2[rnum-min_num] = r.one_letter_code
1732 
1733  aln = seq.CreateAlignment()
1734  aln.AddSequence(seq.CreateSequence(s1.GetName(), ''.join(aln_s1)))
1735  aln.AddSequence(seq.CreateSequence(s2.GetName(), ''.join(aln_s2)))
1736  return aln
1737 
1738 def _GetAlnPropsTwo(aln):
1739  """Returns basic properties of *aln* version two...
1740 
1741  :param aln: Alignment to compute properties
1742  :type aln: :class:`seq.AlignmentHandle`
1743  :returns: Tuple with 3 elements. 1) sequence identity in range [0, 100]
1744  considering aligned columns 2) Number of non gap characters in
1745  first sequence 3) Number of aligned characters.
1746  """
1747  assert(aln.GetCount() == 2)
1748  n_tot = sum([1 for col in aln if col[0] != '-'])
1749  n_aligned = sum([1 for col in aln if (col[0] != '-' and col[1] != '-')])
1750  return (seq.alg.SequenceIdentity(aln), n_tot, n_aligned)
1751 
1752 def _GetAlnPropsOne(aln):
1753 
1754  """Returns basic properties of *aln* version one...
1755 
1756  :param aln: Alignment to compute properties
1757  :type aln: :class:`seq.AlignmentHandle`
1758  :returns: Tuple with 2 elements. 1) sequence identify in range [0, 100]
1759  considering aligned columns 2) Number of aligned columns.
1760  """
1761  assert(aln.GetCount() == 2)
1762  seqid = seq.alg.SequenceIdentity(aln)
1763  n_aligned = sum([1 for col in aln if (col[0] != '-' and col[1] != '-')])
1764  return (seqid, n_aligned)
1765 
1766 def _GetChemGroupAlignments(pep_seqs, nuc_seqs, aligner, pep_seqid_thr=95.,
1767  min_pep_length=6, nuc_seqid_thr=95.,
1768  min_nuc_length=4):
1769  """Returns alignments with groups of chemically equivalent chains
1770 
1771  :param pep_seqs: List of polypeptide sequences
1772  :type pep_seqs: :class:`seq.SequenceList`
1773  :param nuc_seqs: List of polynucleotide sequences
1774  :type nuc_seqs: :class:`seq.SequenceList`
1775  :param aligner: Helper class to generate pairwise alignments
1776  :type aligner: :class:`_Aligner`
1777  :param pep_seqid_thr: Threshold used to decide when two peptide chains are
1778  identical. 95 percent tolerates the few mutations
1779  crystallographers like to do.
1780  :type pep_seqid_thr: :class:`float`
1781  :param min_pep_length: Additional threshold to avoid gappy alignments with high
1782  seqid. Number of aligned columns must be at least this
1783  number.
1784  :type min_pep_length: :class:`int`
1785  :param nuc_seqid_thr: Nucleotide equivalent of *pep_seqid_thr*
1786  :type nuc_seqid_thr: :class:`float`
1787  :param min_nuc_length: Nucleotide equivalent of *min_pep_length*
1788  :type min_nuc_length: :class:`int`
1789  :returns: Tuple with first element being an AlignmentList. Each alignment
1790  represents a group of chemically equivalent chains and the first
1791  sequence is the longest. Second element is a list of equivalent
1792  length specifying the types of the groups. List elements are in
1793  [:class:`ost.ChemType.AMINOACIDS`,
1794  :class:`ost.ChemType.NUCLEOTIDES`]
1795  """
1796  pep_groups = _GroupSequences(pep_seqs, pep_seqid_thr, min_pep_length, aligner,
1797  mol.ChemType.AMINOACIDS)
1798  nuc_groups = _GroupSequences(nuc_seqs, nuc_seqid_thr, min_nuc_length, aligner,
1799  mol.ChemType.NUCLEOTIDES)
1800  group_types = [mol.ChemType.AMINOACIDS] * len(pep_groups)
1801  group_types += [mol.ChemType.NUCLEOTIDES] * len(nuc_groups)
1802  groups = pep_groups
1803  groups.extend(nuc_groups)
1804  return (groups, group_types)
1805 
1806 def _GroupSequences(seqs, seqid_thr, min_length, aligner, chem_type):
1807  """Get list of alignments representing groups of equivalent sequences
1808 
1809  :param seqid_thr: Threshold used to decide when two chains are identical.
1810  :type seqid_thr: :class:`float`
1811  :param gap_thr: Additional threshold to avoid gappy alignments with high
1812  seqid. Number of aligned columns must be at least this
1813  number.
1814  :type gap_thr: :class:`int`
1815  :param aligner: Helper class to generate pairwise alignments
1816  :type aligner: :class:`_Aligner`
1817  :param chem_type: ChemType of seqs which is passed to *aligner*, must be in
1818  [:class:`ost.mol.ChemType.AMINOACIDS`,
1819  :class:`ost.mol.ChemType.NUCLEOTIDES`]
1820  :type chem_type: :class:`ost.mol.ChemType`
1821  :returns: A list of alignments, one alignment for each group
1822  with longest sequence (reference) as first sequence.
1823  :rtype: :class:`ost.seq.AlignmentList`
1824  """
1825  groups = list()
1826  for s_idx in range(len(seqs)):
1827  matching_group = None
1828  for g_idx in range(len(groups)):
1829  for g_s_idx in range(len(groups[g_idx])):
1830  aln = aligner.Align(seqs[s_idx], seqs[groups[g_idx][g_s_idx]],
1831  chem_type)
1832  sid, n_aligned = _GetAlnPropsOne(aln)
1833  if sid >= seqid_thr and n_aligned >= min_length:
1834  matching_group = g_idx
1835  break
1836  if matching_group is not None:
1837  break
1838 
1839  if matching_group is None:
1840  groups.append([s_idx])
1841  else:
1842  groups[matching_group].append(s_idx)
1843 
1844  # sort based on sequence length
1845  sorted_groups = list()
1846  for g in groups:
1847  if len(g) > 1:
1848  tmp = sorted([[len(seqs[i]), i] for i in g], reverse=True)
1849  sorted_groups.append([x[1] for x in tmp])
1850  else:
1851  sorted_groups.append(g)
1852 
1853  # translate from indices back to sequences and directly generate alignments
1854  # of the groups with the longest (first) sequence as reference
1855  aln_list = seq.AlignmentList()
1856  for g in sorted_groups:
1857  if len(g) == 1:
1858  # aln with one single sequence
1859  aln_list.append(seq.CreateAlignment(seqs[g[0]]))
1860  else:
1861  # obtain pairwise aln of first sequence (reference) to all others
1862  alns = seq.AlignmentList()
1863  i = g[0]
1864  for j in g[1:]:
1865  alns.append(aligner.Align(seqs[i], seqs[j], chem_type))
1866  # and merge
1867  aln_list.append(seq.alg.MergePairwiseAlignments(alns, seqs[i]))
1868 
1869  # transfer attached views
1870  seq_dict = {s.GetName(): s for s in seqs}
1871  for aln_idx in range(len(aln_list)):
1872  for aln_s_idx in range(aln_list[aln_idx].GetCount()):
1873  s_name = aln_list[aln_idx].GetSequence(aln_s_idx).GetName()
1874  s = seq_dict[s_name]
1875  aln_list[aln_idx].AttachView(aln_s_idx, s.GetAttachedView())
1876 
1877  return aln_list
1878 
1879 def _MapSequence(ref_seqs, ref_types, s, s_type, aligner,
1880  seq_id_thr=0.0, min_aln_length=0):
1881  """Tries top map *s* onto any of the sequences in *ref_seqs*
1882 
1883  Computes alignments of *s* to each of the reference sequences of equal type
1884  and sorts them by seqid*fraction_covered (seqid: sequence identity of
1885  aligned columns in alignment, fraction_covered: Fraction of non-gap
1886  characters in reference sequence that are covered by non-gap characters in
1887  *s*). Best scoring mapping is returned. Optionally, *seq_id*/
1888  *min_aln_length* thresholds can be enabled to avoid non-sensical mappings.
1889  However, *min_aln_length* only has an effect if *seq_id_thr* > 0!!!
1890 
1891  :param ref_seqs: Reference sequences
1892  :type ref_seqs: :class:`ost.seq.SequenceList`
1893  :param ref_types: Types of reference sequences, e.g.
1894  ost.mol.ChemType.AminoAcids
1895  :type ref_types: :class:`list` of :class:`ost.mol.ChemType`
1896  :param s: Sequence to map
1897  :type s: :class:`ost.seq.SequenceHandle`
1898  :param s_type: Type of *s*, only try mapping to sequences in *ref_seqs*
1899  with equal type as defined in *ref_types*
1900  :param aligner: Helper class to generate pairwise alignments
1901  :type aligner: :class:`_Aligner`
1902  :param seq_id_thr: Minimum sequence identity to be considered as match
1903  :type seq_id_thr: :class:`float`
1904  :param min_aln_length: Minimum number of aligned columns to be considered
1905  as match. Only has an effect if *seq_id_thr* > 0!
1906  :type min_aln_length: :class:`int`
1907  :returns: Tuple with two elements. 1) index of sequence in *ref_seqs* to
1908  which *s* can be mapped 2) Pairwise sequence alignment with
1909  sequence from *ref_seqs* as first sequence. Both elements are
1910  None if no mapping can be found or if thresholds are not
1911  fulfilled for any alignment.
1912  :raises: :class:`RuntimeError` if mapping is ambiguous, i.e. *s*
1913  successfully maps to more than one sequence in *ref_seqs*
1914  """
1915  scored_alns = list()
1916  for ref_idx, ref_seq in enumerate(ref_seqs):
1917  if ref_types[ref_idx] == s_type:
1918  aln = aligner.Align(ref_seq, s, s_type)
1919  seqid, n_tot, n_aligned = _GetAlnPropsTwo(aln)
1920  if seq_id_thr > 0:
1921  if seqid >= seq_id_thr and n_aligned >= min_aln_length:
1922  fraction_covered = float(n_aligned)/n_tot
1923  score = seqid * fraction_covered
1924  scored_alns.append((score, ref_idx, aln))
1925  else:
1926  fraction_covered = float(n_aligned)/n_tot
1927  score = seqid * fraction_covered
1928  scored_alns.append((score, ref_idx, aln))
1929 
1930  if len(scored_alns) == 0:
1931  return (None, None) # no mapping possible...
1932 
1933  scored_alns = sorted(scored_alns, key=lambda x: x[0], reverse=True)
1934  return (scored_alns[0][1], scored_alns[0][2])
1935 
1936 def _GetRefMdlAlns(ref_chem_groups, ref_chem_group_msas, mdl_chem_groups,
1937  mdl_chem_group_alns, pairs=None):
1938  """ Get all possible ref/mdl chain alignments given chem group mapping
1939 
1940  :param ref_chem_groups: :attr:`ChainMapper.chem_groups`
1941  :type ref_chem_groups: :class:`list` of :class:`list` of :class:`str`
1942  :param ref_chem_group_msas: :attr:`ChainMapper.chem_group_alignments`
1943  :type ref_chem_group_msas: :class:`ost.seq.AlignmentList`
1944  :param mdl_chem_groups: Groups of model chains that are mapped to
1945  *ref_chem_groups*. Return value of
1946  :func:`ChainMapper.GetChemMapping`.
1947  :type mdl_chem_groups: :class:`list` of :class:`list` of :class:`str`
1948  :param mdl_chem_group_alns: A pairwise sequence alignment for every chain
1949  in *mdl_chem_groups* that aligns these sequences
1950  to the respective reference sequence.
1951  Return values of
1952  :func:`ChainMapper.GetChemMapping`.
1953  :type mdl_chem_group_alns: :class:`list` of :class:`ost.seq.AlignmentList`
1954  :param pairs: Pro param - restrict return dict to specified pairs. A set of
1955  tuples in form (<trg_ch>, <mdl_ch>)
1956  :type pairs: :class:`set`
1957  :returns: A dictionary holding all possible ref/mdl chain alignments. Keys
1958  in that dictionary are tuples of the form (ref_ch, mdl_ch) and
1959  values are the respective pairwise alignments with first sequence
1960  being from ref, the second from mdl.
1961  """
1962  # alignment of each model chain to chem_group reference sequence
1963  mdl_alns = dict()
1964  for alns in mdl_chem_group_alns:
1965  for aln in alns:
1966  mdl_chain_name = aln.GetSequence(1).GetName()
1967  mdl_alns[mdl_chain_name] = aln
1968 
1969  # generate all alignments between ref/mdl chain atomseqs that we will
1970  # ever observe
1971  ref_mdl_alns = dict()
1972  for ref_chains, mdl_chains, ref_aln in zip(ref_chem_groups, mdl_chem_groups,
1973  ref_chem_group_msas):
1974  for ref_ch in ref_chains:
1975  for mdl_ch in mdl_chains:
1976  if pairs is not None and (ref_ch, mdl_ch) not in pairs:
1977  continue
1978  # obtain alignments of mdl and ref chains towards chem
1979  # group ref sequence and merge them
1980  aln_list = seq.AlignmentList()
1981  # do ref aln
1982  s1 = ref_aln.GetSequence(0)
1983  s2 = ref_aln.GetSequence(ref_chains.index(ref_ch))
1984  aln_list.append(seq.CreateAlignment(s1, s2))
1985  # do mdl aln
1986  aln_list.append(mdl_alns[mdl_ch])
1987  # merge
1988  ref_seq = seq.CreateSequence(s1.GetName(),
1989  s1.GetGaplessString())
1990  merged_aln = seq.alg.MergePairwiseAlignments(aln_list,
1991  ref_seq)
1992  # merged_aln:
1993  # seq1: ref seq of chem group
1994  # seq2: seq of ref chain
1995  # seq3: seq of mdl chain
1996  # => we need the alignment between seq2 and seq3
1997  s2 = merged_aln.GetSequence(1)
1998  s3 = merged_aln.GetSequence(2)
1999  # cut leading and trailing gap columns
2000  a = 0 # number of leading gap columns
2001  for idx in range(len(s2)):
2002  if s2[idx] != '-' or s3[idx] != '-':
2003  break
2004  a += 1
2005  b = 0 # number of trailing gap columns
2006  for idx in reversed(range(len(s2))):
2007  if s2[idx] != '-' or s3[idx] != '-':
2008  break
2009  b += 1
2010  s2 = seq.CreateSequence(s2.GetName(), s2[a: len(s2)-b])
2011  s3 = seq.CreateSequence(s3.GetName(), s3[a: len(s3)-b])
2012  ref_mdl_alns[(ref_ch, mdl_ch)] = seq.CreateAlignment(s2, s3)
2013 
2014  return ref_mdl_alns
2015 
2016 def _CheckOneToOneMapping(ref_chains, mdl_chains):
2017  """ Checks whether we already have a perfect one to one mapping
2018 
2019  That means each list in *ref_chains* has exactly one element and each
2020  list in *mdl_chains* has either one element (it's mapped) or is empty
2021  (ref chain has no mapped mdl chain). Returns None if no such mapping
2022  can be found.
2023 
2024  :param ref_chains: corresponds to :attr:`ChainMapper.chem_groups`
2025  :type ref_chains: :class:`list` of :class:`list` of :class:`str`
2026  :param mdl_chains: mdl chains mapped to chem groups in *ref_chains*, i.e.
2027  the return value of :func:`ChainMapper.GetChemMapping`
2028  :type mdl_chains: class:`list` of :class:`list` of :class:`str`
2029  :returns: A :class:`list` of :class:`list` if a one to one mapping is found,
2030  None otherwise
2031  """
2032  only_one_to_one = True
2033  one_to_one = list()
2034  for ref, mdl in zip(ref_chains, mdl_chains):
2035  if len(ref) == 1 and len(mdl) == 1:
2036  one_to_one.append(mdl)
2037  elif len(ref) == 1 and len(mdl) == 0:
2038  one_to_one.append([None])
2039  else:
2040  only_one_to_one = False
2041  break
2042  if only_one_to_one:
2043  return one_to_one
2044  else:
2045  return None
2046 
2048  def __init__(self, ref, mdl, ref_chem_groups, mdl_chem_groups,
2049  ref_mdl_alns, inclusion_radius = 15.0,
2050  thresholds = [0.5, 1.0, 2.0, 4.0],
2051  steep_opt_rate = None):
2052 
2053  """ Greedy extension of already existing but incomplete chain mappings
2054  """
2055  super().__init__(ref, ref_chem_groups, mdl, ref_mdl_alns,
2056  dist_thresh=inclusion_radius,
2057  dist_diff_thresholds=thresholds)
2058 
2059  self.mdl_chem_groupsmdl_chem_groups = mdl_chem_groups
2060  self.steep_opt_ratesteep_opt_rate = steep_opt_rate
2061 
2062  self.neighborsneighbors = {k: set() for k in self.trgtrg.chain_names}
2063  for interface in self.trgtrg.interacting_chains:
2064  self.neighborsneighbors[interface[0]].add(interface[1])
2065  self.neighborsneighbors[interface[1]].add(interface[0])
2066 
2067  assert(len(ref_chem_groups) == len(mdl_chem_groups))
2068  self.ref_chem_groupsref_chem_groups = ref_chem_groups
2069  self.mdl_chem_groupsmdl_chem_groups = mdl_chem_groups
2070  self.ref_ch_group_mapperref_ch_group_mapper = dict()
2071  self.mdl_ch_group_mappermdl_ch_group_mapper = dict()
2072  for g_idx, (ref_g, mdl_g) in enumerate(zip(ref_chem_groups,
2073  mdl_chem_groups)):
2074  for ch in ref_g:
2075  self.ref_ch_group_mapperref_ch_group_mapper[ch] = g_idx
2076  for ch in mdl_g:
2077  self.mdl_ch_group_mappermdl_ch_group_mapper[ch] = g_idx
2078 
2079  # keep track of mdl chains that potentially give lDDT contributions,
2080  # i.e. they have locations within inclusion_radius + max(thresholds)
2081  self.mdl_neighborsmdl_neighbors = {k: set() for k in self.mdlmdl.chain_names}
2082  for interface in self.mdlmdl.potentially_contributing_chains:
2083  self.mdl_neighborsmdl_neighbors[interface[0]].add(interface[1])
2084  self.mdl_neighborsmdl_neighbors[interface[1]].add(interface[0])
2085 
2086  def ExtendMapping(self, mapping, max_ext = None):
2087 
2088  if len(mapping) == 0:
2089  raise RuntimError("Mapping must contain a starting point")
2090 
2091  # Ref chains onto which we can map. The algorithm starts with a mapping
2092  # on ref_ch. From there we can start to expand to connected neighbors.
2093  # All neighbors that we can reach from the already mapped chains are
2094  # stored in this set which will be updated during runtime
2095  map_targets = set()
2096  for ref_ch in mapping.keys():
2097  map_targets.update(self.neighborsneighbors[ref_ch])
2098 
2099  # remove the already mapped chains
2100  for ref_ch in mapping.keys():
2101  map_targets.discard(ref_ch)
2102 
2103  if len(map_targets) == 0:
2104  return mapping # nothing to extend
2105 
2106  # keep track of what model chains are not yet mapped for each chem group
2107  free_mdl_chains = list()
2108  for chem_group in self.mdl_chem_groupsmdl_chem_groups:
2109  tmp = [x for x in chem_group if x not in mapping.values()]
2110  free_mdl_chains.append(set(tmp))
2111 
2112  # keep track of what ref chains got a mapping
2113  newly_mapped_ref_chains = list()
2114 
2115  something_happened = True
2116  while something_happened:
2117  something_happened=False
2118 
2119  if self.steep_opt_ratesteep_opt_rate is not None:
2120  n_chains = len(newly_mapped_ref_chains)
2121  if n_chains > 0 and n_chains % self.steep_opt_ratesteep_opt_rate == 0:
2122  mapping = self._SteepOpt_SteepOpt(mapping, newly_mapped_ref_chains)
2123 
2124  if max_ext is not None and len(newly_mapped_ref_chains) >= max_ext:
2125  break
2126 
2127  max_n = 0
2128  max_mapping = None
2129  for ref_ch in map_targets:
2130  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[ref_ch]
2131  for mdl_ch in free_mdl_chains[chem_group_idx]:
2132  # single chain score
2133  n_single = self._NSCConserved_NSCConserved(ref_ch, mdl_ch).sum()
2134  # scores towards neighbors that are already mapped
2135  n_inter = 0
2136  for neighbor in self.neighborsneighbors[ref_ch]:
2137  if neighbor in mapping and mapping[neighbor] in \
2138  self.mdl_neighborsmdl_neighbors[mdl_ch]:
2139  n_inter += self._NPairConserved_NPairConserved((ref_ch, neighbor),
2140  (mdl_ch, mapping[neighbor])).sum()
2141  n = n_single + n_inter
2142 
2143  if n_inter > 0 and n > max_n:
2144  # Only accept a new solution if its actually connected
2145  # i.e. n_inter > 0. Otherwise we could just map a big
2146  # fat mdl chain sitting somewhere in Nirvana
2147  max_n = n
2148  max_mapping = (ref_ch, mdl_ch)
2149 
2150  if max_n > 0:
2151  something_happened = True
2152  # assign new found mapping
2153  mapping[max_mapping[0]] = max_mapping[1]
2154 
2155  # add all neighboring chains to map targets as they are now
2156  # reachable
2157  for neighbor in self.neighborsneighbors[max_mapping[0]]:
2158  if neighbor not in mapping:
2159  map_targets.add(neighbor)
2160 
2161  # remove the ref chain from map targets
2162  map_targets.remove(max_mapping[0])
2163 
2164  # remove the mdl chain from free_mdl_chains - its taken...
2165  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[max_mapping[0]]
2166  free_mdl_chains[chem_group_idx].remove(max_mapping[1])
2167 
2168  # keep track of what ref chains got a mapping
2169  newly_mapped_ref_chains.append(max_mapping[0])
2170 
2171  return mapping
2172 
2173  def _SteepOpt(self, mapping, chains_to_optimize=None):
2174 
2175  # just optimize ALL ref chains if nothing specified
2176  if chains_to_optimize is None:
2177  chains_to_optimize = mapping.keys()
2178 
2179  # make sure that we only have ref chains which are actually mapped
2180  ref_chains = [x for x in chains_to_optimize if mapping[x] is not None]
2181 
2182  # group ref chains to be optimized into chem groups
2183  tmp = dict()
2184  for ch in ref_chains:
2185  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[ch]
2186  if chem_group_idx in tmp:
2187  tmp[chem_group_idx].append(ch)
2188  else:
2189  tmp[chem_group_idx] = [ch]
2190  chem_groups = list(tmp.values())
2191 
2192  # try all possible mapping swaps. Swaps that improve the score are
2193  # immediately accepted and we start all over again
2194  current_lddt = self.FromFlatMappingFromFlatMapping(mapping)
2195  something_happened = True
2196  while something_happened:
2197  something_happened = False
2198  for chem_group in chem_groups:
2199  if something_happened:
2200  break
2201  for ch1, ch2 in itertools.combinations(chem_group, 2):
2202  swapped_mapping = dict(mapping)
2203  swapped_mapping[ch1] = mapping[ch2]
2204  swapped_mapping[ch2] = mapping[ch1]
2205  score = self.FromFlatMappingFromFlatMapping(swapped_mapping)
2206  if score > current_lddt:
2207  something_happened = True
2208  mapping = swapped_mapping
2209  current_lddt = score
2210  break
2211  return mapping
2212 
2213 
2214 def _lDDTNaive(trg, mdl, inclusion_radius, thresholds, chem_groups,
2215  chem_mapping, ref_mdl_alns, n_max_naive):
2216  """ Naively iterates all possible chain mappings and returns the best
2217  """
2218  best_mapping = None
2219  best_lddt = -1.0
2220 
2221  # Setup scoring
2222  lddt_scorer = bb_lddt.BBlDDTScorer(trg, chem_groups, mdl, ref_mdl_alns,
2223  dist_thresh=inclusion_radius,
2224  dist_diff_thresholds=thresholds)
2225  for mapping in _ChainMappings(chem_groups, chem_mapping, n_max_naive):
2226  lDDT = lddt_scorer.Score(mapping, check=False)
2227  if lDDT > best_lddt:
2228  best_mapping = mapping
2229  best_lddt = lDDT
2230 
2231  return (best_mapping, best_lddt)
2232 
2233 
2234 def _GetSeeds(ref_chem_groups, mdl_chem_groups, mapped_ref_chains = set(),
2235  mapped_mdl_chains = set()):
2236  seeds = list()
2237  for ref_chains, mdl_chains in zip(ref_chem_groups,
2238  mdl_chem_groups):
2239  for ref_ch in ref_chains:
2240  if ref_ch not in mapped_ref_chains:
2241  for mdl_ch in mdl_chains:
2242  if mdl_ch not in mapped_mdl_chains:
2243  seeds.append((ref_ch, mdl_ch))
2244  return seeds
2245 
2246 
2247 def _lDDTGreedyFast(the_greed):
2248 
2249  something_happened = True
2250  mapping = dict()
2251 
2252  while something_happened:
2253  something_happened = False
2254  seeds = _GetSeeds(the_greed.ref_chem_groups,
2255  the_greed.mdl_chem_groups,
2256  mapped_ref_chains = set(mapping.keys()),
2257  mapped_mdl_chains = set(mapping.values()))
2258  # search for best scoring starting point
2259  n_best = 0
2260  best_seed = None
2261  for seed in seeds:
2262  n = the_greed._NSCConserved(seed[0], seed[1]).sum()
2263  if n > n_best:
2264  n_best = n
2265  best_seed = seed
2266  if n_best == 0:
2267  break # no proper seed found anymore...
2268  # add seed to mapping and start the greed
2269  mapping[best_seed[0]] = best_seed[1]
2270  mapping = the_greed.ExtendMapping(mapping)
2271  something_happened = True
2272 
2273  # translate mapping format and return
2274  final_mapping = list()
2275  for ref_chains in the_greed.ref_chem_groups:
2276  mapped_mdl_chains = list()
2277  for ref_ch in ref_chains:
2278  if ref_ch in mapping:
2279  mapped_mdl_chains.append(mapping[ref_ch])
2280  else:
2281  mapped_mdl_chains.append(None)
2282  final_mapping.append(mapped_mdl_chains)
2283 
2284  return final_mapping
2285 
2286 
2287 def _lDDTGreedyFull(the_greed):
2288  """ Uses each reference chain as starting point for expansion
2289  """
2290 
2291  seeds = _GetSeeds(the_greed.ref_chem_groups, the_greed.mdl_chem_groups)
2292  best_overall_score = -1.0
2293  best_overall_mapping = dict()
2294 
2295  for seed in seeds:
2296 
2297  # do initial extension
2298  mapping = the_greed.ExtendMapping({seed[0]: seed[1]})
2299 
2300  # repeat the process until we have a full mapping
2301  something_happened = True
2302  while something_happened:
2303  something_happened = False
2304  remnant_seeds = _GetSeeds(the_greed.ref_chem_groups,
2305  the_greed.mdl_chem_groups,
2306  mapped_ref_chains = set(mapping.keys()),
2307  mapped_mdl_chains = set(mapping.values()))
2308  if len(remnant_seeds) > 0:
2309  # still more mapping to be done
2310  best_score = -1.0
2311  best_mapping = None
2312  for remnant_seed in remnant_seeds:
2313  tmp_mapping = dict(mapping)
2314  tmp_mapping[remnant_seed[0]] = remnant_seed[1]
2315  tmp_mapping = the_greed.ExtendMapping(tmp_mapping)
2316  score = the_greed.FromFlatMapping(tmp_mapping)
2317  if score > best_score:
2318  best_score = score
2319  best_mapping = tmp_mapping
2320  if best_mapping is not None:
2321  something_happened = True
2322  mapping = best_mapping
2323 
2324  score = the_greed.FromFlatMapping(mapping)
2325  if score > best_overall_score:
2326  best_overall_score = score
2327  best_overall_mapping = mapping
2328 
2329  mapping = best_overall_mapping
2330 
2331  # translate mapping format and return
2332  final_mapping = list()
2333  for ref_chains in the_greed.ref_chem_groups:
2334  mapped_mdl_chains = list()
2335  for ref_ch in ref_chains:
2336  if ref_ch in mapping:
2337  mapped_mdl_chains.append(mapping[ref_ch])
2338  else:
2339  mapped_mdl_chains.append(None)
2340  final_mapping.append(mapped_mdl_chains)
2341 
2342  return final_mapping
2343 
2344 
2345 def _lDDTGreedyBlock(the_greed, seed_size, blocks_per_chem_group):
2346  """ try multiple seeds, i.e. try all ref/mdl chain combinations within the
2347  respective chem groups and compute single chain lDDTs. The
2348  *blocks_per_chem_group* best scoring ones are extend by *seed_size* chains
2349  and the best scoring one is exhaustively extended.
2350  """
2351 
2352  if seed_size is None or seed_size < 1:
2353  raise RuntimeError(f"seed_size must be an int >= 1 (got {seed_size})")
2354 
2355  if blocks_per_chem_group is None or blocks_per_chem_group < 1:
2356  raise RuntimeError(f"blocks_per_chem_group must be an int >= 1 "
2357  f"(got {blocks_per_chem_group})")
2358 
2359  max_ext = seed_size - 1 # -1 => start seed already has size 1
2360 
2361  ref_chem_groups = copy.deepcopy(the_greed.ref_chem_groups)
2362  mdl_chem_groups = copy.deepcopy(the_greed.mdl_chem_groups)
2363 
2364  mapping = dict()
2365  something_happened = True
2366  while something_happened:
2367  something_happened = False
2368  starting_blocks = list()
2369  for ref_chains, mdl_chains in zip(ref_chem_groups, mdl_chem_groups):
2370  if len(mdl_chains) == 0:
2371  continue # nothing to map
2372  ref_chains_copy = list(ref_chains)
2373  for i in range(blocks_per_chem_group):
2374  if len(ref_chains_copy) == 0:
2375  break
2376  seeds = list()
2377  for ref_ch in ref_chains_copy:
2378  seeds += [(ref_ch, mdl_ch) for mdl_ch in mdl_chains]
2379  # extend starting seeds to *seed_size* and retain best scoring
2380  # block for further extension
2381  best_score = -1.0
2382  best_mapping = None
2383  best_seed = None
2384  for s in seeds:
2385  seed = dict(mapping)
2386  seed.update({s[0]: s[1]})
2387  seed = the_greed.ExtendMapping(seed, max_ext = max_ext)
2388  seed_lddt = the_greed.FromFlatMapping(seed)
2389  if seed_lddt > best_score:
2390  best_score = seed_lddt
2391  best_mapping = seed
2392  best_seed = s
2393  if best_mapping != None:
2394  starting_blocks.append(best_mapping)
2395  if best_seed[0] in ref_chains_copy:
2396  # remove that ref chain to enforce diversity
2397  ref_chains_copy.remove(best_seed[0])
2398 
2399  # fully expand initial starting blocks
2400  best_lddt = 0.0
2401  best_mapping = None
2402  for seed in starting_blocks:
2403  seed = the_greed.ExtendMapping(seed)
2404  seed_lddt = the_greed.FromFlatMapping(seed)
2405  if seed_lddt > best_lddt:
2406  best_lddt = seed_lddt
2407  best_mapping = seed
2408 
2409  if best_lddt == 0.0:
2410  break # no proper mapping found anymore
2411 
2412  something_happened = True
2413  mapping.update(best_mapping)
2414  for ref_ch, mdl_ch in best_mapping.items():
2415  for group_idx in range(len(ref_chem_groups)):
2416  if ref_ch in ref_chem_groups[group_idx]:
2417  ref_chem_groups[group_idx].remove(ref_ch)
2418  if mdl_ch in mdl_chem_groups[group_idx]:
2419  mdl_chem_groups[group_idx].remove(mdl_ch)
2420 
2421  # translate mapping format and return
2422  final_mapping = list()
2423  for ref_chains in the_greed.ref_chem_groups:
2424  mapped_mdl_chains = list()
2425  for ref_ch in ref_chains:
2426  if ref_ch in mapping:
2427  mapped_mdl_chains.append(mapping[ref_ch])
2428  else:
2429  mapped_mdl_chains.append(None)
2430  final_mapping.append(mapped_mdl_chains)
2431 
2432  return final_mapping
2433 
2434 
2436  def __init__(self, ref, mdl, ref_chem_groups, mdl_chem_groups,
2437  ref_mdl_alns, contact_d = 12.0,
2438  steep_opt_rate = None, greedy_prune_contact_map=False):
2439  """ Greedy extension of already existing but incomplete chain mappings
2440  """
2441  super().__init__(ref, ref_chem_groups, mdl, ref_mdl_alns,
2442  contact_d = contact_d)
2443  self.refref = ref
2444  self.mdlmdl = mdl
2445  self.ref_mdl_alnsref_mdl_alns = ref_mdl_alns
2446  self.steep_opt_ratesteep_opt_rate = steep_opt_rate
2447 
2448  if greedy_prune_contact_map:
2449  self.neighborsneighbors = {k: set() for k in self.qsent1qsent1.chain_names}
2450  for p in self.qsent1qsent1.interacting_chains:
2451  if np.count_nonzero(self.qsent1qsent1.PairDist(p[0], p[1])<=8) >= 3:
2452  self.neighborsneighbors[p[0]].add(p[1])
2453  self.neighborsneighbors[p[1]].add(p[0])
2454 
2455  self.mdl_neighborsmdl_neighbors = {k: set() for k in self.qsent2qsent2.chain_names}
2456  for p in self.qsent2qsent2.interacting_chains:
2457  if np.count_nonzero(self.qsent2qsent2.PairDist(p[0], p[1])<=8) >= 3:
2458  self.mdl_neighborsmdl_neighbors[p[0]].add(p[1])
2459  self.mdl_neighborsmdl_neighbors[p[1]].add(p[0])
2460 
2461 
2462  else:
2463  self.neighborsneighbors = {k: set() for k in self.qsent1qsent1.chain_names}
2464  for p in self.qsent1qsent1.interacting_chains:
2465  self.neighborsneighbors[p[0]].add(p[1])
2466  self.neighborsneighbors[p[1]].add(p[0])
2467 
2468  self.mdl_neighborsmdl_neighbors = {k: set() for k in self.qsent2qsent2.chain_names}
2469  for p in self.qsent2qsent2.interacting_chains:
2470  self.mdl_neighborsmdl_neighbors[p[0]].add(p[1])
2471  self.mdl_neighborsmdl_neighbors[p[1]].add(p[0])
2472 
2473  assert(len(ref_chem_groups) == len(mdl_chem_groups))
2474  self.ref_chem_groupsref_chem_groups = ref_chem_groups
2475  self.mdl_chem_groupsmdl_chem_groups = mdl_chem_groups
2476  self.ref_ch_group_mapperref_ch_group_mapper = dict()
2477  self.mdl_ch_group_mappermdl_ch_group_mapper = dict()
2478  for g_idx, (ref_g, mdl_g) in enumerate(zip(ref_chem_groups,
2479  mdl_chem_groups)):
2480  for ch in ref_g:
2481  self.ref_ch_group_mapperref_ch_group_mapper[ch] = g_idx
2482  for ch in mdl_g:
2483  self.mdl_ch_group_mappermdl_ch_group_mapper[ch] = g_idx
2484 
2485  # cache for lDDT based single chain conserved contacts
2486  # used to identify starting points for further extension by QS score
2487  # key: tuple (ref_ch, mdl_ch) value: number of conserved contacts
2488  self.single_chain_scorersingle_chain_scorer = dict()
2489  self.single_chain_cachesingle_chain_cache = dict()
2490  for ch in self.refref.chains:
2491  single_chain_ref = _CSel(self.refref, [ch.GetName()])
2492  self.single_chain_scorersingle_chain_scorer[ch.GetName()] = \
2493  lddt.lDDTScorer(single_chain_ref, bb_only = True)
2494 
2495  def SCCounts(self, ref_ch, mdl_ch):
2496  if not (ref_ch, mdl_ch) in self.single_chain_cachesingle_chain_cache:
2497  alns = dict()
2498  alns[mdl_ch] = self.ref_mdl_alnsref_mdl_alns[(ref_ch, mdl_ch)]
2499  mdl_sel = _CSel(self.mdlmdl, [mdl_ch])
2500  s = self.single_chain_scorersingle_chain_scorer[ref_ch]
2501  _,_,_,conserved,_,_,_ = s.lDDT(mdl_sel,
2502  residue_mapping=alns,
2503  return_dist_test=True,
2504  no_interchain=True,
2505  chain_mapping={mdl_ch: ref_ch},
2506  check_resnames=False)
2507  self.single_chain_cachesingle_chain_cache[(ref_ch, mdl_ch)] = conserved
2508  return self.single_chain_cachesingle_chain_cache[(ref_ch, mdl_ch)]
2509 
2510  def ExtendMapping(self, mapping, max_ext = None):
2511 
2512  if len(mapping) == 0:
2513  raise RuntimError("Mapping must contain a starting point")
2514 
2515  # Ref chains onto which we can map. The algorithm starts with a mapping
2516  # on ref_ch. From there we can start to expand to connected neighbors.
2517  # All neighbors that we can reach from the already mapped chains are
2518  # stored in this set which will be updated during runtime
2519  map_targets = set()
2520  for ref_ch in mapping.keys():
2521  map_targets.update(self.neighborsneighbors[ref_ch])
2522 
2523  # remove the already mapped chains
2524  for ref_ch in mapping.keys():
2525  map_targets.discard(ref_ch)
2526 
2527  if len(map_targets) == 0:
2528  return mapping # nothing to extend
2529 
2530  # keep track of what model chains are not yet mapped for each chem group
2531  free_mdl_chains = list()
2532  for chem_group in self.mdl_chem_groupsmdl_chem_groups:
2533  tmp = [x for x in chem_group if x not in mapping.values()]
2534  free_mdl_chains.append(set(tmp))
2535 
2536  # keep track of what ref chains got a mapping
2537  newly_mapped_ref_chains = list()
2538 
2539  something_happened = True
2540  while something_happened:
2541  something_happened=False
2542 
2543  if self.steep_opt_ratesteep_opt_rate is not None:
2544  n_chains = len(newly_mapped_ref_chains)
2545  if n_chains > 0 and n_chains % self.steep_opt_ratesteep_opt_rate == 0:
2546  mapping = self._SteepOpt_SteepOpt(mapping, newly_mapped_ref_chains)
2547 
2548  if max_ext is not None and len(newly_mapped_ref_chains) >= max_ext:
2549  break
2550 
2551  score_result = self.FromFlatMappingFromFlatMapping(mapping)
2552  old_score = score_result.QS_global
2553  nominator = score_result.weighted_scores
2554  denominator = score_result.weight_sum + score_result.weight_extra_all
2555 
2556  max_diff = 0.0
2557  max_mapping = None
2558  for ref_ch in map_targets:
2559  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[ref_ch]
2560  for mdl_ch in free_mdl_chains[chem_group_idx]:
2561  # we're not computing full QS-score here, we directly hack
2562  # into the QS-score formula to compute a diff
2563  nominator_diff = 0.0
2564  denominator_diff = 0.0
2565  for neighbor in self.neighborsneighbors[ref_ch]:
2566  if neighbor in mapping and mapping[neighbor] in \
2567  self.mdl_neighborsmdl_neighbors[mdl_ch]:
2568  # it's a newly added interface if (ref_ch, mdl_ch)
2569  # are added to mapping
2570  int1 = (ref_ch, neighbor)
2571  int2 = (mdl_ch, mapping[neighbor])
2572  a, b, c, d = self._MappedInterfaceScores_MappedInterfaceScores(int1, int2)
2573  nominator_diff += a # weighted_scores
2574  denominator_diff += b # weight_sum
2575  denominator_diff += d # weight_extra_all
2576  # the respective interface penalties are subtracted
2577  # from denominator
2578  denominator_diff -= self._InterfacePenalty1_InterfacePenalty1(int1)
2579  denominator_diff -= self._InterfacePenalty2_InterfacePenalty2(int2)
2580 
2581  if nominator_diff > 0:
2582  # Only accept a new solution if its actually connected
2583  # i.e. nominator_diff > 0.
2584  new_nominator = nominator + nominator_diff
2585  new_denominator = denominator + denominator_diff
2586  new_score = 0.0
2587  if new_denominator != 0.0:
2588  new_score = new_nominator/new_denominator
2589  diff = new_score - old_score
2590  if diff > max_diff:
2591  max_diff = diff
2592  max_mapping = (ref_ch, mdl_ch)
2593 
2594  if max_mapping is not None:
2595  something_happened = True
2596  # assign new found mapping
2597  mapping[max_mapping[0]] = max_mapping[1]
2598 
2599  # add all neighboring chains to map targets as they are now
2600  # reachable
2601  for neighbor in self.neighborsneighbors[max_mapping[0]]:
2602  if neighbor not in mapping:
2603  map_targets.add(neighbor)
2604 
2605  # remove the ref chain from map targets
2606  map_targets.remove(max_mapping[0])
2607 
2608  # remove the mdl chain from free_mdl_chains - its taken...
2609  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[max_mapping[0]]
2610  free_mdl_chains[chem_group_idx].remove(max_mapping[1])
2611 
2612  # keep track of what ref chains got a mapping
2613  newly_mapped_ref_chains.append(max_mapping[0])
2614 
2615  return mapping
2616 
2617  def _SteepOpt(self, mapping, chains_to_optimize=None):
2618 
2619  # just optimize ALL ref chains if nothing specified
2620  if chains_to_optimize is None:
2621  chains_to_optimize = mapping.keys()
2622 
2623  # make sure that we only have ref chains which are actually mapped
2624  ref_chains = [x for x in chains_to_optimize if mapping[x] is not None]
2625 
2626  # group ref chains to be optimized into chem groups
2627  tmp = dict()
2628  for ch in ref_chains:
2629  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[ch]
2630  if chem_group_idx in tmp:
2631  tmp[chem_group_idx].append(ch)
2632  else:
2633  tmp[chem_group_idx] = [ch]
2634  chem_groups = list(tmp.values())
2635 
2636  # try all possible mapping swaps. Swaps that improve the score are
2637  # immediately accepted and we start all over again
2638  score_result = self.FromFlatMappingFromFlatMapping(mapping)
2639  current_score = score_result.QS_global
2640  something_happened = True
2641  while something_happened:
2642  something_happened = False
2643  for chem_group in chem_groups:
2644  if something_happened:
2645  break
2646  for ch1, ch2 in itertools.combinations(chem_group, 2):
2647  swapped_mapping = dict(mapping)
2648  swapped_mapping[ch1] = mapping[ch2]
2649  swapped_mapping[ch2] = mapping[ch1]
2650  score_result = self.FromFlatMappingFromFlatMapping(swapped_mapping)
2651  if score_result.QS_global > current_score:
2652  something_happened = True
2653  mapping = swapped_mapping
2654  current_score = score_result.QS_global
2655  break
2656  return mapping
2657 
2658 
2659 def _QSScoreNaive(trg, mdl, chem_groups, chem_mapping, ref_mdl_alns, contact_d,
2660  n_max_naive):
2661  best_mapping = None
2662  best_score = -1.0
2663  # qs_scorer implements caching, score calculation is thus as fast as it gets
2664  # you'll just hit a wall when the number of possible mappings becomes large
2665  qs_scorer = qsscore.QSScorer(trg, chem_groups, mdl, ref_mdl_alns)
2666  for mapping in _ChainMappings(chem_groups, chem_mapping, n_max_naive):
2667  score_result = qs_scorer.Score(mapping, check=False)
2668  if score_result.QS_global > best_score:
2669  best_mapping = mapping
2670  best_score = score_result.QS_global
2671  return (best_mapping, best_score)
2672 
2673 
2674 def _QSScoreGreedyFast(the_greed):
2675 
2676  something_happened = True
2677  mapping = dict()
2678  while something_happened:
2679  something_happened = False
2680  # search for best scoring starting point, we're using lDDT here
2681  n_best = 0
2682  best_seed = None
2683  seeds = _GetSeeds(the_greed.ref_chem_groups,
2684  the_greed.mdl_chem_groups,
2685  mapped_ref_chains = set(mapping.keys()),
2686  mapped_mdl_chains = set(mapping.values()))
2687  for seed in seeds:
2688  n = the_greed.SCCounts(seed[0], seed[1])
2689  if n > n_best:
2690  n_best = n
2691  best_seed = seed
2692  if n_best == 0:
2693  break # no proper seed found anymore...
2694  # add seed to mapping and start the greed
2695  mapping[best_seed[0]] = best_seed[1]
2696  mapping = the_greed.ExtendMapping(mapping)
2697  something_happened = True
2698 
2699  # translate mapping format and return
2700  final_mapping = list()
2701  for ref_chains in the_greed.ref_chem_groups:
2702  mapped_mdl_chains = list()
2703  for ref_ch in ref_chains:
2704  if ref_ch in mapping:
2705  mapped_mdl_chains.append(mapping[ref_ch])
2706  else:
2707  mapped_mdl_chains.append(None)
2708  final_mapping.append(mapped_mdl_chains)
2709 
2710  return final_mapping
2711 
2712 
2713 def _QSScoreGreedyFull(the_greed):
2714  """ Uses each reference chain as starting point for expansion
2715  """
2716 
2717  seeds = _GetSeeds(the_greed.ref_chem_groups, the_greed.mdl_chem_groups)
2718  best_overall_score = -1.0
2719  best_overall_mapping = dict()
2720 
2721  for seed in seeds:
2722 
2723  # do initial extension
2724  mapping = the_greed.ExtendMapping({seed[0]: seed[1]})
2725 
2726  # repeat the process until we have a full mapping
2727  something_happened = True
2728  while something_happened:
2729  something_happened = False
2730  remnant_seeds = _GetSeeds(the_greed.ref_chem_groups,
2731  the_greed.mdl_chem_groups,
2732  mapped_ref_chains = set(mapping.keys()),
2733  mapped_mdl_chains = set(mapping.values()))
2734  if len(remnant_seeds) > 0:
2735  # still more mapping to be done
2736  best_score = -1.0
2737  best_mapping = None
2738  for remnant_seed in remnant_seeds:
2739  tmp_mapping = dict(mapping)
2740  tmp_mapping[remnant_seed[0]] = remnant_seed[1]
2741  tmp_mapping = the_greed.ExtendMapping(tmp_mapping)
2742  score_result = the_greed.FromFlatMapping(tmp_mapping)
2743  if score_result.QS_global > best_score:
2744  best_score = score_result.QS_global
2745  best_mapping = tmp_mapping
2746  if best_mapping is not None:
2747  something_happened = True
2748  mapping = best_mapping
2749 
2750  score_result = the_greed.FromFlatMapping(mapping)
2751  if score_result.QS_global > best_overall_score:
2752  best_overall_score = score_result.QS_global
2753  best_overall_mapping = mapping
2754 
2755  mapping = best_overall_mapping
2756 
2757  # translate mapping format and return
2758  final_mapping = list()
2759  for ref_chains in the_greed.ref_chem_groups:
2760  mapped_mdl_chains = list()
2761  for ref_ch in ref_chains:
2762  if ref_ch in mapping:
2763  mapped_mdl_chains.append(mapping[ref_ch])
2764  else:
2765  mapped_mdl_chains.append(None)
2766  final_mapping.append(mapped_mdl_chains)
2767 
2768  return final_mapping
2769 
2770 
2771 def _QSScoreGreedyBlock(the_greed, seed_size, blocks_per_chem_group):
2772  """ try multiple seeds, i.e. try all ref/mdl chain combinations within the
2773  respective chem groups and compute single chain lDDTs. The
2774  *blocks_per_chem_group* best scoring ones are extend by *seed_size* chains
2775  and the best scoring one with respect to QS score is exhaustively extended.
2776  """
2777 
2778  if seed_size is None or seed_size < 1:
2779  raise RuntimeError(f"seed_size must be an int >= 1 (got {seed_size})")
2780 
2781  if blocks_per_chem_group is None or blocks_per_chem_group < 1:
2782  raise RuntimeError(f"blocks_per_chem_group must be an int >= 1 "
2783  f"(got {blocks_per_chem_group})")
2784 
2785  max_ext = seed_size - 1 # -1 => start seed already has size 1
2786 
2787  ref_chem_groups = copy.deepcopy(the_greed.ref_chem_groups)
2788  mdl_chem_groups = copy.deepcopy(the_greed.mdl_chem_groups)
2789 
2790  mapping = dict()
2791 
2792  something_happened = True
2793  while something_happened:
2794  something_happened = False
2795  starting_blocks = list()
2796  for ref_chains, mdl_chains in zip(ref_chem_groups, mdl_chem_groups):
2797  if len(mdl_chains) == 0:
2798  continue # nothing to map
2799  ref_chains_copy = list(ref_chains)
2800  for i in range(blocks_per_chem_group):
2801  if len(ref_chains_copy) == 0:
2802  break
2803  seeds = list()
2804  for ref_ch in ref_chains_copy:
2805  seeds += [(ref_ch, mdl_ch) for mdl_ch in mdl_chains]
2806  # extend starting seeds to *seed_size* and retain best scoring block
2807  # for further extension
2808  best_score = -1.0
2809  best_mapping = None
2810  best_seed = None
2811  for s in seeds:
2812  seed = dict(mapping)
2813  seed.update({s[0]: s[1]})
2814  seed = the_greed.ExtendMapping(seed, max_ext = max_ext)
2815  score_result = the_greed.FromFlatMapping(seed)
2816  if score_result.QS_global > best_score:
2817  best_score = score_result.QS_global
2818  best_mapping = seed
2819  best_seed = s
2820  if best_mapping != None:
2821  starting_blocks.append(best_mapping)
2822  if best_seed[0] in ref_chains_copy:
2823  # remove selected ref chain to enforce diversity
2824  ref_chains_copy.remove(best_seed[0])
2825 
2826  # fully expand initial starting blocks
2827  best_score = -1.0
2828  best_mapping = None
2829  for seed in starting_blocks:
2830  seed = the_greed.ExtendMapping(seed)
2831  score_result = the_greed.FromFlatMapping(seed)
2832  if score_result.QS_global > best_score:
2833  best_score = score_result.QS_global
2834  best_mapping = seed
2835 
2836  if best_mapping is not None and len(best_mapping) > len(mapping):
2837  # this even accepts extensions that lead to no increase in QS-score
2838  # at least they make sense from an lDDT perspective
2839  something_happened = True
2840  mapping.update(best_mapping)
2841  for ref_ch, mdl_ch in best_mapping.items():
2842  for group_idx in range(len(ref_chem_groups)):
2843  if ref_ch in ref_chem_groups[group_idx]:
2844  ref_chem_groups[group_idx].remove(ref_ch)
2845  if mdl_ch in mdl_chem_groups[group_idx]:
2846  mdl_chem_groups[group_idx].remove(mdl_ch)
2847 
2848  # translate mapping format and return
2849  final_mapping = list()
2850  for ref_chains in the_greed.ref_chem_groups:
2851  mapped_mdl_chains = list()
2852  for ref_ch in ref_chains:
2853  if ref_ch in mapping:
2854  mapped_mdl_chains.append(mapping[ref_ch])
2855  else:
2856  mapped_mdl_chains.append(None)
2857  final_mapping.append(mapped_mdl_chains)
2858 
2859  return final_mapping
2860 
2861 def _SingleRigidRMSD(initial_transforms, initial_mappings, chem_groups,
2862  chem_mapping, trg_group_pos, mdl_group_pos):
2863  """
2864  Takes initial transforms and sequentially adds chain pairs with lowest RMSD.
2865  The mapping from the transform that leads to lowest overall RMSD is
2866  returned.
2867  """
2868  best_mapping = dict()
2869  best_ssd = float("inf") # we're actually going for summed squared distances
2870  # Since all positions have same lengths and we do a
2871  # full mapping, lowest SSD has a guarantee of also
2872  # being lowest RMSD
2873  for transform in initial_transforms:
2874  mapping = dict()
2875  mapped_mdl_chains = set()
2876  ssd = 0.0
2877  for trg_chains, mdl_chains, trg_pos, mdl_pos, in zip(chem_groups,
2878  chem_mapping,
2879  trg_group_pos,
2880  mdl_group_pos):
2881  if len(trg_pos) == 0 or len(mdl_pos) == 0:
2882  continue # cannot compute valid rmsd
2883  ssds = list()
2884  t_mdl_pos = list()
2885  for m_pos in mdl_pos:
2886  t_m_pos = geom.Vec3List(m_pos)
2887  t_m_pos.ApplyTransform(transform)
2888  t_mdl_pos.append(t_m_pos)
2889  for t_pos, t in zip(trg_pos, trg_chains):
2890  for t_m_pos, m in zip(t_mdl_pos, mdl_chains):
2891  ssd = t_pos.GetSummedSquaredDistances(t_m_pos)
2892  ssds.append((ssd, (t,m)))
2893  ssds.sort()
2894  for item in ssds:
2895  p = item[1]
2896  if p[0] not in mapping and p[1] not in mapped_mdl_chains:
2897  mapping[p[0]] = p[1]
2898  mapped_mdl_chains.add(p[1])
2899  ssd += item[0]
2900 
2901  if ssd < best_ssd:
2902  best_ssd = ssd
2903  best_mapping = mapping
2904 
2905  return best_mapping
2906 
2907 def _IterativeRigidRMSD(initial_transforms, initial_mappings, chem_groups,
2908  chem_mapping, trg_group_pos, mdl_group_pos):
2909  """ Takes initial transforms and sequentially adds chain pairs with
2910  lowest RMSD. With each added chain pair, the transform gets updated.
2911  Thus the naming iterative. The mapping from the initial transform that
2912  leads to best overall RMSD score is returned.
2913  """
2914 
2915  # to directly retrieve positions using chain names
2916  trg_pos_dict = dict()
2917  for trg_pos, trg_chains in zip(trg_group_pos, chem_groups):
2918  for t_pos, t in zip(trg_pos, trg_chains):
2919  trg_pos_dict[t] = t_pos
2920  mdl_pos_dict = dict()
2921  for mdl_pos, mdl_chains in zip(mdl_group_pos, chem_mapping):
2922  for m_pos, m in zip(mdl_pos, mdl_chains):
2923  mdl_pos_dict[m] = m_pos
2924 
2925  best_mapping = dict()
2926  best_rmsd = float("inf")
2927  for initial_transform, initial_mapping in zip(initial_transforms,
2928  initial_mappings):
2929  mapping = {initial_mapping[0]: initial_mapping[1]}
2930  transform = geom.Mat4(initial_transform)
2931  mapped_trg_pos = geom.Vec3List(trg_pos_dict[initial_mapping[0]])
2932  mapped_mdl_pos = geom.Vec3List(mdl_pos_dict[initial_mapping[1]])
2933 
2934  # the following variables contain the chains which are
2935  # available for mapping
2936  trg_chain_groups = [set(group) for group in chem_groups]
2937  mdl_chain_groups = [set(group) for group in chem_mapping]
2938 
2939  # search and kick out inital mapping
2940  for group in trg_chain_groups:
2941  if initial_mapping[0] in group:
2942  group.remove(initial_mapping[0])
2943  break
2944  for group in mdl_chain_groups:
2945  if initial_mapping[1] in group:
2946  group.remove(initial_mapping[1])
2947  break
2948 
2949  something_happened = True
2950  while something_happened:
2951  # search for best mapping given current transform
2952  something_happened=False
2953  best_sc_mapping = None
2954  best_sc_group_idx = None
2955  best_sc_rmsd = float("inf")
2956  group_idx = 0
2957  for trg_chains, mdl_chains in zip(trg_chain_groups, mdl_chain_groups):
2958  for t in trg_chains:
2959  t_pos = trg_pos_dict[t]
2960  for m in mdl_chains:
2961  m_pos = mdl_pos_dict[m]
2962  t_m_pos = geom.Vec3List(m_pos)
2963  t_m_pos.ApplyTransform(transform)
2964  rmsd = t_pos.GetRMSD(t_m_pos)
2965  if rmsd < best_sc_rmsd:
2966  best_sc_rmsd = rmsd
2967  best_sc_mapping = (t,m)
2968  best_sc_group_idx = group_idx
2969  group_idx += 1
2970 
2971  if best_sc_mapping is not None:
2972  something_happened = True
2973  mapping[best_sc_mapping[0]] = best_sc_mapping[1]
2974  mapped_trg_pos.extend(trg_pos_dict[best_sc_mapping[0]])
2975  mapped_mdl_pos.extend(mdl_pos_dict[best_sc_mapping[1]])
2976  trg_chain_groups[best_sc_group_idx].remove(best_sc_mapping[0])
2977  mdl_chain_groups[best_sc_group_idx].remove(best_sc_mapping[1])
2978 
2979  transform = _GetSuperposition(mapped_mdl_pos, mapped_trg_pos,
2980  False).transformation
2981 
2982  # compute overall RMSD for current transform
2983  mapped_mdl_pos.ApplyTransform(transform)
2984  rmsd = mapped_trg_pos.GetRMSD(mapped_mdl_pos)
2985 
2986  if rmsd < best_rmsd:
2987  best_rmsd = rmsd
2988  best_mapping = mapping
2989 
2990  return best_mapping
2991 
2992 def _NaiveRMSD(chem_groups, chem_mapping, trg_group_pos, mdl_group_pos,
2993  n_max_naive):
2994 
2995  # to directly retrieve positions using chain names
2996  trg_pos_dict = dict()
2997  for trg_pos, trg_chains in zip(trg_group_pos, chem_groups):
2998  for t_pos, t in zip(trg_pos, trg_chains):
2999  trg_pos_dict[t] = t_pos
3000  mdl_pos_dict = dict()
3001  for mdl_pos, mdl_chains in zip(mdl_group_pos, chem_mapping):
3002  for m_pos, m in zip(mdl_pos, mdl_chains):
3003  mdl_pos_dict[m] = m_pos
3004 
3005  best_mapping = dict()
3006  best_rmsd = float("inf")
3007 
3008  for mapping in _ChainMappings(chem_groups, chem_mapping, n_max_naive):
3009  trg_pos = geom.Vec3List()
3010  mdl_pos = geom.Vec3List()
3011  for trg_group, mdl_group in zip(chem_groups, mapping):
3012  for trg_ch, mdl_ch in zip(trg_group, mdl_group):
3013  if trg_ch is not None and mdl_ch is not None:
3014  trg_pos.extend(trg_pos_dict[trg_ch])
3015  mdl_pos.extend(mdl_pos_dict[mdl_ch])
3016  superpose_res = mol.alg.SuperposeSVD(mdl_pos, trg_pos)
3017  if superpose_res.rmsd < best_rmsd:
3018  best_rmsd = superpose_res.rmsd
3019  best_mapping = mapping
3020 
3021  # this is stupid...
3022  tmp = dict()
3023  for chem_group, mapping in zip(chem_groups, best_mapping):
3024  for trg_ch, mdl_ch in zip(chem_group, mapping):
3025  tmp[trg_ch] = mdl_ch
3026 
3027  return tmp
3028 
3029 
3030 def _GetRefPos(trg, mdl, trg_msas, mdl_alns, max_pos = None):
3031  """ Extracts reference positions which are present in trg and mdl
3032  """
3033 
3034  # select only backbone atoms, makes processing simpler later on
3035  # (just select res.atoms[0].GetPos() as ref pos)
3036  bb_trg = trg.Select("aname=\"CA\",\"C3'\"")
3037  bb_mdl = mdl.Select("aname=\"CA\",\"C3'\"")
3038 
3039  # mdl_alns are pairwise, let's construct MSAs
3040  mdl_msas = list()
3041  for aln_list in mdl_alns:
3042  if len(aln_list) > 0:
3043  tmp = aln_list[0].GetSequence(0)
3044  ref_seq = seq.CreateSequence(tmp.GetName(), tmp.GetGaplessString())
3045  mdl_msas.append(seq.alg.MergePairwiseAlignments(aln_list, ref_seq))
3046  else:
3047  mdl_msas.append(seq.CreateAlignment())
3048 
3049  trg_pos = list()
3050  mdl_pos = list()
3051 
3052  for trg_msa, mdl_msa in zip(trg_msas, mdl_msas):
3053 
3054  if mdl_msa.GetCount() > 0:
3055  # make sure they have the same ref sequence (should be a given...)
3056  assert(trg_msa.GetSequence(0).GetGaplessString() == \
3057  mdl_msa.GetSequence(0).GetGaplessString())
3058  else:
3059  # if mdl_msa is empty, i.e. no model chain maps to the chem group
3060  # represented by trg_msa, we just continue. The result will be
3061  # empty position lists added to trg_pos and mdl_pos.
3062  pass
3063 
3064  # check which columns in MSAs are fully covered (indices relative to
3065  # first sequence)
3066  trg_indices = _GetFullyCoveredIndices(trg_msa)
3067  mdl_indices = _GetFullyCoveredIndices(mdl_msa)
3068 
3069  # get indices where both, mdl and trg, are fully covered
3070  indices = sorted(list(trg_indices.intersection(mdl_indices)))
3071 
3072  # subsample if necessary
3073  if max_pos is not None and len(indices) > max_pos:
3074  step = int(len(indices)/max_pos)
3075  indices = [indices[i] for i in range(0, len(indices), step)]
3076 
3077  # translate to column indices in the respective MSAs
3078  trg_indices = _RefIndicesToColumnIndices(trg_msa, indices)
3079  mdl_indices = _RefIndicesToColumnIndices(mdl_msa, indices)
3080 
3081  # extract positions
3082  trg_pos.append(list())
3083  mdl_pos.append(list())
3084  for s_idx in range(trg_msa.GetCount()):
3085  trg_pos[-1].append(_ExtractMSAPos(trg_msa, s_idx, trg_indices,
3086  bb_trg))
3087  # first seq in mdl_msa is ref sequence in trg and does not belong to mdl
3088  for s_idx in range(1, mdl_msa.GetCount()):
3089  mdl_pos[-1].append(_ExtractMSAPos(mdl_msa, s_idx, mdl_indices,
3090  bb_mdl))
3091 
3092  return (trg_pos, mdl_pos)
3093 
3094 def _GetFullyCoveredIndices(msa):
3095  """ Helper for _GetRefPos
3096 
3097  Returns a set containing the indices relative to first sequence in msa which
3098  are fully covered in all other sequences
3099 
3100  --AA-A-A
3101  -BBBB-BB
3102  CCCC-C-C
3103 
3104  => (0,1,3)
3105  """
3106  indices = set()
3107  ref_idx = 0
3108  for col in msa:
3109  if sum([1 for olc in col if olc != '-']) == col.GetRowCount():
3110  indices.add(ref_idx)
3111  if col[0] != '-':
3112  ref_idx += 1
3113  return indices
3114 
3115 def _RefIndicesToColumnIndices(msa, indices):
3116  """ Helper for _GetRefPos
3117 
3118  Returns a list of mapped indices. indices refer to non-gap one letter
3119  codes in the first msa sequence. The returnes mapped indices are translated
3120  to the according msa column indices
3121  """
3122  ref_idx = 0
3123  mapping = dict()
3124  for col_idx, col in enumerate(msa):
3125  if col[0] != '-':
3126  mapping[ref_idx] = col_idx
3127  ref_idx += 1
3128  return [mapping[i] for i in indices]
3129 
3130 def _ExtractMSAPos(msa, s_idx, indices, view):
3131  """ Helper for _GetRefPos
3132 
3133  Returns a geom.Vec3List containing positions refering to given msa sequence.
3134  => Chain with corresponding name is mapped onto sequence and the position of
3135  the first atom of each residue specified in indices is extracted.
3136  Indices refers to column indices in msa!
3137  """
3138  s = msa.GetSequence(s_idx)
3139  s_v = _CSel(view, [s.GetName()])
3140 
3141  # sanity check
3142  assert(len(s.GetGaplessString()) == len(s_v.residues))
3143 
3144  residue_idx = [s.GetResidueIndex(i) for i in indices]
3145  return geom.Vec3List([s_v.residues[i].atoms[0].pos for i in residue_idx])
3146 
3147 def _NChemGroupMappings(ref_chains, mdl_chains):
3148  """ Number of mappings within one chem group
3149 
3150  :param ref_chains: Reference chains
3151  :type ref_chains: :class:`list` of :class:`str`
3152  :param mdl_chains: Model chains that are mapped onto *ref_chains*
3153  :type mdl_chains: :class:`list` of :class:`str`
3154  :returns: Number of possible mappings of *mdl_chains* onto *ref_chains*
3155  """
3156  n_ref = len(ref_chains)
3157  n_mdl = len(mdl_chains)
3158  if n_ref == n_mdl:
3159  return factorial(n_ref)
3160  elif n_ref > n_mdl:
3161  n_choose_k = binom(n_ref, n_mdl)
3162  return n_choose_k * factorial(n_mdl)
3163  else:
3164  n_choose_k = binom(n_mdl, n_ref)
3165  return n_choose_k * factorial(n_ref)
3166 
3167 def _NMappings(ref_chains, mdl_chains):
3168  """ Number of mappings for a full chem mapping
3169 
3170  :param ref_chains: Chem groups of reference
3171  :type ref_chains: :class:`list` of :class:`list` of :class:`str`
3172  :param mdl_chains: Model chains that map onto those chem groups
3173  :type mdl_chains: :class:`list` of :class:`list` of :class:`str`
3174  :returns: Number of possible mappings of *mdl_chains* onto *ref_chains*
3175  """
3176  assert(len(ref_chains) == len(mdl_chains))
3177  n = 1
3178  for a,b in zip(ref_chains, mdl_chains):
3179  n *= _NChemGroupMappings(a,b)
3180  return n
3181 
3182 def _NMappingsWithin(ref_chains, mdl_chains, max_mappings):
3183  """ Check whether total number of mappings is smaller than given maximum
3184 
3185  In principle the same as :func:`_NMappings` but it stops as soon as the
3186  maximum is hit.
3187 
3188  :param ref_chains: Chem groups of reference
3189  :type ref_chains: :class:`list` of :class:`list` of :class:`str`
3190  :param mdl_chains: Model chains that map onto those chem groups
3191  :type mdl_chains: :class:`list` of :class:`list` of :class:`str`
3192  :param max_mappings: Number of max allowed mappings
3193  :returns: Whether number of possible mappings of *mdl_chains* onto
3194  *ref_chains* is below or equal *max_mappings*.
3195  """
3196  assert(len(ref_chains) == len(mdl_chains))
3197  n = 1
3198  for a,b in zip(ref_chains, mdl_chains):
3199  n *= _NChemGroupMappings(a,b)
3200  if n > max_mappings:
3201  return False
3202  return True
3203 
3204 def _RefSmallerGenerator(ref_chains, mdl_chains):
3205  """ Returns all possible ways to map mdl_chains onto ref_chains
3206 
3207  Specific for the case where len(ref_chains) < len(mdl_chains)
3208  """
3209  for c in itertools.combinations(mdl_chains, len(ref_chains)):
3210  for p in itertools.permutations(c):
3211  yield list(p)
3212 
3213 def _RefLargerGenerator(ref_chains, mdl_chains):
3214  """ Returns all possible ways to map mdl_chains onto ref_chains
3215 
3216  Specific for the case where len(ref_chains) > len(mdl_chains)
3217  Ref chains without mapped mdl chain are assigned None
3218  """
3219  n_ref = len(ref_chains)
3220  n_mdl = len(mdl_chains)
3221  for c in itertools.combinations(range(n_ref), n_mdl):
3222  for p in itertools.permutations(mdl_chains):
3223  ret_list = [None] * n_ref
3224  for idx, ch in zip(c, p):
3225  ret_list[idx] = ch
3226  yield ret_list
3227 
3228 def _RefEqualGenerator(ref_chains, mdl_chains):
3229  """ Returns all possible ways to map mdl_chains onto ref_chains
3230 
3231  Specific for the case where len(ref_chains) == len(mdl_chains)
3232  """
3233  for p in itertools.permutations(mdl_chains):
3234  yield list(p)
3235 
3236 def _RefEmptyGenerator(ref_chains, mdl_chains):
3237  yield list()
3238 
3239 def _ConcatIterators(iterators):
3240  for item in itertools.product(*iterators):
3241  yield list(item)
3242 
3243 def _ChainMappings(ref_chains, mdl_chains, n_max=None):
3244  """Returns all possible ways to map *mdl_chains* onto fixed *ref_chains*
3245 
3246  :param ref_chains: List of list of chemically equivalent chains in reference
3247  :type ref_chains: :class:`list` of :class:`list`
3248  :param mdl_chains: Equally long list of list of chemically equivalent chains
3249  in model that map on those ref chains.
3250  :type mdl_chains: :class:`list` of :class:`list`
3251  :param n_max: Aborts and raises :class:`RuntimeError` if max number of
3252  mappings is above this threshold.
3253  :type n_max: :class:`int`
3254  :returns: Iterator over all possible mappings of *mdl_chains* onto fixed
3255  *ref_chains*. Potentially contains None as padding when number of
3256  model chains for a certain mapping is smaller than the according
3257  reference chains.
3258  Example: _ChainMappings([['A', 'B', 'C'], ['D', 'E']],
3259  [['x', 'y'], ['i', 'j']])
3260  gives an iterator over: [[['x', 'y', None], ['i', 'j']],
3261  [['x', 'y', None], ['j', 'i']],
3262  [['y', 'x', None], ['i', 'j']],
3263  [['y', 'x', None], ['j', 'i']],
3264  [['x', None, 'y'], ['i', 'j']],
3265  [['x', None, 'y'], ['j', 'i']],
3266  [['y', None, 'x'], ['i', 'j']],
3267  [['y', None, 'x'], ['j', 'i']],
3268  [[None, 'x', 'y'], ['i', 'j']],
3269  [[None, 'x', 'y'], ['j', 'i']],
3270  [[None, 'y', 'x'], ['i', 'j']],
3271  [[None, 'y', 'x'], ['j', 'i']]]
3272  """
3273  assert(len(ref_chains) == len(mdl_chains))
3274 
3275  if n_max is not None:
3276  if not _NMappingsWithin(ref_chains, mdl_chains, n_max):
3277  raise RuntimeError(f"Too many mappings. Max allowed: {n_max}")
3278 
3279  # one iterator per mapping representing all mdl combinations relative to
3280  # reference
3281  iterators = list()
3282  for ref, mdl in zip(ref_chains, mdl_chains):
3283  if len(ref) == 0:
3284  iterators.append(_RefEmptyGenerator(ref, mdl))
3285  elif len(ref) == len(mdl):
3286  iterators.append(_RefEqualGenerator(ref, mdl))
3287  elif len(ref) < len(mdl):
3288  iterators.append(_RefSmallerGenerator(ref, mdl))
3289  else:
3290  iterators.append(_RefLargerGenerator(ref, mdl))
3291 
3292  return _ConcatIterators(iterators)
3293 
3294 
3295 def _GetSuperposition(pos_one, pos_two, iterative):
3296  """ Computes minimal RMSD superposition for pos_one onto pos_two
3297 
3298  :param pos_one: Positions that should be superposed onto *pos_two*
3299  :type pos_one: :class:`geom.Vec3List`
3300  :param pos_two: Reference positions
3301  :type pos_two: :class:`geom.Vec3List`
3302  :iterative: Whether iterative superposition should be used. Iterative
3303  potentially raises, uses standard superposition as fallback.
3304  :type iterative: :class:`bool`
3305  :returns: Transformation matrix to superpose *pos_one* onto *pos_two*
3306  :rtype: :class:`ost.mol.alg.SuperpositionResult`
3307  """
3308  res = None
3309  if iterative:
3310  try:
3311  res = mol.alg.IterativeSuperposeSVD(pos_one, pos_two)
3312  except:
3313  pass # triggers fallback below
3314  if res is None:
3315  res = mol.alg.SuperposeSVD(pos_one, pos_two)
3316  return res
3317 
3318 # specify public interface
3319 __all__ = ('ChainMapper', 'ReprResult', 'MappingResult')
def _NSCConserved(self, trg_ch, mdl_ch)
Definition: bb_lddt.py:481
def _NPairConserved(self, trg_int, mdl_int)
Definition: bb_lddt.py:504
def FromFlatMapping(self, flat_mapping)
Definition: bb_lddt.py:457
def __init__(self, pep_subst_mat=seq.alg.BLOSUM62, pep_gap_open=-5, pep_gap_ext=-2, nuc_subst_mat=seq.alg.NUC44, nuc_gap_open=-4, nuc_gap_ext=-4, resnum_aln=False)
def Align(self, s1, s2, chem_type=None)
def NWAlign(self, s1, s2, chem_type)
def __init__(self, ref, mdl, ref_chem_groups, mdl_chem_groups, ref_mdl_alns, contact_d=12.0, steep_opt_rate=None, greedy_prune_contact_map=False)
def ExtendMapping(self, mapping, max_ext=None)
def _SteepOpt(self, mapping, chains_to_optimize=None)
def __init__(self, ref, mdl, ref_chem_groups, mdl_chem_groups, ref_mdl_alns, inclusion_radius=15.0, thresholds=[0.5, 1.0, 2.0, 4.0], steep_opt_rate=None)
def ExtendMapping(self, mapping, max_ext=None)
def _SteepOpt(self, mapping, chains_to_optimize=None)
def GetlDDTMapping(self, model, inclusion_radius=15.0, thresholds=[0.5, 1.0, 2.0, 4.0], strategy="heuristic", steep_opt_rate=None, block_seed_size=5, block_blocks_per_chem_group=5, chem_mapping_result=None, heuristic_n_max_naive=40320)
def GetRepr(self, substructure, model, topn=1, inclusion_radius=15.0, thresholds=[0.5, 1.0, 2.0, 4.0], bb_only=False, only_interchain=False, chem_mapping_result=None, global_mapping=None)
def GetMapping(self, model, n_max_naive=40320)
def __init__(self, target, resnum_alignments=False, pep_seqid_thr=95., nuc_seqid_thr=95., pep_subst_mat=seq.alg.BLOSUM62, pep_gap_open=-11, pep_gap_ext=-1, nuc_subst_mat=seq.alg.NUC44, nuc_gap_open=-4, nuc_gap_ext=-4, min_pep_length=6, min_nuc_length=4, n_max_naive=1e8, mdl_map_pep_seqid_thr=0., mdl_map_nuc_seqid_thr=0.)
def GetQSScoreMapping(self, model, contact_d=12.0, strategy="heuristic", block_seed_size=5, block_blocks_per_chem_group=5, heuristic_n_max_naive=40320, steep_opt_rate=None, chem_mapping_result=None, greedy_prune_contact_map=True)
def GetRMSDMapping(self, model, strategy="heuristic", subsampling=50, chem_mapping_result=None, heuristic_n_max_naive=120)
def __init__(self, target, model, chem_groups, chem_mapping, unmapped_mdl_chains, mapping, alns, opt_score=None)
def GetFlatMapping(self, mdl_as_key=False)
def GetFlatChainMapping(self, mdl_as_key=False)
def __init__(self, lDDT, substructure, ref_view, mdl_view)
def _GetInconsistentResidues(self, ref_residues, mdl_residues)
def _InterfacePenalty1(self, interface)
Definition: qsscore.py:734
def _InterfacePenalty2(self, interface)
Definition: qsscore.py:740
def _MappedInterfaceScores(self, int1, int2)
Definition: qsscore.py:583
def FromFlatMapping(self, flat_mapping)
Definition: qsscore.py:528