OpenStructure
chain_mapping.py
Go to the documentation of this file.
1 """
2 Chain mapping aims to identify a one-to-one relationship between chains in a
3 reference structure and a model.
4 """
5 
6 import itertools
7 import copy
8 
9 import numpy as np
10 
11 from scipy.special import factorial
12 from scipy.special import binom # as of Python 3.8, the math module implements
13  # comb, i.e. n choose k
14 
15 import ost
16 from ost import seq
17 from ost import mol
18 from ost import geom
19 
20 from ost.mol.alg import lddt
21 from ost.mol.alg import bb_lddt
22 from ost.mol.alg import qsscore
23 
24 def _CSel(ent, cnames):
25  """ Returns view with specified chains
26 
27  Ensures that quotation marks are around chain names to not confuse
28  OST query language with weird special characters.
29  """
30  query = "cname=" + ','.join([mol.QueryQuoteName(cname) for cname in cnames])
31  return ent.Select(query)
32 
34  """ Result object for the chain mapping functions in :class:`ChainMapper`
35 
36  Constructor is directly called within the functions, no need to construct
37  such objects yourself.
38  """
39  def __init__(self, target, model, chem_groups, chem_mapping, mapping, alns,
40  opt_score=None):
41  self._target_target = target
42  self._model_model = model
43  self._chem_groups_chem_groups = chem_groups
44  self._chem_mapping_chem_mapping = chem_mapping
45  self._mapping_mapping = mapping
46  self._alns_alns = alns
47  self._opt_score_opt_score = opt_score
48 
49  @property
50  def target(self):
51  """ Target/reference structure, i.e. :attr:`ChainMapper.target`
52 
53  :type: :class:`ost.mol.EntityView`
54  """
55  return self._target_target
56 
57  @property
58  def model(self):
59  """ Model structure that gets mapped onto :attr:`~target`
60 
61  Underwent same processing as :attr:`ChainMapper.target`, i.e.
62  only contains peptide/nucleotide chains of sufficient size.
63 
64  :type: :class:`ost.mol.EntityView`
65  """
66  return self._model_model
67 
68  @property
69  def chem_groups(self):
70  """ Groups of chemically equivalent chains in :attr:`~target`
71 
72  Same as :attr:`ChainMapper.chem_group`
73 
74  :class:`list` of :class:`list` of :class:`str` (chain names)
75  """
76  return self._chem_groups_chem_groups
77 
78  @property
79  def chem_mapping(self):
80  """ Assigns chains in :attr:`~model` to :attr:`~chem_groups`.
81 
82  :class:`list` of :class:`list` of :class:`str` (chain names)
83  """
84  return self._chem_mapping_chem_mapping
85 
86  @property
87  def mapping(self):
88  """ Mapping of :attr:`~model` chains onto :attr:`~target`
89 
90  Exact same shape as :attr:`~chem_groups` but containing the names of the
91  mapped chains in :attr:`~model`. May contain None for :attr:`~target`
92  chains that are not covered. No guarantee that all chains in
93  :attr:`~model` are mapped.
94 
95  :class:`list` of :class:`list` of :class:`str` (chain names)
96  """
97  return self._mapping_mapping
98 
99  @property
100  def alns(self):
101  """ Alignments of mapped chains in :attr:`~target` and :attr:`~model`
102 
103  Each alignment is accessible with ``alns[(t_chain,m_chain)]``. First
104  sequence is the sequence of :attr:`target` chain, second sequence the
105  one from :attr:`~model`. The respective :class:`ost.mol.EntityView` are
106  attached with :func:`ost.seq.ConstSequenceHandle.AttachView`.
107 
108  :type: :class:`dict` with key: :class:`tuple` of :class:`str`, value:
109  :class:`ost.seq.AlignmentHandle`
110  """
111  return self._alns_alns
112 
113  @property
114  def opt_score(self):
115  """ Placeholder property without any guarantee of being set
116 
117  Different scores get optimized in the various chain mapping algorithms.
118  Some of them may set their final optimal score in that property.
119  Consult the documentation of the respective chain mapping algorithm
120  for more information. Won't be in the return dict of
121  :func:`JSONSummary`.
122  """
123  return self._opt_score_opt_score
124 
125  def GetFlatMapping(self, mdl_as_key=False):
126  """ Returns flat mapping as :class:`dict` for all mapable chains
127 
128  :param mdl_as_key: Default is target chain name as key and model chain
129  name as value. This can be reversed with this flag.
130  :returns: :class:`dict` with :class:`str` as key/value that describe
131  one-to-one mapping
132  """
133  flat_mapping = dict()
134  for trg_chem_group, mdl_chem_group in zip(self.chem_groupschem_groups,
135  self.mappingmapping):
136  for a,b in zip(trg_chem_group, mdl_chem_group):
137  if a is not None and b is not None:
138  if mdl_as_key:
139  flat_mapping[b] = a
140  else:
141  flat_mapping[a] = b
142  return flat_mapping
143 
144  def JSONSummary(self):
145  """ Returns JSON serializable summary of results
146  """
147  json_dict = dict()
148  json_dict["chem_groups"] = self.chem_groupschem_groups
149  json_dict["mapping"] = self.mappingmapping
150  json_dict["flat_mapping"] = self.GetFlatMappingGetFlatMapping()
151  json_dict["alns"] = list()
152  for aln in self.alnsalns.values():
153  trg_seq = aln.GetSequence(0)
154  mdl_seq = aln.GetSequence(1)
155  aln_dict = {"trg_ch": trg_seq.GetName(), "trg_seq": str(trg_seq),
156  "mdl_ch": mdl_seq.GetName(), "mdl_seq": str(mdl_seq)}
157  json_dict["alns"].append(aln_dict)
158  return json_dict
159 
160 
162 
163  """ Result object for :func:`ChainMapper.GetRepr`
164 
165  Constructor is directly called within the function, no need to construct
166  such objects yourself.
167 
168  :param lDDT: lDDT for this mapping. Depends on how you call
169  :func:`ChainMapper.GetRepr` whether this is backbone only or
170  full atom lDDT.
171  :type lDDT: :class:`float`
172  :param substructure: The full substructure for which we searched for a
173  representation
174  :type substructure: :class:`ost.mol.EntityView`
175  :param ref_view: View pointing to the same underlying entity as
176  *substructure* but only contains the stuff that is mapped
177  :type ref_view: :class:`mol.EntityView`
178  :param mdl_view: The matching counterpart in model
179  :type mdl_view: :class:`mol.EntityView`
180  """
181  def __init__(self, lDDT, substructure, ref_view, mdl_view):
182  self._lDDT_lDDT = lDDT
183  self._substructure_substructure = substructure
184  assert(len(ref_view.residues) == len(mdl_view.residues))
185  self._ref_view_ref_view = ref_view
186  self._mdl_view_mdl_view = mdl_view
187 
188  # lazily evaluated attributes
189  self._ref_bb_pos_ref_bb_pos = None
190  self._mdl_bb_pos_mdl_bb_pos = None
191  self._ref_full_bb_pos_ref_full_bb_pos = None
192  self._mdl_full_bb_pos_mdl_full_bb_pos = None
193  self._transform_transform = None
194  self._superposed_mdl_bb_pos_superposed_mdl_bb_pos = None
195  self._bb_rmsd_bb_rmsd = None
196  self._gdt_8_gdt_8 = None
197  self._gdt_4_gdt_4 = None
198  self._gdt_2_gdt_2 = None
199  self._gdt_1_gdt_1 = None
200  self._ost_query_ost_query = None
201  self._flat_mapping_flat_mapping = None
202  self._inconsistent_residues_inconsistent_residues = None
203 
204  @property
205  def lDDT(self):
206  """ lDDT of representation result
207 
208  Depends on how you call :func:`ChainMapper.GetRepr` whether this is
209  backbone only or full atom lDDT.
210 
211  :type: :class:`float`
212  """
213  return self._lDDT_lDDT
214 
215  @property
216  def substructure(self):
217  """ The full substructure for which we searched for a
218  representation
219 
220  :type: :class:`ost.mol.EntityView`
221  """
222  return self._substructure_substructure
223 
224  @property
225  def ref_view(self):
226  """ View which contains the mapped subset of :attr:`substructure`
227 
228  :type: :class:`ost.mol.EntityView`
229  """
230  return self._ref_view_ref_view
231 
232  @property
233  def mdl_view(self):
234  """ The :attr:`ref_view` representation in the model
235 
236  :type: :class:`ost.mol.EntityView`
237  """
238  return self._mdl_view_mdl_view
239 
240  @property
241  def ref_residues(self):
242  """ The reference residues
243 
244  :type: class:`mol.ResidueViewList`
245  """
246  return self.ref_viewref_view.residues
247 
248  @property
249  def mdl_residues(self):
250  """ The model residues
251 
252  :type: :class:`mol.ResidueViewList`
253  """
254  return self.mdl_viewmdl_view.residues
255 
256  @property
258  """ A list of mapped residue whose names do not match (eg. ALA in the
259  reference and LEU in the model).
260 
261  The mismatches are reported as a tuple of :class:`~ost.mol.ResidueView`
262  (reference, model), or as an empty list if all the residue names match.
263 
264  :type: :class:`list`
265  """
266  if self._inconsistent_residues_inconsistent_residues is None:
267  self._inconsistent_residues_inconsistent_residues = self._GetInconsistentResidues_GetInconsistentResidues(
268  self.ref_residuesref_residues, self.mdl_residuesmdl_residues)
269  return self._inconsistent_residues_inconsistent_residues
270 
271  @property
272  def ref_bb_pos(self):
273  """ Representative backbone positions for reference residues.
274 
275  Thats CA positions for peptides and C3' positions for Nucleotides.
276 
277  :type: :class:`geom.Vec3List`
278  """
279  if self._ref_bb_pos_ref_bb_pos is None:
280  self._ref_bb_pos_ref_bb_pos = self._GetBBPos_GetBBPos(self.ref_residuesref_residues)
281  return self._ref_bb_pos_ref_bb_pos
282 
283  @property
284  def mdl_bb_pos(self):
285  """ Representative backbone positions for model residues.
286 
287  Thats CA positions for peptides and C3' positions for Nucleotides.
288 
289  :type: :class:`geom.Vec3List`
290  """
291  if self._mdl_bb_pos_mdl_bb_pos is None:
292  self._mdl_bb_pos_mdl_bb_pos = self._GetBBPos_GetBBPos(self.mdl_residuesmdl_residues)
293  return self._mdl_bb_pos_mdl_bb_pos
294 
295  @property
296  def ref_full_bb_pos(self):
297  """ Representative backbone positions for reference residues.
298 
299  Thats N, CA and C positions for peptides and O5', C5', C4', C3', O3'
300  positions for Nucleotides.
301 
302  :type: :class:`geom.Vec3List`
303  """
304  if self._ref_full_bb_pos_ref_full_bb_pos is None:
305  self._ref_full_bb_pos_ref_full_bb_pos = self._GetFullBBPos_GetFullBBPos(self.ref_residuesref_residues)
306  return self._ref_full_bb_pos_ref_full_bb_pos
307 
308  @property
309  def mdl_full_bb_pos(self):
310  """ Representative backbone positions for reference residues.
311 
312  Thats N, CA and C positions for peptides and O5', C5', C4', C3', O3'
313  positions for Nucleotides.
314 
315  :type: :class:`geom.Vec3List`
316  """
317  if self._mdl_full_bb_pos_mdl_full_bb_pos is None:
318  self._mdl_full_bb_pos_mdl_full_bb_pos = self._GetFullBBPos_GetFullBBPos(self.mdl_residuesmdl_residues)
319  return self._mdl_full_bb_pos_mdl_full_bb_pos
320 
321  @property
322  def transform(self):
323  """ Transformation to superpose mdl residues onto ref residues
324 
325  Superposition computed as minimal RMSD superposition on
326  :attr:`ref_bb_pos` and :attr:`mdl_bb_pos`. If number of positions is
327  smaller 3, the full_bb_pos equivalents are used instead.
328 
329  :type: :class:`ost.geom.Mat4`
330  """
331  if self._transform_transform is None:
332  if len(self.mdl_bb_posmdl_bb_pos) < 3:
333  self._transform_transform = _GetTransform(self.mdl_full_bb_posmdl_full_bb_pos,
334  self.ref_full_bb_posref_full_bb_pos, False)
335  else:
336  self._transform_transform = _GetTransform(self.mdl_bb_posmdl_bb_pos,
337  self.ref_bb_posref_bb_pos, False)
338  return self._transform_transform
339 
340  @property
342  """ :attr:`mdl_bb_pos` with :attr:`transform applied`
343 
344  :type: :class:`geom.Vec3List`
345  """
346  if self._superposed_mdl_bb_pos_superposed_mdl_bb_pos is None:
347  self._superposed_mdl_bb_pos_superposed_mdl_bb_pos = geom.Vec3List(self.mdl_bb_posmdl_bb_pos)
348  self._superposed_mdl_bb_pos_superposed_mdl_bb_pos.ApplyTransform(self.transformtransform)
349  return self._superposed_mdl_bb_pos_superposed_mdl_bb_pos
350 
351  @property
352  def bb_rmsd(self):
353  """ RMSD between :attr:`ref_bb_pos` and :attr:`superposed_mdl_bb_pos`
354 
355  :type: :class:`float`
356  """
357  if self._bb_rmsd_bb_rmsd is None:
358  self._bb_rmsd_bb_rmsd = self.ref_bb_posref_bb_pos.GetRMSD(self.superposed_mdl_bb_possuperposed_mdl_bb_pos)
359  return self._bb_rmsd_bb_rmsd
360 
361  @property
362  def gdt_8(self):
363  """ GDT with one single threshold: 8.0
364 
365  :type: :class:`float`
366  """
367  if self._gdt_8_gdt_8 is None:
368  self._gdt_8_gdt_8 = self.ref_bb_posref_bb_pos.GetGDT(self.superposed_mdl_bb_possuperposed_mdl_bb_pos, 8.0)
369  return self._gdt_8_gdt_8
370 
371  @property
372  def gdt_4(self):
373  """ GDT with one single threshold: 4.0
374 
375  :type: :class:`float`
376  """
377  if self._gdt_4_gdt_4 is None:
378  self._gdt_4_gdt_4 = self.ref_bb_posref_bb_pos.GetGDT(self.superposed_mdl_bb_possuperposed_mdl_bb_pos, 4.0)
379  return self._gdt_4_gdt_4
380 
381  @property
382  def gdt_2(self):
383  """ GDT with one single threshold: 2.0
384 
385  :type: :class:`float`
386  """
387  if self._gdt_2_gdt_2 is None:
388  self._gdt_2_gdt_2 = self.ref_bb_posref_bb_pos.GetGDT(self.superposed_mdl_bb_possuperposed_mdl_bb_pos, 2.0)
389  return self._gdt_2_gdt_2
390 
391  @property
392  def gdt_1(self):
393  """ GDT with one single threshold: 1.0
394 
395  :type: :class:`float`
396  """
397  if self._gdt_1_gdt_1 is None:
398  self._gdt_1_gdt_1 = self.ref_bb_posref_bb_pos.GetGDT(self.superposed_mdl_bb_possuperposed_mdl_bb_pos, 1.0)
399  return self._gdt_1_gdt_1
400 
401  @property
402  def ost_query(self):
403  """ query for mdl residues in OpenStructure query language
404 
405  Repr can be selected as ``full_mdl.Select(ost_query)``
406 
407  Returns invalid query if residue numbers have insertion codes.
408 
409  :type: :class:`str`
410  """
411  if self._ost_query_ost_query is None:
412  chain_rnums = dict()
413  for r in self.mdl_residuesmdl_residues:
414  chname = r.GetChain().GetName()
415  rnum = r.GetNumber().GetNum()
416  if chname not in chain_rnums:
417  chain_rnums[chname] = list()
418  chain_rnums[chname].append(str(rnum))
419  chain_queries = list()
420  for k,v in chain_rnums.items():
421  q = f"(cname={mol.QueryQuoteName(k)} and "
422  q += f"rnum={','.join(v)})"
423  chain_queries.append(q)
424  self._ost_query_ost_query = " or ".join(chain_queries)
425  return self._ost_query_ost_query
426 
427  def JSONSummary(self):
428  """ Returns JSON serializable summary of results
429  """
430  json_dict = dict()
431  json_dict["lDDT"] = self.lDDTlDDT
432  json_dict["ref_residues"] = [r.GetQualifiedName() for r in \
433  self.ref_residuesref_residues]
434  json_dict["mdl_residues"] = [r.GetQualifiedName() for r in \
435  self.mdl_residuesmdl_residues]
436  json_dict["transform"] = list(self.transformtransform.data)
437  json_dict["bb_rmsd"] = self.bb_rmsdbb_rmsd
438  json_dict["gdt_8"] = self.gdt_8gdt_8
439  json_dict["gdt_4"] = self.gdt_4gdt_4
440  json_dict["gdt_2"] = self.gdt_2gdt_2
441  json_dict["gdt_1"] = self.gdt_1gdt_1
442  json_dict["ost_query"] = self.ost_queryost_query
443  json_dict["flat_mapping"] = self.GetFlatChainMappingGetFlatChainMapping()
444  return json_dict
445 
446  def GetFlatChainMapping(self, mdl_as_key=False):
447  """ Returns flat mapping of all chains in the representation
448 
449  :param mdl_as_key: Default is target chain name as key and model chain
450  name as value. This can be reversed with this flag.
451  :returns: :class:`dict` with :class:`str` as key/value that describe
452  one-to-one mapping
453  """
454  flat_mapping = dict()
455  for trg_res, mdl_res in zip(self.ref_residuesref_residues, self.mdl_residuesmdl_residues):
456  if mdl_as_key:
457  flat_mapping[mdl_res.chain.name] = trg_res.chain.name
458  else:
459  flat_mapping[trg_res.chain.name] = mdl_res.chain.name
460  return flat_mapping
461 
462  def _GetFullBBPos(self, residues):
463  """ Helper to extract full backbone positions
464  """
465  exp_pep_atoms = ["N", "CA", "C"]
466  exp_nuc_atoms = ["\"O5'\"", "\"C5'\"", "\"C4'\"", "\"C3'\"", "\"O3'\""]
467  bb_pos = geom.Vec3List()
468  for r in residues:
469  if r.GetChemType() == mol.ChemType.NUCLEOTIDES:
470  exp_atoms = exp_nuc_atoms
471  elif r.GetChemType() == mol.ChemType.AMINOACIDS:
472  exp_atoms = exp_pep_atoms
473  else:
474  raise RuntimeError("Something terrible happened... RUN...")
475  for aname in exp_atoms:
476  a = r.FindAtom(aname)
477  if not a.IsValid():
478  raise RuntimeError("Something terrible happened... "
479  "RUN...")
480  bb_pos.append(a.GetPos())
481  return bb_pos
482 
483  def _GetBBPos(self, residues):
484  """ Helper to extract single representative position for each residue
485  """
486  bb_pos = geom.Vec3List()
487  for r in residues:
488  at = r.FindAtom("CA")
489  if not at.IsValid():
490  at = r.FindAtom("C3'")
491  if not at.IsValid():
492  raise RuntimeError("Something terrible happened... RUN...")
493  bb_pos.append(at.GetPos())
494  return bb_pos
495 
496  def _GetInconsistentResidues(self, ref_residues, mdl_residues):
497  """ Helper to extract a list of inconsistent residues.
498  """
499  if len(ref_residues) != len(mdl_residues):
500  raise ValueError("Something terrible happened... Reference and "
501  "model lengths differ... RUN...")
502  inconsistent_residues = list()
503  for ref_residue, mdl_residue in zip(ref_residues, mdl_residues):
504  if ref_residue.name != mdl_residue.name:
505  inconsistent_residues.append((ref_residue, mdl_residue))
506  return inconsistent_residues
507 
508 
510  """ Class to compute chain mappings
511 
512  All algorithms are performed on processed structures which fulfill
513  criteria as given in constructor arguments (*min_pep_length*,
514  "min_nuc_length") and only contain residues which have all required backbone
515  atoms. for peptide residues thats N, CA, C and CB (no CB for GLY), for
516  nucleotide residues thats O5', C5', C4', C3' and O3'.
517 
518  Chain mapping is a three step process:
519 
520  * Group chemically identical chains in *target* using pairwise
521  alignments that are either computed with Needleman-Wunsch (NW) or
522  simply derived from residue numbers (*resnum_alignments* flag).
523  In case of NW, *pep_subst_mat*, *pep_gap_open* and *pep_gap_ext*
524  and their nucleotide equivalents are relevant. Two chains are
525  considered identical if they fulfill the thresholds given by
526  *pep_seqid_thr*, *pep_gap_thr*, their nucleotide equivalents
527  respectively. The grouping information is available as
528  attributes of this class.
529 
530  * Map chains in an input model to these groups. Generating alignments
531  and the similarity criteria are the same as above. You can either
532  get the group mapping with :func:`GetChemMapping` or directly call
533  one of the full fletched one-to-one chain mapping functions which
534  execute that step internally.
535 
536  * Obtain one-to-one mapping for chains in an input model and
537  *target* with one of the available mapping functions. Just to get an
538  idea of complexity. If *target* and *model* are octamers, there are
539  ``8! = 40320`` possible chain mappings.
540 
541  :param target: Target structure onto which models are mapped.
542  Computations happen on a selection only containing
543  polypeptides and polynucleotides.
544  :type target: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
545  :param resnum_alignments: Use residue numbers instead of
546  Needleman-Wunsch to compute pairwise
547  alignments. Relevant for :attr:`~chem_groups`
548  and related attributes.
549  :type resnum_alignments: :class:`bool`
550  :param pep_seqid_thr: Threshold used to decide when two chains are
551  identical. 95 percent tolerates the few mutations
552  crystallographers like to do.
553  :type pep_seqid_thr: :class:`float`
554  :param pep_gap_thr: Additional threshold to avoid gappy alignments with
555  high seqid. By default this is disabled (set to 1.0).
556  This threshold checks for a maximum allowed fraction
557  of gaps in any of the two sequences after stripping
558  terminal gaps. The reason for not just normalizing
559  seqid by the longer sequence is that one sequence
560  might be a perfect subsequence of the other but only
561  cover half of it.
562  :type pep_gap_thr: :class:`float`
563  :param nuc_seqid_thr: Nucleotide equivalent for *pep_seqid_thr*
564  :type nuc_seqid_thr: :class:`float`
565  :param nuc_gap_thr: Nucleotide equivalent for *nuc_gap_thr*
566  :type nuc_gap_thr: :class:`float`
567  :param pep_subst_mat: Substitution matrix to align peptide sequences,
568  irrelevant if *resnum_alignments* is True,
569  defaults to seq.alg.BLOSUM62
570  :type pep_subst_mat: :class:`ost.seq.alg.SubstWeightMatrix`
571  :param pep_gap_open: Gap open penalty to align peptide sequences,
572  irrelevant if *resnum_alignments* is True
573  :type pep_gap_open: :class:`int`
574  :param pep_gap_ext: Gap extension penalty to align peptide sequences,
575  irrelevant if *resnum_alignments* is True
576  :type pep_gap_ext: :class:`int`
577  :param nuc_subst_mat: Nucleotide equivalent for *pep_subst_mat*,
578  defaults to seq.alg.NUC44
579  :type nuc_subst_mat: :class:`ost.seq.alg.SubstWeightMatrix`
580  :param nuc_gap_open: Nucleotide equivalent for *pep_gap_open*
581  :type nuc_gap_open: :class:`int`
582  :param nuc_gap_ext: Nucleotide equivalent for *pep_gap_ext*
583  :type nuc_gap_ext: :class:`int`
584  :param min_pep_length: Minimal number of residues for a peptide chain to be
585  considered in target and in models.
586  :type min_pep_length: :class:`int`
587  :param min_nuc_length: Minimal number of residues for a nucleotide chain to be
588  considered in target and in models.
589  :type min_nuc_length: :class:`int`
590  :param n_max_naive: Max possible chain mappings that are enumerated in
591  :func:`~GetNaivelDDTMapping` /
592  :func:`~GetDecomposerlDDTMapping`. A
593  :class:`RuntimeError` is raised in case of bigger
594  complexity.
595  :type n_max_naive: :class:`int`
596  """
597  def __init__(self, target, resnum_alignments=False,
598  pep_seqid_thr = 95., pep_gap_thr = 1.0,
599  nuc_seqid_thr = 95., nuc_gap_thr = 1.0,
600  pep_subst_mat = seq.alg.BLOSUM62, pep_gap_open = -11,
601  pep_gap_ext = -1, nuc_subst_mat = seq.alg.NUC44,
602  nuc_gap_open = -4, nuc_gap_ext = -4,
603  min_pep_length = 6, min_nuc_length = 4,
604  n_max_naive = 1e8):
605 
606  # attributes
607  self.resnum_alignmentsresnum_alignments = resnum_alignments
608  self.pep_seqid_thrpep_seqid_thr = pep_seqid_thr
609  self.pep_gap_thrpep_gap_thr = pep_gap_thr
610  self.nuc_seqid_thrnuc_seqid_thr = nuc_seqid_thr
611  self.nuc_gap_thrnuc_gap_thr = nuc_gap_thr
612  self.min_pep_lengthmin_pep_length = min_pep_length
613  self.min_nuc_lengthmin_nuc_length = min_nuc_length
614  self.n_max_naiven_max_naive = n_max_naive
615 
616  # lazy computed attributes
617  self._chem_groups_chem_groups = None
618  self._chem_group_alignments_chem_group_alignments = None
619  self._chem_group_ref_seqs_chem_group_ref_seqs = None
620  self._chem_group_types_chem_group_types = None
621 
622  # helper class to generate pairwise alignments
623  self.aligneraligner = _Aligner(resnum_aln = resnum_alignments,
624  pep_subst_mat = pep_subst_mat,
625  pep_gap_open = pep_gap_open,
626  pep_gap_ext = pep_gap_ext,
627  nuc_subst_mat = nuc_subst_mat,
628  nuc_gap_open = nuc_gap_open,
629  nuc_gap_ext = nuc_gap_ext)
630 
631  # target structure preprocessing
632  self._target, self._polypep_seqs, self._polynuc_seqs_polynuc_seqs = \
633  self.ProcessStructureProcessStructure(target)
634 
635  @property
636  def target(self):
637  """Target structure that only contains peptides/nucleotides
638 
639  Contains only residues that have the backbone representatives
640  (CA for peptide and C3' for nucleotides) to avoid ATOMSEQ alignment
641  inconsistencies when switching between all atom and backbone only
642  representations.
643 
644  :type: :class:`ost.mol.EntityView`
645  """
646  return self._target
647 
648  @property
649  def polypep_seqs(self):
650  """Sequences of peptide chains in :attr:`~target`
651 
652  Respective :class:`EntityView` from *target* for each sequence s are
653  available as ``s.GetAttachedView()``
654 
655  :type: :class:`ost.seq.SequenceList`
656  """
657  return self._polypep_seqs
658 
659  @property
660  def polynuc_seqs(self):
661  """Sequences of nucleotide chains in :attr:`~target`
662 
663  Respective :class:`EntityView` from *target* for each sequence s are
664  available as ``s.GetAttachedView()``
665 
666  :type: :class:`ost.seq.SequenceList`
667  """
668  return self._polynuc_seqs_polynuc_seqs
669 
670  @property
671  def chem_groups(self):
672  """Groups of chemically equivalent chains in :attr:`~target`
673 
674  First chain in group is the one with longest sequence.
675 
676  :getter: Computed on first use (cached)
677  :type: :class:`list` of :class:`list` of :class:`str` (chain names)
678  """
679  if self._chem_groups_chem_groups is None:
680  self._chem_groups_chem_groups = list()
681  for a in self.chem_group_alignmentschem_group_alignments:
682  self._chem_groups_chem_groups.append([s.GetName() for s in a.sequences])
683  return self._chem_groups_chem_groups
684 
685  @property
687  """MSA for each group in :attr:`~chem_groups`
688 
689  Sequences in MSAs exhibit same order as in :attr:`~chem_groups` and
690  have the respective :class:`ost.mol.EntityView` from *target* attached.
691 
692  :getter: Computed on first use (cached)
693  :type: :class:`ost.seq.AlignmentList`
694  """
695  if self._chem_group_alignments_chem_group_alignments is None:
696  self._chem_group_alignments_chem_group_alignments, self._chem_group_types_chem_group_types = \
697  _GetChemGroupAlignments(self.polypep_seqspolypep_seqs, self.polynuc_seqspolynuc_seqs,
698  self.aligneraligner,
699  pep_seqid_thr=self.pep_seqid_thrpep_seqid_thr,
700  pep_gap_thr=self.pep_gap_thrpep_gap_thr,
701  nuc_seqid_thr=self.nuc_seqid_thrnuc_seqid_thr,
702  nuc_gap_thr=self.nuc_gap_thrnuc_gap_thr)
703 
704  return self._chem_group_alignments_chem_group_alignments
705 
706  @property
708  """Reference (longest) sequence for each group in :attr:`~chem_groups`
709 
710  Respective :class:`EntityView` from *target* for each sequence s are
711  available as ``s.GetAttachedView()``
712 
713  :getter: Computed on first use (cached)
714  :type: :class:`ost.seq.SequenceList`
715  """
716  if self._chem_group_ref_seqs_chem_group_ref_seqs is None:
717  self._chem_group_ref_seqs_chem_group_ref_seqs = seq.CreateSequenceList()
718  for a in self.chem_group_alignmentschem_group_alignments:
719  s = seq.CreateSequence(a.GetSequence(0).GetName(),
720  a.GetSequence(0).GetGaplessString())
721  s.AttachView(a.GetSequence(0).GetAttachedView())
722  self._chem_group_ref_seqs_chem_group_ref_seqs.AddSequence(s)
723  return self._chem_group_ref_seqs_chem_group_ref_seqs
724 
725  @property
726  def chem_group_types(self):
727  """ChemType of each group in :attr:`~chem_groups`
728 
729  Specifying if groups are poly-peptides/nucleotides, i.e.
730  :class:`ost.mol.ChemType.AMINOACIDS` or
731  :class:`ost.mol.ChemType.NUCLEOTIDES`
732 
733  :getter: Computed on first use (cached)
734  :type: :class:`list` of :class:`ost.mol.ChemType`
735  """
736  if self._chem_group_types_chem_group_types is None:
737  self._chem_group_alignments_chem_group_alignments, self._chem_group_types_chem_group_types = \
738  _GetChemGroupAlignments(self.polypep_seqspolypep_seqs, self.polynuc_seqspolynuc_seqs,
739  self.aligneraligner,
740  pep_seqid_thr=self.pep_seqid_thrpep_seqid_thr,
741  pep_gap_thr=self.pep_gap_thrpep_gap_thr,
742  nuc_seqid_thr=self.nuc_seqid_thrnuc_seqid_thr,
743  nuc_gap_thr=self.nuc_gap_thrnuc_gap_thr)
744 
745  return self._chem_group_types_chem_group_types
746 
747  def GetChemMapping(self, model):
748  """Maps sequences in *model* to chem_groups of target
749 
750  :param model: Model from which to extract sequences, a
751  selection that only includes peptides and nucleotides
752  is performed and returned along other results.
753  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
754  :returns: Tuple with two lists of length `len(self.chem_groups)` and
755  an :class:`ost.mol.EntityView` representing *model*:
756  1) Each element is a :class:`list` with mdl chain names that
757  map to the chem group at that position.
758  2) Each element is a :class:`ost.seq.AlignmentList` aligning
759  these mdl chain sequences to the chem group ref sequences.
760  3) A selection of *model* that only contains polypeptides and
761  polynucleotides whose ATOMSEQ exactly matches the sequence
762  info in the returned alignments.
763  """
764  mdl, mdl_pep_seqs, mdl_nuc_seqs = self.ProcessStructureProcessStructure(model)
765  mapping = [list() for x in self.chem_groupschem_groups]
766  alns = [seq.AlignmentList() for x in self.chem_groupschem_groups]
767 
768  for s in mdl_pep_seqs:
769  idx, aln = _MapSequence(self.chem_group_ref_seqschem_group_ref_seqs,
770  self.chem_group_typeschem_group_types,
771  s, mol.ChemType.AMINOACIDS,
772  self.aligneraligner)
773  if idx is not None:
774  mapping[idx].append(s.GetName())
775  alns[idx].append(aln)
776 
777  for s in mdl_nuc_seqs:
778  idx, aln = _MapSequence(self.chem_group_ref_seqschem_group_ref_seqs,
779  self.chem_group_typeschem_group_types,
780  s, mol.ChemType.NUCLEOTIDES,
781  self.aligneraligner)
782  if idx is not None:
783  mapping[idx].append(s.GetName())
784  alns[idx].append(aln)
785 
786  return (mapping, alns, mdl)
787 
788 
789  def GetlDDTMapping(self, model, inclusion_radius=15.0,
790  thresholds=[0.5, 1.0, 2.0, 4.0], strategy="heuristic",
791  steep_opt_rate = None, block_seed_size = 5,
792  block_blocks_per_chem_group = 5,
793  chem_mapping_result = None,
794  heuristic_n_max_naive = 40320):
795  """ Identify chain mapping by optimizing lDDT score
796 
797  Maps *model* chain sequences to :attr:`~chem_groups` and find mapping
798  based on backbone only lDDT score (CA for amino acids C3' for
799  Nucleotides).
800 
801  Either performs a naive search, i.e. enumerate all possible mappings or
802  executes a greedy strategy that tries to identify a (close to) optimal
803  mapping in an iterative way by starting from a start mapping (seed). In
804  each iteration, the one-to-one mapping that leads to highest increase
805  in number of conserved contacts is added with the additional requirement
806  that this added mapping must have non-zero interface counts towards the
807  already mapped chains. So basically we're "growing" the mapped structure
808  by only adding connected stuff.
809 
810  The available strategies:
811 
812  * **naive**: Enumerates all possible mappings and returns best
813 
814  * **greedy_fast**: perform all vs. all single chain lDDTs within the
815  respective ref/mdl chem groups. The mapping with highest number of
816  conserved contacts is selected as seed for greedy extension
817 
818  * **greedy_full**: try multiple seeds for greedy extension, i.e. try
819  all ref/mdl chain combinations within the respective chem groups and
820  retain the mapping leading to the best lDDT.
821 
822  * **greedy_block**: try multiple seeds for greedy extension, i.e. try
823  all ref/mdl chain combinations within the respective chem groups and
824  extend them to *block_seed_size*. *block_blocks_per_chem_group*
825  for each chem group are selected for exhaustive extension.
826 
827  * **heuristic**: Uses *naive* strategy if number of possible mappings
828  is within *heuristic_n_max_naive*. The default of 40320 corresponds
829  to an octamer (8!=40320). A structure with stoichiometry A6B2 would be
830  6!*2!=1440 etc. If the number of possible mappings is larger,
831  *greedy_full* is used.
832 
833  Sets :attr:`MappingResult.opt_score` in case of no trivial one-to-one
834  mapping.
835 
836  :param model: Model to map
837  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
838  :param inclusion_radius: Inclusion radius for lDDT
839  :type inclusion_radius: :class:`float`
840  :param thresholds: Thresholds for lDDT
841  :type thresholds: :class:`list` of :class:`float`
842  :param strategy: Strategy to find mapping. Must be in ["naive",
843  "greedy_fast", "greedy_full", "greedy_block"]
844  :type strategy: :class:`str`
845  :param steep_opt_rate: Only relevant for greedy strategies.
846  If set, every *steep_opt_rate* mappings, a simple
847  optimization is executed with the goal of
848  avoiding local minima. The optimization
849  iteratively checks all possible swaps of mappings
850  within their respective chem groups and accepts
851  swaps that improve lDDT score. Iteration stops as
852  soon as no improvement can be achieved anymore.
853  :type steep_opt_rate: :class:`int`
854  :param block_seed_size: Param for *greedy_block* strategy - Initial seeds
855  are extended by that number of chains.
856  :type block_seed_size: :class:`int`
857  :param block_blocks_per_chem_group: Param for *greedy_block* strategy -
858  Number of blocks per chem group that
859  are extended in an initial search
860  for high scoring local solutions.
861  :type block_blocks_per_chem_group: :class:`int`
862  :param chem_mapping_result: Pro param. The result of
863  :func:`~GetChemMapping` where you provided
864  *model*. If set, *model* parameter is not
865  used.
866  :type chem_mapping_result: :class:`tuple`
867  :returns: A :class:`MappingResult`
868  """
869 
870  strategies = ["naive", "greedy_fast", "greedy_full", "greedy_block",
871  "heuristic"]
872  if strategy not in strategies:
873  raise RuntimeError(f"Strategy must be in {strategies}")
874 
875  if chem_mapping_result is None:
876  chem_mapping, chem_group_alns, mdl = self.GetChemMappingGetChemMapping(model)
877  else:
878  chem_mapping, chem_group_alns, mdl = chem_mapping_result
879 
880  ref_mdl_alns = _GetRefMdlAlns(self.chem_groupschem_groups,
881  self.chem_group_alignmentschem_group_alignments,
882  chem_mapping,
883  chem_group_alns)
884 
885  # check for the simplest case
886  one_to_one = _CheckOneToOneMapping(self.chem_groupschem_groups, chem_mapping)
887  if one_to_one is not None:
888  alns = dict()
889  for ref_group, mdl_group in zip(self.chem_groupschem_groups, one_to_one):
890  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
891  if ref_ch is not None and mdl_ch is not None:
892  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
893  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
894  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
895  alns[(ref_ch, mdl_ch)] = aln
896  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
897  one_to_one, alns)
898 
899  if strategy == "heuristic":
900  if _NMappingsWithin(self.chem_groupschem_groups, chem_mapping,
901  heuristic_n_max_naive):
902  strategy = "naive"
903  else:
904  strategy = "greedy_full"
905 
906  mapping = None
907  opt_lddt = None
908 
909  if strategy == "naive":
910  mapping, opt_lddt = _lDDTNaive(self.targettarget, mdl, inclusion_radius,
911  thresholds, self.chem_groupschem_groups,
912  chem_mapping, ref_mdl_alns,
913  self.n_max_naiven_max_naive)
914  else:
915  # its one of the greedy strategies - setup greedy searcher
916  the_greed = _lDDTGreedySearcher(self.targettarget, mdl, self.chem_groupschem_groups,
917  chem_mapping, ref_mdl_alns,
918  inclusion_radius=inclusion_radius,
919  thresholds=thresholds,
920  steep_opt_rate=steep_opt_rate)
921  if strategy == "greedy_fast":
922  mapping = _lDDTGreedyFast(the_greed)
923  elif strategy == "greedy_full":
924  mapping = _lDDTGreedyFull(the_greed)
925  elif strategy == "greedy_block":
926  mapping = _lDDTGreedyBlock(the_greed, block_seed_size,
927  block_blocks_per_chem_group)
928  # cached => lDDT computation is fast here
929  opt_lddt = the_greed.Score(mapping)
930 
931  alns = dict()
932  for ref_group, mdl_group in zip(self.chem_groupschem_groups, mapping):
933  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
934  if ref_ch is not None and mdl_ch is not None:
935  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
936  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
937  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
938  alns[(ref_ch, mdl_ch)] = aln
939 
940  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
941  mapping, alns, opt_score = opt_lddt)
942 
943 
944  def GetQSScoreMapping(self, model, contact_d = 12.0, strategy = "heuristic",
945  block_seed_size = 5, block_blocks_per_chem_group = 5,
946  heuristic_n_max_naive = 40320, steep_opt_rate = None,
947  chem_mapping_result = None,
948  greedy_prune_contact_map = True):
949  """ Identify chain mapping based on QSScore
950 
951  Scoring is based on CA/C3' positions which are present in all chains of
952  a :attr:`chem_groups` as well as the *model* chains which are mapped to
953  that respective chem group.
954 
955  The following strategies are available:
956 
957  * **naive**: Naively iterate all possible mappings and return best based
958  on QS score.
959 
960  * **greedy_fast**: perform all vs. all single chain lDDTs within the
961  respective ref/mdl chem groups. The mapping with highest number of
962  conserved contacts is selected as seed for greedy extension.
963  Extension is based on QS-score.
964 
965  * **greedy_full**: try multiple seeds for greedy extension, i.e. try
966  all ref/mdl chain combinations within the respective chem groups and
967  retain the mapping leading to the best QS-score.
968 
969  * **greedy_block**: try multiple seeds for greedy extension, i.e. try
970  all ref/mdl chain combinations within the respective chem groups and
971  extend them to *block_seed_size*. *block_blocks_per_chem_group*
972  for each chem group are selected for exhaustive extension.
973 
974  * **heuristic**: Uses *naive* strategy if number of possible mappings
975  is within *heuristic_n_max_naive*. The default of 40320 corresponds
976  to an octamer (8!=40320). A structure with stoichiometry A6B2 would be
977  6!*2!=1440 etc. If the number of possible mappings is larger,
978  *greedy_full* is used.
979 
980  Sets :attr:`MappingResult.opt_score` in case of no trivial one-to-one
981  mapping.
982 
983  :param model: Model to map
984  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
985  :param contact_d: Max distance between two residues to be considered as
986  contact in qs scoring
987  :type contact_d: :class:`float`
988  :param strategy: Strategy for sampling, must be in ["naive",
989  "greedy_fast", "greedy_full", "greedy_block"]
990  :type strategy: :class:`str`
991  :param chem_mapping_result: Pro param. The result of
992  :func:`~GetChemMapping` where you provided
993  *model*. If set, *model* parameter is not
994  used.
995  :type chem_mapping_result: :class:`tuple`
996  :param greedy_prune_contact_map: Relevant for all strategies that use
997  greedy extensions. If True, only chains
998  with at least 3 contacts (8A CB
999  distance) towards already mapped chains
1000  in trg/mdl are considered for
1001  extension. All chains that give a
1002  potential non-zero QS-score increase
1003  are used otherwise (at least one
1004  contact within 12A). The consequence
1005  is reduced runtime and usually no
1006  real reduction in accuracy.
1007  :returns: A :class:`MappingResult`
1008  """
1009 
1010  strategies = ["naive", "greedy_fast", "greedy_full", "greedy_block", "heuristic"]
1011  if strategy not in strategies:
1012  raise RuntimeError(f"strategy must be {strategies}")
1013 
1014  if chem_mapping_result is None:
1015  chem_mapping, chem_group_alns, mdl = self.GetChemMappingGetChemMapping(model)
1016  else:
1017  chem_mapping, chem_group_alns, mdl = chem_mapping_result
1018  ref_mdl_alns = _GetRefMdlAlns(self.chem_groupschem_groups,
1019  self.chem_group_alignmentschem_group_alignments,
1020  chem_mapping,
1021  chem_group_alns)
1022  # check for the simplest case
1023  one_to_one = _CheckOneToOneMapping(self.chem_groupschem_groups, chem_mapping)
1024  if one_to_one is not None:
1025  alns = dict()
1026  for ref_group, mdl_group in zip(self.chem_groupschem_groups, one_to_one):
1027  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
1028  if ref_ch is not None and mdl_ch is not None:
1029  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1030  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
1031  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
1032  alns[(ref_ch, mdl_ch)] = aln
1033  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
1034  one_to_one, alns)
1035 
1036  if strategy == "heuristic":
1037  if _NMappingsWithin(self.chem_groupschem_groups, chem_mapping,
1038  heuristic_n_max_naive):
1039  strategy = "naive"
1040  else:
1041  strategy = "greedy_full"
1042 
1043  mapping = None
1044  opt_qsscore = None
1045 
1046  if strategy == "naive":
1047  mapping, opt_qsscore = _QSScoreNaive(self.targettarget, mdl,
1048  self.chem_groupschem_groups,
1049  chem_mapping, ref_mdl_alns,
1050  contact_d, self.n_max_naiven_max_naive)
1051  else:
1052  # its one of the greedy strategies - setup greedy searcher
1053  the_greed = _QSScoreGreedySearcher(self.targettarget, mdl,
1054  self.chem_groupschem_groups,
1055  chem_mapping, ref_mdl_alns,
1056  contact_d = contact_d,
1057  steep_opt_rate=steep_opt_rate,
1058  greedy_prune_contact_map = greedy_prune_contact_map)
1059  if strategy == "greedy_fast":
1060  mapping = _QSScoreGreedyFast(the_greed)
1061  elif strategy == "greedy_full":
1062  mapping = _QSScoreGreedyFull(the_greed)
1063  elif strategy == "greedy_block":
1064  mapping = _QSScoreGreedyBlock(the_greed, block_seed_size,
1065  block_blocks_per_chem_group)
1066  # cached => QSScore computation is fast here
1067  opt_qsscore = the_greed.Score(mapping, check=False).QS_global
1068 
1069  alns = dict()
1070  for ref_group, mdl_group in zip(self.chem_groupschem_groups, mapping):
1071  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
1072  if ref_ch is not None and mdl_ch is not None:
1073  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1074  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
1075  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
1076  alns[(ref_ch, mdl_ch)] = aln
1077 
1078  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
1079  mapping, alns, opt_score = opt_qsscore)
1080 
1081  def GetRMSDMapping(self, model, strategy = "heuristic", subsampling=50,
1082  chem_mapping_result = None, heuristic_n_max_naive = 120):
1083  """Identify chain mapping based on minimal RMSD superposition
1084 
1085  Superposition and scoring is based on CA/C3' positions which are present
1086  in all chains of a :attr:`chem_groups` as well as the *model*
1087  chains which are mapped to that respective chem group.
1088 
1089  The following strategies are available:
1090 
1091  * **naive**: Naively iterate all possible mappings and return the one
1092  with lowes RMSD.
1093 
1094  * **greedy_single**: perform all vs. all single chain superpositions
1095  within the respective ref/mdl chem groups to use as starting points.
1096  For each starting point, iteratively add the model/target chain pair
1097  with lowest RMSD until a full mapping is achieved. The mapping with
1098  lowest RMSD is returned.
1099 
1100  * **greedy_iterative**: Same as greedy_single_rmsd exept that the
1101  transformation gets updated with each added chain pair.
1102 
1103  * **heuristic**: Uses *naive* strategy if number of possible mappings
1104  is within *heuristic_n_max_naive*. The default of 120 corresponds
1105  to a homo-pentamer (5!=120). If the number of possible mappings is
1106  larger, *greedy_iterative* is used.
1107 
1108  :param model: Model to map
1109  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
1110  :param strategy: Strategy for sampling. Must be in ["naive",
1111  "greedy_single", "greedy_iterative"]
1112  :type strategy: :class:`str`
1113  :param subsampling: If given, only an equally distributed subset
1114  of CA/C3' positions in each chain are used for
1115  superposition/scoring.
1116  :type subsampling: :class:`int`
1117  :param chem_mapping_result: Pro param. The result of
1118  :func:`~GetChemMapping` where you provided
1119  *model*. If set, *model* parameter is not
1120  used.
1121  :type chem_mapping_result: :class:`tuple`
1122  :returns: A :class:`MappingResult`
1123  """
1124 
1125  strategies = ["naive", "greedy_single", "greedy_iterative", "heuristic"]
1126 
1127  if strategy not in strategies:
1128  raise RuntimeError(f"strategy must be {strategies}")
1129 
1130  if chem_mapping_result is None:
1131  chem_mapping, chem_group_alns, mdl = self.GetChemMappingGetChemMapping(model)
1132  else:
1133  chem_mapping, chem_group_alns, mdl = chem_mapping_result
1134  ref_mdl_alns = _GetRefMdlAlns(self.chem_groupschem_groups,
1135  self.chem_group_alignmentschem_group_alignments,
1136  chem_mapping,
1137  chem_group_alns)
1138 
1139  # check for the simplest case
1140  one_to_one = _CheckOneToOneMapping(self.chem_groupschem_groups, chem_mapping)
1141  if one_to_one is not None:
1142  alns = dict()
1143  for ref_group, mdl_group in zip(self.chem_groupschem_groups, one_to_one):
1144  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
1145  if ref_ch is not None and mdl_ch is not None:
1146  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1147  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
1148  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
1149  alns[(ref_ch, mdl_ch)] = aln
1150  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
1151  one_to_one, alns)
1152 
1153  trg_group_pos, mdl_group_pos = _GetRefPos(self.targettarget, mdl,
1154  self.chem_group_alignmentschem_group_alignments,
1155  chem_group_alns,
1156  max_pos = subsampling)
1157 
1158  if strategy == "heuristic":
1159  if _NMappingsWithin(self.chem_groupschem_groups, chem_mapping,
1160  heuristic_n_max_naive):
1161  strategy = "naive"
1162  else:
1163  strategy = "greedy_iterative"
1164 
1165  mapping = None
1166 
1167  if strategy.startswith("greedy"):
1168  # get transforms of any mdl chain onto any trg chain in same chem
1169  # group that fulfills gdtts threshold
1170  initial_transforms = list()
1171  initial_mappings = list()
1172  for trg_pos, trg_chains, mdl_pos, mdl_chains in zip(trg_group_pos,
1173  self.chem_groupschem_groups,
1174  mdl_group_pos,
1175  chem_mapping):
1176  for t_pos, t in zip(trg_pos, trg_chains):
1177  for m_pos, m in zip(mdl_pos, mdl_chains):
1178  if len(t_pos) >= 3 and len(m_pos) >= 3:
1179  transform = _GetTransform(m_pos, t_pos, False)
1180  initial_transforms.append(transform)
1181  initial_mappings.append((t,m))
1182 
1183  if strategy == "greedy_single":
1184  mapping = _SingleRigidRMSD(initial_transforms,
1185  initial_mappings,
1186  self.chem_groupschem_groups,
1187  chem_mapping,
1188  trg_group_pos,
1189  mdl_group_pos)
1190 
1191 
1192  elif strategy == "greedy_iterative":
1193  mapping = _IterativeRigidRMSD(initial_transforms,
1194  initial_mappings,
1195  self.chem_groupschem_groups, chem_mapping,
1196  trg_group_pos, mdl_group_pos)
1197  elif strategy == "naive":
1198  mapping = _NaiveRMSD(self.chem_groupschem_groups, chem_mapping,
1199  trg_group_pos, mdl_group_pos,
1200  self.n_max_naiven_max_naive)
1201 
1202  # translate mapping format and return
1203  final_mapping = list()
1204  for ref_chains in self.chem_groupschem_groups:
1205  mapped_mdl_chains = list()
1206  for ref_ch in ref_chains:
1207  if ref_ch in mapping:
1208  mapped_mdl_chains.append(mapping[ref_ch])
1209  else:
1210  mapped_mdl_chains.append(None)
1211  final_mapping.append(mapped_mdl_chains)
1212 
1213  alns = dict()
1214  for ref_group, mdl_group in zip(self.chem_groupschem_groups, final_mapping):
1215  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
1216  if ref_ch is not None and mdl_ch is not None:
1217  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1218  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
1219  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
1220  alns[(ref_ch, mdl_ch)] = aln
1221 
1222  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
1223  final_mapping, alns)
1224 
1225  def GetMapping(self, model, n_max_naive = 40320):
1226  """ Convenience function to get mapping with currently preferred method
1227 
1228  If number of possible chain mappings is <= *n_max_naive*, a naive
1229  QS-score mapping is performed and optimal QS-score is guaranteed.
1230  For anything else, a QS-score mapping with the greedy_full strategy is
1231  performed (greedy_prune_contact_map = True). The default for
1232  *n_max_naive* of 40320 corresponds to an octamer (8!=40320). A
1233  structure with stoichiometry A6B2 would be 6!*2!=1440 etc.
1234 
1235  If :attr:`~target` has nucleotide sequences, the QS-score target
1236  function is replaced with a backbone only lDDT score that has
1237  an inclusion radius of 30A.
1238  """
1239  if len(self.polynuc_seqspolynuc_seqs) > 0:
1240  return self.GetlDDTMappingGetlDDTMapping(model, strategy = "heuristic",
1241  inclusion_radius = 30.0,
1242  heuristic_n_max_naive = n_max_naive)
1243  else:
1244  return self.GetQSScoreMappingGetQSScoreMapping(model, strategy="heuristic",
1245  greedy_prune_contact_map=True,
1246  heuristic_n_max_naive = n_max_naive)
1247 
1248  def GetRepr(self, substructure, model, topn=1, inclusion_radius=15.0,
1249  thresholds=[0.5, 1.0, 2.0, 4.0], bb_only=False,
1250  only_interchain=False, chem_mapping_result = None,
1251  global_mapping = None):
1252  """ Identify *topn* representations of *substructure* in *model*
1253 
1254  *substructure* defines a subset of :attr:`~target` for which one
1255  wants the *topn* representations in *model*. Representations are scored
1256  and sorted by lDDT.
1257 
1258  :param substructure: A :class:`ost.mol.EntityView` which is a subset of
1259  :attr:`~target`. Should be selected with the
1260  OpenStructure query language. Example: if you're
1261  interested in residues with number 42,43 and 85 in
1262  chain A:
1263  ``substructure=mapper.target.Select("cname=A and rnum=42,43,85")``
1264  A :class:`RuntimeError` is raised if *substructure*
1265  does not refer to the same underlying
1266  :class:`ost.mol.EntityHandle` as :attr:`~target`.
1267  :type substructure: :class:`ost.mol.EntityView`
1268  :param model: Structure in which one wants to find representations for
1269  *substructure*
1270  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
1271  :param topn: Max number of representations that are returned
1272  :type topn: :class:`int`
1273  :param inclusion_radius: Inclusion radius for lDDT
1274  :type inclusion_radius: :class:`float`
1275  :param thresholds: Thresholds for lDDT
1276  :type thresholds: :class:`list` of :class:`float`
1277  :param bb_only: Only consider backbone atoms in lDDT computation
1278  :type bb_only: :class:`bool`
1279  :param only_interchain: Only score interchain contacts in lDDT. Useful
1280  if you want to identify interface patches.
1281  :type only_interchain: :class:`bool`
1282  :param chem_mapping_result: Pro param. The result of
1283  :func:`~GetChemMapping` where you provided
1284  *model*. If set, *model* parameter is not
1285  used.
1286  :type chem_mapping_result: :class:`tuple`
1287  :param global_mapping: Pro param. Specify a global mapping result. This
1288  fully defines the desired representation in the
1289  model but extracts it and enriches it with all
1290  the nice attributes of :class:`ReprResult`.
1291  The target attribute in *global_mapping* must be
1292  of the same entity as self.target and the model
1293  attribute of *global_mapping* must be of the same
1294  entity as *model*.
1295  :type global_mapping: :class:`MappingResult`
1296  :returns: :class:`list` of :class:`ReprResult`
1297  """
1298 
1299  if topn < 1:
1300  raise RuntimeError("topn must be >= 1")
1301 
1302  if global_mapping is not None:
1303  # ensure that this mapping is derived from the same structures
1304  if global_mapping.target.handle.GetHashCode() != \
1305  self.targettarget.handle.GetHashCode():
1306  raise RuntimeError("global_mapping.target must be the same "
1307  "entity as self.target")
1308  if global_mapping.model.handle.GetHashCode() != \
1309  model.handle.GetHashCode():
1310  raise RuntimeError("global_mapping.model must be the same "
1311  "entity as model param")
1312 
1313  # check whether substructure really is a subset of self.target
1314  for r in substructure.residues:
1315  ch_name = r.GetChain().GetName()
1316  rnum = r.GetNumber()
1317  target_r = self.targettarget.FindResidue(ch_name, rnum)
1318  if not target_r.IsValid():
1319  raise RuntimeError(f"substructure has residue "
1320  f"{r.GetQualifiedName()} which is not in "
1321  f"self.target")
1322  if target_r.handle.GetHashCode() != r.handle.GetHashCode():
1323  raise RuntimeError(f"substructure has residue "
1324  f"{r.GetQualifiedName()} which has an "
1325  f"equivalent in self.target but it does "
1326  f"not refer to the same underlying "
1327  f"EntityHandle")
1328  for a in r.atoms:
1329  target_a = target_r.FindAtom(a.GetName())
1330  if not target_a.IsValid():
1331  raise RuntimeError(f"substructure has atom "
1332  f"{a.GetQualifiedName()} which is not "
1333  f"in self.target")
1334  if a.handle.GetHashCode() != target_a.handle.GetHashCode():
1335  raise RuntimeError(f"substructure has atom "
1336  f"{a.GetQualifiedName()} which has an "
1337  f"equivalent in self.target but it does "
1338  f"not refer to the same underlying "
1339  f"EntityHandle")
1340 
1341  # check whether it contains either CA or C3'
1342  ca = r.FindAtom("CA")
1343  c3 = r.FindAtom("C3'") # FindAtom with prime in string is tested
1344  # and works
1345  if not ca.IsValid() and not c3.IsValid():
1346  raise RuntimeError("All residues in substructure must contain "
1347  "a backbone atom named CA or C3\'")
1348 
1349  # perform mapping and alignments on full structures
1350  if chem_mapping_result is None:
1351  chem_mapping, chem_group_alns, mdl = self.GetChemMappingGetChemMapping(model)
1352  else:
1353  chem_mapping, chem_group_alns, mdl = chem_mapping_result
1354  ref_mdl_alns = _GetRefMdlAlns(self.chem_groupschem_groups,
1355  self.chem_group_alignmentschem_group_alignments,
1356  chem_mapping,
1357  chem_group_alns)
1358 
1359  # Get residue indices relative to full target chain
1360  substructure_res_indices = dict()
1361  for ch in substructure.chains:
1362  full_ch = self.targettarget.FindChain(ch.GetName())
1363  idx = [full_ch.GetResidueIndex(r.GetNumber()) for r in ch.residues]
1364  substructure_res_indices[ch.GetName()] = idx
1365 
1366  # strip down variables to make them specific to substructure
1367  # keep only chem_groups which are present in substructure
1368  substructure_chem_groups = list()
1369  substructure_chem_mapping = list()
1370 
1371  chnames = set([ch.GetName() for ch in substructure.chains])
1372  for chem_group, mapping in zip(self.chem_groupschem_groups, chem_mapping):
1373  substructure_chem_group = [ch for ch in chem_group if ch in chnames]
1374  if len(substructure_chem_group) > 0:
1375  substructure_chem_groups.append(substructure_chem_group)
1376  substructure_chem_mapping.append(mapping)
1377 
1378  # early stopping if no mdl chain can be mapped to substructure
1379  n_mapped_mdl_chains = sum([len(m) for m in substructure_chem_mapping])
1380  if n_mapped_mdl_chains == 0:
1381  return list()
1382 
1383  # strip the reference sequence in alignments to only contain
1384  # sequence from substructure
1385  substructure_ref_mdl_alns = dict()
1386  mdl_views = dict()
1387  for ch in mdl.chains:
1388  mdl_views[ch.GetName()] = _CSel(mdl, [ch.GetName()])
1389  for chem_group, mapping in zip(substructure_chem_groups,
1390  substructure_chem_mapping):
1391  for ref_ch in chem_group:
1392  for mdl_ch in mapping:
1393  full_aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1394  ref_seq = full_aln.GetSequence(0)
1395  # the ref sequence is tricky... we start with a gap only
1396  # sequence and only add olcs as defined by the residue
1397  # indices that we extracted before...
1398  tmp = ['-'] * len(full_aln)
1399  for idx in substructure_res_indices[ref_ch]:
1400  idx_in_seq = ref_seq.GetPos(idx)
1401  tmp[idx_in_seq] = ref_seq[idx_in_seq]
1402  ref_seq = seq.CreateSequence(ref_ch, ''.join(tmp))
1403  ref_seq.AttachView(_CSel(substructure, [ref_ch]))
1404  mdl_seq = full_aln.GetSequence(1)
1405  mdl_seq = seq.CreateSequence(mdl_seq.GetName(),
1406  mdl_seq.GetString())
1407  mdl_seq.AttachView(mdl_views[mdl_ch])
1408  aln = seq.CreateAlignment()
1409  aln.AddSequence(ref_seq)
1410  aln.AddSequence(mdl_seq)
1411  substructure_ref_mdl_alns[(ref_ch, mdl_ch)] = aln
1412 
1413  lddt_scorer = lddt.lDDTScorer(substructure,
1414  inclusion_radius = inclusion_radius,
1415  bb_only = bb_only)
1416  scored_mappings = list()
1417 
1418  if global_mapping:
1419  # construct mapping of substructure from global mapping
1420  flat_mapping = global_mapping.GetFlatMapping()
1421  mapping = list()
1422  for chem_group, chem_mapping in zip(substructure_chem_groups,
1423  substructure_chem_mapping):
1424  chem_group_mapping = list()
1425  for ch in chem_group:
1426  if ch in flat_mapping:
1427  mdl_ch = flat_mapping[ch]
1428  if mdl_ch in chem_mapping:
1429  chem_group_mapping.append(mdl_ch)
1430  else:
1431  chem_group_mapping.append(None)
1432  else:
1433  chem_group_mapping.append(None)
1434  mapping.append(chem_group_mapping)
1435  mappings = [mapping]
1436  else:
1437  mappings = list(_ChainMappings(substructure_chem_groups,
1438  substructure_chem_mapping,
1439  self.n_max_naiven_max_naive))
1440 
1441  for mapping in mappings:
1442  # chain_mapping and alns as input for lDDT computation
1443  lddt_chain_mapping = dict()
1444  lddt_alns = dict()
1445  n_res_aln = 0
1446  for ref_chem_group, mdl_chem_group in zip(substructure_chem_groups,
1447  mapping):
1448  for ref_ch, mdl_ch in zip(ref_chem_group, mdl_chem_group):
1449  # some mdl chains can be None
1450  if mdl_ch is not None:
1451  lddt_chain_mapping[mdl_ch] = ref_ch
1452  aln = substructure_ref_mdl_alns[(ref_ch, mdl_ch)]
1453  lddt_alns[mdl_ch] = aln
1454  tmp = [int(c[0] != '-' and c[1] != '-') for c in aln]
1455  n_res_aln += sum(tmp)
1456  # don't compute lDDT if no single residue in mdl and ref is aligned
1457  if n_res_aln == 0:
1458  continue
1459 
1460  lDDT, _ = lddt_scorer.lDDT(mdl, thresholds=thresholds,
1461  chain_mapping=lddt_chain_mapping,
1462  residue_mapping = lddt_alns,
1463  check_resnames = False,
1464  no_intrachain = only_interchain)
1465 
1466  if lDDT is None:
1467  ost.LogVerbose("No valid contacts in the reference")
1468  lDDT = 0.0 # that means, that we have not a single valid contact
1469  # in lDDT. For the code below to work, we just set it
1470  # to a terrible score => 0.0
1471 
1472  if len(scored_mappings) == 0:
1473  scored_mappings.append((lDDT, mapping))
1474  elif len(scored_mappings) < topn:
1475  scored_mappings.append((lDDT, mapping))
1476  scored_mappings.sort(reverse=True, key=lambda x: x[0])
1477  elif lDDT > scored_mappings[-1][0]:
1478  scored_mappings.append((lDDT, mapping))
1479  scored_mappings.sort(reverse=True, key=lambda x: x[0])
1480  scored_mappings = scored_mappings[:topn]
1481 
1482  # finalize and return
1483  results = list()
1484  for scored_mapping in scored_mappings:
1485  ref_view = substructure.handle.CreateEmptyView()
1486  mdl_view = mdl.handle.CreateEmptyView()
1487  for ref_ch_group, mdl_ch_group in zip(substructure_chem_groups,
1488  scored_mapping[1]):
1489  for ref_ch, mdl_ch in zip(ref_ch_group, mdl_ch_group):
1490  if ref_ch is not None and mdl_ch is not None:
1491  aln = substructure_ref_mdl_alns[(ref_ch, mdl_ch)]
1492  for col in aln:
1493  if col[0] != '-' and col[1] != '-':
1494  ref_view.AddResidue(col.GetResidue(0),
1495  mol.ViewAddFlag.INCLUDE_ALL)
1496  mdl_view.AddResidue(col.GetResidue(1),
1497  mol.ViewAddFlag.INCLUDE_ALL)
1498  results.append(ReprResult(scored_mapping[0], substructure,
1499  ref_view, mdl_view))
1500  return results
1501 
1502  def GetNMappings(self, model):
1503  """ Returns number of possible mappings
1504 
1505  :param model: Model with chains that are mapped onto
1506  :attr:`chem_groups`
1507  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
1508  """
1509  chem_mapping, chem_group_alns, mdl = self.GetChemMappingGetChemMapping(model)
1510  return _NMappings(self.chem_groupschem_groups, chem_mapping)
1511 
1512  def ProcessStructure(self, ent):
1513  """ Entity processing for chain mapping
1514 
1515  * Selects view containing peptide and nucleotide residues which have
1516  required backbone atoms present - for peptide residues thats
1517  N, CA, C and CB (no CB for GLY), for nucleotide residues thats
1518  O5', C5', C4', C3' and O3'.
1519  * filters view by chain lengths, see *min_pep_length* and
1520  *min_nuc_length* in constructor
1521  * Extracts atom sequences for each chain in that view
1522  * Attaches corresponding :class:`ost.mol.EntityView` to each sequence
1523  * If residue number alignments are used, strictly increasing residue
1524  numbers without insertion codes are ensured in each chain
1525 
1526  :param ent: Entity to process
1527  :type ent: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
1528  :returns: Tuple with 3 elements: 1) :class:`ost.mol.EntityView`
1529  containing peptide and nucleotide residues 2)
1530  :class:`ost.seq.SequenceList` containing ATOMSEQ sequences
1531  for each polypeptide chain in returned view, sequences have
1532  :class:`ost.mol.EntityView` of according chains attached
1533  3) same for polynucleotide chains
1534  """
1535  view = ent.CreateEmptyView()
1536  exp_pep_atoms = ["N", "CA", "C", "CB"]
1537  exp_nuc_atoms = ["\"O5'\"", "\"C5'\"", "\"C4'\"", "\"C3'\"", "\"O3'\""]
1538  pep_query = "peptide=true and aname=" + ','.join(exp_pep_atoms)
1539  nuc_query = "nucleotide=true and aname=" + ','.join(exp_nuc_atoms)
1540 
1541  pep_sel = ent.Select(pep_query)
1542  for r in pep_sel.residues:
1543  if len(r.atoms) == 4:
1544  view.AddResidue(r.handle, mol.INCLUDE_ALL)
1545  elif r.name == "GLY" and len(r.atoms) == 3:
1546  atom_names = [a.GetName() for a in r.atoms]
1547  if sorted(atom_names) == ["C", "CA", "N"]:
1548  view.AddResidue(r.handle, mol.INCLUDE_ALL)
1549 
1550  nuc_sel = ent.Select(nuc_query)
1551  for r in nuc_sel.residues:
1552  if len(r.atoms) == 5:
1553  view.AddResidue(r.handle, mol.INCLUDE_ALL)
1554 
1555  polypep_seqs = seq.CreateSequenceList()
1556  polynuc_seqs = seq.CreateSequenceList()
1557 
1558  if len(view.residues) == 0:
1559  # no residues survived => return
1560  return (view, polypep_seqs, polynuc_seqs)
1561 
1562  for ch in view.chains:
1563  n_res = len(ch.residues)
1564  n_pep = sum([r.IsPeptideLinking() for r in ch.residues])
1565  n_nuc = sum([r.IsNucleotideLinking() for r in ch.residues])
1566 
1567  # guarantee that we have either pep or nuc (no mix of the two)
1568  if n_pep > 0 and n_nuc > 0:
1569  raise RuntimeError(f"Must not mix peptide and nucleotide linking "
1570  f"residues in same chain ({ch.GetName()})")
1571 
1572  if (n_pep + n_nuc) != n_res:
1573  raise RuntimeError("All residues must either be peptide_linking "
1574  "or nucleotide_linking")
1575 
1576  # filter out short chains
1577  if n_pep > 0 and n_pep < self.min_pep_lengthmin_pep_length:
1578  continue
1579 
1580  if n_nuc > 0 and n_nuc < self.min_nuc_lengthmin_nuc_length:
1581  continue
1582 
1583  # the superfast residue number based alignment adds some
1584  # restrictions on the numbers themselves:
1585  # 1) no insertion codes 2) strictly increasing
1586  if self.resnum_alignmentsresnum_alignments:
1587  # check if no insertion codes are present in residue numbers
1588  ins_codes = [r.GetNumber().GetInsCode() for r in ch.residues]
1589  if len(set(ins_codes)) != 1 or ins_codes[0] != '\0':
1590  raise RuntimeError("Residue numbers in input structures must not "
1591  "contain insertion codes")
1592 
1593  # check if residue numbers are strictly increasing
1594  nums = [r.GetNumber().GetNum() for r in ch.residues]
1595  if not all(i < j for i, j in zip(nums, nums[1:])):
1596  raise RuntimeError("Residue numbers in input structures must be "
1597  "strictly increasing for each chain")
1598 
1599  s = ''.join([r.one_letter_code for r in ch.residues])
1600  s = seq.CreateSequence(ch.GetName(), s)
1601  s.AttachView(_CSel(view, [ch.GetName()]))
1602  if n_pep == n_res:
1603  polypep_seqs.AddSequence(s)
1604  elif n_nuc == n_res:
1605  polynuc_seqs.AddSequence(s)
1606  else:
1607  raise RuntimeError("This shouldnt happen")
1608 
1609  if len(polypep_seqs) == 0 and len(polynuc_seqs) == 0:
1610  raise RuntimeError(f"No chain fulfilled minimum length requirement "
1611  f"to be considered in chain mapping "
1612  f"({self.min_pep_length} for peptide chains, "
1613  f"{self.min_nuc_length} for nucleotide chains) "
1614  f"- mapping failed")
1615 
1616  # select for chains for which we actually extracted the sequence
1617  chain_names = [s.GetAttachedView().chains[0].name for s in polypep_seqs]
1618  chain_names += [s.GetAttachedView().chains[0].name for s in polynuc_seqs]
1619  view = _CSel(view, chain_names)
1620 
1621  return (view, polypep_seqs, polynuc_seqs)
1622 
1623  def Align(self, s1, s2, stype):
1624  """ Access to internal sequence alignment functionality
1625 
1626  Alignment parameterization is setup at ChainMapper construction
1627 
1628  :param s1: First sequence to align - must have view attached in case
1629  of resnum_alignments
1630  :type s1: :class:`ost.seq.SequenceHandle`
1631  :param s2: Second sequence to align - must have view attached in case
1632  of resnum_alignments
1633  :type s2: :class:`ost.seq.SequenceHandle`
1634  :param stype: Type of sequences to align, must be in
1635  [:class:`ost.mol.ChemType.AMINOACIDS`,
1636  :class:`ost.mol.ChemType.NUCLEOTIDES`]
1637  :returns: Pairwise alignment of s1 and s2
1638  """
1639  if stype not in [mol.ChemType.AMINOACIDS, mol.ChemType.NUCLEOTIDES]:
1640  raise RuntimeError("stype must be ost.mol.ChemType.AMINOACIDS or "
1641  "ost.mol.ChemType.NUCLEOTIDES")
1642  return self.aligneraligner.Align(s1, s2, chem_type = stype)
1643 
1644 
1645 # INTERNAL HELPERS
1646 
1647 class _Aligner:
1648  def __init__(self, pep_subst_mat = seq.alg.BLOSUM62, pep_gap_open = -5,
1649  pep_gap_ext = -2, nuc_subst_mat = seq.alg.NUC44,
1650  nuc_gap_open = -4, nuc_gap_ext = -4, resnum_aln = False):
1651  """ Helper class to compute alignments
1652 
1653  Sets default values for substitution matrix, gap open and gap extension
1654  penalties. They are only used in default mode (Needleman-Wunsch aln).
1655  If *resnum_aln* is True, only residue numbers of views that are attached
1656  to input sequences are considered.
1657  """
1658  self.pep_subst_matpep_subst_mat = pep_subst_mat
1659  self.pep_gap_openpep_gap_open = pep_gap_open
1660  self.pep_gap_extpep_gap_ext = pep_gap_ext
1661  self.nuc_subst_matnuc_subst_mat = nuc_subst_mat
1662  self.nuc_gap_opennuc_gap_open = nuc_gap_open
1663  self.nuc_gap_extnuc_gap_ext = nuc_gap_ext
1664  self.resnum_alnresnum_aln = resnum_aln
1665 
1666  def Align(self, s1, s2, chem_type=None):
1667  if self.resnum_alnresnum_aln:
1668  return self.ResNumAlignResNumAlign(s1, s2)
1669  else:
1670  if chem_type is None:
1671  raise RuntimeError("Must specify chem_type for NW alignment")
1672  return self.NWAlignNWAlign(s1, s2, chem_type)
1673 
1674  def NWAlign(self, s1, s2, chem_type):
1675  """ Returns pairwise alignment using Needleman-Wunsch algorithm
1676 
1677  :param s1: First sequence to align
1678  :type s1: :class:`ost.seq.SequenceHandle`
1679  :param s2: Second sequence to align
1680  :type s2: :class:`ost.seq.SequenceHandle`
1681  :param chem_type: Must be in [:class:`ost.mol.ChemType.AMINOACIDS`,
1682  :class:`ost.mol.ChemType.NUCLEOTIDES`], determines
1683  substitution matrix and gap open/extension penalties
1684  :type chem_type: :class:`ost.mol.ChemType`
1685  :returns: Alignment with s1 as first and s2 as second sequence
1686  """
1687  if chem_type == mol.ChemType.AMINOACIDS:
1688  return seq.alg.SemiGlobalAlign(s1, s2, self.pep_subst_matpep_subst_mat,
1689  gap_open=self.pep_gap_openpep_gap_open,
1690  gap_ext=self.pep_gap_extpep_gap_ext)[0]
1691  elif chem_type == mol.ChemType.NUCLEOTIDES:
1692  return seq.alg.SemiGlobalAlign(s1, s2, self.nuc_subst_matnuc_subst_mat,
1693  gap_open=self.nuc_gap_opennuc_gap_open,
1694  gap_ext=self.nuc_gap_extnuc_gap_ext)[0]
1695  else:
1696  raise RuntimeError("Invalid ChemType")
1697  return aln
1698 
1699  def ResNumAlign(self, s1, s2):
1700  """ Returns pairwise alignment using residue numbers of attached views
1701 
1702  Assumes that there are no insertion codes (alignment only on numerical
1703  component) and that resnums are strictly increasing (fast min/max
1704  identification). These requirements are assured if a structure has been
1705  processed by :class:`ChainMapper`.
1706 
1707  :param s1: First sequence to align, must have :class:`ost.mol.EntityView`
1708  attached
1709  :type s1: :class:`ost.seq.SequenceHandle`
1710  :param s2: Second sequence to align, must have :class:`ost.mol.EntityView`
1711  attached
1712  :type s2: :class:`ost.seq.SequenceHandle`
1713  """
1714  assert(s1.HasAttachedView())
1715  assert(s2.HasAttachedView())
1716  v1 = s1.GetAttachedView()
1717  rnums1 = [r.GetNumber().GetNum() for r in v1.residues]
1718  v2 = s2.GetAttachedView()
1719  rnums2 = [r.GetNumber().GetNum() for r in v2.residues]
1720 
1721  min_num = min(rnums1[0], rnums2[0])
1722  max_num = max(rnums1[-1], rnums2[-1])
1723  aln_length = max_num - min_num + 1
1724 
1725  aln_s1 = ['-'] * aln_length
1726  for r, rnum in zip(v1.residues, rnums1):
1727  aln_s1[rnum-min_num] = r.one_letter_code
1728 
1729  aln_s2 = ['-'] * aln_length
1730  for r, rnum in zip(v2.residues, rnums2):
1731  aln_s2[rnum-min_num] = r.one_letter_code
1732 
1733  aln = seq.CreateAlignment()
1734  aln.AddSequence(seq.CreateSequence(s1.GetName(), ''.join(aln_s1)))
1735  aln.AddSequence(seq.CreateSequence(s2.GetName(), ''.join(aln_s2)))
1736  return aln
1737 
1738 def _GetAlnPropsTwo(aln):
1739  """Returns basic properties of *aln* version two...
1740 
1741  :param aln: Alignment to compute properties
1742  :type aln: :class:`seq.AlignmentHandle`
1743  :returns: Tuple with 2 elements. 1) sequence identify in range [0, 100]
1744  considering aligned columns 2) Fraction of non-gap characters
1745  in first sequence that are covered by non-gap characters in
1746  second sequence.
1747  """
1748  assert(aln.GetCount() == 2)
1749  n_tot = sum([1 for col in aln if col[0] != '-'])
1750  n_aligned = sum([1 for col in aln if (col[0] != '-' and col[1] != '-')])
1751  return (seq.alg.SequenceIdentity(aln), float(n_aligned)/n_tot)
1752 
1753 def _GetAlnPropsOne(aln):
1754 
1755  """Returns basic properties of *aln* version one...
1756 
1757  :param aln: Alignment to compute properties
1758  :type aln: :class:`seq.AlignmentHandle`
1759  :returns: Tuple with 3 elements. 1) sequence identify in range [0, 100]
1760  considering aligned columns 2) Fraction of gaps between
1761  first and last aligned column in s1 3) same for s2.
1762  """
1763  assert(aln.GetCount() == 2)
1764  n_gaps_1 = str(aln.GetSequence(0)).strip('-').count('-')
1765  n_gaps_2 = str(aln.GetSequence(1)).strip('-').count('-')
1766  gap_frac_1 = float(n_gaps_1)/len(aln.GetSequence(0).GetGaplessString())
1767  gap_frac_2 = float(n_gaps_2)/len(aln.GetSequence(1).GetGaplessString())
1768  return (seq.alg.SequenceIdentity(aln), gap_frac_1, gap_frac_2)
1769 
1770 def _GetChemGroupAlignments(pep_seqs, nuc_seqs, aligner, pep_seqid_thr=95.,
1771  pep_gap_thr=0.1, nuc_seqid_thr=95.,
1772  nuc_gap_thr=0.1):
1773  """Returns alignments with groups of chemically equivalent chains
1774 
1775  :param pep_seqs: List of polypeptide sequences
1776  :type pep_seqs: :class:`seq.SequenceList`
1777  :param nuc_seqs: List of polynucleotide sequences
1778  :type nuc_seqs: :class:`seq.SequenceList`
1779  :param aligner: Helper class to generate pairwise alignments
1780  :type aligner: :class:`_Aligner`
1781  :param pep_seqid_thr: Threshold used to decide when two peptide chains are
1782  identical. 95 percent tolerates the few mutations
1783  crystallographers like to do.
1784  :type pep_seqid_thr: :class:`float`
1785  :param pep_gap_thr: Additional threshold to avoid gappy alignments with high
1786  seqid. The reason for not just normalizing seqid by the
1787  longer sequence is that one sequence might be a perfect
1788  subsequence of the other but only cover half of it. This
1789  threshold checks for a maximum allowed fraction of gaps
1790  in any of the two sequences after stripping terminal gaps.
1791  :type pep_gap_thr: :class:`float`
1792  :param nuc_seqid_thr: Nucleotide equivalent of *pep_seqid_thr*
1793  :type nuc_seqid_thr: :class:`float`
1794  :param nuc_gap_thr: Nucleotide equivalent of *nuc_gap_thr*
1795  :type nuc_gap_thr: :class:`float`
1796  :returns: Tuple with first element being an AlignmentList. Each alignment
1797  represents a group of chemically equivalent chains and the first
1798  sequence is the longest. Second element is a list of equivalent
1799  length specifying the types of the groups. List elements are in
1800  [:class:`ost.ChemType.AMINOACIDS`,
1801  :class:`ost.ChemType.NUCLEOTIDES`]
1802  """
1803  pep_groups = _GroupSequences(pep_seqs, pep_seqid_thr, pep_gap_thr, aligner,
1804  mol.ChemType.AMINOACIDS)
1805  nuc_groups = _GroupSequences(nuc_seqs, nuc_seqid_thr, nuc_gap_thr, aligner,
1806  mol.ChemType.NUCLEOTIDES)
1807  group_types = [mol.ChemType.AMINOACIDS] * len(pep_groups)
1808  group_types += [mol.ChemType.NUCLEOTIDES] * len(nuc_groups)
1809  groups = pep_groups
1810  groups.extend(nuc_groups)
1811  return (groups, group_types)
1812 
1813 def _GroupSequences(seqs, seqid_thr, gap_thr, aligner, chem_type):
1814  """Get list of alignments representing groups of equivalent sequences
1815 
1816  :param seqid_thr: Threshold used to decide when two chains are identical.
1817  :type seqid_thr: :class:`float`
1818  :param gap_thr: Additional threshold to avoid gappy alignments with high
1819  seqid. The reason for not just normalizing seqid by the
1820  longer sequence is that one sequence might be a perfect
1821  subsequence of the other but only cover half of it. This
1822  threshold checks for a maximum allowed fraction of gaps
1823  in any of the two sequences after stripping terminal gaps.
1824  :type gap_thr: :class:`float`
1825  :param aligner: Helper class to generate pairwise alignments
1826  :type aligner: :class:`_Aligner`
1827  :param chem_type: ChemType of seqs which is passed to *aligner*, must be in
1828  [:class:`ost.mol.ChemType.AMINOACIDS`,
1829  :class:`ost.mol.ChemType.NUCLEOTIDES`]
1830  :type chem_type: :class:`ost.mol.ChemType`
1831  :returns: A list of alignments, one alignment for each group
1832  with longest sequence (reference) as first sequence.
1833  :rtype: :class:`ost.seq.AlignmentList`
1834  """
1835  groups = list()
1836  for s_idx in range(len(seqs)):
1837  matching_group = None
1838  for g_idx in range(len(groups)):
1839  for g_s_idx in range(len(groups[g_idx])):
1840  aln = aligner.Align(seqs[s_idx], seqs[groups[g_idx][g_s_idx]],
1841  chem_type)
1842  sid, frac_i, frac_j = _GetAlnPropsOne(aln)
1843  if sid >= seqid_thr and frac_i < gap_thr and frac_j < gap_thr:
1844  matching_group = g_idx
1845  break
1846  if matching_group is not None:
1847  break
1848 
1849  if matching_group is None:
1850  groups.append([s_idx])
1851  else:
1852  groups[matching_group].append(s_idx)
1853 
1854  # sort based on sequence length
1855  sorted_groups = list()
1856  for g in groups:
1857  if len(g) > 1:
1858  tmp = sorted([[len(seqs[i]), i] for i in g], reverse=True)
1859  sorted_groups.append([x[1] for x in tmp])
1860  else:
1861  sorted_groups.append(g)
1862 
1863  # translate from indices back to sequences and directly generate alignments
1864  # of the groups with the longest (first) sequence as reference
1865  aln_list = seq.AlignmentList()
1866  for g in sorted_groups:
1867  if len(g) == 1:
1868  # aln with one single sequence
1869  aln_list.append(seq.CreateAlignment(seqs[g[0]]))
1870  else:
1871  # obtain pairwise aln of first sequence (reference) to all others
1872  alns = seq.AlignmentList()
1873  i = g[0]
1874  for j in g[1:]:
1875  alns.append(aligner.Align(seqs[i], seqs[j], chem_type))
1876  # and merge
1877  aln_list.append(seq.alg.MergePairwiseAlignments(alns, seqs[i]))
1878 
1879  # transfer attached views
1880  seq_dict = {s.GetName(): s for s in seqs}
1881  for aln_idx in range(len(aln_list)):
1882  for aln_s_idx in range(aln_list[aln_idx].GetCount()):
1883  s_name = aln_list[aln_idx].GetSequence(aln_s_idx).GetName()
1884  s = seq_dict[s_name]
1885  aln_list[aln_idx].AttachView(aln_s_idx, s.GetAttachedView())
1886 
1887  return aln_list
1888 
1889 def _MapSequence(ref_seqs, ref_types, s, s_type, aligner):
1890  """Tries top map *s* onto any of the sequences in *ref_seqs*
1891 
1892  Computes alignments of *s* to each of the reference sequences of equal type
1893  and sorts them by seqid*fraction_covered (seqid: sequence identity of
1894  aligned columns in alignment, fraction_covered: Fraction of non-gap
1895  characters in reference sequence that are covered by non-gap characters in
1896  *s*). Best scoring mapping is returned.
1897 
1898  :param ref_seqs: Reference sequences
1899  :type ref_seqs: :class:`ost.seq.SequenceList`
1900  :param ref_types: Types of reference sequences, e.g.
1901  ost.mol.ChemType.AminoAcids
1902  :type ref_types: :class:`list` of :class:`ost.mol.ChemType`
1903  :param s: Sequence to map
1904  :type s: :class:`ost.seq.SequenceHandle`
1905  :param s_type: Type of *s*, only try mapping to sequences in *ref_seqs*
1906  with equal type as defined in *ref_types*
1907  :param aligner: Helper class to generate pairwise alignments
1908  :type aligner: :class:`_Aligner`
1909  :returns: Tuple with two elements. 1) index of sequence in *ref_seqs* to
1910  which *s* can be mapped 2) Pairwise sequence alignment with
1911  sequence from *ref_seqs* as first sequence. Both elements are
1912  None if no mapping can be found.
1913  :raises: :class:`RuntimeError` if mapping is ambiguous, i.e. *s*
1914  successfully maps to more than one sequence in *ref_seqs*
1915  """
1916  scored_alns = list()
1917  for ref_idx, ref_seq in enumerate(ref_seqs):
1918  if ref_types[ref_idx] == s_type:
1919  aln = aligner.Align(ref_seq, s, s_type)
1920  seqid, fraction_covered = _GetAlnPropsTwo(aln)
1921  score = seqid * fraction_covered
1922  scored_alns.append((score, ref_idx, aln))
1923 
1924  if len(scored_alns) == 0:
1925  return (None, None) # no mapping possible...
1926 
1927  scored_alns = sorted(scored_alns, key=lambda x: x[0], reverse=True)
1928  return (scored_alns[0][1], scored_alns[0][2])
1929 
1930 def _GetRefMdlAlns(ref_chem_groups, ref_chem_group_msas, mdl_chem_groups,
1931  mdl_chem_group_alns, pairs=None):
1932  """ Get all possible ref/mdl chain alignments given chem group mapping
1933 
1934  :param ref_chem_groups: :attr:`ChainMapper.chem_groups`
1935  :type ref_chem_groups: :class:`list` of :class:`list` of :class:`str`
1936  :param ref_chem_group_msas: :attr:`ChainMapper.chem_group_alignments`
1937  :type ref_chem_group_msas: :class:`ost.seq.AlignmentList`
1938  :param mdl_chem_groups: Groups of model chains that are mapped to
1939  *ref_chem_groups*. Return value of
1940  :func:`ChainMapper.GetChemMapping`.
1941  :type mdl_chem_groups: :class:`list` of :class:`list` of :class:`str`
1942  :param mdl_chem_group_alns: A pairwise sequence alignment for every chain
1943  in *mdl_chem_groups* that aligns these sequences
1944  to the respective reference sequence.
1945  Return values of
1946  :func:`ChainMapper.GetChemMapping`.
1947  :type mdl_chem_group_alns: :class:`list` of :class:`ost.seq.AlignmentList`
1948  :param pairs: Pro param - restrict return dict to specified pairs. A set of
1949  tuples in form (<trg_ch>, <mdl_ch>)
1950  :type pairs: :class:`set`
1951  :returns: A dictionary holding all possible ref/mdl chain alignments. Keys
1952  in that dictionary are tuples of the form (ref_ch, mdl_ch) and
1953  values are the respective pairwise alignments with first sequence
1954  being from ref, the second from mdl.
1955  """
1956  # alignment of each model chain to chem_group reference sequence
1957  mdl_alns = dict()
1958  for alns in mdl_chem_group_alns:
1959  for aln in alns:
1960  mdl_chain_name = aln.GetSequence(1).GetName()
1961  mdl_alns[mdl_chain_name] = aln
1962 
1963  # generate all alignments between ref/mdl chain atomseqs that we will
1964  # ever observe
1965  ref_mdl_alns = dict()
1966  for ref_chains, mdl_chains, ref_aln in zip(ref_chem_groups, mdl_chem_groups,
1967  ref_chem_group_msas):
1968  for ref_ch in ref_chains:
1969  for mdl_ch in mdl_chains:
1970  if pairs is not None and (ref_ch, mdl_ch) not in pairs:
1971  continue
1972  # obtain alignments of mdl and ref chains towards chem
1973  # group ref sequence and merge them
1974  aln_list = seq.AlignmentList()
1975  # do ref aln
1976  s1 = ref_aln.GetSequence(0)
1977  s2 = ref_aln.GetSequence(ref_chains.index(ref_ch))
1978  aln_list.append(seq.CreateAlignment(s1, s2))
1979  # do mdl aln
1980  aln_list.append(mdl_alns[mdl_ch])
1981  # merge
1982  ref_seq = seq.CreateSequence(s1.GetName(),
1983  s1.GetGaplessString())
1984  merged_aln = seq.alg.MergePairwiseAlignments(aln_list,
1985  ref_seq)
1986  # merged_aln:
1987  # seq1: ref seq of chem group
1988  # seq2: seq of ref chain
1989  # seq3: seq of mdl chain
1990  # => we need the alignment between seq2 and seq3
1991  s2 = merged_aln.GetSequence(1)
1992  s3 = merged_aln.GetSequence(2)
1993  # cut leading and trailing gap columns
1994  a = 0 # number of leading gap columns
1995  for idx in range(len(s2)):
1996  if s2[idx] != '-' or s3[idx] != '-':
1997  break
1998  a += 1
1999  b = 0 # number of trailing gap columns
2000  for idx in reversed(range(len(s2))):
2001  if s2[idx] != '-' or s3[idx] != '-':
2002  break
2003  b += 1
2004  s2 = seq.CreateSequence(s2.GetName(), s2[a: len(s2)-b])
2005  s3 = seq.CreateSequence(s3.GetName(), s3[a: len(s3)-b])
2006  ref_mdl_alns[(ref_ch, mdl_ch)] = seq.CreateAlignment(s2, s3)
2007 
2008  return ref_mdl_alns
2009 
2010 def _CheckOneToOneMapping(ref_chains, mdl_chains):
2011  """ Checks whether we already have a perfect one to one mapping
2012 
2013  That means each list in *ref_chains* has exactly one element and each
2014  list in *mdl_chains* has either one element (it's mapped) or is empty
2015  (ref chain has no mapped mdl chain). Returns None if no such mapping
2016  can be found.
2017 
2018  :param ref_chains: corresponds to :attr:`ChainMapper.chem_groups`
2019  :type ref_chains: :class:`list` of :class:`list` of :class:`str`
2020  :param mdl_chains: mdl chains mapped to chem groups in *ref_chains*, i.e.
2021  the return value of :func:`ChainMapper.GetChemMapping`
2022  :type mdl_chains: class:`list` of :class:`list` of :class:`str`
2023  :returns: A :class:`list` of :class:`list` if a one to one mapping is found,
2024  None otherwise
2025  """
2026  only_one_to_one = True
2027  one_to_one = list()
2028  for ref, mdl in zip(ref_chains, mdl_chains):
2029  if len(ref) == 1 and len(mdl) == 1:
2030  one_to_one.append(mdl)
2031  elif len(ref) == 1 and len(mdl) == 0:
2032  one_to_one.append([None])
2033  else:
2034  only_one_to_one = False
2035  break
2036  if only_one_to_one:
2037  return one_to_one
2038  else:
2039  return None
2040 
2042  def __init__(self, ref, mdl, ref_chem_groups, mdl_chem_groups,
2043  ref_mdl_alns, inclusion_radius = 15.0,
2044  thresholds = [0.5, 1.0, 2.0, 4.0],
2045  steep_opt_rate = None):
2046 
2047  """ Greedy extension of already existing but incomplete chain mappings
2048  """
2049  super().__init__(ref, ref_chem_groups, mdl, ref_mdl_alns,
2050  dist_thresh=inclusion_radius,
2051  dist_diff_thresholds=thresholds)
2052 
2053  self.mdl_chem_groupsmdl_chem_groups = mdl_chem_groups
2054  self.steep_opt_ratesteep_opt_rate = steep_opt_rate
2055 
2056  self.neighborsneighbors = {k: set() for k in self.trgtrg.chain_names}
2057  for interface in self.trgtrg.interacting_chains:
2058  self.neighborsneighbors[interface[0]].add(interface[1])
2059  self.neighborsneighbors[interface[1]].add(interface[0])
2060 
2061  assert(len(ref_chem_groups) == len(mdl_chem_groups))
2062  self.ref_chem_groupsref_chem_groups = ref_chem_groups
2063  self.mdl_chem_groupsmdl_chem_groups = mdl_chem_groups
2064  self.ref_ch_group_mapperref_ch_group_mapper = dict()
2065  self.mdl_ch_group_mappermdl_ch_group_mapper = dict()
2066  for g_idx, (ref_g, mdl_g) in enumerate(zip(ref_chem_groups,
2067  mdl_chem_groups)):
2068  for ch in ref_g:
2069  self.ref_ch_group_mapperref_ch_group_mapper[ch] = g_idx
2070  for ch in mdl_g:
2071  self.mdl_ch_group_mappermdl_ch_group_mapper[ch] = g_idx
2072 
2073  # keep track of mdl chains that potentially give lDDT contributions,
2074  # i.e. they have locations within inclusion_radius + max(thresholds)
2075  self.mdl_neighborsmdl_neighbors = {k: set() for k in self.mdlmdl.chain_names}
2076  for interface in self.mdlmdl.potentially_contributing_chains:
2077  self.mdl_neighborsmdl_neighbors[interface[0]].add(interface[1])
2078  self.mdl_neighborsmdl_neighbors[interface[1]].add(interface[0])
2079 
2080  def ExtendMapping(self, mapping, max_ext = None):
2081 
2082  if len(mapping) == 0:
2083  raise RuntimError("Mapping must contain a starting point")
2084 
2085  # Ref chains onto which we can map. The algorithm starts with a mapping
2086  # on ref_ch. From there we can start to expand to connected neighbors.
2087  # All neighbors that we can reach from the already mapped chains are
2088  # stored in this set which will be updated during runtime
2089  map_targets = set()
2090  for ref_ch in mapping.keys():
2091  map_targets.update(self.neighborsneighbors[ref_ch])
2092 
2093  # remove the already mapped chains
2094  for ref_ch in mapping.keys():
2095  map_targets.discard(ref_ch)
2096 
2097  if len(map_targets) == 0:
2098  return mapping # nothing to extend
2099 
2100  # keep track of what model chains are not yet mapped for each chem group
2101  free_mdl_chains = list()
2102  for chem_group in self.mdl_chem_groupsmdl_chem_groups:
2103  tmp = [x for x in chem_group if x not in mapping.values()]
2104  free_mdl_chains.append(set(tmp))
2105 
2106  # keep track of what ref chains got a mapping
2107  newly_mapped_ref_chains = list()
2108 
2109  something_happened = True
2110  while something_happened:
2111  something_happened=False
2112 
2113  if self.steep_opt_ratesteep_opt_rate is not None:
2114  n_chains = len(newly_mapped_ref_chains)
2115  if n_chains > 0 and n_chains % self.steep_opt_ratesteep_opt_rate == 0:
2116  mapping = self._SteepOpt_SteepOpt(mapping, newly_mapped_ref_chains)
2117 
2118  if max_ext is not None and len(newly_mapped_ref_chains) >= max_ext:
2119  break
2120 
2121  max_n = 0
2122  max_mapping = None
2123  for ref_ch in map_targets:
2124  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[ref_ch]
2125  for mdl_ch in free_mdl_chains[chem_group_idx]:
2126  # single chain score
2127  n_single = self._NSCConserved_NSCConserved(ref_ch, mdl_ch).sum()
2128  # scores towards neighbors that are already mapped
2129  n_inter = 0
2130  for neighbor in self.neighborsneighbors[ref_ch]:
2131  if neighbor in mapping and mapping[neighbor] in \
2132  self.mdl_neighborsmdl_neighbors[mdl_ch]:
2133  n_inter += self._NPairConserved_NPairConserved((ref_ch, neighbor),
2134  (mdl_ch, mapping[neighbor])).sum()
2135  n = n_single + n_inter
2136 
2137  if n_inter > 0 and n > max_n:
2138  # Only accept a new solution if its actually connected
2139  # i.e. n_inter > 0. Otherwise we could just map a big
2140  # fat mdl chain sitting somewhere in Nirvana
2141  max_n = n
2142  max_mapping = (ref_ch, mdl_ch)
2143 
2144  if max_n > 0:
2145  something_happened = True
2146  # assign new found mapping
2147  mapping[max_mapping[0]] = max_mapping[1]
2148 
2149  # add all neighboring chains to map targets as they are now
2150  # reachable
2151  for neighbor in self.neighborsneighbors[max_mapping[0]]:
2152  if neighbor not in mapping:
2153  map_targets.add(neighbor)
2154 
2155  # remove the ref chain from map targets
2156  map_targets.remove(max_mapping[0])
2157 
2158  # remove the mdl chain from free_mdl_chains - its taken...
2159  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[max_mapping[0]]
2160  free_mdl_chains[chem_group_idx].remove(max_mapping[1])
2161 
2162  # keep track of what ref chains got a mapping
2163  newly_mapped_ref_chains.append(max_mapping[0])
2164 
2165  return mapping
2166 
2167  def _SteepOpt(self, mapping, chains_to_optimize=None):
2168 
2169  # just optimize ALL ref chains if nothing specified
2170  if chains_to_optimize is None:
2171  chains_to_optimize = mapping.keys()
2172 
2173  # make sure that we only have ref chains which are actually mapped
2174  ref_chains = [x for x in chains_to_optimize if mapping[x] is not None]
2175 
2176  # group ref chains to be optimized into chem groups
2177  tmp = dict()
2178  for ch in ref_chains:
2179  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[ch]
2180  if chem_group_idx in tmp:
2181  tmp[chem_group_idx].append(ch)
2182  else:
2183  tmp[chem_group_idx] = [ch]
2184  chem_groups = list(tmp.values())
2185 
2186  # try all possible mapping swaps. Swaps that improve the score are
2187  # immediately accepted and we start all over again
2188  current_lddt = self.FromFlatMappingFromFlatMapping(mapping)
2189  something_happened = True
2190  while something_happened:
2191  something_happened = False
2192  for chem_group in chem_groups:
2193  if something_happened:
2194  break
2195  for ch1, ch2 in itertools.combinations(chem_group, 2):
2196  swapped_mapping = dict(mapping)
2197  swapped_mapping[ch1] = mapping[ch2]
2198  swapped_mapping[ch2] = mapping[ch1]
2199  score = self.FromFlatMappingFromFlatMapping(swapped_mapping)
2200  if score > current_lddt:
2201  something_happened = True
2202  mapping = swapped_mapping
2203  current_lddt = score
2204  break
2205  return mapping
2206 
2207 
2208 def _lDDTNaive(trg, mdl, inclusion_radius, thresholds, chem_groups,
2209  chem_mapping, ref_mdl_alns, n_max_naive):
2210  """ Naively iterates all possible chain mappings and returns the best
2211  """
2212  best_mapping = None
2213  best_lddt = -1.0
2214 
2215  # Setup scoring
2216  lddt_scorer = bb_lddt.BBlDDTScorer(trg, chem_groups, mdl, ref_mdl_alns,
2217  dist_thresh=inclusion_radius,
2218  dist_diff_thresholds=thresholds)
2219  for mapping in _ChainMappings(chem_groups, chem_mapping, n_max_naive):
2220  lDDT = lddt_scorer.Score(mapping, check=False)
2221  if lDDT > best_lddt:
2222  best_mapping = mapping
2223  best_lddt = lDDT
2224 
2225  return (best_mapping, best_lddt)
2226 
2227 
2228 def _GetSeeds(ref_chem_groups, mdl_chem_groups, mapped_ref_chains = set(),
2229  mapped_mdl_chains = set()):
2230  seeds = list()
2231  for ref_chains, mdl_chains in zip(ref_chem_groups,
2232  mdl_chem_groups):
2233  for ref_ch in ref_chains:
2234  if ref_ch not in mapped_ref_chains:
2235  for mdl_ch in mdl_chains:
2236  if mdl_ch not in mapped_mdl_chains:
2237  seeds.append((ref_ch, mdl_ch))
2238  return seeds
2239 
2240 
2241 def _lDDTGreedyFast(the_greed):
2242 
2243  something_happened = True
2244  mapping = dict()
2245 
2246  while something_happened:
2247  something_happened = False
2248  seeds = _GetSeeds(the_greed.ref_chem_groups,
2249  the_greed.mdl_chem_groups,
2250  mapped_ref_chains = set(mapping.keys()),
2251  mapped_mdl_chains = set(mapping.values()))
2252  # search for best scoring starting point
2253  n_best = 0
2254  best_seed = None
2255  for seed in seeds:
2256  n = the_greed._NSCConserved(seed[0], seed[1]).sum()
2257  if n > n_best:
2258  n_best = n
2259  best_seed = seed
2260  if n_best == 0:
2261  break # no proper seed found anymore...
2262  # add seed to mapping and start the greed
2263  mapping[best_seed[0]] = best_seed[1]
2264  mapping = the_greed.ExtendMapping(mapping)
2265  something_happened = True
2266 
2267  # translate mapping format and return
2268  final_mapping = list()
2269  for ref_chains in the_greed.ref_chem_groups:
2270  mapped_mdl_chains = list()
2271  for ref_ch in ref_chains:
2272  if ref_ch in mapping:
2273  mapped_mdl_chains.append(mapping[ref_ch])
2274  else:
2275  mapped_mdl_chains.append(None)
2276  final_mapping.append(mapped_mdl_chains)
2277 
2278  return final_mapping
2279 
2280 
2281 def _lDDTGreedyFull(the_greed):
2282  """ Uses each reference chain as starting point for expansion
2283  """
2284 
2285  seeds = _GetSeeds(the_greed.ref_chem_groups, the_greed.mdl_chem_groups)
2286  best_overall_score = -1.0
2287  best_overall_mapping = dict()
2288 
2289  for seed in seeds:
2290 
2291  # do initial extension
2292  mapping = the_greed.ExtendMapping({seed[0]: seed[1]})
2293 
2294  # repeat the process until we have a full mapping
2295  something_happened = True
2296  while something_happened:
2297  something_happened = False
2298  remnant_seeds = _GetSeeds(the_greed.ref_chem_groups,
2299  the_greed.mdl_chem_groups,
2300  mapped_ref_chains = set(mapping.keys()),
2301  mapped_mdl_chains = set(mapping.values()))
2302  if len(remnant_seeds) > 0:
2303  # still more mapping to be done
2304  best_score = -1.0
2305  best_mapping = None
2306  for remnant_seed in remnant_seeds:
2307  tmp_mapping = dict(mapping)
2308  tmp_mapping[remnant_seed[0]] = remnant_seed[1]
2309  tmp_mapping = the_greed.ExtendMapping(tmp_mapping)
2310  score = the_greed.FromFlatMapping(tmp_mapping)
2311  if score > best_score:
2312  best_score = score
2313  best_mapping = tmp_mapping
2314  if best_mapping is not None:
2315  something_happened = True
2316  mapping = best_mapping
2317 
2318  score = the_greed.FromFlatMapping(mapping)
2319  if score > best_overall_score:
2320  best_overall_score = score
2321  best_overall_mapping = mapping
2322 
2323  mapping = best_overall_mapping
2324 
2325  # translate mapping format and return
2326  final_mapping = list()
2327  for ref_chains in the_greed.ref_chem_groups:
2328  mapped_mdl_chains = list()
2329  for ref_ch in ref_chains:
2330  if ref_ch in mapping:
2331  mapped_mdl_chains.append(mapping[ref_ch])
2332  else:
2333  mapped_mdl_chains.append(None)
2334  final_mapping.append(mapped_mdl_chains)
2335 
2336  return final_mapping
2337 
2338 
2339 def _lDDTGreedyBlock(the_greed, seed_size, blocks_per_chem_group):
2340  """ try multiple seeds, i.e. try all ref/mdl chain combinations within the
2341  respective chem groups and compute single chain lDDTs. The
2342  *blocks_per_chem_group* best scoring ones are extend by *seed_size* chains
2343  and the best scoring one is exhaustively extended.
2344  """
2345 
2346  if seed_size is None or seed_size < 1:
2347  raise RuntimeError(f"seed_size must be an int >= 1 (got {seed_size})")
2348 
2349  if blocks_per_chem_group is None or blocks_per_chem_group < 1:
2350  raise RuntimeError(f"blocks_per_chem_group must be an int >= 1 "
2351  f"(got {blocks_per_chem_group})")
2352 
2353  max_ext = seed_size - 1 # -1 => start seed already has size 1
2354 
2355  ref_chem_groups = copy.deepcopy(the_greed.ref_chem_groups)
2356  mdl_chem_groups = copy.deepcopy(the_greed.mdl_chem_groups)
2357 
2358  mapping = dict()
2359  something_happened = True
2360  while something_happened:
2361  something_happened = False
2362  starting_blocks = list()
2363  for ref_chains, mdl_chains in zip(ref_chem_groups, mdl_chem_groups):
2364  if len(mdl_chains) == 0:
2365  continue # nothing to map
2366  ref_chains_copy = list(ref_chains)
2367  for i in range(blocks_per_chem_group):
2368  if len(ref_chains_copy) == 0:
2369  break
2370  seeds = list()
2371  for ref_ch in ref_chains_copy:
2372  seeds += [(ref_ch, mdl_ch) for mdl_ch in mdl_chains]
2373  # extend starting seeds to *seed_size* and retain best scoring
2374  # block for further extension
2375  best_score = -1.0
2376  best_mapping = None
2377  best_seed = None
2378  for s in seeds:
2379  seed = dict(mapping)
2380  seed.update({s[0]: s[1]})
2381  seed = the_greed.ExtendMapping(seed, max_ext = max_ext)
2382  seed_lddt = the_greed.FromFlatMapping(seed)
2383  if seed_lddt > best_score:
2384  best_score = seed_lddt
2385  best_mapping = seed
2386  best_seed = s
2387  if best_mapping != None:
2388  starting_blocks.append(best_mapping)
2389  if best_seed[0] in ref_chains_copy:
2390  # remove that ref chain to enforce diversity
2391  ref_chains_copy.remove(best_seed[0])
2392 
2393  # fully expand initial starting blocks
2394  best_lddt = 0.0
2395  best_mapping = None
2396  for seed in starting_blocks:
2397  seed = the_greed.ExtendMapping(seed)
2398  seed_lddt = the_greed.FromFlatMapping(seed)
2399  if seed_lddt > best_lddt:
2400  best_lddt = seed_lddt
2401  best_mapping = seed
2402 
2403  if best_lddt == 0.0:
2404  break # no proper mapping found anymore
2405 
2406  something_happened = True
2407  mapping.update(best_mapping)
2408  for ref_ch, mdl_ch in best_mapping.items():
2409  for group_idx in range(len(ref_chem_groups)):
2410  if ref_ch in ref_chem_groups[group_idx]:
2411  ref_chem_groups[group_idx].remove(ref_ch)
2412  if mdl_ch in mdl_chem_groups[group_idx]:
2413  mdl_chem_groups[group_idx].remove(mdl_ch)
2414 
2415  # translate mapping format and return
2416  final_mapping = list()
2417  for ref_chains in the_greed.ref_chem_groups:
2418  mapped_mdl_chains = list()
2419  for ref_ch in ref_chains:
2420  if ref_ch in mapping:
2421  mapped_mdl_chains.append(mapping[ref_ch])
2422  else:
2423  mapped_mdl_chains.append(None)
2424  final_mapping.append(mapped_mdl_chains)
2425 
2426  return final_mapping
2427 
2428 
2430  def __init__(self, ref, mdl, ref_chem_groups, mdl_chem_groups,
2431  ref_mdl_alns, contact_d = 12.0,
2432  steep_opt_rate = None, greedy_prune_contact_map=False):
2433  """ Greedy extension of already existing but incomplete chain mappings
2434  """
2435  super().__init__(ref, ref_chem_groups, mdl, ref_mdl_alns,
2436  contact_d = contact_d)
2437  self.refref = ref
2438  self.mdlmdl = mdl
2439  self.ref_mdl_alnsref_mdl_alns = ref_mdl_alns
2440  self.steep_opt_ratesteep_opt_rate = steep_opt_rate
2441 
2442  if greedy_prune_contact_map:
2443  self.neighborsneighbors = {k: set() for k in self.qsent1qsent1.chain_names}
2444  for p in self.qsent1qsent1.interacting_chains:
2445  if np.count_nonzero(self.qsent1qsent1.PairDist(p[0], p[1])<=8) >= 3:
2446  self.neighborsneighbors[p[0]].add(p[1])
2447  self.neighborsneighbors[p[1]].add(p[0])
2448 
2449  self.mdl_neighborsmdl_neighbors = {k: set() for k in self.qsent2qsent2.chain_names}
2450  for p in self.qsent2qsent2.interacting_chains:
2451  if np.count_nonzero(self.qsent2qsent2.PairDist(p[0], p[1])<=8) >= 3:
2452  self.mdl_neighborsmdl_neighbors[p[0]].add(p[1])
2453  self.mdl_neighborsmdl_neighbors[p[1]].add(p[0])
2454 
2455 
2456  else:
2457  self.neighborsneighbors = {k: set() for k in self.qsent1qsent1.chain_names}
2458  for p in self.qsent1qsent1.interacting_chains:
2459  self.neighborsneighbors[p[0]].add(p[1])
2460  self.neighborsneighbors[p[1]].add(p[0])
2461 
2462  self.mdl_neighborsmdl_neighbors = {k: set() for k in self.qsent2qsent2.chain_names}
2463  for p in self.qsent2qsent2.interacting_chains:
2464  self.mdl_neighborsmdl_neighbors[p[0]].add(p[1])
2465  self.mdl_neighborsmdl_neighbors[p[1]].add(p[0])
2466 
2467  assert(len(ref_chem_groups) == len(mdl_chem_groups))
2468  self.ref_chem_groupsref_chem_groups = ref_chem_groups
2469  self.mdl_chem_groupsmdl_chem_groups = mdl_chem_groups
2470  self.ref_ch_group_mapperref_ch_group_mapper = dict()
2471  self.mdl_ch_group_mappermdl_ch_group_mapper = dict()
2472  for g_idx, (ref_g, mdl_g) in enumerate(zip(ref_chem_groups,
2473  mdl_chem_groups)):
2474  for ch in ref_g:
2475  self.ref_ch_group_mapperref_ch_group_mapper[ch] = g_idx
2476  for ch in mdl_g:
2477  self.mdl_ch_group_mappermdl_ch_group_mapper[ch] = g_idx
2478 
2479  # cache for lDDT based single chain conserved contacts
2480  # used to identify starting points for further extension by QS score
2481  # key: tuple (ref_ch, mdl_ch) value: number of conserved contacts
2482  self.single_chain_scorersingle_chain_scorer = dict()
2483  self.single_chain_cachesingle_chain_cache = dict()
2484  for ch in self.refref.chains:
2485  single_chain_ref = _CSel(self.refref, [ch.GetName()])
2486  self.single_chain_scorersingle_chain_scorer[ch.GetName()] = \
2487  lddt.lDDTScorer(single_chain_ref, bb_only = True)
2488 
2489  def SCCounts(self, ref_ch, mdl_ch):
2490  if not (ref_ch, mdl_ch) in self.single_chain_cachesingle_chain_cache:
2491  alns = dict()
2492  alns[mdl_ch] = self.ref_mdl_alnsref_mdl_alns[(ref_ch, mdl_ch)]
2493  mdl_sel = _CSel(self.mdlmdl, [mdl_ch])
2494  s = self.single_chain_scorersingle_chain_scorer[ref_ch]
2495  _,_,_,conserved,_,_,_ = s.lDDT(mdl_sel,
2496  residue_mapping=alns,
2497  return_dist_test=True,
2498  no_interchain=True,
2499  chain_mapping={mdl_ch: ref_ch},
2500  check_resnames=False)
2501  self.single_chain_cachesingle_chain_cache[(ref_ch, mdl_ch)] = conserved
2502  return self.single_chain_cachesingle_chain_cache[(ref_ch, mdl_ch)]
2503 
2504  def ExtendMapping(self, mapping, max_ext = None):
2505 
2506  if len(mapping) == 0:
2507  raise RuntimError("Mapping must contain a starting point")
2508 
2509  # Ref chains onto which we can map. The algorithm starts with a mapping
2510  # on ref_ch. From there we can start to expand to connected neighbors.
2511  # All neighbors that we can reach from the already mapped chains are
2512  # stored in this set which will be updated during runtime
2513  map_targets = set()
2514  for ref_ch in mapping.keys():
2515  map_targets.update(self.neighborsneighbors[ref_ch])
2516 
2517  # remove the already mapped chains
2518  for ref_ch in mapping.keys():
2519  map_targets.discard(ref_ch)
2520 
2521  if len(map_targets) == 0:
2522  return mapping # nothing to extend
2523 
2524  # keep track of what model chains are not yet mapped for each chem group
2525  free_mdl_chains = list()
2526  for chem_group in self.mdl_chem_groupsmdl_chem_groups:
2527  tmp = [x for x in chem_group if x not in mapping.values()]
2528  free_mdl_chains.append(set(tmp))
2529 
2530  # keep track of what ref chains got a mapping
2531  newly_mapped_ref_chains = list()
2532 
2533  something_happened = True
2534  while something_happened:
2535  something_happened=False
2536 
2537  if self.steep_opt_ratesteep_opt_rate is not None:
2538  n_chains = len(newly_mapped_ref_chains)
2539  if n_chains > 0 and n_chains % self.steep_opt_ratesteep_opt_rate == 0:
2540  mapping = self._SteepOpt_SteepOpt(mapping, newly_mapped_ref_chains)
2541 
2542  if max_ext is not None and len(newly_mapped_ref_chains) >= max_ext:
2543  break
2544 
2545  score_result = self.FromFlatMappingFromFlatMapping(mapping)
2546  old_score = score_result.QS_global
2547  nominator = score_result.weighted_scores
2548  denominator = score_result.weight_sum + score_result.weight_extra_all
2549 
2550  max_diff = 0.0
2551  max_mapping = None
2552  for ref_ch in map_targets:
2553  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[ref_ch]
2554  for mdl_ch in free_mdl_chains[chem_group_idx]:
2555  # we're not computing full QS-score here, we directly hack
2556  # into the QS-score formula to compute a diff
2557  nominator_diff = 0.0
2558  denominator_diff = 0.0
2559  for neighbor in self.neighborsneighbors[ref_ch]:
2560  if neighbor in mapping and mapping[neighbor] in \
2561  self.mdl_neighborsmdl_neighbors[mdl_ch]:
2562  # it's a newly added interface if (ref_ch, mdl_ch)
2563  # are added to mapping
2564  int1 = (ref_ch, neighbor)
2565  int2 = (mdl_ch, mapping[neighbor])
2566  a, b, c, d = self._MappedInterfaceScores_MappedInterfaceScores(int1, int2)
2567  nominator_diff += a # weighted_scores
2568  denominator_diff += b # weight_sum
2569  denominator_diff += d # weight_extra_all
2570  # the respective interface penalties are subtracted
2571  # from denominator
2572  denominator_diff -= self._InterfacePenalty1_InterfacePenalty1(int1)
2573  denominator_diff -= self._InterfacePenalty2_InterfacePenalty2(int2)
2574 
2575  if nominator_diff > 0:
2576  # Only accept a new solution if its actually connected
2577  # i.e. nominator_diff > 0.
2578  new_nominator = nominator + nominator_diff
2579  new_denominator = denominator + denominator_diff
2580  new_score = 0.0
2581  if new_denominator != 0.0:
2582  new_score = new_nominator/new_denominator
2583  diff = new_score - old_score
2584  if diff > max_diff:
2585  max_diff = diff
2586  max_mapping = (ref_ch, mdl_ch)
2587 
2588  if max_mapping is not None:
2589  something_happened = True
2590  # assign new found mapping
2591  mapping[max_mapping[0]] = max_mapping[1]
2592 
2593  # add all neighboring chains to map targets as they are now
2594  # reachable
2595  for neighbor in self.neighborsneighbors[max_mapping[0]]:
2596  if neighbor not in mapping:
2597  map_targets.add(neighbor)
2598 
2599  # remove the ref chain from map targets
2600  map_targets.remove(max_mapping[0])
2601 
2602  # remove the mdl chain from free_mdl_chains - its taken...
2603  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[max_mapping[0]]
2604  free_mdl_chains[chem_group_idx].remove(max_mapping[1])
2605 
2606  # keep track of what ref chains got a mapping
2607  newly_mapped_ref_chains.append(max_mapping[0])
2608 
2609  return mapping
2610 
2611  def _SteepOpt(self, mapping, chains_to_optimize=None):
2612 
2613  # just optimize ALL ref chains if nothing specified
2614  if chains_to_optimize is None:
2615  chains_to_optimize = mapping.keys()
2616 
2617  # make sure that we only have ref chains which are actually mapped
2618  ref_chains = [x for x in chains_to_optimize if mapping[x] is not None]
2619 
2620  # group ref chains to be optimized into chem groups
2621  tmp = dict()
2622  for ch in ref_chains:
2623  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[ch]
2624  if chem_group_idx in tmp:
2625  tmp[chem_group_idx].append(ch)
2626  else:
2627  tmp[chem_group_idx] = [ch]
2628  chem_groups = list(tmp.values())
2629 
2630  # try all possible mapping swaps. Swaps that improve the score are
2631  # immediately accepted and we start all over again
2632  score_result = self.FromFlatMappingFromFlatMapping(mapping)
2633  current_score = score_result.QS_global
2634  something_happened = True
2635  while something_happened:
2636  something_happened = False
2637  for chem_group in chem_groups:
2638  if something_happened:
2639  break
2640  for ch1, ch2 in itertools.combinations(chem_group, 2):
2641  swapped_mapping = dict(mapping)
2642  swapped_mapping[ch1] = mapping[ch2]
2643  swapped_mapping[ch2] = mapping[ch1]
2644  score_result = self.FromFlatMappingFromFlatMapping(swapped_mapping)
2645  if score_result.QS_global > current_score:
2646  something_happened = True
2647  mapping = swapped_mapping
2648  current_score = score_result.QS_global
2649  break
2650  return mapping
2651 
2652 
2653 def _QSScoreNaive(trg, mdl, chem_groups, chem_mapping, ref_mdl_alns, contact_d,
2654  n_max_naive):
2655  best_mapping = None
2656  best_score = -1.0
2657  # qs_scorer implements caching, score calculation is thus as fast as it gets
2658  # you'll just hit a wall when the number of possible mappings becomes large
2659  qs_scorer = qsscore.QSScorer(trg, chem_groups, mdl, ref_mdl_alns)
2660  for mapping in _ChainMappings(chem_groups, chem_mapping, n_max_naive):
2661  score_result = qs_scorer.Score(mapping, check=False)
2662  if score_result.QS_global > best_score:
2663  best_mapping = mapping
2664  best_score = score_result.QS_global
2665  return (best_mapping, best_score)
2666 
2667 
2668 def _QSScoreGreedyFast(the_greed):
2669 
2670  something_happened = True
2671  mapping = dict()
2672  while something_happened:
2673  something_happened = False
2674  # search for best scoring starting point, we're using lDDT here
2675  n_best = 0
2676  best_seed = None
2677  seeds = _GetSeeds(the_greed.ref_chem_groups,
2678  the_greed.mdl_chem_groups,
2679  mapped_ref_chains = set(mapping.keys()),
2680  mapped_mdl_chains = set(mapping.values()))
2681  for seed in seeds:
2682  n = the_greed.SCCounts(seed[0], seed[1])
2683  if n > n_best:
2684  n_best = n
2685  best_seed = seed
2686  if n_best == 0:
2687  break # no proper seed found anymore...
2688  # add seed to mapping and start the greed
2689  mapping[best_seed[0]] = best_seed[1]
2690  mapping = the_greed.ExtendMapping(mapping)
2691  something_happened = True
2692 
2693  # translate mapping format and return
2694  final_mapping = list()
2695  for ref_chains in the_greed.ref_chem_groups:
2696  mapped_mdl_chains = list()
2697  for ref_ch in ref_chains:
2698  if ref_ch in mapping:
2699  mapped_mdl_chains.append(mapping[ref_ch])
2700  else:
2701  mapped_mdl_chains.append(None)
2702  final_mapping.append(mapped_mdl_chains)
2703 
2704  return final_mapping
2705 
2706 
2707 def _QSScoreGreedyFull(the_greed):
2708  """ Uses each reference chain as starting point for expansion
2709  """
2710 
2711  seeds = _GetSeeds(the_greed.ref_chem_groups, the_greed.mdl_chem_groups)
2712  best_overall_score = -1.0
2713  best_overall_mapping = dict()
2714 
2715  for seed in seeds:
2716 
2717  # do initial extension
2718  mapping = the_greed.ExtendMapping({seed[0]: seed[1]})
2719 
2720  # repeat the process until we have a full mapping
2721  something_happened = True
2722  while something_happened:
2723  something_happened = False
2724  remnant_seeds = _GetSeeds(the_greed.ref_chem_groups,
2725  the_greed.mdl_chem_groups,
2726  mapped_ref_chains = set(mapping.keys()),
2727  mapped_mdl_chains = set(mapping.values()))
2728  if len(remnant_seeds) > 0:
2729  # still more mapping to be done
2730  best_score = -1.0
2731  best_mapping = None
2732  for remnant_seed in remnant_seeds:
2733  tmp_mapping = dict(mapping)
2734  tmp_mapping[remnant_seed[0]] = remnant_seed[1]
2735  tmp_mapping = the_greed.ExtendMapping(tmp_mapping)
2736  score_result = the_greed.FromFlatMapping(tmp_mapping)
2737  if score_result.QS_global > best_score:
2738  best_score = score_result.QS_global
2739  best_mapping = tmp_mapping
2740  if best_mapping is not None:
2741  something_happened = True
2742  mapping = best_mapping
2743 
2744  score_result = the_greed.FromFlatMapping(mapping)
2745  if score_result.QS_global > best_overall_score:
2746  best_overall_score = score_result.QS_global
2747  best_overall_mapping = mapping
2748 
2749  mapping = best_overall_mapping
2750 
2751  # translate mapping format and return
2752  final_mapping = list()
2753  for ref_chains in the_greed.ref_chem_groups:
2754  mapped_mdl_chains = list()
2755  for ref_ch in ref_chains:
2756  if ref_ch in mapping:
2757  mapped_mdl_chains.append(mapping[ref_ch])
2758  else:
2759  mapped_mdl_chains.append(None)
2760  final_mapping.append(mapped_mdl_chains)
2761 
2762  return final_mapping
2763 
2764 
2765 def _QSScoreGreedyBlock(the_greed, seed_size, blocks_per_chem_group):
2766  """ try multiple seeds, i.e. try all ref/mdl chain combinations within the
2767  respective chem groups and compute single chain lDDTs. The
2768  *blocks_per_chem_group* best scoring ones are extend by *seed_size* chains
2769  and the best scoring one with respect to QS score is exhaustively extended.
2770  """
2771 
2772  if seed_size is None or seed_size < 1:
2773  raise RuntimeError(f"seed_size must be an int >= 1 (got {seed_size})")
2774 
2775  if blocks_per_chem_group is None or blocks_per_chem_group < 1:
2776  raise RuntimeError(f"blocks_per_chem_group must be an int >= 1 "
2777  f"(got {blocks_per_chem_group})")
2778 
2779  max_ext = seed_size - 1 # -1 => start seed already has size 1
2780 
2781  ref_chem_groups = copy.deepcopy(the_greed.ref_chem_groups)
2782  mdl_chem_groups = copy.deepcopy(the_greed.mdl_chem_groups)
2783 
2784  mapping = dict()
2785 
2786  something_happened = True
2787  while something_happened:
2788  something_happened = False
2789  starting_blocks = list()
2790  for ref_chains, mdl_chains in zip(ref_chem_groups, mdl_chem_groups):
2791  if len(mdl_chains) == 0:
2792  continue # nothing to map
2793  ref_chains_copy = list(ref_chains)
2794  for i in range(blocks_per_chem_group):
2795  if len(ref_chains_copy) == 0:
2796  break
2797  seeds = list()
2798  for ref_ch in ref_chains_copy:
2799  seeds += [(ref_ch, mdl_ch) for mdl_ch in mdl_chains]
2800  # extend starting seeds to *seed_size* and retain best scoring block
2801  # for further extension
2802  best_score = -1.0
2803  best_mapping = None
2804  best_seed = None
2805  for s in seeds:
2806  seed = dict(mapping)
2807  seed.update({s[0]: s[1]})
2808  seed = the_greed.ExtendMapping(seed, max_ext = max_ext)
2809  score_result = the_greed.FromFlatMapping(seed)
2810  if score_result.QS_global > best_score:
2811  best_score = score_result.QS_global
2812  best_mapping = seed
2813  best_seed = s
2814  if best_mapping != None:
2815  starting_blocks.append(best_mapping)
2816  if best_seed[0] in ref_chains_copy:
2817  # remove selected ref chain to enforce diversity
2818  ref_chains_copy.remove(best_seed[0])
2819 
2820  # fully expand initial starting blocks
2821  best_score = -1.0
2822  best_mapping = None
2823  for seed in starting_blocks:
2824  seed = the_greed.ExtendMapping(seed)
2825  score_result = the_greed.FromFlatMapping(seed)
2826  if score_result.QS_global > best_score:
2827  best_score = score_result.QS_global
2828  best_mapping = seed
2829 
2830  if best_mapping is not None and len(best_mapping) > len(mapping):
2831  # this even accepts extensions that lead to no increase in QS-score
2832  # at least they make sense from an lDDT perspective
2833  something_happened = True
2834  mapping.update(best_mapping)
2835  for ref_ch, mdl_ch in best_mapping.items():
2836  for group_idx in range(len(ref_chem_groups)):
2837  if ref_ch in ref_chem_groups[group_idx]:
2838  ref_chem_groups[group_idx].remove(ref_ch)
2839  if mdl_ch in mdl_chem_groups[group_idx]:
2840  mdl_chem_groups[group_idx].remove(mdl_ch)
2841 
2842  # translate mapping format and return
2843  final_mapping = list()
2844  for ref_chains in the_greed.ref_chem_groups:
2845  mapped_mdl_chains = list()
2846  for ref_ch in ref_chains:
2847  if ref_ch in mapping:
2848  mapped_mdl_chains.append(mapping[ref_ch])
2849  else:
2850  mapped_mdl_chains.append(None)
2851  final_mapping.append(mapped_mdl_chains)
2852 
2853  return final_mapping
2854 
2855 def _SingleRigidRMSD(initial_transforms, initial_mappings, chem_groups,
2856  chem_mapping, trg_group_pos, mdl_group_pos):
2857  """
2858  Takes initial transforms and sequentially adds chain pairs with lowest RMSD.
2859  The mapping from the transform that leads to lowest overall RMSD is
2860  returned.
2861  """
2862  best_mapping = dict()
2863  best_ssd = float("inf") # we're actually going for summed squared distances
2864  # Since all positions have same lengths and we do a
2865  # full mapping, lowest SSD has a guarantee of also
2866  # being lowest RMSD
2867  for transform in initial_transforms:
2868  mapping = dict()
2869  mapped_mdl_chains = set()
2870  ssd = 0.0
2871  for trg_chains, mdl_chains, trg_pos, mdl_pos, in zip(chem_groups,
2872  chem_mapping,
2873  trg_group_pos,
2874  mdl_group_pos):
2875  if len(trg_pos) == 0 or len(mdl_pos) == 0:
2876  continue # cannot compute valid rmsd
2877  ssds = list()
2878  t_mdl_pos = list()
2879  for m_pos in mdl_pos:
2880  t_m_pos = geom.Vec3List(m_pos)
2881  t_m_pos.ApplyTransform(transform)
2882  t_mdl_pos.append(t_m_pos)
2883  for t_pos, t in zip(trg_pos, trg_chains):
2884  for t_m_pos, m in zip(t_mdl_pos, mdl_chains):
2885  ssd = t_pos.GetSummedSquaredDistances(t_m_pos)
2886  ssds.append((ssd, (t,m)))
2887  ssds.sort()
2888  for item in ssds:
2889  p = item[1]
2890  if p[0] not in mapping and p[1] not in mapped_mdl_chains:
2891  mapping[p[0]] = p[1]
2892  mapped_mdl_chains.add(p[1])
2893  ssd += item[0]
2894 
2895  if ssd < best_ssd:
2896  best_ssd = ssd
2897  best_mapping = mapping
2898 
2899  return best_mapping
2900 
2901 def _IterativeRigidRMSD(initial_transforms, initial_mappings, chem_groups,
2902  chem_mapping, trg_group_pos, mdl_group_pos):
2903  """ Takes initial transforms and sequentially adds chain pairs with
2904  lowest RMSD. With each added chain pair, the transform gets updated.
2905  Thus the naming iterative. The mapping from the initial transform that
2906  leads to best overall RMSD score is returned.
2907  """
2908 
2909  # to directly retrieve positions using chain names
2910  trg_pos_dict = dict()
2911  for trg_pos, trg_chains in zip(trg_group_pos, chem_groups):
2912  for t_pos, t in zip(trg_pos, trg_chains):
2913  trg_pos_dict[t] = t_pos
2914  mdl_pos_dict = dict()
2915  for mdl_pos, mdl_chains in zip(mdl_group_pos, chem_mapping):
2916  for m_pos, m in zip(mdl_pos, mdl_chains):
2917  mdl_pos_dict[m] = m_pos
2918 
2919  best_mapping = dict()
2920  best_rmsd = float("inf")
2921  for initial_transform, initial_mapping in zip(initial_transforms,
2922  initial_mappings):
2923  mapping = {initial_mapping[0]: initial_mapping[1]}
2924  transform = geom.Mat4(initial_transform)
2925  mapped_trg_pos = geom.Vec3List(trg_pos_dict[initial_mapping[0]])
2926  mapped_mdl_pos = geom.Vec3List(mdl_pos_dict[initial_mapping[1]])
2927 
2928  # the following variables contain the chains which are
2929  # available for mapping
2930  trg_chain_groups = [set(group) for group in chem_groups]
2931  mdl_chain_groups = [set(group) for group in chem_mapping]
2932 
2933  # search and kick out inital mapping
2934  for group in trg_chain_groups:
2935  if initial_mapping[0] in group:
2936  group.remove(initial_mapping[0])
2937  break
2938  for group in mdl_chain_groups:
2939  if initial_mapping[1] in group:
2940  group.remove(initial_mapping[1])
2941  break
2942 
2943  something_happened = True
2944  while something_happened:
2945  # search for best mapping given current transform
2946  something_happened=False
2947  best_sc_mapping = None
2948  best_sc_group_idx = None
2949  best_sc_rmsd = float("inf")
2950  group_idx = 0
2951  for trg_chains, mdl_chains in zip(trg_chain_groups, mdl_chain_groups):
2952  for t in trg_chains:
2953  t_pos = trg_pos_dict[t]
2954  for m in mdl_chains:
2955  m_pos = mdl_pos_dict[m]
2956  t_m_pos = geom.Vec3List(m_pos)
2957  t_m_pos.ApplyTransform(transform)
2958  rmsd = t_pos.GetRMSD(t_m_pos)
2959  if rmsd < best_sc_rmsd:
2960  best_sc_rmsd = rmsd
2961  best_sc_mapping = (t,m)
2962  best_sc_group_idx = group_idx
2963  group_idx += 1
2964 
2965  if best_sc_mapping is not None:
2966  something_happened = True
2967  mapping[best_sc_mapping[0]] = best_sc_mapping[1]
2968  mapped_trg_pos.extend(trg_pos_dict[best_sc_mapping[0]])
2969  mapped_mdl_pos.extend(mdl_pos_dict[best_sc_mapping[1]])
2970  trg_chain_groups[best_sc_group_idx].remove(best_sc_mapping[0])
2971  mdl_chain_groups[best_sc_group_idx].remove(best_sc_mapping[1])
2972 
2973  transform = _GetTransform(mapped_mdl_pos, mapped_trg_pos,
2974  False)
2975 
2976  # compute overall RMSD for current transform
2977  mapped_mdl_pos.ApplyTransform(transform)
2978  rmsd = mapped_trg_pos.GetRMSD(mapped_mdl_pos)
2979 
2980  if rmsd < best_rmsd:
2981  best_rmsd = rmsd
2982  best_mapping = mapping
2983 
2984  return best_mapping
2985 
2986 def _NaiveRMSD(chem_groups, chem_mapping, trg_group_pos, mdl_group_pos,
2987  n_max_naive):
2988 
2989  # to directly retrieve positions using chain names
2990  trg_pos_dict = dict()
2991  for trg_pos, trg_chains in zip(trg_group_pos, chem_groups):
2992  for t_pos, t in zip(trg_pos, trg_chains):
2993  trg_pos_dict[t] = t_pos
2994  mdl_pos_dict = dict()
2995  for mdl_pos, mdl_chains in zip(mdl_group_pos, chem_mapping):
2996  for m_pos, m in zip(mdl_pos, mdl_chains):
2997  mdl_pos_dict[m] = m_pos
2998 
2999  best_mapping = dict()
3000  best_rmsd = float("inf")
3001 
3002  for mapping in _ChainMappings(chem_groups, chem_mapping, n_max_naive):
3003  trg_pos = geom.Vec3List()
3004  mdl_pos = geom.Vec3List()
3005  for trg_group, mdl_group in zip(chem_groups, mapping):
3006  for trg_ch, mdl_ch in zip(trg_group, mdl_group):
3007  if trg_ch is not None and mdl_ch is not None:
3008  trg_pos.extend(trg_pos_dict[trg_ch])
3009  mdl_pos.extend(mdl_pos_dict[mdl_ch])
3010  superpose_res = mol.alg.SuperposeSVD(mdl_pos, trg_pos)
3011  if superpose_res.rmsd < best_rmsd:
3012  best_rmsd = superpose_res.rmsd
3013  best_mapping = mapping
3014 
3015  # this is stupid...
3016  tmp = dict()
3017  for chem_group, mapping in zip(chem_groups, best_mapping):
3018  for trg_ch, mdl_ch in zip(chem_group, mapping):
3019  tmp[trg_ch] = mdl_ch
3020 
3021  return tmp
3022 
3023 
3024 def _GetRefPos(trg, mdl, trg_msas, mdl_alns, max_pos = None):
3025  """ Extracts reference positions which are present in trg and mdl
3026  """
3027 
3028  # select only backbone atoms, makes processing simpler later on
3029  # (just select res.atoms[0].GetPos() as ref pos)
3030  bb_trg = trg.Select("aname=\"CA\",\"C3'\"")
3031  bb_mdl = mdl.Select("aname=\"CA\",\"C3'\"")
3032 
3033  # mdl_alns are pairwise, let's construct MSAs
3034  mdl_msas = list()
3035  for aln_list in mdl_alns:
3036  if len(aln_list) > 0:
3037  tmp = aln_list[0].GetSequence(0)
3038  ref_seq = seq.CreateSequence(tmp.GetName(), tmp.GetGaplessString())
3039  mdl_msas.append(seq.alg.MergePairwiseAlignments(aln_list, ref_seq))
3040  else:
3041  mdl_msas.append(seq.CreateAlignment())
3042 
3043  trg_pos = list()
3044  mdl_pos = list()
3045 
3046  for trg_msa, mdl_msa in zip(trg_msas, mdl_msas):
3047 
3048  if mdl_msa.GetCount() > 0:
3049  # make sure they have the same ref sequence (should be a given...)
3050  assert(trg_msa.GetSequence(0).GetGaplessString() == \
3051  mdl_msa.GetSequence(0).GetGaplessString())
3052  else:
3053  # if mdl_msa is empty, i.e. no model chain maps to the chem group
3054  # represented by trg_msa, we just continue. The result will be
3055  # empty position lists added to trg_pos and mdl_pos.
3056  pass
3057 
3058  # check which columns in MSAs are fully covered (indices relative to
3059  # first sequence)
3060  trg_indices = _GetFullyCoveredIndices(trg_msa)
3061  mdl_indices = _GetFullyCoveredIndices(mdl_msa)
3062 
3063  # get indices where both, mdl and trg, are fully covered
3064  indices = sorted(list(trg_indices.intersection(mdl_indices)))
3065 
3066  # subsample if necessary
3067  if max_pos is not None and len(indices) > max_pos:
3068  step = int(len(indices)/max_pos)
3069  indices = [indices[i] for i in range(0, len(indices), step)]
3070 
3071  # translate to column indices in the respective MSAs
3072  trg_indices = _RefIndicesToColumnIndices(trg_msa, indices)
3073  mdl_indices = _RefIndicesToColumnIndices(mdl_msa, indices)
3074 
3075  # extract positions
3076  trg_pos.append(list())
3077  mdl_pos.append(list())
3078  for s_idx in range(trg_msa.GetCount()):
3079  trg_pos[-1].append(_ExtractMSAPos(trg_msa, s_idx, trg_indices,
3080  bb_trg))
3081  # first seq in mdl_msa is ref sequence in trg and does not belong to mdl
3082  for s_idx in range(1, mdl_msa.GetCount()):
3083  mdl_pos[-1].append(_ExtractMSAPos(mdl_msa, s_idx, mdl_indices,
3084  bb_mdl))
3085 
3086  return (trg_pos, mdl_pos)
3087 
3088 def _GetFullyCoveredIndices(msa):
3089  """ Helper for _GetRefPos
3090 
3091  Returns a set containing the indices relative to first sequence in msa which
3092  are fully covered in all other sequences
3093 
3094  --AA-A-A
3095  -BBBB-BB
3096  CCCC-C-C
3097 
3098  => (0,1,3)
3099  """
3100  indices = set()
3101  ref_idx = 0
3102  for col in msa:
3103  if sum([1 for olc in col if olc != '-']) == col.GetRowCount():
3104  indices.add(ref_idx)
3105  if col[0] != '-':
3106  ref_idx += 1
3107  return indices
3108 
3109 def _RefIndicesToColumnIndices(msa, indices):
3110  """ Helper for _GetRefPos
3111 
3112  Returns a list of mapped indices. indices refer to non-gap one letter
3113  codes in the first msa sequence. The returnes mapped indices are translated
3114  to the according msa column indices
3115  """
3116  ref_idx = 0
3117  mapping = dict()
3118  for col_idx, col in enumerate(msa):
3119  if col[0] != '-':
3120  mapping[ref_idx] = col_idx
3121  ref_idx += 1
3122  return [mapping[i] for i in indices]
3123 
3124 def _ExtractMSAPos(msa, s_idx, indices, view):
3125  """ Helper for _GetRefPos
3126 
3127  Returns a geom.Vec3List containing positions refering to given msa sequence.
3128  => Chain with corresponding name is mapped onto sequence and the position of
3129  the first atom of each residue specified in indices is extracted.
3130  Indices refers to column indices in msa!
3131  """
3132  s = msa.GetSequence(s_idx)
3133  s_v = _CSel(view, [s.GetName()])
3134 
3135  # sanity check
3136  assert(len(s.GetGaplessString()) == len(s_v.residues))
3137 
3138  residue_idx = [s.GetResidueIndex(i) for i in indices]
3139  return geom.Vec3List([s_v.residues[i].atoms[0].pos for i in residue_idx])
3140 
3141 def _NChemGroupMappings(ref_chains, mdl_chains):
3142  """ Number of mappings within one chem group
3143 
3144  :param ref_chains: Reference chains
3145  :type ref_chains: :class:`list` of :class:`str`
3146  :param mdl_chains: Model chains that are mapped onto *ref_chains*
3147  :type mdl_chains: :class:`list` of :class:`str`
3148  :returns: Number of possible mappings of *mdl_chains* onto *ref_chains*
3149  """
3150  n_ref = len(ref_chains)
3151  n_mdl = len(mdl_chains)
3152  if n_ref == n_mdl:
3153  return factorial(n_ref)
3154  elif n_ref > n_mdl:
3155  n_choose_k = binom(n_ref, n_mdl)
3156  return n_choose_k * factorial(n_mdl)
3157  else:
3158  n_choose_k = binom(n_mdl, n_ref)
3159  return n_choose_k * factorial(n_ref)
3160 
3161 def _NMappings(ref_chains, mdl_chains):
3162  """ Number of mappings for a full chem mapping
3163 
3164  :param ref_chains: Chem groups of reference
3165  :type ref_chains: :class:`list` of :class:`list` of :class:`str`
3166  :param mdl_chains: Model chains that map onto those chem groups
3167  :type mdl_chains: :class:`list` of :class:`list` of :class:`str`
3168  :returns: Number of possible mappings of *mdl_chains* onto *ref_chains*
3169  """
3170  assert(len(ref_chains) == len(mdl_chains))
3171  n = 1
3172  for a,b in zip(ref_chains, mdl_chains):
3173  n *= _NChemGroupMappings(a,b)
3174  return n
3175 
3176 def _NMappingsWithin(ref_chains, mdl_chains, max_mappings):
3177  """ Check whether total number of mappings is smaller than given maximum
3178 
3179  In principle the same as :func:`_NMappings` but it stops as soon as the
3180  maximum is hit.
3181 
3182  :param ref_chains: Chem groups of reference
3183  :type ref_chains: :class:`list` of :class:`list` of :class:`str`
3184  :param mdl_chains: Model chains that map onto those chem groups
3185  :type mdl_chains: :class:`list` of :class:`list` of :class:`str`
3186  :param max_mappings: Number of max allowed mappings
3187  :returns: Whether number of possible mappings of *mdl_chains* onto
3188  *ref_chains* is below or equal *max_mappings*.
3189  """
3190  assert(len(ref_chains) == len(mdl_chains))
3191  n = 1
3192  for a,b in zip(ref_chains, mdl_chains):
3193  n *= _NChemGroupMappings(a,b)
3194  if n > max_mappings:
3195  return False
3196  return True
3197 
3198 def _RefSmallerGenerator(ref_chains, mdl_chains):
3199  """ Returns all possible ways to map mdl_chains onto ref_chains
3200 
3201  Specific for the case where len(ref_chains) < len(mdl_chains)
3202  """
3203  for c in itertools.combinations(mdl_chains, len(ref_chains)):
3204  for p in itertools.permutations(c):
3205  yield list(p)
3206 
3207 def _RefLargerGenerator(ref_chains, mdl_chains):
3208  """ Returns all possible ways to map mdl_chains onto ref_chains
3209 
3210  Specific for the case where len(ref_chains) > len(mdl_chains)
3211  Ref chains without mapped mdl chain are assigned None
3212  """
3213  n_ref = len(ref_chains)
3214  n_mdl = len(mdl_chains)
3215  for c in itertools.combinations(range(n_ref), n_mdl):
3216  for p in itertools.permutations(mdl_chains):
3217  ret_list = [None] * n_ref
3218  for idx, ch in zip(c, p):
3219  ret_list[idx] = ch
3220  yield ret_list
3221 
3222 def _RefEqualGenerator(ref_chains, mdl_chains):
3223  """ Returns all possible ways to map mdl_chains onto ref_chains
3224 
3225  Specific for the case where len(ref_chains) == len(mdl_chains)
3226  """
3227  for p in itertools.permutations(mdl_chains):
3228  yield list(p)
3229 
3230 def _RefEmptyGenerator(ref_chains, mdl_chains):
3231  yield list()
3232 
3233 def _ConcatIterators(iterators):
3234  for item in itertools.product(*iterators):
3235  yield list(item)
3236 
3237 def _ChainMappings(ref_chains, mdl_chains, n_max=None):
3238  """Returns all possible ways to map *mdl_chains* onto fixed *ref_chains*
3239 
3240  :param ref_chains: List of list of chemically equivalent chains in reference
3241  :type ref_chains: :class:`list` of :class:`list`
3242  :param mdl_chains: Equally long list of list of chemically equivalent chains
3243  in model that map on those ref chains.
3244  :type mdl_chains: :class:`list` of :class:`list`
3245  :param n_max: Aborts and raises :class:`RuntimeError` if max number of
3246  mappings is above this threshold.
3247  :type n_max: :class:`int`
3248  :returns: Iterator over all possible mappings of *mdl_chains* onto fixed
3249  *ref_chains*. Potentially contains None as padding when number of
3250  model chains for a certain mapping is smaller than the according
3251  reference chains.
3252  Example: _ChainMappings([['A', 'B', 'C'], ['D', 'E']],
3253  [['x', 'y'], ['i', 'j']])
3254  gives an iterator over: [[['x', 'y', None], ['i', 'j']],
3255  [['x', 'y', None], ['j', 'i']],
3256  [['y', 'x', None], ['i', 'j']],
3257  [['y', 'x', None], ['j', 'i']],
3258  [['x', None, 'y'], ['i', 'j']],
3259  [['x', None, 'y'], ['j', 'i']],
3260  [['y', None, 'x'], ['i', 'j']],
3261  [['y', None, 'x'], ['j', 'i']],
3262  [[None, 'x', 'y'], ['i', 'j']],
3263  [[None, 'x', 'y'], ['j', 'i']],
3264  [[None, 'y', 'x'], ['i', 'j']],
3265  [[None, 'y', 'x'], ['j', 'i']]]
3266  """
3267  assert(len(ref_chains) == len(mdl_chains))
3268 
3269  if n_max is not None:
3270  if not _NMappingsWithin(ref_chains, mdl_chains, n_max):
3271  raise RuntimeError(f"Too many mappings. Max allowed: {n_max}")
3272 
3273  # one iterator per mapping representing all mdl combinations relative to
3274  # reference
3275  iterators = list()
3276  for ref, mdl in zip(ref_chains, mdl_chains):
3277  if len(ref) == 0:
3278  iterators.append(_RefEmptyGenerator(ref, mdl))
3279  elif len(ref) == len(mdl):
3280  iterators.append(_RefEqualGenerator(ref, mdl))
3281  elif len(ref) < len(mdl):
3282  iterators.append(_RefSmallerGenerator(ref, mdl))
3283  else:
3284  iterators.append(_RefLargerGenerator(ref, mdl))
3285 
3286  return _ConcatIterators(iterators)
3287 
3288 
3289 def _GetTransform(pos_one, pos_two, iterative):
3290  """ Computes minimal RMSD superposition for pos_one onto pos_two
3291 
3292  :param pos_one: Positions that should be superposed onto *pos_two*
3293  :type pos_one: :class:`geom.Vec3List`
3294  :param pos_two: Reference positions
3295  :type pos_two: :class:`geom.Vec3List`
3296  :iterative: Whether iterative superposition should be used. Iterative
3297  potentially raises, uses standard superposition as fallback.
3298  :type iterative: :class:`bool`
3299  :returns: Transformation matrix to superpose *pos_one* onto *pos_two*
3300  :rtype: :class:`geom.Mat4`
3301  """
3302  res = None
3303  if iterative:
3304  try:
3305  res = mol.alg.IterativeSuperposeSVD(pos_one, pos_two)
3306  except:
3307  pass # triggers fallback below
3308  if res is None:
3309  res = mol.alg.SuperposeSVD(pos_one, pos_two)
3310  return res.transformation
3311 
3312 # specify public interface
3313 __all__ = ('ChainMapper', 'ReprResult', 'MappingResult')
def _NSCConserved(self, trg_ch, mdl_ch)
Definition: bb_lddt.py:481
def _NPairConserved(self, trg_int, mdl_int)
Definition: bb_lddt.py:504
def FromFlatMapping(self, flat_mapping)
Definition: bb_lddt.py:457
def __init__(self, pep_subst_mat=seq.alg.BLOSUM62, pep_gap_open=-5, pep_gap_ext=-2, nuc_subst_mat=seq.alg.NUC44, nuc_gap_open=-4, nuc_gap_ext=-4, resnum_aln=False)
def Align(self, s1, s2, chem_type=None)
def NWAlign(self, s1, s2, chem_type)
def __init__(self, ref, mdl, ref_chem_groups, mdl_chem_groups, ref_mdl_alns, contact_d=12.0, steep_opt_rate=None, greedy_prune_contact_map=False)
def ExtendMapping(self, mapping, max_ext=None)
def _SteepOpt(self, mapping, chains_to_optimize=None)
def __init__(self, ref, mdl, ref_chem_groups, mdl_chem_groups, ref_mdl_alns, inclusion_radius=15.0, thresholds=[0.5, 1.0, 2.0, 4.0], steep_opt_rate=None)
def ExtendMapping(self, mapping, max_ext=None)
def _SteepOpt(self, mapping, chains_to_optimize=None)
def GetlDDTMapping(self, model, inclusion_radius=15.0, thresholds=[0.5, 1.0, 2.0, 4.0], strategy="heuristic", steep_opt_rate=None, block_seed_size=5, block_blocks_per_chem_group=5, chem_mapping_result=None, heuristic_n_max_naive=40320)
def GetRepr(self, substructure, model, topn=1, inclusion_radius=15.0, thresholds=[0.5, 1.0, 2.0, 4.0], bb_only=False, only_interchain=False, chem_mapping_result=None, global_mapping=None)
def GetMapping(self, model, n_max_naive=40320)
def __init__(self, target, resnum_alignments=False, pep_seqid_thr=95., pep_gap_thr=1.0, nuc_seqid_thr=95., nuc_gap_thr=1.0, pep_subst_mat=seq.alg.BLOSUM62, pep_gap_open=-11, pep_gap_ext=-1, nuc_subst_mat=seq.alg.NUC44, nuc_gap_open=-4, nuc_gap_ext=-4, min_pep_length=6, min_nuc_length=4, n_max_naive=1e8)
def GetQSScoreMapping(self, model, contact_d=12.0, strategy="heuristic", block_seed_size=5, block_blocks_per_chem_group=5, heuristic_n_max_naive=40320, steep_opt_rate=None, chem_mapping_result=None, greedy_prune_contact_map=True)
def GetRMSDMapping(self, model, strategy="heuristic", subsampling=50, chem_mapping_result=None, heuristic_n_max_naive=120)
def GetFlatMapping(self, mdl_as_key=False)
def __init__(self, target, model, chem_groups, chem_mapping, mapping, alns, opt_score=None)
def GetFlatChainMapping(self, mdl_as_key=False)
def __init__(self, lDDT, substructure, ref_view, mdl_view)
def _GetInconsistentResidues(self, ref_residues, mdl_residues)
def _InterfacePenalty1(self, interface)
Definition: qsscore.py:732
def _InterfacePenalty2(self, interface)
Definition: qsscore.py:738
def _MappedInterfaceScores(self, int1, int2)
Definition: qsscore.py:583
def FromFlatMapping(self, flat_mapping)
Definition: qsscore.py:528