OpenStructure
chain_mapping.py
Go to the documentation of this file.
1 """
2 Chain mapping aims to identify a one-to-one relationship between chains in a
3 reference structure and a model.
4 """
5 
6 import itertools
7 import copy
8 
9 import numpy as np
10 
11 from scipy.special import factorial
12 from scipy.special import binom # as of Python 3.8, the math module implements
13  # comb, i.e. n choose k
14 
15 import ost
16 from ost import seq
17 from ost import mol
18 from ost import geom
19 
20 from ost.mol.alg import lddt
21 from ost.mol.alg import bb_lddt
22 from ost.mol.alg import qsscore
23 
24 def _CSel(ent, cnames):
25  """ Returns view with specified chains
26 
27  Ensures that quotation marks are around chain names to not confuse
28  OST query language with weird special characters.
29  """
30  query = "cname=" + ','.join([mol.QueryQuoteName(cname) for cname in cnames])
31  return ent.Select(query)
32 
34  """ Result object for the chain mapping functions in :class:`ChainMapper`
35 
36  Constructor is directly called within the functions, no need to construct
37  such objects yourself.
38  """
39  def __init__(self, target, model, chem_groups, chem_mapping, mapping, alns,
40  opt_score=None):
41  self._target_target = target
42  self._model_model = model
43  self._chem_groups_chem_groups = chem_groups
44  self._chem_mapping_chem_mapping = chem_mapping
45  self._mapping_mapping = mapping
46  self._alns_alns = alns
47  self._opt_score_opt_score = opt_score
48 
49  @property
50  def target(self):
51  """ Target/reference structure, i.e. :attr:`ChainMapper.target`
52 
53  :type: :class:`ost.mol.EntityView`
54  """
55  return self._target_target
56 
57  @property
58  def model(self):
59  """ Model structure that gets mapped onto :attr:`~target`
60 
61  Underwent same processing as :attr:`ChainMapper.target`, i.e.
62  only contains peptide/nucleotide chains of sufficient size.
63 
64  :type: :class:`ost.mol.EntityView`
65  """
66  return self._model_model
67 
68  @property
69  def chem_groups(self):
70  """ Groups of chemically equivalent chains in :attr:`~target`
71 
72  Same as :attr:`ChainMapper.chem_group`
73 
74  :class:`list` of :class:`list` of :class:`str` (chain names)
75  """
76  return self._chem_groups_chem_groups
77 
78  @property
79  def chem_mapping(self):
80  """ Assigns chains in :attr:`~model` to :attr:`~chem_groups`.
81 
82  :class:`list` of :class:`list` of :class:`str` (chain names)
83  """
84  return self._chem_mapping_chem_mapping
85 
86  @property
87  def mapping(self):
88  """ Mapping of :attr:`~model` chains onto :attr:`~target`
89 
90  Exact same shape as :attr:`~chem_groups` but containing the names of the
91  mapped chains in :attr:`~model`. May contain None for :attr:`~target`
92  chains that are not covered. No guarantee that all chains in
93  :attr:`~model` are mapped.
94 
95  :class:`list` of :class:`list` of :class:`str` (chain names)
96  """
97  return self._mapping_mapping
98 
99  @property
100  def alns(self):
101  """ Alignments of mapped chains in :attr:`~target` and :attr:`~model`
102 
103  Each alignment is accessible with ``alns[(t_chain,m_chain)]``. First
104  sequence is the sequence of :attr:`target` chain, second sequence the
105  one from :attr:`~model`. The respective :class:`ost.mol.EntityView` are
106  attached with :func:`ost.seq.ConstSequenceHandle.AttachView`.
107 
108  :type: :class:`dict` with key: :class:`tuple` of :class:`str`, value:
109  :class:`ost.seq.AlignmentHandle`
110  """
111  return self._alns_alns
112 
113  @property
114  def opt_score(self):
115  """ Placeholder property without any guarantee of being set
116 
117  Different scores get optimized in the various chain mapping algorithms.
118  Some of them may set their final optimal score in that property.
119  Consult the documentation of the respective chain mapping algorithm
120  for more information. Won't be in the return dict of
121  :func:`JSONSummary`.
122  """
123  return self._opt_score_opt_score
124 
125  def GetFlatMapping(self, mdl_as_key=False):
126  """ Returns flat mapping as :class:`dict` for all mapable chains
127 
128  :param mdl_as_key: Default is target chain name as key and model chain
129  name as value. This can be reversed with this flag.
130  :returns: :class:`dict` with :class:`str` as key/value that describe
131  one-to-one mapping
132  """
133  flat_mapping = dict()
134  for trg_chem_group, mdl_chem_group in zip(self.chem_groupschem_groups,
135  self.mappingmapping):
136  for a,b in zip(trg_chem_group, mdl_chem_group):
137  if a is not None and b is not None:
138  if mdl_as_key:
139  flat_mapping[b] = a
140  else:
141  flat_mapping[a] = b
142  return flat_mapping
143 
144  def JSONSummary(self):
145  """ Returns JSON serializable summary of results
146  """
147  json_dict = dict()
148  json_dict["chem_groups"] = self.chem_groupschem_groups
149  json_dict["mapping"] = self.mappingmapping
150  json_dict["flat_mapping"] = self.GetFlatMappingGetFlatMapping()
151  json_dict["alns"] = list()
152  for aln in self.alnsalns.values():
153  trg_seq = aln.GetSequence(0)
154  mdl_seq = aln.GetSequence(1)
155  aln_dict = {"trg_ch": trg_seq.GetName(), "trg_seq": str(trg_seq),
156  "mdl_ch": mdl_seq.GetName(), "mdl_seq": str(mdl_seq)}
157  json_dict["alns"].append(aln_dict)
158  return json_dict
159 
160 
162 
163  """ Result object for :func:`ChainMapper.GetRepr`
164 
165  Constructor is directly called within the function, no need to construct
166  such objects yourself.
167 
168  :param lDDT: lDDT for this mapping. Depends on how you call
169  :func:`ChainMapper.GetRepr` whether this is backbone only or
170  full atom lDDT.
171  :type lDDT: :class:`float`
172  :param substructure: The full substructure for which we searched for a
173  representation
174  :type substructure: :class:`ost.mol.EntityView`
175  :param ref_view: View pointing to the same underlying entity as
176  *substructure* but only contains the stuff that is mapped
177  :type ref_view: :class:`mol.EntityView`
178  :param mdl_view: The matching counterpart in model
179  :type mdl_view: :class:`mol.EntityView`
180  """
181  def __init__(self, lDDT, substructure, ref_view, mdl_view):
182  self._lDDT_lDDT = lDDT
183  self._substructure_substructure = substructure
184  assert(len(ref_view.residues) == len(mdl_view.residues))
185  self._ref_view_ref_view = ref_view
186  self._mdl_view_mdl_view = mdl_view
187 
188  # lazily evaluated attributes
189  self._ref_bb_pos_ref_bb_pos = None
190  self._mdl_bb_pos_mdl_bb_pos = None
191  self._ref_full_bb_pos_ref_full_bb_pos = None
192  self._mdl_full_bb_pos_mdl_full_bb_pos = None
193  self._superposition_superposition = None
194  self._superposed_mdl_bb_pos_superposed_mdl_bb_pos = None
195  self._ost_query_ost_query = None
196  self._flat_mapping_flat_mapping = None
197  self._inconsistent_residues_inconsistent_residues = None
198 
199  @property
200  def lDDT(self):
201  """ lDDT of representation result
202 
203  Depends on how you call :func:`ChainMapper.GetRepr` whether this is
204  backbone only or full atom lDDT.
205 
206  :type: :class:`float`
207  """
208  return self._lDDT_lDDT
209 
210  @property
211  def substructure(self):
212  """ The full substructure for which we searched for a
213  representation
214 
215  :type: :class:`ost.mol.EntityView`
216  """
217  return self._substructure_substructure
218 
219  @property
220  def ref_view(self):
221  """ View which contains the mapped subset of :attr:`substructure`
222 
223  :type: :class:`ost.mol.EntityView`
224  """
225  return self._ref_view_ref_view
226 
227  @property
228  def mdl_view(self):
229  """ The :attr:`ref_view` representation in the model
230 
231  :type: :class:`ost.mol.EntityView`
232  """
233  return self._mdl_view_mdl_view
234 
235  @property
236  def ref_residues(self):
237  """ The reference residues
238 
239  :type: class:`mol.ResidueViewList`
240  """
241  return self.ref_viewref_view.residues
242 
243  @property
244  def mdl_residues(self):
245  """ The model residues
246 
247  :type: :class:`mol.ResidueViewList`
248  """
249  return self.mdl_viewmdl_view.residues
250 
251  @property
253  """ A list of mapped residue whose names do not match (eg. ALA in the
254  reference and LEU in the model).
255 
256  The mismatches are reported as a tuple of :class:`~ost.mol.ResidueView`
257  (reference, model), or as an empty list if all the residue names match.
258 
259  :type: :class:`list`
260  """
261  if self._inconsistent_residues_inconsistent_residues is None:
262  self._inconsistent_residues_inconsistent_residues = self._GetInconsistentResidues_GetInconsistentResidues(
263  self.ref_residuesref_residues, self.mdl_residuesmdl_residues)
264  return self._inconsistent_residues_inconsistent_residues
265 
266  @property
267  def ref_bb_pos(self):
268  """ Representative backbone positions for reference residues.
269 
270  Thats CA positions for peptides and C3' positions for Nucleotides.
271 
272  :type: :class:`geom.Vec3List`
273  """
274  if self._ref_bb_pos_ref_bb_pos is None:
275  self._ref_bb_pos_ref_bb_pos = self._GetBBPos_GetBBPos(self.ref_residuesref_residues)
276  return self._ref_bb_pos_ref_bb_pos
277 
278  @property
279  def mdl_bb_pos(self):
280  """ Representative backbone positions for model residues.
281 
282  Thats CA positions for peptides and C3' positions for Nucleotides.
283 
284  :type: :class:`geom.Vec3List`
285  """
286  if self._mdl_bb_pos_mdl_bb_pos is None:
287  self._mdl_bb_pos_mdl_bb_pos = self._GetBBPos_GetBBPos(self.mdl_residuesmdl_residues)
288  return self._mdl_bb_pos_mdl_bb_pos
289 
290  @property
291  def ref_full_bb_pos(self):
292  """ Representative backbone positions for reference residues.
293 
294  Thats N, CA and C positions for peptides and O5', C5', C4', C3', O3'
295  positions for Nucleotides.
296 
297  :type: :class:`geom.Vec3List`
298  """
299  if self._ref_full_bb_pos_ref_full_bb_pos is None:
300  self._ref_full_bb_pos_ref_full_bb_pos = self._GetFullBBPos_GetFullBBPos(self.ref_residuesref_residues)
301  return self._ref_full_bb_pos_ref_full_bb_pos
302 
303  @property
304  def mdl_full_bb_pos(self):
305  """ Representative backbone positions for reference residues.
306 
307  Thats N, CA and C positions for peptides and O5', C5', C4', C3', O3'
308  positions for Nucleotides.
309 
310  :type: :class:`geom.Vec3List`
311  """
312  if self._mdl_full_bb_pos_mdl_full_bb_pos is None:
313  self._mdl_full_bb_pos_mdl_full_bb_pos = self._GetFullBBPos_GetFullBBPos(self.mdl_residuesmdl_residues)
314  return self._mdl_full_bb_pos_mdl_full_bb_pos
315 
316 
317  @property
318  def superposition(self):
319  """ Superposition of mdl residues onto ref residues
320 
321  Superposition computed as minimal RMSD superposition on
322  :attr:`ref_bb_pos` and :attr:`mdl_bb_pos`. If number of positions is
323  smaller 3, the full_bb_pos equivalents are used instead.
324 
325  :type: :class:`ost.mol.alg.SuperpositionResult`
326  """
327  if self._superposition_superposition is None:
328  if len(self.mdl_bb_posmdl_bb_pos) < 3:
329  self._superposition_superposition = _GetSuperposition(self.mdl_full_bb_posmdl_full_bb_pos,
330  self.ref_full_bb_posref_full_bb_pos, False)
331  else:
332  self._superposition_superposition = _GetSuperposition(self.mdl_bb_posmdl_bb_pos,
333  self.ref_bb_posref_bb_pos, False)
334  return self._superposition_superposition
335 
336  @property
337  def transform(self):
338  """ Transformation to superpose mdl residues onto ref residues
339 
340  :type: :class:`ost.geom.Mat4`
341  """
342  return self.superpositionsuperposition.transformation
343 
344  @property
346  """ :attr:`mdl_bb_pos` with :attr:`transform applied`
347 
348  :type: :class:`geom.Vec3List`
349  """
350  if self._superposed_mdl_bb_pos_superposed_mdl_bb_pos is None:
351  self._superposed_mdl_bb_pos_superposed_mdl_bb_pos = geom.Vec3List(self.mdl_bb_posmdl_bb_pos)
352  self._superposed_mdl_bb_pos_superposed_mdl_bb_pos.ApplyTransform(self.transformtransform)
353  return self._superposed_mdl_bb_pos_superposed_mdl_bb_pos
354 
355  @property
356  def bb_rmsd(self):
357  """ RMSD of the binding site backbone atoms after :attr:`superposition`
358 
359  :type: :class:`float`
360  """
361  return self.superpositionsuperposition.rmsd
362 
363 
364  @property
365  def ost_query(self):
366  """ query for mdl residues in OpenStructure query language
367 
368  Repr can be selected as ``full_mdl.Select(ost_query)``
369 
370  Returns invalid query if residue numbers have insertion codes.
371 
372  :type: :class:`str`
373  """
374  if self._ost_query_ost_query is None:
375  chain_rnums = dict()
376  for r in self.mdl_residuesmdl_residues:
377  chname = r.GetChain().GetName()
378  rnum = r.GetNumber().GetNum()
379  if chname not in chain_rnums:
380  chain_rnums[chname] = list()
381  chain_rnums[chname].append(str(rnum))
382  chain_queries = list()
383  for k,v in chain_rnums.items():
384  q = f"(cname={mol.QueryQuoteName(k)} and "
385  q += f"rnum={','.join(v)})"
386  chain_queries.append(q)
387  self._ost_query_ost_query = " or ".join(chain_queries)
388  return self._ost_query_ost_query
389 
390  def JSONSummary(self):
391  """ Returns JSON serializable summary of results
392  """
393  json_dict = dict()
394  json_dict["lDDT"] = self.lDDTlDDT
395  json_dict["ref_residues"] = [r.GetQualifiedName() for r in \
396  self.ref_residuesref_residues]
397  json_dict["mdl_residues"] = [r.GetQualifiedName() for r in \
398  self.mdl_residuesmdl_residues]
399  json_dict["transform"] = list(self.transformtransform.data)
400  json_dict["bb_rmsd"] = self.bb_rmsdbb_rmsd
401  json_dict["ost_query"] = self.ost_queryost_query
402  json_dict["flat_mapping"] = self.GetFlatChainMappingGetFlatChainMapping()
403  return json_dict
404 
405  def GetFlatChainMapping(self, mdl_as_key=False):
406  """ Returns flat mapping of all chains in the representation
407 
408  :param mdl_as_key: Default is target chain name as key and model chain
409  name as value. This can be reversed with this flag.
410  :returns: :class:`dict` with :class:`str` as key/value that describe
411  one-to-one mapping
412  """
413  flat_mapping = dict()
414  for trg_res, mdl_res in zip(self.ref_residuesref_residues, self.mdl_residuesmdl_residues):
415  if mdl_as_key:
416  flat_mapping[mdl_res.chain.name] = trg_res.chain.name
417  else:
418  flat_mapping[trg_res.chain.name] = mdl_res.chain.name
419  return flat_mapping
420 
421  def _GetFullBBPos(self, residues):
422  """ Helper to extract full backbone positions
423  """
424  exp_pep_atoms = ["N", "CA", "C"]
425  exp_nuc_atoms = ["\"O5'\"", "\"C5'\"", "\"C4'\"", "\"C3'\"", "\"O3'\""]
426  bb_pos = geom.Vec3List()
427  for r in residues:
428  if r.GetChemType() == mol.ChemType.NUCLEOTIDES:
429  exp_atoms = exp_nuc_atoms
430  elif r.GetChemType() == mol.ChemType.AMINOACIDS:
431  exp_atoms = exp_pep_atoms
432  else:
433  raise RuntimeError("Something terrible happened... RUN...")
434  for aname in exp_atoms:
435  a = r.FindAtom(aname)
436  if not a.IsValid():
437  raise RuntimeError("Something terrible happened... "
438  "RUN...")
439  bb_pos.append(a.GetPos())
440  return bb_pos
441 
442  def _GetBBPos(self, residues):
443  """ Helper to extract single representative position for each residue
444  """
445  bb_pos = geom.Vec3List()
446  for r in residues:
447  at = r.FindAtom("CA")
448  if not at.IsValid():
449  at = r.FindAtom("C3'")
450  if not at.IsValid():
451  raise RuntimeError("Something terrible happened... RUN...")
452  bb_pos.append(at.GetPos())
453  return bb_pos
454 
455  def _GetInconsistentResidues(self, ref_residues, mdl_residues):
456  """ Helper to extract a list of inconsistent residues.
457  """
458  if len(ref_residues) != len(mdl_residues):
459  raise ValueError("Something terrible happened... Reference and "
460  "model lengths differ... RUN...")
461  inconsistent_residues = list()
462  for ref_residue, mdl_residue in zip(ref_residues, mdl_residues):
463  if ref_residue.name != mdl_residue.name:
464  inconsistent_residues.append((ref_residue, mdl_residue))
465  return inconsistent_residues
466 
467 
469  """ Class to compute chain mappings
470 
471  All algorithms are performed on processed structures which fulfill
472  criteria as given in constructor arguments (*min_pep_length*,
473  "min_nuc_length") and only contain residues which have all required backbone
474  atoms. for peptide residues thats N, CA, C and CB (no CB for GLY), for
475  nucleotide residues thats O5', C5', C4', C3' and O3'.
476 
477  Chain mapping is a three step process:
478 
479  * Group chemically identical chains in *target* using pairwise
480  alignments that are either computed with Needleman-Wunsch (NW) or
481  simply derived from residue numbers (*resnum_alignments* flag).
482  In case of NW, *pep_subst_mat*, *pep_gap_open* and *pep_gap_ext*
483  and their nucleotide equivalents are relevant. Two chains are
484  considered identical if they fulfill the *pep_seqid_thr* and
485  have at least *min_pep_length* aligned residues. Same logic
486  is applied for nucleotides with respective thresholds.
487 
488  * Map chains in an input model to these groups. Generating alignments
489  and the similarity criteria are the same as above. You can either
490  get the group mapping with :func:`GetChemMapping` or directly call
491  one of the full fletched one-to-one chain mapping functions which
492  execute that step internally.
493 
494  * Obtain one-to-one mapping for chains in an input model and
495  *target* with one of the available mapping functions. Just to get an
496  idea of complexity. If *target* and *model* are octamers, there are
497  ``8! = 40320`` possible chain mappings.
498 
499  :param target: Target structure onto which models are mapped.
500  Computations happen on a selection only containing
501  polypeptides and polynucleotides.
502  :type target: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
503  :param resnum_alignments: Use residue numbers instead of
504  Needleman-Wunsch to compute pairwise
505  alignments. Relevant for :attr:`~chem_groups`
506  and related attributes.
507  :type resnum_alignments: :class:`bool`
508  :param pep_seqid_thr: Threshold used to decide when two chains are
509  identical. 95 percent tolerates the few mutations
510  crystallographers like to do.
511  :type pep_seqid_thr: :class:`float`
512  :param nuc_seqid_thr: Nucleotide equivalent for *pep_seqid_thr*
513  :type nuc_seqid_thr: :class:`float`
514  :param pep_subst_mat: Substitution matrix to align peptide sequences,
515  irrelevant if *resnum_alignments* is True,
516  defaults to seq.alg.BLOSUM62
517  :type pep_subst_mat: :class:`ost.seq.alg.SubstWeightMatrix`
518  :param pep_gap_open: Gap open penalty to align peptide sequences,
519  irrelevant if *resnum_alignments* is True
520  :type pep_gap_open: :class:`int`
521  :param pep_gap_ext: Gap extension penalty to align peptide sequences,
522  irrelevant if *resnum_alignments* is True
523  :type pep_gap_ext: :class:`int`
524  :param nuc_subst_mat: Nucleotide equivalent for *pep_subst_mat*,
525  defaults to seq.alg.NUC44
526  :type nuc_subst_mat: :class:`ost.seq.alg.SubstWeightMatrix`
527  :param nuc_gap_open: Nucleotide equivalent for *pep_gap_open*
528  :type nuc_gap_open: :class:`int`
529  :param nuc_gap_ext: Nucleotide equivalent for *pep_gap_ext*
530  :type nuc_gap_ext: :class:`int`
531  :param min_pep_length: Minimal number of residues for a peptide chain to be
532  considered in target and in models.
533  :type min_pep_length: :class:`int`
534  :param min_nuc_length: Minimal number of residues for a nucleotide chain to be
535  considered in target and in models.
536  :type min_nuc_length: :class:`int`
537  :param n_max_naive: Max possible chain mappings that are enumerated in
538  :func:`~GetNaivelDDTMapping` /
539  :func:`~GetDecomposerlDDTMapping`. A
540  :class:`RuntimeError` is raised in case of bigger
541  complexity.
542  :type n_max_naive: :class:`int`
543  """
544  def __init__(self, target, resnum_alignments=False,
545  pep_seqid_thr = 95., nuc_seqid_thr = 95.,
546  pep_subst_mat = seq.alg.BLOSUM62, pep_gap_open = -11,
547  pep_gap_ext = -1, nuc_subst_mat = seq.alg.NUC44,
548  nuc_gap_open = -4, nuc_gap_ext = -4,
549  min_pep_length = 6, min_nuc_length = 4,
550  n_max_naive = 1e8):
551 
552  # attributes
553  self.resnum_alignmentsresnum_alignments = resnum_alignments
554  self.pep_seqid_thrpep_seqid_thr = pep_seqid_thr
555  self.nuc_seqid_thrnuc_seqid_thr = nuc_seqid_thr
556  self.min_pep_lengthmin_pep_length = min_pep_length
557  self.min_nuc_lengthmin_nuc_length = min_nuc_length
558  self.n_max_naiven_max_naive = n_max_naive
559 
560  # lazy computed attributes
561  self._chem_groups_chem_groups = None
562  self._chem_group_alignments_chem_group_alignments = None
563  self._chem_group_ref_seqs_chem_group_ref_seqs = None
564  self._chem_group_types_chem_group_types = None
565 
566  # helper class to generate pairwise alignments
567  self.aligneraligner = _Aligner(resnum_aln = resnum_alignments,
568  pep_subst_mat = pep_subst_mat,
569  pep_gap_open = pep_gap_open,
570  pep_gap_ext = pep_gap_ext,
571  nuc_subst_mat = nuc_subst_mat,
572  nuc_gap_open = nuc_gap_open,
573  nuc_gap_ext = nuc_gap_ext)
574 
575  # target structure preprocessing
576  self._target, self._polypep_seqs, self._polynuc_seqs_polynuc_seqs = \
577  self.ProcessStructureProcessStructure(target)
578 
579  @property
580  def target(self):
581  """Target structure that only contains peptides/nucleotides
582 
583  Contains only residues that have the backbone representatives
584  (CA for peptide and C3' for nucleotides) to avoid ATOMSEQ alignment
585  inconsistencies when switching between all atom and backbone only
586  representations.
587 
588  :type: :class:`ost.mol.EntityView`
589  """
590  return self._target
591 
592  @property
593  def polypep_seqs(self):
594  """Sequences of peptide chains in :attr:`~target`
595 
596  Respective :class:`EntityView` from *target* for each sequence s are
597  available as ``s.GetAttachedView()``
598 
599  :type: :class:`ost.seq.SequenceList`
600  """
601  return self._polypep_seqs
602 
603  @property
604  def polynuc_seqs(self):
605  """Sequences of nucleotide chains in :attr:`~target`
606 
607  Respective :class:`EntityView` from *target* for each sequence s are
608  available as ``s.GetAttachedView()``
609 
610  :type: :class:`ost.seq.SequenceList`
611  """
612  return self._polynuc_seqs_polynuc_seqs
613 
614  @property
615  def chem_groups(self):
616  """Groups of chemically equivalent chains in :attr:`~target`
617 
618  First chain in group is the one with longest sequence.
619 
620  :getter: Computed on first use (cached)
621  :type: :class:`list` of :class:`list` of :class:`str` (chain names)
622  """
623  if self._chem_groups_chem_groups is None:
624  self._chem_groups_chem_groups = list()
625  for a in self.chem_group_alignmentschem_group_alignments:
626  self._chem_groups_chem_groups.append([s.GetName() for s in a.sequences])
627  return self._chem_groups_chem_groups
628 
629  @property
631  """MSA for each group in :attr:`~chem_groups`
632 
633  Sequences in MSAs exhibit same order as in :attr:`~chem_groups` and
634  have the respective :class:`ost.mol.EntityView` from *target* attached.
635 
636  :getter: Computed on first use (cached)
637  :type: :class:`ost.seq.AlignmentList`
638  """
639  if self._chem_group_alignments_chem_group_alignments is None:
640  self._chem_group_alignments_chem_group_alignments, self._chem_group_types_chem_group_types = \
641  _GetChemGroupAlignments(self.polypep_seqspolypep_seqs, self.polynuc_seqspolynuc_seqs,
642  self.aligneraligner,
643  pep_seqid_thr=self.pep_seqid_thrpep_seqid_thr,
644  min_pep_length=self.min_pep_lengthmin_pep_length,
645  nuc_seqid_thr=self.nuc_seqid_thrnuc_seqid_thr,
646  min_nuc_length=self.min_nuc_lengthmin_nuc_length)
647 
648  return self._chem_group_alignments_chem_group_alignments
649 
650  @property
652  """Reference (longest) sequence for each group in :attr:`~chem_groups`
653 
654  Respective :class:`EntityView` from *target* for each sequence s are
655  available as ``s.GetAttachedView()``
656 
657  :getter: Computed on first use (cached)
658  :type: :class:`ost.seq.SequenceList`
659  """
660  if self._chem_group_ref_seqs_chem_group_ref_seqs is None:
661  self._chem_group_ref_seqs_chem_group_ref_seqs = seq.CreateSequenceList()
662  for a in self.chem_group_alignmentschem_group_alignments:
663  s = seq.CreateSequence(a.GetSequence(0).GetName(),
664  a.GetSequence(0).GetGaplessString())
665  s.AttachView(a.GetSequence(0).GetAttachedView())
666  self._chem_group_ref_seqs_chem_group_ref_seqs.AddSequence(s)
667  return self._chem_group_ref_seqs_chem_group_ref_seqs
668 
669  @property
670  def chem_group_types(self):
671  """ChemType of each group in :attr:`~chem_groups`
672 
673  Specifying if groups are poly-peptides/nucleotides, i.e.
674  :class:`ost.mol.ChemType.AMINOACIDS` or
675  :class:`ost.mol.ChemType.NUCLEOTIDES`
676 
677  :getter: Computed on first use (cached)
678  :type: :class:`list` of :class:`ost.mol.ChemType`
679  """
680  if self._chem_group_types_chem_group_types is None:
681  self._chem_group_alignments_chem_group_alignments, self._chem_group_types_chem_group_types = \
682  _GetChemGroupAlignments(self.polypep_seqspolypep_seqs, self.polynuc_seqspolynuc_seqs,
683  self.aligneraligner,
684  pep_seqid_thr=self.pep_seqid_thrpep_seqid_thr,
685  min_pep_length=self.min_pep_lengthmin_pep_length,
686  nuc_seqid_thr=self.nuc_seqid_thrnuc_seqid_thr,
687  min_nuc_length=self.min_nuc_lengthmin_nuc_length)
688 
689  return self._chem_group_types_chem_group_types
690 
691  def GetChemMapping(self, model):
692  """Maps sequences in *model* to chem_groups of target
693 
694  :param model: Model from which to extract sequences, a
695  selection that only includes peptides and nucleotides
696  is performed and returned along other results.
697  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
698  :returns: Tuple with two lists of length `len(self.chem_groups)` and
699  an :class:`ost.mol.EntityView` representing *model*:
700  1) Each element is a :class:`list` with mdl chain names that
701  map to the chem group at that position.
702  2) Each element is a :class:`ost.seq.AlignmentList` aligning
703  these mdl chain sequences to the chem group ref sequences.
704  3) A selection of *model* that only contains polypeptides and
705  polynucleotides whose ATOMSEQ exactly matches the sequence
706  info in the returned alignments.
707  """
708  mdl, mdl_pep_seqs, mdl_nuc_seqs = self.ProcessStructureProcessStructure(model)
709  mapping = [list() for x in self.chem_groupschem_groups]
710  alns = [seq.AlignmentList() for x in self.chem_groupschem_groups]
711 
712  for s in mdl_pep_seqs:
713  idx, aln = _MapSequence(self.chem_group_ref_seqschem_group_ref_seqs,
714  self.chem_group_typeschem_group_types,
715  s, mol.ChemType.AMINOACIDS,
716  self.aligneraligner)
717  if idx is not None:
718  mapping[idx].append(s.GetName())
719  alns[idx].append(aln)
720 
721  for s in mdl_nuc_seqs:
722  idx, aln = _MapSequence(self.chem_group_ref_seqschem_group_ref_seqs,
723  self.chem_group_typeschem_group_types,
724  s, mol.ChemType.NUCLEOTIDES,
725  self.aligneraligner)
726  if idx is not None:
727  mapping[idx].append(s.GetName())
728  alns[idx].append(aln)
729 
730  return (mapping, alns, mdl)
731 
732 
733  def GetlDDTMapping(self, model, inclusion_radius=15.0,
734  thresholds=[0.5, 1.0, 2.0, 4.0], strategy="heuristic",
735  steep_opt_rate = None, block_seed_size = 5,
736  block_blocks_per_chem_group = 5,
737  chem_mapping_result = None,
738  heuristic_n_max_naive = 40320):
739  """ Identify chain mapping by optimizing lDDT score
740 
741  Maps *model* chain sequences to :attr:`~chem_groups` and find mapping
742  based on backbone only lDDT score (CA for amino acids C3' for
743  Nucleotides).
744 
745  Either performs a naive search, i.e. enumerate all possible mappings or
746  executes a greedy strategy that tries to identify a (close to) optimal
747  mapping in an iterative way by starting from a start mapping (seed). In
748  each iteration, the one-to-one mapping that leads to highest increase
749  in number of conserved contacts is added with the additional requirement
750  that this added mapping must have non-zero interface counts towards the
751  already mapped chains. So basically we're "growing" the mapped structure
752  by only adding connected stuff.
753 
754  The available strategies:
755 
756  * **naive**: Enumerates all possible mappings and returns best
757 
758  * **greedy_fast**: perform all vs. all single chain lDDTs within the
759  respective ref/mdl chem groups. The mapping with highest number of
760  conserved contacts is selected as seed for greedy extension
761 
762  * **greedy_full**: try multiple seeds for greedy extension, i.e. try
763  all ref/mdl chain combinations within the respective chem groups and
764  retain the mapping leading to the best lDDT.
765 
766  * **greedy_block**: try multiple seeds for greedy extension, i.e. try
767  all ref/mdl chain combinations within the respective chem groups and
768  extend them to *block_seed_size*. *block_blocks_per_chem_group*
769  for each chem group are selected for exhaustive extension.
770 
771  * **heuristic**: Uses *naive* strategy if number of possible mappings
772  is within *heuristic_n_max_naive*. The default of 40320 corresponds
773  to an octamer (8!=40320). A structure with stoichiometry A6B2 would be
774  6!*2!=1440 etc. If the number of possible mappings is larger,
775  *greedy_full* is used.
776 
777  Sets :attr:`MappingResult.opt_score` in case of no trivial one-to-one
778  mapping.
779 
780  :param model: Model to map
781  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
782  :param inclusion_radius: Inclusion radius for lDDT
783  :type inclusion_radius: :class:`float`
784  :param thresholds: Thresholds for lDDT
785  :type thresholds: :class:`list` of :class:`float`
786  :param strategy: Strategy to find mapping. Must be in ["naive",
787  "greedy_fast", "greedy_full", "greedy_block"]
788  :type strategy: :class:`str`
789  :param steep_opt_rate: Only relevant for greedy strategies.
790  If set, every *steep_opt_rate* mappings, a simple
791  optimization is executed with the goal of
792  avoiding local minima. The optimization
793  iteratively checks all possible swaps of mappings
794  within their respective chem groups and accepts
795  swaps that improve lDDT score. Iteration stops as
796  soon as no improvement can be achieved anymore.
797  :type steep_opt_rate: :class:`int`
798  :param block_seed_size: Param for *greedy_block* strategy - Initial seeds
799  are extended by that number of chains.
800  :type block_seed_size: :class:`int`
801  :param block_blocks_per_chem_group: Param for *greedy_block* strategy -
802  Number of blocks per chem group that
803  are extended in an initial search
804  for high scoring local solutions.
805  :type block_blocks_per_chem_group: :class:`int`
806  :param chem_mapping_result: Pro param. The result of
807  :func:`~GetChemMapping` where you provided
808  *model*. If set, *model* parameter is not
809  used.
810  :type chem_mapping_result: :class:`tuple`
811  :returns: A :class:`MappingResult`
812  """
813 
814  strategies = ["naive", "greedy_fast", "greedy_full", "greedy_block",
815  "heuristic"]
816  if strategy not in strategies:
817  raise RuntimeError(f"Strategy must be in {strategies}")
818 
819  if chem_mapping_result is None:
820  chem_mapping, chem_group_alns, mdl = self.GetChemMappingGetChemMapping(model)
821  else:
822  chem_mapping, chem_group_alns, mdl = chem_mapping_result
823 
824  ref_mdl_alns = _GetRefMdlAlns(self.chem_groupschem_groups,
825  self.chem_group_alignmentschem_group_alignments,
826  chem_mapping,
827  chem_group_alns)
828 
829  # check for the simplest case
830  one_to_one = _CheckOneToOneMapping(self.chem_groupschem_groups, chem_mapping)
831  if one_to_one is not None:
832  alns = dict()
833  for ref_group, mdl_group in zip(self.chem_groupschem_groups, one_to_one):
834  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
835  if ref_ch is not None and mdl_ch is not None:
836  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
837  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
838  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
839  alns[(ref_ch, mdl_ch)] = aln
840  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
841  one_to_one, alns)
842 
843  if strategy == "heuristic":
844  if _NMappingsWithin(self.chem_groupschem_groups, chem_mapping,
845  heuristic_n_max_naive):
846  strategy = "naive"
847  else:
848  strategy = "greedy_full"
849 
850  mapping = None
851  opt_lddt = None
852 
853  if strategy == "naive":
854  mapping, opt_lddt = _lDDTNaive(self.targettarget, mdl, inclusion_radius,
855  thresholds, self.chem_groupschem_groups,
856  chem_mapping, ref_mdl_alns,
857  self.n_max_naiven_max_naive)
858  else:
859  # its one of the greedy strategies - setup greedy searcher
860  the_greed = _lDDTGreedySearcher(self.targettarget, mdl, self.chem_groupschem_groups,
861  chem_mapping, ref_mdl_alns,
862  inclusion_radius=inclusion_radius,
863  thresholds=thresholds,
864  steep_opt_rate=steep_opt_rate)
865  if strategy == "greedy_fast":
866  mapping = _lDDTGreedyFast(the_greed)
867  elif strategy == "greedy_full":
868  mapping = _lDDTGreedyFull(the_greed)
869  elif strategy == "greedy_block":
870  mapping = _lDDTGreedyBlock(the_greed, block_seed_size,
871  block_blocks_per_chem_group)
872  # cached => lDDT computation is fast here
873  opt_lddt = the_greed.Score(mapping)
874 
875  alns = dict()
876  for ref_group, mdl_group in zip(self.chem_groupschem_groups, mapping):
877  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
878  if ref_ch is not None and mdl_ch is not None:
879  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
880  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
881  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
882  alns[(ref_ch, mdl_ch)] = aln
883 
884  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
885  mapping, alns, opt_score = opt_lddt)
886 
887 
888  def GetQSScoreMapping(self, model, contact_d = 12.0, strategy = "heuristic",
889  block_seed_size = 5, block_blocks_per_chem_group = 5,
890  heuristic_n_max_naive = 40320, steep_opt_rate = None,
891  chem_mapping_result = None,
892  greedy_prune_contact_map = True):
893  """ Identify chain mapping based on QSScore
894 
895  Scoring is based on CA/C3' positions which are present in all chains of
896  a :attr:`chem_groups` as well as the *model* chains which are mapped to
897  that respective chem group.
898 
899  The following strategies are available:
900 
901  * **naive**: Naively iterate all possible mappings and return best based
902  on QS score.
903 
904  * **greedy_fast**: perform all vs. all single chain lDDTs within the
905  respective ref/mdl chem groups. The mapping with highest number of
906  conserved contacts is selected as seed for greedy extension.
907  Extension is based on QS-score.
908 
909  * **greedy_full**: try multiple seeds for greedy extension, i.e. try
910  all ref/mdl chain combinations within the respective chem groups and
911  retain the mapping leading to the best QS-score.
912 
913  * **greedy_block**: try multiple seeds for greedy extension, i.e. try
914  all ref/mdl chain combinations within the respective chem groups and
915  extend them to *block_seed_size*. *block_blocks_per_chem_group*
916  for each chem group are selected for exhaustive extension.
917 
918  * **heuristic**: Uses *naive* strategy if number of possible mappings
919  is within *heuristic_n_max_naive*. The default of 40320 corresponds
920  to an octamer (8!=40320). A structure with stoichiometry A6B2 would be
921  6!*2!=1440 etc. If the number of possible mappings is larger,
922  *greedy_full* is used.
923 
924  Sets :attr:`MappingResult.opt_score` in case of no trivial one-to-one
925  mapping.
926 
927  :param model: Model to map
928  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
929  :param contact_d: Max distance between two residues to be considered as
930  contact in qs scoring
931  :type contact_d: :class:`float`
932  :param strategy: Strategy for sampling, must be in ["naive",
933  "greedy_fast", "greedy_full", "greedy_block"]
934  :type strategy: :class:`str`
935  :param chem_mapping_result: Pro param. The result of
936  :func:`~GetChemMapping` where you provided
937  *model*. If set, *model* parameter is not
938  used.
939  :type chem_mapping_result: :class:`tuple`
940  :param greedy_prune_contact_map: Relevant for all strategies that use
941  greedy extensions. If True, only chains
942  with at least 3 contacts (8A CB
943  distance) towards already mapped chains
944  in trg/mdl are considered for
945  extension. All chains that give a
946  potential non-zero QS-score increase
947  are used otherwise (at least one
948  contact within 12A). The consequence
949  is reduced runtime and usually no
950  real reduction in accuracy.
951  :returns: A :class:`MappingResult`
952  """
953 
954  strategies = ["naive", "greedy_fast", "greedy_full", "greedy_block", "heuristic"]
955  if strategy not in strategies:
956  raise RuntimeError(f"strategy must be {strategies}")
957 
958  if chem_mapping_result is None:
959  chem_mapping, chem_group_alns, mdl = self.GetChemMappingGetChemMapping(model)
960  else:
961  chem_mapping, chem_group_alns, mdl = chem_mapping_result
962  ref_mdl_alns = _GetRefMdlAlns(self.chem_groupschem_groups,
963  self.chem_group_alignmentschem_group_alignments,
964  chem_mapping,
965  chem_group_alns)
966  # check for the simplest case
967  one_to_one = _CheckOneToOneMapping(self.chem_groupschem_groups, chem_mapping)
968  if one_to_one is not None:
969  alns = dict()
970  for ref_group, mdl_group in zip(self.chem_groupschem_groups, one_to_one):
971  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
972  if ref_ch is not None and mdl_ch is not None:
973  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
974  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
975  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
976  alns[(ref_ch, mdl_ch)] = aln
977  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
978  one_to_one, alns)
979 
980  if strategy == "heuristic":
981  if _NMappingsWithin(self.chem_groupschem_groups, chem_mapping,
982  heuristic_n_max_naive):
983  strategy = "naive"
984  else:
985  strategy = "greedy_full"
986 
987  mapping = None
988  opt_qsscore = None
989 
990  if strategy == "naive":
991  mapping, opt_qsscore = _QSScoreNaive(self.targettarget, mdl,
992  self.chem_groupschem_groups,
993  chem_mapping, ref_mdl_alns,
994  contact_d, self.n_max_naiven_max_naive)
995  else:
996  # its one of the greedy strategies - setup greedy searcher
997  the_greed = _QSScoreGreedySearcher(self.targettarget, mdl,
998  self.chem_groupschem_groups,
999  chem_mapping, ref_mdl_alns,
1000  contact_d = contact_d,
1001  steep_opt_rate=steep_opt_rate,
1002  greedy_prune_contact_map = greedy_prune_contact_map)
1003  if strategy == "greedy_fast":
1004  mapping = _QSScoreGreedyFast(the_greed)
1005  elif strategy == "greedy_full":
1006  mapping = _QSScoreGreedyFull(the_greed)
1007  elif strategy == "greedy_block":
1008  mapping = _QSScoreGreedyBlock(the_greed, block_seed_size,
1009  block_blocks_per_chem_group)
1010  # cached => QSScore computation is fast here
1011  opt_qsscore = the_greed.Score(mapping, check=False).QS_global
1012 
1013  alns = dict()
1014  for ref_group, mdl_group in zip(self.chem_groupschem_groups, mapping):
1015  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
1016  if ref_ch is not None and mdl_ch is not None:
1017  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1018  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
1019  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
1020  alns[(ref_ch, mdl_ch)] = aln
1021 
1022  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
1023  mapping, alns, opt_score = opt_qsscore)
1024 
1025  def GetRMSDMapping(self, model, strategy = "heuristic", subsampling=50,
1026  chem_mapping_result = None, heuristic_n_max_naive = 120):
1027  """Identify chain mapping based on minimal RMSD superposition
1028 
1029  Superposition and scoring is based on CA/C3' positions which are present
1030  in all chains of a :attr:`chem_groups` as well as the *model*
1031  chains which are mapped to that respective chem group.
1032 
1033  The following strategies are available:
1034 
1035  * **naive**: Naively iterate all possible mappings and return the one
1036  with lowes RMSD.
1037 
1038  * **greedy_single**: perform all vs. all single chain superpositions
1039  within the respective ref/mdl chem groups to use as starting points.
1040  For each starting point, iteratively add the model/target chain pair
1041  with lowest RMSD until a full mapping is achieved. The mapping with
1042  lowest RMSD is returned.
1043 
1044  * **greedy_iterative**: Same as greedy_single_rmsd exept that the
1045  transformation gets updated with each added chain pair.
1046 
1047  * **heuristic**: Uses *naive* strategy if number of possible mappings
1048  is within *heuristic_n_max_naive*. The default of 120 corresponds
1049  to a homo-pentamer (5!=120). If the number of possible mappings is
1050  larger, *greedy_iterative* is used.
1051 
1052  :param model: Model to map
1053  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
1054  :param strategy: Strategy for sampling. Must be in ["naive",
1055  "greedy_single", "greedy_iterative"]
1056  :type strategy: :class:`str`
1057  :param subsampling: If given, only an equally distributed subset
1058  of CA/C3' positions in each chain are used for
1059  superposition/scoring.
1060  :type subsampling: :class:`int`
1061  :param chem_mapping_result: Pro param. The result of
1062  :func:`~GetChemMapping` where you provided
1063  *model*. If set, *model* parameter is not
1064  used.
1065  :type chem_mapping_result: :class:`tuple`
1066  :returns: A :class:`MappingResult`
1067  """
1068 
1069  strategies = ["naive", "greedy_single", "greedy_iterative", "heuristic"]
1070 
1071  if strategy not in strategies:
1072  raise RuntimeError(f"strategy must be {strategies}")
1073 
1074  if chem_mapping_result is None:
1075  chem_mapping, chem_group_alns, mdl = self.GetChemMappingGetChemMapping(model)
1076  else:
1077  chem_mapping, chem_group_alns, mdl = chem_mapping_result
1078  ref_mdl_alns = _GetRefMdlAlns(self.chem_groupschem_groups,
1079  self.chem_group_alignmentschem_group_alignments,
1080  chem_mapping,
1081  chem_group_alns)
1082 
1083  # check for the simplest case
1084  one_to_one = _CheckOneToOneMapping(self.chem_groupschem_groups, chem_mapping)
1085  if one_to_one is not None:
1086  alns = dict()
1087  for ref_group, mdl_group in zip(self.chem_groupschem_groups, one_to_one):
1088  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
1089  if ref_ch is not None and mdl_ch is not None:
1090  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1091  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
1092  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
1093  alns[(ref_ch, mdl_ch)] = aln
1094  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
1095  one_to_one, alns)
1096 
1097  trg_group_pos, mdl_group_pos = _GetRefPos(self.targettarget, mdl,
1098  self.chem_group_alignmentschem_group_alignments,
1099  chem_group_alns,
1100  max_pos = subsampling)
1101 
1102  if strategy == "heuristic":
1103  if _NMappingsWithin(self.chem_groupschem_groups, chem_mapping,
1104  heuristic_n_max_naive):
1105  strategy = "naive"
1106  else:
1107  strategy = "greedy_iterative"
1108 
1109  mapping = None
1110 
1111  if strategy.startswith("greedy"):
1112  # get transforms of any mdl chain onto any trg chain in same chem
1113  # group that fulfills gdtts threshold
1114  initial_transforms = list()
1115  initial_mappings = list()
1116  for trg_pos, trg_chains, mdl_pos, mdl_chains in zip(trg_group_pos,
1117  self.chem_groupschem_groups,
1118  mdl_group_pos,
1119  chem_mapping):
1120  for t_pos, t in zip(trg_pos, trg_chains):
1121  for m_pos, m in zip(mdl_pos, mdl_chains):
1122  if len(t_pos) >= 3 and len(m_pos) >= 3:
1123  transform = _GetSuperposition(m_pos, t_pos,
1124  False).transformation
1125  initial_transforms.append(transform)
1126  initial_mappings.append((t,m))
1127 
1128  if strategy == "greedy_single":
1129  mapping = _SingleRigidRMSD(initial_transforms,
1130  initial_mappings,
1131  self.chem_groupschem_groups,
1132  chem_mapping,
1133  trg_group_pos,
1134  mdl_group_pos)
1135 
1136 
1137  elif strategy == "greedy_iterative":
1138  mapping = _IterativeRigidRMSD(initial_transforms,
1139  initial_mappings,
1140  self.chem_groupschem_groups, chem_mapping,
1141  trg_group_pos, mdl_group_pos)
1142  elif strategy == "naive":
1143  mapping = _NaiveRMSD(self.chem_groupschem_groups, chem_mapping,
1144  trg_group_pos, mdl_group_pos,
1145  self.n_max_naiven_max_naive)
1146 
1147  # translate mapping format and return
1148  final_mapping = list()
1149  for ref_chains in self.chem_groupschem_groups:
1150  mapped_mdl_chains = list()
1151  for ref_ch in ref_chains:
1152  if ref_ch in mapping:
1153  mapped_mdl_chains.append(mapping[ref_ch])
1154  else:
1155  mapped_mdl_chains.append(None)
1156  final_mapping.append(mapped_mdl_chains)
1157 
1158  alns = dict()
1159  for ref_group, mdl_group in zip(self.chem_groupschem_groups, final_mapping):
1160  for ref_ch, mdl_ch in zip(ref_group, mdl_group):
1161  if ref_ch is not None and mdl_ch is not None:
1162  aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1163  aln.AttachView(0, _CSel(self.targettarget, [ref_ch]))
1164  aln.AttachView(1, _CSel(mdl, [mdl_ch]))
1165  alns[(ref_ch, mdl_ch)] = aln
1166 
1167  return MappingResult(self.targettarget, mdl, self.chem_groupschem_groups, chem_mapping,
1168  final_mapping, alns)
1169 
1170  def GetMapping(self, model, n_max_naive = 40320):
1171  """ Convenience function to get mapping with currently preferred method
1172 
1173  If number of possible chain mappings is <= *n_max_naive*, a naive
1174  QS-score mapping is performed and optimal QS-score is guaranteed.
1175  For anything else, a QS-score mapping with the greedy_full strategy is
1176  performed (greedy_prune_contact_map = True). The default for
1177  *n_max_naive* of 40320 corresponds to an octamer (8!=40320). A
1178  structure with stoichiometry A6B2 would be 6!*2!=1440 etc.
1179 
1180  If :attr:`~target` has nucleotide sequences, the QS-score target
1181  function is replaced with a backbone only lDDT score that has
1182  an inclusion radius of 30A.
1183  """
1184  if len(self.polynuc_seqspolynuc_seqs) > 0:
1185  return self.GetlDDTMappingGetlDDTMapping(model, strategy = "heuristic",
1186  inclusion_radius = 30.0,
1187  heuristic_n_max_naive = n_max_naive)
1188  else:
1189  return self.GetQSScoreMappingGetQSScoreMapping(model, strategy="heuristic",
1190  greedy_prune_contact_map=True,
1191  heuristic_n_max_naive = n_max_naive)
1192 
1193  def GetRepr(self, substructure, model, topn=1, inclusion_radius=15.0,
1194  thresholds=[0.5, 1.0, 2.0, 4.0], bb_only=False,
1195  only_interchain=False, chem_mapping_result = None,
1196  global_mapping = None):
1197  """ Identify *topn* representations of *substructure* in *model*
1198 
1199  *substructure* defines a subset of :attr:`~target` for which one
1200  wants the *topn* representations in *model*. Representations are scored
1201  and sorted by lDDT.
1202 
1203  :param substructure: A :class:`ost.mol.EntityView` which is a subset of
1204  :attr:`~target`. Should be selected with the
1205  OpenStructure query language. Example: if you're
1206  interested in residues with number 42,43 and 85 in
1207  chain A:
1208  ``substructure=mapper.target.Select("cname=A and rnum=42,43,85")``
1209  A :class:`RuntimeError` is raised if *substructure*
1210  does not refer to the same underlying
1211  :class:`ost.mol.EntityHandle` as :attr:`~target`.
1212  :type substructure: :class:`ost.mol.EntityView`
1213  :param model: Structure in which one wants to find representations for
1214  *substructure*
1215  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
1216  :param topn: Max number of representations that are returned
1217  :type topn: :class:`int`
1218  :param inclusion_radius: Inclusion radius for lDDT
1219  :type inclusion_radius: :class:`float`
1220  :param thresholds: Thresholds for lDDT
1221  :type thresholds: :class:`list` of :class:`float`
1222  :param bb_only: Only consider backbone atoms in lDDT computation
1223  :type bb_only: :class:`bool`
1224  :param only_interchain: Only score interchain contacts in lDDT. Useful
1225  if you want to identify interface patches.
1226  :type only_interchain: :class:`bool`
1227  :param chem_mapping_result: Pro param. The result of
1228  :func:`~GetChemMapping` where you provided
1229  *model*. If set, *model* parameter is not
1230  used.
1231  :type chem_mapping_result: :class:`tuple`
1232  :param global_mapping: Pro param. Specify a global mapping result. This
1233  fully defines the desired representation in the
1234  model but extracts it and enriches it with all
1235  the nice attributes of :class:`ReprResult`.
1236  The target attribute in *global_mapping* must be
1237  of the same entity as self.target and the model
1238  attribute of *global_mapping* must be of the same
1239  entity as *model*.
1240  :type global_mapping: :class:`MappingResult`
1241  :returns: :class:`list` of :class:`ReprResult`
1242  """
1243 
1244  if topn < 1:
1245  raise RuntimeError("topn must be >= 1")
1246 
1247  if global_mapping is not None:
1248  # ensure that this mapping is derived from the same structures
1249  if global_mapping.target.handle.GetHashCode() != \
1250  self.targettarget.handle.GetHashCode():
1251  raise RuntimeError("global_mapping.target must be the same "
1252  "entity as self.target")
1253  if global_mapping.model.handle.GetHashCode() != \
1254  model.handle.GetHashCode():
1255  raise RuntimeError("global_mapping.model must be the same "
1256  "entity as model param")
1257 
1258  # check whether substructure really is a subset of self.target
1259  for r in substructure.residues:
1260  ch_name = r.GetChain().GetName()
1261  rnum = r.GetNumber()
1262  target_r = self.targettarget.FindResidue(ch_name, rnum)
1263  if not target_r.IsValid():
1264  raise RuntimeError(f"substructure has residue "
1265  f"{r.GetQualifiedName()} which is not in "
1266  f"self.target")
1267  if target_r.handle.GetHashCode() != r.handle.GetHashCode():
1268  raise RuntimeError(f"substructure has residue "
1269  f"{r.GetQualifiedName()} which has an "
1270  f"equivalent in self.target but it does "
1271  f"not refer to the same underlying "
1272  f"EntityHandle")
1273  for a in r.atoms:
1274  target_a = target_r.FindAtom(a.GetName())
1275  if not target_a.IsValid():
1276  raise RuntimeError(f"substructure has atom "
1277  f"{a.GetQualifiedName()} which is not "
1278  f"in self.target")
1279  if a.handle.GetHashCode() != target_a.handle.GetHashCode():
1280  raise RuntimeError(f"substructure has atom "
1281  f"{a.GetQualifiedName()} which has an "
1282  f"equivalent in self.target but it does "
1283  f"not refer to the same underlying "
1284  f"EntityHandle")
1285 
1286  # check whether it contains either CA or C3'
1287  ca = r.FindAtom("CA")
1288  c3 = r.FindAtom("C3'") # FindAtom with prime in string is tested
1289  # and works
1290  if not ca.IsValid() and not c3.IsValid():
1291  raise RuntimeError("All residues in substructure must contain "
1292  "a backbone atom named CA or C3\'")
1293 
1294  # perform mapping and alignments on full structures
1295  if chem_mapping_result is None:
1296  chem_mapping, chem_group_alns, mdl = self.GetChemMappingGetChemMapping(model)
1297  else:
1298  chem_mapping, chem_group_alns, mdl = chem_mapping_result
1299  ref_mdl_alns = _GetRefMdlAlns(self.chem_groupschem_groups,
1300  self.chem_group_alignmentschem_group_alignments,
1301  chem_mapping,
1302  chem_group_alns)
1303 
1304  # Get residue indices relative to full target chain
1305  substructure_res_indices = dict()
1306  for ch in substructure.chains:
1307  full_ch = self.targettarget.FindChain(ch.GetName())
1308  idx = [full_ch.GetResidueIndex(r.GetNumber()) for r in ch.residues]
1309  substructure_res_indices[ch.GetName()] = idx
1310 
1311  # strip down variables to make them specific to substructure
1312  # keep only chem_groups which are present in substructure
1313  substructure_chem_groups = list()
1314  substructure_chem_mapping = list()
1315 
1316  chnames = set([ch.GetName() for ch in substructure.chains])
1317  for chem_group, mapping in zip(self.chem_groupschem_groups, chem_mapping):
1318  substructure_chem_group = [ch for ch in chem_group if ch in chnames]
1319  if len(substructure_chem_group) > 0:
1320  substructure_chem_groups.append(substructure_chem_group)
1321  substructure_chem_mapping.append(mapping)
1322 
1323  # early stopping if no mdl chain can be mapped to substructure
1324  n_mapped_mdl_chains = sum([len(m) for m in substructure_chem_mapping])
1325  if n_mapped_mdl_chains == 0:
1326  return list()
1327 
1328  # strip the reference sequence in alignments to only contain
1329  # sequence from substructure
1330  substructure_ref_mdl_alns = dict()
1331  mdl_views = dict()
1332  for ch in mdl.chains:
1333  mdl_views[ch.GetName()] = _CSel(mdl, [ch.GetName()])
1334  for chem_group, mapping in zip(substructure_chem_groups,
1335  substructure_chem_mapping):
1336  for ref_ch in chem_group:
1337  for mdl_ch in mapping:
1338  full_aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1339  ref_seq = full_aln.GetSequence(0)
1340  # the ref sequence is tricky... we start with a gap only
1341  # sequence and only add olcs as defined by the residue
1342  # indices that we extracted before...
1343  tmp = ['-'] * len(full_aln)
1344  for idx in substructure_res_indices[ref_ch]:
1345  idx_in_seq = ref_seq.GetPos(idx)
1346  tmp[idx_in_seq] = ref_seq[idx_in_seq]
1347  ref_seq = seq.CreateSequence(ref_ch, ''.join(tmp))
1348  ref_seq.AttachView(_CSel(substructure, [ref_ch]))
1349  mdl_seq = full_aln.GetSequence(1)
1350  mdl_seq = seq.CreateSequence(mdl_seq.GetName(),
1351  mdl_seq.GetString())
1352  mdl_seq.AttachView(mdl_views[mdl_ch])
1353  aln = seq.CreateAlignment()
1354  aln.AddSequence(ref_seq)
1355  aln.AddSequence(mdl_seq)
1356  substructure_ref_mdl_alns[(ref_ch, mdl_ch)] = aln
1357 
1358  lddt_scorer = lddt.lDDTScorer(substructure,
1359  inclusion_radius = inclusion_radius,
1360  bb_only = bb_only)
1361  scored_mappings = list()
1362 
1363  if global_mapping:
1364  # construct mapping of substructure from global mapping
1365  flat_mapping = global_mapping.GetFlatMapping()
1366  mapping = list()
1367  for chem_group, chem_mapping in zip(substructure_chem_groups,
1368  substructure_chem_mapping):
1369  chem_group_mapping = list()
1370  for ch in chem_group:
1371  if ch in flat_mapping:
1372  mdl_ch = flat_mapping[ch]
1373  if mdl_ch in chem_mapping:
1374  chem_group_mapping.append(mdl_ch)
1375  else:
1376  chem_group_mapping.append(None)
1377  else:
1378  chem_group_mapping.append(None)
1379  mapping.append(chem_group_mapping)
1380  mappings = [mapping]
1381  else:
1382  mappings = list(_ChainMappings(substructure_chem_groups,
1383  substructure_chem_mapping,
1384  self.n_max_naiven_max_naive))
1385 
1386  # This step can be slow so give some hints in logs
1387  msg = "Computing initial ranking of %d chain mappings" % len(mappings)
1388  (ost.LogWarning if len(mappings) > 10000 else ost.LogInfo)(msg)
1389 
1390  for mapping in mappings:
1391  # chain_mapping and alns as input for lDDT computation
1392  lddt_chain_mapping = dict()
1393  lddt_alns = dict()
1394  n_res_aln = 0
1395  for ref_chem_group, mdl_chem_group in zip(substructure_chem_groups,
1396  mapping):
1397  for ref_ch, mdl_ch in zip(ref_chem_group, mdl_chem_group):
1398  # some mdl chains can be None
1399  if mdl_ch is not None:
1400  lddt_chain_mapping[mdl_ch] = ref_ch
1401  aln = substructure_ref_mdl_alns[(ref_ch, mdl_ch)]
1402  lddt_alns[mdl_ch] = aln
1403  tmp = [int(c[0] != '-' and c[1] != '-') for c in aln]
1404  n_res_aln += sum(tmp)
1405  # don't compute lDDT if no single residue in mdl and ref is aligned
1406  if n_res_aln == 0:
1407  continue
1408 
1409  lDDT, _ = lddt_scorer.lDDT(mdl, thresholds=thresholds,
1410  chain_mapping=lddt_chain_mapping,
1411  residue_mapping = lddt_alns,
1412  check_resnames = False,
1413  no_intrachain = only_interchain)
1414 
1415  if lDDT is None:
1416  ost.LogVerbose("No valid contacts in the reference")
1417  lDDT = 0.0 # that means, that we have not a single valid contact
1418  # in lDDT. For the code below to work, we just set it
1419  # to a terrible score => 0.0
1420 
1421  if len(scored_mappings) == 0:
1422  scored_mappings.append((lDDT, mapping))
1423  elif len(scored_mappings) < topn:
1424  scored_mappings.append((lDDT, mapping))
1425  scored_mappings.sort(reverse=True, key=lambda x: x[0])
1426  elif lDDT > scored_mappings[-1][0]:
1427  scored_mappings.append((lDDT, mapping))
1428  scored_mappings.sort(reverse=True, key=lambda x: x[0])
1429  scored_mappings = scored_mappings[:topn]
1430 
1431  # finalize and return
1432  results = list()
1433  for scored_mapping in scored_mappings:
1434  ref_view = substructure.handle.CreateEmptyView()
1435  mdl_view = mdl.handle.CreateEmptyView()
1436  for ref_ch_group, mdl_ch_group in zip(substructure_chem_groups,
1437  scored_mapping[1]):
1438  for ref_ch, mdl_ch in zip(ref_ch_group, mdl_ch_group):
1439  if ref_ch is not None and mdl_ch is not None:
1440  aln = substructure_ref_mdl_alns[(ref_ch, mdl_ch)]
1441  for col in aln:
1442  if col[0] != '-' and col[1] != '-':
1443  ref_view.AddResidue(col.GetResidue(0),
1444  mol.ViewAddFlag.INCLUDE_ALL)
1445  mdl_view.AddResidue(col.GetResidue(1),
1446  mol.ViewAddFlag.INCLUDE_ALL)
1447  results.append(ReprResult(scored_mapping[0], substructure,
1448  ref_view, mdl_view))
1449  return results
1450 
1451  def GetNMappings(self, model):
1452  """ Returns number of possible mappings
1453 
1454  :param model: Model with chains that are mapped onto
1455  :attr:`chem_groups`
1456  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
1457  """
1458  chem_mapping, chem_group_alns, mdl = self.GetChemMappingGetChemMapping(model)
1459  return _NMappings(self.chem_groupschem_groups, chem_mapping)
1460 
1461  def ProcessStructure(self, ent):
1462  """ Entity processing for chain mapping
1463 
1464  * Selects view containing peptide and nucleotide residues which have
1465  required backbone atoms present - for peptide residues thats
1466  N, CA, C and CB (no CB for GLY), for nucleotide residues thats
1467  O5', C5', C4', C3' and O3'.
1468  * filters view by chain lengths, see *min_pep_length* and
1469  *min_nuc_length* in constructor
1470  * Extracts atom sequences for each chain in that view
1471  * Attaches corresponding :class:`ost.mol.EntityView` to each sequence
1472  * If residue number alignments are used, strictly increasing residue
1473  numbers without insertion codes are ensured in each chain
1474 
1475  :param ent: Entity to process
1476  :type ent: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
1477  :returns: Tuple with 3 elements: 1) :class:`ost.mol.EntityView`
1478  containing peptide and nucleotide residues 2)
1479  :class:`ost.seq.SequenceList` containing ATOMSEQ sequences
1480  for each polypeptide chain in returned view, sequences have
1481  :class:`ost.mol.EntityView` of according chains attached
1482  3) same for polynucleotide chains
1483  """
1484  view = ent.CreateEmptyView()
1485  exp_pep_atoms = ["N", "CA", "C", "CB"]
1486  exp_nuc_atoms = ["\"O5'\"", "\"C5'\"", "\"C4'\"", "\"C3'\"", "\"O3'\""]
1487  pep_query = "peptide=true and aname=" + ','.join(exp_pep_atoms)
1488  nuc_query = "nucleotide=true and aname=" + ','.join(exp_nuc_atoms)
1489 
1490  pep_sel = ent.Select(pep_query)
1491  for r in pep_sel.residues:
1492  if len(r.atoms) == 4:
1493  view.AddResidue(r.handle, mol.INCLUDE_ALL)
1494  elif r.name == "GLY" and len(r.atoms) == 3:
1495  atom_names = [a.GetName() for a in r.atoms]
1496  if sorted(atom_names) == ["C", "CA", "N"]:
1497  view.AddResidue(r.handle, mol.INCLUDE_ALL)
1498 
1499  nuc_sel = ent.Select(nuc_query)
1500  for r in nuc_sel.residues:
1501  if len(r.atoms) == 5:
1502  view.AddResidue(r.handle, mol.INCLUDE_ALL)
1503 
1504  polypep_seqs = seq.CreateSequenceList()
1505  polynuc_seqs = seq.CreateSequenceList()
1506 
1507  if len(view.residues) == 0:
1508  # no residues survived => return
1509  return (view, polypep_seqs, polynuc_seqs)
1510 
1511  for ch in view.chains:
1512  n_res = len(ch.residues)
1513  n_pep = sum([r.IsPeptideLinking() for r in ch.residues])
1514  n_nuc = sum([r.IsNucleotideLinking() for r in ch.residues])
1515 
1516  # guarantee that we have either pep or nuc (no mix of the two)
1517  if n_pep > 0 and n_nuc > 0:
1518  raise RuntimeError(f"Must not mix peptide and nucleotide linking "
1519  f"residues in same chain ({ch.GetName()})")
1520 
1521  if (n_pep + n_nuc) != n_res:
1522  raise RuntimeError("All residues must either be peptide_linking "
1523  "or nucleotide_linking")
1524 
1525  # filter out short chains
1526  if n_pep > 0 and n_pep < self.min_pep_lengthmin_pep_length:
1527  continue
1528 
1529  if n_nuc > 0 and n_nuc < self.min_nuc_lengthmin_nuc_length:
1530  continue
1531 
1532  # the superfast residue number based alignment adds some
1533  # restrictions on the numbers themselves:
1534  # 1) no insertion codes 2) strictly increasing
1535  if self.resnum_alignmentsresnum_alignments:
1536  # check if no insertion codes are present in residue numbers
1537  ins_codes = [r.GetNumber().GetInsCode() for r in ch.residues]
1538  if len(set(ins_codes)) != 1 or ins_codes[0] != '\0':
1539  raise RuntimeError("Residue numbers in input structures must not "
1540  "contain insertion codes")
1541 
1542  # check if residue numbers are strictly increasing
1543  nums = [r.GetNumber().GetNum() for r in ch.residues]
1544  if not all(i < j for i, j in zip(nums, nums[1:])):
1545  raise RuntimeError("Residue numbers in input structures must be "
1546  "strictly increasing for each chain")
1547 
1548  s = ''.join([r.one_letter_code for r in ch.residues])
1549  s = seq.CreateSequence(ch.GetName(), s)
1550  s.AttachView(_CSel(view, [ch.GetName()]))
1551  if n_pep == n_res:
1552  polypep_seqs.AddSequence(s)
1553  elif n_nuc == n_res:
1554  polynuc_seqs.AddSequence(s)
1555  else:
1556  raise RuntimeError("This shouldnt happen")
1557 
1558  if len(polypep_seqs) == 0 and len(polynuc_seqs) == 0:
1559  raise RuntimeError(f"No chain fulfilled minimum length requirement "
1560  f"to be considered in chain mapping "
1561  f"({self.min_pep_length} for peptide chains, "
1562  f"{self.min_nuc_length} for nucleotide chains) "
1563  f"- mapping failed")
1564 
1565  # select for chains for which we actually extracted the sequence
1566  chain_names = [s.GetAttachedView().chains[0].name for s in polypep_seqs]
1567  chain_names += [s.GetAttachedView().chains[0].name for s in polynuc_seqs]
1568  view = _CSel(view, chain_names)
1569 
1570  return (view, polypep_seqs, polynuc_seqs)
1571 
1572  def Align(self, s1, s2, stype):
1573  """ Access to internal sequence alignment functionality
1574 
1575  Alignment parameterization is setup at ChainMapper construction
1576 
1577  :param s1: First sequence to align - must have view attached in case
1578  of resnum_alignments
1579  :type s1: :class:`ost.seq.SequenceHandle`
1580  :param s2: Second sequence to align - must have view attached in case
1581  of resnum_alignments
1582  :type s2: :class:`ost.seq.SequenceHandle`
1583  :param stype: Type of sequences to align, must be in
1584  [:class:`ost.mol.ChemType.AMINOACIDS`,
1585  :class:`ost.mol.ChemType.NUCLEOTIDES`]
1586  :returns: Pairwise alignment of s1 and s2
1587  """
1588  if stype not in [mol.ChemType.AMINOACIDS, mol.ChemType.NUCLEOTIDES]:
1589  raise RuntimeError("stype must be ost.mol.ChemType.AMINOACIDS or "
1590  "ost.mol.ChemType.NUCLEOTIDES")
1591  return self.aligneraligner.Align(s1, s2, chem_type = stype)
1592 
1593 
1594 # INTERNAL HELPERS
1595 
1596 class _Aligner:
1597  def __init__(self, pep_subst_mat = seq.alg.BLOSUM62, pep_gap_open = -5,
1598  pep_gap_ext = -2, nuc_subst_mat = seq.alg.NUC44,
1599  nuc_gap_open = -4, nuc_gap_ext = -4, resnum_aln = False):
1600  """ Helper class to compute alignments
1601 
1602  Sets default values for substitution matrix, gap open and gap extension
1603  penalties. They are only used in default mode (Needleman-Wunsch aln).
1604  If *resnum_aln* is True, only residue numbers of views that are attached
1605  to input sequences are considered.
1606  """
1607  self.pep_subst_matpep_subst_mat = pep_subst_mat
1608  self.pep_gap_openpep_gap_open = pep_gap_open
1609  self.pep_gap_extpep_gap_ext = pep_gap_ext
1610  self.nuc_subst_matnuc_subst_mat = nuc_subst_mat
1611  self.nuc_gap_opennuc_gap_open = nuc_gap_open
1612  self.nuc_gap_extnuc_gap_ext = nuc_gap_ext
1613  self.resnum_alnresnum_aln = resnum_aln
1614 
1615  def Align(self, s1, s2, chem_type=None):
1616  if self.resnum_alnresnum_aln:
1617  return self.ResNumAlignResNumAlign(s1, s2)
1618  else:
1619  if chem_type is None:
1620  raise RuntimeError("Must specify chem_type for NW alignment")
1621  return self.NWAlignNWAlign(s1, s2, chem_type)
1622 
1623  def NWAlign(self, s1, s2, chem_type):
1624  """ Returns pairwise alignment using Needleman-Wunsch algorithm
1625 
1626  :param s1: First sequence to align
1627  :type s1: :class:`ost.seq.SequenceHandle`
1628  :param s2: Second sequence to align
1629  :type s2: :class:`ost.seq.SequenceHandle`
1630  :param chem_type: Must be in [:class:`ost.mol.ChemType.AMINOACIDS`,
1631  :class:`ost.mol.ChemType.NUCLEOTIDES`], determines
1632  substitution matrix and gap open/extension penalties
1633  :type chem_type: :class:`ost.mol.ChemType`
1634  :returns: Alignment with s1 as first and s2 as second sequence
1635  """
1636  if chem_type == mol.ChemType.AMINOACIDS:
1637  return seq.alg.SemiGlobalAlign(s1, s2, self.pep_subst_matpep_subst_mat,
1638  gap_open=self.pep_gap_openpep_gap_open,
1639  gap_ext=self.pep_gap_extpep_gap_ext)[0]
1640  elif chem_type == mol.ChemType.NUCLEOTIDES:
1641  return seq.alg.SemiGlobalAlign(s1, s2, self.nuc_subst_matnuc_subst_mat,
1642  gap_open=self.nuc_gap_opennuc_gap_open,
1643  gap_ext=self.nuc_gap_extnuc_gap_ext)[0]
1644  else:
1645  raise RuntimeError("Invalid ChemType")
1646  return aln
1647 
1648  def ResNumAlign(self, s1, s2):
1649  """ Returns pairwise alignment using residue numbers of attached views
1650 
1651  Assumes that there are no insertion codes (alignment only on numerical
1652  component) and that resnums are strictly increasing (fast min/max
1653  identification). These requirements are assured if a structure has been
1654  processed by :class:`ChainMapper`.
1655 
1656  :param s1: First sequence to align, must have :class:`ost.mol.EntityView`
1657  attached
1658  :type s1: :class:`ost.seq.SequenceHandle`
1659  :param s2: Second sequence to align, must have :class:`ost.mol.EntityView`
1660  attached
1661  :type s2: :class:`ost.seq.SequenceHandle`
1662  """
1663  assert(s1.HasAttachedView())
1664  assert(s2.HasAttachedView())
1665  v1 = s1.GetAttachedView()
1666  rnums1 = [r.GetNumber().GetNum() for r in v1.residues]
1667  v2 = s2.GetAttachedView()
1668  rnums2 = [r.GetNumber().GetNum() for r in v2.residues]
1669 
1670  min_num = min(rnums1[0], rnums2[0])
1671  max_num = max(rnums1[-1], rnums2[-1])
1672  aln_length = max_num - min_num + 1
1673 
1674  aln_s1 = ['-'] * aln_length
1675  for r, rnum in zip(v1.residues, rnums1):
1676  aln_s1[rnum-min_num] = r.one_letter_code
1677 
1678  aln_s2 = ['-'] * aln_length
1679  for r, rnum in zip(v2.residues, rnums2):
1680  aln_s2[rnum-min_num] = r.one_letter_code
1681 
1682  aln = seq.CreateAlignment()
1683  aln.AddSequence(seq.CreateSequence(s1.GetName(), ''.join(aln_s1)))
1684  aln.AddSequence(seq.CreateSequence(s2.GetName(), ''.join(aln_s2)))
1685  return aln
1686 
1687 def _GetAlnPropsTwo(aln):
1688  """Returns basic properties of *aln* version two...
1689 
1690  :param aln: Alignment to compute properties
1691  :type aln: :class:`seq.AlignmentHandle`
1692  :returns: Tuple with 2 elements. 1) sequence identify in range [0, 100]
1693  considering aligned columns 2) Fraction of non-gap characters
1694  in first sequence that are covered by non-gap characters in
1695  second sequence.
1696  """
1697  assert(aln.GetCount() == 2)
1698  n_tot = sum([1 for col in aln if col[0] != '-'])
1699  n_aligned = sum([1 for col in aln if (col[0] != '-' and col[1] != '-')])
1700  return (seq.alg.SequenceIdentity(aln), float(n_aligned)/n_tot)
1701 
1702 def _GetAlnPropsOne(aln):
1703 
1704  """Returns basic properties of *aln* version one...
1705 
1706  :param aln: Alignment to compute properties
1707  :type aln: :class:`seq.AlignmentHandle`
1708  :returns: Tuple with 2 elements. 1) sequence identify in range [0, 100]
1709  considering aligned columns 2) Number of aligned columns.
1710  """
1711  assert(aln.GetCount() == 2)
1712  seqid = seq.alg.SequenceIdentity(aln)
1713  n_aligned = sum([1 for col in aln if (col[0] != '-' and col[1] != '-')])
1714  return (seqid, n_aligned)
1715 
1716 def _GetChemGroupAlignments(pep_seqs, nuc_seqs, aligner, pep_seqid_thr=95.,
1717  min_pep_length=6, nuc_seqid_thr=95.,
1718  min_nuc_length=4):
1719  """Returns alignments with groups of chemically equivalent chains
1720 
1721  :param pep_seqs: List of polypeptide sequences
1722  :type pep_seqs: :class:`seq.SequenceList`
1723  :param nuc_seqs: List of polynucleotide sequences
1724  :type nuc_seqs: :class:`seq.SequenceList`
1725  :param aligner: Helper class to generate pairwise alignments
1726  :type aligner: :class:`_Aligner`
1727  :param pep_seqid_thr: Threshold used to decide when two peptide chains are
1728  identical. 95 percent tolerates the few mutations
1729  crystallographers like to do.
1730  :type pep_seqid_thr: :class:`float`
1731  :param min_pep_length: Additional threshold to avoid gappy alignments with high
1732  seqid. Number of aligned columns must be at least this
1733  number.
1734  :type min_pep_length: :class:`int`
1735  :param nuc_seqid_thr: Nucleotide equivalent of *pep_seqid_thr*
1736  :type nuc_seqid_thr: :class:`float`
1737  :param min_nuc_length: Nucleotide equivalent of *min_pep_length*
1738  :type min_nuc_length: :class:`int`
1739  :returns: Tuple with first element being an AlignmentList. Each alignment
1740  represents a group of chemically equivalent chains and the first
1741  sequence is the longest. Second element is a list of equivalent
1742  length specifying the types of the groups. List elements are in
1743  [:class:`ost.ChemType.AMINOACIDS`,
1744  :class:`ost.ChemType.NUCLEOTIDES`]
1745  """
1746  pep_groups = _GroupSequences(pep_seqs, pep_seqid_thr, min_pep_length, aligner,
1747  mol.ChemType.AMINOACIDS)
1748  nuc_groups = _GroupSequences(nuc_seqs, nuc_seqid_thr, min_nuc_length, aligner,
1749  mol.ChemType.NUCLEOTIDES)
1750  group_types = [mol.ChemType.AMINOACIDS] * len(pep_groups)
1751  group_types += [mol.ChemType.NUCLEOTIDES] * len(nuc_groups)
1752  groups = pep_groups
1753  groups.extend(nuc_groups)
1754  return (groups, group_types)
1755 
1756 def _GroupSequences(seqs, seqid_thr, min_length, aligner, chem_type):
1757  """Get list of alignments representing groups of equivalent sequences
1758 
1759  :param seqid_thr: Threshold used to decide when two chains are identical.
1760  :type seqid_thr: :class:`float`
1761  :param gap_thr: Additional threshold to avoid gappy alignments with high
1762  seqid. Number of aligned columns must be at least this
1763  number.
1764  :type gap_thr: :class:`int`
1765  :param aligner: Helper class to generate pairwise alignments
1766  :type aligner: :class:`_Aligner`
1767  :param chem_type: ChemType of seqs which is passed to *aligner*, must be in
1768  [:class:`ost.mol.ChemType.AMINOACIDS`,
1769  :class:`ost.mol.ChemType.NUCLEOTIDES`]
1770  :type chem_type: :class:`ost.mol.ChemType`
1771  :returns: A list of alignments, one alignment for each group
1772  with longest sequence (reference) as first sequence.
1773  :rtype: :class:`ost.seq.AlignmentList`
1774  """
1775  groups = list()
1776  for s_idx in range(len(seqs)):
1777  matching_group = None
1778  for g_idx in range(len(groups)):
1779  for g_s_idx in range(len(groups[g_idx])):
1780  aln = aligner.Align(seqs[s_idx], seqs[groups[g_idx][g_s_idx]],
1781  chem_type)
1782  sid, n_aligned = _GetAlnPropsOne(aln)
1783  if sid >= seqid_thr and n_aligned >= min_length:
1784  matching_group = g_idx
1785  break
1786  if matching_group is not None:
1787  break
1788 
1789  if matching_group is None:
1790  groups.append([s_idx])
1791  else:
1792  groups[matching_group].append(s_idx)
1793 
1794  # sort based on sequence length
1795  sorted_groups = list()
1796  for g in groups:
1797  if len(g) > 1:
1798  tmp = sorted([[len(seqs[i]), i] for i in g], reverse=True)
1799  sorted_groups.append([x[1] for x in tmp])
1800  else:
1801  sorted_groups.append(g)
1802 
1803  # translate from indices back to sequences and directly generate alignments
1804  # of the groups with the longest (first) sequence as reference
1805  aln_list = seq.AlignmentList()
1806  for g in sorted_groups:
1807  if len(g) == 1:
1808  # aln with one single sequence
1809  aln_list.append(seq.CreateAlignment(seqs[g[0]]))
1810  else:
1811  # obtain pairwise aln of first sequence (reference) to all others
1812  alns = seq.AlignmentList()
1813  i = g[0]
1814  for j in g[1:]:
1815  alns.append(aligner.Align(seqs[i], seqs[j], chem_type))
1816  # and merge
1817  aln_list.append(seq.alg.MergePairwiseAlignments(alns, seqs[i]))
1818 
1819  # transfer attached views
1820  seq_dict = {s.GetName(): s for s in seqs}
1821  for aln_idx in range(len(aln_list)):
1822  for aln_s_idx in range(aln_list[aln_idx].GetCount()):
1823  s_name = aln_list[aln_idx].GetSequence(aln_s_idx).GetName()
1824  s = seq_dict[s_name]
1825  aln_list[aln_idx].AttachView(aln_s_idx, s.GetAttachedView())
1826 
1827  return aln_list
1828 
1829 def _MapSequence(ref_seqs, ref_types, s, s_type, aligner):
1830  """Tries top map *s* onto any of the sequences in *ref_seqs*
1831 
1832  Computes alignments of *s* to each of the reference sequences of equal type
1833  and sorts them by seqid*fraction_covered (seqid: sequence identity of
1834  aligned columns in alignment, fraction_covered: Fraction of non-gap
1835  characters in reference sequence that are covered by non-gap characters in
1836  *s*). Best scoring mapping is returned.
1837 
1838  :param ref_seqs: Reference sequences
1839  :type ref_seqs: :class:`ost.seq.SequenceList`
1840  :param ref_types: Types of reference sequences, e.g.
1841  ost.mol.ChemType.AminoAcids
1842  :type ref_types: :class:`list` of :class:`ost.mol.ChemType`
1843  :param s: Sequence to map
1844  :type s: :class:`ost.seq.SequenceHandle`
1845  :param s_type: Type of *s*, only try mapping to sequences in *ref_seqs*
1846  with equal type as defined in *ref_types*
1847  :param aligner: Helper class to generate pairwise alignments
1848  :type aligner: :class:`_Aligner`
1849  :returns: Tuple with two elements. 1) index of sequence in *ref_seqs* to
1850  which *s* can be mapped 2) Pairwise sequence alignment with
1851  sequence from *ref_seqs* as first sequence. Both elements are
1852  None if no mapping can be found.
1853  :raises: :class:`RuntimeError` if mapping is ambiguous, i.e. *s*
1854  successfully maps to more than one sequence in *ref_seqs*
1855  """
1856  scored_alns = list()
1857  for ref_idx, ref_seq in enumerate(ref_seqs):
1858  if ref_types[ref_idx] == s_type:
1859  aln = aligner.Align(ref_seq, s, s_type)
1860  seqid, fraction_covered = _GetAlnPropsTwo(aln)
1861  score = seqid * fraction_covered
1862  scored_alns.append((score, ref_idx, aln))
1863 
1864  if len(scored_alns) == 0:
1865  return (None, None) # no mapping possible...
1866 
1867  scored_alns = sorted(scored_alns, key=lambda x: x[0], reverse=True)
1868  return (scored_alns[0][1], scored_alns[0][2])
1869 
1870 def _GetRefMdlAlns(ref_chem_groups, ref_chem_group_msas, mdl_chem_groups,
1871  mdl_chem_group_alns, pairs=None):
1872  """ Get all possible ref/mdl chain alignments given chem group mapping
1873 
1874  :param ref_chem_groups: :attr:`ChainMapper.chem_groups`
1875  :type ref_chem_groups: :class:`list` of :class:`list` of :class:`str`
1876  :param ref_chem_group_msas: :attr:`ChainMapper.chem_group_alignments`
1877  :type ref_chem_group_msas: :class:`ost.seq.AlignmentList`
1878  :param mdl_chem_groups: Groups of model chains that are mapped to
1879  *ref_chem_groups*. Return value of
1880  :func:`ChainMapper.GetChemMapping`.
1881  :type mdl_chem_groups: :class:`list` of :class:`list` of :class:`str`
1882  :param mdl_chem_group_alns: A pairwise sequence alignment for every chain
1883  in *mdl_chem_groups* that aligns these sequences
1884  to the respective reference sequence.
1885  Return values of
1886  :func:`ChainMapper.GetChemMapping`.
1887  :type mdl_chem_group_alns: :class:`list` of :class:`ost.seq.AlignmentList`
1888  :param pairs: Pro param - restrict return dict to specified pairs. A set of
1889  tuples in form (<trg_ch>, <mdl_ch>)
1890  :type pairs: :class:`set`
1891  :returns: A dictionary holding all possible ref/mdl chain alignments. Keys
1892  in that dictionary are tuples of the form (ref_ch, mdl_ch) and
1893  values are the respective pairwise alignments with first sequence
1894  being from ref, the second from mdl.
1895  """
1896  # alignment of each model chain to chem_group reference sequence
1897  mdl_alns = dict()
1898  for alns in mdl_chem_group_alns:
1899  for aln in alns:
1900  mdl_chain_name = aln.GetSequence(1).GetName()
1901  mdl_alns[mdl_chain_name] = aln
1902 
1903  # generate all alignments between ref/mdl chain atomseqs that we will
1904  # ever observe
1905  ref_mdl_alns = dict()
1906  for ref_chains, mdl_chains, ref_aln in zip(ref_chem_groups, mdl_chem_groups,
1907  ref_chem_group_msas):
1908  for ref_ch in ref_chains:
1909  for mdl_ch in mdl_chains:
1910  if pairs is not None and (ref_ch, mdl_ch) not in pairs:
1911  continue
1912  # obtain alignments of mdl and ref chains towards chem
1913  # group ref sequence and merge them
1914  aln_list = seq.AlignmentList()
1915  # do ref aln
1916  s1 = ref_aln.GetSequence(0)
1917  s2 = ref_aln.GetSequence(ref_chains.index(ref_ch))
1918  aln_list.append(seq.CreateAlignment(s1, s2))
1919  # do mdl aln
1920  aln_list.append(mdl_alns[mdl_ch])
1921  # merge
1922  ref_seq = seq.CreateSequence(s1.GetName(),
1923  s1.GetGaplessString())
1924  merged_aln = seq.alg.MergePairwiseAlignments(aln_list,
1925  ref_seq)
1926  # merged_aln:
1927  # seq1: ref seq of chem group
1928  # seq2: seq of ref chain
1929  # seq3: seq of mdl chain
1930  # => we need the alignment between seq2 and seq3
1931  s2 = merged_aln.GetSequence(1)
1932  s3 = merged_aln.GetSequence(2)
1933  # cut leading and trailing gap columns
1934  a = 0 # number of leading gap columns
1935  for idx in range(len(s2)):
1936  if s2[idx] != '-' or s3[idx] != '-':
1937  break
1938  a += 1
1939  b = 0 # number of trailing gap columns
1940  for idx in reversed(range(len(s2))):
1941  if s2[idx] != '-' or s3[idx] != '-':
1942  break
1943  b += 1
1944  s2 = seq.CreateSequence(s2.GetName(), s2[a: len(s2)-b])
1945  s3 = seq.CreateSequence(s3.GetName(), s3[a: len(s3)-b])
1946  ref_mdl_alns[(ref_ch, mdl_ch)] = seq.CreateAlignment(s2, s3)
1947 
1948  return ref_mdl_alns
1949 
1950 def _CheckOneToOneMapping(ref_chains, mdl_chains):
1951  """ Checks whether we already have a perfect one to one mapping
1952 
1953  That means each list in *ref_chains* has exactly one element and each
1954  list in *mdl_chains* has either one element (it's mapped) or is empty
1955  (ref chain has no mapped mdl chain). Returns None if no such mapping
1956  can be found.
1957 
1958  :param ref_chains: corresponds to :attr:`ChainMapper.chem_groups`
1959  :type ref_chains: :class:`list` of :class:`list` of :class:`str`
1960  :param mdl_chains: mdl chains mapped to chem groups in *ref_chains*, i.e.
1961  the return value of :func:`ChainMapper.GetChemMapping`
1962  :type mdl_chains: class:`list` of :class:`list` of :class:`str`
1963  :returns: A :class:`list` of :class:`list` if a one to one mapping is found,
1964  None otherwise
1965  """
1966  only_one_to_one = True
1967  one_to_one = list()
1968  for ref, mdl in zip(ref_chains, mdl_chains):
1969  if len(ref) == 1 and len(mdl) == 1:
1970  one_to_one.append(mdl)
1971  elif len(ref) == 1 and len(mdl) == 0:
1972  one_to_one.append([None])
1973  else:
1974  only_one_to_one = False
1975  break
1976  if only_one_to_one:
1977  return one_to_one
1978  else:
1979  return None
1980 
1982  def __init__(self, ref, mdl, ref_chem_groups, mdl_chem_groups,
1983  ref_mdl_alns, inclusion_radius = 15.0,
1984  thresholds = [0.5, 1.0, 2.0, 4.0],
1985  steep_opt_rate = None):
1986 
1987  """ Greedy extension of already existing but incomplete chain mappings
1988  """
1989  super().__init__(ref, ref_chem_groups, mdl, ref_mdl_alns,
1990  dist_thresh=inclusion_radius,
1991  dist_diff_thresholds=thresholds)
1992 
1993  self.mdl_chem_groupsmdl_chem_groups = mdl_chem_groups
1994  self.steep_opt_ratesteep_opt_rate = steep_opt_rate
1995 
1996  self.neighborsneighbors = {k: set() for k in self.trgtrg.chain_names}
1997  for interface in self.trgtrg.interacting_chains:
1998  self.neighborsneighbors[interface[0]].add(interface[1])
1999  self.neighborsneighbors[interface[1]].add(interface[0])
2000 
2001  assert(len(ref_chem_groups) == len(mdl_chem_groups))
2002  self.ref_chem_groupsref_chem_groups = ref_chem_groups
2003  self.mdl_chem_groupsmdl_chem_groups = mdl_chem_groups
2004  self.ref_ch_group_mapperref_ch_group_mapper = dict()
2005  self.mdl_ch_group_mappermdl_ch_group_mapper = dict()
2006  for g_idx, (ref_g, mdl_g) in enumerate(zip(ref_chem_groups,
2007  mdl_chem_groups)):
2008  for ch in ref_g:
2009  self.ref_ch_group_mapperref_ch_group_mapper[ch] = g_idx
2010  for ch in mdl_g:
2011  self.mdl_ch_group_mappermdl_ch_group_mapper[ch] = g_idx
2012 
2013  # keep track of mdl chains that potentially give lDDT contributions,
2014  # i.e. they have locations within inclusion_radius + max(thresholds)
2015  self.mdl_neighborsmdl_neighbors = {k: set() for k in self.mdlmdl.chain_names}
2016  for interface in self.mdlmdl.potentially_contributing_chains:
2017  self.mdl_neighborsmdl_neighbors[interface[0]].add(interface[1])
2018  self.mdl_neighborsmdl_neighbors[interface[1]].add(interface[0])
2019 
2020  def ExtendMapping(self, mapping, max_ext = None):
2021 
2022  if len(mapping) == 0:
2023  raise RuntimError("Mapping must contain a starting point")
2024 
2025  # Ref chains onto which we can map. The algorithm starts with a mapping
2026  # on ref_ch. From there we can start to expand to connected neighbors.
2027  # All neighbors that we can reach from the already mapped chains are
2028  # stored in this set which will be updated during runtime
2029  map_targets = set()
2030  for ref_ch in mapping.keys():
2031  map_targets.update(self.neighborsneighbors[ref_ch])
2032 
2033  # remove the already mapped chains
2034  for ref_ch in mapping.keys():
2035  map_targets.discard(ref_ch)
2036 
2037  if len(map_targets) == 0:
2038  return mapping # nothing to extend
2039 
2040  # keep track of what model chains are not yet mapped for each chem group
2041  free_mdl_chains = list()
2042  for chem_group in self.mdl_chem_groupsmdl_chem_groups:
2043  tmp = [x for x in chem_group if x not in mapping.values()]
2044  free_mdl_chains.append(set(tmp))
2045 
2046  # keep track of what ref chains got a mapping
2047  newly_mapped_ref_chains = list()
2048 
2049  something_happened = True
2050  while something_happened:
2051  something_happened=False
2052 
2053  if self.steep_opt_ratesteep_opt_rate is not None:
2054  n_chains = len(newly_mapped_ref_chains)
2055  if n_chains > 0 and n_chains % self.steep_opt_ratesteep_opt_rate == 0:
2056  mapping = self._SteepOpt_SteepOpt(mapping, newly_mapped_ref_chains)
2057 
2058  if max_ext is not None and len(newly_mapped_ref_chains) >= max_ext:
2059  break
2060 
2061  max_n = 0
2062  max_mapping = None
2063  for ref_ch in map_targets:
2064  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[ref_ch]
2065  for mdl_ch in free_mdl_chains[chem_group_idx]:
2066  # single chain score
2067  n_single = self._NSCConserved_NSCConserved(ref_ch, mdl_ch).sum()
2068  # scores towards neighbors that are already mapped
2069  n_inter = 0
2070  for neighbor in self.neighborsneighbors[ref_ch]:
2071  if neighbor in mapping and mapping[neighbor] in \
2072  self.mdl_neighborsmdl_neighbors[mdl_ch]:
2073  n_inter += self._NPairConserved_NPairConserved((ref_ch, neighbor),
2074  (mdl_ch, mapping[neighbor])).sum()
2075  n = n_single + n_inter
2076 
2077  if n_inter > 0 and n > max_n:
2078  # Only accept a new solution if its actually connected
2079  # i.e. n_inter > 0. Otherwise we could just map a big
2080  # fat mdl chain sitting somewhere in Nirvana
2081  max_n = n
2082  max_mapping = (ref_ch, mdl_ch)
2083 
2084  if max_n > 0:
2085  something_happened = True
2086  # assign new found mapping
2087  mapping[max_mapping[0]] = max_mapping[1]
2088 
2089  # add all neighboring chains to map targets as they are now
2090  # reachable
2091  for neighbor in self.neighborsneighbors[max_mapping[0]]:
2092  if neighbor not in mapping:
2093  map_targets.add(neighbor)
2094 
2095  # remove the ref chain from map targets
2096  map_targets.remove(max_mapping[0])
2097 
2098  # remove the mdl chain from free_mdl_chains - its taken...
2099  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[max_mapping[0]]
2100  free_mdl_chains[chem_group_idx].remove(max_mapping[1])
2101 
2102  # keep track of what ref chains got a mapping
2103  newly_mapped_ref_chains.append(max_mapping[0])
2104 
2105  return mapping
2106 
2107  def _SteepOpt(self, mapping, chains_to_optimize=None):
2108 
2109  # just optimize ALL ref chains if nothing specified
2110  if chains_to_optimize is None:
2111  chains_to_optimize = mapping.keys()
2112 
2113  # make sure that we only have ref chains which are actually mapped
2114  ref_chains = [x for x in chains_to_optimize if mapping[x] is not None]
2115 
2116  # group ref chains to be optimized into chem groups
2117  tmp = dict()
2118  for ch in ref_chains:
2119  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[ch]
2120  if chem_group_idx in tmp:
2121  tmp[chem_group_idx].append(ch)
2122  else:
2123  tmp[chem_group_idx] = [ch]
2124  chem_groups = list(tmp.values())
2125 
2126  # try all possible mapping swaps. Swaps that improve the score are
2127  # immediately accepted and we start all over again
2128  current_lddt = self.FromFlatMappingFromFlatMapping(mapping)
2129  something_happened = True
2130  while something_happened:
2131  something_happened = False
2132  for chem_group in chem_groups:
2133  if something_happened:
2134  break
2135  for ch1, ch2 in itertools.combinations(chem_group, 2):
2136  swapped_mapping = dict(mapping)
2137  swapped_mapping[ch1] = mapping[ch2]
2138  swapped_mapping[ch2] = mapping[ch1]
2139  score = self.FromFlatMappingFromFlatMapping(swapped_mapping)
2140  if score > current_lddt:
2141  something_happened = True
2142  mapping = swapped_mapping
2143  current_lddt = score
2144  break
2145  return mapping
2146 
2147 
2148 def _lDDTNaive(trg, mdl, inclusion_radius, thresholds, chem_groups,
2149  chem_mapping, ref_mdl_alns, n_max_naive):
2150  """ Naively iterates all possible chain mappings and returns the best
2151  """
2152  best_mapping = None
2153  best_lddt = -1.0
2154 
2155  # Setup scoring
2156  lddt_scorer = bb_lddt.BBlDDTScorer(trg, chem_groups, mdl, ref_mdl_alns,
2157  dist_thresh=inclusion_radius,
2158  dist_diff_thresholds=thresholds)
2159  for mapping in _ChainMappings(chem_groups, chem_mapping, n_max_naive):
2160  lDDT = lddt_scorer.Score(mapping, check=False)
2161  if lDDT > best_lddt:
2162  best_mapping = mapping
2163  best_lddt = lDDT
2164 
2165  return (best_mapping, best_lddt)
2166 
2167 
2168 def _GetSeeds(ref_chem_groups, mdl_chem_groups, mapped_ref_chains = set(),
2169  mapped_mdl_chains = set()):
2170  seeds = list()
2171  for ref_chains, mdl_chains in zip(ref_chem_groups,
2172  mdl_chem_groups):
2173  for ref_ch in ref_chains:
2174  if ref_ch not in mapped_ref_chains:
2175  for mdl_ch in mdl_chains:
2176  if mdl_ch not in mapped_mdl_chains:
2177  seeds.append((ref_ch, mdl_ch))
2178  return seeds
2179 
2180 
2181 def _lDDTGreedyFast(the_greed):
2182 
2183  something_happened = True
2184  mapping = dict()
2185 
2186  while something_happened:
2187  something_happened = False
2188  seeds = _GetSeeds(the_greed.ref_chem_groups,
2189  the_greed.mdl_chem_groups,
2190  mapped_ref_chains = set(mapping.keys()),
2191  mapped_mdl_chains = set(mapping.values()))
2192  # search for best scoring starting point
2193  n_best = 0
2194  best_seed = None
2195  for seed in seeds:
2196  n = the_greed._NSCConserved(seed[0], seed[1]).sum()
2197  if n > n_best:
2198  n_best = n
2199  best_seed = seed
2200  if n_best == 0:
2201  break # no proper seed found anymore...
2202  # add seed to mapping and start the greed
2203  mapping[best_seed[0]] = best_seed[1]
2204  mapping = the_greed.ExtendMapping(mapping)
2205  something_happened = True
2206 
2207  # translate mapping format and return
2208  final_mapping = list()
2209  for ref_chains in the_greed.ref_chem_groups:
2210  mapped_mdl_chains = list()
2211  for ref_ch in ref_chains:
2212  if ref_ch in mapping:
2213  mapped_mdl_chains.append(mapping[ref_ch])
2214  else:
2215  mapped_mdl_chains.append(None)
2216  final_mapping.append(mapped_mdl_chains)
2217 
2218  return final_mapping
2219 
2220 
2221 def _lDDTGreedyFull(the_greed):
2222  """ Uses each reference chain as starting point for expansion
2223  """
2224 
2225  seeds = _GetSeeds(the_greed.ref_chem_groups, the_greed.mdl_chem_groups)
2226  best_overall_score = -1.0
2227  best_overall_mapping = dict()
2228 
2229  for seed in seeds:
2230 
2231  # do initial extension
2232  mapping = the_greed.ExtendMapping({seed[0]: seed[1]})
2233 
2234  # repeat the process until we have a full mapping
2235  something_happened = True
2236  while something_happened:
2237  something_happened = False
2238  remnant_seeds = _GetSeeds(the_greed.ref_chem_groups,
2239  the_greed.mdl_chem_groups,
2240  mapped_ref_chains = set(mapping.keys()),
2241  mapped_mdl_chains = set(mapping.values()))
2242  if len(remnant_seeds) > 0:
2243  # still more mapping to be done
2244  best_score = -1.0
2245  best_mapping = None
2246  for remnant_seed in remnant_seeds:
2247  tmp_mapping = dict(mapping)
2248  tmp_mapping[remnant_seed[0]] = remnant_seed[1]
2249  tmp_mapping = the_greed.ExtendMapping(tmp_mapping)
2250  score = the_greed.FromFlatMapping(tmp_mapping)
2251  if score > best_score:
2252  best_score = score
2253  best_mapping = tmp_mapping
2254  if best_mapping is not None:
2255  something_happened = True
2256  mapping = best_mapping
2257 
2258  score = the_greed.FromFlatMapping(mapping)
2259  if score > best_overall_score:
2260  best_overall_score = score
2261  best_overall_mapping = mapping
2262 
2263  mapping = best_overall_mapping
2264 
2265  # translate mapping format and return
2266  final_mapping = list()
2267  for ref_chains in the_greed.ref_chem_groups:
2268  mapped_mdl_chains = list()
2269  for ref_ch in ref_chains:
2270  if ref_ch in mapping:
2271  mapped_mdl_chains.append(mapping[ref_ch])
2272  else:
2273  mapped_mdl_chains.append(None)
2274  final_mapping.append(mapped_mdl_chains)
2275 
2276  return final_mapping
2277 
2278 
2279 def _lDDTGreedyBlock(the_greed, seed_size, blocks_per_chem_group):
2280  """ try multiple seeds, i.e. try all ref/mdl chain combinations within the
2281  respective chem groups and compute single chain lDDTs. The
2282  *blocks_per_chem_group* best scoring ones are extend by *seed_size* chains
2283  and the best scoring one is exhaustively extended.
2284  """
2285 
2286  if seed_size is None or seed_size < 1:
2287  raise RuntimeError(f"seed_size must be an int >= 1 (got {seed_size})")
2288 
2289  if blocks_per_chem_group is None or blocks_per_chem_group < 1:
2290  raise RuntimeError(f"blocks_per_chem_group must be an int >= 1 "
2291  f"(got {blocks_per_chem_group})")
2292 
2293  max_ext = seed_size - 1 # -1 => start seed already has size 1
2294 
2295  ref_chem_groups = copy.deepcopy(the_greed.ref_chem_groups)
2296  mdl_chem_groups = copy.deepcopy(the_greed.mdl_chem_groups)
2297 
2298  mapping = dict()
2299  something_happened = True
2300  while something_happened:
2301  something_happened = False
2302  starting_blocks = list()
2303  for ref_chains, mdl_chains in zip(ref_chem_groups, mdl_chem_groups):
2304  if len(mdl_chains) == 0:
2305  continue # nothing to map
2306  ref_chains_copy = list(ref_chains)
2307  for i in range(blocks_per_chem_group):
2308  if len(ref_chains_copy) == 0:
2309  break
2310  seeds = list()
2311  for ref_ch in ref_chains_copy:
2312  seeds += [(ref_ch, mdl_ch) for mdl_ch in mdl_chains]
2313  # extend starting seeds to *seed_size* and retain best scoring
2314  # block for further extension
2315  best_score = -1.0
2316  best_mapping = None
2317  best_seed = None
2318  for s in seeds:
2319  seed = dict(mapping)
2320  seed.update({s[0]: s[1]})
2321  seed = the_greed.ExtendMapping(seed, max_ext = max_ext)
2322  seed_lddt = the_greed.FromFlatMapping(seed)
2323  if seed_lddt > best_score:
2324  best_score = seed_lddt
2325  best_mapping = seed
2326  best_seed = s
2327  if best_mapping != None:
2328  starting_blocks.append(best_mapping)
2329  if best_seed[0] in ref_chains_copy:
2330  # remove that ref chain to enforce diversity
2331  ref_chains_copy.remove(best_seed[0])
2332 
2333  # fully expand initial starting blocks
2334  best_lddt = 0.0
2335  best_mapping = None
2336  for seed in starting_blocks:
2337  seed = the_greed.ExtendMapping(seed)
2338  seed_lddt = the_greed.FromFlatMapping(seed)
2339  if seed_lddt > best_lddt:
2340  best_lddt = seed_lddt
2341  best_mapping = seed
2342 
2343  if best_lddt == 0.0:
2344  break # no proper mapping found anymore
2345 
2346  something_happened = True
2347  mapping.update(best_mapping)
2348  for ref_ch, mdl_ch in best_mapping.items():
2349  for group_idx in range(len(ref_chem_groups)):
2350  if ref_ch in ref_chem_groups[group_idx]:
2351  ref_chem_groups[group_idx].remove(ref_ch)
2352  if mdl_ch in mdl_chem_groups[group_idx]:
2353  mdl_chem_groups[group_idx].remove(mdl_ch)
2354 
2355  # translate mapping format and return
2356  final_mapping = list()
2357  for ref_chains in the_greed.ref_chem_groups:
2358  mapped_mdl_chains = list()
2359  for ref_ch in ref_chains:
2360  if ref_ch in mapping:
2361  mapped_mdl_chains.append(mapping[ref_ch])
2362  else:
2363  mapped_mdl_chains.append(None)
2364  final_mapping.append(mapped_mdl_chains)
2365 
2366  return final_mapping
2367 
2368 
2370  def __init__(self, ref, mdl, ref_chem_groups, mdl_chem_groups,
2371  ref_mdl_alns, contact_d = 12.0,
2372  steep_opt_rate = None, greedy_prune_contact_map=False):
2373  """ Greedy extension of already existing but incomplete chain mappings
2374  """
2375  super().__init__(ref, ref_chem_groups, mdl, ref_mdl_alns,
2376  contact_d = contact_d)
2377  self.refref = ref
2378  self.mdlmdl = mdl
2379  self.ref_mdl_alnsref_mdl_alns = ref_mdl_alns
2380  self.steep_opt_ratesteep_opt_rate = steep_opt_rate
2381 
2382  if greedy_prune_contact_map:
2383  self.neighborsneighbors = {k: set() for k in self.qsent1qsent1.chain_names}
2384  for p in self.qsent1qsent1.interacting_chains:
2385  if np.count_nonzero(self.qsent1qsent1.PairDist(p[0], p[1])<=8) >= 3:
2386  self.neighborsneighbors[p[0]].add(p[1])
2387  self.neighborsneighbors[p[1]].add(p[0])
2388 
2389  self.mdl_neighborsmdl_neighbors = {k: set() for k in self.qsent2qsent2.chain_names}
2390  for p in self.qsent2qsent2.interacting_chains:
2391  if np.count_nonzero(self.qsent2qsent2.PairDist(p[0], p[1])<=8) >= 3:
2392  self.mdl_neighborsmdl_neighbors[p[0]].add(p[1])
2393  self.mdl_neighborsmdl_neighbors[p[1]].add(p[0])
2394 
2395 
2396  else:
2397  self.neighborsneighbors = {k: set() for k in self.qsent1qsent1.chain_names}
2398  for p in self.qsent1qsent1.interacting_chains:
2399  self.neighborsneighbors[p[0]].add(p[1])
2400  self.neighborsneighbors[p[1]].add(p[0])
2401 
2402  self.mdl_neighborsmdl_neighbors = {k: set() for k in self.qsent2qsent2.chain_names}
2403  for p in self.qsent2qsent2.interacting_chains:
2404  self.mdl_neighborsmdl_neighbors[p[0]].add(p[1])
2405  self.mdl_neighborsmdl_neighbors[p[1]].add(p[0])
2406 
2407  assert(len(ref_chem_groups) == len(mdl_chem_groups))
2408  self.ref_chem_groupsref_chem_groups = ref_chem_groups
2409  self.mdl_chem_groupsmdl_chem_groups = mdl_chem_groups
2410  self.ref_ch_group_mapperref_ch_group_mapper = dict()
2411  self.mdl_ch_group_mappermdl_ch_group_mapper = dict()
2412  for g_idx, (ref_g, mdl_g) in enumerate(zip(ref_chem_groups,
2413  mdl_chem_groups)):
2414  for ch in ref_g:
2415  self.ref_ch_group_mapperref_ch_group_mapper[ch] = g_idx
2416  for ch in mdl_g:
2417  self.mdl_ch_group_mappermdl_ch_group_mapper[ch] = g_idx
2418 
2419  # cache for lDDT based single chain conserved contacts
2420  # used to identify starting points for further extension by QS score
2421  # key: tuple (ref_ch, mdl_ch) value: number of conserved contacts
2422  self.single_chain_scorersingle_chain_scorer = dict()
2423  self.single_chain_cachesingle_chain_cache = dict()
2424  for ch in self.refref.chains:
2425  single_chain_ref = _CSel(self.refref, [ch.GetName()])
2426  self.single_chain_scorersingle_chain_scorer[ch.GetName()] = \
2427  lddt.lDDTScorer(single_chain_ref, bb_only = True)
2428 
2429  def SCCounts(self, ref_ch, mdl_ch):
2430  if not (ref_ch, mdl_ch) in self.single_chain_cachesingle_chain_cache:
2431  alns = dict()
2432  alns[mdl_ch] = self.ref_mdl_alnsref_mdl_alns[(ref_ch, mdl_ch)]
2433  mdl_sel = _CSel(self.mdlmdl, [mdl_ch])
2434  s = self.single_chain_scorersingle_chain_scorer[ref_ch]
2435  _,_,_,conserved,_,_,_ = s.lDDT(mdl_sel,
2436  residue_mapping=alns,
2437  return_dist_test=True,
2438  no_interchain=True,
2439  chain_mapping={mdl_ch: ref_ch},
2440  check_resnames=False)
2441  self.single_chain_cachesingle_chain_cache[(ref_ch, mdl_ch)] = conserved
2442  return self.single_chain_cachesingle_chain_cache[(ref_ch, mdl_ch)]
2443 
2444  def ExtendMapping(self, mapping, max_ext = None):
2445 
2446  if len(mapping) == 0:
2447  raise RuntimError("Mapping must contain a starting point")
2448 
2449  # Ref chains onto which we can map. The algorithm starts with a mapping
2450  # on ref_ch. From there we can start to expand to connected neighbors.
2451  # All neighbors that we can reach from the already mapped chains are
2452  # stored in this set which will be updated during runtime
2453  map_targets = set()
2454  for ref_ch in mapping.keys():
2455  map_targets.update(self.neighborsneighbors[ref_ch])
2456 
2457  # remove the already mapped chains
2458  for ref_ch in mapping.keys():
2459  map_targets.discard(ref_ch)
2460 
2461  if len(map_targets) == 0:
2462  return mapping # nothing to extend
2463 
2464  # keep track of what model chains are not yet mapped for each chem group
2465  free_mdl_chains = list()
2466  for chem_group in self.mdl_chem_groupsmdl_chem_groups:
2467  tmp = [x for x in chem_group if x not in mapping.values()]
2468  free_mdl_chains.append(set(tmp))
2469 
2470  # keep track of what ref chains got a mapping
2471  newly_mapped_ref_chains = list()
2472 
2473  something_happened = True
2474  while something_happened:
2475  something_happened=False
2476 
2477  if self.steep_opt_ratesteep_opt_rate is not None:
2478  n_chains = len(newly_mapped_ref_chains)
2479  if n_chains > 0 and n_chains % self.steep_opt_ratesteep_opt_rate == 0:
2480  mapping = self._SteepOpt_SteepOpt(mapping, newly_mapped_ref_chains)
2481 
2482  if max_ext is not None and len(newly_mapped_ref_chains) >= max_ext:
2483  break
2484 
2485  score_result = self.FromFlatMappingFromFlatMapping(mapping)
2486  old_score = score_result.QS_global
2487  nominator = score_result.weighted_scores
2488  denominator = score_result.weight_sum + score_result.weight_extra_all
2489 
2490  max_diff = 0.0
2491  max_mapping = None
2492  for ref_ch in map_targets:
2493  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[ref_ch]
2494  for mdl_ch in free_mdl_chains[chem_group_idx]:
2495  # we're not computing full QS-score here, we directly hack
2496  # into the QS-score formula to compute a diff
2497  nominator_diff = 0.0
2498  denominator_diff = 0.0
2499  for neighbor in self.neighborsneighbors[ref_ch]:
2500  if neighbor in mapping and mapping[neighbor] in \
2501  self.mdl_neighborsmdl_neighbors[mdl_ch]:
2502  # it's a newly added interface if (ref_ch, mdl_ch)
2503  # are added to mapping
2504  int1 = (ref_ch, neighbor)
2505  int2 = (mdl_ch, mapping[neighbor])
2506  a, b, c, d = self._MappedInterfaceScores_MappedInterfaceScores(int1, int2)
2507  nominator_diff += a # weighted_scores
2508  denominator_diff += b # weight_sum
2509  denominator_diff += d # weight_extra_all
2510  # the respective interface penalties are subtracted
2511  # from denominator
2512  denominator_diff -= self._InterfacePenalty1_InterfacePenalty1(int1)
2513  denominator_diff -= self._InterfacePenalty2_InterfacePenalty2(int2)
2514 
2515  if nominator_diff > 0:
2516  # Only accept a new solution if its actually connected
2517  # i.e. nominator_diff > 0.
2518  new_nominator = nominator + nominator_diff
2519  new_denominator = denominator + denominator_diff
2520  new_score = 0.0
2521  if new_denominator != 0.0:
2522  new_score = new_nominator/new_denominator
2523  diff = new_score - old_score
2524  if diff > max_diff:
2525  max_diff = diff
2526  max_mapping = (ref_ch, mdl_ch)
2527 
2528  if max_mapping is not None:
2529  something_happened = True
2530  # assign new found mapping
2531  mapping[max_mapping[0]] = max_mapping[1]
2532 
2533  # add all neighboring chains to map targets as they are now
2534  # reachable
2535  for neighbor in self.neighborsneighbors[max_mapping[0]]:
2536  if neighbor not in mapping:
2537  map_targets.add(neighbor)
2538 
2539  # remove the ref chain from map targets
2540  map_targets.remove(max_mapping[0])
2541 
2542  # remove the mdl chain from free_mdl_chains - its taken...
2543  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[max_mapping[0]]
2544  free_mdl_chains[chem_group_idx].remove(max_mapping[1])
2545 
2546  # keep track of what ref chains got a mapping
2547  newly_mapped_ref_chains.append(max_mapping[0])
2548 
2549  return mapping
2550 
2551  def _SteepOpt(self, mapping, chains_to_optimize=None):
2552 
2553  # just optimize ALL ref chains if nothing specified
2554  if chains_to_optimize is None:
2555  chains_to_optimize = mapping.keys()
2556 
2557  # make sure that we only have ref chains which are actually mapped
2558  ref_chains = [x for x in chains_to_optimize if mapping[x] is not None]
2559 
2560  # group ref chains to be optimized into chem groups
2561  tmp = dict()
2562  for ch in ref_chains:
2563  chem_group_idx = self.ref_ch_group_mapperref_ch_group_mapper[ch]
2564  if chem_group_idx in tmp:
2565  tmp[chem_group_idx].append(ch)
2566  else:
2567  tmp[chem_group_idx] = [ch]
2568  chem_groups = list(tmp.values())
2569 
2570  # try all possible mapping swaps. Swaps that improve the score are
2571  # immediately accepted and we start all over again
2572  score_result = self.FromFlatMappingFromFlatMapping(mapping)
2573  current_score = score_result.QS_global
2574  something_happened = True
2575  while something_happened:
2576  something_happened = False
2577  for chem_group in chem_groups:
2578  if something_happened:
2579  break
2580  for ch1, ch2 in itertools.combinations(chem_group, 2):
2581  swapped_mapping = dict(mapping)
2582  swapped_mapping[ch1] = mapping[ch2]
2583  swapped_mapping[ch2] = mapping[ch1]
2584  score_result = self.FromFlatMappingFromFlatMapping(swapped_mapping)
2585  if score_result.QS_global > current_score:
2586  something_happened = True
2587  mapping = swapped_mapping
2588  current_score = score_result.QS_global
2589  break
2590  return mapping
2591 
2592 
2593 def _QSScoreNaive(trg, mdl, chem_groups, chem_mapping, ref_mdl_alns, contact_d,
2594  n_max_naive):
2595  best_mapping = None
2596  best_score = -1.0
2597  # qs_scorer implements caching, score calculation is thus as fast as it gets
2598  # you'll just hit a wall when the number of possible mappings becomes large
2599  qs_scorer = qsscore.QSScorer(trg, chem_groups, mdl, ref_mdl_alns)
2600  for mapping in _ChainMappings(chem_groups, chem_mapping, n_max_naive):
2601  score_result = qs_scorer.Score(mapping, check=False)
2602  if score_result.QS_global > best_score:
2603  best_mapping = mapping
2604  best_score = score_result.QS_global
2605  return (best_mapping, best_score)
2606 
2607 
2608 def _QSScoreGreedyFast(the_greed):
2609 
2610  something_happened = True
2611  mapping = dict()
2612  while something_happened:
2613  something_happened = False
2614  # search for best scoring starting point, we're using lDDT here
2615  n_best = 0
2616  best_seed = None
2617  seeds = _GetSeeds(the_greed.ref_chem_groups,
2618  the_greed.mdl_chem_groups,
2619  mapped_ref_chains = set(mapping.keys()),
2620  mapped_mdl_chains = set(mapping.values()))
2621  for seed in seeds:
2622  n = the_greed.SCCounts(seed[0], seed[1])
2623  if n > n_best:
2624  n_best = n
2625  best_seed = seed
2626  if n_best == 0:
2627  break # no proper seed found anymore...
2628  # add seed to mapping and start the greed
2629  mapping[best_seed[0]] = best_seed[1]
2630  mapping = the_greed.ExtendMapping(mapping)
2631  something_happened = True
2632 
2633  # translate mapping format and return
2634  final_mapping = list()
2635  for ref_chains in the_greed.ref_chem_groups:
2636  mapped_mdl_chains = list()
2637  for ref_ch in ref_chains:
2638  if ref_ch in mapping:
2639  mapped_mdl_chains.append(mapping[ref_ch])
2640  else:
2641  mapped_mdl_chains.append(None)
2642  final_mapping.append(mapped_mdl_chains)
2643 
2644  return final_mapping
2645 
2646 
2647 def _QSScoreGreedyFull(the_greed):
2648  """ Uses each reference chain as starting point for expansion
2649  """
2650 
2651  seeds = _GetSeeds(the_greed.ref_chem_groups, the_greed.mdl_chem_groups)
2652  best_overall_score = -1.0
2653  best_overall_mapping = dict()
2654 
2655  for seed in seeds:
2656 
2657  # do initial extension
2658  mapping = the_greed.ExtendMapping({seed[0]: seed[1]})
2659 
2660  # repeat the process until we have a full mapping
2661  something_happened = True
2662  while something_happened:
2663  something_happened = False
2664  remnant_seeds = _GetSeeds(the_greed.ref_chem_groups,
2665  the_greed.mdl_chem_groups,
2666  mapped_ref_chains = set(mapping.keys()),
2667  mapped_mdl_chains = set(mapping.values()))
2668  if len(remnant_seeds) > 0:
2669  # still more mapping to be done
2670  best_score = -1.0
2671  best_mapping = None
2672  for remnant_seed in remnant_seeds:
2673  tmp_mapping = dict(mapping)
2674  tmp_mapping[remnant_seed[0]] = remnant_seed[1]
2675  tmp_mapping = the_greed.ExtendMapping(tmp_mapping)
2676  score_result = the_greed.FromFlatMapping(tmp_mapping)
2677  if score_result.QS_global > best_score:
2678  best_score = score_result.QS_global
2679  best_mapping = tmp_mapping
2680  if best_mapping is not None:
2681  something_happened = True
2682  mapping = best_mapping
2683 
2684  score_result = the_greed.FromFlatMapping(mapping)
2685  if score_result.QS_global > best_overall_score:
2686  best_overall_score = score_result.QS_global
2687  best_overall_mapping = mapping
2688 
2689  mapping = best_overall_mapping
2690 
2691  # translate mapping format and return
2692  final_mapping = list()
2693  for ref_chains in the_greed.ref_chem_groups:
2694  mapped_mdl_chains = list()
2695  for ref_ch in ref_chains:
2696  if ref_ch in mapping:
2697  mapped_mdl_chains.append(mapping[ref_ch])
2698  else:
2699  mapped_mdl_chains.append(None)
2700  final_mapping.append(mapped_mdl_chains)
2701 
2702  return final_mapping
2703 
2704 
2705 def _QSScoreGreedyBlock(the_greed, seed_size, blocks_per_chem_group):
2706  """ try multiple seeds, i.e. try all ref/mdl chain combinations within the
2707  respective chem groups and compute single chain lDDTs. The
2708  *blocks_per_chem_group* best scoring ones are extend by *seed_size* chains
2709  and the best scoring one with respect to QS score is exhaustively extended.
2710  """
2711 
2712  if seed_size is None or seed_size < 1:
2713  raise RuntimeError(f"seed_size must be an int >= 1 (got {seed_size})")
2714 
2715  if blocks_per_chem_group is None or blocks_per_chem_group < 1:
2716  raise RuntimeError(f"blocks_per_chem_group must be an int >= 1 "
2717  f"(got {blocks_per_chem_group})")
2718 
2719  max_ext = seed_size - 1 # -1 => start seed already has size 1
2720 
2721  ref_chem_groups = copy.deepcopy(the_greed.ref_chem_groups)
2722  mdl_chem_groups = copy.deepcopy(the_greed.mdl_chem_groups)
2723 
2724  mapping = dict()
2725 
2726  something_happened = True
2727  while something_happened:
2728  something_happened = False
2729  starting_blocks = list()
2730  for ref_chains, mdl_chains in zip(ref_chem_groups, mdl_chem_groups):
2731  if len(mdl_chains) == 0:
2732  continue # nothing to map
2733  ref_chains_copy = list(ref_chains)
2734  for i in range(blocks_per_chem_group):
2735  if len(ref_chains_copy) == 0:
2736  break
2737  seeds = list()
2738  for ref_ch in ref_chains_copy:
2739  seeds += [(ref_ch, mdl_ch) for mdl_ch in mdl_chains]
2740  # extend starting seeds to *seed_size* and retain best scoring block
2741  # for further extension
2742  best_score = -1.0
2743  best_mapping = None
2744  best_seed = None
2745  for s in seeds:
2746  seed = dict(mapping)
2747  seed.update({s[0]: s[1]})
2748  seed = the_greed.ExtendMapping(seed, max_ext = max_ext)
2749  score_result = the_greed.FromFlatMapping(seed)
2750  if score_result.QS_global > best_score:
2751  best_score = score_result.QS_global
2752  best_mapping = seed
2753  best_seed = s
2754  if best_mapping != None:
2755  starting_blocks.append(best_mapping)
2756  if best_seed[0] in ref_chains_copy:
2757  # remove selected ref chain to enforce diversity
2758  ref_chains_copy.remove(best_seed[0])
2759 
2760  # fully expand initial starting blocks
2761  best_score = -1.0
2762  best_mapping = None
2763  for seed in starting_blocks:
2764  seed = the_greed.ExtendMapping(seed)
2765  score_result = the_greed.FromFlatMapping(seed)
2766  if score_result.QS_global > best_score:
2767  best_score = score_result.QS_global
2768  best_mapping = seed
2769 
2770  if best_mapping is not None and len(best_mapping) > len(mapping):
2771  # this even accepts extensions that lead to no increase in QS-score
2772  # at least they make sense from an lDDT perspective
2773  something_happened = True
2774  mapping.update(best_mapping)
2775  for ref_ch, mdl_ch in best_mapping.items():
2776  for group_idx in range(len(ref_chem_groups)):
2777  if ref_ch in ref_chem_groups[group_idx]:
2778  ref_chem_groups[group_idx].remove(ref_ch)
2779  if mdl_ch in mdl_chem_groups[group_idx]:
2780  mdl_chem_groups[group_idx].remove(mdl_ch)
2781 
2782  # translate mapping format and return
2783  final_mapping = list()
2784  for ref_chains in the_greed.ref_chem_groups:
2785  mapped_mdl_chains = list()
2786  for ref_ch in ref_chains:
2787  if ref_ch in mapping:
2788  mapped_mdl_chains.append(mapping[ref_ch])
2789  else:
2790  mapped_mdl_chains.append(None)
2791  final_mapping.append(mapped_mdl_chains)
2792 
2793  return final_mapping
2794 
2795 def _SingleRigidRMSD(initial_transforms, initial_mappings, chem_groups,
2796  chem_mapping, trg_group_pos, mdl_group_pos):
2797  """
2798  Takes initial transforms and sequentially adds chain pairs with lowest RMSD.
2799  The mapping from the transform that leads to lowest overall RMSD is
2800  returned.
2801  """
2802  best_mapping = dict()
2803  best_ssd = float("inf") # we're actually going for summed squared distances
2804  # Since all positions have same lengths and we do a
2805  # full mapping, lowest SSD has a guarantee of also
2806  # being lowest RMSD
2807  for transform in initial_transforms:
2808  mapping = dict()
2809  mapped_mdl_chains = set()
2810  ssd = 0.0
2811  for trg_chains, mdl_chains, trg_pos, mdl_pos, in zip(chem_groups,
2812  chem_mapping,
2813  trg_group_pos,
2814  mdl_group_pos):
2815  if len(trg_pos) == 0 or len(mdl_pos) == 0:
2816  continue # cannot compute valid rmsd
2817  ssds = list()
2818  t_mdl_pos = list()
2819  for m_pos in mdl_pos:
2820  t_m_pos = geom.Vec3List(m_pos)
2821  t_m_pos.ApplyTransform(transform)
2822  t_mdl_pos.append(t_m_pos)
2823  for t_pos, t in zip(trg_pos, trg_chains):
2824  for t_m_pos, m in zip(t_mdl_pos, mdl_chains):
2825  ssd = t_pos.GetSummedSquaredDistances(t_m_pos)
2826  ssds.append((ssd, (t,m)))
2827  ssds.sort()
2828  for item in ssds:
2829  p = item[1]
2830  if p[0] not in mapping and p[1] not in mapped_mdl_chains:
2831  mapping[p[0]] = p[1]
2832  mapped_mdl_chains.add(p[1])
2833  ssd += item[0]
2834 
2835  if ssd < best_ssd:
2836  best_ssd = ssd
2837  best_mapping = mapping
2838 
2839  return best_mapping
2840 
2841 def _IterativeRigidRMSD(initial_transforms, initial_mappings, chem_groups,
2842  chem_mapping, trg_group_pos, mdl_group_pos):
2843  """ Takes initial transforms and sequentially adds chain pairs with
2844  lowest RMSD. With each added chain pair, the transform gets updated.
2845  Thus the naming iterative. The mapping from the initial transform that
2846  leads to best overall RMSD score is returned.
2847  """
2848 
2849  # to directly retrieve positions using chain names
2850  trg_pos_dict = dict()
2851  for trg_pos, trg_chains in zip(trg_group_pos, chem_groups):
2852  for t_pos, t in zip(trg_pos, trg_chains):
2853  trg_pos_dict[t] = t_pos
2854  mdl_pos_dict = dict()
2855  for mdl_pos, mdl_chains in zip(mdl_group_pos, chem_mapping):
2856  for m_pos, m in zip(mdl_pos, mdl_chains):
2857  mdl_pos_dict[m] = m_pos
2858 
2859  best_mapping = dict()
2860  best_rmsd = float("inf")
2861  for initial_transform, initial_mapping in zip(initial_transforms,
2862  initial_mappings):
2863  mapping = {initial_mapping[0]: initial_mapping[1]}
2864  transform = geom.Mat4(initial_transform)
2865  mapped_trg_pos = geom.Vec3List(trg_pos_dict[initial_mapping[0]])
2866  mapped_mdl_pos = geom.Vec3List(mdl_pos_dict[initial_mapping[1]])
2867 
2868  # the following variables contain the chains which are
2869  # available for mapping
2870  trg_chain_groups = [set(group) for group in chem_groups]
2871  mdl_chain_groups = [set(group) for group in chem_mapping]
2872 
2873  # search and kick out inital mapping
2874  for group in trg_chain_groups:
2875  if initial_mapping[0] in group:
2876  group.remove(initial_mapping[0])
2877  break
2878  for group in mdl_chain_groups:
2879  if initial_mapping[1] in group:
2880  group.remove(initial_mapping[1])
2881  break
2882 
2883  something_happened = True
2884  while something_happened:
2885  # search for best mapping given current transform
2886  something_happened=False
2887  best_sc_mapping = None
2888  best_sc_group_idx = None
2889  best_sc_rmsd = float("inf")
2890  group_idx = 0
2891  for trg_chains, mdl_chains in zip(trg_chain_groups, mdl_chain_groups):
2892  for t in trg_chains:
2893  t_pos = trg_pos_dict[t]
2894  for m in mdl_chains:
2895  m_pos = mdl_pos_dict[m]
2896  t_m_pos = geom.Vec3List(m_pos)
2897  t_m_pos.ApplyTransform(transform)
2898  rmsd = t_pos.GetRMSD(t_m_pos)
2899  if rmsd < best_sc_rmsd:
2900  best_sc_rmsd = rmsd
2901  best_sc_mapping = (t,m)
2902  best_sc_group_idx = group_idx
2903  group_idx += 1
2904 
2905  if best_sc_mapping is not None:
2906  something_happened = True
2907  mapping[best_sc_mapping[0]] = best_sc_mapping[1]
2908  mapped_trg_pos.extend(trg_pos_dict[best_sc_mapping[0]])
2909  mapped_mdl_pos.extend(mdl_pos_dict[best_sc_mapping[1]])
2910  trg_chain_groups[best_sc_group_idx].remove(best_sc_mapping[0])
2911  mdl_chain_groups[best_sc_group_idx].remove(best_sc_mapping[1])
2912 
2913  transform = _GetSuperposition(mapped_mdl_pos, mapped_trg_pos,
2914  False).transformation
2915 
2916  # compute overall RMSD for current transform
2917  mapped_mdl_pos.ApplyTransform(transform)
2918  rmsd = mapped_trg_pos.GetRMSD(mapped_mdl_pos)
2919 
2920  if rmsd < best_rmsd:
2921  best_rmsd = rmsd
2922  best_mapping = mapping
2923 
2924  return best_mapping
2925 
2926 def _NaiveRMSD(chem_groups, chem_mapping, trg_group_pos, mdl_group_pos,
2927  n_max_naive):
2928 
2929  # to directly retrieve positions using chain names
2930  trg_pos_dict = dict()
2931  for trg_pos, trg_chains in zip(trg_group_pos, chem_groups):
2932  for t_pos, t in zip(trg_pos, trg_chains):
2933  trg_pos_dict[t] = t_pos
2934  mdl_pos_dict = dict()
2935  for mdl_pos, mdl_chains in zip(mdl_group_pos, chem_mapping):
2936  for m_pos, m in zip(mdl_pos, mdl_chains):
2937  mdl_pos_dict[m] = m_pos
2938 
2939  best_mapping = dict()
2940  best_rmsd = float("inf")
2941 
2942  for mapping in _ChainMappings(chem_groups, chem_mapping, n_max_naive):
2943  trg_pos = geom.Vec3List()
2944  mdl_pos = geom.Vec3List()
2945  for trg_group, mdl_group in zip(chem_groups, mapping):
2946  for trg_ch, mdl_ch in zip(trg_group, mdl_group):
2947  if trg_ch is not None and mdl_ch is not None:
2948  trg_pos.extend(trg_pos_dict[trg_ch])
2949  mdl_pos.extend(mdl_pos_dict[mdl_ch])
2950  superpose_res = mol.alg.SuperposeSVD(mdl_pos, trg_pos)
2951  if superpose_res.rmsd < best_rmsd:
2952  best_rmsd = superpose_res.rmsd
2953  best_mapping = mapping
2954 
2955  # this is stupid...
2956  tmp = dict()
2957  for chem_group, mapping in zip(chem_groups, best_mapping):
2958  for trg_ch, mdl_ch in zip(chem_group, mapping):
2959  tmp[trg_ch] = mdl_ch
2960 
2961  return tmp
2962 
2963 
2964 def _GetRefPos(trg, mdl, trg_msas, mdl_alns, max_pos = None):
2965  """ Extracts reference positions which are present in trg and mdl
2966  """
2967 
2968  # select only backbone atoms, makes processing simpler later on
2969  # (just select res.atoms[0].GetPos() as ref pos)
2970  bb_trg = trg.Select("aname=\"CA\",\"C3'\"")
2971  bb_mdl = mdl.Select("aname=\"CA\",\"C3'\"")
2972 
2973  # mdl_alns are pairwise, let's construct MSAs
2974  mdl_msas = list()
2975  for aln_list in mdl_alns:
2976  if len(aln_list) > 0:
2977  tmp = aln_list[0].GetSequence(0)
2978  ref_seq = seq.CreateSequence(tmp.GetName(), tmp.GetGaplessString())
2979  mdl_msas.append(seq.alg.MergePairwiseAlignments(aln_list, ref_seq))
2980  else:
2981  mdl_msas.append(seq.CreateAlignment())
2982 
2983  trg_pos = list()
2984  mdl_pos = list()
2985 
2986  for trg_msa, mdl_msa in zip(trg_msas, mdl_msas):
2987 
2988  if mdl_msa.GetCount() > 0:
2989  # make sure they have the same ref sequence (should be a given...)
2990  assert(trg_msa.GetSequence(0).GetGaplessString() == \
2991  mdl_msa.GetSequence(0).GetGaplessString())
2992  else:
2993  # if mdl_msa is empty, i.e. no model chain maps to the chem group
2994  # represented by trg_msa, we just continue. The result will be
2995  # empty position lists added to trg_pos and mdl_pos.
2996  pass
2997 
2998  # check which columns in MSAs are fully covered (indices relative to
2999  # first sequence)
3000  trg_indices = _GetFullyCoveredIndices(trg_msa)
3001  mdl_indices = _GetFullyCoveredIndices(mdl_msa)
3002 
3003  # get indices where both, mdl and trg, are fully covered
3004  indices = sorted(list(trg_indices.intersection(mdl_indices)))
3005 
3006  # subsample if necessary
3007  if max_pos is not None and len(indices) > max_pos:
3008  step = int(len(indices)/max_pos)
3009  indices = [indices[i] for i in range(0, len(indices), step)]
3010 
3011  # translate to column indices in the respective MSAs
3012  trg_indices = _RefIndicesToColumnIndices(trg_msa, indices)
3013  mdl_indices = _RefIndicesToColumnIndices(mdl_msa, indices)
3014 
3015  # extract positions
3016  trg_pos.append(list())
3017  mdl_pos.append(list())
3018  for s_idx in range(trg_msa.GetCount()):
3019  trg_pos[-1].append(_ExtractMSAPos(trg_msa, s_idx, trg_indices,
3020  bb_trg))
3021  # first seq in mdl_msa is ref sequence in trg and does not belong to mdl
3022  for s_idx in range(1, mdl_msa.GetCount()):
3023  mdl_pos[-1].append(_ExtractMSAPos(mdl_msa, s_idx, mdl_indices,
3024  bb_mdl))
3025 
3026  return (trg_pos, mdl_pos)
3027 
3028 def _GetFullyCoveredIndices(msa):
3029  """ Helper for _GetRefPos
3030 
3031  Returns a set containing the indices relative to first sequence in msa which
3032  are fully covered in all other sequences
3033 
3034  --AA-A-A
3035  -BBBB-BB
3036  CCCC-C-C
3037 
3038  => (0,1,3)
3039  """
3040  indices = set()
3041  ref_idx = 0
3042  for col in msa:
3043  if sum([1 for olc in col if olc != '-']) == col.GetRowCount():
3044  indices.add(ref_idx)
3045  if col[0] != '-':
3046  ref_idx += 1
3047  return indices
3048 
3049 def _RefIndicesToColumnIndices(msa, indices):
3050  """ Helper for _GetRefPos
3051 
3052  Returns a list of mapped indices. indices refer to non-gap one letter
3053  codes in the first msa sequence. The returnes mapped indices are translated
3054  to the according msa column indices
3055  """
3056  ref_idx = 0
3057  mapping = dict()
3058  for col_idx, col in enumerate(msa):
3059  if col[0] != '-':
3060  mapping[ref_idx] = col_idx
3061  ref_idx += 1
3062  return [mapping[i] for i in indices]
3063 
3064 def _ExtractMSAPos(msa, s_idx, indices, view):
3065  """ Helper for _GetRefPos
3066 
3067  Returns a geom.Vec3List containing positions refering to given msa sequence.
3068  => Chain with corresponding name is mapped onto sequence and the position of
3069  the first atom of each residue specified in indices is extracted.
3070  Indices refers to column indices in msa!
3071  """
3072  s = msa.GetSequence(s_idx)
3073  s_v = _CSel(view, [s.GetName()])
3074 
3075  # sanity check
3076  assert(len(s.GetGaplessString()) == len(s_v.residues))
3077 
3078  residue_idx = [s.GetResidueIndex(i) for i in indices]
3079  return geom.Vec3List([s_v.residues[i].atoms[0].pos for i in residue_idx])
3080 
3081 def _NChemGroupMappings(ref_chains, mdl_chains):
3082  """ Number of mappings within one chem group
3083 
3084  :param ref_chains: Reference chains
3085  :type ref_chains: :class:`list` of :class:`str`
3086  :param mdl_chains: Model chains that are mapped onto *ref_chains*
3087  :type mdl_chains: :class:`list` of :class:`str`
3088  :returns: Number of possible mappings of *mdl_chains* onto *ref_chains*
3089  """
3090  n_ref = len(ref_chains)
3091  n_mdl = len(mdl_chains)
3092  if n_ref == n_mdl:
3093  return factorial(n_ref)
3094  elif n_ref > n_mdl:
3095  n_choose_k = binom(n_ref, n_mdl)
3096  return n_choose_k * factorial(n_mdl)
3097  else:
3098  n_choose_k = binom(n_mdl, n_ref)
3099  return n_choose_k * factorial(n_ref)
3100 
3101 def _NMappings(ref_chains, mdl_chains):
3102  """ Number of mappings for a full chem mapping
3103 
3104  :param ref_chains: Chem groups of reference
3105  :type ref_chains: :class:`list` of :class:`list` of :class:`str`
3106  :param mdl_chains: Model chains that map onto those chem groups
3107  :type mdl_chains: :class:`list` of :class:`list` of :class:`str`
3108  :returns: Number of possible mappings of *mdl_chains* onto *ref_chains*
3109  """
3110  assert(len(ref_chains) == len(mdl_chains))
3111  n = 1
3112  for a,b in zip(ref_chains, mdl_chains):
3113  n *= _NChemGroupMappings(a,b)
3114  return n
3115 
3116 def _NMappingsWithin(ref_chains, mdl_chains, max_mappings):
3117  """ Check whether total number of mappings is smaller than given maximum
3118 
3119  In principle the same as :func:`_NMappings` but it stops as soon as the
3120  maximum is hit.
3121 
3122  :param ref_chains: Chem groups of reference
3123  :type ref_chains: :class:`list` of :class:`list` of :class:`str`
3124  :param mdl_chains: Model chains that map onto those chem groups
3125  :type mdl_chains: :class:`list` of :class:`list` of :class:`str`
3126  :param max_mappings: Number of max allowed mappings
3127  :returns: Whether number of possible mappings of *mdl_chains* onto
3128  *ref_chains* is below or equal *max_mappings*.
3129  """
3130  assert(len(ref_chains) == len(mdl_chains))
3131  n = 1
3132  for a,b in zip(ref_chains, mdl_chains):
3133  n *= _NChemGroupMappings(a,b)
3134  if n > max_mappings:
3135  return False
3136  return True
3137 
3138 def _RefSmallerGenerator(ref_chains, mdl_chains):
3139  """ Returns all possible ways to map mdl_chains onto ref_chains
3140 
3141  Specific for the case where len(ref_chains) < len(mdl_chains)
3142  """
3143  for c in itertools.combinations(mdl_chains, len(ref_chains)):
3144  for p in itertools.permutations(c):
3145  yield list(p)
3146 
3147 def _RefLargerGenerator(ref_chains, mdl_chains):
3148  """ Returns all possible ways to map mdl_chains onto ref_chains
3149 
3150  Specific for the case where len(ref_chains) > len(mdl_chains)
3151  Ref chains without mapped mdl chain are assigned None
3152  """
3153  n_ref = len(ref_chains)
3154  n_mdl = len(mdl_chains)
3155  for c in itertools.combinations(range(n_ref), n_mdl):
3156  for p in itertools.permutations(mdl_chains):
3157  ret_list = [None] * n_ref
3158  for idx, ch in zip(c, p):
3159  ret_list[idx] = ch
3160  yield ret_list
3161 
3162 def _RefEqualGenerator(ref_chains, mdl_chains):
3163  """ Returns all possible ways to map mdl_chains onto ref_chains
3164 
3165  Specific for the case where len(ref_chains) == len(mdl_chains)
3166  """
3167  for p in itertools.permutations(mdl_chains):
3168  yield list(p)
3169 
3170 def _RefEmptyGenerator(ref_chains, mdl_chains):
3171  yield list()
3172 
3173 def _ConcatIterators(iterators):
3174  for item in itertools.product(*iterators):
3175  yield list(item)
3176 
3177 def _ChainMappings(ref_chains, mdl_chains, n_max=None):
3178  """Returns all possible ways to map *mdl_chains* onto fixed *ref_chains*
3179 
3180  :param ref_chains: List of list of chemically equivalent chains in reference
3181  :type ref_chains: :class:`list` of :class:`list`
3182  :param mdl_chains: Equally long list of list of chemically equivalent chains
3183  in model that map on those ref chains.
3184  :type mdl_chains: :class:`list` of :class:`list`
3185  :param n_max: Aborts and raises :class:`RuntimeError` if max number of
3186  mappings is above this threshold.
3187  :type n_max: :class:`int`
3188  :returns: Iterator over all possible mappings of *mdl_chains* onto fixed
3189  *ref_chains*. Potentially contains None as padding when number of
3190  model chains for a certain mapping is smaller than the according
3191  reference chains.
3192  Example: _ChainMappings([['A', 'B', 'C'], ['D', 'E']],
3193  [['x', 'y'], ['i', 'j']])
3194  gives an iterator over: [[['x', 'y', None], ['i', 'j']],
3195  [['x', 'y', None], ['j', 'i']],
3196  [['y', 'x', None], ['i', 'j']],
3197  [['y', 'x', None], ['j', 'i']],
3198  [['x', None, 'y'], ['i', 'j']],
3199  [['x', None, 'y'], ['j', 'i']],
3200  [['y', None, 'x'], ['i', 'j']],
3201  [['y', None, 'x'], ['j', 'i']],
3202  [[None, 'x', 'y'], ['i', 'j']],
3203  [[None, 'x', 'y'], ['j', 'i']],
3204  [[None, 'y', 'x'], ['i', 'j']],
3205  [[None, 'y', 'x'], ['j', 'i']]]
3206  """
3207  assert(len(ref_chains) == len(mdl_chains))
3208 
3209  if n_max is not None:
3210  if not _NMappingsWithin(ref_chains, mdl_chains, n_max):
3211  raise RuntimeError(f"Too many mappings. Max allowed: {n_max}")
3212 
3213  # one iterator per mapping representing all mdl combinations relative to
3214  # reference
3215  iterators = list()
3216  for ref, mdl in zip(ref_chains, mdl_chains):
3217  if len(ref) == 0:
3218  iterators.append(_RefEmptyGenerator(ref, mdl))
3219  elif len(ref) == len(mdl):
3220  iterators.append(_RefEqualGenerator(ref, mdl))
3221  elif len(ref) < len(mdl):
3222  iterators.append(_RefSmallerGenerator(ref, mdl))
3223  else:
3224  iterators.append(_RefLargerGenerator(ref, mdl))
3225 
3226  return _ConcatIterators(iterators)
3227 
3228 
3229 def _GetSuperposition(pos_one, pos_two, iterative):
3230  """ Computes minimal RMSD superposition for pos_one onto pos_two
3231 
3232  :param pos_one: Positions that should be superposed onto *pos_two*
3233  :type pos_one: :class:`geom.Vec3List`
3234  :param pos_two: Reference positions
3235  :type pos_two: :class:`geom.Vec3List`
3236  :iterative: Whether iterative superposition should be used. Iterative
3237  potentially raises, uses standard superposition as fallback.
3238  :type iterative: :class:`bool`
3239  :returns: Transformation matrix to superpose *pos_one* onto *pos_two*
3240  :rtype: :class:`ost.mol.alg.SuperpositionResult`
3241  """
3242  res = None
3243  if iterative:
3244  try:
3245  res = mol.alg.IterativeSuperposeSVD(pos_one, pos_two)
3246  except:
3247  pass # triggers fallback below
3248  if res is None:
3249  res = mol.alg.SuperposeSVD(pos_one, pos_two)
3250  return res
3251 
3252 # specify public interface
3253 __all__ = ('ChainMapper', 'ReprResult', 'MappingResult')
def _NSCConserved(self, trg_ch, mdl_ch)
Definition: bb_lddt.py:481
def _NPairConserved(self, trg_int, mdl_int)
Definition: bb_lddt.py:504
def FromFlatMapping(self, flat_mapping)
Definition: bb_lddt.py:457
def __init__(self, pep_subst_mat=seq.alg.BLOSUM62, pep_gap_open=-5, pep_gap_ext=-2, nuc_subst_mat=seq.alg.NUC44, nuc_gap_open=-4, nuc_gap_ext=-4, resnum_aln=False)
def Align(self, s1, s2, chem_type=None)
def NWAlign(self, s1, s2, chem_type)
def __init__(self, ref, mdl, ref_chem_groups, mdl_chem_groups, ref_mdl_alns, contact_d=12.0, steep_opt_rate=None, greedy_prune_contact_map=False)
def ExtendMapping(self, mapping, max_ext=None)
def _SteepOpt(self, mapping, chains_to_optimize=None)
def __init__(self, ref, mdl, ref_chem_groups, mdl_chem_groups, ref_mdl_alns, inclusion_radius=15.0, thresholds=[0.5, 1.0, 2.0, 4.0], steep_opt_rate=None)
def ExtendMapping(self, mapping, max_ext=None)
def _SteepOpt(self, mapping, chains_to_optimize=None)
def GetlDDTMapping(self, model, inclusion_radius=15.0, thresholds=[0.5, 1.0, 2.0, 4.0], strategy="heuristic", steep_opt_rate=None, block_seed_size=5, block_blocks_per_chem_group=5, chem_mapping_result=None, heuristic_n_max_naive=40320)
def GetRepr(self, substructure, model, topn=1, inclusion_radius=15.0, thresholds=[0.5, 1.0, 2.0, 4.0], bb_only=False, only_interchain=False, chem_mapping_result=None, global_mapping=None)
def __init__(self, target, resnum_alignments=False, pep_seqid_thr=95., nuc_seqid_thr=95., pep_subst_mat=seq.alg.BLOSUM62, pep_gap_open=-11, pep_gap_ext=-1, nuc_subst_mat=seq.alg.NUC44, nuc_gap_open=-4, nuc_gap_ext=-4, min_pep_length=6, min_nuc_length=4, n_max_naive=1e8)
def GetMapping(self, model, n_max_naive=40320)
def GetQSScoreMapping(self, model, contact_d=12.0, strategy="heuristic", block_seed_size=5, block_blocks_per_chem_group=5, heuristic_n_max_naive=40320, steep_opt_rate=None, chem_mapping_result=None, greedy_prune_contact_map=True)
def GetRMSDMapping(self, model, strategy="heuristic", subsampling=50, chem_mapping_result=None, heuristic_n_max_naive=120)
def GetFlatMapping(self, mdl_as_key=False)
def __init__(self, target, model, chem_groups, chem_mapping, mapping, alns, opt_score=None)
def GetFlatChainMapping(self, mdl_as_key=False)
def __init__(self, lDDT, substructure, ref_view, mdl_view)
def _GetInconsistentResidues(self, ref_residues, mdl_residues)
def _InterfacePenalty1(self, interface)
Definition: qsscore.py:734
def _InterfacePenalty2(self, interface)
Definition: qsscore.py:740
def _MappedInterfaceScores(self, int1, int2)
Definition: qsscore.py:583
def FromFlatMapping(self, flat_mapping)
Definition: qsscore.py:528