OpenStructure
qsscore.py
Go to the documentation of this file.
1 import itertools
2 import numpy as np
3 from scipy.spatial import distance
4 
5 import time
6 from ost import mol
7 
8 class QSEntity:
9  """ Helper object for QS-score computation
10 
11  Holds structural information and getters for interacting chains, i.e.
12  interfaces. Peptide residues are represented by their CB position
13  (CA for GLY) and nucleotides by C3'.
14 
15  :param ent: Structure for QS score computation
16  :type ent: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
17  :param contact_d: Pairwise distance of residues to be considered as contacts
18  :type contact_d: :class:`float`
19  """
20  def __init__(self, ent, contact_d = 12.0):
21  pep_query = "(peptide=true and (aname=\"CB\" or (rname=\"GLY\" and aname=\"CA\")))"
22  nuc_query = "(nucleotide=True and aname=\"C3'\")"
23  self._view_view = ent.Select(" or ".join([pep_query, nuc_query]))
24  self._contact_d_contact_d = contact_d
25 
26  # the following attributes will be lazily evaluated
27  self._chain_names_chain_names = None
28  self._interacting_chains_interacting_chains = None
29  self._sequence_sequence = dict()
30  self._pos_pos = dict()
31  self._pair_dist_pair_dist = dict()
32  # min and max xyz for elements in pos used for fast collision
33  # detection
34  self._min_pos_min_pos = dict()
35  self._max_pos_max_pos = dict()
36 
37  @property
38  def view(self):
39  """ Processed structure
40 
41  View that only contains representative atoms. That's CB for peptide
42  residues (CA for GLY) and C3' for nucleotides.
43 
44  :type: :class:`ost.mol.EntityView`
45  """
46  return self._view_view
47 
48  @property
49  def contact_d(self):
50  """ Pairwise distance of residues to be considered as contacts
51 
52  Given at :class:`QSEntity` construction
53 
54  :type: :class:`float`
55  """
56  return self._contact_d_contact_d
57 
58  @property
59  def chain_names(self):
60  """ Chain names in :attr:`~view`
61 
62  Names are sorted
63 
64  :type: :class:`list` of :class:`str`
65  """
66  if self._chain_names_chain_names is None:
67  self._chain_names_chain_names = sorted([ch.name for ch in self.viewview.chains])
68  return self._chain_names_chain_names
69 
70  @property
71  def interacting_chains(self):
72  """ Pairs of chains in :attr:`~view` with at least one contact
73 
74  :type: :class:`list` of :class:`tuples`
75  """
76  if self._interacting_chains_interacting_chains is None:
77  self._interacting_chains_interacting_chains = list()
78  for x in itertools.combinations(self.chain_nameschain_names, 2):
79  if self.PotentialInteractionPotentialInteraction(x[0], x[1]):
80  if np.count_nonzero(self.PairDistPairDist(x[0], x[1]) < self.contact_dcontact_d):
81  self._interacting_chains_interacting_chains.append(x)
82  return self._interacting_chains_interacting_chains
83 
84  def GetChain(self, chain_name):
85  """ Get chain by name
86 
87  :param chain_name: Chain in :attr:`~view`
88  :type chain_name: :class:`str`
89  """
90  chain = self.viewview.FindChain(chain_name)
91  if not chain.IsValid():
92  raise RuntimeError(f"view has no chain named \"{chain_name}\"")
93  return chain
94 
95  def GetSequence(self, chain_name):
96  """ Get sequence of chain
97 
98  Returns sequence of specified chain as raw :class:`str`
99 
100  :param chain_name: Chain in :attr:`~view`
101  :type chain_name: :class:`str`
102  """
103  if chain_name not in self._sequence_sequence:
104  ch = self.GetChainGetChain(chain_name)
105  s = ''.join([r.one_letter_code for r in ch.residues])
106  self._sequence_sequence[chain_name] = s
107  return self._sequence_sequence[chain_name]
108 
109  def GetPos(self, chain_name):
110  """ Get representative positions of chain
111 
112  That's CB positions for peptide residues (CA for GLY) and C3' for
113  nucleotides. Returns positions as a numpy array of shape
114  (n_residues, 3).
115 
116  :param chain_name: Chain in :attr:`~view`
117  :type chain_name: :class:`str`
118  """
119  if chain_name not in self._pos_pos:
120  ch = self.GetChainGetChain(chain_name)
121  pos = np.zeros((len(ch.residues), 3))
122  for i, r in enumerate(ch.residues):
123  pos[i,:] = r.atoms[0].GetPos().data
124  self._pos_pos[chain_name] = pos
125  return self._pos_pos[chain_name]
126 
127  def PairDist(self, chain_name_one, chain_name_two):
128  """ Get pairwise distances between two chains
129 
130  Returns distances as numpy array of shape (a, b).
131  Where a is the number of residues of the chain that comes BEFORE the
132  other in :attr:`~chain_names`
133  """
134  key = (min(chain_name_one, chain_name_two),
135  max(chain_name_one, chain_name_two))
136  if key not in self._pair_dist_pair_dist:
137  self._pair_dist_pair_dist[key] = distance.cdist(self.GetPosGetPos(key[0]),
138  self.GetPosGetPos(key[1]),
139  'euclidean')
140  return self._pair_dist_pair_dist[key]
141 
142  def GetMinPos(self, chain_name):
143  """ Get min x,y,z cooridnates for given chain
144 
145  Based on positions that are extracted with GetPos
146 
147  :param chain_name: Chain in :attr:`~view`
148  :type chain_name: :class:`str`
149  """
150  if chain_name not in self._min_pos_min_pos:
151  self._min_pos_min_pos[chain_name] = self.GetPosGetPos(chain_name).min(0)
152  return self._min_pos_min_pos[chain_name]
153 
154  def GetMaxPos(self, chain_name):
155  """ Get max x,y,z cooridnates for given chain
156 
157  Based on positions that are extracted with GetPos
158 
159  :param chain_name: Chain in :attr:`~view`
160  :type chain_name: :class:`str`
161  """
162  if chain_name not in self._max_pos_max_pos:
163  self._max_pos_max_pos[chain_name] = self.GetPosGetPos(chain_name).max(0)
164  return self._max_pos_max_pos[chain_name]
165 
166  def PotentialInteraction(self, chain_name_one, chain_name_two):
167  """ Returns True if chains potentially interact
168 
169  Based on crude collision detection. There is no guarantee
170  that they actually interact if True is returned. However,
171  if False is returned, they don't interact for sure.
172 
173  :param chain_name_one: Chain in :attr:`~view`
174  :type chain_name_one: class:`str`
175  :param chain_name_two: Chain in :attr:`~view`
176  :type chain_name_two: class:`str`
177  """
178  min_one = self.GetMinPosGetMinPos(chain_name_one)
179  max_one = self.GetMaxPosGetMaxPos(chain_name_one)
180  min_two = self.GetMinPosGetMinPos(chain_name_two)
181  max_two = self.GetMaxPosGetMaxPos(chain_name_two)
182  if np.max(min_one - max_two) > self.contact_dcontact_d:
183  return False
184  if np.max(min_two - max_one) > self.contact_dcontact_d:
185  return False
186  return True
187 
189  """
190  Holds data relevant for QS-score computation. Formulas for QS scores:
191 
192  ::
193 
194  - QS_best = weighted_scores / (weight_sum + weight_extra_mapped)
195  - QS_global = weighted_scores / (weight_sum + weight_extra_all)
196  -> weighted_scores = sum(w(min(d1,d2)) * (1 - abs(d1-d2)/12)) for shared
197  -> weight_sum = sum(w(min(d1,d2))) for shared
198  -> weight_extra_mapped = sum(w(d)) for all mapped but non-shared
199  -> weight_extra_all = sum(w(d)) for all non-shared
200  -> w(d) = 1 if d <= 5, exp(-2 * ((d-5.0)/4.28)^2) else
201 
202  In the formulas above:
203 
204  * "d": CA/CB-CA/CB distance of an "inter-chain contact" ("d1", "d2" for
205  "shared" contacts).
206  * "mapped": we could map chains of two structures and align residues in
207  :attr:`alignments`.
208  * "shared": pairs of residues which are "mapped" and have
209  "inter-chain contact" in both structures.
210  * "inter-chain contact": CB-CB pairs (CA for GLY) with distance <= 12 A
211  (fallback to CA-CA if :attr:`calpha_only` is True).
212  * "w(d)": weighting function (prob. of 2 res. to interact given CB distance)
213  from `Xu et al. 2009 <https://dx.doi.org/10.1016%2Fj.jmb.2008.06.002>`_.
214  """
215  def __init__(self, weighted_scores, weight_sum, weight_extra_mapped,
216  weight_extra_all, complete_mapping):
217  self._weighted_scores_weighted_scores = weighted_scores
218  self._weight_sum_weight_sum = weight_sum
219  self._weight_extra_mapped_weight_extra_mapped = weight_extra_mapped
220  self._weight_extra_all_weight_extra_all = weight_extra_all
221  self._complete_mapping_complete_mapping = complete_mapping
222 
223  @property
224  def weighted_scores(self):
225  """ weighted_scores attribute as described in formula section above
226 
227  :type: :class:`float`
228  """
229  return self._weighted_scores_weighted_scores
230 
231  @property
232  def weight_sum(self):
233  """ weight_sum attribute as described in formula section above
234 
235  :type: :class:`float`
236  """
237  return self._weight_sum_weight_sum
238 
239  @property
241  """ weight_extra_mapped attribute as described in formula section above
242 
243  :type: :class:`float`
244  """
245  return self._weight_extra_mapped_weight_extra_mapped
246 
247  @property
248  def weight_extra_all(self):
249  """ weight_extra_all attribute as described in formula section above
250 
251  :type: :class:`float`
252  """
253  return self._weight_extra_all_weight_extra_all
254 
255  @property
256  def complete_mapping(self):
257  """ Whether the underlying mapping of the scored assemblies is complete
258 
259  In other words: If they have the same stoichiometry. This is relevant
260  for :attr:`~QS_best` and :attr:`~QS_global` in case of no contacts in
261  any of the scored entities.
262 
263  :type: :class:`bool`
264  """
265  return self._complete_mapping_complete_mapping
266 
267  @property
268  def QS_best(self):
269  """ QS_best - the actual score as described in formula section above
270 
271  If there are no contacts observed in any of the scored entities this
272  score is 1.0 if we're comparing structures with
273  :attr:`~complete_mapping`, 0.0 otherwise. In the example of two
274  monomers, no contacts can be observed but they exactly match in terms
275  of quaternary structure. Thus a perfect score. In terms of higher order
276  structure that becomes a bit more abstract but in principle they still
277  have the exact same quaternary structure if they match in stoichiometry
278  but have no single contact.
279 
280  :type: :class:`float`
281  """
282  nominator = self.weighted_scoresweighted_scores
283  denominator = self.weight_sumweight_sum + self.weight_extra_mappedweight_extra_mapped
284  if denominator != 0.0:
285  return nominator/denominator
286  elif self.complete_mappingcomplete_mapping:
287  return 1.0
288  else:
289  return 0.0
290 
291  @property
292  def QS_global(self):
293  """ QS_global - the actual score as described in formula section above
294 
295  If there are no contacts observed in any of the scored entities this
296  score is 1.0 if we're comparing structures with
297  :attr:`~complete_mapping`, 0.0 otherwise. In the example of two
298  monomers, no contacts can be observed but they exactly match in terms
299  of quaternary structure. Thus a perfect score. In terms of higher order
300  structure that becomes a bit more abstract but in principle they still
301  have the exact same quaternary structure if they match in stoichiometry
302  but have no single contact.
303 
304  :type: :class:`float`
305  """
306  nominator = self.weighted_scoresweighted_scores
307  denominator = self.weight_sumweight_sum + self.weight_extra_allweight_extra_all
308  if denominator != 0.0:
309  return nominator/denominator
310  elif self.complete_mappingcomplete_mapping:
311  return 1.0
312  else:
313  return 0.0
314 
315 
316 class QSScorer:
317  """ Helper object to compute QS-score
318 
319  Tightly integrated into the mechanisms from the chain_mapping module.
320  The prefered way to derive an object of type :class:`QSScorer` is through
321  the static constructor: :func:`~FromMappingResult`. Example score
322  computation including mapping:
323 
324  ::
325 
326  from ost.mol.alg.qsscore import QSScorer
327  from ost.mol.alg.chain_mapping import ChainMapper
328 
329  ent_1 = io.LoadPDB("path_to_assembly_1.pdb")
330  ent_2 = io.LoadPDB("path_to_assembly_2.pdb")
331 
332  chain_mapper = ChainMapper(ent_1)
333  mapping_result = chain_mapper.GetlDDTMapping(ent_2)
334  qs_scorer = QSScorer.FromMappingResult(mapping_result)
335  score_result = qs_scorer.Score(mapping_result.mapping)
336  print("score:", score_result.QS_global)
337 
338  QS-score computation in :func:`QSScorer.Score` implements caching.
339  Repeated computations with alternative chain mappings thus become faster.
340 
341  :param target: Structure designated as "target". Can be fetched from
342  :class:`ost.mol.alg.chain_mapping.MappingResult`
343  :type target: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
344  :param chem_groups: Groups of chemically equivalent chains in *target*.
345  Can be fetched from
346  :class:`ost.mol.alg.chain_mapping.MappingResult`
347  :type chem_groups: :class:`list` of :class:`list` of :class:`str`
348  :param model: Structure designated as "model". Can be fetched from
349  :class:`ost.mol.alg.chain_mapping.MappingResult`
350  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
351  :param alns: Each alignment is accessible with ``alns[(t_chain,m_chain)]``.
352  First sequence is the sequence of the respective chain in
353  :attr:`~qsent1`, second sequence the one from :attr:`~qsent2`.
354  Can be fetched from
355  :class:`ost.mol.alg.chain_mapping.MappingResult`
356  :type alns: :class:`dict` with key: :class:`tuple` of :class:`str`, value:
357  :class:`ost.seq.AlignmentHandle`
358  """
359  def __init__(self, target, chem_groups, model, alns, contact_d = 12.0):
360 
361  self._qsent1_qsent1 = QSEntity(target, contact_d = contact_d)
362 
363  # ensure that target chain names match the ones in chem_groups
364  chem_group_ch_names = list(itertools.chain.from_iterable(chem_groups))
365  if self._qsent1_qsent1.chain_names != sorted(chem_group_ch_names):
366  raise RuntimeError(f"Expect exact same chain names in chem_groups "
367  f"and in target (which is processed to only "
368  f"contain peptides/nucleotides). target: "
369  f"{self._qsent1.chain_names}, chem_groups: "
370  f"{chem_group_ch_names}")
371 
372  self._chem_groups_chem_groups = chem_groups
373  self._qsent2_qsent2 = QSEntity(model, contact_d = contact_d)
374  self._alns_alns = alns
375 
376  # cache for mapped interface scores
377  # key: tuple of tuple ((qsent1_ch1, qsent1_ch2),
378  # ((qsent2_ch1, qsent2_ch2))
379  # value: tuple with four numbers referring to QS-score formalism
380  # 1: weighted_scores
381  # 2: weight_sum
382  # 3: weight_extra_mapped
383  # 4: weight_extra_all
384  self._mapped_cache_mapped_cache = dict()
385 
386  # cache for non-mapped interfaces in qsent1
387  # key: tuple (qsent1_ch1, qsent1_ch2)
388  # value: contribution of that interface to weight_extra_all
389  self._qsent_1_penalties_qsent_1_penalties = dict()
390 
391  # same for qsent2
392  self._qsent_2_penalties_qsent_2_penalties = dict()
393 
394  @staticmethod
395  def FromMappingResult(mapping_result):
396  """ The preferred way to get a :class:`QSScorer`
397 
398  Static constructor that derives an object of type :class:`QSScorer`
399  using a :class:`ost.mol.alg.chain_mapping.MappingResult`
400 
401  :param mapping_result: Data source
402  :type mapping_result: :class:`ost.mol.alg.chain_mapping.MappingResult`
403  """
404  qs_scorer = QSScorer(mapping_result.target, mapping_result.chem_groups,
405  mapping_result.model, alns = mapping_result.alns)
406  return qs_scorer
407 
408  @property
409  def qsent1(self):
410  """ Represents *target*
411 
412  :type: :class:`QSEntity`
413  """
414  return self._qsent1_qsent1
415 
416  @property
417  def chem_groups(self):
418  """ Groups of chemically equivalent chains in *target*
419 
420  Provided at object construction
421 
422  :type: :class:`list` of :class:`list` of :class:`str`
423  """
424  return self._chem_groups_chem_groups
425 
426  @property
427  def qsent2(self):
428  """ Represents *model*
429 
430  :type: :class:`QSEntity`
431  """
432  return self._qsent2_qsent2
433 
434  @property
435  def alns(self):
436  """ Alignments between chains in :attr:`~qsent1` and :attr:`~qsent2`
437 
438  Provided at object construction. Each alignment is accessible with
439  ``alns[(t_chain,m_chain)]``. First sequence is the sequence of the
440  respective chain in :attr:`~qsent1`, second sequence the one from
441  :attr:`~qsent2`.
442 
443  :type: :class:`dict` with key: :class:`tuple` of :class:`str`, value:
444  :class:`ost.seq.AlignmentHandle`
445  """
446  return self._alns_alns
447 
448  def Score(self, mapping, check=True):
449  """ Computes QS-score given chain mapping
450 
451  Again, the preferred way is to get *mapping* is from an object
452  of type :class:`ost.mol.alg.chain_mapping.MappingResult`.
453 
454  :param mapping: see
455  :attr:`ost.mol.alg.chain_mapping.MappingResult.mapping`
456  :type mapping: :class:`list` of :class:`list` of :class:`str`
457  :param check: Perform input checks, can be disabled for speed purposes
458  if you know what you're doing.
459  :type check: :class:`bool`
460  :returns: Result object of type :class:`QSScorerResult`
461  """
462 
463  if check:
464  # ensure that dimensionality of mapping matches self.chem_groups
465  if len(self.chem_groupschem_groups) != len(mapping):
466  raise RuntimeError("Dimensions of self.chem_groups and mapping "
467  "must match")
468  for a,b in zip(self.chem_groupschem_groups, mapping):
469  if len(a) != len(b):
470  raise RuntimeError("Dimensions of self.chem_groups and "
471  "mapping must match")
472  # ensure that chain names in mapping are all present in qsent2
473  for name in itertools.chain.from_iterable(mapping):
474  if name is not None and name not in self.qsent2qsent2.chain_names:
475  raise RuntimeError(f"Each chain in mapping must be present "
476  f"in self.qsent2. No match for "
477  f"\"{name}\"")
478 
479  flat_mapping = dict()
480  for a, b in zip(self.chem_groupschem_groups, mapping):
481  flat_mapping.update({x: y for x, y in zip(a, b) if y is not None})
482 
483  return self.FromFlatMappingFromFlatMapping(flat_mapping)
484 
485  def ScoreInterface(self, trg_ch1, trg_ch2, mdl_ch1, mdl_ch2):
486  """ Computes QS-score only considering one interface
487 
488  This only works for interfaces that are computed in :func:`Score`, i.e.
489  interfaces for which the alignments are set up correctly.
490 
491  As all specified chains must be present, the mapping is considered
492  complete which affects
493  :attr:`QSScorerResult.QS_global`/:attr:`QSScorerResult.QS_best` in
494  edge cases of no observed contacts.
495 
496  :param trg_ch1: Name of first interface chain in target
497  :type trg_ch1: :class:`str`
498  :param trg_ch2: Name of second interface chain in target
499  :type trg_ch2: :class:`str`
500  :param mdl_ch1: Name of first interface chain in model
501  :type mdl_ch1: :class:`str`
502  :param mdl_ch2: Name of second interface chain in model
503  :type mdl_ch2: :class:`str`
504  :returns: Result object of type :class:`QSScorerResult`
505  :raises: :class:`RuntimeError` if no aln for trg_ch1/mdl_ch1 or
506  trg_ch2/mdl_ch2 is available.
507  """
508  if (trg_ch1, mdl_ch1) not in self.alnsalns:
509  raise RuntimeError(f"No aln between trg_ch1 ({trg_ch1}) and "
510  f"mdl_ch1 ({mdl_ch1}) available. Did you "
511  f"construct the QSScorer object from a "
512  f"MappingResult and are trg_ch1 and mdl_ch1 "
513  f"mapped to each other?")
514  if (trg_ch2, mdl_ch2) not in self.alnsalns:
515  raise RuntimeError(f"No aln between trg_ch1 ({trg_ch1}) and "
516  f"mdl_ch1 ({mdl_ch1}) available. Did you "
517  f"construct the QSScorer object from a "
518  f"MappingResult and are trg_ch1 and mdl_ch1 "
519  f"mapped to each other?")
520  trg_int = (trg_ch1, trg_ch2)
521  mdl_int = (mdl_ch1, mdl_ch2)
522  a, b, c, d = self._MappedInterfaceScores_MappedInterfaceScores(trg_int, mdl_int)
523 
524  # complete_mapping is True by definition, as the requested chain pairs
525  # are both present
526  return QSScorerResult(a, b, c, d, True)
527 
528  def FromFlatMapping(self, flat_mapping):
529  """ Same as :func:`Score` but with flat mapping
530 
531  :param flat_mapping: Dictionary with target chain names as keys and
532  the mapped model chain names as value
533  :type flat_mapping: :class:`dict` with :class:`str` as key and value
534  :returns: Result object of type :class:`QSScorerResult`
535  """
536 
537  weighted_scores = 0.0
538  weight_sum = 0.0
539  weight_extra_mapped = 0.0
540  weight_extra_all = 0.0
541 
542  # keep track of processed interfaces in qsent2
543  processed_qsent2_interfaces = set()
544 
545  for int1 in self.qsent1qsent1.interacting_chains:
546  if int1[0] in flat_mapping and int1[1] in flat_mapping:
547  int2 = (flat_mapping[int1[0]], flat_mapping[int1[1]])
548  a, b, c, d = self._MappedInterfaceScores_MappedInterfaceScores(int1, int2)
549  weighted_scores += a
550  weight_sum += b
551  weight_extra_mapped += c
552  weight_extra_all += d
553  processed_qsent2_interfaces.add((min(int2[0], int2[1]),
554  max(int2[0], int2[1])))
555  else:
556  weight_extra_all += self._InterfacePenalty1_InterfacePenalty1(int1)
557 
558  # process interfaces that only exist in qsent2
559  r_flat_mapping = {v:k for k,v in flat_mapping.items()} # reverse mapping...
560  for int2 in self.qsent2qsent2.interacting_chains:
561  if int2 not in processed_qsent2_interfaces:
562  if int2[0] in r_flat_mapping and int2[1] in r_flat_mapping:
563  int1 = (r_flat_mapping[int2[0]], r_flat_mapping[int2[1]])
564  a, b, c, d = self._MappedInterfaceScores_MappedInterfaceScores(int1, int2)
565  weighted_scores += a
566  weight_sum += b
567  weight_extra_mapped += c
568  weight_extra_all += d
569  else:
570  weight_extra_all += self._InterfacePenalty2_InterfacePenalty2(int2)
571 
572  trg_chains = sorted(self.qsent1qsent1.chain_names) # should be sorted already
573  mdl_chains = sorted(self.qsent2qsent2.chain_names) # should be sorted already
574  mapped_trg_chains = sorted(flat_mapping.keys())
575  mapped_mdl_chains = sorted(flat_mapping.values())
576  trg_complete = trg_chains == mapped_trg_chains
577  mdl_complete = mdl_chains == mapped_mdl_chains
578  complete_mapping = trg_complete and mdl_complete
579 
580  return QSScorerResult(weighted_scores, weight_sum, weight_extra_mapped,
581  weight_extra_all, complete_mapping)
582 
583  def _MappedInterfaceScores(self, int1, int2):
584  key_one = (int1, int2)
585  if key_one in self._mapped_cache_mapped_cache:
586  return self._mapped_cache_mapped_cache[key_one]
587  key_two = ((int1[1], int1[0]), (int2[1], int2[0]))
588  if key_two in self._mapped_cache_mapped_cache:
589  return self._mapped_cache_mapped_cache[key_two]
590 
591  weighted_scores, weight_sum, weight_extra_mapped, weight_extra_all = \
592  self._InterfaceScores_InterfaceScores(int1, int2)
593  self._mapped_cache_mapped_cache[key_one] = (weighted_scores, weight_sum, weight_extra_mapped,
594  weight_extra_all)
595  return (weighted_scores, weight_sum, weight_extra_mapped, weight_extra_all)
596 
597  def _InterfaceScores(self, int1, int2):
598 
599  d1 = self.qsent1qsent1.PairDist(int1[0], int1[1])
600  d2 = self.qsent2qsent2.PairDist(int2[0], int2[1])
601 
602  # given two chain names a and b: if a < b, shape of pairwise distances is
603  # (len(a), len(b)). However, if b > a, its (len(b), len(a)) => transpose
604  if int1[0] > int1[1]:
605  d1 = d1.transpose()
606  if int2[0] > int2[1]:
607  d2 = d2.transpose()
608 
609  # indices of the first chain in the two interfaces
610  mapped_indices_1_1, mapped_indices_1_2 = \
611  self._IndexMapping_IndexMapping(int1[0], int2[0])
612  # indices of the second chain in the two interfaces
613  mapped_indices_2_1, mapped_indices_2_2 = \
614  self._IndexMapping_IndexMapping(int1[1], int2[1])
615 
616  # get shared_masks - for this we first need to select the actual
617  # mapped positions to get a one-to-one relationship and map it back
618  # to the original mask size
619  assert(self.qsent1qsent1.contact_d == self.qsent2qsent2.contact_d)
620  contact_d = self.qsent1qsent1.contact_d
621  mapped_idx_grid_1 = np.ix_(mapped_indices_1_1, mapped_indices_2_1)
622  mapped_idx_grid_2 = np.ix_(mapped_indices_1_2, mapped_indices_2_2)
623 
624  if mapped_idx_grid_1[0].shape[0] == 0 or mapped_idx_grid_2[0].shape[0] == 0:
625  # dealing with special cases where we have no mapped residues
626  # we only avoid errors here when using maped_idx_grid_x for indexing
627  # but run the rest of the algorithm anyways which produces some
628  # computational overhead. Thats OK, as this should occur rarely
629  shared_mask_d1 = np.full(d1.shape, False, dtype=bool)
630  shared_mask_d2 = np.full(d2.shape, False, dtype=bool)
631  mapped_nonshared_mask_d1 = np.full(d1.shape, False, dtype=bool)
632  mapped_nonshared_mask_d2 = np.full(d2.shape, False, dtype=bool)
633  if mapped_idx_grid_1[0].shape[0] == 0:
634  # mapped_idx_grid_1 has not a single mapped residue which raises
635  # an error when calling something like d1[mapped_idx_grid_1]
636  mapped_d1_contacts = np.full(d1.shape, False, dtype=bool)
637  else:
638  mapped_d1_contacts = d1[mapped_idx_grid_1] < contact_d
639  mapped_nonshared_mask_d1[mapped_idx_grid_1] = mapped_d1_contacts
640 
641  if mapped_idx_grid_2[0].shape[0] == 0:
642  # mapped_idx_grid_2 has not a single mapped residue which raises
643  # an error when calling something like d2[mapped_idx_grid_2]
644  mapped_d2_contacts = np.full(d2.shape, False, dtype=bool)
645  else:
646  mapped_d2_contacts = d2[mapped_idx_grid_2] < contact_d
647  mapped_nonshared_mask_d2[mapped_idx_grid_2] = mapped_d2_contacts
648  shared_mask = np.full(mapped_d1_contacts.shape, False, dtype=bool)
649  else:
650  mapped_d1_contacts = d1[mapped_idx_grid_1] < contact_d
651  mapped_d2_contacts = d2[mapped_idx_grid_2] < contact_d
652  shared_mask = np.logical_and(mapped_d1_contacts, mapped_d2_contacts)
653  shared_mask_d1 = np.full(d1.shape, False, dtype=bool)
654  shared_mask_d1[mapped_idx_grid_1] = shared_mask
655  shared_mask_d2 = np.full(d2.shape, False, dtype=bool)
656  shared_mask_d2[mapped_idx_grid_2] = shared_mask
657 
658  # get mapped but nonshared masks
659  mapped_nonshared_mask_d1 = np.full(d1.shape, False, dtype=bool)
660  mapped_nonshared_mask_d1[mapped_idx_grid_1] = \
661  np.logical_and(np.logical_not(shared_mask), mapped_d1_contacts)
662  mapped_nonshared_mask_d2 = np.full(d2.shape, False, dtype=bool)
663  mapped_nonshared_mask_d2[mapped_idx_grid_2] = \
664  np.logical_and(np.logical_not(shared_mask), mapped_d2_contacts)
665 
666  # contributions from shared contacts
667  shared_d1 = d1[shared_mask_d1]
668  shared_d2 = d2[shared_mask_d2]
669  shared_min = np.minimum(shared_d1, shared_d2)
670  shared_abs_diff_div_12 = np.abs(np.subtract(shared_d1, shared_d2))/12.0
671  weight_term = np.ones(shared_min.shape[0])
672  bigger_5_mask = shared_min > 5.0
673  weights = np.exp(-2.0*np.square((shared_min[bigger_5_mask]-5.0)/4.28))
674  weight_term[bigger_5_mask] = weights
675  diff_term = np.subtract(np.ones(weight_term.shape[0]),
676  shared_abs_diff_div_12)
677  weighted_scores = np.sum(np.multiply(weight_term, diff_term))
678  weight_sum = np.sum(weight_term)
679 
680  # do weight_extra_all for interface one
681  nonshared_contact_mask_d1 = np.logical_and(np.logical_not(shared_mask_d1),
682  d1 < contact_d)
683  contact_distances = d1[nonshared_contact_mask_d1]
684  bigger_5 = contact_distances[contact_distances > 5]
685  weight_extra_all = np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28)))
686  # add 1.0 for all contact distances <= 5.0
687  weight_extra_all += contact_distances.shape[0] - bigger_5.shape[0]
688  # same for interface two
689  nonshared_contact_mask_d2 = np.logical_and(np.logical_not(shared_mask_d2),
690  d2 < contact_d)
691  contact_distances = d2[nonshared_contact_mask_d2]
692  bigger_5 = contact_distances[contact_distances > 5]
693  weight_extra_all += np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28)))
694  # add 1.0 for all contact distances <= 5.0
695  weight_extra_all += contact_distances.shape[0] - bigger_5.shape[0]
696 
697  # do weight_extra_mapped for interface one
698  contact_distances = d1[mapped_nonshared_mask_d1]
699  bigger_5 = contact_distances[contact_distances > 5]
700  weight_extra_mapped = np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28)))
701  # add 1.0 for all contact distances <= 5.0
702  weight_extra_mapped += contact_distances.shape[0] - bigger_5.shape[0]
703  # same for interface two
704  contact_distances = d2[mapped_nonshared_mask_d2]
705  bigger_5 = contact_distances[contact_distances > 5]
706  weight_extra_mapped += np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28)))
707  # add 1.0 for all contact distances <= 5.0
708  weight_extra_mapped += contact_distances.shape[0] - bigger_5.shape[0]
709 
710  return (weighted_scores, weight_sum, weight_extra_mapped, weight_extra_all)
711 
712  def _IndexMapping(self, ch1, ch2):
713  """ Fetches aln and returns indices of (non-)aligned residues
714 
715  returns 2 numpy arrays containing the indices of residues in
716  ch1 and ch2 which are aligned
717  """
718  mapped_indices_1 = list()
719  mapped_indices_2 = list()
720  idx_1 = 0
721  idx_2 = 0
722  for col in self.alnsalns[(ch1, ch2)]:
723  if col[0] != '-' and col[1] != '-':
724  mapped_indices_1.append(idx_1)
725  mapped_indices_2.append(idx_2)
726  if col[0] != '-':
727  idx_1 +=1
728  if col[1] != '-':
729  idx_2 +=1
730  return (np.array(mapped_indices_1), np.array(mapped_indices_2))
731 
732  def _InterfacePenalty1(self, interface):
733  if interface not in self._qsent_1_penalties_qsent_1_penalties:
734  self._qsent_1_penalties_qsent_1_penalties[interface] = \
735  self._InterfacePenalty_InterfacePenalty(self.qsent1qsent1, interface)
736  return self._qsent_1_penalties_qsent_1_penalties[interface]
737 
738  def _InterfacePenalty2(self, interface):
739  if interface not in self._qsent_2_penalties_qsent_2_penalties:
740  self._qsent_2_penalties_qsent_2_penalties[interface] = \
741  self._InterfacePenalty_InterfacePenalty(self.qsent2qsent2, interface)
742  return self._qsent_2_penalties_qsent_2_penalties[interface]
743 
744  def _InterfacePenalty(self, qsent, interface):
745  d = qsent.PairDist(interface[0], interface[1])
746  contact_distances = d[d < qsent.contact_d]
747  bigger_5 = contact_distances[contact_distances > 5]
748  penalty = np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28)))
749  # add 1.0 for all contact distances <= 5.0
750  penalty += contact_distances.shape[0] - bigger_5.shape[0]
751  return penalty
752 
753 # specify public interface
754 __all__ = ('QSEntity', 'QSScorer', 'QSScorerResult')
def GetMaxPos(self, chain_name)
Definition: qsscore.py:154
def GetMinPos(self, chain_name)
Definition: qsscore.py:142
def GetPos(self, chain_name)
Definition: qsscore.py:109
def GetSequence(self, chain_name)
Definition: qsscore.py:95
def GetChain(self, chain_name)
Definition: qsscore.py:84
def PairDist(self, chain_name_one, chain_name_two)
Definition: qsscore.py:127
def __init__(self, ent, contact_d=12.0)
Definition: qsscore.py:20
def PotentialInteraction(self, chain_name_one, chain_name_two)
Definition: qsscore.py:166
def _InterfacePenalty1(self, interface)
Definition: qsscore.py:732
def FromMappingResult(mapping_result)
Definition: qsscore.py:395
def _InterfaceScores(self, int1, int2)
Definition: qsscore.py:597
def _InterfacePenalty2(self, interface)
Definition: qsscore.py:738
def _InterfacePenalty(self, qsent, interface)
Definition: qsscore.py:744
def __init__(self, target, chem_groups, model, alns, contact_d=12.0)
Definition: qsscore.py:359
def ScoreInterface(self, trg_ch1, trg_ch2, mdl_ch1, mdl_ch2)
Definition: qsscore.py:485
def _IndexMapping(self, ch1, ch2)
Definition: qsscore.py:712
def Score(self, mapping, check=True)
Definition: qsscore.py:448
def _MappedInterfaceScores(self, int1, int2)
Definition: qsscore.py:583
def FromFlatMapping(self, flat_mapping)
Definition: qsscore.py:528
def __init__(self, weighted_scores, weight_sum, weight_extra_mapped, weight_extra_all, complete_mapping)
Definition: qsscore.py:216