OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
qsscore.py
Go to the documentation of this file.
1 import itertools
2 import numpy as np
3 from scipy.spatial import distance
4 
5 import time
6 from ost import mol
7 
8 class QSEntity:
9  """ Helper object for QS-score computation
10 
11  Holds structural information and getters for interacting chains, i.e.
12  interfaces. Peptide residues are represented by their CB position
13  (CA for GLY) and nucleotides by C3'.
14 
15  :param ent: Structure for QS score computation
16  :type ent: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
17  :param contact_d: Pairwise distance of residues to be considered as contacts
18  :type contact_d: :class:`float`
19  """
20  def __init__(self, ent, contact_d = 12.0):
21  pep_query = "(peptide=true and (aname=\"CB\" or (rname=\"GLY\" and aname=\"CA\")))"
22  nuc_query = "(nucleotide=True and aname=\"C3'\")"
23  self._view = ent.Select(" or ".join([pep_query, nuc_query]))
24  self._contact_d = contact_d
25 
26  # the following attributes will be lazily evaluated
27  self._chain_names = None
28  self._interacting_chains = None
29  self._sequence = dict()
30  self._pos = dict()
31  self._pair_dist = dict()
32  # min and max xyz for elements in pos used for fast collision
33  # detection
34  self._min_pos = dict()
35  self._max_pos = dict()
36 
37  @property
38  def view(self):
39  """ Processed structure
40 
41  View that only contains representative atoms. That's CB for peptide
42  residues (CA for GLY) and C3' for nucleotides.
43 
44  :type: :class:`ost.mol.EntityView`
45  """
46  return self._view
47 
48  @property
49  def contact_d(self):
50  """ Pairwise distance of residues to be considered as contacts
51 
52  Given at :class:`QSEntity` construction
53 
54  :type: :class:`float`
55  """
56  return self._contact_d
57 
58  @property
59  def chain_names(self):
60  """ Chain names in :attr:`~view`
61 
62  Names are sorted
63 
64  :type: :class:`list` of :class:`str`
65  """
66  if self._chain_names is None:
67  self._chain_names = sorted([ch.name for ch in self.view.chains])
68  return self._chain_names
69 
70  @property
71  def interacting_chains(self):
72  """ Pairs of chains in :attr:`~view` with at least one contact
73 
74  :type: :class:`list` of :class:`tuples`
75  """
76  if self._interacting_chains is None:
77  self._interacting_chains = list()
78  for x in itertools.combinations(self.chain_names, 2):
79  if self.PotentialInteraction(x[0], x[1]):
80  if np.count_nonzero(self.PairDist(x[0], x[1]) < self.contact_d):
81  self._interacting_chains.append(x)
82  return self._interacting_chains
83 
84  def GetChain(self, chain_name):
85  """ Get chain by name
86 
87  :param chain_name: Chain in :attr:`~view`
88  :type chain_name: :class:`str`
89  """
90  chain = self.view.FindChain(chain_name)
91  if not chain.IsValid():
92  raise RuntimeError(f"view has no chain named \"{chain_name}\"")
93  return chain
94 
95  def GetSequence(self, chain_name):
96  """ Get sequence of chain
97 
98  Returns sequence of specified chain as raw :class:`str`
99 
100  :param chain_name: Chain in :attr:`~view`
101  :type chain_name: :class:`str`
102  """
103  if chain_name not in self._sequence:
104  ch = self.GetChain(chain_name)
105  s = ''.join([r.one_letter_code for r in ch.residues])
106  self._sequence[chain_name] = s
107  return self._sequence[chain_name]
108 
109  def GetPos(self, chain_name):
110  """ Get representative positions of chain
111 
112  That's CB positions for peptide residues (CA for GLY) and C3' for
113  nucleotides. Returns positions as a numpy array of shape
114  (n_residues, 3).
115 
116  :param chain_name: Chain in :attr:`~view`
117  :type chain_name: :class:`str`
118  """
119  if chain_name not in self._pos:
120  ch = self.GetChain(chain_name)
121  pos = np.zeros((len(ch.residues), 3))
122  for i, r in enumerate(ch.residues):
123  pos[i,:] = r.atoms[0].GetPos().data
124  self._pos[chain_name] = pos
125  return self._pos[chain_name]
126 
127  def PairDist(self, chain_name_one, chain_name_two):
128  """ Get pairwise distances between two chains
129 
130  Returns distances as numpy array of shape (a, b).
131  Where a is the number of residues of the chain that comes BEFORE the
132  other in :attr:`~chain_names`
133  """
134  key = (min(chain_name_one, chain_name_two),
135  max(chain_name_one, chain_name_two))
136  if key not in self._pair_dist:
137  self._pair_dist[key] = distance.cdist(self.GetPos(key[0]),
138  self.GetPos(key[1]),
139  'euclidean')
140  return self._pair_dist[key]
141 
142  def GetMinPos(self, chain_name):
143  """ Get min x,y,z cooridnates for given chain
144 
145  Based on positions that are extracted with GetPos
146 
147  :param chain_name: Chain in :attr:`~view`
148  :type chain_name: :class:`str`
149  """
150  if chain_name not in self._min_pos:
151  self._min_pos[chain_name] = self.GetPos(chain_name).min(0)
152  return self._min_pos[chain_name]
153 
154  def GetMaxPos(self, chain_name):
155  """ Get max x,y,z cooridnates for given chain
156 
157  Based on positions that are extracted with GetPos
158 
159  :param chain_name: Chain in :attr:`~view`
160  :type chain_name: :class:`str`
161  """
162  if chain_name not in self._max_pos:
163  self._max_pos[chain_name] = self.GetPos(chain_name).max(0)
164  return self._max_pos[chain_name]
165 
166  def PotentialInteraction(self, chain_name_one, chain_name_two):
167  """ Returns True if chains potentially interact
168 
169  Based on crude collision detection. There is no guarantee
170  that they actually interact if True is returned. However,
171  if False is returned, they don't interact for sure.
172 
173  :param chain_name_one: Chain in :attr:`~view`
174  :type chain_name_one: class:`str`
175  :param chain_name_two: Chain in :attr:`~view`
176  :type chain_name_two: class:`str`
177  """
178  min_one = self.GetMinPos(chain_name_one)
179  max_one = self.GetMaxPos(chain_name_one)
180  min_two = self.GetMinPos(chain_name_two)
181  max_two = self.GetMaxPos(chain_name_two)
182  if np.max(min_one - max_two) > self.contact_d:
183  return False
184  if np.max(min_two - max_one) > self.contact_d:
185  return False
186  return True
187 
189  """
190  Holds data relevant for QS-score computation. Formulas for QS scores:
191 
192  ::
193 
194  - QS_best = weighted_scores / (weight_sum + weight_extra_mapped)
195  - QS_global = weighted_scores / (weight_sum + weight_extra_all)
196  -> weighted_scores = sum(w(min(d1,d2)) * (1 - abs(d1-d2)/12)) for shared
197  -> weight_sum = sum(w(min(d1,d2))) for shared
198  -> weight_extra_mapped = sum(w(d)) for all mapped but non-shared
199  -> weight_extra_all = sum(w(d)) for all non-shared
200  -> w(d) = 1 if d <= 5, exp(-2 * ((d-5.0)/4.28)^2) else
201 
202  In the formulas above:
203 
204  * "d": CA/CB-CA/CB distance of an "inter-chain contact" ("d1", "d2" for
205  "shared" contacts).
206  * "mapped": we could map chains of two structures and align residues in
207  :attr:`alignments`.
208  * "shared": pairs of residues which are "mapped" and have
209  "inter-chain contact" in both structures.
210  * "inter-chain contact": CB-CB pairs (CA for GLY) with distance <= 12 A
211  (fallback to CA-CA if :attr:`calpha_only` is True).
212  * "w(d)": weighting function (prob. of 2 res. to interact given CB distance)
213  from `Xu et al. 2009 <https://dx.doi.org/10.1016%2Fj.jmb.2008.06.002>`_.
214  """
215  def __init__(self, weighted_scores, weight_sum, weight_extra_mapped,
216  weight_extra_all):
217  self._weighted_scores = weighted_scores
218  self._weight_sum = weight_sum
219  self._weight_extra_mapped = weight_extra_mapped
220  self._weight_extra_all = weight_extra_all
221 
222  @property
223  def weighted_scores(self):
224  """ weighted_scores attribute as described in formula section above
225 
226  :type: :class:`float`
227  """
228  return self._weighted_scores
229 
230  @property
231  def weight_sum(self):
232  """ weight_sum attribute as described in formula section above
233 
234  :type: :class:`float`
235  """
236  return self._weight_sum
237 
238  @property
240  """ weight_extra_mapped attribute as described in formula section above
241 
242  :type: :class:`float`
243  """
244  return self._weight_extra_mapped
245 
246  @property
247  def weight_extra_all(self):
248  """ weight_extra_all attribute as described in formula section above
249 
250  :type: :class:`float`
251  """
252  return self._weight_extra_all
253 
254  @property
255  def QS_best(self):
256  """ QS_best - the actual score as described in formula section above
257 
258  :type: :class:`float`
259  """
260  nominator = self.weighted_scores
261  denominator = self.weight_sum + self.weight_extra_mapped
262  if denominator != 0.0:
263  return nominator/denominator
264  else:
265  return 0.0
266 
267  @property
268  def QS_global(self):
269  """ QS_global - the actual score as described in formula section above
270 
271  :type: :class:`float`
272  """
273  nominator = self.weighted_scores
274  denominator = self.weight_sum + self.weight_extra_all
275  if denominator != 0.0:
276  return nominator/denominator
277  else:
278  return 0.0
279 
280 
281 class QSScorer:
282  """ Helper object to compute QS-score
283 
284  Tightly integrated into the mechanisms from the chain_mapping module.
285  The prefered way to derive an object of type :class:`QSScorer` is through
286  the static constructor: :func:`~FromMappingResult`. Example score
287  computation including mapping:
288 
289  ::
290 
291  from ost.mol.alg.qsscore import QSScorer
292  from ost.mol.alg.chain_mapping import ChainMapper
293 
294  ent_1 = io.LoadPDB("path_to_assembly_1.pdb")
295  ent_2 = io.LoadPDB("path_to_assembly_2.pdb")
296 
297  chain_mapper = ChainMapper(ent_1)
298  mapping_result = chain_mapper.GetlDDTMapping(ent_2)
299  qs_scorer = QSScorer.FromMappingResult(mapping_result)
300  score_result = qs_scorer.Score(mapping_result.mapping)
301  print("score:", score_result.QS_global)
302 
303  QS-score computation in :func:`QSScorer.Score` implements caching.
304  Repeated computations with alternative chain mappings thus become faster.
305 
306  :param target: Structure designated as "target". Can be fetched from
307  :class:`ost.mol.alg.chain_mapping.MappingResult`
308  :type target: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
309  :param chem_groups: Groups of chemically equivalent chains in *target*.
310  Can be fetched from
311  :class:`ost.mol.alg.chain_mapping.MappingResult`
312  :type chem_groups: :class:`list` of :class:`list` of :class:`str`
313  :param model: Structure designated as "model". Can be fetched from
314  :class:`ost.mol.alg.chain_mapping.MappingResult`
315  :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
316  :param alns: Each alignment is accessible with ``alns[(t_chain,m_chain)]``.
317  First sequence is the sequence of the respective chain in
318  :attr:`~qsent1`, second sequence the one from :attr:`~qsent2`.
319  Can be fetched from
320  :class:`ost.mol.alg.chain_mapping.MappingResult`
321  :type alns: :class:`dict` with key: :class:`tuple` of :class:`str`, value:
322  :class:`ost.seq.AlignmentHandle`
323  """
324  def __init__(self, target, chem_groups, model, alns, contact_d = 12.0):
325 
326  self._qsent1 = QSEntity(target, contact_d = contact_d)
327 
328  # ensure that target chain names match the ones in chem_groups
329  chem_group_ch_names = list(itertools.chain.from_iterable(chem_groups))
330  if self._qsent1.chain_names != sorted(chem_group_ch_names):
331  raise RuntimeError(f"Expect exact same chain names in chem_groups "
332  f"and in target (which is processed to only "
333  f"contain peptides/nucleotides). target: "
334  f"{self._qsent1.chain_names}, chem_groups: "
335  f"{chem_group_ch_names}")
336 
337  self._chem_groups = chem_groups
338  self._qsent2 = QSEntity(model, contact_d = contact_d)
339  self._alns = alns
340 
341  # cache for mapped interface scores
342  # key: tuple of tuple ((qsent1_ch1, qsent1_ch2),
343  # ((qsent2_ch1, qsent2_ch2))
344  # value: tuple with four numbers referring to QS-score formalism
345  # 1: weighted_scores
346  # 2: weight_sum
347  # 3: weight_extra_mapped
348  # 4: weight_extra_all
349  self._mapped_cache = dict()
350 
351  # cache for non-mapped interfaces in qsent1
352  # key: tuple (qsent1_ch1, qsent1_ch2)
353  # value: contribution of that interface to weight_extra_all
354  self._qsent_1_penalties = dict()
355 
356  # same for qsent2
357  self._qsent_2_penalties = dict()
358 
359  @staticmethod
360  def FromMappingResult(mapping_result):
361  """ The preferred way to get a :class:`QSScorer`
362 
363  Static constructor that derives an object of type :class:`QSScorer`
364  using a :class:`ost.mol.alg.chain_mapping.MappingResult`
365 
366  :param mapping_result: Data source
367  :type mapping_result: :class:`ost.mol.alg.chain_mapping.MappingResult`
368  """
369  qs_scorer = QSScorer(mapping_result.target, mapping_result.chem_groups,
370  mapping_result.model, alns = mapping_result.alns)
371  return qs_scorer
372 
373  @property
374  def qsent1(self):
375  """ Represents *target*
376 
377  :type: :class:`QSEntity`
378  """
379  return self._qsent1
380 
381  @property
382  def chem_groups(self):
383  """ Groups of chemically equivalent chains in *target*
384 
385  Provided at object construction
386 
387  :type: :class:`list` of :class:`list` of :class:`str`
388  """
389  return self._chem_groups
390 
391  @property
392  def qsent2(self):
393  """ Represents *model*
394 
395  :type: :class:`QSEntity`
396  """
397  return self._qsent2
398 
399  @property
400  def alns(self):
401  """ Alignments between chains in :attr:`~qsent1` and :attr:`~qsent2`
402 
403  Provided at object construction. Each alignment is accessible with
404  ``alns[(t_chain,m_chain)]``. First sequence is the sequence of the
405  respective chain in :attr:`~qsent1`, second sequence the one from
406  :attr:`~qsent2`.
407 
408  :type: :class:`dict` with key: :class:`tuple` of :class:`str`, value:
409  :class:`ost.seq.AlignmentHandle`
410  """
411  return self._alns
412 
413  def Score(self, mapping, check=True):
414  """ Computes QS-score given chain mapping
415 
416  Again, the preferred way is to get *mapping* is from an object
417  of type :class:`ost.mol.alg.chain_mapping.MappingResult`.
418 
419  :param mapping: see
420  :attr:`ost.mol.alg.chain_mapping.MappingResult.mapping`
421  :type mapping: :class:`list` of :class:`list` of :class:`str`
422  :param check: Perform input checks, can be disabled for speed purposes
423  if you know what you're doing.
424  :type check: :class:`bool`
425  :returns: Result object of type :class:`QSScorerResult`
426  """
427 
428  if check:
429  # ensure that dimensionality of mapping matches self.chem_groups
430  if len(self.chem_groups) != len(mapping):
431  raise RuntimeError("Dimensions of self.chem_groups and mapping "
432  "must match")
433  for a,b in zip(self.chem_groups, mapping):
434  if len(a) != len(b):
435  raise RuntimeError("Dimensions of self.chem_groups and "
436  "mapping must match")
437  # ensure that chain names in mapping are all present in qsent2
438  for name in itertools.chain.from_iterable(mapping):
439  if name is not None and name not in self.qsent2.chain_names:
440  raise RuntimeError(f"Each chain in mapping must be present "
441  f"in self.qsent2. No match for "
442  f"\"{name}\"")
443 
444  flat_mapping = dict()
445  for a, b in zip(self.chem_groups, mapping):
446  flat_mapping.update({x: y for x, y in zip(a, b) if y is not None})
447 
448  return self.FromFlatMapping(flat_mapping)
449 
450  def ScoreInterface(self, trg_ch1, trg_ch2, mdl_ch1, mdl_ch2):
451  """ Computes QS-score only considering one interface
452 
453  This only works for interfaces that are computed in :func:`Score`, i.e.
454  interfaces for which the alignments are set up correctly.
455 
456  :param trg_ch1: Name of first interface chain in target
457  :type trg_ch1: :class:`str`
458  :param trg_ch2: Name of second interface chain in target
459  :type trg_ch2: :class:`str`
460  :param mdl_ch1: Name of first interface chain in model
461  :type mdl_ch1: :class:`str`
462  :param mdl_ch2: Name of second interface chain in model
463  :type mdl_ch2: :class:`str`
464  :returns: Result object of type :class:`QSScorerResult`
465  :raises: :class:`RuntimeError` if no aln for trg_ch1/mdl_ch1 or
466  trg_ch2/mdl_ch2 is available.
467  """
468  if (trg_ch1, mdl_ch1) not in self.alns:
469  raise RuntimeError(f"No aln between trg_ch1 ({trg_ch1}) and "
470  f"mdl_ch1 ({mdl_ch1}) available. Did you "
471  f"construct the QSScorer object from a "
472  f"MappingResult and are trg_ch1 and mdl_ch1 "
473  f"mapped to each other?")
474  if (trg_ch2, mdl_ch2) not in self.alns:
475  raise RuntimeError(f"No aln between trg_ch1 ({trg_ch1}) and "
476  f"mdl_ch1 ({mdl_ch1}) available. Did you "
477  f"construct the QSScorer object from a "
478  f"MappingResult and are trg_ch1 and mdl_ch1 "
479  f"mapped to each other?")
480  trg_int = (trg_ch1, trg_ch2)
481  mdl_int = (mdl_ch1, mdl_ch2)
482  a, b, c, d = self._MappedInterfaceScores(trg_int, mdl_int)
483  return QSScorerResult(a, b, c, d)
484 
485  def FromFlatMapping(self, flat_mapping):
486  """ Same as :func:`Score` but with flat mapping
487 
488  :param flat_mapping: Dictionary with target chain names as keys and
489  the mapped model chain names as value
490  :type flat_mapping: :class:`dict` with :class:`str` as key and value
491  :returns: Result object of type :class:`QSScorerResult`
492  """
493 
494  weighted_scores = 0.0
495  weight_sum = 0.0
496  weight_extra_mapped = 0.0
497  weight_extra_all = 0.0
498 
499  # keep track of processed interfaces in qsent2
500  processed_qsent2_interfaces = set()
501 
502  for int1 in self.qsent1.interacting_chains:
503  if int1[0] in flat_mapping and int1[1] in flat_mapping:
504  int2 = (flat_mapping[int1[0]], flat_mapping[int1[1]])
505  a, b, c, d = self._MappedInterfaceScores(int1, int2)
506  weighted_scores += a
507  weight_sum += b
508  weight_extra_mapped += c
509  weight_extra_all += d
510  processed_qsent2_interfaces.add((min(int2[0], int2[1]),
511  max(int2[0], int2[1])))
512  else:
513  weight_extra_all += self._InterfacePenalty1(int1)
514 
515  # process interfaces that only exist in qsent2
516  r_flat_mapping = {v:k for k,v in flat_mapping.items()} # reverse mapping...
517  for int2 in self.qsent2.interacting_chains:
518  if int2 not in processed_qsent2_interfaces:
519  if int2[0] in r_flat_mapping and int2[1] in r_flat_mapping:
520  int1 = (r_flat_mapping[int2[0]], r_flat_mapping[int2[1]])
521  a, b, c, d = self._MappedInterfaceScores(int1, int2)
522  weighted_scores += a
523  weight_sum += b
524  weight_extra_mapped += c
525  weight_extra_all += d
526  else:
527  weight_extra_all += self._InterfacePenalty2(int2)
528 
529  return QSScorerResult(weighted_scores, weight_sum, weight_extra_mapped,
530  weight_extra_all)
531 
532  def _MappedInterfaceScores(self, int1, int2):
533  key_one = (int1, int2)
534  if key_one in self._mapped_cache:
535  return self._mapped_cache[key_one]
536  key_two = ((int1[1], int1[0]), (int2[1], int2[0]))
537  if key_two in self._mapped_cache:
538  return self._mapped_cache[key_two]
539 
540  weighted_scores, weight_sum, weight_extra_mapped, weight_extra_all = \
541  self._InterfaceScores(int1, int2)
542  self._mapped_cache[key_one] = (weighted_scores, weight_sum, weight_extra_mapped,
543  weight_extra_all)
544  return (weighted_scores, weight_sum, weight_extra_mapped, weight_extra_all)
545 
546  def _InterfaceScores(self, int1, int2):
547 
548  d1 = self.qsent1.PairDist(int1[0], int1[1])
549  d2 = self.qsent2.PairDist(int2[0], int2[1])
550 
551  # given two chain names a and b: if a < b, shape of pairwise distances is
552  # (len(a), len(b)). However, if b > a, its (len(b), len(a)) => transpose
553  if int1[0] > int1[1]:
554  d1 = d1.transpose()
555  if int2[0] > int2[1]:
556  d2 = d2.transpose()
557 
558  # indices of the first chain in the two interfaces
559  mapped_indices_1_1, mapped_indices_1_2 = \
560  self._IndexMapping(int1[0], int2[0])
561  # indices of the second chain in the two interfaces
562  mapped_indices_2_1, mapped_indices_2_2 = \
563  self._IndexMapping(int1[1], int2[1])
564 
565  # get shared_masks - for this we first need to select the actual
566  # mapped positions to get a one-to-one relationship and map it back
567  # to the original mask size
568  assert(self.qsent1.contact_d == self.qsent2.contact_d)
569  contact_d = self.qsent1.contact_d
570  mapped_idx_grid_1 = np.ix_(mapped_indices_1_1, mapped_indices_2_1)
571  mapped_idx_grid_2 = np.ix_(mapped_indices_1_2, mapped_indices_2_2)
572 
573  if mapped_idx_grid_1[0].shape[0] == 0 or mapped_idx_grid_2[0].shape[0] == 0:
574  # dealing with special cases where we have no mapped residues
575  # we only avoid errors here when using maped_idx_grid_x for indexing
576  # but run the rest of the algorithm anyways which produces some
577  # computational overhead. Thats OK, as this should occur rarely
578  shared_mask_d1 = np.full(d1.shape, False, dtype=bool)
579  shared_mask_d2 = np.full(d2.shape, False, dtype=bool)
580  mapped_nonshared_mask_d1 = np.full(d1.shape, False, dtype=bool)
581  mapped_nonshared_mask_d2 = np.full(d2.shape, False, dtype=bool)
582  if mapped_idx_grid_1[0].shape[0] == 0:
583  # mapped_idx_grid_1 has not a single mapped residue which raises
584  # an error when calling something like d1[mapped_idx_grid_1]
585  mapped_d1_contacts = np.full(d1.shape, False, dtype=bool)
586  else:
587  mapped_d1_contacts = d1[mapped_idx_grid_1] < contact_d
588  mapped_nonshared_mask_d1[mapped_idx_grid_1] = mapped_d1_contacts
589 
590  if mapped_idx_grid_2[0].shape[0] == 0:
591  # mapped_idx_grid_2 has not a single mapped residue which raises
592  # an error when calling something like d2[mapped_idx_grid_2]
593  mapped_d2_contacts = np.full(d2.shape, False, dtype=bool)
594  else:
595  mapped_d2_contacts = d2[mapped_idx_grid_2] < contact_d
596  mapped_nonshared_mask_d2[mapped_idx_grid_2] = mapped_d2_contacts
597  shared_mask = np.full(mapped_d1_contacts.shape, False, dtype=bool)
598  else:
599  mapped_d1_contacts = d1[mapped_idx_grid_1] < contact_d
600  mapped_d2_contacts = d2[mapped_idx_grid_2] < contact_d
601  shared_mask = np.logical_and(mapped_d1_contacts, mapped_d2_contacts)
602  shared_mask_d1 = np.full(d1.shape, False, dtype=bool)
603  shared_mask_d1[mapped_idx_grid_1] = shared_mask
604  shared_mask_d2 = np.full(d2.shape, False, dtype=bool)
605  shared_mask_d2[mapped_idx_grid_2] = shared_mask
606 
607  # get mapped but nonshared masks
608  mapped_nonshared_mask_d1 = np.full(d1.shape, False, dtype=bool)
609  mapped_nonshared_mask_d1[mapped_idx_grid_1] = \
610  np.logical_and(np.logical_not(shared_mask), mapped_d1_contacts)
611  mapped_nonshared_mask_d2 = np.full(d2.shape, False, dtype=bool)
612  mapped_nonshared_mask_d2[mapped_idx_grid_2] = \
613  np.logical_and(np.logical_not(shared_mask), mapped_d2_contacts)
614 
615  # contributions from shared contacts
616  shared_d1 = d1[shared_mask_d1]
617  shared_d2 = d2[shared_mask_d2]
618  shared_min = np.minimum(shared_d1, shared_d2)
619  shared_abs_diff_div_12 = np.abs(np.subtract(shared_d1, shared_d2))/12.0
620  weight_term = np.ones(shared_min.shape[0])
621  bigger_5_mask = shared_min > 5.0
622  weights = np.exp(-2.0*np.square((shared_min[bigger_5_mask]-5.0)/4.28))
623  weight_term[bigger_5_mask] = weights
624  diff_term = np.subtract(np.ones(weight_term.shape[0]),
625  shared_abs_diff_div_12)
626  weighted_scores = np.sum(np.multiply(weight_term, diff_term))
627  weight_sum = np.sum(weight_term)
628 
629  # do weight_extra_all for interface one
630  nonshared_contact_mask_d1 = np.logical_and(np.logical_not(shared_mask_d1),
631  d1 < contact_d)
632  contact_distances = d1[nonshared_contact_mask_d1]
633  bigger_5 = contact_distances[contact_distances > 5]
634  weight_extra_all = np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28)))
635  # add 1.0 for all contact distances <= 5.0
636  weight_extra_all += contact_distances.shape[0] - bigger_5.shape[0]
637  # same for interface two
638  nonshared_contact_mask_d2 = np.logical_and(np.logical_not(shared_mask_d2),
639  d2 < contact_d)
640  contact_distances = d2[nonshared_contact_mask_d2]
641  bigger_5 = contact_distances[contact_distances > 5]
642  weight_extra_all += np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28)))
643  # add 1.0 for all contact distances <= 5.0
644  weight_extra_all += contact_distances.shape[0] - bigger_5.shape[0]
645 
646  # do weight_extra_mapped for interface one
647  contact_distances = d1[mapped_nonshared_mask_d1]
648  bigger_5 = contact_distances[contact_distances > 5]
649  weight_extra_mapped = np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28)))
650  # add 1.0 for all contact distances <= 5.0
651  weight_extra_mapped += contact_distances.shape[0] - bigger_5.shape[0]
652  # same for interface two
653  contact_distances = d2[mapped_nonshared_mask_d2]
654  bigger_5 = contact_distances[contact_distances > 5]
655  weight_extra_mapped += np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28)))
656  # add 1.0 for all contact distances <= 5.0
657  weight_extra_mapped += contact_distances.shape[0] - bigger_5.shape[0]
658 
659  return (weighted_scores, weight_sum, weight_extra_mapped, weight_extra_all)
660 
661  def _IndexMapping(self, ch1, ch2):
662  """ Fetches aln and returns indices of (non-)aligned residues
663 
664  returns 2 numpy arrays containing the indices of residues in
665  ch1 and ch2 which are aligned
666  """
667  mapped_indices_1 = list()
668  mapped_indices_2 = list()
669  idx_1 = 0
670  idx_2 = 0
671  for col in self.alns[(ch1, ch2)]:
672  if col[0] != '-' and col[1] != '-':
673  mapped_indices_1.append(idx_1)
674  mapped_indices_2.append(idx_2)
675  if col[0] != '-':
676  idx_1 +=1
677  if col[1] != '-':
678  idx_2 +=1
679  return (np.array(mapped_indices_1), np.array(mapped_indices_2))
680 
681  def _InterfacePenalty1(self, interface):
682  if interface not in self._qsent_1_penalties:
683  self._qsent_1_penalties[interface] = \
684  self._InterfacePenalty(self.qsent1, interface)
685  return self._qsent_1_penalties[interface]
686 
687  def _InterfacePenalty2(self, interface):
688  if interface not in self._qsent_2_penalties:
689  self._qsent_2_penalties[interface] = \
690  self._InterfacePenalty(self.qsent2, interface)
691  return self._qsent_2_penalties[interface]
692 
693  def _InterfacePenalty(self, qsent, interface):
694  d = qsent.PairDist(interface[0], interface[1])
695  contact_distances = d[d < qsent.contact_d]
696  bigger_5 = contact_distances[contact_distances > 5]
697  penalty = np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28)))
698  # add 1.0 for all contact distances <= 5.0
699  penalty += contact_distances.shape[0] - bigger_5.shape[0]
700  return penalty
701 
702 # specify public interface
703 __all__ = ('QSEntity', 'QSScorer')