3 from scipy.spatial
import distance
9 """ Helper object for BBlDDT computation
11 Holds structural information and getters for interacting chains, i.e.
12 interfaces. Peptide residues are represented by their CA position
13 and nucleotides by C3'.
15 :param ent: Structure for BBlDDT score computation
16 :type ent: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
17 :param contact_d: Pairwise distance of residues to be considered as contacts
18 :type contact_d: :class:`float`
21 dist_diff_thresholds = [0.5, 1.0, 2.0, 4.0]):
22 pep_query =
"(peptide=true and aname=\"CA\")"
23 nuc_query =
"(nucleotide=True and aname=\"C3'\")"
24 self.
_view_view = ent.Select(
" or ".join([pep_query, nuc_query]))
33 self.
_pos_pos = dict()
46 """ Processed structure
48 View that only contains representative atoms. That's CA for peptide
49 residues and C3' for nucleotides.
51 :type: :class:`ost.mol.EntityView`
53 return self.
_view_view
57 """ Pairwise distance of residues to be considered as contacts
59 Given at :class:`BBlDDTEntity` construction
67 """ Distance difference thresholds for lDDT computation
69 Given at :class:`BBlDDTEntity` construction
71 :type: :class:`list` of :class:`float`
77 """ Chain names in :attr:`~view`
81 :type: :class:`list` of :class:`str`
84 self.
_chain_names_chain_names = sorted([ch.name
for ch
in self.
viewview.chains])
89 """ Pairs of chains in :attr:`~view` with at least one contact
91 :type: :class:`list` of :class:`tuples`
99 for x
in itertools.combinations(self.
chain_nameschain_names, 2):
109 """ Pairs of chains in :attr:`view` with potential contribution to lDDT
111 That are pairs of chains that have at least one interaction within
112 :attr:`~dist_thresh` + max(:attr:`~dist_diff_thresholds`)
117 thresh = self.
dist_threshdist_thresh + max_dist_diff_thresh
118 for x
in itertools.combinations(self.
chain_nameschain_names, 2):
120 slack = max_dist_diff_thresh):
121 n = np.count_nonzero(self.
PairDistPairDist(x[0], x[1]) < thresh)
129 """ Number of contacts in :attr:`~interacting_chains`
131 :type: :class:`list` of :class:`int`
141 """ Number of contacts for single chains in :attr:`~chain_names`
143 :type: :class:`list` of :class:`int`
148 dist_mat = self.
DistDist(cname)
149 n = np.count_nonzero(dist_mat < self.
dist_threshdist_thresh)
153 self.
_n_sc_contacts_n_sc_contacts.append(int((n-dist_mat.shape[0])/2))
158 """ Total number of contacts
160 That's the sum of all :attr:`~n_pair_contacts` and
161 :attr:`~n_sc_contacts`.
170 """ Get chain by name
172 :param chain_name: Chain in :attr:`~view`
173 :type chain_name: :class:`str`
175 chain = self.
viewview.FindChain(chain_name)
176 if not chain.IsValid():
177 raise RuntimeError(f
"view has no chain named \"{chain_name}\"")
181 """ Get sequence of chain
183 Returns sequence of specified chain as raw :class:`str`
185 :param chain_name: Chain in :attr:`~view`
186 :type chain_name: :class:`str`
188 if chain_name
not in self.
_sequence_sequence:
189 ch = self.
GetChainGetChain(chain_name)
190 s =
''.join([r.one_letter_code
for r
in ch.residues])
192 return self.
_sequence_sequence[chain_name]
195 """ Get representative positions of chain
197 That's CA positions for peptide residues and C3' for
198 nucleotides. Returns positions as a numpy array of shape
201 :param chain_name: Chain in :attr:`~view`
202 :type chain_name: :class:`str`
204 if chain_name
not in self.
_pos_pos:
205 ch = self.
GetChainGetChain(chain_name)
206 pos = np.zeros((ch.GetResidueCount(), 3))
207 for i, r
in enumerate(ch.residues):
208 pos[i,:] = r.atoms[0].
GetPos().data
209 self.
_pos_pos[chain_name] = pos
210 return self.
_pos_pos[chain_name]
213 """ Get pairwise distance of residues within same chain
215 Returns distances as square numpy array of shape (a,a)
216 where a is the number of residues in specified chain.
218 if chain_name
not in self.
_sc_dist_sc_dist:
219 self.
_sc_dist_sc_dist[chain_name] = distance.cdist(self.
GetPosGetPos(chain_name),
220 self.
GetPosGetPos(chain_name),
222 return self.
_sc_dist_sc_dist[chain_name]
224 def PairDist(self, chain_name_one, chain_name_two):
225 """ Get pairwise distances between two chains
227 Returns distances as numpy array of shape (a, b).
228 Where a is the number of residues of the chain that comes BEFORE the
229 other in :attr:`~chain_names`
231 key = (min(chain_name_one, chain_name_two),
232 max(chain_name_one, chain_name_two))
235 self.
GetPosGetPos(key[1]),
240 """ Get min x,y,z cooridnates for given chain
242 Based on positions that are extracted with GetPos
244 :param chain_name: Chain in :attr:`~view`
245 :type chain_name: :class:`str`
247 if chain_name
not in self.
_min_pos_min_pos:
248 self.
_min_pos_min_pos[chain_name] = self.
GetPosGetPos(chain_name).min(0)
249 return self.
_min_pos_min_pos[chain_name]
252 """ Get max x,y,z cooridnates for given chain
254 Based on positions that are extracted with GetPos
256 :param chain_name: Chain in :attr:`~view`
257 :type chain_name: :class:`str`
259 if chain_name
not in self.
_max_pos_max_pos:
260 self.
_max_pos_max_pos[chain_name] = self.
GetPosGetPos(chain_name).max(0)
261 return self.
_max_pos_max_pos[chain_name]
265 """ Returns True if chains potentially interact
267 Based on crude collision detection. There is no guarantee
268 that they actually interact if True is returned. However,
269 if False is returned, they don't interact for sure.
271 :param chain_name_one: Chain in :attr:`~view`
272 :type chain_name_one: class:`str`
273 :param chain_name_two: Chain in :attr:`~view`
274 :type chain_name_two: class:`str`
275 :param slack: Add slack to interaction distance threshold
276 :type slack: :class:`float`
278 min_one = self.
GetMinPosGetMinPos(chain_name_one)
279 max_one = self.
GetMaxPosGetMaxPos(chain_name_one)
280 min_two = self.
GetMinPosGetMinPos(chain_name_two)
281 max_two = self.
GetMaxPosGetMaxPos(chain_name_two)
282 if np.max(min_one - max_two) > (self.
dist_threshdist_thresh + slack):
284 if np.max(min_two - max_one) > (self.
dist_threshdist_thresh + slack):
290 """ Helper object to compute Backbone only lDDT score
292 Tightly integrated into the mechanisms from the chain_mapping module.
293 The prefered way to derive an object of type :class:`BBlDDTScorer` is
294 through the static constructor: :func:`~FromMappingResult`.
296 lDDT computation in :func:`BBlDDTScorer.Score` implements caching.
297 Repeated computations with alternative chain mappings thus become faster.
299 :param target: Structure designated as "target". Can be fetched from
300 :class:`ost.mol.alg.chain_mapping.MappingResult`
301 :type target: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
302 :param chem_groups: Groups of chemically equivalent chains in *target*.
304 :class:`ost.mol.alg.chain_mapping.MappingResult`
305 :type chem_groups: :class:`list` of :class:`list` of :class:`str`
306 :param model: Structure designated as "model". Can be fetched from
307 :class:`ost.mol.alg.chain_mapping.MappingResult`
308 :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
309 :param alns: Each alignment is accessible with ``alns[(t_chain,m_chain)]``.
310 First sequence is the sequence of the respective chain in
311 :attr:`~qsent1`, second sequence the one from :attr:`~qsent2`.
313 :class:`ost.mol.alg.chain_mapping.MappingResult`
314 :type alns: :class:`dict` with key: :class:`tuple` of :class:`str`, value:
315 :class:`ost.seq.AlignmentHandle`
316 :param dist_thresh: Max distance of a pairwise interaction in target
317 to be considered as contact in lDDT
318 :type dist_thresh: :class:`float`
319 :param dist_diff_thresholds: Distance difference thresholds for
321 :type dist_diff_thresholds: :class:`list` of :class:`float`
323 def __init__(self, target, chem_groups, model, alns, dist_thresh = 15.0,
324 dist_diff_thresholds = [0.5, 1.0, 2.0, 4.0]):
327 dist_diff_thresholds=dist_diff_thresholds)
330 chem_group_ch_names = list(itertools.chain.from_iterable(chem_groups))
331 if self.
_trg_trg.chain_names != sorted(chem_group_ch_names):
332 raise RuntimeError(f
"Expect exact same chain names in chem_groups "
333 f
"and in target (which is processed to only "
334 f
"contain peptides/nucleotides). target: "
335 f
"{self._trg.chain_names}, chem_groups: "
336 f
"{chem_group_ch_names}")
340 dist_diff_thresholds=dist_diff_thresholds)
341 self.
_alns_alns = alns
362 dist_diff_thresholds = [0.5, 1.0, 2.0, 4.0]):
363 """ The preferred way to get a :clas:`BBlDDTScorer`
365 Static constructor that derives an object of type :class:`QSScorer`
366 using a :class:`ost.mol.alg.chain_mapping.MappingResult`
368 :param mapping_result: Data source
369 :type mapping_result: :class:`ost.mol.alg.chain_mapping.MappingResult`
370 :param dist_thresh: The lDDT distance threshold
371 :type dist_thresh: :class:`float`
372 :param dist_diff_thresholds: The lDDT distance difference thresholds
373 :type dist_diff_thresholds: :class:`list` of :class:`float`
375 scorer =
BBlDDTScorer(mapping_result.target, mapping_result.chem_groups,
376 mapping_result.model, alns = mapping_result.alns,
377 dist_thresh = dist_thresh,
378 dist_diff_thresholds = dist_diff_thresholds)
383 """ The :class:`BBlDDTEntity` representing target
385 :type: :class:`BBlDDTEntity`
391 """ The :class:`BBlDDTEntity` representing model
393 :type: :class:`BBlDDTEntity`
399 """ Alignments between chains in :attr:`~trg` and :attr:`~mdl`
401 Provided at object construction. Each alignment is accessible with
402 ``alns[(t_chain,m_chain)]``. First sequence is the sequence of the
403 respective chain in :attr:`~trg`, second sequence the one from
406 :type: :class:`dict` with key: :class:`tuple` of :class:`str`, value:
407 :class:`ost.seq.AlignmentHandle`
409 return self.
_alns_alns
413 """ Groups of chemically equivalent chains in :attr:`~trg`
415 Provided at object construction
417 :type: :class:`list` of :class:`list` of :class:`str`
421 def Score(self, mapping, check=True):
422 """ Computes Backbone lDDT given chain mapping
424 Again, the preferred way is to get *mapping* is from an object
425 of type :class:`ost.mol.alg.chain_mapping.MappingResult`.
428 :attr:`ost.mol.alg.chain_mapping.MappingResult.mapping`
429 :type mapping: :class:`list` of :class:`list` of :class:`str`
430 :param check: Perform input checks, can be disabled for speed purposes
431 if you know what you're doing.
432 :type check: :class:`bool`
437 if len(self.
chem_groupschem_groups) != len(mapping):
438 raise RuntimeError(
"Dimensions of self.chem_groups and mapping "
440 for a,b
in zip(self.
chem_groupschem_groups, mapping):
442 raise RuntimeError(
"Dimensions of self.chem_groups and "
443 "mapping must match")
445 for name
in itertools.chain.from_iterable(mapping):
446 if name
is not None and name
not in self.
mdlmdl.chain_names:
447 raise RuntimeError(f
"Each chain in mapping must be present "
448 f
"in self.mdl. No match for "
451 flat_mapping = dict()
452 for a, b
in zip(self.
chem_groupschem_groups, mapping):
453 flat_mapping.update({x: y
for x, y
in zip(a, b)
if y
is not None})
458 """ Same as :func:`Score` but with flat mapping
460 :param flat_mapping: Dictionary with target chain names as keys and
461 the mapped model chain names as value
462 :type flat_mapping: :class:`dict` with :class:`str` as key and value
463 :returns: :class:`float` representing lDDT
468 for cname
in self.
trgtrg.chain_names:
469 if cname
in flat_mapping:
470 n_conserved += self.
_NSCConserved_NSCConserved(cname, flat_mapping[cname])
473 for interface
in self.
trgtrg.interacting_chains:
474 if interface[0]
in flat_mapping
and interface[1]
in flat_mapping:
475 mdl_interface = (flat_mapping[interface[0]],
476 flat_mapping[interface[1]])
477 n_conserved += self.
_NPairConserved_NPairConserved(interface, mdl_interface)
479 return np.mean(n_conserved / self.
trgtrg.n_contacts)
481 def _NSCConserved(self, trg_ch, mdl_ch):
482 if (trg_ch, mdl_ch)
in self.
_sc_cache_sc_cache:
483 return self.
_sc_cache_sc_cache[(trg_ch, mdl_ch)]
484 trg_dist = self.
trgtrg.Dist(trg_ch)
485 mdl_dist = self.
mdlmdl.Dist(mdl_ch)
486 trg_indices, mdl_indices = self.
_IndexMapping_IndexMapping(trg_ch, mdl_ch)
487 trg_dist = trg_dist[np.ix_(trg_indices, trg_indices)]
488 mdl_dist = mdl_dist[np.ix_(mdl_indices, mdl_indices)]
491 mask = np.triu(trg_dist < self.
_dist_thresh_dist_thresh)
492 n_diag = trg_dist.shape[0]
493 trg_dist = trg_dist[mask]
494 mdl_dist = mdl_dist[mask]
495 dist_diffs = np.absolute(trg_dist - mdl_dist)
498 N = (dist_diffs < thresh).sum()
500 n_conserved[thresh_idx] = int((N - n_diag))
501 self.
_sc_cache_sc_cache[(trg_ch, mdl_ch)] = n_conserved
504 def _NPairConserved(self, trg_int, mdl_int):
505 key_one = (trg_int, mdl_int)
508 key_two = ((trg_int[1], trg_int[0]), (mdl_int[1], mdl_int[0]))
511 trg_dist = self.
trgtrg.PairDist(trg_int[0], trg_int[1])
512 mdl_dist = self.
mdlmdl.PairDist(mdl_int[0], mdl_int[1])
513 if trg_int[0] > trg_int[1]:
514 trg_dist = trg_dist.transpose()
515 if mdl_int[0] > mdl_int[1]:
516 mdl_dist = mdl_dist.transpose()
517 trg_indices_1, mdl_indices_1 = self.
_IndexMapping_IndexMapping(trg_int[0], mdl_int[0])
518 trg_indices_2, mdl_indices_2 = self.
_IndexMapping_IndexMapping(trg_int[1], mdl_int[1])
519 trg_dist = trg_dist[np.ix_(trg_indices_1, trg_indices_2)]
520 mdl_dist = mdl_dist[np.ix_(mdl_indices_1, mdl_indices_2)]
523 trg_dist = trg_dist[mask]
524 mdl_dist = mdl_dist[mask]
525 dist_diffs = np.absolute(trg_dist - mdl_dist)
528 n_conserved[thresh_idx] = (dist_diffs < thresh).sum()
532 def _IndexMapping(self, ch1, ch2):
533 """ Fetches aln and returns indices of aligned residues
535 returns 2 numpy arrays containing the indices of residues in
536 ch1 and ch2 which are aligned
538 mapped_indices_1 = list()
539 mapped_indices_2 = list()
542 for col
in self.
alnsalns[(ch1, ch2)]:
543 if col[0] !=
'-' and col[1] !=
'-':
544 mapped_indices_1.append(idx_1)
545 mapped_indices_2.append(idx_2)
550 return (np.array(mapped_indices_1), np.array(mapped_indices_2))
def potentially_contributing_chains(self)
def GetMaxPos(self, chain_name)
def n_pair_contacts(self)
def Dist(self, chain_name)
def GetMinPos(self, chain_name)
def GetPos(self, chain_name)
def interacting_chains(self)
def dist_diff_thresholds(self)
def GetSequence(self, chain_name)
def GetChain(self, chain_name)
def PairDist(self, chain_name_one, chain_name_two)
def __init__(self, ent, dist_thresh=15.0, dist_diff_thresholds=[0.5, 1.0, 2.0, 4.0])
_potentially_contributing_chains
def PotentialInteraction(self, chain_name_one, chain_name_two, slack=0.0)
def _NSCConserved(self, trg_ch, mdl_ch)
def __init__(self, target, chem_groups, model, alns, dist_thresh=15.0, dist_diff_thresholds=[0.5, 1.0, 2.0, 4.0])
def _NPairConserved(self, trg_int, mdl_int)
def FromMappingResult(mapping_result, dist_thresh=15.0, dist_diff_thresholds=[0.5, 1.0, 2.0, 4.0])
def _IndexMapping(self, ch1, ch2)
def Score(self, mapping, check=True)
def FromFlatMapping(self, flat_mapping)