10 """ Helper object for Contact-score computation
12 def __init__(self, ent, contact_d = 5.0, contact_mode="aa"):
14 if contact_mode
not in [
"aa",
"repr"]:
15 raise RuntimeError(
"contact_mode must be in [\"aa\", \"repr\"]")
17 if contact_mode ==
"repr":
18 for r
in ent.residues:
20 if r.IsPeptideLinking():
24 elif r.GetName() ==
"GLY":
28 elif r.IsNucleotideLinking():
29 c3 = r.FindAtom(
"C3'")
33 raise RuntimeError(f
"Only support peptide and nucleotide "
34 f
"residues in \"repr\" contact mode. "
35 f
"Problematic residue: {r}")
37 raise RuntimeError(f
"Residue {r} has no required "
38 f
"representative atom (CB for peptide "
39 f
"residues (CA for GLY) C3' for "
40 f
"nucleotide residues.")
45 self.
_view_view = ent.CreateFullView()
47 pep_query =
"(peptide=true and (aname=\"CB\" or (rname=\"GLY\" and aname=\"CA\")))"
48 nuc_query =
"(nucleotide=True and aname=\"C3'\")"
49 self.
_view_view = ent.Select(
" or ".join([pep_query, nuc_query]))
63 """ The structure depending on *contact_mode*
65 Full view in case of "aa", view that only contains representative
66 atoms in case of "repr".
68 :type: :class:`ost.mol.EntityView`
70 return self.
_view_view
76 Can either be "aa", meaning that all atoms are considered to identify
77 contacts, or "repr" which only considers distances between
78 representative atoms. For peptides thats CB (CA for GLY), for
79 nucleotides thats C3'.
87 """ Pairwise distance of residues to be considered as contacts
89 Given at :class:`ContactScorer` construction
97 """ Chain names in :attr:`~view`
101 :type: :class:`list` of :class:`str`
104 self.
_chain_names_chain_names = sorted([ch.name
for ch
in self.
viewview.chains])
109 """ Pairs of chains in :attr:`~view` with at least one contact
111 :type: :class:`list` of :class:`tuples`
119 """ Interchain contacts
121 Organized as :class:`dict` with key (cname1, cname2) and values being
122 a set of tuples with the respective residue indices.
123 cname1 < cname2 evaluates to True.
131 """ Human readable interchain contacts
133 Human readable version of :attr:`~contacts`. Simple list with tuples
134 containing two strings specifying the residues in contact. Format:
135 <cname>.<rnum>.<ins_code>
143 """ Interface residues
145 Residues in each chain that are in contact with any other chain.
146 Organized as :class:`dict` with key cname and values the respective
147 residue indices in a :class:`set`.
155 """ Human readable interface residues
157 Human readable version of :attr:`interface_residues`. :class:`list` of
158 strings specifying the interface residues in format:
159 <cname>.<rnum>.<ins_code>
166 """ Get chain by name
168 :param chain_name: Chain in :attr:`~view`
169 :type chain_name: :class:`str`
171 chain = self.
viewview.FindChain(chain_name)
172 if not chain.IsValid():
173 raise RuntimeError(f
"view has no chain named \"{chain_name}\"")
177 """ Get sequence of chain
179 Returns sequence of specified chain as raw :class:`str`
181 :param chain_name: Chain in :attr:`~view`
182 :type chain_name: :class:`str`
184 if chain_name
not in self.
_sequence_sequence:
185 ch = self.
GetChainGetChain(chain_name)
186 s =
''.join([r.one_letter_code
for r
in ch.residues])
188 return self.
_sequence_sequence[chain_name]
190 def _SetupContacts(self):
195 for ch
in self.
viewview.chains:
196 for r_idx, r
in enumerate(ch.residues):
197 r.SetIntProp(
"contact_idx", r_idx)
199 residue_lists = list()
207 min_chain_pos = list()
208 max_chain_pos = list()
211 ch = self.
viewview.FindChain(cname)
212 if ch.GetAtomCount() == 0:
213 raise RuntimeError(f
"Chain without atoms observed: \"{cname}\"")
214 residue_lists.append([r
for r
in ch.residues])
216 for r
in residue_lists[-1]:
217 pos = np.zeros((r.GetAtomCount(), 3))
218 for at_idx, at
in enumerate(r.atoms):
220 pos[(at_idx, 0)] = p[0]
221 pos[(at_idx, 1)] = p[1]
222 pos[(at_idx, 2)] = p[2]
224 min_res_pos = np.vstack([p.min(0)
for p
in res_pos])
225 max_res_pos = np.vstack([p.max(0)
for p
in res_pos])
226 min_res_x.append(min_res_pos[:, 0])
227 min_res_y.append(min_res_pos[:, 1])
228 min_res_z.append(min_res_pos[:, 2])
229 max_res_x.append(max_res_pos[:, 0])
230 max_res_y.append(max_res_pos[:, 1])
231 max_res_z.append(max_res_pos[:, 2])
232 min_chain_pos.append(min_res_pos.min(0))
233 max_chain_pos.append(max_res_pos.max(0))
234 per_res_pos.append(res_pos)
239 for ch1_idx
in range(len(self.
chain_nameschain_names)):
240 for ch2_idx
in range(ch1_idx + 1, len(self.
chain_nameschain_names)):
243 if np.max(min_chain_pos[ch1_idx] - max_chain_pos[ch2_idx]) > self.
contact_dcontact_d:
245 if np.max(min_chain_pos[ch2_idx] - max_chain_pos[ch1_idx]) > self.
contact_dcontact_d:
249 skip_one = np.subtract.outer(min_res_x[ch1_idx], max_res_x[ch2_idx]) > self.
contact_dcontact_d
250 skip_one = np.logical_or(skip_one, np.subtract.outer(min_res_y[ch1_idx], max_res_y[ch2_idx]) > self.
contact_dcontact_d)
251 skip_one = np.logical_or(skip_one, np.subtract.outer(min_res_z[ch1_idx], max_res_z[ch2_idx]) > self.
contact_dcontact_d)
252 skip_two = np.subtract.outer(min_res_x[ch2_idx], max_res_x[ch1_idx]) > self.
contact_dcontact_d
253 skip_two = np.logical_or(skip_two, np.subtract.outer(min_res_y[ch2_idx], max_res_y[ch1_idx]) > self.
contact_dcontact_d)
254 skip_two = np.logical_or(skip_two, np.subtract.outer(min_res_z[ch2_idx], max_res_z[ch1_idx]) > self.
contact_dcontact_d)
255 skip = np.logical_or(skip_one, skip_two.T)
258 r1_indices, r2_indices = np.nonzero(np.logical_not(skip))
259 ch1_per_res_pos = per_res_pos[ch1_idx]
260 ch2_per_res_pos = per_res_pos[ch2_idx]
261 for r1_idx, r2_idx
in zip(r1_indices, r2_indices):
263 p1 = ch1_per_res_pos[r1_idx]
264 p2 = ch2_per_res_pos[r2_idx]
265 x2 = np.sum(p1**2, axis=1)
266 y2 = np.sum(p2**2, axis=1)
267 xy = np.matmul(p1, p2.T)
268 x2 = x2.reshape(-1, 1)
269 squared_distances = x2 - 2*xy + y2
270 if np.min(squared_distances) <= scd:
272 r1 = residue_lists[ch1_idx][r1_idx]
273 r2 = residue_lists[ch2_idx][r2_idx]
275 if cname_key
not in self.
_contacts_contacts:
276 self.
_contacts_contacts[cname_key] = set()
277 self.
_contacts_contacts[cname_key].add((r1.GetIntProp(
"contact_idx"),
278 r2.GetIntProp(
"contact_idx")))
279 rnum1 = r1.GetNumber()
280 hr1 = f
"{self.chain_names[ch1_idx]}.{rnum1.num}.{rnum1.ins_code}"
281 rnum2 = r2.GetNumber()
282 hr2 = f
"{self.chain_names[ch2_idx]}.{rnum2.num}.{rnum2.ins_code}"
283 self.
_hr_contacts_hr_contacts.append((hr1.strip(
"\u0000"),
284 hr2.strip(
"\u0000")))
287 def _SetupInterfaceResidues(self):
289 for k,v
in self.
contactscontacts.items():
294 def _SetupHRInterfaceResidues(self):
295 interface_residues = set()
297 interface_residues.add(item[0])
298 interface_residues.add(item[1])
304 Holds data relevant to compute ics
306 def __init__(self, n_trg_contacts, n_mdl_contacts, n_union, n_intersection):
314 """ Number of contacts in target
322 """ Number of contacts in model
330 """ Precision of model contacts
332 The fraction of model contacts that are also present in target
343 """ Recall of model contacts
345 The fraction of target contacts that are also present in model
356 """ The Interface Contact Similarity score (ICS)
358 Combination of :attr:`precision` and :attr:`recall` using the F1-measure
360 :type: :class:`float`
366 if denominator != 0.0:
367 return 2*nominator/denominator
373 Holds data relevant to compute ips
375 def __init__(self, n_trg_int_res, n_mdl_int_res, n_union, n_intersection):
383 """ Number of interface residues in target
387 return self._n_trg_contacts
391 """ Number of interface residues in model
399 """ Precision of model interface residues
401 The fraction of model interface residues that are also interface
413 """ Recall of model interface residues
415 The fraction of target interface residues that are also interface
427 """ The Interface Patch Similarity score (IPS)
429 Jaccard coefficient of interface residues in model/target.
430 Technically thats :attr:`intersection`/:attr:`union`
432 :type: :class:`float`
439 """ Helper object to compute Contact scores
441 Tightly integrated into the mechanisms from the chain_mapping module.
442 The prefered way to derive an object of type :class:`ContactScorer` is
443 through the static constructor: :func:`~FromMappingResult`.
445 Usage is the same as for :class:`ost.mol.alg.QSScorer`
448 def __init__(self, target, chem_groups, model, alns,
449 contact_mode="aa", contact_d=5.0):
451 contact_d = contact_d)
453 chem_group_ch_names = list(itertools.chain.from_iterable(chem_groups))
454 if self.
_cent1_cent1.chain_names != sorted(chem_group_ch_names):
455 raise RuntimeError(f
"Expect exact same chain names in chem_groups "
456 f
"and in target (which is processed to only "
457 f
"contain peptides/nucleotides). target: "
458 f
"{self._cent1.chain_names}, chem_groups: "
459 f
"{chem_group_ch_names}")
463 contact_d = contact_d)
464 self.
_alns_alns = alns
489 """ The preferred way to get a :class:`ContactScorer`
491 Static constructor that derives an object of type :class:`ContactScorer`
492 using a :class:`ost.mol.alg.chain_mapping.MappingResult`
494 :param mapping_result: Data source
495 :type mapping_result: :class:`ost.mol.alg.chain_mapping.MappingResult`
498 mapping_result.chem_groups,
499 mapping_result.model,
501 contact_mode = contact_mode,
502 contact_d = contact_d)
503 return contact_scorer
507 """ Represents *target*
509 :type: :class:`ContactEntity`
515 """ Groups of chemically equivalent chains in *target*
517 Provided at object construction
519 :type: :class:`list` of :class:`list` of :class:`str`
525 """ Represents *model*
527 :type: :class:`ContactEntity`
533 """ Alignments between chains in :attr:`~cent1` and :attr:`~cent2`
535 Provided at object construction. Each alignment is accessible with
536 ``alns[(t_chain,m_chain)]``. First sequence is the sequence of the
537 respective chain in :attr:`~cent1`, second sequence the one from
540 :type: :class:`dict` with key: :class:`tuple` of :class:`str`, value:
541 :class:`ost.seq.AlignmentHandle`
543 return self.
_alns_alns
546 """ Computes ICS given chain mapping
548 Again, the preferred way is to get *mapping* is from an object
549 of type :class:`ost.mol.alg.chain_mapping.MappingResult`.
552 :attr:`ost.mol.alg.chain_mapping.MappingResult.mapping`
553 :type mapping: :class:`list` of :class:`list` of :class:`str`
554 :param check: Perform input checks, can be disabled for speed purposes
555 if you know what you're doing.
556 :type check: :class:`bool`
557 :returns: Result object of type :class:`ContactScorerResultICS`
562 if len(self.
chem_groupschem_groups) != len(mapping):
563 raise RuntimeError(
"Dimensions of self.chem_groups and mapping "
565 for a,b
in zip(self.
chem_groupschem_groups, mapping):
567 raise RuntimeError(
"Dimensions of self.chem_groups and "
568 "mapping must match")
570 for name
in itertools.chain.from_iterable(mapping):
571 if name
is not None and name
not in self.
cent2cent2.chain_names:
572 raise RuntimeError(f
"Each chain in mapping must be present "
573 f
"in self.cent2. No match for "
576 flat_mapping = dict()
577 for a, b
in zip(self.
chem_groupschem_groups, mapping):
578 flat_mapping.update({x: y
for x, y
in zip(a, b)
if y
is not None})
583 """ Computes ICS scores only considering one interface
585 This only works for interfaces that are computed in :func:`Score`, i.e.
586 interfaces for which the alignments are set up correctly.
588 :param trg_ch1: Name of first interface chain in target
589 :type trg_ch1: :class:`str`
590 :param trg_ch2: Name of second interface chain in target
591 :type trg_ch2: :class:`str`
592 :param mdl_ch1: Name of first interface chain in model
593 :type mdl_ch1: :class:`str`
594 :param mdl_ch2: Name of second interface chain in model
595 :type mdl_ch2: :class:`str`
596 :returns: Result object of type :class:`ContactScorerResultICS`
597 :raises: :class:`RuntimeError` if no aln for trg_ch1/mdl_ch1 or
598 trg_ch2/mdl_ch2 is available.
600 if (trg_ch1, mdl_ch1)
not in self.
alnsalns:
601 raise RuntimeError(f
"No aln between trg_ch1 ({trg_ch1}) and "
602 f
"mdl_ch1 ({mdl_ch1}) available. Did you "
603 f
"construct the QSScorer object from a "
604 f
"MappingResult and are trg_ch1 and mdl_ch1 "
605 f
"mapped to each other?")
606 if (trg_ch2, mdl_ch2)
not in self.
alnsalns:
607 raise RuntimeError(f
"No aln between trg_ch1 ({trg_ch1}) and "
608 f
"mdl_ch1 ({mdl_ch1}) available. Did you "
609 f
"construct the QSScorer object from a "
610 f
"MappingResult and are trg_ch1 and mdl_ch1 "
611 f
"mapped to each other?")
612 trg_int = (trg_ch1, trg_ch2)
613 mdl_int = (mdl_ch1, mdl_ch2)
614 trg_int_r = (trg_ch2, trg_ch1)
615 mdl_int_r = (mdl_ch2, mdl_ch1)
617 if trg_int
in self.
cent1cent1.contacts:
618 n_trg = len(self.
cent1cent1.contacts[trg_int])
619 elif trg_int_r
in self.
cent1cent1.contacts:
620 n_trg = len(self.
cent1cent1.contacts[trg_int_r])
624 if mdl_int
in self.
cent2cent2.contacts:
625 n_mdl = len(self.
cent2cent2.contacts[mdl_int])
626 elif mdl_int_r
in self.
cent2cent2.contacts:
627 n_mdl = len(self.
cent2cent2.contacts[mdl_int_r])
635 """ Same as :func:`ScoreICS` but with flat mapping
637 :param flat_mapping: Dictionary with target chain names as keys and
638 the mapped model chain names as value
639 :type flat_mapping: :class:`dict` with :class:`str` as key and value
640 :returns: Result object of type :class:`ContactScorerResultICS`
642 n_trg = sum([len(x)
for x
in self.
cent1cent1.contacts.values()])
643 n_mdl = sum([len(x)
for x
in self.
cent2cent2.contacts.values()])
647 processed_cent2_interfaces = set()
648 for int1
in self.
cent1cent1.interacting_chains:
649 if int1[0]
in flat_mapping
and int1[1]
in flat_mapping:
650 int2 = (flat_mapping[int1[0]], flat_mapping[int1[1]])
654 processed_cent2_interfaces.add((min(int2), max(int2)))
657 r_flat_mapping = {v:k
for k,v
in flat_mapping.items()}
658 for int2
in self.
cent2cent2.interacting_chains:
659 if int2
not in processed_cent2_interfaces:
660 if int2[0]
in r_flat_mapping
and int2[1]
in r_flat_mapping:
661 int1 = (r_flat_mapping[int2[0]], r_flat_mapping[int2[1]])
667 n_union, n_intersection)
670 """ Computes IPS given chain mapping
672 Again, the preferred way is to get *mapping* is from an object
673 of type :class:`ost.mol.alg.chain_mapping.MappingResult`.
676 :attr:`ost.mol.alg.chain_mapping.MappingResult.mapping`
677 :type mapping: :class:`list` of :class:`list` of :class:`str`
678 :param check: Perform input checks, can be disabled for speed purposes
679 if you know what you're doing.
680 :type check: :class:`bool`
681 :returns: Result object of type :class:`ContactScorerResultIPS`
686 if len(self.
chem_groupschem_groups) != len(mapping):
687 raise RuntimeError(
"Dimensions of self.chem_groups and mapping "
689 for a,b
in zip(self.
chem_groupschem_groups, mapping):
691 raise RuntimeError(
"Dimensions of self.chem_groups and "
692 "mapping must match")
694 for name
in itertools.chain.from_iterable(mapping):
695 if name
is not None and name
not in self.
cent2cent2.chain_names:
696 raise RuntimeError(f
"Each chain in mapping must be present "
697 f
"in self.cent2. No match for "
700 flat_mapping = dict()
701 for a, b
in zip(self.
chem_groupschem_groups, mapping):
702 flat_mapping.update({x: y
for x, y
in zip(a, b)
if y
is not None})
707 """ Computes IPS scores only considering one interface
709 This only works for interfaces that are computed in :func:`Score`, i.e.
710 interfaces for which the alignments are set up correctly.
712 :param trg_ch1: Name of first interface chain in target
713 :type trg_ch1: :class:`str`
714 :param trg_ch2: Name of second interface chain in target
715 :type trg_ch2: :class:`str`
716 :param mdl_ch1: Name of first interface chain in model
717 :type mdl_ch1: :class:`str`
718 :param mdl_ch2: Name of second interface chain in model
719 :type mdl_ch2: :class:`str`
720 :returns: Result object of type :class:`ContactScorerResultIPS`
721 :raises: :class:`RuntimeError` if no aln for trg_ch1/mdl_ch1 or
722 trg_ch2/mdl_ch2 is available.
724 if (trg_ch1, mdl_ch1)
not in self.
alnsalns:
725 raise RuntimeError(f
"No aln between trg_ch1 ({trg_ch1}) and "
726 f
"mdl_ch1 ({mdl_ch1}) available. Did you "
727 f
"construct the QSScorer object from a "
728 f
"MappingResult and are trg_ch1 and mdl_ch1 "
729 f
"mapped to each other?")
730 if (trg_ch2, mdl_ch2)
not in self.
alnsalns:
731 raise RuntimeError(f
"No aln between trg_ch1 ({trg_ch1}) and "
732 f
"mdl_ch1 ({mdl_ch1}) available. Did you "
733 f
"construct the QSScorer object from a "
734 f
"MappingResult and are trg_ch1 and mdl_ch1 "
735 f
"mapped to each other?")
736 trg_int = (trg_ch1, trg_ch2)
737 mdl_int = (mdl_ch1, mdl_ch2)
738 trg_int_r = (trg_ch2, trg_ch1)
739 mdl_int_r = (mdl_ch2, mdl_ch1)
741 if trg_int
in self.
cent1cent1.contacts:
742 n_trg = len(self.
cent1cent1.contacts[trg_int])
743 elif trg_int_r
in self.
cent1cent1.contacts:
744 n_trg = len(self.
cent1cent1.contacts[trg_int_r])
748 if mdl_int
in self.
cent2cent2.contacts:
749 n_mdl = len(self.
cent2cent2.contacts[mdl_int])
750 elif mdl_int_r
in self.
cent2cent2.contacts:
751 n_mdl = len(self.
cent2cent2.contacts[mdl_int_r])
760 """ Same as :func:`ScoreIPS` but with flat mapping
762 :param flat_mapping: Dictionary with target chain names as keys and
763 the mapped model chain names as value
764 :type flat_mapping: :class:`dict` with :class:`str` as key and value
765 :returns: Result object of type :class:`ContactScorerResultIPS`
767 n_trg = sum([len(x)
for x
in self.
cent1cent1.interface_residues.values()])
768 n_mdl = sum([len(x)
for x
in self.
cent2cent2.interface_residues.values()])
772 processed_cent2_chains = set()
773 for trg_ch
in self.
cent1cent1.chain_names:
774 if trg_ch
in flat_mapping:
775 a, b = self.
_MappedSCScores_MappedSCScores(trg_ch, flat_mapping[trg_ch])
778 processed_cent2_chains.add(flat_mapping[trg_ch])
780 n_union += len(self.
cent1cent1.interface_residues[trg_ch])
782 for mdl_ch
in self.
_cent2_cent2.chain_names:
783 if mdl_ch
not in processed_cent2_chains:
784 n_union += len(self.
cent2cent2.interface_residues[mdl_ch])
787 n_union, n_intersection)
790 def _MappedInterfaceScores(self, int1, int2):
791 key_one = (int1, int2)
794 key_two = ((int1[1], int1[0]), (int2[1], int2[0]))
802 def _InterfaceScores(self, int1, int2):
803 if int1
in self.
cent1cent1.contacts:
804 ref_contacts = self.
cent1cent1.contacts[int1]
805 elif (int1[1], int1[0])
in self.
cent1cent1.contacts:
806 ref_contacts = self.
cent1cent1.contacts[(int1[1], int1[0])]
808 ref_contacts = set([(x[1], x[0])
for x
in ref_contacts])
812 if int2
in self.
cent2cent2.contacts:
813 mdl_contacts = self.
cent2cent2.contacts[int2]
814 elif (int2[1], int2[0])
in self.
cent2cent2.contacts:
815 mdl_contacts = self.
cent2cent2.contacts[(int2[1], int2[0])]
817 mdl_contacts = set([(x[1], x[0])
for x
in mdl_contacts])
823 ch1_aln = self.
alnsalns[(int1[0], int2[0])]
824 ch2_aln = self.
alnsalns[(int1[1], int2[1])]
825 mapped_ref_contacts = set()
826 mapped_mdl_contacts = set()
827 for c
in ref_contacts:
828 mapped_c = (ch1_aln.GetPos(0, c[0]), ch2_aln.GetPos(0, c[1]))
829 mapped_ref_contacts.add(mapped_c)
830 for c
in mdl_contacts:
831 mapped_c = (ch1_aln.GetPos(1, c[0]), ch2_aln.GetPos(1, c[1]))
832 mapped_mdl_contacts.add(mapped_c)
834 contact_union = len(mapped_ref_contacts.union(mapped_mdl_contacts))
835 contact_intersection = len(mapped_ref_contacts.intersection(mapped_mdl_contacts))
841 tmp_ref = set([x[0]
for x
in mapped_ref_contacts])
842 tmp_mdl = set([x[0]
for x
in mapped_mdl_contacts])
843 intres_union = len(tmp_ref.union(tmp_mdl))
844 intres_intersection = len(tmp_ref.intersection(tmp_mdl))
847 tmp_ref = set([x[1]
for x
in mapped_ref_contacts])
848 tmp_mdl = set([x[1]
for x
in mapped_mdl_contacts])
849 intres_union += len(tmp_ref.union(tmp_mdl))
850 intres_intersection += len(tmp_ref.intersection(tmp_mdl))
852 return (contact_union, contact_intersection,
853 intres_union, intres_intersection)
855 def _MappedSCScores(self, ref_ch, mdl_ch):
858 n_union, n_intersection = self.
_SCScores_SCScores(ref_ch, mdl_ch)
859 self.
_mapped_cache_sc_mapped_cache_sc[(ref_ch, mdl_ch)] = (n_union, n_intersection)
860 return (n_union, n_intersection)
862 def _SCScores(self, ch1, ch2):
863 ref_int_res = self.
cent1cent1.interface_residues[ch1]
864 mdl_int_res = self.
cent2cent2.interface_residues[ch2]
865 aln = self.
alnsalns[(ch1, ch2)]
866 mapped_ref_int_res = set()
867 mapped_mdl_int_res = set()
868 for r_idx
in ref_int_res:
869 mapped_ref_int_res.add(aln.GetPos(0, r_idx))
870 for r_idx
in mdl_int_res:
871 mapped_mdl_int_res.add(aln.GetPos(1, r_idx))
872 return(len(mapped_ref_int_res.union(mapped_mdl_int_res)),
873 len(mapped_ref_int_res.intersection(mapped_mdl_int_res)))
876 __all__ = (
'ContactEntity',
'ContactScorerResultICS',
'ContactScorerResultIPS',
'ContactScorer')