10 """ Helper object for Contact-score computation
12 def __init__(self, ent, contact_d = 5.0, contact_mode="aa"):
14 if contact_mode
not in [
"aa",
"repr"]:
15 raise RuntimeError(
"contact_mode must be in [\"aa\", \"repr\"]")
17 if contact_mode ==
"repr":
18 for r
in ent.residues:
20 if r.IsPeptideLinking():
24 elif r.GetName() ==
"GLY":
28 elif r.IsNucleotideLinking():
29 c3 = r.FindAtom(
"C3'")
33 raise RuntimeError(f
"Only support peptide and nucleotide "
34 f
"residues in \"repr\" contact mode. "
35 f
"Problematic residue: {r}")
37 raise RuntimeError(f
"Residue {r} has no required "
38 f
"representative atom (CB for peptide "
39 f
"residues (CA for GLY) C3' for "
40 f
"nucleotide residues.")
45 self.
_view = ent.CreateFullView()
47 pep_query =
"(peptide=true and (aname=\"CB\" or (rname=\"GLY\" and aname=\"CA\")))"
48 nuc_query =
"(nucleotide=True and aname=\"C3'\")"
49 self.
_view = ent.Select(
" or ".join([pep_query, nuc_query]))
63 """ The structure depending on *contact_mode*
65 Full view in case of "aa", view that only contains representative
66 atoms in case of "repr".
68 :type: :class:`ost.mol.EntityView`
76 Can either be "aa", meaning that all atoms are considered to identify
77 contacts, or "repr" which only considers distances between
78 representative atoms. For peptides thats CB (CA for GLY), for
79 nucleotides thats C3'.
87 """ Pairwise distance of residues to be considered as contacts
89 Given at :class:`ContactScorer` construction
97 """ Chain names in :attr:`~view`
101 :type: :class:`list` of :class:`str`
104 self.
_chain_names = sorted([ch.name
for ch
in self.view.chains])
109 """ Pairs of chains in :attr:`~view` with at least one contact
111 :type: :class:`list` of :class:`tuples`
119 """ Interchain contacts
121 Organized as :class:`dict` with key (cname1, cname2) and values being
122 a set of tuples with the respective residue indices.
123 cname1 < cname2 evaluates to True.
131 """ Human readable interchain contacts
133 Human readable version of :attr:`~contacts`. Simple list with tuples
134 containing two strings specifying the residues in contact. Format:
135 <cname>.<rnum>.<ins_code>
143 """ Interface residues
145 Residues in each chain that are in contact with any other chain.
146 Organized as :class:`dict` with key cname and values the respective
147 residue indices in a :class:`set`.
155 """ Human readable interface residues
157 Human readable version of :attr:`interface_residues`. :class:`list` of
158 strings specifying the interface residues in format:
159 <cname>.<rnum>.<ins_code>
166 """ Get chain by name
168 :param chain_name: Chain in :attr:`~view`
169 :type chain_name: :class:`str`
171 chain = self.view.FindChain(chain_name)
172 if not chain.IsValid():
173 raise RuntimeError(f
"view has no chain named \"{chain_name}\"")
177 """ Get sequence of chain
179 Returns sequence of specified chain as raw :class:`str`
181 :param chain_name: Chain in :attr:`~view`
182 :type chain_name: :class:`str`
186 s =
''.join([r.one_letter_code
for r
in ch.residues])
190 def _SetupContacts(self):
197 for ch
in self.view.chains:
198 for r_idx, r
in enumerate(ch.residues):
199 r.SetIntProp(
"contact_idx", r_idx)
203 q1 = f
"cname={cname} and {self.contact_d} <> [cname!={cname}]"
205 q2 = f
"cname!={cname} and {self.contact_d} <> [cname={cname}]"
206 v1 = self.view.Select(q1)
207 v2 = self.view.Select(q2)
208 v1_p = [
geom.Vec3List([a.pos
for a
in r.atoms])
for r
in v1.residues]
209 for r1, p1
in zip(v1.residues, v1_p):
210 for ch2
in v2.chains:
211 cname2 = ch2.GetName()
213 v2_p = [
geom.Vec3List([a.pos
for a
in r.atoms])
for r
in ch2.residues]
214 for r2, p2
in zip(ch2.residues, v2_p):
216 cname_key = (cname, cname2)
219 self.
_contacts[cname_key].add((r1.GetIntProp(
"contact_idx"),
220 r2.GetIntProp(
"contact_idx")))
221 rnum1 = r1.GetNumber()
222 hr1 = f
"{cname}.{rnum1.num}.{rnum1.ins_code}"
223 rnum2 = r2.GetNumber()
224 hr2 = f
"{cname2}.{rnum2.num}.{rnum2.ins_code}"
225 self._hr_contacts.append((hr1.strip(
"\u0000"),
226 hr2.strip(
"\u0000")))
228 def _SetupInterfaceResidues(self):
230 for k,v
in self.contacts.items():
235 def _SetupHRInterfaceResidues(self):
236 interface_residues = set()
238 interface_residues.add(item[0])
239 interface_residues.add(item[1])
245 Holds data relevant to compute ics
247 def __init__(self, n_trg_contacts, n_mdl_contacts, n_union, n_intersection):
255 """ Number of contacts in target
263 """ Number of contacts in model
271 """ Precision of model contacts
273 The fraction of model contacts that are also present in target
284 """ Recall of model contacts
286 The fraction of target contacts that are also present in model
297 """ The Interface Contact Similarity score (ICS)
299 Combination of :attr:`precision` and :attr:`recall` using the F1-measure
301 :type: :class:`float`
307 if denominator != 0.0:
308 return 2*nominator/denominator
314 Holds data relevant to compute ips
316 def __init__(self, n_trg_int_res, n_mdl_int_res, n_union, n_intersection):
324 """ Number of interface residues in target
328 return self._n_trg_contacts
332 """ Number of interface residues in model
340 """ Precision of model interface residues
342 The fraction of model interface residues that are also interface
354 """ Recall of model interface residues
356 The fraction of target interface residues that are also interface
368 """ The Interface Patch Similarity score (IPS)
370 Jaccard coefficient of interface residues in model/target.
371 Technically thats :attr:`intersection`/:attr:`union`
373 :type: :class:`float`
380 """ Helper object to compute Contact scores
382 Tightly integrated into the mechanisms from the chain_mapping module.
383 The prefered way to derive an object of type :class:`ContactScorer` is
384 through the static constructor: :func:`~FromMappingResult`.
386 Usage is the same as for :class:`ost.mol.alg.QSScorer`
389 def __init__(self, target, chem_groups, model, alns,
390 contact_mode=
"aa", contact_d=5.0):
392 contact_d = contact_d)
394 chem_group_ch_names = list(itertools.chain.from_iterable(chem_groups))
395 if self._cent1.chain_names != sorted(chem_group_ch_names):
396 raise RuntimeError(f
"Expect exact same chain names in chem_groups "
397 f
"and in target (which is processed to only "
398 f
"contain peptides/nucleotides). target: "
399 f
"{self._cent1.chain_names}, chem_groups: "
400 f
"{chem_group_ch_names}")
404 contact_d = contact_d)
430 """ The preferred way to get a :class:`ContactScorer`
432 Static constructor that derives an object of type :class:`ContactScorer`
433 using a :class:`ost.mol.alg.chain_mapping.MappingResult`
435 :param mapping_result: Data source
436 :type mapping_result: :class:`ost.mol.alg.chain_mapping.MappingResult`
439 mapping_result.chem_groups,
440 mapping_result.model,
442 contact_mode = contact_mode,
443 contact_d = contact_d)
444 return contact_scorer
448 """ Represents *target*
450 :type: :class:`ContactEntity`
456 """ Groups of chemically equivalent chains in *target*
458 Provided at object construction
460 :type: :class:`list` of :class:`list` of :class:`str`
466 """ Represents *model*
468 :type: :class:`ContactEntity`
474 """ Alignments between chains in :attr:`~cent1` and :attr:`~cent2`
476 Provided at object construction. Each alignment is accessible with
477 ``alns[(t_chain,m_chain)]``. First sequence is the sequence of the
478 respective chain in :attr:`~cent1`, second sequence the one from
481 :type: :class:`dict` with key: :class:`tuple` of :class:`str`, value:
482 :class:`ost.seq.AlignmentHandle`
487 """ Computes ICS given chain mapping
489 Again, the preferred way is to get *mapping* is from an object
490 of type :class:`ost.mol.alg.chain_mapping.MappingResult`.
493 :attr:`ost.mol.alg.chain_mapping.MappingResult.mapping`
494 :type mapping: :class:`list` of :class:`list` of :class:`str`
495 :param check: Perform input checks, can be disabled for speed purposes
496 if you know what you're doing.
497 :type check: :class:`bool`
498 :returns: Result object of type :class:`ContactScorerResultICS`
504 raise RuntimeError(
"Dimensions of self.chem_groups and mapping "
508 raise RuntimeError(
"Dimensions of self.chem_groups and "
509 "mapping must match")
511 for name
in itertools.chain.from_iterable(mapping):
512 if name
is not None and name
not in self.cent2.chain_names:
513 raise RuntimeError(f
"Each chain in mapping must be present "
514 f
"in self.cent2. No match for "
517 flat_mapping = dict()
519 flat_mapping.update({x: y
for x, y
in zip(a, b)
if y
is not None})
524 """ Computes ICS scores only considering one interface
526 This only works for interfaces that are computed in :func:`Score`, i.e.
527 interfaces for which the alignments are set up correctly.
529 :param trg_ch1: Name of first interface chain in target
530 :type trg_ch1: :class:`str`
531 :param trg_ch2: Name of second interface chain in target
532 :type trg_ch2: :class:`str`
533 :param mdl_ch1: Name of first interface chain in model
534 :type mdl_ch1: :class:`str`
535 :param mdl_ch2: Name of second interface chain in model
536 :type mdl_ch2: :class:`str`
537 :returns: Result object of type :class:`ContactScorerResultICS`
538 :raises: :class:`RuntimeError` if no aln for trg_ch1/mdl_ch1 or
539 trg_ch2/mdl_ch2 is available.
541 if (trg_ch1, mdl_ch1)
not in self.
alns:
542 raise RuntimeError(f
"No aln between trg_ch1 ({trg_ch1}) and "
543 f
"mdl_ch1 ({mdl_ch1}) available. Did you "
544 f
"construct the QSScorer object from a "
545 f
"MappingResult and are trg_ch1 and mdl_ch1 "
546 f
"mapped to each other?")
547 if (trg_ch2, mdl_ch2)
not in self.
alns:
548 raise RuntimeError(f
"No aln between trg_ch1 ({trg_ch1}) and "
549 f
"mdl_ch1 ({mdl_ch1}) available. Did you "
550 f
"construct the QSScorer object from a "
551 f
"MappingResult and are trg_ch1 and mdl_ch1 "
552 f
"mapped to each other?")
553 trg_int = (trg_ch1, trg_ch2)
554 mdl_int = (mdl_ch1, mdl_ch2)
555 trg_int_r = (trg_ch2, trg_ch1)
556 mdl_int_r = (mdl_ch2, mdl_ch1)
558 if trg_int
in self.cent1.contacts:
559 n_trg = len(self.cent1.contacts[trg_int])
560 elif trg_int_r
in self.cent1.contacts:
561 n_trg = len(self.cent1.contacts[trg_int_r])
565 if mdl_int
in self.cent2.contacts:
566 n_mdl = len(self.cent2.contacts[mdl_int])
567 elif mdl_int_r
in self.cent2.contacts:
568 n_mdl = len(self.cent2.contacts[mdl_int_r])
576 """ Same as :func:`ScoreICS` but with flat mapping
578 :param flat_mapping: Dictionary with target chain names as keys and
579 the mapped model chain names as value
580 :type flat_mapping: :class:`dict` with :class:`str` as key and value
581 :returns: Result object of type :class:`ContactScorerResultICS`
583 n_trg = sum([len(x)
for x
in self.cent1.contacts.values()])
584 n_mdl = sum([len(x)
for x
in self.cent2.contacts.values()])
588 processed_cent2_interfaces = set()
589 for int1
in self.cent1.interacting_chains:
590 if int1[0]
in flat_mapping
and int1[1]
in flat_mapping:
591 int2 = (flat_mapping[int1[0]], flat_mapping[int1[1]])
595 processed_cent2_interfaces.add((min(int2), max(int2)))
598 r_flat_mapping = {v:k
for k,v
in flat_mapping.items()}
599 for int2
in self.cent2.interacting_chains:
600 if int2
not in processed_cent2_interfaces:
601 if int2[0]
in r_flat_mapping
and int2[1]
in r_flat_mapping:
602 int1 = (r_flat_mapping[int2[0]], r_flat_mapping[int2[1]])
608 n_union, n_intersection)
611 """ Computes IPS given chain mapping
613 Again, the preferred way is to get *mapping* is from an object
614 of type :class:`ost.mol.alg.chain_mapping.MappingResult`.
617 :attr:`ost.mol.alg.chain_mapping.MappingResult.mapping`
618 :type mapping: :class:`list` of :class:`list` of :class:`str`
619 :param check: Perform input checks, can be disabled for speed purposes
620 if you know what you're doing.
621 :type check: :class:`bool`
622 :returns: Result object of type :class:`ContactScorerResultIPS`
628 raise RuntimeError(
"Dimensions of self.chem_groups and mapping "
632 raise RuntimeError(
"Dimensions of self.chem_groups and "
633 "mapping must match")
635 for name
in itertools.chain.from_iterable(mapping):
636 if name
is not None and name
not in self.cent2.chain_names:
637 raise RuntimeError(f
"Each chain in mapping must be present "
638 f
"in self.cent2. No match for "
641 flat_mapping = dict()
643 flat_mapping.update({x: y
for x, y
in zip(a, b)
if y
is not None})
648 """ Computes IPS scores only considering one interface
650 This only works for interfaces that are computed in :func:`Score`, i.e.
651 interfaces for which the alignments are set up correctly.
653 :param trg_ch1: Name of first interface chain in target
654 :type trg_ch1: :class:`str`
655 :param trg_ch2: Name of second interface chain in target
656 :type trg_ch2: :class:`str`
657 :param mdl_ch1: Name of first interface chain in model
658 :type mdl_ch1: :class:`str`
659 :param mdl_ch2: Name of second interface chain in model
660 :type mdl_ch2: :class:`str`
661 :returns: Result object of type :class:`ContactScorerResultIPS`
662 :raises: :class:`RuntimeError` if no aln for trg_ch1/mdl_ch1 or
663 trg_ch2/mdl_ch2 is available.
665 if (trg_ch1, mdl_ch1)
not in self.
alns:
666 raise RuntimeError(f
"No aln between trg_ch1 ({trg_ch1}) and "
667 f
"mdl_ch1 ({mdl_ch1}) available. Did you "
668 f
"construct the QSScorer object from a "
669 f
"MappingResult and are trg_ch1 and mdl_ch1 "
670 f
"mapped to each other?")
671 if (trg_ch2, mdl_ch2)
not in self.
alns:
672 raise RuntimeError(f
"No aln between trg_ch1 ({trg_ch1}) and "
673 f
"mdl_ch1 ({mdl_ch1}) available. Did you "
674 f
"construct the QSScorer object from a "
675 f
"MappingResult and are trg_ch1 and mdl_ch1 "
676 f
"mapped to each other?")
677 trg_int = (trg_ch1, trg_ch2)
678 mdl_int = (mdl_ch1, mdl_ch2)
679 trg_int_r = (trg_ch2, trg_ch1)
680 mdl_int_r = (mdl_ch2, mdl_ch1)
682 if trg_int
in self.cent1.contacts:
683 n_trg = len(self.cent1.contacts[trg_int])
684 elif trg_int_r
in self.cent1.contacts:
685 n_trg = len(self.cent1.contacts[trg_int_r])
689 if mdl_int
in self.cent2.contacts:
690 n_mdl = len(self.cent2.contacts[mdl_int])
691 elif mdl_int_r
in self.cent2.contacts:
692 n_mdl = len(self.cent2.contacts[mdl_int_r])
701 """ Same as :func:`ScoreIPS` but with flat mapping
703 :param flat_mapping: Dictionary with target chain names as keys and
704 the mapped model chain names as value
705 :type flat_mapping: :class:`dict` with :class:`str` as key and value
706 :returns: Result object of type :class:`ContactScorerResultIPS`
708 n_trg = sum([len(x)
for x
in self.cent1.interface_residues.values()])
709 n_mdl = sum([len(x)
for x
in self.cent2.interface_residues.values()])
713 processed_cent2_chains = set()
714 for trg_ch
in self.cent1.chain_names:
715 if trg_ch
in flat_mapping:
719 processed_cent2_chains.add(flat_mapping[trg_ch])
721 n_union += len(self.cent1.interface_residues[trg_ch])
723 for mdl_ch
in self._cent2.chain_names:
724 if mdl_ch
not in processed_cent2_chains:
725 n_union += len(self.cent2.interface_residues[mdl_ch])
728 n_union, n_intersection)
731 def _MappedInterfaceScores(self, int1, int2):
732 key_one = (int1, int2)
735 key_two = ((int1[1], int1[0]), (int2[1], int2[0]))
743 def _InterfaceScores(self, int1, int2):
744 if int1
in self.cent1.contacts:
745 ref_contacts = self.cent1.contacts[int1]
746 elif (int1[1], int1[0])
in self.cent1.contacts:
747 ref_contacts = self.cent1.contacts[(int1[1], int1[0])]
749 ref_contacts = set([(x[1], x[0])
for x
in ref_contacts])
753 if int2
in self.cent2.contacts:
754 mdl_contacts = self.cent2.contacts[int2]
755 elif (int2[1], int2[0])
in self.cent2.contacts:
756 mdl_contacts = self.cent2.contacts[(int2[1], int2[0])]
758 mdl_contacts = set([(x[1], x[0])
for x
in mdl_contacts])
764 ch1_aln = self.
alns[(int1[0], int2[0])]
765 ch2_aln = self.
alns[(int1[1], int2[1])]
766 mapped_ref_contacts = set()
767 mapped_mdl_contacts = set()
768 for c
in ref_contacts:
769 mapped_c = (ch1_aln.GetPos(0, c[0]), ch2_aln.GetPos(0, c[1]))
770 mapped_ref_contacts.add(mapped_c)
771 for c
in mdl_contacts:
772 mapped_c = (ch1_aln.GetPos(1, c[0]), ch2_aln.GetPos(1, c[1]))
773 mapped_mdl_contacts.add(mapped_c)
775 contact_union = len(mapped_ref_contacts.union(mapped_mdl_contacts))
776 contact_intersection = len(mapped_ref_contacts.intersection(mapped_mdl_contacts))
782 tmp_ref = set([x[0]
for x
in mapped_ref_contacts])
783 tmp_mdl = set([x[0]
for x
in mapped_mdl_contacts])
784 intres_union = len(tmp_ref.union(tmp_mdl))
785 intres_intersection = len(tmp_ref.intersection(tmp_mdl))
788 tmp_ref = set([x[1]
for x
in mapped_ref_contacts])
789 tmp_mdl = set([x[1]
for x
in mapped_mdl_contacts])
790 intres_union += len(tmp_ref.union(tmp_mdl))
791 intres_intersection += len(tmp_ref.intersection(tmp_mdl))
793 return (contact_union, contact_intersection,
794 intres_union, intres_intersection)
796 def _MappedSCScores(self, ref_ch, mdl_ch):
799 n_union, n_intersection = self.
_SCScores(ref_ch, mdl_ch)
801 return (n_union, n_intersection)
803 def _SCScores(self, ch1, ch2):
804 ref_int_res = self.cent1.interface_residues[ch1]
805 mdl_int_res = self.cent2.interface_residues[ch2]
806 aln = self.
alns[(ch1, ch2)]
807 mapped_ref_int_res = set()
808 mapped_mdl_int_res = set()
809 for r_idx
in ref_int_res:
810 mapped_ref_int_res.add(aln.GetPos(0, r_idx))
811 for r_idx
in mdl_int_res:
812 mapped_mdl_int_res.add(aln.GetPos(1, r_idx))
813 return(len(mapped_ref_int_res.union(mapped_mdl_int_res)),
814 len(mapped_ref_int_res.intersection(mapped_mdl_int_res)))
817 __all__ = (
'ContactEntity',
'ContactScorerResultICS',
'ContactScorerResultIPS',
'ContactScorer')