5 def _PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
6 ch1_aln =
None, ch2_aln =
None):
7 """ Preprocesses *mdl* and *ref*
9 Sets int properties to each residue in mdl_ch1, mdl_ch2 as well as
10 the respective reference chains.
11 dockq_mapped: 1 if a residues in mdl_ch1 is mapped to a residue in ref_ch1
12 and vice versa, 0 otherwise. Same is done for mdl_ch2 and
14 dockq_idx: If a pair of residue is mapped, the same index will be set
15 to both residues. The index is unique otherwise.
17 By default, mapping happens with residue numbers but you can enforce a
18 mapping with an alignment. In the example of ch1_aln, the first sequence
19 corresponds to the ATOMSEQ of ref_ch1 in ref and the second sequence to
20 the ATOMSEQ of mdl_ch1 in mdl.
26 for cname
in [ref_ch1, ref_ch2]:
27 ch = ref.FindChain(cname)
29 r.SetIntProp(
"dockq_mapped", 0)
30 r.SetIntProp(
"dockq_idx", -1)
31 for cname
in [mdl_ch1, mdl_ch2]:
32 ch = mdl.FindChain(cname)
34 r.SetIntProp(
"dockq_mapped", 0)
35 r.SetIntProp(
"dockq_idx", -1)
38 if ch1_aln
is not None and ch2_aln
is not None:
41 if ch1_aln.GetCount() != 2
or ch2_aln.GetCount() != 2:
42 raise RuntimeError(
"Expect exactly two sequences in alns provided "
44 tmp = ch1_aln.GetSequence(0)
45 ref_s1 = seq.CreateSequence(tmp.GetName(), str(tmp))
46 ref_s1.SetOffset(tmp.GetOffset())
47 ref_s1.AttachView(ref.Select(f
"cname={ref_ch1}"))
48 tmp = ch1_aln.GetSequence(1)
49 mdl_s1 = seq.CreateSequence(tmp.GetName(), str(tmp))
50 mdl_s1.SetOffset(tmp.GetOffset())
51 mdl_s1.AttachView(mdl.Select(f
"cname={mdl_ch1}"))
52 new_ch1_aln = seq.CreateAlignment(ref_s1, mdl_s1)
53 for col
in new_ch1_aln:
54 if col[0] !=
'-' and col[1] !=
'-':
55 ref_r = col.GetResidue(0)
56 mdl_r = col.GetResidue(1)
57 if not (ref_r.IsValid()
and ref_r.one_letter_code == col[0]):
58 raise RuntimeError(
"DockQ: mismatch between provided "
59 "alignments and ATOMSEQ in structures")
60 if not (mdl_r.IsValid()
and mdl_r.one_letter_code == col[1]):
61 raise RuntimeError(
"DockQ: mismatch between provided "
62 "alignments and ATOMSEQ in structures")
63 ref_r.SetIntProp(
"dockq_idx", dockq_idx)
64 mdl_r.SetIntProp(
"dockq_idx", dockq_idx)
65 ref_r.SetIntProp(
"dockq_mapped", 1)
66 mdl_r.SetIntProp(
"dockq_mapped", 1)
69 tmp = ch2_aln.GetSequence(0)
70 ref_s2 = seq.CreateSequence(tmp.GetName(), str(tmp))
71 ref_s2.SetOffset(tmp.GetOffset())
72 ref_s2.AttachView(ref.Select(f
"cname={ref_ch2}"))
73 tmp = ch2_aln.GetSequence(1)
74 mdl_s2 = seq.CreateSequence(tmp.GetName(), str(tmp))
75 mdl_s2.SetOffset(tmp.GetOffset())
76 mdl_s2.AttachView(mdl.Select(f
"cname={mdl_ch2}"))
77 new_ch2_aln = seq.CreateAlignment(ref_s2, mdl_s2)
78 for col
in new_ch2_aln:
79 if col[0] !=
'-' and col[1] !=
'-':
80 ref_r = col.GetResidue(0)
81 mdl_r = col.GetResidue(1)
82 if not (ref_r.IsValid()
and ref_r.one_letter_code == col[0]):
83 raise RuntimeError(
"DockQ: mismatch between provided "
84 "alignments and ATOMSEQ in structures")
85 if not (mdl_r.IsValid()
and mdl_r.one_letter_code == col[1]):
86 raise RuntimeError(
"DockQ: mismatch between provided "
87 "alignments and ATOMSEQ in structures")
88 ref_r.SetIntProp(
"dockq_idx", dockq_idx)
89 mdl_r.SetIntProp(
"dockq_idx", dockq_idx)
90 ref_r.SetIntProp(
"dockq_mapped", 1)
91 mdl_r.SetIntProp(
"dockq_mapped", 1)
95 for mdl_r
in mdl.Select(f
"cname={mdl_ch1}").residues:
96 ref_r = ref.FindResidue(ref_ch1, mdl_r.GetNumber())
98 ref_r.SetIntProp(
"dockq_idx", dockq_idx)
99 mdl_r.SetIntProp(
"dockq_idx", dockq_idx)
100 ref_r.SetIntProp(
"dockq_mapped", 1)
101 mdl_r.SetIntProp(
"dockq_mapped", 1)
103 for mdl_r
in mdl.Select(f
"cname={mdl_ch2}").residues:
104 ref_r = ref.FindResidue(ref_ch2, mdl_r.GetNumber())
106 ref_r.SetIntProp(
"dockq_idx", dockq_idx)
107 mdl_r.SetIntProp(
"dockq_idx", dockq_idx)
108 ref_r.SetIntProp(
"dockq_mapped", 1)
109 mdl_r.SetIntProp(
"dockq_mapped", 1)
113 for cname
in [ref_ch1, ref_ch2]:
114 ch = ref.FindChain(cname)
115 for r
in ch.residues:
116 if r.GetIntProp(
"dockq_idx") == -1:
117 r.SetIntProp(
"dockq_idx", dockq_idx)
119 for cname
in [mdl_ch1, mdl_ch2]:
120 ch = mdl.FindChain(cname)
121 for r
in ch.residues:
122 if r.GetIntProp(
"dockq_idx") == -1:
123 r.SetIntProp(
"dockq_idx", dockq_idx)
126 def _GetContacts(ent, ch1, ch2, dist_thresh):
127 int1 = ent.Select(f
"cname={ch1} and {dist_thresh} <> [cname={ch2}]")
128 int2 = ent.Select(f
"cname={ch2} and {dist_thresh} <> [cname={ch1}]")
130 int1_p = [
geom.Vec3List([a.pos
for a
in r.atoms])
for r
in int1.residues]
131 int2_p = [
geom.Vec3List([a.pos
for a
in r.atoms])
for r
in int2.residues]
132 for r1, p1
in zip(int1.residues, int1_p):
133 for r2, p2
in zip(int2.residues, int2_p):
134 if p1.IsWithin(p2, dist_thresh):
135 contacts.add((r1.GetIntProp(
"dockq_idx"),
136 r2.GetIntProp(
"dockq_idx")))
139 def _ContactScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
141 ref_contacts = _GetContacts(ref, ref_ch1, ref_ch2, dist_thresh)
142 mdl_contacts = _GetContacts(mdl, mdl_ch1, mdl_ch2, dist_thresh)
144 nnat = len(ref_contacts)
145 nmdl = len(mdl_contacts)
147 fnat = len(ref_contacts.intersection(mdl_contacts))
151 fnonnat = len(mdl_contacts.difference(ref_contacts))
152 if len(mdl_contacts) > 0:
153 fnonnat /= len(mdl_contacts)
155 return (nnat, nmdl, fnat, fnonnat)
157 def _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, dist_thresh=10.0):
160 sup_atoms = [
'CA',
'C',
'N',
'O']
163 mapped_mdl = mdl.Select(f
"cname={mdl_ch1},{mdl_ch2} and grdockq_mapped=1")
164 mapped_ref = ref.Select(f
"cname={ref_ch1},{ref_ch2} and grdockq_mapped=1")
165 ch = mapped_mdl.FindChain(mdl_ch1)
166 mdl_ch1_residues = {r.GetIntProp(
"dockq_idx"): r
for r
in ch.residues}
167 ch = mapped_mdl.FindChain(mdl_ch2)
168 mdl_ch2_residues = {r.GetIntProp(
"dockq_idx"): r
for r
in ch.residues}
169 ch = mapped_ref.FindChain(ref_ch1)
170 ref_ch1_residues = {r.GetIntProp(
"dockq_idx"): r
for r
in ch.residues}
171 ch = mapped_ref.FindChain(ref_ch2)
172 ref_ch2_residues = {r.GetIntProp(
"dockq_idx"): r
for r
in ch.residues}
176 int1 = ref.Select(f
"cname={ref_ch1} and {dist_thresh} <> [cname={ref_ch2}]")
177 int2 = ref.Select(f
"cname={ref_ch2} and {dist_thresh} <> [cname={ref_ch1}]")
180 for r
in int1.residues:
181 idx = r.GetIntProp(
"dockq_idx")
182 if idx
in ref_ch1_residues
and idx
in mdl_ch1_residues:
183 ref_r = ref_ch1_residues[idx]
184 mdl_r = mdl_ch1_residues[idx]
185 for aname
in sup_atoms:
186 ref_a = ref_r.FindAtom(aname)
187 mdl_a = mdl_r.FindAtom(aname)
188 if ref_a.IsValid()
and mdl_a.IsValid():
189 ref_pos.append(ref_a.pos)
190 mdl_pos.append(mdl_a.pos)
191 for r
in int2.residues:
192 idx = r.GetIntProp(
"dockq_idx")
193 if idx
in ref_ch2_residues
and idx
in mdl_ch2_residues:
194 ref_r = ref_ch2_residues[idx]
195 mdl_r = mdl_ch2_residues[idx]
196 for aname
in sup_atoms:
197 ref_a = ref_r.FindAtom(aname)
198 mdl_a = mdl_r.FindAtom(aname)
199 if ref_a.IsValid()
and mdl_a.IsValid():
200 ref_pos.append(ref_a.pos)
201 mdl_pos.append(mdl_a.pos)
203 if len(mdl_pos) >= 3:
204 sup_result = mol.alg.SuperposeSVD(mdl_pos, ref_pos)
205 irmsd = sup_result.rmsd
212 n_ch1 = len(ref.FindChain(ref_ch1).residues)
213 n_ch2 = len(ref.FindChain(ref_ch2).residues)
215 ref_receptor_residues = ref_ch1_residues.values()
216 ref_ligand_residues = ref_ch2_residues.values()
217 mdl_receptor_residues = \
218 [mdl_ch1_residues[idx]
for idx
in ref_ch1_residues.keys()]
219 mdl_ligand_residues = \
220 [mdl_ch2_residues[idx]
for idx
in ref_ch2_residues.keys()]
222 ref_receptor_residues = ref_ch2_residues.values()
223 ref_ligand_residues = ref_ch1_residues.values()
224 mdl_receptor_residues = \
225 [mdl_ch2_residues[idx]
for idx
in ref_ch2_residues.keys()]
226 mdl_ligand_residues = \
227 [mdl_ch1_residues[idx]
for idx
in ref_ch1_residues.keys()]
232 for ref_r, mdl_r
in zip(ref_receptor_residues, mdl_receptor_residues):
233 for aname
in sup_atoms:
234 ref_a = ref_r.FindAtom(aname)
235 mdl_a = mdl_r.FindAtom(aname)
236 if ref_a.IsValid()
and mdl_a.IsValid():
237 ref_receptor_positions.append(ref_a.pos)
238 mdl_receptor_positions.append(mdl_a.pos)
239 for ref_r, mdl_r
in zip(ref_ligand_residues, mdl_ligand_residues):
240 for aname
in sup_atoms:
241 ref_a = ref_r.FindAtom(aname)
242 mdl_a = mdl_r.FindAtom(aname)
243 if ref_a.IsValid()
and mdl_a.IsValid():
244 ref_ligand_positions.append(ref_a.pos)
245 mdl_ligand_positions.append(mdl_a.pos)
247 if len(mdl_receptor_positions) >= 3:
248 sup_result = mol.alg.SuperposeSVD(mdl_receptor_positions,
249 ref_receptor_positions)
250 mdl_ligand_positions.ApplyTransform(sup_result.transformation)
251 lrmsd = mdl_ligand_positions.GetRMSD(ref_ligand_positions)
255 return (irmsd, lrmsd)
257 def _ScaleRMSD(rmsd, d):
258 return 1.0/(1+(rmsd/d)**2)
260 def _DockQ(fnat, lrmsd, irmsd, d1, d2):
261 """ The final number chrunching as described in the DockQ manuscript
263 return (fnat + _ScaleRMSD(lrmsd, d1) + _ScaleRMSD(irmsd, d2))/3
265 def DockQ(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
266 ch1_aln=
None, ch2_aln=
None):
267 """ Computes DockQ for specified interface
269 DockQ is described in: Sankar Basu and Bjoern Wallner (2016), "DockQ: A
270 Quality Measure for Protein-Protein Docking Models", PLOS one
272 Residues are mapped based on residue numbers by default. If you provide
273 *ch1_aln* and *ch2_aln* you can enforce an arbitrary mapping.
275 :param mdl: Model structure
276 :type mdl: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
277 :param ref: Reference structure, i.e. native structure
278 :type ref: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
279 :param mdl_ch1: Specifies chain in model constituting first part of
281 :type mdl_ch1: :class:`str`
282 :param mdl_ch2: Specifies chain in model constituting second part of
284 :type mdl_ch2: :class:`str`
285 :param ref_ch1: ref equivalent of mdl_ch1
286 :type ref_ch1: :class:`str`
287 :param ref_ch2: ref equivalent of mdl_ch2
288 :type ref_ch2: :class:`str`
289 :param ch1_aln: Alignment with two sequences to map *ref_ch1* and *mdl_ch1*.
290 The first sequence must match the sequence in *ref_ch1* and
291 the second to *mdl_ch1*.
292 :type ch1_aln: :class:`ost.seq.AlignmentHandle`
293 :param ch2_aln: Alignment with two sequences to map *ref_ch2* and *mdl_ch2*.
294 The first sequence must match the sequence in *ref_ch2* and
295 the second to *mdl_ch2*.
296 :type ch2_aln: :class:`ost.seq.AlignmentHandle`
297 :returns: :class:`dict` with keys nnat, nmdl, fnat, fnonnat, irmsd, lrmsd,
298 DockQ which corresponds to the equivalent values in the original
299 DockQ implementation.
301 _PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
302 ch1_aln = ch1_aln, ch2_aln = ch2_aln)
303 nnat, nmdl, fnat, fnonnat = _ContactScores(mdl, ref, mdl_ch1, mdl_ch2,
305 irmsd, lrmsd = _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2)
306 return {
"nnat": nnat,
310 "irmsd": round(irmsd, 3),
311 "lrmsd": round(lrmsd, 3),
312 "DockQ": round(_DockQ(fnat, lrmsd, irmsd, 8.5, 1.5), 3)}