OpenStructure
Loading...
Searching...
No Matches
trajectory_analysis.py
Go to the documentation of this file.
1"""
2**This Module requires numpy**
3
4This module contains functions to analyze trajectories, mainly
5similiraty measures baed on RMSDS and pairwise distances.
6
7Author: Niklaus Johner (niklaus.johner@unibas.ch)
8"""
9
10import ost.mol.alg
11import ost.geom
12from ost import LogError
13import os
14
15def smooth(vec,n):
16#Function to smooth a vector or a list of floats
17#for each element it takes the average over itself and the
18#n elements on each side, so over (2n+1) elements
19 try:
20 vec2=vec.copy()
21 except:
22 vec2=vec[:]
23 for i in range(n):
24 v=0.0
25 count=1.0
26 v+=vec[i]
27 for j in range(n):
28 count+=1
29 v+=vec[i+j+1]
30 for j in range(i):
31 count+=1
32 v+=vec[i-(j+1)]
33 vec2[i]=v/float(count)
34 for i in range(1,n+1):
35 v=0.0
36 count=1.0
37 v+=vec[-i]
38 for j in range(n):
39 count+=1
40 v+=vec[-(i+j+1)]
41 for j in range(i-1):
42 count+=1
43 v+=vec[-i+j+1]
44 vec2[-i]=v/float(count)
45 for i in range(n,len(vec2)-n):
46 v=vec[i]
47 for j in range(n):
48 v+=vec[i+j+1]
49 v+=vec[i-j-1]
50 vec2[i]=v/float(2.*n+1.)
51 return vec2
52
53
54"""
55From here on the module needs numpy
56"""
57
58def RMSD_Matrix_From_Traj(t,sele,first=0,last=-1,align=True,align_sele=None):
59 """
60 This function calculates a matrix M such that M[i,j] is the
61 RMSD (calculated on **sele**) between frames i and j of the trajectory **t**
62 aligned on sele.
63
64 :param t: the trajectory
65 :param sele: the selection used for alignment and RMSD calculation
66 :param first: the first frame of t to be used
67 :param last: the last frame of t to be used
68 :type t: :class:`~ost.mol.CoordGroupHandle`
69 :type sele: :class:`~ost.mol.EntityView`
70 :type first: :class:`int`
71 :type last: :class:`int`
72
73 :return: Returns a numpy N\\ :subscript:`frames`\\ xN\\ :subscript:`frames` matrix,
74 where N\\ :subscript:`frames` is the number of frames.
75 """
76 if not align_sele:align_sele=sele
77 try:
78 import numpy as npy
79 if last==-1:last=t.GetFrameCount()
80 n_frames=last-first
81 rmsd_matrix=npy.identity(n_frames)
82 for i in range(n_frames):
83 if align:
84 t=ost.mol.alg.SuperposeFrames(t,align_sele,begin=first,end=last,ref=i)
85 eh=t.GetEntity()
86 t.CopyFrame(i)
87 rmsd_matrix[i,:]=ost.mol.alg.AnalyzeRMSD(t,sele,sele)
88 if i==0:
89 last=last-first
90 first=0
91 return rmsd_matrix
92 except ImportError:
93 LogError("Function needs numpy, but I could not import it.")
94 raise
95
96
97def PairwiseDistancesFromTraj(t,sele,first=0,last=-1,seq_sep=1):
98 """
99 This function calculates the distances between any pair of atoms in **sele**
100 with sequence separation larger than **seq_sep** from a trajectory **t**.
101 It return a matrix containing one line for each atom pair and N\\ :subscript:`frames` columns, where
102 N\\ :subscript:`frames` is the number of frames in the trajectory.
103
104 :param t: the trajectory
105 :param sele: the selection used to determine the atom pairs
106 :param first: the first frame of t to be used
107 :param last: the last frame of t to be used
108 :param seq_sep: The minimal sequence separation between atom pairs
109 :type t: :class:`~ost.mol.CoordGroupHandle`
110 :type sele: :class:`~ost.mol.EntityView`
111 :type first: :class:`int`
112 :type last: :class:`int`
113 :type seq_sep: :class:`int`
114
115 :return: a numpy N\\ :subscript:`pairs`\\ xN\\ :subscript:`frames` matrix.
116 """
117 try:
118 import numpy as npy
119 if last==-1:last=t.GetFrameCount()
120 n_frames=last-first
121 n_var=0
122 for i,a1 in enumerate(sele.atoms):
123 for j,a2 in enumerate(sele.atoms):
124 if not j-i<seq_sep:n_var+=1
125 #n_var=sele.GetAtomCount()
126 #n_var=(n_var-1)*(n_var)/2.
127 dist_matrix=npy.zeros(n_frames*n_var)
128 dist_matrix=dist_matrix.reshape(n_var,n_frames)
129 k=0
130 for i,a1 in enumerate(sele.atoms):
131 for j,a2 in enumerate(sele.atoms):
132 if j-i<seq_sep:continue
133 dist_matrix[k]=ost.mol.alg.AnalyzeDistanceBetwAtoms(t,a1.GetHandle(),a2.GetHandle())[first:last]
134 k+=1
135 return dist_matrix
136 except ImportError:
137 LogError("Function needs numpy, but I could not import it.")
138 raise
139
141 """
142 This function calculates an distance matrix M(N\\ :subscript:`frames`\\ xN\\ :subscript:`frames`\\ ) from
143 the pairwise distances matrix D(N\\ :subscript:`pairs`\\ xN\\ :subscript:`frames`\\ ), where
144 N\\ :subscript:`frames` is the number of frames in the trajectory
145 and N\\ :subscript:`pairs` the number of atom pairs.
146 M[i,j] is the distance between frame i and frame j
147 calculated as a p-norm of the differences in distances
148 from the two frames (distance-RMSD for p=2).
149
150 :param distances: a pairwise distance matrix as obtained from
151 :py:func:`~mol.alg.trajectory_analysis.PairwiseDistancesFromTraj`
152 :param p: exponent used for the p-norm.
153
154 :return: a numpy N\\ :subscript:`frames`\\ xN\\ :subscript:`frames` matrix, where N\\ :subscript:`frames`
155 is the number of frames.
156 """
157 try:
158 import numpy as npy
159 n1=distances.shape[0]
160 n2=distances.shape[1]
161 dist_mat=npy.identity(n2)
162 for i in range(n2):
163 for j in range(n2):
164 if j<=i:continue
165 d=(((abs(distances[:,i]-distances[:,j])**p).sum())/float(n1))**(1./p)
166 dist_mat[i,j]=d
167 dist_mat[j,i]=d
168 return dist_mat
169 except ImportError:
170 LogError("Function needs numpy, but I could not import it.")
171 raise
172
173def DistRMSDFromTraj(t,sele,ref_sele,radius=7.0,average=False,seq_sep=4,first=0,last=-1):
174 """
175 This function calculates the distance RMSD from a trajectory.
176 The distances selected for the calculation are all the distances
177 between pair of atoms from residues that are at least **seq_sep** apart
178 in the sequence and that are smaller than **radius** in **ref_sel**.
179 The number and order of atoms in **ref_sele** and **sele** should be the same.
180
181 :param t: the trajectory
182 :param sele: the selection used to calculate the distance RMSD
183 :param ref_sele: the reference selection used to determine the atom pairs and reference distances
184 :param radius: the upper limit of distances in ref_sele considered for the calculation
185 :param seq_sep: the minimal sequence separation between atom pairs considered for the calculation
186 :param average: use the average distance in the trajectory as reference instead of the distance obtained from ref_sele
187 :param first: the first frame of t to be used
188 :param last: the last frame of t to be used
189
190 :type t: :class:`~ost.mol.CoordGroupHandle`
191 :type sele: :class:`~ost.mol.EntityView`
192 :type ref_sele: :class:`~ost.mol.EntityView`
193 :type radius: :class:`float`
194 :type average: :class:`bool`
195 :type first: :class:`int`
196 :type last: :class:`int`
197 :type seq_sep: :class:`int`
198
199 :return: a numpy vecor dist_rmsd(N\\ :subscript:`frames`).
200 """
201 if not sele.GetAtomCount()==ref_sele.GetAtomCount():
202 print('Not same number of atoms in the two views')
203 return
204 try:
205 import numpy as npy
206 if last==-1:last=t.GetFrameCount()
207 n_frames=last-first
208 dist_rmsd=npy.zeros(n_frames)
209 pair_count=0.0
210 for i,a1 in enumerate(ref_sele.atoms):
211 for j,a2 in enumerate(ref_sele.atoms):
212 if j<=i:continue
213 r1=a1.GetResidue()
214 c1=r1.GetChain()
215 r2=a2.GetResidue()
216 c2=r2.GetChain()
217 if c1==c2 and abs(r2.GetNumber().num-r1.GetNumber().num)<seq_sep:continue
218 d=ost.geom.Distance(a1.pos,a2.pos)
219 if d<radius:
220 a3=sele.atoms[i]
221 a4=sele.atoms[j]
222 d_traj=ost.mol.alg.AnalyzeDistanceBetwAtoms(t,a3.GetHandle(),a4.GetHandle())[first:last]
223 if average:d=npy.mean(d_traj)
224 for k,el in enumerate(d_traj):
225 dist_rmsd[k]+=(el-d)**2.0
226 pair_count+=1.0
227 return (dist_rmsd/float(pair_count))**0.5
228 except ImportError:
229 LogError("Function needs numpy, but I could not import it.")
230 raise
231
232def AverageDistanceMatrixFromTraj(t,sele,first=0,last=-1):
233 """
234 This function calcultes the distance between each pair of atoms
235 in **sele**, averaged over the trajectory **t**.
236
237 :param t: the trajectory
238 :param sele: the selection used to determine the atom pairs
239 :param first: the first frame of t to be used
240 :param last: the last frame of t to be used
241 :type t: :class:`~ost.mol.CoordGroupHandle`
242 :type sele: :class:`~ost.mol.EntityView`
243 :type first: :class:`int`
244 :type last: :class:`int`
245
246 :return: a numpy N\\ :subscript:`pairs`\\ xN\\ :subscript:`pairs` matrix, where N\\ :subscript:`pairs`
247 is the number of atom pairs in **sele**.
248 """
249 try:
250 import numpy as npy
251 except ImportError:
252 LogError("Function needs numpy, but I could not import it.")
253 raise
254 n_atoms=sele.GetAtomCount()
255 M=npy.zeros([n_atoms,n_atoms])
256 for i,a1 in enumerate(sele.atoms):
257 for j,a2 in enumerate(sele.atoms):
258 if j>i:continue
259 d=ost.mol.alg.AnalyzeDistanceBetwAtoms(t,a1.GetHandle(),a2.GetHandle())[first:last]
260 M[i,j]=npy.mean(d)
261 M[j,i]=npy.mean(d)
262 return M
263
264def AnalyzeDistanceFluctuationMatrix(t,sele,first=0,last=-1):
265 try:
266 import numpy as npy
267 except ImportError:
268 LogError("Function needs numpy, but I could not import it.")
269 raise
270 n_atoms=sele.GetAtomCount()
271 M=npy.zeros([n_atoms,n_atoms])
272 for i,a1 in enumerate(sele.atoms):
273 for j,a2 in enumerate(sele.atoms):
274 if i>j:continue
275 d=ost.mol.alg.AnalyzeDistanceBetwAtoms(t,a1.GetHandle(),a2.GetHandle())[first:last]
276 M[j,i]=npy.std(d)
277 M[i,j]=npy.std(d)
278 return M
279
280def IterativeSuperposition(t,sele,threshold=1.0,initial_sele=None,iterations=5,ref_frame=0):
281 if initial_sele:current_sele=initial_sele
282 else: current_sele=sele
283 for i in range(iterations):
284 t=ost.mol.alg.SuperposeFrames(t,current_sele,ref=ref_frame)
285 al=[a for a in sele.atoms if ost.mol.alg.AnalyzeRMSF(t,ost.mol.CreateViewFromAtoms([a]))<threshold]
286 if len(al)==0:return
287 current_sele=ost.mol.CreateViewFromAtoms(al)
288 return current_sele
289
RMSD_Matrix_From_Traj(t, sele, first=0, last=-1, align=True, align_sele=None)
AnalyzeDistanceFluctuationMatrix(t, sele, first=0, last=-1)
PairwiseDistancesFromTraj(t, sele, first=0, last=-1, seq_sep=1)
IterativeSuperposition(t, sele, threshold=1.0, initial_sele=None, iterations=5, ref_frame=0)
DistRMSDFromTraj(t, sele, ref_sele, radius=7.0, average=False, seq_sep=4, first=0, last=-1)
AverageDistanceMatrixFromTraj(t, sele, first=0, last=-1)
CoordGroupHandle DLLEXPORT_OST_MOL_ALG SuperposeFrames(CoordGroupHandle &cg, EntityView &sel, int begin=0, int end=-1, int ref=-1)
returns a superposed version of coord group, superposed on a reference frame
std::vector< Real > DLLEXPORT_OST_MOL_ALG AnalyzeRMSD(const CoordGroupHandle &traj, const EntityView &reference_view, const EntityView &sele, unsigned int stride=1)
Real DLLEXPORT_OST_MOL_ALG AnalyzeRMSF(const CoordGroupHandle &traj, const EntityView &selection, int from=0, int to=-1, unsigned int stride=1)
std::vector< Real > DLLEXPORT_OST_MOL_ALG AnalyzeDistanceBetwAtoms(const CoordGroupHandle &traj, const AtomHandle &a1, const AtomHandle &a2, unsigned int stride=1)