00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 """
00020 Wrappers for the tmalign and tmscore utilities.
00021
00022 References:
00023
00024 tmscore: Yang Zhang and Jeffrey Skolnick, Proteins 2004 57: 702-710
00025 tmalign: Y. Zhang and J. Skolnick, Nucl. Acids Res. 2005 33, 2302-9
00026
00027
00028 Authors: Pascal Benkert, Marco Biasini
00029 """
00030
00031 import subprocess, os, tempfile, platform
00032 from ost import settings, io, geom, seq
00033
def _SetupFiles(models):
    """
    Write each model to its own PDB file inside a fresh temporary directory.

    The files are named ``model01.pdb``, ``model02.pdb``, ... in the order the
    models are given.

    The PDB dialect starts out as ``'PDB'`` and is switched to ``'CHARMM'`` as
    soon as a model contains a chain name longer than one character or a
    residue name longer than three characters. The dialect is not reset
    between models, so once it has been switched it is also used for all
    models written afterwards.

    :param models: Iterable of structures exposing ``chains`` (each chain
                   exposing ``name`` and ``residues``).
    :returns: Path of the newly created temporary directory. The caller is
              responsible for removing it.
    """
    tmp_dir_name = tempfile.mkdtemp()
    try:
        dia = 'PDB'
        for index, model in enumerate(models):
            needs_charmm = any(
                len(chain.name) > 1
                or any(len(res.name) > 3 for res in chain.residues)
                for chain in model.chains)
            if needs_charmm:
                dia = 'CHARMM'
            io.SavePDB(model,
                       os.path.join(tmp_dir_name,
                                    'model%02d.pdb' % (index + 1)),
                       dialect=dia)
    except Exception:
        # Nobody else knows about the directory yet, so it has to be removed
        # here or it would be leaked when writing a model fails.
        _CleanupFiles(tmp_dir_name)
        raise
    return tmp_dir_name
00049
def _CleanupFiles(dir_name):
    """
    Recursively delete the directory *dir_name* and everything inside it.

    :param dir_name: Path of the directory to remove.
    """
    from shutil import rmtree
    rmtree(dir_name)
00053
class TMAlignResult:
    """
    Plain container for the values extracted from a TMalign run.

    .. attribute:: rmsd

      The RMSD of the common Calpha atoms of both structures

    .. attribute:: tm_score

      The TM-score of the structural superposition

    .. attribute:: aligned_length

      The aligned length as read from the program output.

    .. attribute:: transform

      The transform that superposes the model onto the reference structure.

      :type: :class:`~ost.geom.Mat4`

    .. attribute:: ref_sequence

      One of the two sequences of the alignment, stored separately
      (presumably the one belonging to the reference structure -- confirm
      against the code creating the result).

    .. attribute:: alignment

      The alignment of the structures, that is the pairing of Calphas of both
      structures. Since the programs only read ATOM records, residues
      consisting of HETATMs (MSE) are not included in the alignment.

      :type: :class:`~ost.seq.AlignmentHandle`
    """

    def __init__(self, rmsd, tm_score, aligned_length, transform,
                 ref_sequence, alignment):
        # Scores reported by the program.
        self.rmsd, self.tm_score = rmsd, tm_score
        self.aligned_length = aligned_length
        # Geometry and sequence information.
        self.transform = transform
        self.ref_sequence, self.alignment = ref_sequence, alignment
00090
def _ParseTmAlign(lines, lines_matrix):
    """
    Turn the textual output of tmalign into a :class:`TMAlignResult`.

    The parser relies on fixed line positions: ``lines[12]`` carries the
    comma separated ``key=value`` fields for aligned length and RMSD,
    ``lines[14]`` the TM-score, and ``lines[18]``/``lines[20]`` the two
    aligned sequences. Rows 2 to 4 of *lines_matrix* hold the transformation,
    with the translation in column 1 and the rotation in columns 2 to 4.

    :param lines: Lines printed by tmalign on standard output.
    :param lines_matrix: Lines of the matrix file written by tmalign.
    :rtype: :class:`TMAlignResult`
    """
    def _as_floats(text):
        # Every whitespace separated token of the row has to be numeric.
        return [float(token.strip()) for token in text.split()]

    summary = lines[12].split(',')
    aligned = float(summary[0].split('=')[1].strip())
    rmsd = float(summary[1].split('=')[1].strip())
    score_field = lines[14].split('=')[1]
    tm_score = float(score_field.split('(')[0].strip())

    rows = [_as_floats(lines_matrix[number]) for number in (2, 3, 4)]
    rotation = geom.Mat3(*[row[col] for row in rows for col in (2, 3, 4)])
    transform = geom.Mat4(rotation)
    transform.PasteTranslation(geom.Vec3(*[row[1] for row in rows]))

    first = seq.CreateSequence("1", lines[18].strip())
    second = seq.CreateSequence("2", lines[20].strip())
    aln = seq.CreateAlignment()
    # Sequence "2" goes in first, followed by sequence "1".
    for sequence in (second, first):
        aln.AddSequence(sequence)
    return TMAlignResult(rmsd, tm_score, aligned, transform, second, aln)
00109
def _RunTmAlign(tmalign, tmp_dir):
    """
    Run tmalign on ``model01.pdb`` and ``model02.pdb`` found in *tmp_dir*.

    The program is asked (``-m``) to write the transformation to
    ``matrix.txt`` inside *tmp_dir*; its standard output and that file are
    handed to :func:`_ParseTmAlign`.

    :param tmalign: Explicit path of the executable, or None to let
                    ``settings.Locate`` search for it.
    :param tmp_dir: Directory containing the two model files.
    :rtype: :class:`TMAlignResult`
    :raises: :class:`RuntimeError` if the run did not produce enough output.
             In that case *tmp_dir* has already been removed.
    """
    model1_filename = os.path.join(tmp_dir, 'model01.pdb')
    model2_filename = os.path.join(tmp_dir, 'model02.pdb')
    matrix_filename = os.path.join(tmp_dir, 'matrix.txt')
    if platform.system() == "Windows":
        tmalign_path = os.path.normpath(
            settings.Locate('tmalign.exe', explicit_file_name=tmalign))
    else:
        tmalign_path = settings.Locate('tmalign', explicit_file_name=tmalign)

    # An argument list (no shell) keeps paths containing spaces intact on
    # every platform without any manual quoting.
    command = [tmalign_path, model1_filename, model2_filename,
               '-m', matrix_filename]
    try:
        ps = subprocess.Popen(command, stdout=subprocess.PIPE,
                              universal_newlines=True)
        # communicate() drains the pipe while waiting; wait() followed by a
        # read can block forever once the pipe buffer is full.
        stdout, _ = ps.communicate()
    except OSError:
        # With a shell a launch failure only showed up as missing output;
        # keep reporting it through the same RuntimeError below.
        stdout = ''
    lines = stdout.splitlines(True)
    if len(lines) < 22:
        _CleanupFiles(tmp_dir)
        raise RuntimeError("tmalign superposition failed")

    with open(matrix_filename) as matrix_file:
        lines_matrix = matrix_file.readlines()
    # _ParseTmAlign reads rows 2 to 4 of the matrix file.
    if len(lines_matrix) < 5:
        _CleanupFiles(tmp_dir)
        raise RuntimeError("tmalign superposition failed")
    return _ParseTmAlign(lines, lines_matrix)
00129
class MMAlignResult:
    """
    Plain container for the values extracted from an MMalign run.

    .. attribute:: rmsd

      The RMSD value read from the program output.

    .. attribute:: aligned_length

      The aligned length read from the program output.

    .. attribute:: tm_score

      The TM-score read from the program output.

    .. attribute:: transform

      The transform built from the rotation and translation printed by the
      program.

    .. attribute:: alignment

      The alignment of the two structures.
    """

    def __init__(self, rmsd, aligned_length, tm_score, transform, alignment):
        # Scores reported by the program.
        self.rmsd, self.tm_score = rmsd, tm_score
        self.aligned_length = aligned_length
        # Geometry and sequence information.
        self.transform, self.alignment = transform, alignment
00137
def _ParseMmAlign(lines):
    """
    Turn the textual output of MMalign into a :class:`MMAlignResult`.

    The parser relies on fixed line positions: ``lines[10]`` carries the
    comma separated ``key=value`` fields for aligned length, RMSD and
    TM-score, ``lines[14]`` to ``lines[16]`` the transformation (translation
    in column 1, rotation in columns 2 to 4) and ``lines[19]``/``lines[21]``
    the two aligned sequences.

    :param lines: Lines printed by MMalign on standard output.
    :rtype: :class:`MMAlignResult`
    """
    info_line = lines[10].split(',')
    aln_length = float(info_line[0].split('=')[1].strip())
    rmsd = float(info_line[1].split('=')[1].strip())
    tm_score = float(info_line[2].split('=')[1].strip())
    tf1 = [float(i.strip()) for i in lines[14].split()]
    tf2 = [float(i.strip()) for i in lines[15].split()]
    tf3 = [float(i.strip()) for i in lines[16].split()]
    rot = geom.Mat3(tf1[2], tf1[3], tf1[4],
                    tf2[2], tf2[3], tf2[4],
                    tf3[2], tf3[3], tf3[4])
    tf = geom.Mat4(rot)
    tf.PasteTranslation(geom.Vec3(tf1[1], tf2[1], tf3[1]))
    seq1 = seq.CreateSequence("1", lines[19].strip())
    seq2 = seq.CreateSequence("2", lines[21].strip())
    alignment = seq.CreateAlignment()
    alignment.AddSequence(seq2)
    alignment.AddSequence(seq1)

    # MMAlignResult takes five values and has no reference-sequence slot.
    # Keyword arguments make sure each value ends up in the right attribute
    # (the previous six-argument positional call raised a TypeError).
    return MMAlignResult(rmsd=rmsd, aligned_length=aln_length,
                         tm_score=tm_score, transform=tf,
                         alignment=alignment)
00157
def _RunMmAlign(mmalign, tmp_dir):
    """
    Run MMalign on ``model01.pdb`` and ``model02.pdb`` found in *tmp_dir*.

    The standard output of the program is handed to :func:`_ParseMmAlign`.

    :param mmalign: Explicit path of the executable, or None to let
                    ``settings.Locate`` search for it.
    :param tmp_dir: Directory containing the two model files.
    :rtype: :class:`MMAlignResult`
    :raises: :class:`RuntimeError` if the run did not produce enough output.
             In that case *tmp_dir* has already been removed.
    """
    model1_filename = os.path.join(tmp_dir, 'model01.pdb')
    model2_filename = os.path.join(tmp_dir, 'model02.pdb')
    if platform.system() == "Windows":
        mmalign_path = os.path.normpath(
            settings.Locate('mmalign.exe', explicit_file_name=mmalign))
    else:
        mmalign_path = settings.Locate('MMalign', explicit_file_name=mmalign)

    # An argument list (no shell) keeps paths containing spaces intact on
    # every platform without any manual quoting.
    command = [mmalign_path, model1_filename, model2_filename]
    try:
        ps = subprocess.Popen(command, stdout=subprocess.PIPE,
                              universal_newlines=True)
        # communicate() drains the pipe while waiting; wait() followed by a
        # read can block forever once the pipe buffer is full.
        stdout, _ = ps.communicate()
    except OSError:
        # With a shell a launch failure only showed up as missing output;
        # keep reporting it through the same RuntimeError below.
        stdout = ''
    lines = stdout.splitlines(True)
    # _ParseMmAlign reads up to lines[21].
    if len(lines) < 22:
        _CleanupFiles(tmp_dir)
        raise RuntimeError("mmalign superposition failed")
    return _ParseMmAlign(lines)
00174
class TMScoreResult:
    """
    Plain container for the values extracted from a TMscore run.

    .. attribute:: rmsd_common

      The RMSD of the common Calpha atoms of both structures

    .. attribute:: tm_score

      The TM-score of the structural superposition

    .. attribute:: max_sub

      The MaxSub value read from the program output (name taken from the
      constructor argument).

    .. attribute:: gdt_ts

      The GDT_TS of the model to the reference structure.

    .. attribute:: gdt_ha

      The GDT_HA of the model to the reference structure.

    .. attribute:: rmsd_below_five

      The RMSD of all Calpha atoms that can be superposed below five
      Angstroem

    .. attribute:: transform

      The transform that superposes the model onto the reference structure.

      :type: :class:`~ost.geom.Mat4`
    """

    def __init__(self, rmsd_common, tm_score, max_sub,
                 gdt_ts, gdt_ha, rmsd_below_five, transform):
        # RMSD based measures.
        self.rmsd_common, self.rmsd_below_five = rmsd_common, rmsd_below_five
        # Score based measures.
        self.tm_score, self.max_sub = tm_score, max_sub
        self.gdt_ts, self.gdt_ha = gdt_ts, gdt_ha
        # Superposition of the model onto the reference.
        self.transform = transform
00215
def _ParseTmScore(lines):
    """
    Turn the textual output of tmscore into a :class:`TMScoreResult`.

    The parser relies on fixed line positions: ``lines[23]`` to ``lines[25]``
    hold the transformation (translation in column 1, rotation in columns 2
    to 4), ``lines[14]`` and ``lines[27]`` end with the two RMSD values and
    ``lines[16]`` to ``lines[19]`` carry TM-score, MaxSub, GDT_TS and GDT_HA.

    :param lines: Lines printed by tmscore on standard output.
    :rtype: :class:`TMScoreResult`
    """
    def _as_floats(text):
        # Every whitespace separated token of the row has to be numeric.
        return [float(token.strip()) for token in text.split()]

    rows = [_as_floats(lines[number]) for number in (23, 24, 25)]
    rotation = geom.Mat3(*[row[col] for row in rows for col in (2, 3, 4)])
    transform = geom.Mat4(rotation)
    transform.PasteTranslation(geom.Vec3(*[row[1] for row in rows]))

    rmsd_common = float(lines[14].split()[-1].strip())
    tm_score = float(lines[16].split()[2].strip())
    max_sub = float(lines[17].split()[1].strip())
    gdt_ts = float(lines[18].split()[1].strip())
    gdt_ha = float(lines[19].split()[1].strip())
    rmsd_below_five = float(lines[27].split()[-1].strip())
    return TMScoreResult(rmsd_common, tm_score, max_sub, gdt_ts, gdt_ha,
                         rmsd_below_five, transform)
00232
def _RunTmScore(tmscore, tmp_dir):
    """
    Run tmscore on ``model01.pdb`` and ``model02.pdb`` found in *tmp_dir*.

    The standard output of the program is handed to :func:`_ParseTmScore`.

    :param tmscore: Explicit path of the executable, or None to let
                    ``settings.Locate`` search for it.
    :param tmp_dir: Directory containing the two model files.
    :rtype: :class:`TMScoreResult`
    :raises: :class:`RuntimeError` if the run did not produce enough output.
             In that case *tmp_dir* has already been removed.
    """
    model1_filename = os.path.join(tmp_dir, 'model01.pdb')
    model2_filename = os.path.join(tmp_dir, 'model02.pdb')
    if platform.system() == "Windows":
        tmscore_path = os.path.normpath(
            settings.Locate('tmscore.exe', explicit_file_name=tmscore))
    else:
        tmscore_path = settings.Locate('tmscore', explicit_file_name=tmscore)

    # An argument list (no shell) keeps paths containing spaces intact on
    # every platform without any manual quoting.
    command = [tmscore_path, model1_filename, model2_filename]
    try:
        ps = subprocess.Popen(command, stdout=subprocess.PIPE,
                              universal_newlines=True)
        # communicate() drains the pipe while waiting; wait() followed by a
        # read can block forever once the pipe buffer is full.
        stdout, _ = ps.communicate()
    except OSError:
        # With a shell a launch failure only showed up as missing output;
        # keep reporting it through the same RuntimeError below.
        stdout = ''
    lines = stdout.splitlines(True)
    # _ParseTmScore reads up to lines[27]; anything shorter would otherwise
    # surface as an IndexError instead of the documented RuntimeError.
    if len(lines) < 28:
        _CleanupFiles(tmp_dir)
        raise RuntimeError("tmscore superposition failed")
    return _ParseTmScore(lines)
00251
00252
def TMAlign(model1, model2, tmalign=None):
    """
    Performs a sequence independent superposition of model1 onto model2, the
    reference.

    :param model1: The model structure. If the superposition is successful,
                   will be superposed onto the reference structure
    :type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
    :param model2: The reference structure
    :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
    :param tmalign: If not None, the path to the tmalign executable.
    :returns: The result of the tmalign superposition
    :rtype: :class:`TMAlignResult`

    :raises: :class:`~ost.settings.FileNotFound` if tmalign could not be
             located.
    :raises: :class:`RuntimeError` if the superposition failed
    """
    tmp_dir_name = _SetupFiles((model1, model2))
    try:
        result = _RunTmAlign(tmalign, tmp_dir_name)
        model1.handle.EditXCS().ApplyTransform(result.transform)
    finally:
        # Remove the temporary files on every exit path. _RunTmAlign deletes
        # the directory itself before raising on a failed run, hence the
        # existence check.
        if os.path.isdir(tmp_dir_name):
            _CleanupFiles(tmp_dir_name)
    return result
00276
def MMAlign(model1, model2, mmalign=None):
    """
    Runs MMalign on two structures and superposes model1 onto model2, the
    reference, using the transformation reported by the program.

    :param model1: The model structure. If the superposition is successful,
                   will be superposed onto the reference structure
    :param model2: The reference structure
    :param mmalign: If not None, the path to the MMalign executable.
    :returns: The result of the MMalign superposition
    :rtype: :class:`MMAlignResult`

    :raises: :class:`RuntimeError` if the superposition failed
    """
    tmp_dir_name = _SetupFiles((model1, model2))
    try:
        result = _RunMmAlign(mmalign, tmp_dir_name)
        model1.handle.EditXCS().ApplyTransform(result.transform)
    finally:
        # Remove the temporary files on every exit path. _RunMmAlign deletes
        # the directory itself before raising on a failed run, hence the
        # existence check.
        if os.path.isdir(tmp_dir_name):
            _CleanupFiles(tmp_dir_name)
    return result
00286
def TMScore(model1, model2, tmscore=None):
    """
    Performs a sequence dependent superposition of model1 onto model2,
    the reference.

    :param model1: The model structure. If the superposition is successful,
                   will be superposed onto the reference structure
    :type model1: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
    :param model2: The reference structure
    :type model2: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
    :param tmscore: If not None, the path to the tmscore executable.
    :returns: The result of the tmscore superposition
    :rtype: :class:`TMScoreResult`

    :raises: :class:`~ost.settings.FileNotFound` if tmscore could not be
             located.
    :raises: :class:`RuntimeError` if the superposition failed
    """
    tmp_dir_name = _SetupFiles((model1, model2))
    try:
        result = _RunTmScore(tmscore, tmp_dir_name)
        model1.handle.EditXCS().ApplyTransform(result.transform)
    finally:
        # Remove the temporary files on every exit path. _RunTmScore deletes
        # the directory itself before raising on a failed run, hence the
        # existence check.
        if os.path.isdir(tmp_dir_name):
            _CleanupFiles(tmp_dir_name)
    return result