00001 '''
00002 Naccess module
00003
00004 Author: Florian Kiefer, Gerardo Tauriello (cleanup/speedup)
00005
00006 This module is for calculating surface areas
00007 from OpenStructure using the external program naccess
00008
00009 How To Use This Module:
00010 1. Import it (e.g. as "from ost.bindings import naccess")
00011 2. Use it (e.g. as "sasa = naccess.CalculateSurfaceArea(entity)")
00012
00013 Requirement:
00014 - naccess installed
00015 '''
00016
00017 import tempfile
00018 import subprocess
00019 import os
00020 import re
00021 from ost import io, mol, settings, LogWarning
00022
00023 def _GetExecutable(naccess_exe):
00024 """
00025 Method to check if naccess executable is present
00026
00027 :param naccess: Explicit path to naccess executable
00028 :returns: Path to the executable
00029 :exception: FileNotFound if executable is not found
00030 """
00031 return settings.Locate('naccess', explicit_file_name=naccess_exe)
00032
00033 def _CheckNaccessRoot(naccess_root):
00034 """
00035 :return: True, if given directory contains "accall" binary and files
00036 "vdw.radii" and "standard.data".
00037 :param naccess_root: Path to naccess folder to check.
00038 """
00039 accall_exe = os.path.join(naccess_root, "accall")
00040 check = (os.path.exists(accall_exe) and os.access(accall_exe, os.X_OK) \
00041 and os.path.exists(os.path.join(naccess_root, "vdw.radii")) \
00042 and os.path.exists(os.path.join(naccess_root, "standard.data")))
00043 if not check:
00044 LogWarning("NACCESS: Could not find required files to launch accall " \
00045 "directly in %s." % naccess_root)
00046 return check
00047
00048 def _SetupFiles(entity, selection, scratch_dir, max_number_of_atoms):
00049 """
00050 Setup files for naccess calculation in temporary directory
00051
00052 :param entity: OST entity to calculate surface
00053 :param selection: Calculate surface for subset of entity
00054 :param scratch_dir: Scratch directory. A subfolder for temporary files
00055 is created in there. If not specified, a default
00056 directory is used (see :func:`tempfile.mkdtemp`).
00057 :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is limited
00058 in the default NACCESS version to 50 000)
00059 :returns: Tuple containing temporary directory, input filename for naccess and
00060 directory of the input file
00061 :exception: RuntimeError if selection is not valid or too many atoms
00062 """
00063
00064
00065 tmp_dir_name = ""
00066 if scratch_dir != None:
00067 tmp_dir_name = tempfile.mkdtemp(dir=scratch_dir)
00068 else:
00069 tmp_dir_name = tempfile.mkdtemp()
00070
00071
00072 if selection != "":
00073 entity_view = entity.Select(selection)
00074 else:
00075 entity_view = entity
00076 if len(entity_view.atoms) > max_number_of_atoms:
00077 raise RuntimeError, "Too much atoms for NACCESS (> %s)" % max_number_of_atoms
00078 if not entity_view.IsValid():
00079 raise RuntimeError, "Could not create view for selection (%s)"%(selection)
00080
00081
00082 tmp_file_name = "entity.pdb"
00083 tmp_file_base = os.path.join(tmp_dir_name, "entity")
00084 io.SavePDB(entity_view, os.path.join(tmp_dir_name, tmp_file_name))
00085 return (tmp_dir_name, tmp_file_name, tmp_file_base)
00086
00087
00088 def _ParseAsaFile(entity, file, asa_atom):
00089 """
00090 Reads Area file (.asa) and attach asa per atom to an entitiy
00091
00092 :param entity: EntityHandle or EntityView for attaching sasa on atom level
00093 :param file: Filename of area file
00094 :param asa_atom: Name of the float property for SASA
00095 """
00096
00097 asa_fh = open(file)
00098 asa_lines = asa_fh.readlines()
00099 asa_fh.close()
00100
00101 for l in asa_lines:
00102 if l.startswith("ATOM"):
00103
00104 atom_name = l[12:16]
00105 chain_id = l[21]
00106 res_number = l[22:27]
00107 asa = l[54:63]
00108 atom_name = atom_name.strip()
00109 chain_id = chain_id
00110 res_number = res_number.strip()
00111 asa = asa.strip()
00112 m = re.match(r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
00113 di = m.groupdict()
00114
00115 if di["ins"] == None:
00116 resNum = mol.ResNum(int(di["num"]))
00117 else:
00118 resNum = mol.ResNum(int(di["num"]), di["ins"])
00119
00120 a = entity.FindAtom(chain_id, resNum, atom_name)
00121 if(a.IsValid()):
00122 a.SetFloatProp(asa_atom, float(asa))
00123 else:
00124 LogWarning("NACCESS: invalid asa entry %s %s %s" \
00125 % (chain_id, resNum, atom_name))
00126
00127 def _ParseRsaFile(entity, file, asa_abs, asa_rel):
00128 """
00129 Reads Area file (.rsa) and attach asa (absolute + relative) per residue to an entitiy
00130
00131 :param entity: EntityHandle or EntityView for attaching sasa on atom level
00132 :param file: Filename of .rsa file
00133 :param asa_atom: Name of the float property for absolute SASA
00134 :param asa_atom: Name of the float property for relative SASA
00135 :exception: RuntimeError if residue names are not the same
00136 """
00137 area_fh = open(file)
00138 area_lines = area_fh.readlines()
00139 area_fh.close()
00140
00141 area_lines = area_lines[4:]
00142
00143 for l in area_lines:
00144 if l.startswith("RES"):
00145
00146 p = re.compile(r'\s+')
00147 res_name = l[3:8]
00148 res_name = res_name.strip()
00149 chain_id = l[8:9]
00150 res_number = l[9:14]
00151 res_number = res_number.strip()
00152 abs_all, rel_all = l[15:28].strip().split()
00153 m = re.match(r'(?P<num>-?\d+)(?P<ins>\w)?', res_number)
00154 di = m.groupdict()
00155 if di["ins"] == None:
00156 resNum = mol.ResNum(int(di["num"]))
00157 else:
00158 resNum = mol.ResNum(int(di["num"]), di["ins"])
00159
00160 res = entity.FindResidue(chain_id, resNum)
00161 if res_name == res.name:
00162 res.SetFloatProp(asa_rel, float(rel_all) )
00163 res.SetFloatProp(asa_abs, float(abs_all) )
00164 else:
00165 raise RuntimeError, "Residue Names are not the same for ResNumb: %s (%s vs %s)" % (res_number, res.name, res_name)
00166
00167
00168 def __CleanupFiles(dir_name):
00169 """
00170 Method which recursively deletes a directory
00171
00172 :warning: This method removes also non-empty directories without asking, so
00173 be careful!
00174 """
00175 import shutil
00176 shutil.rmtree(dir_name)
00177
00178
00179 def _RunACCALL(command, temp_dir, query):
00180 """
00181 Fast method to run the Naccess surface calculation.
00182
00183 This method starts the accall binary directly and pipes in the input provided
00184 in *query*. This is faster than calling the "naccess" script since the script
00185 has a constant overhead of roughly 1.3s in each call.
00186
00187 :param command: Command to execute
00188 :param temp_dir: Command is executed with this working directory
00189 :param query: User input to pipe into *command*
00190 :returns: stdout of command
00191 :exception: CalledProcessError for non-zero return value
00192 """
00193 proc = subprocess.Popen(command, stdout=subprocess.PIPE,
00194 stderr=subprocess.PIPE, stdin=subprocess.PIPE,
00195 cwd=temp_dir)
00196 stdout_value, stderr_value = proc.communicate(query)
00197
00198
00199 if proc.returncode != 0:
00200 LogWarning("WARNING: naccess error\n%s\n%s" % (stdout_value, stderr_value))
00201 raise subprocess.CalledProcessError(proc.returncode, command)
00202
00203 return stdout_value
00204
00205
00206 def _RunNACCESS(command, temp_dir):
00207 """
00208 Method to run the Naccess surface calculation.
00209
00210 This method starts the external Naccess executable and returns the stdout.
00211
00212 :param command: Command to execute
00213 :param temp_dir: Command is executed with this working directory
00214 :returns: stdout of command
00215 :exception: CalledProcessError for non-zero return value
00216 """
00217 proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE,
00218 cwd=temp_dir)
00219 stdout_value, stderr_value = proc.communicate()
00220
00221
00222 if proc.returncode != 0:
00223 LogWarning("WARNING: naccess error\n%s" % stdout_value)
00224 raise subprocess.CalledProcessError(proc.returncode, command)
00225
00226 return stdout_value
00227
00228
00229 def CalculateSurfaceArea(entity, radius=1.4,
00230 include_hydrogens=False, include_hetatm = False,
00231 include_water=False, selection="", naccess_exe=None,
00232 naccess_root=None, keep_files=False, asa_abs= "asaAbs",
00233 asa_rel="asaRel", asa_atom="asaAtom", scratch_dir=None,
00234 max_number_of_atoms=50000):
00235 """
00236 Calculates analytical the solvent accessible surface area by using the
00237 external naccess program
00238
00239 This method calculates the molecular surface areas by invoking the external
00240 program naccess. First, it is checked if the naccess executable is present, then,
00241 the necessary files are prepared in a temporary directory and naccess is
00242 executed. The last step is to remove the temporary directory.
00243
00244
00245 :param entity: OST entity to calculate surface
00246 :param radius: Surface probe radius
00247 :param include_hydrogens: Calculate surface including hydrogens
00248 :param include_hetatm: Calculate surface including hetatms
00249 :param include_water: Calculate surface including water
00250 :param selection: Calculate surface for subset of entity
00251 :param naccess_exe: naccess executable (full path to executable)
00252 :param naccess_root: Path to folder containing "accall" binary and
00253 files "vdw.radii" and "standard.data". This is the
00254 fastest way to call naccess!
00255 :param keep_files: If True, do not delete temporary files
00256 :param asa_abs: Attaches per residue absolute SASA to specified
00257 FloatProp on residue level
00258 :param asa_rel: Attaches per residue relative SASA to specified
00259 FloatProp on residue level
00260 :param asa_atom: Attaches per atom SASA to specified FloatProp at
00261 atom level
00262 :param scratch_dir: Scratch directory. A subfolder for temporary files
00263 is created in there. If not specified, a default
00264 directory is used (see :func:`tempfile.mkdtemp`).
00265 :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is limited
00266 in the default NACCESS version to 50 000)
00267
00268 :returns: absolute SASA calculated using asa_atom
00269 """
00270
00271
00272 if naccess_root and _CheckNaccessRoot(naccess_root):
00273
00274 fast_mode = True
00275 else:
00276
00277 naccess_executable = _GetExecutable(naccess_exe)
00278
00279 naccess_root = os.path.dirname(naccess_executable)
00280 fast_mode = _CheckNaccessRoot(naccess_root)
00281
00282
00283 (naccess_data_dir, naccess_data_file, naccess_data_base) \
00284 = _SetupFiles(entity, selection, scratch_dir, max_number_of_atoms)
00285
00286 try:
00287
00288 if fast_mode:
00289
00290 query = "PDBFILE %s\n" \
00291 "VDWFILE %s\n" \
00292 "STDFILE %s\n" \
00293 "PROBE %f\n" \
00294 "ZSLICE 0.05\n" \
00295 % (naccess_data_file, os.path.join(naccess_root, "vdw.radii"),
00296 os.path.join(naccess_root, "standard.data"), radius)
00297 if include_hydrogens:
00298 query += "HYDROGENS\n"
00299 if include_water:
00300 query += "WATERS\n"
00301 if include_hetatm:
00302 query += "HETATOMS\n"
00303
00304 command = os.path.join(naccess_root, "accall")
00305 _RunACCALL(command, naccess_data_dir, query)
00306 else:
00307 LogWarning("NACCESS: Falling back to slower call to %s." \
00308 % naccess_executable)
00309
00310 command = "%s %s -p %f " % \
00311 (naccess_executable, naccess_data_file, radius)
00312 if include_hydrogens:
00313 command = "%s -y" % command
00314 if include_water:
00315 command = "%s -w" % command
00316 if include_hetatm:
00317 command = "%s -h" % command
00318
00319 _RunNACCESS(command, naccess_data_dir)
00320
00321
00322 new_asa = os.path.join(naccess_data_dir, "%s.asa" % naccess_data_base)
00323 _ParseAsaFile(entity, new_asa, asa_atom)
00324
00325 new_rsa = os.path.join(naccess_data_dir, "%s.rsa" % naccess_data_base)
00326 _ParseRsaFile(entity, new_rsa, asa_abs, asa_rel)
00327
00328 finally:
00329
00330 if not keep_files:
00331 __CleanupFiles(naccess_data_dir)
00332
00333
00334 sasa = 0.0
00335 for a in entity.atoms:
00336 sasa += a.GetFloatProp(asa_atom, 0.0)
00337
00338 return sasa