OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
mmseqs2.py
Go to the documentation of this file.
1 import subprocess
2 import ost
3 from ost import settings
4 
def createdb(infasta, resultDB, exe_path=None):
    """
    Build a mmseqs2 database from a fasta file.

    The fasta file may contain query and/or target sequences. The mmseqs2
    executable is looked up with :func:`settings.Locate` and invoked with the
    ``createdb`` sub-command. The invoked command line is logged at info
    level. The exit status of the mmseqs2 process is not inspected.

    :param infasta: The fasta file from which the mmseqs2 database will be
                    created.
    :type infasta: :class:`string`

    :param resultDB: The output location for the mmseqs2 database.
    :type resultDB: :class:`string`

    :param exe_path: The path where the mmseqs2 executable is located.
    :type exe_path: :class:`string`

    """
    executable = settings.Locate('mmseqs2', explicit_file_name=exe_path)

    # Assemble the full command line: <exe> createdb <fasta> <db>
    cmd = [executable]
    cmd += ['createdb', infasta, resultDB]

    cmd_line = ' '.join(cmd)
    ost.LogInfo(f"running MMseqs2 {cmd_line}")

    subprocess.run(cmd)
25 
26 
27 
def create_index(trg_db, exe_path=None, directory=None):
    """
    Compute an index file of the target database for a fast read-in.

    It is recommended to compute the index if the target database is reused
    for several searches. mmseqs2 needs a directory for temporary files; it
    is recommended to place this directory on a local drive.

    The invoked command line is logged at info level. The exit status of the
    mmseqs2 process is not inspected.

    :param trg_db: The target database mmseqs2 file.
                   (You need to initially convert a fasta file into a mmseqs2
                   database using createdb).
    :type trg_db: :class:`string`

    :param exe_path: The path where the mmseqs2 executable is located.
    :type exe_path: :class:`string`

    :param directory: The directory for temporary files. If ``None``
                      (the default), a fresh temporary directory is created
                      for the duration of the call and removed afterwards.
    :type directory: :class:`string`

    """
    # Imported locally so this function does not rely on a module-level import.
    import tempfile

    mmseqs2_exe = settings.Locate('mmseqs2', explicit_file_name=exe_path)

    def _run_createindex(tmp_dir):
        # Build, log and execute the createindex call for a concrete tmp dir.
        args = [mmseqs2_exe, 'createindex', trg_db, tmp_dir]
        ost.LogInfo(f"running MMseqs2 {' '.join(args)}")
        subprocess.run(args)

    if directory is not None:
        _run_createindex(directory)
        return

    # Previously the default directory=None ended up inside the argument list
    # and made ' '.join(args) raise a TypeError. Fall back to a throw-away
    # directory instead.
    # NOTE(review): assumes mmseqs2 writes the index next to trg_db and uses
    # the directory only for scratch files - confirm against the mmseqs2 docs.
    with tempfile.TemporaryDirectory() as tmp_dir:
        _run_createindex(tmp_dir)
53 
54 
55 
def alignment(query_db, trg_db, resultDB, directory, resultDB_m8, sen=None, exe_path=None,
              start_sens=None, sens_steps=None, fmt=None):
    """
    Search the query database against the target database and export the
    result as a tab-separated file.

    Two mmseqs2 sub-commands are run one after the other:

    1. ``search`` (called with ``-a``), which consists of two steps, the
       prefilter and the alignment, and writes *resultDB*.
    2. ``convertalis``, which converts *resultDB* into the file *resultDB_m8*.

    Both command lines are logged at info level. The exit status of the
    mmseqs2 processes is not inspected, so ``convertalis`` is run even if
    ``search`` failed.

    :param query_db: The query database mmseqs2 file.
                     (You need to initially convert a fasta file into a
                     mmseqs2 database using createdb).
    :type query_db: :class:`string`

    :param trg_db: The target database mmseqs2 file.
                   (You need to initially convert a fasta file into a mmseqs2
                   database using createdb).
    :type trg_db: :class:`string`

    :param resultDB: The output location of the search result database.
    :type resultDB: :class:`string`

    :param directory: The directory for temporary files.
    :type directory: :class:`string`

    :param resultDB_m8: The output location of the converted, tab-separated
                        result file.
    :type resultDB_m8: :class:`string`

    :param sen: It controls the speed and sensitivity of the search (``-s``).
                A very fast search would use a sensitivity of 1.0,
                while a very sensitive search would use a sensitivity of up
                to 7.0. Only passed on if set to a non-zero value.
    :type sen: :class:`float`

    :param exe_path: The path where the mmseqs2 executable is located.
    :type exe_path: :class:`string`

    :param start_sens: Best hit fast. The lowest sensitivity is defined with
                       ``--start-sens``. Only passed on if *sens_steps* is
                       given as well.
    :type start_sens: :class:`int`

    :param sens_steps: Best hit fast. The number of steps to reach the
                       highest sensitivity can be defined with
                       ``--sens-steps``. Only passed on if *start_sens* is
                       given as well.
    :type sens_steps: :class:`int`

    :param fmt: Format output type (``--format-output``), if the default is
                not used.
    :type fmt: :class:`string`

    By default the result database is converted into a BLAST tab formatted
    file. The file is formatted as a tab-separated list with 12 columns:
    (1,2) identifiers for query and target sequences/profiles,
    (3) sequence identity,
    (4) alignment length,
    (5) number of mismatches,
    (6) number of gap openings,
    (7-8, 9-10) domain start and end-position in query and in target,
    (11) E-value,
    and (12) bit score.

    The option ``--format-output`` defines a custom output format.
    The fields that are supported can be found in the following link:
    https://github.com/soedinglab/mmseqs2/wiki#custom-alignment-format-with-convertalis

    """
    mmseqs2_exe = settings.Locate('mmseqs2', explicit_file_name=exe_path)

    # Step 1: prefilter + alignment.
    command = [mmseqs2_exe, 'search', query_db, trg_db, resultDB, directory, '-a']

    if sen:
        command.extend(['-s', str(sen)])

    # The iterative sensitivity options are only added as a pair.
    if start_sens and sens_steps:
        command.extend(['--start-sens', str(start_sens),
                        '--sens-steps', str(sens_steps)])

    ost.LogInfo(f"running MMseqs2 {' '.join(command)}")
    subprocess.run(command)

    # Step 2: convert the result database into a tab-separated file.
    args = [mmseqs2_exe, 'convertalis', query_db, trg_db, resultDB, resultDB_m8]

    if fmt:
        args.extend(['--format-output', fmt])

    # Braces (not parentheses) are required so the command line is actually
    # interpolated into the log message.
    ost.LogInfo(f"running MMseqs2 {' '.join(args)}")
    subprocess.run(args)