OpenStructure
Loading...
Searching...
No Matches
mmseqs2.py
Go to the documentation of this file.
1import subprocess
2import ost
3from ost import settings
4
def createdb(infasta, resultDB, exe_path=None):
    """
    Build a mmseqs2 database from a fasta file by running ``mmseqs2 createdb``.

    The fasta file may hold query and/or target sequences. The exit status of
    the external command is not inspected and nothing is returned.

    :param infasta: The fasta file from which the mmseqs2 database is created.
    :type infasta: :class:`str`

    :param resultDB: The output location for the mmseqs2 database.
    :type resultDB: :class:`str`

    :param exe_path: Explicit path to the mmseqs2 executable. It is forwarded
                     to :func:`ost.settings.Locate` as *explicit_file_name*.
    :type exe_path: :class:`str`
    """
    executable = settings.Locate('mmseqs2', explicit_file_name=exe_path)

    # Full command line: <exe> createdb <input fasta> <output db>
    cmd = [executable, 'createdb']
    cmd.extend((infasta, resultDB))

    ost.LogInfo(f"running MMseqs2 {' '.join(cmd)}")
    subprocess.run(cmd)
25
26
27
def create_index(trg_db, exe_path=None, directory=None):
    """
    Compute an index of the target database by running ``mmseqs2 createindex``.

    An index file of the targetDB is computed for a fast read-in.
    It is recommended to compute the index if the targetDB is reused for
    several searches. It is recommended to place the directory for temporary
    files on a local drive.

    The exit status of the external command is not inspected and nothing is
    returned.

    :param trg_db: The target database mmseqs2 file.
                   (You need to initially convert a fasta file into a mmseqs2
                   database using createdb).
    :type trg_db: :class:`str`

    :param exe_path: Explicit path to the mmseqs2 executable. It is forwarded
                     to :func:`ost.settings.Locate` as *explicit_file_name*.
    :type exe_path: :class:`str`

    :param directory: The directory for temporary files. If None, a fresh
                      temporary directory is created for the duration of the
                      call and removed afterwards.
    :type directory: :class:`str`
    """
    # Local import keeps this block self-contained; the module header does not
    # import tempfile.
    import tempfile

    mmseqs2_exe = settings.Locate('mmseqs2', explicit_file_name=exe_path)

    def _run(tmp_dir):
        # Assemble, log and execute the createindex call for one scratch dir.
        args = [mmseqs2_exe, 'createindex', trg_db, tmp_dir]
        ost.LogInfo(f"running MMseqs2 {' '.join(args)}")
        subprocess.run(args)

    if directory is None:
        # Previously the default None was placed directly in the argument
        # list, which raised TypeError in ' '.join(...) before mmseqs2 was
        # ever started. Fall back to a throw-away scratch directory instead.
        with tempfile.TemporaryDirectory() as tmp_dir:
            _run(tmp_dir)
    else:
        _run(directory)
53
54
55
def alignment(query_db, trg_db, resultDB, directory, resultDB_m8, sen=None, exe_path=None,
              start_sens=None, sens_steps=None, fmt=None):
    """
    Run ``mmseqs2 search`` followed by ``mmseqs2 convertalis``.

    The search consists of two steps, the prefilter and the alignment. Its
    result database is then converted into a BLAST tab formatted file.
    The exit status of neither external command is inspected and nothing is
    returned.

    :param query_db: The query database mmseqs2 file.
                     (You need to initially convert a fasta file into a mmseqs2
                     database using createdb).
    :type query_db: :class:`str`

    :param trg_db: The target database mmseqs2 file.
                   (You need to initially convert a fasta file into a mmseqs2
                   database using createdb).
    :type trg_db: :class:`str`

    :param resultDB: The location of the result database. It is written by the
                     search step and read by the convertalis step.
    :type resultDB: :class:`str`

    :param directory: The directory for temporary files.
    :type directory: :class:`str`

    :param resultDB_m8: The output location of the converted result file.
    :type resultDB_m8: :class:`str`

    :param sen: It controls the speed and sensitivity of the search.
                A very fast search would use a sensitivity of 1.0,
                while a very sensitive search would use a sensitivity of up to
                7.0. Passed as ``-s``; omitted when falsy.
    :type sen: :class:`float`

    :param exe_path: Explicit path to the mmseqs2 executable. It is forwarded
                     to :func:`ost.settings.Locate` as *explicit_file_name*.
    :type exe_path: :class:`str`

    :param start_sens: Best hit fast. The lowest sensitivity is defined with
                       ``--start-sens``. Only used if *sens_steps* is given
                       as well.
    :type start_sens: :class:`int`

    :param sens_steps: Best hit fast. The number of steps to reach the highest
                       sensitivity can be defined with ``--sens-steps``. Only
                       used if *start_sens* is given as well.
    :type sens_steps: :class:`int`

    :param fmt: Format output type, if the default is not used. Passed as
                ``--format-output`` to convertalis.
    :type fmt: :class:`str`

    By default the converted file is a tab-separated list with 12 columns:
    (1,2) identifiers for query and target sequences/profiles,
    (3) sequence identity,
    (4) alignment length,
    (5) number of mismatches,
    (6) number of gap openings,
    (7-8, 9-10) domain start and end-position in query and in target,
    (11) E-value,
    and (12) bit score.

    The option --format-output defines a custom output format.
    The fields that are supported can be found in the following link:
    https://github.com/soedinglab/mmseqs2/wiki#custom-alignment-format-with-convertalis
    """
    mmseqs2_exe = settings.Locate('mmseqs2', explicit_file_name=exe_path)

    # Step 1: search. The '-a' flag is always passed.
    command = [mmseqs2_exe, 'search', query_db, trg_db, resultDB, directory, '-a']

    if sen:
        command.extend(['-s', str(sen)])

    # The iterative-sensitivity options are only added as a pair.
    if start_sens and sens_steps:
        command.extend(['--start-sens', str(start_sens),
                        '--sens-steps', str(sens_steps)])

    ost.LogInfo(f"running MMseqs2 {' '.join(command)}")
    subprocess.run(command)

    # Step 2: convert the result database into the tabular output file.
    args = [mmseqs2_exe, 'convertalis', query_db, trg_db, resultDB, resultDB_m8]

    if fmt:
        args.extend(['--format-output', fmt])

    # Braces (not parentheses) so the actual command line is interpolated;
    # the original logged the literal text "(' '.join(args))".
    ost.LogInfo(f"running MMseqs2 {' '.join(args)}")
    subprocess.run(args)
alignment(query_db, trg_db, resultDB, directory, resultDB_m8, sen=None, exe_path=None, start_sens=None, sens_steps=None, fmt=None)
Definition mmseqs2.py:57
createdb(infasta, resultDB, exe_path=None)
Definition mmseqs2.py:5
create_index(trg_db, exe_path=None, directory=None)
Definition mmseqs2.py:28