59 with open(os.path.join(tmp_dir_name,
'headers.dmp'),
'r')
as f:
60 header_data=f.readlines()
61 with open(os.path.join(tmp_dir_name,
'clusters.dmp'),
'r')
as f:
62 cluster_data=f.readlines()
63 sequences=io.LoadSequenceList(os.path.join(tmp_dir_name,
'fastadb.fasta'))
67 for line
in header_data:
68 header_mapper[int(line.split()[0])]=line.split()[1].strip().strip(
'>')
71 unique_representatives=list()
72 for line
in cluster_data[1:]:
73 actual_cluster=int(line.split()[1])
75 unique_representatives.index(actual_cluster)
77 unique_representatives.append(actual_cluster)
81 for idx
in unique_representatives:
82 clusters[idx]=seq.CreateSequenceList()
83 for line
in cluster_data[1:]:
84 clusters[int(line.split()[1])].AddSequence(sequences.FindSequence(header_mapper[int(line.split()[0])]))
89 for k, v
in clusters.items():
90 res.append(
cluster(v, header_mapper[k]))
94def _RunkClust(tmp_dir_name, clustering_thresh, create_alignments):
96 bitscore=clustering_thresh*0.060269-0.68498
98 executable=settings.Locate(
'kClust')
101 cmd.append(executable)
103 cmd.append(os.path.join(tmp_dir_name,
'fastadb.fasta'))
105 cmd.append(tmp_dir_name)
107 cmd.append(str(bitscore))
110 ps=subprocess.Popen(cmd, shell=
True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
111 stdout, stderr = ps.communicate()
115 if(create_alignments):
118 if len(c.sequences)>1:
119 c.alignment=clustalw.ClustalW(c.sequences)
121 aln=seq.CreateAlignment()
122 aln.AddSequence(c.sequences[0])