+ while (!in.eof()) {
+ Sequence query(in); m->gobble(in);
+ if (query.getName() != "") {
+
+ it = seqGroup.find(query.getName());
+
+ //save names in case no namefile is given
+ if ((namefile == "") && (countfile == "")) { names.insert(query.getName()); }
+
+ if (it != seqGroup.end()) { //not singleton
+ m->openOutputFileAppend((fastafile + "." + toString(it->second) + ".temp"), outFile);
+ query.printSequence(outFile);
+ outFile.close();
+
+ copyGroups.erase(query.getName());
+ }
+ }
+ }
+ in.close();
+
+ //warn about sequence in groups that are not in fasta file
+ for(it = copyGroups.begin(); it != copyGroups.end(); it++) {
+ m->mothurOut("ERROR: " + it->first + " is missing from your fastafile. This could happen if your taxonomy file is not unique and your fastafile is, or it could indicate and error."); m->mothurOutEndLine();
+ exit(1);
+ }
+
+ copyGroups.clear();
+
+ //process each distance file
+ for (int i = 0; i < numGroups; i++) {
+
+ string options = "";
+ if (classic) { options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", output=lt"; }
+ else { options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(distCutoff); }
+ if (outputDir != "") { options += ", outputdir=" + outputDir; }
+
+ m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+
+ Command* command = new DistanceCommand(options);
+
+ m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+
+ command->execute();
+ delete command;
+
+ m->mothurRemove((fastafile + "." + toString(i) + ".temp"));
+
+ //remove old names files just in case
+ if (namefile != "") { m->mothurRemove((namefile + "." + toString(i) + ".temp")); }
+ else { m->mothurRemove((countfile + "." + toString(i) + ".temp")); }
+ }
+
+ //restore old fasta file name since dist.seqs overwrites it with the temp files
+ m->setFastaFile(fastafile);
+
+ vector<string> tempDistFiles;
+ for(int i=0;i<numGroups;i++){
+ if (outputDir == "") { outputDir = m->hasPath(fastafile); }
+ string tempDistFile = "";
+ if (classic) { tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "phylip.dist";}
+ else { tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist"; }
+ tempDistFiles.push_back(tempDistFile);
+ }
+
+ splitNames(seqGroup, numGroups, tempDistFiles);
+
+ if (m->control_pressed) { for (int i = 0; i < dists.size(); i++) { m->mothurRemove((dists[i].begin()->first)); m->mothurRemove((dists[i].begin()->second)); } dists.clear(); }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SplitMatrix", "createDistanceFilesFromTax");
+ exit(1);
+ }
+}
+/***********************************************************************/
+// Splits the distance file (distFile) into one temp file per group.
+//
+// Each record of distFile is read as "seqA seqB dist". A distance is kept only
+// when both sequence names are present in seqGroup AND map to the same group
+// number; it is then appended to "<distFile>.<group>.temp". Distances whose
+// sequences are missing from seqGroup, or that span two groups, are dropped.
+//
+// seqGroup  - sequence name -> group number (used as an index, so values are
+//             expected to be in [0, numGroups) -- NOTE(review): not validated here).
+// numGroups - number of groups / temp files.
+//
+// After the distances are written, any old "<namefile|countfile>.<group>.temp"
+// files are removed and splitNames() is called with the list of temp distance
+// file names. Returns 0, including when the run is cancelled via
+// m->control_pressed. Any std::exception is reported through m->errorOut and
+// the process exits.
+int SplitMatrix::splitDistanceFileByTax(map<string, int>& seqGroup, int numGroups){
+    try {
+        map<string, int>::iterator it;
+        map<string, int>::iterator it2;
+
+        ofstream outFile;
+        ifstream dFile;
+        m->openInputFile(distFile, dFile);
+
+        for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
+            m->mothurRemove((distFile + "." + toString(i) + ".temp"));
+        }
+
+        //for buffering the io to improve speed
+        //a group's buffer is written out once it already holds more than 30 distances
+        vector<string> outputs; outputs.resize(numGroups, "");
+        vector<int> numOutputs; numOutputs.resize(numGroups, 0);
+
+        //you can have a group made, but there may be no distances in the file for this group if the taxonomy file and distance file don't match
+        //this can occur if we have converted the phylip to column, since we reduce the size at that step by using the cutoff value
+        //in that case no temp distance file is written for the group by this function
+
+        //for each distance
+        while(dFile){
+            string seqA, seqB;
+            float dist;
+
+            if (m->control_pressed) {
+                //cancelled: remove what has been written so far and stop, rather than
+                //falling through and re-creating the temp files from the leftover buffers
+                dFile.close();
+                for (int i = 0; i < numGroups; i++) { m->mothurRemove((distFile + "." + toString(i) + ".temp")); }
+                return 0;
+            }
+
+            //stop on a failed read (empty or malformed file) instead of using unset values
+            if (!(dFile >> seqA >> seqB >> dist)) { break; }
+            m->gobble(dFile);
+
+            //if both sequences are in the same group then they are within the cutoff
+            it = seqGroup.find(seqA);
+            it2 = seqGroup.find(seqB);
+
+            if ((it != seqGroup.end()) && (it2 != seqGroup.end())) { //they are both not singletons
+                if (it->second == it2->second) { //they are from the same group so add the distance
+                    const int group = it->second;
+                    if (numOutputs[group] > 30) {
+                        //flush the buffered lines together with the current distance
+                        m->openOutputFileAppend((distFile + "." + toString(group) + ".temp"), outFile);
+                        outFile << outputs[group] << seqA << '\t' << seqB << '\t' << dist << endl;
+                        outFile.close();
+                        outputs[group] = "";
+                        numOutputs[group] = 0;
+                    }else{
+                        outputs[group] += seqA + '\t' + seqB + '\t' + toString(dist) + '\n';
+                        numOutputs[group]++;
+                    }
+                }
+            }
+        }
+        dFile.close();
+
+        //the per-group name temp files are based on the count file when one is given, otherwise the name file
+        string inputFile = namefile;
+        if (countfile != "") { inputFile = countfile; }
+
+        vector<string> tempDistFiles;
+        for (int i = 0; i < numGroups; i++) {
+            string tempDistFile = distFile + "." + toString(i) + ".temp";
+            tempDistFiles.push_back(tempDistFile);
+
+            //remove old name/count temp files, just in case
+            m->mothurRemove((inputFile + "." + toString(i) + ".temp"));
+
+            //write out any remaining buffers
+            if (numOutputs[i] > 0) {
+                m->openOutputFileAppend(tempDistFile, outFile);
+                outFile << outputs[i];
+                outFile.close();
+                outputs[i] = "";
+                numOutputs[i] = 0;
+            }
+        }
+
+        splitNames(seqGroup, numGroups, tempDistFiles);
+
+        if (m->control_pressed) {
+            //cancelled during/after splitNames: remove the files recorded in dists
+            for (size_t i = 0; i < dists.size(); i++) {
+                m->mothurRemove((dists[i].begin()->first));
+                m->mothurRemove((dists[i].begin()->second));
+            }
+            dists.clear();
+        }
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SplitMatrix", "splitDistanceFileByTax");
+        exit(1);
+    }
+}
+/***********************************************************************/
+int SplitMatrix::splitDistanceLarge(){
+ try {
+ vector<set<string> > groups;
+
+ //for buffering the io to improve speed
+ //allow for 30 dists to be stored, then output.
+ vector<string> outputs;
+ vector<int> numOutputs;
+ vector<bool> wroteOutPut;
+
+ int numGroups = 0;
+
+ //ofstream outFile;