+ m->openInputFile(distFile, dFile);
+
+
+ for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
+ m->mothurRemove((distFile + "." + toString(i) + ".temp"));
+ }
+
+ //for buffering the io to improve speed
+ //allow for roughly 30 dists per group to be stored, then output.
+ vector<string> outputs; outputs.resize(numGroups, "");
+ vector<int> numOutputs; numOutputs.resize(numGroups, 0);
+
+ //you can have a group made, but there may be no distances in the file for this group if the taxonomy file and distance file don't match
+ //this can occur if we have converted the phylip to column, since we reduce the size at that step by using the cutoff value
+ vector<bool> validDistances; validDistances.resize(numGroups, false);
+
+ //for each distance
+ while(dFile){
+ string seqA, seqB;
+ float dist;
+
+ if (m->control_pressed) { dFile.close(); for (int i = 0; i < numGroups; i++) { m->mothurRemove((distFile + "." + toString(i) + ".temp")); } }
+
+ dFile >> seqA >> seqB >> dist; m->gobble(dFile);
+
+ //if both sequences are in the same group then they are within the cutoff
+ it = seqGroup.find(seqA);
+ it2 = seqGroup.find(seqB);
+
+ if ((it != seqGroup.end()) && (it2 != seqGroup.end())) { //they are both not singletons
+ if (it->second == it2->second) { //they are from the same group so add the distance
+ if (numOutputs[it->second] > 30) {
+ m->openOutputFileAppend((distFile + "." + toString(it->second) + ".temp"), outFile);
+ outFile << outputs[it->second] << seqA << '\t' << seqB << '\t' << dist << endl;
+ outFile.close();
+ outputs[it->second] = "";
+ numOutputs[it->second] = 0;
+ validDistances[it->second] = true;
+ }else{
+ outputs[it->second] += seqA + '\t' + seqB + '\t' + toString(dist) + '\n';
+ numOutputs[it->second]++;
+ }
+ }
+ }
+ }
+ dFile.close();
+
+ string inputFile = namefile;
+ if (countfile != "") { inputFile = countfile; }
+
+ vector<string> tempDistFiles;
+ for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
+ string tempDistFile = distFile + "." + toString(i) + ".temp";
+ tempDistFiles.push_back(tempDistFile);
+ m->mothurRemove((inputFile + "." + toString(i) + ".temp"));
+
+ //write out any remaining buffers
+ if (numOutputs[i] > 0) {
+ m->openOutputFileAppend((distFile + "." + toString(i) + ".temp"), outFile);
+ outFile << outputs[i];
+ outFile.close();
+ outputs[i] = "";
+ numOutputs[i] = 0;
+ validDistances[i] = true;
+ }
+ }
+
+ splitNames(seqGroup, numGroups, tempDistFiles);
+
+ if (m->control_pressed) {
+ for (int i = 0; i < dists.size(); i++) {
+ m->mothurRemove((dists[i].begin()->first));
+ m->mothurRemove((dists[i].begin()->second));
+ }
+ dists.clear();
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SplitMatrix", "splitDistanceFileByTax");
+ exit(1);
+ }
+}
+/***********************************************************************/
+int SplitMatrix::splitDistanceLarge(){
+ try {
+ vector<set<string> > groups;
+
+ //for buffering the io to improve speed
+ //allow for 30 dists to be stored, then output.
+ vector<string> outputs;
+ vector<int> numOutputs;
+ vector<bool> wroteOutPut;
+
+ int numGroups = 0;
+
+ //ofstream outFile;
+ ifstream dFile;
+ m->openInputFile(distFile, dFile);