+ vector<string> outputs; outputs.resize(numGroups, "");
+ vector<int> numOutputs; numOutputs.resize(numGroups, 0);
+
+ //for each distance
+ while(dFile){
+ string seqA, seqB;
+ float dist;
+
+ if (m->control_pressed) { dFile.close(); for (int i = 0; i < numGroups; i++) { remove((distFile + "." + toString(i) + ".temp").c_str()); } }
+
+ dFile >> seqA >> seqB >> dist; gobble(dFile);
+
+ //if both sequences are in the same group then they are within the cutoff
+ it = seqGroup.find(seqA);
+ it2 = seqGroup.find(seqB);
+
+ if ((it != seqGroup.end()) && (it2 != seqGroup.end())) { //they are both not singletons
+ if (it->second == it2->second) { //they are from the same group so add the distance
+ if (numOutputs[it->second] > 10) {
+ openOutputFileAppend((distFile + "." + toString(it->second) + ".temp"), outFile);
+ outFile << outputs[it->second] << seqA << '\t' << seqB << '\t' << dist << endl;
+ outFile.close();
+ outputs[it->second] = "";
+ numOutputs[it->second] = 0;
+ }else{
+ outputs[it->second] += seqA + '\t' + seqB + '\t' + toString(dist) + '\n';
+ numOutputs[it->second]++;
+ }
+ }
+ }
+ }
+ dFile.close();
+
+ for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
+ remove((namefile + "." + toString(i) + ".temp").c_str());
+
+ //write out any remaining buffers
+ if (numOutputs[it->second] > 0) {
+ openOutputFileAppend((distFile + "." + toString(i) + ".temp"), outFile);
+ outFile << outputs[i];
+ outFile.close();
+ outputs[i] = "";
+ numOutputs[i] = 0;
+ }
+ }
+
+ ifstream bigNameFile;
+ openInputFile(namefile, bigNameFile);
+
+ singleton = namefile + ".extra.temp";
+ ofstream remainingNames;
+ openOutputFile(singleton, remainingNames);
+
+ bool wroteExtra = false;
+
+ string name, nameList;
+ while(!bigNameFile.eof()){
+ bigNameFile >> name >> nameList; gobble(bigNameFile);
+
+ //did this sequence get assigned a group
+ it = seqGroup.find(name);
+
+ if (it != seqGroup.end()) {
+ openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile);
+ outFile << name << '\t' << nameList << endl;
+ outFile.close();
+ }else{
+ wroteExtra = true;
+ remainingNames << name << '\t' << nameList << endl;
+ }
+ }
+ bigNameFile.close();
+ remainingNames.close();
+
+ if (!wroteExtra) {
+ remove(singleton.c_str());
+ singleton = "none";
+ }
+
+ for(int i=0;i<numGroups;i++){
+ string tempNameFile = namefile + "." + toString(i) + ".temp";
+ string tempDistFile = distFile + "." + toString(i) + ".temp";
+
+ map<string, string> temp;
+ temp[tempDistFile] = tempNameFile;
+ dists.push_back(temp);
+ }
+
+ if (m->control_pressed) {
+ for (int i = 0; i < dists.size(); i++) {
+ remove((dists[i].begin()->first).c_str());
+ remove((dists[i].begin()->second).c_str());
+ }
+ dists.clear();
+ }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SplitMatrix", "splitClassify");
+ exit(1);
+ }
+}
+/***********************************************************************/
+int SplitMatrix::splitDistanceLarge(){
+ try {
+ vector<set<string> > groups;
+
+ //for buffering the io to improve speed
+ //allow for 30 dists to be stored, then output.