]> git.donarmstrong.com Git - mothur.git/blobdiff - readcluster.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / readcluster.cpp
index 50787e984bd2a3c45cbce0af354fea178771a6b0..a6adabb5b8aa155d3b633f885fd37f7ba051465f 100644 (file)
 
 /***********************************************************************/
 
-ReadCluster::ReadCluster(string distfile, float c){
+ReadCluster::ReadCluster(string distfile, float c, string o, bool s){
+               m = MothurOut::getInstance();
         distFile = distfile;
                cutoff = c;
+               outputDir = o;
+               sortWanted = s;
+               list = NULL;
 }
 
 /***********************************************************************/
 
-void ReadCluster::read(NameAssignment* nameMap){
+int ReadCluster::read(NameAssignment*& nameMap){
        try {
         
                if (format == "phylip") { convertPhylip2Column(nameMap); }
                else { list = new ListVector(nameMap->getListVector());  }
                
-               createHClusterFile();
+               if (m->control_pressed) { return 0; }
+               
+               if (sortWanted) {  OutPutFile = m->sortFile(distFile, outputDir);  }
+               else {  OutPutFile = distFile;   } //for use by clusters splitMatrix to convert a phylip matrix to column
+               
+               return 0;
                        
        }
        catch(exception& e) {
-               errorOut(e, "ReadCluster", "read");
+               m->errorOut(e, "ReadCluster", "read");
                exit(1);
        }
 }
 /***********************************************************************/
-
-void ReadCluster::createHClusterFile(){
-       try {   
-               string outfile = getRootName(distFile) + "sorted.dist";
-               
-               //if you can, use the unix sort since its been optimized for years
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       string command = "sort -n -k +3 " + distFile + " -o " + outfile;
-                       system(command.c_str());
-               #else //you are stuck with my best attempt...
-                       //windows sort does not have a way to specify a column, only a character in the line
-                       //since we cannot assume that the distance will always be at the the same character location on each line
-                       //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
-               
-                       //read in file line by file and put distance first
-                       string tempDistFile = distFile + ".temp";
-                       ifstream input;
-                       ofstream output;
-                       openInputFile(distFile, input);
-                       openOutputFile(tempDistFile, output);
-
-                       string firstName, secondName;
-                       float dist;
-                       while (input) {
-                               input >> firstName >> secondName >> dist;
-                               output << dist << '\t' << firstName << '\t' << secondName << endl;
-                               gobble(input);
-                       }
-                       input.close();
-                       output.close();
+int ReadCluster::read(CountTable*& ct){
+       try {
+        
+               if (format == "phylip") { convertPhylip2Column(ct); }
+               else { list = new ListVector(ct->getListVector());  }
                
-       
-                       //sort using windows sort
-                       string tempOutfile = outfile + ".temp";
-                       string command = "sort " + tempDistFile + " /O " + tempOutfile;
-                       system(command.c_str());
-               
-                       //read in sorted file and put distance at end again
-                       ifstream input2;
-                       openInputFile(tempOutfile, input2);
-                       openOutputFile(outfile, output);
-               
-                       while (input2) {
-                               input2 >> dist >> firstName >> secondName;
-                               output << firstName << '\t' << secondName << '\t' << dist << endl;
-                               gobble(input2);
-                       }
-                       input2.close();
-                       output.close();
+               if (m->control_pressed) { return 0; }
                
-                       //remove temp files
-                       remove(tempDistFile.c_str());
-                       remove(tempOutfile.c_str());
-               #endif
+               if (sortWanted) {  OutPutFile = m->sortFile(distFile, outputDir);  }
+               else {  OutPutFile = distFile;   } //for use by clusters splitMatrix to convert a phylip matrix to column
                
-               OutPutFile = outfile;
+               return 0;
+        
        }
        catch(exception& e) {
-               errorOut(e, "ReadCluster", "createHClusterFile");
+               m->errorOut(e, "ReadCluster", "read");
                exit(1);
        }
 }
-
-
 /***********************************************************************/
 
-void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
+int ReadCluster::convertPhylip2Column(NameAssignment*& nameMap){
        try {   
                //convert phylip file to column file
                map<int, string> rowToName;
@@ -109,15 +73,20 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
                ofstream out;
                string tempFile = distFile + ".column.temp";
                
-               openInputFile(distFile, in);
-               openOutputFile(tempFile, out);
+               m->openInputFile(distFile, in);  m->gobble(in);
+               m->openOutputFile(tempFile, out);
                
                float distance;
                int square, nseqs;
                string name;
                vector<string> matrixNames;
-        
-               in >> nseqs >> name;
+               
+               string numTest;
+               in >> numTest >> name;
+               
+               if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+               else { convert(numTest, nseqs); }
+               
                rowToName[0] = name;
                matrixNames.push_back(name);
                
@@ -127,7 +96,7 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
                }
                else{
                        list = new ListVector(nameMap->getListVector());
-                       if(nameMap->count(name)==0){        mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
+                       if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
                }
         
                char d;
@@ -159,6 +128,9 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
                                        list->set(i, name);
                                        
                                        for(int j=0;j<i;j++){
+                                       
+                                               if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
+                                               
                                                in >> distance;
                                                
                                                if (distance == -1) { distance = 1000000; }
@@ -170,9 +142,12 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
                                        
                                }
                                else{
-                                       if(nameMap->count(name)==0){        mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
+                                       if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
                                        
                                        for(int j=0;j<i;j++){
+                                               
+                                               if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
+                                               
                                                in >> distance;
                                                
                                                if (distance == -1) { distance = 1000000; }
@@ -185,29 +160,31 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
                        }
                }
                else{
-                       
                        for(int i=1;i<nseqs;i++){
                                in >> name;                
                                rowToName[i] = name;
                                matrixNames.push_back(name);
-                               
+               
                                if(nameMap == NULL){
                                        list->set(i, name);
                                        for(int j=0;j<nseqs;j++){
-                                               in >> distance;
+                                               if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
                                                
+                                               in >> distance;
+                                       
                                                if (distance == -1) { distance = 1000000; }
                                                
                                                if(distance < cutoff && j < i){
                                                        out << i << '\t' << j << '\t' << distance << endl;
                                                }
                                        }
-                                       
                                }
                                else{
-                                       if(nameMap->count(name)==0){        mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
+                                       if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
                                        
                                        for(int j=0;j<nseqs;j++){
+                                               if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
+                                               
                                                in >> distance;
                         
                                                if (distance == -1) { distance = 1000000; }
@@ -224,36 +201,219 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
                list->setLabel("0");
                in.close();
                out.close();
-               
+       
                if(nameMap == NULL){
+                       nameMap = new NameAssignment();
                        for(int i=0;i<matrixNames.size();i++){
                                nameMap->push_back(matrixNames[i]);
                        }
                }
                
+       
                ifstream in2;
                ofstream out2;
                
-               string outputFile = getRootName(distFile) + "column.dist";
-               openInputFile(tempFile, in2);
-               openOutputFile(outputFile, out2);
+               string outputFile = m->getRootName(distFile) + "column.dist";
+               m->openInputFile(tempFile, in2);
+               m->openOutputFile(outputFile, out2);
                
                int first, second;
                float dist;
                
                while (in2) {
+                       if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(tempFile); m->mothurRemove(outputFile); return 0; }
+                       
                        in2 >> first >> second >> dist;
                        out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
-                       gobble(in2);
+                       m->gobble(in2);
                }
                in2.close();
                out2.close();
                
-               remove(tempFile.c_str());
+               m->mothurRemove(tempFile);
                distFile = outputFile;
+       
+               if (m->control_pressed) {  m->mothurRemove(outputFile);  }
+
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ReadCluster", "convertPhylip2Column");
+               exit(1);
+       }
+}
+/***********************************************************************/
+
+int ReadCluster::convertPhylip2Column(CountTable*& ct){
+       try {   
+               //convert phylip file to column file
+               map<int, string> rowToName;
+               map<int, string>::iterator it;
+               
+               ifstream in;
+               ofstream out;
+               string tempFile = distFile + ".column.temp";
+               
+               m->openInputFile(distFile, in);  m->gobble(in);
+               m->openOutputFile(tempFile, out);
+               
+               float distance;
+               int square, nseqs;
+               string name;
+               vector<string> matrixNames;
+               
+               string numTest;
+               in >> numTest >> name;
+               
+               if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+               else { convert(numTest, nseqs); }
+               
+               rowToName[0] = name;
+               matrixNames.push_back(name);
+               
+               if(ct == NULL){
+                       list = new ListVector(nseqs);
+                       list->set(0, name);
+               }
+               else{  list = new ListVector(ct->getListVector()); }
+        
+               char d;
+               while((d=in.get()) != EOF){
+                       
+                       if(isalnum(d)){
+                               square = 1;
+                               in.putback(d);
+                               for(int i=0;i<nseqs;i++){
+                                       in >> distance;
+                               }
+                               break;
+                       }
+                       if(d == '\n'){
+                               square = 0;
+                               break;
+                       }
+               }
+        
+               if(square == 0){
+            
+                       for(int i=1;i<nseqs;i++){
+                               in >> name;
+                               rowToName[i] = name;
+                               matrixNames.push_back(name);
+                               
+                               //there's A LOT of repeated code throughout this method...
+                               if(ct == NULL){
+                                       list->set(i, name);
+                                       
+                                       for(int j=0;j<i;j++){
+                        
+                                               if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
+                                               
+                                               in >> distance;
+                                               
+                                               if (distance == -1) { distance = 1000000; }
+                                               
+                                               if(distance < cutoff){
+                                                       out << i << '\t' << j << '\t' << distance << endl;
+                                               }
+                                       }
+                                       
+                               }
+                               else{
+                                       
+                                       for(int j=0;j<i;j++){
+                                               
+                                               if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
+                                               
+                                               in >> distance;
+                                               
+                                               if (distance == -1) { distance = 1000000; }
+                                               
+                                               if(distance < cutoff){
+                                                       out << i << '\t' << j << '\t' << distance << endl;
+                                               }
+                                       }
+                               }
+                       }
+               }
+               else{
+                       for(int i=1;i<nseqs;i++){
+                               in >> name;                
+                               rowToName[i] = name;
+                               matrixNames.push_back(name);
+                
+                               if(ct == NULL){
+                                       list->set(i, name);
+                                       for(int j=0;j<nseqs;j++){
+                                               if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
+                                               
+                                               in >> distance;
+                        
+                                               if (distance == -1) { distance = 1000000; }
+                                               
+                                               if(distance < cutoff && j < i){
+                                                       out << i << '\t' << j << '\t' << distance << endl;
+                                               }
+                                       }
+                               }
+                               else{
+                                       for(int j=0;j<nseqs;j++){
+                                               if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
+                                               
+                                               in >> distance;
+                        
+                                               if (distance == -1) { distance = 1000000; }
+                                               
+                                               if(distance < cutoff && j < i){
+                                                       out << i << '\t' << j << '\t' << distance << endl;
+                                               }
+                                               
+                                       }
+                               }
+                       }
+               }
+               
+               list->setLabel("0");
+               in.close();
+               out.close();
+        
+               if(ct == NULL){
+                       ct = new CountTable();
+                       for(int i=0;i<matrixNames.size();i++){
+                               ct->push_back(matrixNames[i]);
+                       }
+               }
+               
+        
+               ifstream in2;
+               ofstream out2;
+               
+               string outputFile = m->getRootName(distFile) + "column.dist";
+               m->openInputFile(tempFile, in2);
+               m->openOutputFile(outputFile, out2);
+               
+               int first, second;
+               float dist;
+               
+               while (in2) {
+                       if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(tempFile); m->mothurRemove(outputFile); return 0; }
+                       
+                       in2 >> first >> second >> dist;
+                       out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
+                       m->gobble(in2);
+               }
+               in2.close();
+               out2.close();
+               
+               m->mothurRemove(tempFile);
+               distFile = outputFile;
+        
+               if (m->control_pressed) {  m->mothurRemove(outputFile);  }
+        
+               return 0;
        }
        catch(exception& e) {
-               errorOut(e, "ReadCluster", "convertPhylip2Column");
+               m->errorOut(e, "ReadCluster", "convertPhylip2Column");
                exit(1);
        }
 }