git.donarmstrong.com Git - mothur.git/blobdiff - readcluster.cpp
added phylip as output file type for commands that output distance matrices. added...
[mothur.git] / readcluster.cpp
index 233b8e13094cf560ef295522a7cd6a8d552dd55a..b6cb71de5fdbb888f4a7c1614987f74cd0bf8b1a 100644 (file)
 
 /***********************************************************************/
 
-ReadCluster::ReadCluster(string distfile, float c){
-               globaldata = GlobalData::getInstance();
+ReadCluster::ReadCluster(string distfile, float c, string o, bool s){
+               m = MothurOut::getInstance();
         distFile = distfile;
                cutoff = c;
+               outputDir = o;
+               sortWanted = s;
+               list = NULL;
 }
 
 /***********************************************************************/
 
-void ReadCluster::read(NameAssignment* nameMap){
+int ReadCluster::read(NameAssignment*& nameMap){
        try {
         
                if (format == "phylip") { convertPhylip2Column(nameMap); }
                else { list = new ListVector(nameMap->getListVector());  }
                
-               createHClusterFile();
-                       
-       }
-       catch(exception& e) {
-               errorOut(e, "ReadCluster", "read");
-               exit(1);
-       }
-}
-/***********************************************************************/
-
-void ReadCluster::createHClusterFile(){
-       try {   
-               string outfile = getRootName(distFile) + "sorted.dist";
-               
-               //if you can, use the unix sort since its been optimized for years
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       string command = "sort -n -k +3 " + distFile + " -o " + outfile;
-                       system(command.c_str());
-               #else //you are stuck with my best attempt...
-                       //windows sort does not have a way to specify a column, only a character in the line
-                       //since we cannot assume that the distance will always be at the the same character location on each line
-                       //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
-               
-                       //read in file line by file and put distance first
-                       string tempDistFile = distFile + ".temp";
-                       ifstream input;
-                       ofstream output;
-                       openInputFile(distFile, input);
-                       openOutputFile(tempDistFile, output);
-
-                       string firstName, secondName;
-                       float dist;
-                       while (input) {
-                               input >> firstName >> secondName >> dist;
-                               output << dist << '\t' << firstName << '\t' << secondName << endl;
-                               gobble(input);
-                       }
-                       input.close();
-                       output.close();
-               
-       
-                       //sort using windows sort
-                       string tempOutfile = outfile + ".temp";
-                       string command = "sort " + tempDistFile + " /O " + tempOutfile;
-                       system(command.c_str());
-               
-                       //read in sorted file and put distance at end again
-                       ifstream input2;
-                       openInputFile(tempOutfile, input2);
-                       openOutputFile(outfile, output);
-               
-                       while (input2) {
-                               input2 >> dist >> firstName >> secondName;
-                               output << firstName << '\t' << secondName << '\t' << dist << endl;
-                               gobble(input2);
-                       }
-                       input2.close();
-                       output.close();
+               if (m->control_pressed) { return 0; }
                
-                       //remove temp files
-                       remove(tempDistFile.c_str());
-                       remove(tempOutfile.c_str());
-               #endif
+               if (sortWanted) {  OutPutFile = m->sortFile(distFile, outputDir);  }
+               else {  OutPutFile = distFile;   } //for use by clusters splitMatrix to convert a phylip matrix to column
                
-               OutPutFile = outfile;
+               return 0;
+                       
        }
        catch(exception& e) {
-               errorOut(e, "ReadCluster", "createHClusterFile");
+               m->errorOut(e, "ReadCluster", "read");
                exit(1);
        }
 }
-
-
 /***********************************************************************/
 
-void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
+int ReadCluster::convertPhylip2Column(NameAssignment*& nameMap){
        try {   
                //convert phylip file to column file
                map<int, string> rowToName;
@@ -110,15 +53,20 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
                ofstream out;
                string tempFile = distFile + ".column.temp";
                
-               openInputFile(distFile, in);
-               openOutputFile(tempFile, out);
+               m->openInputFile(distFile, in);  m->gobble(in);
+               m->openOutputFile(tempFile, out);
                
                float distance;
                int square, nseqs;
                string name;
                vector<string> matrixNames;
-        
-               in >> nseqs >> name;
+               
+               string numTest;
+               in >> numTest >> name;
+               
+               if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+               else { convert(numTest, nseqs); }
+               
                rowToName[0] = name;
                matrixNames.push_back(name);
                
@@ -128,7 +76,7 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
                }
                else{
                        list = new ListVector(nameMap->getListVector());
-                       if(nameMap->count(name)==0){        mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
+                       if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
                }
         
                char d;
@@ -160,6 +108,9 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
                                        list->set(i, name);
                                        
                                        for(int j=0;j<i;j++){
+                                       
+                                               if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
+                                               
                                                in >> distance;
                                                
                                                if (distance == -1) { distance = 1000000; }
@@ -171,9 +122,12 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
                                        
                                }
                                else{
-                                       if(nameMap->count(name)==0){        mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
+                                       if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
                                        
                                        for(int j=0;j<i;j++){
+                                               
+                                               if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
+                                               
                                                in >> distance;
                                                
                                                if (distance == -1) { distance = 1000000; }
@@ -194,6 +148,8 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
                                if(nameMap == NULL){
                                        list->set(i, name);
                                        for(int j=0;j<nseqs;j++){
+                                               if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
+                                               
                                                in >> distance;
                                        
                                                if (distance == -1) { distance = 1000000; }
@@ -204,9 +160,11 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
                                        }
                                }
                                else{
-                                       if(nameMap->count(name)==0){        mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
+                                       if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
                                        
                                        for(int j=0;j<nseqs;j++){
+                                               if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
+                                               
                                                in >> distance;
                         
                                                if (distance == -1) { distance = 1000000; }
@@ -223,39 +181,44 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
                list->setLabel("0");
                in.close();
                out.close();
-               
+       
                if(nameMap == NULL){
                        nameMap = new NameAssignment();
                        for(int i=0;i<matrixNames.size();i++){
                                nameMap->push_back(matrixNames[i]);
                        }
-                       globaldata->nameMap = nameMap;
                }
                
        
                ifstream in2;
                ofstream out2;
                
-               string outputFile = getRootName(distFile) + "column.dist";
-               openInputFile(tempFile, in2);
-               openOutputFile(outputFile, out2);
+               string outputFile = m->getRootName(distFile) + "column.dist";
+               m->openInputFile(tempFile, in2);
+               m->openOutputFile(outputFile, out2);
                
                int first, second;
                float dist;
                
                while (in2) {
+                       if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(tempFile); m->mothurRemove(outputFile); return 0; }
+                       
                        in2 >> first >> second >> dist;
                        out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
-                       gobble(in2);
+                       m->gobble(in2);
                }
                in2.close();
                out2.close();
                
-               remove(tempFile.c_str());
+               m->mothurRemove(tempFile);
                distFile = outputFile;
+       
+               if (m->control_pressed) {  m->mothurRemove(outputFile);  }
+
+               return 0;
        }
        catch(exception& e) {
-               errorOut(e, "ReadCluster", "convertPhylip2Column");
+               m->errorOut(e, "ReadCluster", "convertPhylip2Column");
                exit(1);
        }
 }