git.donarmstrong.com Git - mothur.git/blobdiff - formatphylip.cpp
changes while testing
[mothur.git] / formatphylip.cpp
index 206df7edfbd915f00947c46d6de07f44fc021c38..57bc5d7389b2ad8ed980b3612f950c46ce135af7 100644 (file)
@@ -12,7 +12,7 @@
 
 /***********************************************************************/
 FormatPhylipMatrix::FormatPhylipMatrix(string df) : filename(df) {
-        openInputFile(filename, fileHandle);
+        m->openInputFile(filename, fileHandle);
 }
 /***********************************************************************/
 //not using nameMap
@@ -24,10 +24,20 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                        string name;
                        ofstream out;
                        
-                       fileHandle >> nseqs >> name;
-                                               
-                       list = new ListVector(nseqs);
-                       list->set(0, name);
+                       string numTest;
+                       fileHandle >> numTest >> name;
+                       
+                       if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+                       else { convert(numTest, nseqs); }
+               
+            if(nameMap == NULL){
+                list = new ListVector(nseqs);
+                list->set(0, name);
+            }
+            else{
+                list = new ListVector(nameMap->getListVector());
+                if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
+            }                  
                        
                        char d;
                        while((d=fileHandle.get()) != EOF){
@@ -37,11 +47,11 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                                        fileHandle.close();  //reset file
                                        
                                        //open and get through numSeqs, code below formats rest of file
-                                       openInputFile(filename, fileHandle);
-                                       fileHandle >> nseqs; gobble(fileHandle);
+                                       m->openInputFile(filename, fileHandle);
+                                       fileHandle >> nseqs; m->gobble(fileHandle);
                                        
                                        distFile = filename + ".rowFormatted";
-                                       openOutputFile(distFile, out);
+                                       m->openOutputFile(distFile, out);
                                        break;
                                }
                                if(d == '\n'){
@@ -59,18 +69,20 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                                
                                ofstream outTemp;
                                string tempFile = filename + ".temp";
-                               openOutputFile(tempFile, outTemp);
+                               m->openOutputFile(tempFile, outTemp);
                 
                                //convert to square column matrix
                                for(int i=1;i<nseqs;i++){
                                
                                        fileHandle >> name;
                                        
-                                       list->set(i, name);
+                    if(nameMap == NULL){ list->set(i, name); }
+                    else { if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
+                    }
                                        
                                        for(int j=0;j<i;j++){
                                        
-                                               if (m->control_pressed) { outTemp.close(); remove(tempFile.c_str()); fileHandle.close();  delete reading; return 0; }
+                                               if (m->control_pressed) { outTemp.close(); m->mothurRemove(tempFile); fileHandle.close();  delete reading; return 0; }
                                                                                        
                                                fileHandle >> distance;
                                                
@@ -88,10 +100,10 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                                
                                //format from square column to rowFormatted
                                //sort file by first column so the distances for each row are together
-                               string outfile = getRootName(tempFile) + "sorted.dist.temp";
+                               string outfile = m->getRootName(tempFile) + "sorted.dist.temp";
                                
                                //use the unix sort 
-                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                        string command = "sort -n " + tempFile + " -o " + outfile;
                                        system(command.c_str());
                                #else //sort using windows sort
@@ -99,14 +111,14 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                                        system(command.c_str());
                                #endif
                                
-                               if (m->control_pressed) { remove(tempFile.c_str()); remove(outfile.c_str());  delete reading; return 0; }
+                               if (m->control_pressed) { m->mothurRemove(tempFile); m->mothurRemove(outfile);  delete reading; return 0; }
 
                                //output to new file distance for each row and save positions in file where new row begins
                                ifstream in;
-                               openInputFile(outfile, in);
+                               m->openInputFile(outfile, in);
                                
                                distFile = outfile + ".rowFormatted";
-                               openOutputFile(distFile, out);
+                               m->openOutputFile(distFile, out);
                                
                                rowPos.resize(nseqs, -1);
                                int currentRow;
@@ -123,9 +135,9 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                                for(int k = 0; k < firstString.length(); k++)  {   in.putback(firstString[k]);  }
                                
                                while(!in.eof()) {
-                                       if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); remove(distFile.c_str()); remove(outfile.c_str());  delete reading; return 0; }
+                                       if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); m->mothurRemove(distFile); m->mothurRemove(outfile);  delete reading; return 0; }
 
-                                       in >> first >> second >> dist; gobble(in);
+                                       in >> first >> second >> dist; m->gobble(in);
                                        
                                        if (first != currentRow) {
                                                //save position in file of each new row
@@ -165,10 +177,10 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                                in.close();
                                out.close();
                                
-                               remove(tempFile.c_str());
-                               remove(outfile.c_str());
+                               m->mothurRemove(tempFile);
+                               m->mothurRemove(outfile);
                                
-                               if (m->control_pressed) {  remove(distFile.c_str());   delete reading; return 0; }
+                               if (m->control_pressed) {  m->mothurRemove(distFile);   delete reading; return 0; }
 
                        }
                        else{ //square matrix convert directly to formatted row file
@@ -180,10 +192,12 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                                for(int i=0;i<nseqs;i++){
                                        fileHandle >> name;                
                                                                        
-                                       list->set(i, name);
+                                       if(nameMap == NULL){ list->set(i, name); }
+                    else { if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
+                    }
                                        
                                        for(int j=0;j<nseqs;j++){
-                                               if (m->control_pressed) {  fileHandle.close(); out.close(); remove(distFile.c_str());   delete reading; return 0; }
+                                               if (m->control_pressed) {  fileHandle.close(); out.close(); m->mothurRemove(distFile);   delete reading; return 0; }
                                                
                                                fileHandle >> distance;
                                        
@@ -196,7 +210,7 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                                                reading->update(index);
                                        }
                                        
-                                       gobble(fileHandle);
+                                       m->gobble(fileHandle);
                        
                                        //save position in file of each new row
                                        rowPos[i] = out.tellp();
@@ -217,7 +231,7 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                        fileHandle.close();
                        out.close();
                        
-                       if (m->control_pressed) { remove(distFile.c_str());  return 0; }
+                       if (m->control_pressed) { m->mothurRemove(distFile);  return 0; }
                        
                        list->setLabel("0");
                        
@@ -230,6 +244,236 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                 exit(1);
        }
 }
+/***********************************************************************/
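+//nameMap may be NULL: if given, list is built from nameMap->getListVector() and each matrix name is passed to nameMap->get()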
+int FormatPhylipMatrix::read(CountTable* nameMap){
+       try {
+        
+        float distance;
+        int square, nseqs;
+        string name;
+        ofstream out;
+        
+        string numTest;
+        fileHandle >> numTest >> name;
+        
+        if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+        else { convert(numTest, nseqs); }
+               
+        if(nameMap == NULL){
+            list = new ListVector(nseqs);
+            list->set(0, name);
+        }
+        else{
+            list = new ListVector(nameMap->getListVector());
+            nameMap->get(name);
+        }                      
+        
+        char d;
+        while((d=fileHandle.get()) != EOF){
+            
+            if(isalnum(d)){  //you are square
+                square = 1;
+                fileHandle.close();  //reset file
+                
+                //open and get through numSeqs, code below formats rest of file
+                m->openInputFile(filename, fileHandle);
+                fileHandle >> nseqs; m->gobble(fileHandle);
+                
+                distFile = filename + ".rowFormatted";
+                m->openOutputFile(distFile, out);
+                break;
+            }
+            if(d == '\n'){
+                square = 0;
+                break;
+            }
+        }
+        
+        Progress* reading;
+        reading = new Progress("Formatting matrix:     ", nseqs * nseqs);
+        
+        //lower triangle, so must go to column then formatted row file
+        if(square == 0){
+            int  index = 0;
+            
+            ofstream outTemp;
+            string tempFile = filename + ".temp";
+            m->openOutputFile(tempFile, outTemp);
+            
+            //convert to square column matrix
+            for(int i=1;i<nseqs;i++){
+                               
+                fileHandle >> name;
+                
+                if(nameMap == NULL){ list->set(i, name); }
+                else { nameMap->get(name); }
+                
+                
+                for(int j=0;j<i;j++){
+                                       
+                    if (m->control_pressed) { outTemp.close(); m->mothurRemove(tempFile); fileHandle.close();  delete reading; return 0; }
+                    
+                    fileHandle >> distance;
+                    
+                    if (distance == -1) { distance = 1000000; }
+                    
+                    if(distance < cutoff){
+                        outTemp << i << '\t' << j << '\t' << distance << endl;
+                        outTemp << j << '\t' << i << '\t' << distance << endl;
+                    }
+                    index++;
+                    reading->update(index);
+                }
+            }
+            outTemp.close();
+            
+            //format from square column to rowFormatted
+            //sort file by first column so the distances for each row are together
+            string outfile = m->getRootName(tempFile) + "sorted.dist.temp";
+            
+            //use the unix sort 
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            string command = "sort -n " + tempFile + " -o " + outfile;
+            system(command.c_str());
+#else //sort using windows sort
+            string command = "sort " + tempFile + " /O " + outfile;
+            system(command.c_str());
+#endif
+            
+            if (m->control_pressed) { m->mothurRemove(tempFile); m->mothurRemove(outfile);  delete reading; return 0; }
+            
+            //output to new file distance for each row and save positions in file where new row begins
+            ifstream in;
+            m->openInputFile(outfile, in);
+            
+            distFile = outfile + ".rowFormatted";
+            m->openOutputFile(distFile, out);
+            
+            rowPos.resize(nseqs, -1);
+            int currentRow;
+            int first, second;
+            float dist;
+            map<int, float> rowMap;
+            map<int, float>::iterator itRow;
+            
+            //get first currentRow
+            in >> first;
+            currentRow = first;
+            
+            string firstString = toString(first);
+            for(int k = 0; k < firstString.length(); k++)  {   in.putback(firstString[k]);  }
+            
+            while(!in.eof()) {
+                if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); m->mothurRemove(distFile); m->mothurRemove(outfile);  delete reading; return 0; }
+                
+                in >> first >> second >> dist; m->gobble(in);
+                
+                if (first != currentRow) {
+                    //save position in file of each new row
+                    rowPos[currentRow] = out.tellp();
+                    
+                    out << currentRow << '\t' << rowMap.size() << '\t';
+                    
+                    for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
+                        out << itRow->first << '\t' << itRow->second << '\t';
+                    }
+                    out << endl;
+                    
+                    currentRow = first;
+                    rowMap.clear();
+                    
+                    //save row you just read
+                    rowMap[second] = dist;
+                    
+                    index++;
+                    reading->update(index);
+                }else{
+                    rowMap[second] = dist;
+                }
+            }
+            
+            //print last Row
+            //save position in file of each new row
+            rowPos[currentRow] = out.tellp();
+            
+            out << currentRow << '\t' << rowMap.size() << '\t';
+            
+            for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
+                out << itRow->first << '\t' << itRow->second << '\t';
+            }
+            out << endl;
+            
+            in.close();
+            out.close();
+            
+            m->mothurRemove(tempFile);
+            m->mothurRemove(outfile);
+            
+            if (m->control_pressed) {  m->mothurRemove(distFile);   delete reading; return 0; }
+            
+        }
+        else{ //square matrix convert directly to formatted row file
+            int index = nseqs;
+            map<int, float> rowMap;
+            map<int, float>::iterator itRow;
+            rowPos.resize(nseqs, -1);
+            
+            for(int i=0;i<nseqs;i++){
+                fileHandle >> name;                
+                
+                if(nameMap == NULL){ list->set(i, name); }
+                else { nameMap->get(name); }
+                
+                for(int j=0;j<nseqs;j++){
+                    if (m->control_pressed) {  fileHandle.close(); out.close(); m->mothurRemove(distFile);   delete reading; return 0; }
+                    
+                    fileHandle >> distance;
+                                       
+                    if (distance == -1) { distance = 1000000; }
+                    
+                    if((distance < cutoff) && (j != i)){
+                        rowMap[j] = distance;
+                    }
+                    index++;
+                    reading->update(index);
+                }
+                
+                m->gobble(fileHandle);
+                
+                //save position in file of each new row
+                rowPos[i] = out.tellp();
+                
+                //output row to file
+                out << i << '\t' << rowMap.size() << '\t';
+                for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
+                    out << itRow->first << '\t' << itRow->second << '\t';
+                }
+                out << endl;
+                
+                //clear map for new row's info
+                rowMap.clear();
+            }
+        }
+        reading->finish();
+        delete reading;
+        fileHandle.close();
+        out.close();
+        
+        if (m->control_pressed) { m->mothurRemove(distFile);  return 0; }
+        
+        list->setLabel("0");
+        
+        return 1;
+        
+        
+       }
+       catch(exception& e) {
+        m->errorOut(e, "FormatPhylipMatrix", "read");
+        exit(1);
+       }
+}
+
 /***********************************************************************/
 FormatPhylipMatrix::~FormatPhylipMatrix(){}
 /***********************************************************************/