git.donarmstrong.com Git - mothur.git/blobdiff - mothurout.cpp
working on megastats
[mothur.git] / mothurout.cpp
index fe3ea097e64faf1b0418f22fa3b248b026ad4dd1..0c3ef3d9cddf5cc90544d9a6d0117882d60db6b6 100644 (file)
@@ -257,6 +257,8 @@ int MothurOut::mem_usage(double& vm_usage, double& resident_set) {
 /***********************************************************************/
 int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){
        try {
+               fileName = getFullPathName(fileName);
+               
                fileHandle.open(fileName.c_str(), ios::app);
                if(!fileHandle) {
                        mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
@@ -346,14 +348,29 @@ string MothurOut::getline(ifstream& fileHandle) {
 }
 /***********************************************************************/
 
+#ifdef USE_COMPRESSION
+inline bool endsWith(const string& s, const char * suffix){ // true iff s ends with suffix (a non-null C string); an empty suffix always matches
+  const size_t suffixLength = strlen(suffix); // the length guard below keeps the start offset from underflowing
+  return s.size() >= suffixLength && s.compare(s.size() - suffixLength, suffixLength, suffix) == 0; // compares the tail in place, no substr() copy
+}
+#endif
+
 string MothurOut::getRootName(string longName){
        try {
        
                string rootName = longName;
-               
-               if(longName.find_last_of(".") != longName.npos){
-                       int pos = longName.find_last_of('.')+1;
-                       rootName = longName.substr(0, pos);
+
+#ifdef USE_COMPRESSION
+    if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) {
+      int pos = rootName.find_last_of('.');
+      rootName = rootName.substr(0, pos);
+      cerr << "shortening " << longName << " to " << rootName << "\n";
+    }
+#endif
+
+               if(rootName.find_last_of(".") != rootName.npos){
+                       int pos = rootName.find_last_of('.')+1;
+                       rootName = rootName.substr(0, pos);
                }
 
                return rootName;
@@ -445,6 +462,9 @@ string MothurOut::getExtension(string longName){
 /***********************************************************************/
 bool MothurOut::isBlank(string fileName){
        try {
+               
+               fileName = getFullPathName(fileName);
+               
                ifstream fileHandle;
                fileHandle.open(fileName.c_str());
                if(!fileHandle) {
@@ -592,6 +612,29 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){
                        //get full path name
                        string completeFileName = getFullPathName(fileName);
 
+#ifdef USE_COMPRESSION
+      // check for gzipped or bzipped file
+      if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
+        string tempName = string(tmpnam(0));
+        mkfifo(tempName.c_str(), 0666);
+        int fork_result = fork();
+        if (fork_result < 0) {
+          cerr << "Error forking.\n";
+          exit(1);
+        } else if (fork_result == 0) {
+          string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
+          cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
+          system(command.c_str());
+          cerr << "Done decompressing " << completeFileName << "\n";
+          remove(tempName.c_str());
+          exit(EXIT_SUCCESS);
+        } else {
+          cerr << "waiting on child process " << fork_result << "\n";
+          completeFileName = tempName;
+        }
+      }
+#endif
+
                        fileHandle.open(completeFileName.c_str());
                        if(!fileHandle) {
                                return 1;
@@ -610,9 +653,34 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){
 
 int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
        try {
+
                //get full path name
                string completeFileName = getFullPathName(fileName);
 
+#ifdef USE_COMPRESSION
+  // check for gzipped or bzipped file
+  if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
+    string tempName = string(tmpnam(0));
+    mkfifo(tempName.c_str(), 0666);
+    int fork_result = fork();
+    if (fork_result < 0) {
+      cerr << "Error forking.\n";
+      exit(1);
+    } else if (fork_result == 0) {
+      string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
+      cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
+      system(command.c_str());
+      cerr << "Done decompressing " << completeFileName << "\n";
+      remove(tempName.c_str());
+      exit(EXIT_SUCCESS);
+    } else {
+      cerr << "waiting on child process " << fork_result << "\n";
+      completeFileName = tempName;
+    }
+  }
+#endif
+
+
                fileHandle.open(completeFileName.c_str());
                if(!fileHandle) {
                        mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
@@ -666,7 +734,27 @@ int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){
        try { 
        
                string completeFileName = getFullPathName(fileName);
-               
+
+#ifdef USE_COMPRESSION
+    // check for gzipped or bzipped file
+    if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
+      string tempName = string(tmpnam(0));
+      mkfifo(tempName.c_str(), 0666);
+      cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
+      int fork_result = fork();
+      if (fork_result < 0) {
+        cerr << "Error forking.\n";
+        exit(1);
+      } else if (fork_result == 0) {
+        string command = string(endsWith(completeFileName, ".gz") ?  "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName;
+        system(command.c_str());
+        exit(0);
+      } else {
+        completeFileName = tempName;
+      }
+    }
+#endif
+
                fileHandle.open(completeFileName.c_str(), ios::trunc);
                if(!fileHandle) {
                        mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
@@ -829,7 +917,8 @@ vector<unsigned long int> MothurOut::setFilePosFasta(string filename, int& num)
 /**************************************************************************************************/
 vector<unsigned long int> MothurOut::setFilePosEachLine(string filename, int& num) {
        try {
-
+                       filename = getFullPathName(filename);
+                       
                        vector<unsigned long int> positions;
                        ifstream in;
                        openInputFile(filename, in);
@@ -851,7 +940,7 @@ vector<unsigned long int> MothurOut::setFilePosEachLine(string filename, int& nu
                
                        FILE * pFile;
                        unsigned long int size;
-               
+                       
                        //get num bytes in file
                        pFile = fopen (filename.c_str(),"rb");
                        if (pFile==NULL) perror ("Error opening file");
@@ -881,6 +970,8 @@ vector<unsigned long int> MothurOut::divideFile(string filename, int& proc) {
                FILE * pFile;
                unsigned long int size;
                
+               filename = getFullPathName(filename);
+               
                //get num bytes in file
                pFile = fopen (filename.c_str(),"rb");
                if (pFile==NULL) perror ("Error opening file");
@@ -893,7 +984,7 @@ vector<unsigned long int> MothurOut::divideFile(string filename, int& proc) {
                //estimate file breaks
                unsigned long int chunkSize = 0;
                chunkSize = size / proc;
-               
+       
                //file too small to divide by processors
                if (chunkSize == 0)  {  proc = 1;       filePos.push_back(size); return filePos;        }
        
@@ -914,15 +1005,16 @@ vector<unsigned long int> MothurOut::divideFile(string filename, int& proc) {
                        
                        //there was not another sequence before the end of the file
                        unsigned long int sanityPos = in.tellg();
-                       if (sanityPos = -1) {   break;  }
-                       else {   filePos.push_back(newSpot);  }
+
+                       if (sanityPos == -1) {  break;  }
+                       else {  filePos.push_back(newSpot);  }
                        
                        in.close();
                }
                
                //save end pos
                filePos.push_back(size);
-               
+
                //sanity check filePos
                for (int i = 0; i < (filePos.size()-1); i++) {
                        if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
@@ -1112,7 +1204,21 @@ void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbo
 //This function parses the estimator options and puts them in a vector
 void MothurOut::splitAtDash(string& estim, vector<string>& container) {
        try {
-               string individual;
+               string individual = "";
+               int estimLength = estim.size();
+               for(int i=0;i<estimLength;i++){
+                       if(estim[i] == '-'){
+                               container.push_back(individual);
+                               individual = "";                                
+                       }
+                       else{
+                               individual += estim[i];
+                       }
+               }
+               container.push_back(individual);
+
+       
+       /*      string individual;
                
                while (estim.find_first_of('-') != -1) {
                        individual = estim.substr(0,estim.find_first_of('-'));
@@ -1122,7 +1228,7 @@ void MothurOut::splitAtDash(string& estim, vector<string>& container) {
                        }
                }
                //get last one
-               container.push_back(estim);
+               container.push_back(estim); */
        }
        catch(exception& e) {
                errorOut(e, "MothurOut", "splitAtDash");
@@ -1134,17 +1240,31 @@ void MothurOut::splitAtDash(string& estim, vector<string>& container) {
 //This function parses the label options and puts them in a set
 void MothurOut::splitAtDash(string& estim, set<string>& container) {
        try {
-               string individual;
-               
-               while (estim.find_first_of('-') != -1) {
-                       individual = estim.substr(0,estim.find_first_of('-'));
-                       if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
-                               estim = estim.substr(estim.find_first_of('-')+1, estim.length());
+               string individual = "";
+               int estimLength = estim.size();
+               for(int i=0;i<estimLength;i++){
+                       if(estim[i] == '-'){
                                container.insert(individual);
+                               individual = "";                                
+                       }
+                       else{
+                               individual += estim[i];
                        }
                }
+               container.insert(individual);
+
+       //      string individual;
+               
+       //      while (estim.find_first_of('-') != -1) {
+       //              individual = estim.substr(0,estim.find_first_of('-'));
+       //              if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
+       //                      estim = estim.substr(estim.find_first_of('-')+1, estim.length());
+       //                      container.insert(individual);
+       //              }
+       //      }
                //get last one
-               container.insert(estim);
+       //      container.insert(estim);
+       
        }
        catch(exception& e) {
                errorOut(e, "MothurOut", "splitAtDash");