From: westcott Date: Fri, 17 Sep 2010 12:36:19 +0000 (+0000) Subject: working on megastats X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=7bf581f8c46b08eb3bb40715dac94695edee4a67 working on megastats --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 0594ef6..0f03784 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -489,6 +489,9 @@ A7F6C8E1124229F900299875 /* fisher2.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = fisher2.c; sourceTree = ""; }; A7F6C8E2124229F900299875 /* fisher2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fisher2.h; sourceTree = ""; }; A7F6C8E3124229F900299875 /* metastats2.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = metastats2.c; sourceTree = ""; }; + A7F6C8EA12423C0300299875 /* metastatscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = metastatscommand.h; sourceTree = ""; }; + A7F6C8EB12423C0300299875 /* metastatscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = metastatscommand.cpp; sourceTree = ""; }; + A7F6C9D512425D4600299875 /* metastats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = metastats.h; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXGroup section */ @@ -793,6 +796,8 @@ A7DA2099113FECD400BF472F /* matrixoutputcommand.h */, A7DA209A113FECD400BF472F /* mergefilecommand.cpp */, A7DA209B113FECD400BF472F /* mergefilecommand.h */, + A7F6C8EA12423C0300299875 /* metastatscommand.h */, + A7F6C8EB12423C0300299875 /* metastatscommand.cpp */, A7DA209C113FECD400BF472F /* mgclustercommand.cpp */, A7DA209D113FECD400BF472F /* mgclustercommand.h */, A7DA20AC113FECD400BF472F /* nocommands.cpp */, @@ -1040,6 +1045,7 @@ children = ( A7F6C8E2124229F900299875 /* fisher2.h */, A7F6C8E1124229F900299875 /* fisher2.c */, + A7F6C9D512425D4600299875 /* metastats.h */, A7F6C8E3124229F900299875 /* metastats2.c */, ); name = metastatsource; diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index fb1a702..050a615 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -195,7 +195,7 @@ void ClusterSplitCommand::help(){ m->mothurOut("The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n"); m->mothurOut("For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n"); m->mothurOut("For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify. \n"); - m->mothurOut("You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and split the distance file based on those groups. \n"); + m->mothurOut("You will also need to set the taxlevel you want to split by. mothur will split the sequences into distinct taxonomy groups, and split the distance file based on those groups. \n"); m->mothurOut("For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file. \n"); m->mothurOut("You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n"); m->mothurOut("The phylip and column parameter allow you to enter your distance file. \n"); diff --git a/commandfactory.cpp b/commandfactory.cpp index 8562a8d..bbdd616 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -86,6 +86,7 @@ #include "sffinfocommand.h" #include "seqerrorcommand.h" #include "normalizesharedcommand.h" +#include "metastatscommand.h" /*******************************************************/ @@ -176,6 +177,7 @@ CommandFactory::CommandFactory(){ commands["get.relabund"] = "get.relabund"; commands["sffinfo"] = "sffinfo"; commands["normalize.shared"] = "normalize.shared"; + commands["metastats"] = "metastats"; commands["classify.seqs"] = "MPIEnabled"; commands["dist.seqs"] = "MPIEnabled"; commands["filter.seqs"] = "MPIEnabled"; @@ -309,6 +311,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "seq.error") { command = new SeqErrorCommand(optionString); } else if(commandName == "sffinfo") { command = new SffInfoCommand(optionString); } else if(commandName == "normalize.shared") { command = new NormalizeSharedCommand(optionString); } + else if(commandName == "metastats") { command = new MetaStatsCommand(optionString); } else { command = new NoCommand(optionString); } return command; diff --git a/distancecommand.cpp b/distancecommand.cpp index 8bdd7cd..871e07b 100644 --- a/distancecommand.cpp +++ b/distancecommand.cpp @@ -26,7 +26,8 @@ DistanceCommand::DistanceCommand(string option) { else { //valid paramters for this command - string Array[] = {"fasta","oldfasta","column", "output", "calc", "countends", "cutoff", "processors", "outputdir","inputdir"}; + string Array[] = {"fasta","oldfasta","column", "output", "calc", "countends", "cutoff", "processors", "outputdir","inputdir","compress"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -114,6 +115,9 @@ DistanceCommand::DistanceCommand(string option) { temp = validParameter.validFile(parameters, "processors", false); if(temp == "not found"){ temp = "1"; } convert(temp, processors); + temp = validParameter.validFile(parameters, "compress", false); if(temp == "not found"){ temp = "F"; } + convert(temp, compress); + output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "column"; } if (((column != "") && (oldfastafile == "")) || ((column == "") && (oldfastafile != ""))) { m->mothurOut("If you provide column or oldfasta, you must provide both."); m->mothurOutEndLine(); abort=true; } @@ -166,7 +170,7 @@ DistanceCommand::~DistanceCommand(){ void DistanceCommand::help(){ try { m->mothurOut("The dist.seqs command reads a file containing sequences and creates a distance file.\n"); - m->mothurOut("The dist.seqs command parameters are fasta, oldfasta, column, calc, countends, output, cutoff and processors. \n"); + m->mothurOut("The dist.seqs command parameters are fasta, oldfasta, column, calc, countends, output, compress, cutoff and processors. \n"); m->mothurOut("The fasta parameter is required.\n"); m->mothurOut("The oldfasta and column parameters allow you to append the distances calculated to the column file.\n"); m->mothurOut("The calc parameter allows you to specify the method of calculating the distances. Your options are: nogaps, onegap or eachgap. The default is onegap.\n"); @@ -174,6 +178,7 @@ void DistanceCommand::help(){ m->mothurOut("The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n"); m->mothurOut("The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n"); m->mothurOut("The processors parameter allows you to specify number of processors to use. The default is 1.\n"); + m->mothurOut("The compress parameter allows you to indicate that you want the resulting distance file compressed. The default is false.\n"); m->mothurOut("The dist.seqs command should be in the following format: \n"); m->mothurOut("dist.seqs(fasta=yourFastaFile, calc=yourCalc, countends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) \n"); m->mothurOut("Example dist.seqs(fasta=amazon.fasta, calc=eachgap, countends=F, cutoff= 2.0, processors=3).\n"); @@ -445,6 +450,14 @@ int DistanceCommand::execute(){ m->mothurOut(outputFile); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); + + + if (m->isTrue(compress)) { + m->mothurOut("Compressing..."); m->mothurOutEndLine(); + m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine(); + system(("gzip -v " + outputFile).c_str()); + } + return 0; } diff --git a/distancecommand.h b/distancecommand.h index 64eac8d..bf463a0 100644 --- a/distancecommand.h +++ b/distancecommand.h @@ -34,7 +34,8 @@ private: Dist* distCalculator; SequenceDB alignDB; - string countends, output, fastafile, calc, outputDir, oldfastafile, column; + string countends, output, fastafile, calc, outputDir, oldfastafile, column, compress; + int processors, numNewFasta; float cutoff; map processIDS; //end line, processid diff --git a/fisher2.c b/fisher2.c index 9e62302..35f0b4d 100644 --- a/fisher2.c +++ b/fisher2.c @@ -1,15 +1,6 @@ -#include -#include -#include -//#include "ctest.h" +#include "fisher2.h" -#include -#define SINT_MAX INT_MAX - -#define max(a, b) ((a) < (b) ? (b) : (a)) -#define min(a, b) ((a) > (b) ? (b) : (a)) - static void f2xact(int *nrow, int *ncol, double *table, int *ldtabl, double *expect, double *percnt, double *emin, double *prt, double *pre, double *fact, int *ico, int @@ -34,17 +25,17 @@ static void f6xact(int *nrow, int *irow, int *iflag, int *kyy, int *key, int *ldkey, int *last, int *ipn); static void f7xact(int *nrow, int *imax, int *idif, int *k, int *ks, int *iflag); -static void f8xact(int *irow, int *is, int *i1, int *izero, int *new); +static void f8xact(int *irow, int *is, int *i1, int *izero, int *myNew); static double f9xact(int *n, int *mm, int *ir, double *fact); static void f10act(int *nrow, int *irow, int *ncol, int *icol, double *val, int *xmin, double *fact, int *nd, int *ne, int *m); -static void f11act(int *irow, int *i1, int *i2, int *new); +static void f11act(int *irow, int *i1, int *i2, int *myNew); static void prterr(int icode, char *mes); static int iwork(int iwkmax, int *iwkpt, int number, int itype); // void fexact(int *nrow, int *ncol, double *table, int *ldtabl, // double *expect, double *percnt, double *emin, double *prt, -// double *pre, /* new in C : */ int *workspace); +// double *pre, /* myNew in C : */ int *workspace); static void isort(int *n, int *ix); static double gammds(double *y, double *p, int *ifault); static double alogam(double *x, int *ifault); @@ -53,11 +44,9 @@ static int iwork(int iwkmax, int *iwkpt, int number, int itype); /* The only public function : */ -void -fexact(int *nrow, int *ncol, double *table, int *ldtabl, +void fexact(int *nrow, int *ncol, double *table, int *ldtabl, double *expect, double *percnt, double *emin, double *prt, - double *pre, /* new in C : */ int *workspace) -{ + double *pre, /* myNew in C : */ int *workspace) { /* ALGORITHM 643, COLLECTED ALGORITHMS FROM ACM. @@ -324,7 +313,7 @@ f2xact(int *nrow, int *ncol, double *table, int *ldtabl, { /* IMAX is the largest representable int on the machine. */ const int imax = SINT_MAX; -// const int imax = 2147483647; //xx: I DON´T like this, and +// const int imax = 2147483647; //xx: I DONÂ¥T like this, and // thanks to the hint from Jason Turner I don't do it anymore. (R.D-U). /* AMISS is a missing value indicator which is returned when the @@ -1741,31 +1730,31 @@ f7xact(int *nrow, int *imax, int *idif, int *k, int *ks, */ void -f8xact(int *irow, int *is, int *i1, int *izero, int *new) +f8xact(int *irow, int *is, int *i1, int *izero, int *myNew) { int i; /* Parameter adjustments */ - --new; + --myNew; --irow; /* Function Body */ for (i = 1; i < *i1; ++i) - new[i] = irow[i]; + myNew[i] = irow[i]; for (i = *i1; i <= *izero - 1; ++i) { if (*is >= irow[i + 1]) break; - new[i] = irow[i + 1]; + myNew[i] = irow[i + 1]; } - new[i] = *is; + myNew[i] = *is; for(;;) { ++i; if (i > *izero) return; - new[i] = irow[i]; + myNew[i] = irow[i]; } } @@ -1886,16 +1875,16 @@ f10act(int *nrow, int *irow, int *ncol, int *icol, double *val, ----------------------------------------------------------------------- */ void -f11act(int *irow, int *i1, int *i2, int *new) +f11act(int *irow, int *i1, int *i2, int *myNew) { int i; /* Parameter adjustments */ - --new; + --myNew; --irow; - for (i = 1; i <= (*i1 - 1); ++i) new[i] = irow[i]; - for (i = *i1; i <= *i2; ++i) new[i] = irow[i + 1]; + for (i = 1; i <= (*i1 - 1); ++i) myNew[i] = irow[i]; + for (i = *i1; i <= *i2; ++i) myNew[i] = irow[i + 1]; return; } @@ -2164,5 +2153,6 @@ L30: (((-a2 * z + a3) * z - a4) * z + a5) / y); } + #endif /* not USING_R */ diff --git a/fisher2.h b/fisher2.h index cba5966..905a1a0 100644 --- a/fisher2.h +++ b/fisher2.h @@ -1,9 +1,29 @@ -//#ifndef GUARD_fisher2 -//#define GUARD_fisher2 +#ifndef GUARD_fisher2 +#define GUARD_fisher2 -void fexact(int *nrow, int *ncol, double *table, int *ldtabl, - double *expect, double *percnt, double *emin, double *prt, - double *pre, /* new in C : */ int *workspace); +#include +#include +#include +#include -//#endif GUARD_fisher2 + +#ifdef __cplusplus +extern "C" { +#endif + +#define SINT_MAX INT_MAX + +#define max(a, b) ((a) < (b) ? (b) : (a)) +#define min(a, b) ((a) > (b) ? (b) : (a)) + + +void fexact(int *, int *, double *, int *, + double *, double *, double *, double *, + double *, int *); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/helpcommand.cpp b/helpcommand.cpp index 0403276..36ce2b7 100644 --- a/helpcommand.cpp +++ b/helpcommand.cpp @@ -14,6 +14,7 @@ HelpCommand::HelpCommand(string option) { validCommands = CommandFactory::getInstance(); + } //********************************************************************************************************************** diff --git a/helpcommand.h b/helpcommand.h index f6a4aa3..7a19ee6 100644 --- a/helpcommand.h +++ b/helpcommand.h @@ -25,6 +25,7 @@ public: private: CommandFactory* validCommands; + private: }; diff --git a/makefile b/makefile index 6f194bb..44f5484 100644 --- a/makefile +++ b/makefile @@ -61,6 +61,15 @@ ifeq ($(strip $(USEMPI)),yes) CXXFLAGS += -DUSE_MPI endif +# if you want to enable reading and writing of compressed files, set to yes. +# The default is no. this may only work on unix-like systems. + +USECOMPRESSION ?= no + +ifeq ($(strip $(USECOMPRESSION)),yes) + CXXFLAGS += -DUSE_COMPRESSION +endif + # # INCLUDE directories for mothur # @@ -72,13 +81,16 @@ endif # OBJECTS=$(patsubst %.cpp,%.o,$(wildcard *.cpp)) +OBJECTS+=$(patsubst %.c,%.o,$(wildcard *.c)) mothur : $(OBJECTS) $(CXX) $(LDFLAGS) $(TARGET_ARCH) -o $@ $(OBJECTS) install : mothur cp mothur ../Release/mothur - + +%.o : %.c %.h + $(COMPILE.c) $(OUTPUT_OPTION) $< %.o : %.cpp %.h $(COMPILE.cpp) $(OUTPUT_OPTION) $< %.o : %.cpp %.hpp diff --git a/metastats2.c b/metastats2.c index c4d1f46..3b801a0 100644 --- a/metastats2.c +++ b/metastats2.c @@ -1,131 +1,20 @@ -#include -#include -#include -#include -#include -#include "fisher2.h" -void testp(double *permuted_ttests,int *B,double *permuted,double - *Imatrix,int *nc,int *nr,int *g,double *Tinitial,double *ps); -void permute_matrix(double *Imatrix,int *nc,int *nr,double - *permuted,int *g,double *trial_ts,double *Tinitial,double - *counter); -void permute_array(int *array, int n); -void calc_twosample_ts(double *Pmatrix,int *g,int *nc,int *nr,double - *Ts,double *Tinitial,double *counter1); -void meanvar(double *pmatrix,int *g,int *nr,int *nc,double *storage); -void start(double *Imatrix,int *g,int *nr,int *nc,double *testing, - double storage[][9]); +#include "metastats.h" -int main (int argc, char *argv[]){ +int metastat_main (char* outputFileName, int numRows, int numCols, double threshold, int numPermutations, double** data, int secondGroupingStart){ - int col=-1, row=-1, size,g=0,c=0,i=0,j=0,k,counter=0,lines=0, B=10000; - double placeholder=0,min=0, thresh=0.05; - - char arr[10000], str[51], a; - char location[41]="jobj.txt", output[41]="out.txt"; - - for(i=0;i<10000;i++){ - arr[i]='q'; - } + int size,c=0,i=0,j=0,k,counter=0, bflag=0; + int B=numPermutations; + int row = numRows; + int col = numCols; + int g = secondGroupingStart; + double thresh=threshold; + double placeholder=0,min=0; -int u,rflag=0,cflag=0, bflag=0; -char *filename; -int numbers; -double numb; -extern char *optarg; -extern int optind, optopt, opterr; - -while ((u = getopt(argc, argv, ":r:c:g:b:t:f:o:")) != -1) { - switch(u) { - case 'r': - numbers = atoi(optarg); - printf("The number of features/rows is %d.\n", numbers); - row = numbers; - rflag = 1; - break; - case 'c': - numbers = atoi(optarg); - printf("The number of samples/columns is %d.\n", numbers); - col = numbers; - cflag = 1; - break; - case 'g': - numbers = atoi(optarg); - printf("Your g-value is %d.\n", numbers); - g = numbers; - break; - case 'b': - numbers = atoi(optarg); - printf("The number of permutations is %d\n", numbers); - B = numbers; - break; - case 't': - numb = atof(optarg); - printf("Threshold is is %lf\n", numb); - thresh = numb; - break; - case 'f': - filename = optarg; - printf("filename input is %s\n", filename); - strcpy(location,filename); - break; - case 'o': - filename = optarg; - printf("filename output %s\n", filename); - strcpy(output,filename); - break; - case ':': - printf("-%c without filename\n", optopt); - break; - case '?': - printf("unknown arg %c\n", optopt); - break; - } -} - - FILE *jobj, *out; - jobj=fopen(location,"r"); - - if(jobj == NULL){ - printf("Please don't forget to save your matrix in the active"); - printf(" directory as \"%s\".\n",location); - return 0; - } - - // Gets the first line of samples names and checks for user error. - fgets(arr,10000,jobj); - - for(i=0;i<10000;i++){ - if(isspace(arr[i])){ - counter++; } - } - - if (cflag == 0) { - printf("You didn't tell us how many subjects there are!\n"); - printf("But we'll still do the analysis as if there are %d subjects.\n\n",col=counter-1); - } - if (cflag == 1) { - if (col != counter-1){ - printf("We would expect %d subjects, but you said %d.\n",counter-1,col); - } - } - - while((a = fgetc(jobj)) != EOF){ - if(a == '\n'){ - lines++; } - } + char output[1024]; + strcpy(output, outputFileName); + FILE *out; - if (rflag == 0) { - printf("You didn't specify the number of features!\n"); - printf("But we'll still do the analysis assuming %d features.\n\n", row=lines-1); - } - if (rflag == 1) { - if ( lines-1 != row ){ - printf("We would expect %d features, but you said %d.\n",lines-1,row); - } - } - if (g>=col || g<=0){ printf("Check your g value\n"); } @@ -141,27 +30,15 @@ while ((u = getopt(argc, argv, ":r:c:g:b:t:f:o:")) != -1) { storage[i][j]=0; } } - // Reset file below and create a separate matrix. - rewind(jobj); - fgets(arr,10000,jobj); - + for(i=0; i= suffixLength && s.substr(s.size() - suffixLength, suffixLength).compare(suffix) == 0; +} +#endif + string MothurOut::getRootName(string longName){ try { string rootName = longName; - - if(longName.find_last_of(".") != longName.npos){ - int pos = longName.find_last_of('.')+1; - rootName = longName.substr(0, pos); + +#ifdef USE_COMPRESSION + if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) { + int pos = rootName.find_last_of('.'); + rootName = rootName.substr(0, pos); + cerr << "shortening " << longName << " to " << rootName << "\n"; + } +#endif + + if(rootName.find_last_of(".") != rootName.npos){ + int pos = rootName.find_last_of('.')+1; + rootName = rootName.substr(0, pos); } return rootName; @@ -493,16 +508,12 @@ string MothurOut::getFullPathName(string fileName){ if (path.rfind("./") == -1) { return fileName; } //already complete name else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name - //char* cwdpath = new char[1024]; + char* cwdpath = new char[1024]; - //size_t size; - //cwdpath=getcwd(cwdpath,size); - //cwd = cwdpath; - - char *cwdpath = NULL; - cwdpath = getcwd(NULL, 0); // or _getcwd - if ( cwdpath != NULL) { cwd = cwdpath; } - else { cwd = ""; } + size_t size; + cwdpath=getcwd(cwdpath,size); + + cwd = cwdpath; //rip off first '/' string simpleCWD; @@ -539,7 +550,6 @@ string MothurOut::getFullPathName(string fileName){ newFileName = "/" + newFileName; return newFileName; - } #else if (path.find("~") != -1) { //go to home directory @@ -596,12 +606,35 @@ string MothurOut::getFullPathName(string fileName){ } } /***********************************************************************/ -//no error open + int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){ try { //get full path name string completeFileName = getFullPathName(fileName); +#ifdef USE_COMPRESSION + // check for gzipped or bzipped file + if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) { + string tempName = string(tmpnam(0)); + mkfifo(tempName.c_str(), 0666); + int fork_result = fork(); + if (fork_result < 0) { + cerr << "Error forking.\n"; + exit(1); + } else if (fork_result == 0) { + string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName; + cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n"; + system(command.c_str()); + cerr << "Done decompressing " << completeFileName << "\n"; + remove(tempName.c_str()); + exit(EXIT_SUCCESS); + } else { + cerr << "waiting on child process " << fork_result << "\n"; + completeFileName = tempName; + } + } +#endif + fileHandle.open(completeFileName.c_str()); if(!fileHandle) { return 1; @@ -620,9 +653,34 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){ try { + //get full path name string completeFileName = getFullPathName(fileName); +#ifdef USE_COMPRESSION + // check for gzipped or bzipped file + if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) { + string tempName = string(tmpnam(0)); + mkfifo(tempName.c_str(), 0666); + int fork_result = fork(); + if (fork_result < 0) { + cerr << "Error forking.\n"; + exit(1); + } else if (fork_result == 0) { + string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName; + cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n"; + system(command.c_str()); + cerr << "Done decompressing " << completeFileName << "\n"; + remove(tempName.c_str()); + exit(EXIT_SUCCESS); + } else { + cerr << "waiting on child process " << fork_result << "\n"; + completeFileName = tempName; + } + } +#endif + + fileHandle.open(completeFileName.c_str()); if(!fileHandle) { mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine(); @@ -676,7 +734,27 @@ int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){ try { string completeFileName = getFullPathName(fileName); - + +#ifdef USE_COMPRESSION + // check for gzipped file + if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) { + string tempName = string(tmpnam(0)); + mkfifo(tempName.c_str(), 0666); + cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n"; + int fork_result = fork(); + if (fork_result < 0) { + cerr << "Error forking.\n"; + exit(1); + } else if (fork_result == 0) { + string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName; + system(command.c_str()); + exit(0); + } else { + completeFileName = tempName; + } + } +#endif + fileHandle.open(completeFileName.c_str(), ios::trunc); if(!fileHandle) { mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();