From: westcott Date: Wed, 28 Jul 2010 15:09:56 +0000 (+0000) Subject: some bug fixes X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=b4f80c1d2be78a8743a408a2b6d462b07f9f71ff some bug fixes --- diff --git a/aligncommand.cpp b/aligncommand.cpp index ee27776..f19810e 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -45,7 +45,7 @@ AlignCommand::AlignCommand(string option) { OptionParser parser(option); map parameters = parser.getParameters(); - ValidParameters validParameter; + ValidParameters validParameter("align.seqs"); map::iterator it; //check to make sure all parameters are valid for command @@ -261,7 +261,7 @@ int AlignCommand::execute(){ #ifdef USE_MPI int pid, end, numSeqsPerProcessor; int tag = 2001; - vector MPIPos; + vector MPIPos; MPIWroteAccnos = false; MPI_Status status; @@ -646,7 +646,7 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName, } //********************************************************************************************************************** #ifdef USE_MPI -int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& alignFile, MPI_File& reportFile, MPI_File& accnosFile, vector& MPIPos){ +int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& alignFile, MPI_File& reportFile, MPI_File& accnosFile, vector& MPIPos){ try { string outputString = ""; MPI_Status statusReport; diff --git a/aligncommand.h b/aligncommand.h index 186ca63..fb47874 100644 --- a/aligncommand.h +++ b/aligncommand.h @@ -43,7 +43,7 @@ private: void appendReportFiles(string, string); #ifdef USE_MPI - int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector&); + int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector&); #endif string candidateFileName, templateFileName, distanceFileName, search, align, outputDir; diff --git a/chimerabellerophoncommand.cpp b/chimerabellerophoncommand.cpp index fd768a7..9199909 100644 --- 
a/chimerabellerophoncommand.cpp +++ b/chimerabellerophoncommand.cpp @@ -27,7 +27,7 @@ ChimeraBellerophonCommand::ChimeraBellerophonCommand(string option) { OptionParser parser(option); map parameters = parser.getParameters(); - ValidParameters validParameter; + ValidParameters validParameter("chimera.bellerophon"); map::iterator it; //check to make sure all parameters are valid for command diff --git a/chimeraccodecommand.cpp b/chimeraccodecommand.cpp index 69c3401..962c15b 100644 --- a/chimeraccodecommand.cpp +++ b/chimeraccodecommand.cpp @@ -27,7 +27,7 @@ ChimeraCcodeCommand::ChimeraCcodeCommand(string option) { OptionParser parser(option); map parameters = parser.getParameters(); - ValidParameters validParameter; + ValidParameters validParameter("chimera.ccode"); map::iterator it; //check to make sure all parameters are valid for command @@ -228,7 +228,7 @@ int ChimeraCcodeCommand::execute(){ int pid, end, numSeqsPerProcessor; int tag = 2001; - vector MPIPos; + vector MPIPos; MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are @@ -505,7 +505,7 @@ int ChimeraCcodeCommand::driver(linePair* line, string outputFName, string filen } //********************************************************************************************************************** #ifdef USE_MPI -int ChimeraCcodeCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector& MPIPos){ +int ChimeraCcodeCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector& MPIPos){ try { MPI_Status status; diff --git a/chimeraccodecommand.h b/chimeraccodecommand.h index c740a2f..5989eb2 100644 --- a/chimeraccodecommand.h +++ b/chimeraccodecommand.h @@ -39,7 +39,7 @@ private: int createProcesses(string, string, string); #ifdef USE_MPI - int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector&); + int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector&); #endif bool abort, filter; diff 
--git a/chimeracheckcommand.cpp b/chimeracheckcommand.cpp index 44696cb..264eb4c 100644 --- a/chimeracheckcommand.cpp +++ b/chimeracheckcommand.cpp @@ -26,7 +26,7 @@ ChimeraCheckCommand::ChimeraCheckCommand(string option) { OptionParser parser(option); map parameters = parser.getParameters(); - ValidParameters validParameter; + ValidParameters validParameter("chimera.check"); map::iterator it; //check to make sure all parameters are valid for command @@ -262,7 +262,7 @@ int ChimeraCheckCommand::execute(){ int pid, end, numSeqsPerProcessor; int tag = 2001; - vector MPIPos; + vector MPIPos; MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are @@ -490,7 +490,7 @@ int ChimeraCheckCommand::driver(linePair* line, string outputFName, string filen } //********************************************************************************************************************** #ifdef USE_MPI -int ChimeraCheckCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, vector& MPIPos){ +int ChimeraCheckCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, vector& MPIPos){ try { MPI_File outAccMPI; MPI_Status status; diff --git a/chimeracheckcommand.h b/chimeracheckcommand.h index 89dc6aa..84c75ac 100644 --- a/chimeracheckcommand.h +++ b/chimeracheckcommand.h @@ -40,7 +40,7 @@ private: int createProcesses(string, string); #ifdef USE_MPI - int driverMPI(int, int, MPI_File&, MPI_File&, vector&); + int driverMPI(int, int, MPI_File&, MPI_File&, vector&); #endif bool abort, svg; diff --git a/chimerapintailcommand.cpp b/chimerapintailcommand.cpp index fa7fd17..f11e1ca 100644 --- a/chimerapintailcommand.cpp +++ b/chimerapintailcommand.cpp @@ -27,7 +27,7 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option) { OptionParser parser(option); map parameters = parser.getParameters(); - ValidParameters validParameter; + ValidParameters validParameter("chimera.pintail"); map::iterator it; //check to make sure all parameters are valid 
for command @@ -276,7 +276,7 @@ int ChimeraPintailCommand::execute(){ #ifdef USE_MPI int pid, end, numSeqsPerProcessor; int tag = 2001; - vector MPIPos; + vector MPIPos; MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are @@ -523,7 +523,7 @@ int ChimeraPintailCommand::driver(linePair* line, string outputFName, string fil } //********************************************************************************************************************** #ifdef USE_MPI -int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector& MPIPos){ +int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector& MPIPos){ try { MPI_Status status; diff --git a/chimerapintailcommand.h b/chimerapintailcommand.h index 9fc6f43..9370745 100644 --- a/chimerapintailcommand.h +++ b/chimerapintailcommand.h @@ -40,7 +40,7 @@ private: int createProcesses(string, string, string); #ifdef USE_MPI - int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector&); + int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector&); #endif bool abort, filter; diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index 89d89e3..321883b 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -29,7 +29,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { OptionParser parser(option); map parameters = parser.getParameters(); - ValidParameters validParameter; + ValidParameters validParameter("chimera.slayer"); map::iterator it; //check to make sure all parameters are valid for command @@ -253,7 +253,7 @@ int ChimeraSlayerCommand::execute(){ #ifdef USE_MPI int pid, end, numSeqsPerProcessor; int tag = 2001; - vector MPIPos; + vector MPIPos; MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are @@ -529,7 +529,7 @@ int ChimeraSlayerCommand::driver(linePair* line, string outputFName, string file } 
//********************************************************************************************************************** #ifdef USE_MPI -int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector& MPIPos){ +int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector& MPIPos){ try { MPI_Status status; int pid; diff --git a/chimeraslayercommand.h b/chimeraslayercommand.h index c2d9c45..95541b2 100644 --- a/chimeraslayercommand.h +++ b/chimeraslayercommand.h @@ -39,7 +39,7 @@ private: int createProcesses(string, string, string); #ifdef USE_MPI - int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector&); + int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector&); #endif bool abort, realign; diff --git a/classifyotucommand.cpp b/classifyotucommand.cpp index aac1637..00f04b0 100644 --- a/classifyotucommand.cpp +++ b/classifyotucommand.cpp @@ -250,7 +250,11 @@ int ClassifyOtuCommand::readNamesFile() { inNames >> names; //read from second column A,B,C,D gobble(inNames); - nameMap[name] = names; + //parse names into vector + vector theseNames; + splitAtComma(names, theseNames); + + for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = name; } if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; } } @@ -309,41 +313,46 @@ string ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* thisList, size = 0; for (int i = 0; i < names.size(); i++) { - - if (m->control_pressed) { delete phylo; return conTax; } - - //is this sequence in the taxonomy file - it = taxMap.find(names[i]); - - if (it == taxMap.end()) { //this name is not in taxonomy file, skip it - m->mothurOut(names[i] + " is not in your taxonomy file. 
I will not include it in the consensus."); m->mothurOutEndLine(); - }else{ + + //if namesfile include the names + if (namefile != "") { + //is this sequence in the name file - namemap maps seqName -> repSeqName + it2 = nameMap.find(names[i]); - //if namesfile include the names - if (namefile != "") { - //is this sequence in the name file - it2 = nameMap.find(names[i]); + if (it2 == nameMap.end()) { //this name is not in name file, skip it + m->mothurOut(names[i] + " is not in your name file. I will not include it in the consensus."); m->mothurOutEndLine(); + }else{ - if (it2 == nameMap.end()) { //this name is not in name file, skip it - m->mothurOut(names[i] + " is not in your name file. I will not include it in the consensus."); m->mothurOutEndLine(); + //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique + it = taxMap.find(it2->second); + + if (it == taxMap.end()) { //this name is not in taxonomy file, skip it + if (names[i] != it->second) { m->mothurOut(names[i] + " is represented by " + it2->second + " and is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); } + else { m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); } }else{ - - vector nameFileNames; - splitAtComma(it2->second, nameFileNames); - - for (int j = 0; j < nameFileNames.size(); j++) { - //add seq to tree - phylo->addSeqToTree(nameFileNames[j], it->second); - size++; - } + + //add seq to tree + phylo->addSeqToTree(names[i], it->second); + size++; } - + } + + }else{ + //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique + it = taxMap.find(names[i]); + + if (it == taxMap.end()) { //this name is not in taxonomy file, skip it + m->mothurOut(names[i] + " is not in your taxonomy file. 
I will not include it in the consensus."); m->mothurOutEndLine(); }else{ //add seq to tree phylo->addSeqToTree(names[i], it->second); size++; } } + + + if (m->control_pressed) { delete phylo; return conTax; } + } //build tree diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index a20a383..0635fde 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -32,7 +32,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { OptionParser parser(option); map parameters = parser.getParameters(); - ValidParameters validParameter; + ValidParameters validParameter("classify.seqs"); map::iterator it; //check to make sure all parameters are valid for command @@ -414,7 +414,7 @@ int ClassifySeqsCommand::execute(){ #ifdef USE_MPI int pid, end, numSeqsPerProcessor; int tag = 2001; - vector MPIPos; + vector MPIPos; MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are @@ -841,7 +841,7 @@ int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFNa } //********************************************************************************************************************** #ifdef USE_MPI -int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& newFile, MPI_File& tempFile, vector& MPIPos){ +int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& newFile, MPI_File& tempFile, vector& MPIPos){ try { MPI_Status statusNew; MPI_Status statusTemp; diff --git a/classifyseqscommand.h b/classifyseqscommand.h index 3294118..03e6826 100644 --- a/classifyseqscommand.h +++ b/classifyseqscommand.h @@ -61,7 +61,7 @@ private: int MPIReadNamesFile(string); #ifdef USE_MPI - int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector&); + int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector&); #endif }; diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index deea384..d6855cc 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -34,7 
+34,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option) { OptionParser parser(option); map parameters = parser.getParameters(); - ValidParameters validParameter; + ValidParameters validParameter("cluster.split"); //check to make sure all parameters are valid for command map::iterator it; diff --git a/collectcommand.cpp b/collectcommand.cpp index 88be43b..8c1565b 100644 --- a/collectcommand.cpp +++ b/collectcommand.cpp @@ -137,12 +137,13 @@ int CollectCommand::execute(){ vector outputNames; + string hadShared = ""; if ((globaldata->getFormat() != "sharedfile")) { inputFileNames.push_back(globaldata->inputFileName); } - else { inputFileNames = parseSharedFile(globaldata->getSharedFile()); globaldata->setFormat("rabund"); } + else { hadShared = globaldata->getSharedFile(); inputFileNames = parseSharedFile(globaldata->getSharedFile()); globaldata->setFormat("rabund"); } for (int p = 0; p < inputFileNames.size(); p++) { - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } globaldata->Groups.clear(); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } globaldata->Groups.clear(); if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; } if (outputDir == "") { outputDir += hasPath(inputFileNames[p]); } string fileNameRoot = outputDir + getRootName(getSimpleName(inputFileNames[p])); @@ -221,7 +222,7 @@ int CollectCommand::execute(){ } //if the users entered no valid calculators don't execute command - if (cDisplays.size() == 0) { return 0; } + if (cDisplays.size() == 0) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; } read = new ReadOTUFile(inputFileNames[p]); read->read(&*globaldata); @@ -242,6 +243,7 @@ int CollectCommand::execute(){ delete order; globaldata->gorder = NULL; delete validCalculator; 
globaldata->Groups.clear(); + if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; } @@ -256,6 +258,7 @@ int CollectCommand::execute(){ delete order; globaldata->gorder = NULL; delete validCalculator; globaldata->Groups.clear(); + if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; } @@ -306,6 +309,7 @@ int CollectCommand::execute(){ delete read; delete validCalculator; globaldata->Groups.clear(); + if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; } @@ -341,6 +345,7 @@ int CollectCommand::execute(){ delete order; globaldata->gorder = NULL; delete validCalculator; globaldata->Groups.clear(); + if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; } delete order; @@ -354,8 +359,11 @@ int CollectCommand::execute(){ delete validCalculator; } - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + //return to shared mode if you changed above + if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } diff --git a/distancecommand.cpp b/distancecommand.cpp index 503f64b..05ed87c 100644 --- a/distancecommand.cpp +++ b/distancecommand.cpp @@ -32,7 +32,7 @@ DistanceCommand::DistanceCommand(string option) { OptionParser parser(option); map parameters = parser.getParameters(); - ValidParameters validParameter; + ValidParameters validParameter("dist.seqs"); map::iterator it2; //check to make sure all parameters are valid for command diff 
--git a/engine.cpp b/engine.cpp index 6534935..7bcb4e4 100644 --- a/engine.cpp +++ b/engine.cpp @@ -51,11 +51,12 @@ bool InteractEngine::getInput(){ while(quitCommandCalled != 1){ + mout->mothurOutEndLine(); input = getCommand(); mout->mothurOutEndLine(); - + if (mout->control_pressed) { input = "quit()"; } //allow user to omit the () on the quit command @@ -331,7 +332,7 @@ bool ScriptEngine::getInput(){ mout->executing = false; #ifdef USE_MPI - cout << pid << " is done in execute" << endl; + //cout << pid << " is done in execute" << endl; } #endif }else { diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index 3784c3d..d2c7c6b 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -28,7 +28,7 @@ FilterSeqsCommand::FilterSeqsCommand(string option) { OptionParser parser(option); map parameters = parser.getParameters(); - ValidParameters validParameter; + ValidParameters validParameter("filter.seqs"); map::iterator it; //check to make sure all parameters are valid for command @@ -258,7 +258,7 @@ int FilterSeqsCommand::filterSequences() { #ifdef USE_MPI int pid, start, end, numSeqsPerProcessor, num; int tag = 2001; - vectorMPIPos; + vectorMPIPos; MPI_Status status; MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running @@ -393,7 +393,7 @@ int FilterSeqsCommand::filterSequences() { } #ifdef USE_MPI /**************************************************************************************/ -int FilterSeqsCommand::driverMPIRun(int start, int num, MPI_File& inMPI, MPI_File& outMPI, vector& MPIPos) { +int FilterSeqsCommand::driverMPIRun(int start, int num, MPI_File& inMPI, MPI_File& outMPI, vector& MPIPos) { try { string outputString = ""; int count = 0; @@ -569,7 +569,7 @@ string FilterSeqsCommand::createFilter() { #ifdef USE_MPI int pid, numSeqsPerProcessor, num; int tag = 2001; - vector MPIPos; + vector MPIPos; MPI_Status status; MPI_File inMPI; @@ -797,7 +797,7 @@ int 
FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair* } #ifdef USE_MPI /**************************************************************************************/ -int FilterSeqsCommand::MPICreateFilter(int start, int num, Filters& F, MPI_File& inMPI, vector& MPIPos) { +int FilterSeqsCommand::MPICreateFilter(int start, int num, Filters& F, MPI_File& inMPI, vector& MPIPos) { try { MPI_Status status; diff --git a/filterseqscommand.h b/filterseqscommand.h index 2ec152b..e068405 100644 --- a/filterseqscommand.h +++ b/filterseqscommand.h @@ -49,8 +49,8 @@ private: int driverRunFilter(string, string, string, linePair*); int driverCreateFilter(Filters& F, string filename, linePair* line); #ifdef USE_MPI - int driverMPIRun(int, int, MPI_File&, MPI_File&, vector&); - int MPICreateFilter(int, int, Filters&, MPI_File&, vector&); + int driverMPIRun(int, int, MPI_File&, MPI_File&, vector&); + int MPICreateFilter(int, int, Filters&, MPI_File&, vector&); #endif int setLines(string); diff --git a/getoturepcommand.cpp b/getoturepcommand.cpp index 89c62fa..b872d1a 100644 --- a/getoturepcommand.cpp +++ b/getoturepcommand.cpp @@ -755,7 +755,7 @@ int GetOTURepCommand::processNames(string filename, string label) { if (sequence != "not found") { if (sorted == "") { //print them out - rep = rep + "|" + toString(i+1); + rep = rep + "\t" + toString(i+1); rep = rep + "|" + toString(binsize); if (groupfile != "") { rep = rep + "|" + group; @@ -781,7 +781,7 @@ int GetOTURepCommand::processNames(string filename, string label) { //print them for (int i = 0; i < reps.size(); i++) { string sequence = fasta->getSequence(reps[i].name); - string outputName = reps[i].name + "|" + toString(reps[i].bin); + string outputName = reps[i].name + "\t" + toString(reps[i].bin); outputName = outputName + "|" + toString(reps[i].size); if (groupfile != "") { outputName = outputName + "|" + reps[i].group; @@ -794,6 +794,7 @@ int GetOTURepCommand::processNames(string filename, string label) { 
out.close(); out2.close(); + remove(filename.c_str()); rename(tempNameFile.c_str(), filename.c_str()); return 0; diff --git a/makefile b/makefile index 3f0893c..dccd2c3 100644 --- a/makefile +++ b/makefile @@ -13,7 +13,7 @@ CXXFLAGS += -O3 -MOTHUR_FILES = "\"Enter_your_default_path_here\"" +MOTHUR_FILES = "\"../Release\"" ifeq ($(strip $(MOTHUR_FILES)),"\"Enter_your_default_path_here\"") else CXXFLAGS += -DMOTHUR_FILES=${MOTHUR_FILES} @@ -39,7 +39,7 @@ endif # if you do not want to use the readline library, set this to no. # make sure you have the library installed -USEREADLINE ?= yes +USEREADLINE ?= no ifeq ($(strip $(USEREADLINE)),yes) CXXFLAGS += -DUSE_READLINE @@ -48,7 +48,7 @@ ifeq ($(strip $(USEREADLINE)),yes) -lncurses endif -USEMPI ?= no +USEMPI ?= yes ifeq ($(strip $(USEMPI)),yes) CXX = mpic++ diff --git a/rarefactcommand.cpp b/rarefactcommand.cpp index 5d6b5d8..217a152 100644 --- a/rarefactcommand.cpp +++ b/rarefactcommand.cpp @@ -131,17 +131,18 @@ int RareFactCommand::execute(){ vector outputNames; + string hadShared = ""; if ((globaldata->getFormat() != "sharedfile")) { inputFileNames.push_back(globaldata->inputFileName); } - else { inputFileNames = parseSharedFile(globaldata->getSharedFile()); globaldata->setFormat("rabund"); } - - if (m->control_pressed) { return 0; } + else { hadShared = globaldata->getSharedFile(); inputFileNames = parseSharedFile(globaldata->getSharedFile()); globaldata->setFormat("rabund"); } + + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; } for (int p = 0; p < inputFileNames.size(); p++) { string fileNameRoot = outputDir + getRootName(getSimpleName(inputFileNames[p])); globaldata->inputFileName = inputFileNames[p]; - if (m->control_pressed) { return 0; } + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; } if (inputFileNames.size() > 1) { 
m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine(); @@ -203,12 +204,12 @@ int RareFactCommand::execute(){ set processedLabels; set userLabels = labels; - if (m->control_pressed) { for(int i=0;iginput = NULL; delete order; globaldata->gorder = NULL; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } for(int i=0;iginput = NULL; delete order; globaldata->gorder = NULL; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } //as long as you are not at the end of the file or done wih the lines you want while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { for(int i=0;iginput = NULL; delete order; globaldata->gorder = NULL; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } for(int i=0;iginput = NULL; delete order; globaldata->gorder = NULL; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } if(allLines == 1 || labels.count(order->getLabel()) == 1){ @@ -246,7 +247,7 @@ int RareFactCommand::execute(){ order = (input->getOrderVector()); } - if (m->control_pressed) { for(int i=0;iginput = NULL; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } for(int i=0;iginput = NULL; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } //output error messages about any remaining user labels set::iterator it; @@ -261,7 +262,7 @@ int 
RareFactCommand::execute(){ } } - if (m->control_pressed) { for(int i=0;iginput = NULL; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } for(int i=0;iginput = NULL; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } //run last label if you need to if (needToRun == true) { @@ -286,6 +287,8 @@ int RareFactCommand::execute(){ } + if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } m->mothurOutEndLine(); diff --git a/readtreecommand.cpp b/readtreecommand.cpp index 8e292af..07aa5b3 100644 --- a/readtreecommand.cpp +++ b/readtreecommand.cpp @@ -108,8 +108,10 @@ void ReadTreeCommand::help(){ try { m->mothurOut("The read.tree command must be run before you execute a unifrac.weighted, unifrac.unweighted. \n"); m->mothurOut("It also must be run before using the parsimony command, unless you are using the randomtree parameter.\n"); + m->mothurOut("The read.tree command parameters are tree, group and name.\n"); m->mothurOut("The read.tree command should be in the following format: read.tree(tree=yourTreeFile, group=yourGroupFile).\n"); m->mothurOut("The tree and group parameters are both required.\n"); + m->mothurOut("The name parameter allows you to enter a namefile.\n"); m->mothurOut("Note: No spaces between parameter labels (i.e. 
tree), '=' and parameters (i.e.yourTreefile).\n\n"); } catch(exception& e) { diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index 09b82ce..b2fdaff 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -28,7 +28,7 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option) { OptionParser parser(option); map parameters = parser.getParameters(); - ValidParameters validParameter; + ValidParameters validParameter("screen.seqs"); map::iterator it; //check to make sure all parameters are valid for command @@ -179,7 +179,7 @@ int ScreenSeqsCommand::execute(){ #ifdef USE_MPI int pid, end, numSeqsPerProcessor; int tag = 2001; - vector MPIPos; + vector MPIPos; MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are @@ -749,7 +749,7 @@ int ScreenSeqsCommand::driver(linePair* line, string goodFName, string badFName, } //********************************************************************************************************************** #ifdef USE_MPI -int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& goodFile, MPI_File& badFile, MPI_File& badAccnosFile, vector& MPIPos, set& badSeqNames){ +int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& goodFile, MPI_File& badFile, MPI_File& badAccnosFile, vector& MPIPos, set& badSeqNames){ try { string outputString = ""; MPI_Status statusGood; diff --git a/screenseqscommand.h b/screenseqscommand.h index 314e661..071724f 100644 --- a/screenseqscommand.h +++ b/screenseqscommand.h @@ -38,7 +38,7 @@ private: int createProcesses(string, string, string, string, set&); #ifdef USE_MPI - int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector&, set&); + int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector&, set&); #endif bool abort; diff --git a/seqsummarycommand.cpp b/seqsummarycommand.cpp index 6640539..5009ff5 100644 --- a/seqsummarycommand.cpp +++ b/seqsummarycommand.cpp @@ -27,7 +27,7 @@ 
SeqSummaryCommand::SeqSummaryCommand(string option) { OptionParser parser(option); map parameters = parser.getParameters(); - ValidParameters validParameter; + ValidParameters validParameter("summary.seqs"); map::iterator it; //check to make sure all parameters are valid for command @@ -113,7 +113,7 @@ int SeqSummaryCommand::execute(){ int tag = 2001; int startTag = 1; int endTag = 2; int lengthTag = 3; int baseTag = 4; int lhomoTag = 5; int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; - vector MPIPos; + vector MPIPos; MPI_Status status; MPI_Status statusOut; @@ -360,7 +360,7 @@ int SeqSummaryCommand::driverCreateSummary(vector& startPosition, vector& startPosition, vector& endPosition, vector& seqLength, vector& ambigBases, vector& longHomoPolymer, MPI_File& inMPI, MPI_File& outMPI, vector& MPIPos) { +int SeqSummaryCommand::MPICreateSummary(int start, int num, vector& startPosition, vector& endPosition, vector& seqLength, vector& ambigBases, vector& longHomoPolymer, MPI_File& inMPI, MPI_File& outMPI, vector& MPIPos) { try { int pid; diff --git a/seqsummarycommand.h b/seqsummarycommand.h index 3e576a0..a625726 100644 --- a/seqsummarycommand.h +++ b/seqsummarycommand.h @@ -38,7 +38,7 @@ private: int setLines(string); #ifdef USE_MPI - int MPICreateSummary(int, int, vector&, vector&, vector&, vector&, vector&, MPI_File&, MPI_File&, vector&); + int MPICreateSummary(int, int, vector&, vector&, vector&, vector&, vector&, MPI_File&, MPI_File&, vector&); #endif diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp index e2527ff..8c0e473 100644 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -1,674 +1,674 @@ -/* - * sffinfocommand.cpp - * Mothur - * - * Created by westcott on 7/7/10. - * Copyright 2010 Schloss Lab. All rights reserved. 
- * - */ - -#include "sffinfocommand.h" -#include "endiannessmacros.h" - -//********************************************************************************************************************** - -SffInfoCommand::SffInfoCommand(string option) { - try { - abort = false; - hasAccnos = false; - - //allow user to run help - if(option == "help") { help(); abort = true; } - - else { - //valid paramters for this command - string Array[] = {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); - - OptionParser parser(option); - map parameters = parser.getParameters(); - - ValidParameters validParameter; - //check to make sure all parameters are valid for command - for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { - if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } - } - - //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } - - //if the user changes the input directory command factory will send this info to us in the output parameter - string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } - - sffFilename = validParameter.validFile(parameters, "sff", false); - if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true; } - else { - splitAtDash(sffFilename, filenames); - - //go through files and make sure they are good, if not, then disregard them - for (int i = 0; i < filenames.size(); i++) { - if (inputDir != "") { - string path = hasPath(filenames[i]); - //if the user has not given a path then, add inputdir. else leave path alone. 
- if (path == "") { filenames[i] = inputDir + filenames[i]; } - } - - ifstream in; - int ableToOpen = openInputFile(filenames[i], in, "noerror"); - - //if you can't open it, try default location - if (ableToOpen == 1) { - if (m->getDefaultPath() != "") { //default path is set - string tryPath = m->getDefaultPath() + getSimpleName(filenames[i]); - m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); - ableToOpen = openInputFile(tryPath, in, "noerror"); - filenames[i] = tryPath; - } - } - in.close(); - - if (ableToOpen == 1) { - m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine(); - //erase from file list - filenames.erase(filenames.begin()+i); - i--; - } - } - - //make sure there is at least one valid file left - if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } - } - - accnosName = validParameter.validFile(parameters, "accnos", false); - if (accnosName == "not found") { accnosName = ""; } - else { - hasAccnos = true; - splitAtDash(accnosName, accnosFileNames); - - //go through files and make sure they are good, if not, then disregard them - for (int i = 0; i < accnosFileNames.size(); i++) { - if (inputDir != "") { - string path = hasPath(accnosFileNames[i]); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { accnosFileNames[i] = inputDir + accnosFileNames[i]; } - } - - ifstream in; - int ableToOpen = openInputFile(accnosFileNames[i], in, "noerror"); - - //if you can't open it, try default location - if (ableToOpen == 1) { - if (m->getDefaultPath() != "") { //default path is set - string tryPath = m->getDefaultPath() + getSimpleName(accnosFileNames[i]); - m->mothurOut("Unable to open " + accnosFileNames[i] + ". 
Trying default " + tryPath); m->mothurOutEndLine(); - ableToOpen = openInputFile(tryPath, in, "noerror"); - accnosFileNames[i] = tryPath; - } - } - in.close(); - - if (ableToOpen == 1) { - m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); - //erase from file list - accnosFileNames.erase(accnosFileNames.begin()+i); - i--; - } - } - - //make sure there is at least one valid file left - if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } - } - - if (hasAccnos) { - if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); } - } - - string temp = validParameter.validFile(parameters, "qfile", false); if (temp == "not found"){ temp = "T"; } - qual = isTrue(temp); - - temp = validParameter.validFile(parameters, "fasta", false); if (temp == "not found"){ temp = "T"; } - fasta = isTrue(temp); - - temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "F"; } - flow = isTrue(temp); - - temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; } - trim = isTrue(temp); - - temp = validParameter.validFile(parameters, "sfftxt", false); if (temp == "not found"){ temp = "F"; } - sfftxt = isTrue(temp); - } - } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "SffInfoCommand"); - exit(1); - } -} -//********************************************************************************************************************** - -void SffInfoCommand::help(){ - try { - m->mothurOut("The sffinfo command reads a sff file and extracts the sequence data.\n"); - m->mothurOut("The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n"); - m->mothurOut("The sff parameter allows you to enter the sff file you would like to extract data from. 
You may enter multiple files by separating them by -'s.\n"); - m->mothurOut("The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n"); - m->mothurOut("The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n"); - m->mothurOut("The flow parameter allows you to indicate if you would like a flowgram file generated. Default=False. \n"); - m->mothurOut("The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n"); - m->mothurOut("The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. Default=True. \n"); - m->mothurOut("The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n"); - m->mothurOut("Example sffinfo(sff=mySffFile.sff, trim=F).\n"); - m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n"); - } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "help"); - exit(1); - } -} -//********************************************************************************************************************** - -SffInfoCommand::~SffInfoCommand(){} - -//********************************************************************************************************************** -int SffInfoCommand::execute(){ - try { - - if (abort == true) { return 0; } - - for (int s = 0; s < filenames.size(); s++) { - - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } - - int start = time(NULL); - - m->mothurOut("Extracting info from " + filenames[s] + " ..." 
); m->mothurOutEndLine(); - - string accnos = ""; - if (hasAccnos) { accnos = accnosFileNames[s]; } - - int numReads = extractSffInfo(filenames[s], accnos); - - m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + "."); - } - - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } - - //report output filenames - m->mothurOutEndLine(); - m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } - m->mothurOutEndLine(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "execute"); - exit(1); - } -} -//********************************************************************************************************************** -int SffInfoCommand::extractSffInfo(string input, string accnos){ - try { - - if (outputDir == "") { outputDir += hasPath(input); } - - if (accnos != "") { readAccnosFile(accnos); } - else { seqNames.clear(); } - - ofstream outSfftxt, outFasta, outQual, outFlow; - string outFastaFileName, outQualFileName; - string sfftxtFileName = outputDir + getRootName(getSimpleName(input)) + "sff.txt"; - string outFlowFileName = outputDir + getRootName(getSimpleName(input)) + "flow"; - if (trim) { - outFastaFileName = outputDir + getRootName(getSimpleName(input)) + "fasta"; - outQualFileName = outputDir + getRootName(getSimpleName(input)) + "qual"; - }else{ - outFastaFileName = outputDir + getRootName(getSimpleName(input)) + "raw.fasta"; - outQualFileName = outputDir + getRootName(getSimpleName(input)) + "raw.qual"; - } - - if (sfftxt) { openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint); outputNames.push_back(sfftxtFileName); } - if (fasta) { openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); } - if (qual) { 
openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); } - if (flow) { openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); } - - ifstream in; - in.open(input.c_str(), ios::binary); - - CommonHeader header; - readCommonHeader(in, header); - - int count = 0; - - //check magic number and version - if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; } - if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; } - - //print common header - if (sfftxt) { printCommonHeader(outSfftxt, header); } - - //read through the sff file - while (!in.eof()) { - - bool print = true; - - //read header - Header readheader; - readHeader(in, readheader); - - //read data - seqRead read; - readSeqData(in, read, header.numFlowsPerRead, readheader.numBases); - - //if you have provided an accosfile and this seq is not in it, then dont print - if (seqNames.size() != 0) { if (seqNames.count(readheader.name) == 0) { print = false; } } - - //print - if (print) { - if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); } - if (fasta) { printFastaSeqData(outFasta, read, readheader); } - if (qual) { printQualSeqData(outQual, read, readheader); } - if (flow) { printFlowSeqData(outFlow, read, readheader); } - } - - count++; - - //report progress - if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); } - - if (m->control_pressed) { count = 0; break; } - - if (count >= header.numReads) { break; } - } - - //report progress - if (!m->control_pressed) { if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } } - - in.close(); - - if (sfftxt) { outSfftxt.close(); } - if (fasta) { outFasta.close(); } - if (qual) { outQual.close(); } - if (flow) { outFlow.close(); } - - return count; 
- } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "extractSffInfo"); - exit(1); - } -} -//********************************************************************************************************************** -int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){ - try { - - if (!in.eof()) { - - //read magic number - char buffer[4]; - in.read(buffer, 4); - header.magicNumber = be_int4(*(unsigned int *)(&buffer)); - - //read version - char buffer9[4]; - in.read(buffer9, 4); - header.version = ""; - for (int i = 0; i < 4; i++) { header.version += toString((int)(buffer9[i])); } - - //read offset - char buffer2 [8]; - in.read(buffer2, 8); - header.indexOffset = be_int8(*(unsigned long int *)(&buffer2)); - - //read index length - char buffer3 [4]; - in.read(buffer3, 4); - header.indexLength = be_int4(*(unsigned int *)(&buffer3)); - - //read num reads - char buffer4 [4]; - in.read(buffer4, 4); - header.numReads = be_int4(*(unsigned int *)(&buffer4)); - - //read header length - char buffer5 [2]; - in.read(buffer5, 2); - header.headerLength = be_int2(*(unsigned short *)(&buffer5)); - - //read key length - char buffer6 [2]; - in.read(buffer6, 2); - header.keyLength = be_int2(*(unsigned short *)(&buffer6)); - - //read number of flow reads - char buffer7 [2]; - in.read(buffer7, 2); - header.numFlowsPerRead = be_int2(*(unsigned short *)(&buffer7)); - - //read format code - char buffer8 [1]; - in.read(buffer8, 1); - header.flogramFormatCode = (int)(buffer8[0]); - - //read flow chars - char* tempBuffer = new char[header.numFlowsPerRead]; - in.read(&(*tempBuffer), header.numFlowsPerRead); - header.flowChars = tempBuffer; - if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead); } - delete[] tempBuffer; - - //read key - char* tempBuffer2 = new char[header.keyLength]; - in.read(&(*tempBuffer2), header.keyLength); - header.keySequence = tempBuffer2; - if 
(header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength); } - delete[] tempBuffer2; - - /* Pad to 8 chars */ - unsigned long int spotInFile = in.tellg(); - unsigned long int spot = (spotInFile + 7)& ~7; // ~ inverts - in.seekg(spot); - - }else{ - m->mothurOut("Error reading sff common header."); m->mothurOutEndLine(); - } - - return 0; - } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "readCommonHeader"); - exit(1); - } -} -//********************************************************************************************************************** -int SffInfoCommand::readHeader(ifstream& in, Header& header){ - try { - - if (!in.eof()) { - - //read header length - char buffer [2]; - in.read(buffer, 2); - header.headerLength = be_int2(*(unsigned short *)(&buffer)); - - //read name length - char buffer2 [2]; - in.read(buffer2, 2); - header.nameLength = be_int2(*(unsigned short *)(&buffer2)); - - //read num bases - char buffer3 [4]; - in.read(buffer3, 4); - header.numBases = be_int4(*(unsigned int *)(&buffer3)); - - //read clip qual left - char buffer4 [2]; - in.read(buffer4, 2); - header.clipQualLeft = be_int2(*(unsigned short *)(&buffer4)); - - //read clip qual right - char buffer5 [2]; - in.read(buffer5, 2); - header.clipQualRight = be_int2(*(unsigned short *)(&buffer5)); - - //read clipAdapterLeft - char buffer6 [2]; - in.read(buffer6, 2); - header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6)); - - //read clipAdapterRight - char buffer7 [2]; - in.read(buffer7, 2); - header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7)); - - //read name - char* tempBuffer = new char[header.nameLength]; - in.read(&(*tempBuffer), header.nameLength); - header.name = tempBuffer; - if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength); } - delete[] tempBuffer; - - /* Pad to 8 chars */ - unsigned long int spotInFile = in.tellg(); - unsigned long int 
spot = (spotInFile + 7)& ~7; - in.seekg(spot); - - }else{ - m->mothurOut("Error reading sff header info."); m->mothurOutEndLine(); - } - - return 0; - } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "readHeader"); - exit(1); - } -} -//********************************************************************************************************************** -int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, int numBases){ - try { - - if (!in.eof()) { - - //read flowgram - read.flowgram.resize(numFlowReads); - for (int i = 0; i < numFlowReads; i++) { - char buffer [2]; - in.read(buffer, 2); - read.flowgram[i] = be_int2(*(unsigned short *)(&buffer)); - } - - //read flowIndex - read.flowIndex.resize(numBases); - for (int i = 0; i < numBases; i++) { - char temp[1]; - in.read(temp, 1); - read.flowIndex[i] = be_int1(*(unsigned char *)(&temp)); - } - - //read bases - char* tempBuffer = new char[numBases]; - in.read(&(*tempBuffer), numBases); - read.bases = tempBuffer; - if (read.bases.length() > numBases) { read.bases = read.bases.substr(0, numBases); } - delete[] tempBuffer; - - //read qual scores - read.qualScores.resize(numBases); - for (int i = 0; i < numBases; i++) { - char temp[1]; - in.read(temp, 1); - read.qualScores[i] = be_int1(*(unsigned char *)(&temp)); - } - - /* Pad to 8 chars */ - unsigned long int spotInFile = in.tellg(); - unsigned long int spot = (spotInFile + 7)& ~7; - in.seekg(spot); - - }else{ - m->mothurOut("Error reading."); m->mothurOutEndLine(); - } - - return 0; - } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "readSeqData"); - exit(1); - } -} -//********************************************************************************************************************** -int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) { - try { - - out << "Common Header:\nMagic Number: " << header.magicNumber << endl; - out << "Version: " << header.version << endl; - out << "Index Offset: " << 
header.indexOffset << endl; - out << "Index Length: " << header.indexLength << endl; - out << "Number of Reads: " << header.numReads << endl; - out << "Header Length: " << header.headerLength << endl; - out << "Key Length: " << header.keyLength << endl; - out << "Number of Flows: " << header.numFlowsPerRead << endl; - out << "Format Code: " << header.flogramFormatCode << endl; - out << "Flow Chars: " << header.flowChars << endl; - out << "Key Sequence: " << header.keySequence << endl << endl; - - return 0; - } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "printCommonHeader"); - exit(1); - } -} -//********************************************************************************************************************** -int SffInfoCommand::printHeader(ofstream& out, Header& header) { - try { - - out << ">" << header.name << endl; - out << "Run Prefix: " << endl; - out << "Region #: " << endl; - out << "XY Location: " << endl << endl; - - out << "Run Name: " << endl; - out << "Analysis Name: " << endl; - out << "Full Path: " << endl << endl; - - out << "Read Header Len: " << header.headerLength << endl; - out << "Name Length: " << header.nameLength << endl; - out << "# of Bases: " << header.numBases << endl; - out << "Clip Qual Left: " << header.clipQualLeft << endl; - out << "Clip Qual Right: " << header.clipQualRight << endl; - out << "Clip Adap Left: " << header.clipAdapterLeft << endl; - out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl; - - return 0; - } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "printHeader"); - exit(1); - } -} - -//********************************************************************************************************************** -int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) { - try { - - out << "FlowGram: "; - for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t'; } - - out << endl << "Flow 
Indexes: "; - int sum = 0; - for (int i = 0; i < read.flowIndex.size(); i++) { sum += read.flowIndex[i]; out << sum << '\t'; } - - //make the bases you want to clip lowercase and the bases you want to keep upper case - for (int i = 0; i < header.clipQualLeft; i++) { read.bases[i] = tolower(read.bases[i]); } - for (int i = header.clipQualLeft; i < (header.clipQualRight-header.clipQualLeft); i++) { read.bases[i] = toupper(read.bases[i]); } - for (int i = (header.clipQualRight-header.clipQualLeft); i < read.bases.length(); i++) { read.bases[i] = tolower(read.bases[i]); } - - out << endl << "Bases: " << read.bases << endl << "Quality Scores: "; - for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; } - - - out << endl << endl; - - return 0; - } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData"); - exit(1); - } -} -//********************************************************************************************************************** -int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) { - try { - - string seq = read.bases; - - - if (trim) { - seq = seq.substr(header.clipQualLeft, (header.clipQualRight-header.clipQualLeft)); - }else{ - //if you wanted the sfftxt then you already converted the bases to the right case - if (!sfftxt) { - //make the bases you want to clip lowercase and the bases you want to keep upper case - for (int i = 0; i < header.clipQualLeft; i++) { seq[i] = tolower(seq[i]); } - for (int i = header.clipQualLeft; i < (header.clipQualRight-header.clipQualLeft); i++) { seq[i] = toupper(seq[i]); } - for (int i = (header.clipQualRight-header.clipQualLeft); i < seq.length(); i++) { seq[i] = tolower(seq[i]); } - } - } - - out << ">" << header.name << endl; - out << seq << endl; - - return 0; - } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "printFastaSeqData"); - exit(1); - } -} - 
-//********************************************************************************************************************** -int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) { - try { - - if (trim) { - out << ">" << header.name << " length=" << (header.clipQualRight-header.clipQualLeft) << endl; - for (int i = header.clipQualLeft; i < (header.clipQualRight-header.clipQualLeft); i++) { out << read.qualScores[i] << '\t'; } - }else{ - out << ">" << header.name << " length=" << read.qualScores.size() << endl; - for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; } - } - - out << endl; - - return 0; - } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "printQualSeqData"); - exit(1); - } -} - -//********************************************************************************************************************** -int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) { - try { - - out << ">" << header.name << endl; - for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t'; } - out << endl; - - return 0; - } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "printFlowSeqData"); - exit(1); - } -} -//********************************************************************************************************************** -int SffInfoCommand::readAccnosFile(string filename) { - try { - //remove old names - seqNames.clear(); - - ifstream in; - openInputFile(filename, in); - string name; - - while(!in.eof()){ - in >> name; gobble(in); - - seqNames.insert(name); - - if (m->control_pressed) { seqNames.clear(); break; } - } - in.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "readAccnosFile"); - exit(1); - } -} -//**********************************************************************************************************************/ +/* + * sffinfocommand.cpp + * Mothur + * + * 
Created by westcott on 7/7/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "sffinfocommand.h" +#include "endiannessmacros.h" + +//********************************************************************************************************************** + +SffInfoCommand::SffInfoCommand(string option) { + try { + abort = false; + hasAccnos = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + //check to make sure all parameters are valid for command + for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } + + sffFilename = validParameter.validFile(parameters, "sff", false); + if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true; } + else { + splitAtDash(sffFilename, filenames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < filenames.size(); i++) { + if (inputDir != "") { + string path = hasPath(filenames[i]); + //if the user has not given a path then, add 
inputdir. else leave path alone. + if (path == "") { filenames[i] = inputDir + filenames[i]; } + } + + ifstream in; + int ableToOpen = openInputFile(filenames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + getSimpleName(filenames[i]); + m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ableToOpen = openInputFile(tryPath, in, "noerror"); + filenames[i] = tryPath; + } + } + in.close(); + + if (ableToOpen == 1) { + m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine(); + //erase from file list + filenames.erase(filenames.begin()+i); + i--; + } + } + + //make sure there is at least one valid file left + if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } + } + + accnosName = validParameter.validFile(parameters, "accnos", false); + if (accnosName == "not found") { accnosName = ""; } + else { + hasAccnos = true; + splitAtDash(accnosName, accnosFileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < accnosFileNames.size(); i++) { + if (inputDir != "") { + string path = hasPath(accnosFileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { accnosFileNames[i] = inputDir + accnosFileNames[i]; } + } + + ifstream in; + int ableToOpen = openInputFile(accnosFileNames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + getSimpleName(accnosFileNames[i]); + m->mothurOut("Unable to open " + accnosFileNames[i] + ". 
Trying default " + tryPath); m->mothurOutEndLine(); + ableToOpen = openInputFile(tryPath, in, "noerror"); + accnosFileNames[i] = tryPath; + } + } + in.close(); + + if (ableToOpen == 1) { + m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); + //erase from file list + accnosFileNames.erase(accnosFileNames.begin()+i); + i--; + } + } + + //make sure there is at least one valid file left + if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } + } + + if (hasAccnos) { + if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); } + } + + string temp = validParameter.validFile(parameters, "qfile", false); if (temp == "not found"){ temp = "T"; } + qual = isTrue(temp); + + temp = validParameter.validFile(parameters, "fasta", false); if (temp == "not found"){ temp = "T"; } + fasta = isTrue(temp); + + temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "F"; } + flow = isTrue(temp); + + temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; } + trim = isTrue(temp); + + temp = validParameter.validFile(parameters, "sfftxt", false); if (temp == "not found"){ temp = "F"; } + sfftxt = isTrue(temp); + } + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "SffInfoCommand"); + exit(1); + } +} +//********************************************************************************************************************** + +void SffInfoCommand::help(){ + try { + m->mothurOut("The sffinfo command reads a sff file and extracts the sequence data.\n"); + m->mothurOut("The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n"); + m->mothurOut("The sff parameter allows you to enter the sff file you would like to extract data from. 
You may enter multiple files by separating them by -'s.\n"); + m->mothurOut("The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n"); + m->mothurOut("The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n"); + m->mothurOut("The flow parameter allows you to indicate if you would like a flowgram file generated. Default=False. \n"); + m->mothurOut("The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n"); + m->mothurOut("The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. Default=True. \n"); + m->mothurOut("The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n"); + m->mothurOut("Example sffinfo(sff=mySffFile.sff, trim=F).\n"); + m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n"); + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "help"); + exit(1); + } +} +//********************************************************************************************************************** + +SffInfoCommand::~SffInfoCommand(){} + +//********************************************************************************************************************** +int SffInfoCommand::execute(){ + try { + + if (abort == true) { return 0; } + + for (int s = 0; s < filenames.size(); s++) { + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + int start = time(NULL); + + m->mothurOut("Extracting info from " + filenames[s] + " ..." 
); m->mothurOutEndLine(); + + string accnos = ""; + if (hasAccnos) { accnos = accnosFileNames[s]; } + + int numReads = extractSffInfo(filenames[s], accnos); + + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + "."); + } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + //report output filenames + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** +int SffInfoCommand::extractSffInfo(string input, string accnos){ + try { + + if (outputDir == "") { outputDir += hasPath(input); } + + if (accnos != "") { readAccnosFile(accnos); } + else { seqNames.clear(); } + + ofstream outSfftxt, outFasta, outQual, outFlow; + string outFastaFileName, outQualFileName; + string sfftxtFileName = outputDir + getRootName(getSimpleName(input)) + "sff.txt"; + string outFlowFileName = outputDir + getRootName(getSimpleName(input)) + "flow"; + if (trim) { + outFastaFileName = outputDir + getRootName(getSimpleName(input)) + "fasta"; + outQualFileName = outputDir + getRootName(getSimpleName(input)) + "qual"; + }else{ + outFastaFileName = outputDir + getRootName(getSimpleName(input)) + "raw.fasta"; + outQualFileName = outputDir + getRootName(getSimpleName(input)) + "raw.qual"; + } + + if (sfftxt) { openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint); outputNames.push_back(sfftxtFileName); } + if (fasta) { openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); } + if (qual) { 
openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); } + if (flow) { openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); } + + ifstream in; + in.open(input.c_str(), ios::binary); + + CommonHeader header; + readCommonHeader(in, header); + + int count = 0; + + //check magic number and version + if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; } + if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; } + + //print common header + if (sfftxt) { printCommonHeader(outSfftxt, header); } + + //read through the sff file + while (!in.eof()) { + + bool print = true; + + //read header + Header readheader; + readHeader(in, readheader); + + //read data + seqRead read; + readSeqData(in, read, header.numFlowsPerRead, readheader.numBases); + + //if you have provided an accosfile and this seq is not in it, then dont print + if (seqNames.size() != 0) { if (seqNames.count(readheader.name) == 0) { print = false; } } + + //print + if (print) { + if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); } + if (fasta) { printFastaSeqData(outFasta, read, readheader); } + if (qual) { printQualSeqData(outQual, read, readheader); } + if (flow) { printFlowSeqData(outFlow, read, readheader); } + } + + count++; + + //report progress + if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); } + + if (m->control_pressed) { count = 0; break; } + + if (count >= header.numReads) { break; } + } + + //report progress + if (!m->control_pressed) { if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } } + + in.close(); + + if (sfftxt) { outSfftxt.close(); } + if (fasta) { outFasta.close(); } + if (qual) { outQual.close(); } + if (flow) { outFlow.close(); } + + return count; 
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SffInfoCommand", "extractSffInfo");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+//reads exactly "length" raw bytes from the stream and returns them as a string.
+//the text fields of an sff file are stored without a terminating null, so the bytes are read into a
+//zero filled buffer of known size. the result ends at the first null byte or after "length"
+//characters, whichever comes first, and nothing outside of the buffer is ever looked at.
+static string readFixedLengthString(ifstream& in, int length) {
+	if (length <= 0) { return ""; }
+	
+	string buffer(length, '\0');
+	in.read(&buffer[0], length);
+	
+	return string(buffer.c_str());
+}
+//**********************************************************************************************************************
+//turns the 1-based, inclusive clip points of a read into the 0-based half open range [first, last) of
+//the bases you want to keep. a clip point of 0 means no clip point was recorded, so nothing is clipped
+//on that side. the range returned always satisfies 0 <= first <= last <= numBases.
+static void getKeptRange(int clipLeft, int clipRight, int numBases, int& first, int& last) {
+	first = (clipLeft < 1) ? 0 : (clipLeft - 1);
+	last = ((clipRight < 1) || (clipRight > numBases)) ? numBases : clipRight;
+	if (first > last) { first = last; }
+}
+//**********************************************************************************************************************
+//fills "header" with the common header found at the current position of the sff file and leaves the
+//stream on the next 8 byte boundary. always returns 0; if the stream is already at eof an error
+//message is printed and "header" is not touched.
+//NOTE(review): be_int2, be_int4 and be_int8 are defined elsewhere - presumably they convert from big endian, confirm.
+int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){
+	try {
+
+		if (!in.eof()) {
+
+			//read magic number
+			char buffer[4];
+			in.read(buffer, 4);
+			header.magicNumber = be_int4(*(unsigned int *)(&buffer));
+
+			//read version - each of the 4 bytes is appended as its decimal value
+			char buffer9[4];
+			in.read(buffer9, 4);
+			header.version = "";
+			for (int i = 0; i < 4; i++) { header.version += toString((int)(buffer9[i])); }
+
+			//read offset
+			//assumes unsigned long int is 8 bytes wide - TODO confirm for 32 bit builds
+			char buffer2 [8];
+			in.read(buffer2, 8);
+			header.indexOffset = be_int8(*(unsigned long int *)(&buffer2));
+
+			//read index length
+			char buffer3 [4];
+			in.read(buffer3, 4);
+			header.indexLength = be_int4(*(unsigned int *)(&buffer3));
+
+			//read num reads
+			char buffer4 [4];
+			in.read(buffer4, 4);
+			header.numReads = be_int4(*(unsigned int *)(&buffer4));
+
+			//read header length
+			char buffer5 [2];
+			in.read(buffer5, 2);
+			header.headerLength = be_int2(*(unsigned short *)(&buffer5));
+
+			//read key length
+			char buffer6 [2];
+			in.read(buffer6, 2);
+			header.keyLength = be_int2(*(unsigned short *)(&buffer6));
+
+			//read number of flow reads
+			char buffer7 [2];
+			in.read(buffer7, 2);
+			header.numFlowsPerRead = be_int2(*(unsigned short *)(&buffer7));
+
+			//read format code
+			char buffer8 [1];
+			in.read(buffer8, 1);
+			header.flogramFormatCode = (int)(buffer8[0]);
+
+			//read flow chars - numFlowsPerRead bytes, not null terminated in the file
+			header.flowChars = readFixedLengthString(in, header.numFlowsPerRead);
+
+			//read key - keyLength bytes, not null terminated in the file
+			header.keySequence = readFixedLengthString(in, header.keyLength);
+
+			/* Pad to 8 chars */
+			unsigned long int spotInFile = in.tellg();
+			unsigned long int spot = (spotInFile + 7)& ~7; // ~ inverts
+			in.seekg(spot);
+
+		}else{
+			m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
+		}
+
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SffInfoCommand", "readCommonHeader");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+//fills "header" with the read header found at the current position of the sff file and leaves the
+//stream on the next 8 byte boundary. always returns 0; if the stream is already at eof an error
+//message is printed and "header" is not touched.
+int SffInfoCommand::readHeader(ifstream& in, Header& header){
+	try {
+
+		if (!in.eof()) {
+
+			//read header length
+			char buffer [2];
+			in.read(buffer, 2);
+			header.headerLength = be_int2(*(unsigned short *)(&buffer));
+
+			//read name length
+			char buffer2 [2];
+			in.read(buffer2, 2);
+			header.nameLength = be_int2(*(unsigned short *)(&buffer2));
+
+			//read num bases
+			char buffer3 [4];
+			in.read(buffer3, 4);
+			header.numBases = be_int4(*(unsigned int *)(&buffer3));
+
+			//read clip qual left
+			char buffer4 [2];
+			in.read(buffer4, 2);
+			header.clipQualLeft = be_int2(*(unsigned short *)(&buffer4));
+
+			//read clip qual right
+			char buffer5 [2];
+			in.read(buffer5, 2);
+			header.clipQualRight = be_int2(*(unsigned short *)(&buffer5));
+
+			//read clipAdapterLeft
+			char buffer6 [2];
+			in.read(buffer6, 2);
+			header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));
+
+			//read clipAdapterRight
+			char buffer7 [2];
+			in.read(buffer7, 2);
+			header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));
+
+			//read name - nameLength bytes, not null terminated in the file
+			header.name = readFixedLengthString(in, header.nameLength);
+
+			/* Pad to 8 chars */
+			unsigned long int spotInFile = in.tellg();
+			unsigned long int spot = (spotInFile + 7)& ~7;
+			in.seekg(spot);
+
+		}else{
+			m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
+		}
+
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SffInfoCommand", "readHeader");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+//fills "read" with the data section of one read: numFlowReads 2 byte flowgram values, then numBases
+//flow index bytes, numBases bases and numBases quality score bytes. leaves the stream on the next
+//8 byte boundary. always returns 0; if the stream is already at eof an error message is printed.
+int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, int numBases){
+	try {
+
+		if (!in.eof()) {
+
+			//read flowgram
+			read.flowgram.resize(numFlowReads);
+			for (int i = 0; i < numFlowReads; i++) {
+				char buffer [2];
+				in.read(buffer, 2);
+				read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
+			}
+
+			//read flowIndex
+			read.flowIndex.resize(numBases);
+			for (int i = 0; i < numBases; i++) {
+				char temp[1];
+				in.read(temp, 1);
+				read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));
+			}
+
+			//read bases - numBases bytes, not null terminated in the file
+			read.bases = readFixedLengthString(in, numBases);
+
+			//read qual scores
+			read.qualScores.resize(numBases);
+			for (int i = 0; i < numBases; i++) {
+				char temp[1];
+				in.read(temp, 1);
+				read.qualScores[i] = be_int1(*(unsigned char *)(&temp));
+			}
+
+			/* Pad to 8 chars */
+			unsigned long int spotInFile = in.tellg();
+			unsigned long int spot = (spotInFile + 7)& ~7;
+			in.seekg(spot);
+
+		}else{
+			m->mothurOut("Error reading."); m->mothurOutEndLine();
+		}
+
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SffInfoCommand", "readSeqData");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+//writes every field of the common header to "out", one labelled value per line, followed by a blank line. always returns 0.
+int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {
+	try {
+
+		out << "Common Header:\nMagic Number: " << header.magicNumber << endl;
+		out << "Version: " << header.version << endl;
+		out << "Index Offset: " << header.indexOffset << endl;
+		out << "Index Length: " << header.indexLength << endl;
+		out << "Number of Reads: " << header.numReads << endl;
+		out << "Header Length: " << header.headerLength << endl;
+		out << "Key Length: " << header.keyLength << endl;
+		out << "Number of Flows: " << header.numFlowsPerRead << endl;
+		out << "Format Code: " << header.flogramFormatCode << endl;
+		out << "Flow Chars: " << header.flowChars << endl;
+		out << "Key Sequence: " << header.keySequence << endl << endl;
+
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SffInfoCommand", "printCommonHeader");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+//writes the header of one read to "out". the run prefix, region, xy location, run name, analysis name
+//and full path labels are written without a value, the remaining fields come from "header". always returns 0.
+int SffInfoCommand::printHeader(ofstream& out, Header& header) {
+	try {
+
+		out << ">" << header.name << endl;
+		out << "Run Prefix: " << endl;
+		out << "Region #: " << endl;
+		out << "XY Location: " << endl << endl;
+
+		out << "Run Name: " << endl;
+		out << "Analysis Name: " << endl;
+		out << "Full Path: " << endl << endl;
+
+		out << "Read Header Len: " << header.headerLength << endl;
+		out << "Name Length: " << header.nameLength << endl;
+		out << "# of Bases: " << header.numBases << endl;
+		out << "Clip Qual Left: " << header.clipQualLeft << endl;
+		out << "Clip Qual Right: " << header.clipQualRight << endl;
+		out << "Clip Adap Left: " << header.clipAdapterLeft << endl;
+		out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;
+
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SffInfoCommand", "printHeader");
+		exit(1);
+	}
+}
+
+//**********************************************************************************************************************
+//writes the flowgram, the cumulative flow indexes, the bases and the quality scores of one read to "out".
+//read.bases is changed in place: clipped bases become lowercase and kept bases uppercase, which
+//printFastaSeqData relies on when the sfftxt output is also requested. always returns 0.
+int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {
+	try {
+
+		out << "Flowgram: ";
+		for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t'; }
+
+		out << endl << "Flow Indexes: ";
+		int sum = 0;
+		for (int i = 0; i < read.flowIndex.size(); i++) { sum += read.flowIndex[i]; out << sum << '\t'; }
+
+		//make the bases you want to clip lowercase and the bases you want to keep upper case
+		//the kept range includes the base at clipQualRight, the same bases the trimmed fasta keeps
+		int numBases = read.bases.length();
+		int first, last;
+		getKeptRange(header.clipQualLeft, header.clipQualRight, numBases, first, last);
+		for (int i = 0; i < first; i++) { read.bases[i] = tolower(read.bases[i]); }
+		for (int i = first; i < last; i++) { read.bases[i] = toupper(read.bases[i]); }
+		for (int i = last; i < numBases; i++) { read.bases[i] = tolower(read.bases[i]); }
+
+		out << endl << "Bases: " << read.bases << endl << "Quality Scores: ";
+		for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; }
+
+
+		out << endl << endl;
+
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+//writes one read to "out" in fasta format. with trim only the bases between the quality clip points
+//(inclusive) are written, otherwise all bases are written with the clipped ones in lowercase and the
+//kept ones in uppercase. always returns 0.
+int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {
+	try {
+
+		string seq = read.bases;
+
+		int numBases = seq.length();
+		int first, last;
+		getKeptRange(header.clipQualLeft, header.clipQualRight, numBases, first, last);
+
+		if (trim) {
+			seq = seq.substr(first, (last-first));
+		}else{
+			//if you wanted the sfftxt then you already converted the bases to the right case
+			if (!sfftxt) {
+				//make the bases you want to clip lowercase and the bases you want to keep upper case
+				for (int i = 0; i < first; i++) { seq[i] = tolower(seq[i]); }
+				for (int i = first; i < last; i++) { seq[i] = toupper(seq[i]); }
+				for (int i = last; i < numBases; i++) { seq[i] = tolower(seq[i]); }
+			}
+		}
+
+		out << ">" << header.name << endl;
+		out << seq << endl;
+
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SffInfoCommand", "printFastaSeqData");
+		exit(1);
+	}
+}
+
+//**********************************************************************************************************************
+//writes the quality scores of one read to "out" in qual format. with trim only the scores between the
+//quality clip points (inclusive) are written, so the scores line up with the trimmed fasta sequence
+//and the length in the title line is the number of scores that follow. always returns 0.
+int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) {
+	try {
+		
+		if (trim) {
+			int numScores = read.qualScores.size();
+			
+			//the clip points are 1-based and inclusive, and 0 means no clip point was recorded
+			int first = header.clipQualLeft;	if (first < 1) { first = 1; }
+			int last = header.clipQualRight;	if ((last < 1) || (last > numScores)) { last = numScores; }
+			
+			//nothing is left after trimming
+			if (last < first) { last = first - 1; }
+			
+			out << ">" << header.name << " length=" << (last-first+1) << endl;
+			for (int i = (first-1); i < last; i++) { out << read.qualScores[i] << '\t'; }
+		}else{
+			out << ">" << header.name << " length=" << read.qualScores.size() << endl;
+			for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; }
+		}
+		
+		out << endl;
+		
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SffInfoCommand", "printQualSeqData");
+		exit(1);
+	}
+}
+
+//**********************************************************************************************************************
+//writes the name of one read followed by all of its flowgram values, each divided by 100 and tab
+//separated, on a single line. always returns 0.
+int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
+	try {
+		
+		out << ">" << header.name << endl;
+		for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t'; }
+		out << endl;
+		
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SffInfoCommand", "printFlowSeqData");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+//replaces the contents of seqNames with the whitespace separated names found in the accnos file.
+//if control_pressed is set while reading, seqNames is emptied and reading stops. always returns 0.
+int SffInfoCommand::readAccnosFile(string filename) {
+	try {
+		//remove old names
+		seqNames.clear();
+		
+		ifstream in;
+		openInputFile(filename, in);
+		string name;
+		
+		while(!in.eof()){
+			in >> name; gobble(in);
+			
+			seqNames.insert(name);
+			
+			if (m->control_pressed) { seqNames.clear(); break; }
+		}
+		in.close();
+		
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SffInfoCommand", "readAccnosFile");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************/
diff --git a/summarycommand.cpp b/summarycommand.cpp index 5ad1ee6..b086dff 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -136,10 +136,11 @@ int
SummaryCommand::execute(){ vector outputNames; + string hadShared = ""; if ((globaldata->getFormat() != "sharedfile")) { inputFileNames.push_back(globaldata->inputFileName); } - else { inputFileNames = parseSharedFile(globaldata->getSharedFile()); globaldata->setFormat("rabund"); } + else { hadShared = globaldata->getSharedFile(); inputFileNames = parseSharedFile(globaldata->getSharedFile()); globaldata->setFormat("rabund"); } - if (m->control_pressed) { return 0; } + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; } for (int p = 0; p < inputFileNames.size(); p++) { @@ -204,7 +205,7 @@ int SummaryCommand::execute(){ } //if the users entered no valid calculators don't execute command - if (sumCalculators.size() == 0) { return 0; } + if (sumCalculators.size() == 0) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; } ofstream outputFileHandle; openOutputFile(fileNameRoot, outputFileHandle); @@ -231,11 +232,11 @@ int SummaryCommand::execute(){ set processedLabels; set userLabels = labels; - if (m->control_pressed) { outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } + if (m->control_pressed) 
{ if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } if(allLines == 1 || labels.count(sabund->getLabel()) == 1){ @@ -247,7 +248,7 @@ int SummaryCommand::execute(){ for(int i=0;i data = sumCalculators[i]->getValues(sabund); - if (m->control_pressed) { outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } outputFileHandle << '\t'; sumCalculators[i]->print(outputFileHandle); @@ -269,7 +270,7 @@ int SummaryCommand::execute(){ for(int i=0;i data = sumCalculators[i]->getValues(sabund); - if (m->control_pressed) { outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } outputFileHandle << '\t'; sumCalculators[i]->print(outputFileHandle); @@ -286,7 +287,7 @@ int SummaryCommand::execute(){ sabund = input->getSAbundVector(); } - if (m->control_pressed) { outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } 
for(int i=0;iginput = NULL; return 0; } + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;iginput = NULL; return 0; } //output error messages about any remaining user labels set::iterator it; @@ -311,7 +312,7 @@ int SummaryCommand::execute(){ for(int i=0;i data = sumCalculators[i]->getValues(sabund); - if (m->control_pressed) { outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } outputFileHandle << '\t'; sumCalculators[i]->print(outputFileHandle); @@ -322,7 +323,7 @@ int SummaryCommand::execute(){ outputFileHandle.close(); - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;iginput = NULL; return 0; } + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;iginput = NULL; return 0; } delete input; globaldata->ginput = NULL; @@ -332,6 +333,8 @@ int SummaryCommand::execute(){ for(int i=0;isetSharedFile(hadShared); globaldata->setFormat("sharedfile"); } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } m->mothurOutEndLine(); diff --git a/validparameter.cpp b/validparameter.cpp index e82a9e8..1faa651 100644 --- a/validparameter.cpp 
+++ b/validparameter.cpp @@ -15,6 +15,20 @@ ValidParameters::ValidParameters() { try { m = MothurOut::getInstance(); initParameterRanges(); + commandName = ""; + } + catch(exception& e) { + m->errorOut(e, "ValidParameters", "ValidParameters"); + exit(1); + } +} +/***********************************************************************/ + +ValidParameters::ValidParameters(string c) { + try { + m = MothurOut::getInstance(); + initParameterRanges(); + commandName = c; } catch(exception& e) { m->errorOut(e, "ValidParameters", "ValidParameters"); @@ -214,6 +228,8 @@ string ValidParameters::validFile(map& container, string paramet MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_Comm_size(MPI_COMM_WORLD, &processors); + if (commandName == "") { processors = 1; } + if (pid == 0) { #endif @@ -234,12 +250,26 @@ string ValidParameters::validFile(map& container, string paramet #ifdef USE_MPI for(int i = 1; i < processors; i++) { MPI_Send(&ableToOpen, 1, MPI_INT, i, 2001, MPI_COMM_WORLD); + + int length = container[parameter].length(); + MPI_Send(&length, 1, MPI_INT, i, 2001, MPI_COMM_WORLD); + MPI_Send(&(container[parameter][0]), length, MPI_CHAR, i, 2001, MPI_COMM_WORLD); + } }else { MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + + int length; + MPI_Recv(&length, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + //recieve container + char* tempBuf = new char[length]; + MPI_Recv(&tempBuf[0], length, MPI_CHAR, 0, 2001, MPI_COMM_WORLD, &status); + + container[parameter] = tempBuf; + if (container[parameter].length() > length) { container[parameter] = container[parameter].substr(0, length); } + delete tempBuf; } - MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #endif if (ableToOpen == 1) { diff --git a/validparameter.h b/validparameter.h index 782931f..04d201d 100644 --- a/validparameter.h +++ b/validparameter.h @@ -22,17 +22,19 @@ class ValidParameters { public: ValidParameters(); + ValidParameters(string); 
~ValidParameters(); //bool isValidParameter(string, string, string) {return true;} bool isValidParameter(string, vector, string); vector addParameters(string[], int); void initParameterRanges(); - string validFile(map&, string, bool); //container, parameter, isFile + string validFile(map&, string, bool); //container, parameter, isFile, commandName private: map::iterator it; map > parameterRanges; MothurOut* m; + string commandName; }; diff --git a/venncommand.cpp b/venncommand.cpp index 35c814d..192611d 100644 --- a/venncommand.cpp +++ b/venncommand.cpp @@ -128,8 +128,11 @@ VennCommand::VennCommand(string option) { } } - venn = new Venn(outputDir); + //if the users entered no valid calculators don't execute command + if (vennCalculators.size() == 0) { m->mothurOut("No valid calculators given, please correct."); m->mothurOutEndLine(); abort = true; } + else { venn = new Venn(outputDir); } } + } @@ -188,9 +191,6 @@ int VennCommand::execute(){ string lastLabel; vector outputNames; - //if the users entered no valid calculators don't execute command - if (vennCalculators.size() == 0) { return 0; } - if (format == "sharedfile") { //you have groups read = new ReadOTUFile(globaldata->inputFileName);