From: Sarah Westcott Date: Tue, 6 Mar 2012 12:54:36 +0000 (-0500) Subject: changed sffinfo flow default to true. fixed bug in trim.seqs and filter.seqs related... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=fc3b1fc4fc1c4e38fde6b0c0ee7896b5fe0b9d57 changed sffinfo flow default to true. fixed bug in trim.seqs and filter.seqs related to windows parallelization mods, added help and description to cooccurrence command --- diff --git a/cooccurrencecommand.cpp b/cooccurrencecommand.cpp index fa9f723..b67c6ea 100644 --- a/cooccurrencecommand.cpp +++ b/cooccurrencecommand.cpp @@ -33,8 +33,16 @@ vector CooccurrenceCommand::setParameters() { //********************************************************************************************************************** string CooccurrenceCommand::getHelpString(){ try { - string helpString = "help!"; - + string helpString = "The cooccurrence command calculates four metrics and tests their significance to assess whether presence-absence patterns are different than what one would expect by chance."; + helpString += "The cooccurrence command parameters are shared, metric, matrixmodel, iters, label and groups."; + helpString += "The matrixmodel parameter options are sim1, sim2, sim3, sim4, sim5, sim6, sim7, sim8 and sim9. Default=sim2"; + helpString += "The metric parameter options are cscore, checker, combo and vratio. Default=cscore"; + helpString += "The label parameter is used to analyze specific labels in your input.\n"; + helpString += "The groups parameter allows you to specify which of the groups you would like analyzed.\n"; + helpString += "The cooccurrence command should be in the following format: \n"; + helpString += "cooccurrence(shared=yourSharedFile) \n"; + helpString += "Example cooccurrence(shared=final.an.shared).\n"; + helpString += "Note: No spaces between parameter labels (i.e. 
shared), '=' and parameters (i.e.yourShared).\n"; return helpString; } catch(exception& e) { diff --git a/cooccurrencecommand.h b/cooccurrencecommand.h index da7cde8..8f60e08 100644 --- a/cooccurrencecommand.h +++ b/cooccurrencecommand.h @@ -30,7 +30,7 @@ public: string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getCitation() { return "http://www.mothur.org/wiki/Cooccurrence"; } - string getDescription() { return "Cooccurrence"; } + string getDescription() { return "calculates four metrics and tests their significance to assess whether presence-absence patterns are different than what one would expect by chance."; } int execute(); void help() { m->mothurOut(getHelpString()); } diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index 806ca0d..9096bb6 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -420,7 +420,19 @@ int FilterSeqsCommand::filterSequences() { MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else - vector positions = savedPositions[s]; + + vector positions; + if (savedPositions.size() != 0) { positions = savedPositions[s]; } + else { +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + positions = m->divideFile(fastafileNames[s], processors); +#else + if(processors != 1){ + int numFastaSeqs = 0; + positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs); + } +#endif + } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) //vector positions = m->divideFile(fastafileNames[s], processors); diff --git a/linearalgebra.cpp b/linearalgebra.cpp index 0269fc4..2e0321e 100644 --- a/linearalgebra.cpp +++ b/linearalgebra.cpp @@ -9,6 +9,7 @@ #include "linearalgebra.h" +// This class references functions used from "Numerical Recipes in C++" // /*********************************************************************************************************************************/ inline double SIGN(const double a, const double b) diff --git a/sffinfocommand.cpp 
b/sffinfocommand.cpp index 4965cfd..e008ce4 100644 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -16,7 +16,7 @@ vector SffInfoCommand::setParameters(){ CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(psff); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos); CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "",false,false); parameters.push_back(psfftxt); - CommandParameter pflow("flow", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflow); + CommandParameter pflow("flow", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pflow); CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(ptrim); CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pfasta); CommandParameter pqfile("name", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqfile); @@ -41,7 +41,7 @@ string SffInfoCommand::getHelpString(){ helpString += "The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n"; helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n"; helpString += "The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n"; - helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated. Default=False. \n"; + helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated. Default=True. \n"; helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. 
\n"; helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n"; helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. Default=True. \n"; @@ -256,7 +256,7 @@ SffInfoCommand::SffInfoCommand(string option) { temp = validParameter.validFile(parameters, "fasta", false); if (temp == "not found"){ temp = "T"; } fasta = m->isTrue(temp); - temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "F"; } + temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "T"; } flow = m->isTrue(temp); temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; } diff --git a/sortseqscommand.cpp b/sortseqscommand.cpp index 2c9e854..0236a50 100644 --- a/sortseqscommand.cpp +++ b/sortseqscommand.cpp @@ -232,7 +232,7 @@ int SortSeqsCommand::execute(){ //read through the correct file and output lines you want to keep if (accnosfile != "") { readAccnos(); } if (fastafile != "") { readFasta(); } - if (flowfile != "") { readFlow(); } + if (flowfile != "") { readFlow(); } if (qualfile != "") { readQual(); } if (namefile != "") { readName(); } if (groupfile != "") { readGroup(); } diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index f55272e..9913727 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -371,7 +371,7 @@ int TrimSeqsCommand::execute(){ outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } } - + //fills lines and qlines setLines(fastaFile, qFileName); @@ -1071,8 +1071,6 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) { //set file positions for fasta file fastaFilePos = m->divideFile(filename, processors); - if (qfilename == "") { return processors; } - //get name of first sequence in each chunk map 
firstSeqNames; for (int i = 0; i < (fastaFilePos.size()-1); i++) { @@ -1085,59 +1083,61 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) { in.close(); } - - //seach for filePos of each first name in the qfile and save in qfileFilePos - ifstream inQual; - m->openInputFile(qfilename, inQual); - - string input; - while(!inQual.eof()){ - input = m->getline(inQual); - - if (input.length() != 0) { - if(input[0] == '>'){ //this is a sequence name line - istringstream nameStream(input); - - string sname = ""; nameStream >> sname; - sname = sname.substr(1); - - map::iterator it = firstSeqNames.find(sname); - - if(it != firstSeqNames.end()) { //this is the start of a new chunk - unsigned long long pos = inQual.tellg(); - qfileFilePos.push_back(pos - input.length() - 1); - firstSeqNames.erase(it); - } - } - } - - if (firstSeqNames.size() == 0) { break; } - } - inQual.close(); - - - if (firstSeqNames.size() != 0) { - for (map::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) { - m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine(); - } - qFileName = ""; - return processors; - } - - //get last file position of qfile - FILE * pFile; - unsigned long long size; - - //get num bytes in file - pFile = fopen (qfilename.c_str(),"rb"); - if (pFile==NULL) perror ("Error opening file"); - else{ - fseek (pFile, 0, SEEK_END); - size=ftell (pFile); - fclose (pFile); - } - qfileFilePos.push_back(size); + if(qfilename != "") { + //seach for filePos of each first name in the qfile and save in qfileFilePos + ifstream inQual; + m->openInputFile(qfilename, inQual); + + string input; + while(!inQual.eof()){ + input = m->getline(inQual); + + if (input.length() != 0) { + if(input[0] == '>'){ //this is a sequence name line + istringstream nameStream(input); + + string sname = ""; nameStream >> sname; + sname = sname.substr(1); + + map::iterator it = firstSeqNames.find(sname); + + if(it != 
firstSeqNames.end()) { //this is the start of a new chunk + unsigned long long pos = inQual.tellg(); + qfileFilePos.push_back(pos - input.length() - 1); + firstSeqNames.erase(it); + } + } + } + + if (firstSeqNames.size() == 0) { break; } + } + inQual.close(); + + + if (firstSeqNames.size() != 0) { + for (map::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) { + m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine(); + } + qFileName = ""; + return processors; + } + + //get last file position of qfile + FILE * pFile; + unsigned long long size; + + //get num bytes in file + pFile = fopen (qfilename.c_str(),"rb"); + if (pFile==NULL) perror ("Error opening file"); + else{ + fseek (pFile, 0, SEEK_END); + size=ftell (pFile); + fclose (pFile); + } + + qfileFilePos.push_back(size); + } for (int i = 0; i < (fastaFilePos.size()-1); i++) { lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));