From: westcott Date: Mon, 19 Jul 2010 10:50:54 +0000 (+0000) Subject: added fasta, qfile, trim, accnos and sfftxt parameter to sffinfo command. added... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=dfae916a398508554d35c6b3c8002b69becb53be added fasta, qfile, trim, accnos and sfftxt parameter to sffinfo command. added lci hci outputs for thetayc --- diff --git a/collect.cpp b/collect.cpp index fa6f677..79e997d 100644 --- a/collect.cpp +++ b/collect.cpp @@ -88,16 +88,17 @@ try { //initialize labels for output //makes 'uniqueAB uniqueAC uniqueBC' if your groups are A, B, C getGroupComb(); - groupLabel = ""; - for (int s = 0; s < groupComb.size(); s++) { - groupLabel = groupLabel + label + groupComb[s] + "\t"; - } - - //for multi displays - string groupLabelAll = groupLabel + label + "all\t"; for(int i=0;iregisterDisplay(displays[i]); //adds a display[i] to cdd + bool hasLciHci = displays[i]->hasLciHci(); + groupLabel = ""; + for (int s = 0; s < groupComb.size(); s++) { + if (hasLciHci) { groupLabel = groupLabel + label + groupComb[s] + "\t" + label + groupComb[s] + "lci\t" + label + groupComb[s] + "hci\t"; } + else{ groupLabel = groupLabel + label + groupComb[s] + "\t"; } + } + + string groupLabelAll = groupLabel + label + "all\t"; if ((displays[i]->isCalcMultiple() == true) && (displays[i]->getAll() == true)) { displays[i]->init(groupLabelAll); } else { displays[i]->init(groupLabel); } } diff --git a/collectdisplay.h b/collectdisplay.h index 15cc5a7..d33efe9 100644 --- a/collectdisplay.h +++ b/collectdisplay.h @@ -72,14 +72,19 @@ public: output->output(numSeqs, groupData); } }; - + void init(string s) { output->initFile(s); }; void reset() { output->resetFile(); }; void close() { output->resetFile(); }; void setAll(bool a) { all = a; } bool getAll() { return all; } - bool isCalcMultiple() { return estimate->getMultiple(); } + + bool isCalcMultiple() { return estimate->getMultiple(); } + bool hasLciHci() { + if (estimate->getCols() == 3) { return true; } + else{ return false; } + } string getName() { return estimate->getName(); } diff --git a/collectorscurvedata.h b/collectorscurvedata.h index 9c70e7a..bde016a 100644 --- a/collectorscurvedata.h +++ b/collectorscurvedata.h @@ -40,7 +40,7 @@ class SharedCollectorsCurveData : public Observable { public: SharedCollectorsCurveData() { }; //: shared1(0), shared2(0) - void registerDisplay(Display* o) { displays.insert(o); }; + void registerDisplay(Display* o) { displays.insert(o); }; void removeDisplay(Display* o) { displays.erase(o); delete o; }; void SharedDataChanged() { notifyDisplays(); }; void updateSharedData(vector s, int numSeqs, int numGroupComb) { shared = s; NumSeqs = numSeqs; NumGroupComb = numGroupComb; SharedDataChanged(); }; diff --git a/display.h b/display.h index 8671fa6..8c73b57 100644 --- a/display.h +++ b/display.h @@ -19,6 +19,7 @@ public: virtual void close() = 0; virtual bool isCalcMultiple() = 0; virtual void setAll(bool){} + virtual bool hasLciHci(){ return false; } virtual bool getAll() { bool a; return a; } virtual string getName() { return ""; }; virtual ~Display() {} diff --git a/makefile b/makefile index efac3fb..8ea410e 100644 --- a/makefile +++ b/makefile @@ -24,6 +24,10 @@ endif ifeq ($(strip $(64BIT_VERSION)),yes) TARGET_ARCH += -arch x86_64 CXXFLAGS += -DBIT_VERSION + + #if you are using centos uncomment the following lines + #CC = g++44 + #CXXFLAGS += -mtune=native -march=native -m64 endif # if you do not want to use the readline library, set this to no. diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp index 9c96491..885aadc 100644 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -15,13 +15,14 @@ SffInfoCommand::SffInfoCommand(string option) { try { abort = false; + hasAccnos = false; //allow user to run help if(option == "help") { help(); abort = true; } else { //valid paramters for this command - string Array[] = {"sff","outputdir","inputdir", "outputdir"}; + string Array[] = {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -67,6 +68,55 @@ SffInfoCommand::SffInfoCommand(string option) { //make sure there is at least one valid file left if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } } + + accnosName = validParameter.validFile(parameters, "accnos", false); + if (accnosName == "not found") { accnosName = ""; } + else { + hasAccnos = true; + splitAtDash(accnosName, accnosFileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < accnosFileNames.size(); i++) { + if (inputDir != "") { + string path = hasPath(accnosFileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { accnosFileNames[i] = inputDir + accnosFileNames[i]; } + } + + ifstream in; + int ableToOpen = openInputFile(accnosFileNames[i], in); + in.close(); + + if (ableToOpen == 1) { + m->mothurOut(accnosFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); + //erase from file list + accnosFileNames.erase(accnosFileNames.begin()+i); + i--; + } + } + + //make sure there is at least one valid file left + if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } + } + + if (hasAccnos) { + if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); } + } + + string temp = validParameter.validFile(parameters, "qfile", false); if (temp == "not found"){ temp = "T"; } + qual = isTrue(temp); + + temp = validParameter.validFile(parameters, "fasta", false); if (temp == "not found"){ temp = "T"; } + fasta = isTrue(temp); + + temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "F"; } + flow = isTrue(temp); + + temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; } + trim = isTrue(temp); + + temp = validParameter.validFile(parameters, "sfftxt", false); if (temp == "not found"){ temp = "F"; } + sfftxt = isTrue(temp); } } catch(exception& e) { @@ -106,12 +156,10 @@ int SffInfoCommand::execute(){ m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine(); - if (outputDir == "") { outputDir += hasPath(filenames[s]); } - string outputFileName = outputDir + getRootName(getSimpleName(filenames[s])) + "sff.txt"; - - int numReads = extractSffInfo(filenames[s], outputFileName); + string accnos = ""; + if (hasAccnos) { accnos = accnosFileNames[s]; } - outputNames.push_back(outputFileName); + int numReads = extractSffInfo(filenames[s], accnos); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + "."); } @@ -132,13 +180,30 @@ int SffInfoCommand::execute(){ } } //********************************************************************************************************************** -int SffInfoCommand::extractSffInfo(string input, string output){ +int SffInfoCommand::extractSffInfo(string input, string accnos){ try { - ofstream out; - openOutputFile(output, out); + if (outputDir == "") { outputDir += hasPath(input); } - out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); + if (accnos != "") { readAccnosFile(accnos); } + else { seqNames.clear(); } + + ofstream outSfftxt, outFasta, outQual, outFlow; + string outFastaFileName, outQualFileName; + string sfftxtFileName = outputDir + getRootName(getSimpleName(input)) + "sff.txt"; + string outFlowFileName = outputDir + getRootName(getSimpleName(input)) + "flow"; + if (trim) { + outFastaFileName = outputDir + getRootName(getSimpleName(input)) + "fasta"; + outQualFileName = outputDir + getRootName(getSimpleName(input)) + "qual"; + }else{ + outFastaFileName = outputDir + getRootName(getSimpleName(input)) + "raw.fasta"; + outQualFileName = outputDir + getRootName(getSimpleName(input)) + "raw.qual"; + } + + if (sfftxt) { openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint); outputNames.push_back(sfftxtFileName); } + if (fasta) { openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); } + if (qual) { openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); } + if (flow) { openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); } ifstream in; in.open(input.c_str(), ios::binary); @@ -146,31 +211,38 @@ int SffInfoCommand::extractSffInfo(string input, string output){ CommonHeader header; readCommonHeader(in, header); - //print common header - printCommonHeader(out, header); - int count = 0; //check magic number and version if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; } if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; } + + //print common header + if (sfftxt) { printCommonHeader(outSfftxt, header); } //read through the sff file while (!in.eof()) { + bool print = true; + //read header Header readheader; readHeader(in, readheader); - - //print header - printHeader(out, readheader); //read data seqRead read; readSeqData(in, read, header.numFlowsPerRead, readheader.numBases); - - //print data - printSeqData(out, read); + + //if you have provided an accosfile and this seq is not in it, then dont print + if (seqNames.size() != 0) { if (seqNames.count(readheader.name) == 0) { print = false; } } + + //print + if (print) { + if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read); } + if (fasta) { printFastaSeqData(outFasta, read, readheader); } + if (qual) { printQualSeqData(outQual, read, readheader); } + if (flow) { printFlowSeqData(outFlow, read, readheader); } + } count++; @@ -186,7 +258,11 @@ int SffInfoCommand::extractSffInfo(string input, string output){ if (!m->control_pressed) { if((count) % 500 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } } in.close(); - out.close(); + + if (sfftxt) { outSfftxt.close(); } + if (fasta) { outFasta.close(); } + if (qual) { outQual.close(); } + if (flow) { outFlow.close(); } return count; } @@ -416,6 +492,7 @@ int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) { //********************************************************************************************************************** int SffInfoCommand::printHeader(ofstream& out, Header& header) { try { + out << ">" << header.name << endl; out << "Run Prefix: " << endl; out << "Region #: " << endl; @@ -442,7 +519,7 @@ int SffInfoCommand::printHeader(ofstream& out, Header& header) { } //********************************************************************************************************************** -int SffInfoCommand::printSeqData(ofstream& out, seqRead& read) { +int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read) { try { out << "FlowGram: "; @@ -459,7 +536,92 @@ int SffInfoCommand::printSeqData(ofstream& out, seqRead& read) { return 0; } catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "printSeqData"); + m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData"); + exit(1); + } +} +//********************************************************************************************************************** +int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) { + try { + + string seq = read.bases; + + + if (trim) { + seq = seq.substr(header.clipQualLeft, (header.clipQualRight-header.clipQualLeft)); + } + + out << ">" << header.name << endl; + out << seq << endl; + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "printFastaSeqData"); + exit(1); + } +} + +//********************************************************************************************************************** +int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) { + try { + + if (trim) { + out << ">" << header.name << " length=" << (header.clipQualRight-header.clipQualLeft) << endl; + for (int i = header.clipQualLeft; i < (header.clipQualRight-header.clipQualLeft); i++) { out << read.qualScores[i] << '\t'; } + }else{ + out << ">" << header.name << " length=" << read.qualScores.size() << endl; + for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; } + } + + out << endl; + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "printQualSeqData"); + exit(1); + } +} + +//********************************************************************************************************************** +int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) { + try { + + out << ">" << header.name << endl; + for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t'; } + out << endl; + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "printFlowSeqData"); + exit(1); + } +} +//********************************************************************************************************************** +int SffInfoCommand::readAccnosFile(string filename) { + try { + //remove old names + seqNames.clear(); + + ifstream in; + openInputFile(filename, in); + string name; + + while(!in.eof()){ + in >> name; gobble(in); + + seqNames.insert(name); + + if (m->control_pressed) { seqNames.clear(); break; } + } + in.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "readAccnosFile"); exit(1); } } diff --git a/sffinfocommand.h b/sffinfocommand.h index 219e125..b533f28 100644 --- a/sffinfocommand.h +++ b/sffinfocommand.h @@ -65,9 +65,10 @@ public: void help(); private: - string sffFilename, outputDir; - vector filenames, outputNames; - bool abort; + string sffFilename, outputDir, accnosName; + vector filenames, outputNames, accnosFileNames; + bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos; + set seqNames; int extractSffInfo(string, string); int readCommonHeader(ifstream&, CommonHeader&); @@ -76,7 +77,11 @@ private: int printCommonHeader(ofstream&, CommonHeader&); int printHeader(ofstream&, Header&); - int printSeqData(ofstream&, seqRead&); + int printSffTxtSeqData(ofstream&, seqRead&); + int printFlowSeqData(ofstream&, seqRead&, Header&); + int printFastaSeqData(ofstream&, seqRead&, Header&); + int printQualSeqData(ofstream&, seqRead&, Header&); + int readAccnosFile(string); }; diff --git a/sharedthetayc.cpp b/sharedthetayc.cpp index 6f3f250..ca066dd 100644 --- a/sharedthetayc.cpp +++ b/sharedthetayc.cpp @@ -70,6 +70,10 @@ EstOutput ThetaYC::getValues(vector shared) { data[1] = thetaYC - ci; data[2] = thetaYC + ci; + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } + if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } + if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } + return data; } catch(exception& e) {