From e7729bb337cfefcaba24549092cc89b44002af39 Mon Sep 17 00:00:00 2001 From: pschloss Date: Thu, 18 Jun 2009 12:20:36 +0000 Subject: [PATCH] added an alignreport option to screen seqs --- aligncommand.cpp | 3 -- libshuffcommand.cpp | 1 - screenseqscommand.cpp | 64 +++++++++++++++++++++++++++++++++++++------ screenseqscommand.h | 5 ++-- trimseqscommand.cpp | 54 +++++++++++++++++++++++------------- trimseqscommand.h | 2 +- 6 files changed, 95 insertions(+), 34 deletions(-) diff --git a/aligncommand.cpp b/aligncommand.cpp index e628908..ccfd9bb 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -145,9 +145,6 @@ int AlignCommand::execute(){ try { if (abort == true) { return 0; } - srand( (unsigned)time( NULL ) ); //needed to assign names to temporary files - - if(search == "kmer") { templateDB = new KmerDB(templateFileName, kmerSize); } else if(search == "suffix") { templateDB = new SuffixDB(templateFileName); } else if(search == "blast") { templateDB = new BlastDB(templateFileName, gapOpen, gapExtend, match, misMatch); } diff --git a/libshuffcommand.cpp b/libshuffcommand.cpp index 9a71a03..75e6137 100644 --- a/libshuffcommand.cpp +++ b/libshuffcommand.cpp @@ -22,7 +22,6 @@ LibShuffCommand::LibShuffCommand(string option){ try { - srand( (unsigned)time( NULL ) ); globaldata = GlobalData::getInstance(); abort = false; diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index bc375de..1c70a2b 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -21,7 +21,8 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option){ else { //valid paramters for this command - string AlignArray[] = {"fasta", "start", "end", "maxambig", "maxhomop", "minlength", "maxlength", "name", "group"}; + string AlignArray[] = {"fasta", "start", "end", "maxambig", "maxhomop", "minlength", "maxlength", + "name", "group", "alignreport"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); OptionParser parser(option); @@ -47,7 +48,10 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option){ if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } - + alignreport = validParameter.validFile(parameters, "alignreport", true); + if (alignreport == "not open") { abort = true; } + else if (alignreport == "not found") { namefile = ""; } + //check for optional parameter and set defaults // ...at some point should added some additional type checking... string temp; @@ -152,12 +156,9 @@ int ScreenSeqsCommand::execute(){ } gobble(inFASTA); } - if(namefile != ""){ - screenNameGroupFile(badSeqNames); - } - else if(groupfile != ""){ - screenGroupFile(badSeqNames); - } + if(namefile != "") { screenNameGroupFile(badSeqNames); } + if(groupfile != "") { screenGroupFile(badSeqNames); } + if(alignreport != "") { screenAlignReport(badSeqNames); } return 0; } @@ -282,4 +283,51 @@ void ScreenSeqsCommand::screenGroupFile(set badSeqNames){ //*************************************************************************************************************** +void ScreenSeqsCommand::screenAlignReport(set badSeqNames){ + + ifstream inputAlignReport; + openInputFile(alignreport, inputAlignReport); + string seqName, group; + set::iterator it; + + string goodAlignReportFile = getRootName(alignreport) + "good" + getExtension(alignreport); + string badAlignReportFile = getRootName(alignreport) + "bad" + getExtension(alignreport); + + ofstream goodAlignReportOut; openOutputFile(goodAlignReportFile, goodAlignReportOut); + ofstream badAlignReportOut; openOutputFile(badAlignReportFile, badAlignReportOut); + + while (!inputAlignReport.eof()) { // need to copy header + char c = inputAlignReport.get(); + goodAlignReportOut << c; + badAlignReportOut << c; + if (c == 10 || c == 13){ break; } + } + + while(!inputAlignReport.eof()){ + inputAlignReport >> seqName; + it = badSeqNames.find(seqName); + string line; + while (!inputAlignReport.eof()) { // need to copy header + char c = inputAlignReport.get(); + line += c; + if (c == 10 || c == 13){ break; } + } + + if(it != badSeqNames.end()){ + badSeqNames.erase(it); + badAlignReportOut << seqName << '\t' << line;; + } + else{ + goodAlignReportOut << seqName << '\t' << line; + } + gobble(inputAlignReport); + } + inputAlignReport.close(); + goodAlignReportOut.close(); + badAlignReportOut.close(); + +} + +//*************************************************************************************************************** + diff --git a/screenseqscommand.h b/screenseqscommand.h index eab66ee..269275d 100644 --- a/screenseqscommand.h +++ b/screenseqscommand.h @@ -23,9 +23,10 @@ public: private: void screenNameGroupFile(set); void screenGroupFile(set); - + void screenAlignReport(set); + bool abort; - string fastafile, namefile, groupfile; + string fastafile, namefile, groupfile, alignreport; int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength; }; diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 132cc6b..e616b74 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -78,16 +78,19 @@ TrimSeqsCommand::TrimSeqsCommand(string option){ temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; } allFiles = isTrue(temp); - + if(allFiles && oligoFile == ""){ cout << "You selected allfiles, but didn't enter an oligos file. Ignoring the allfiles request." << endl; } - + if((qAverage != 0 && qThreshold != 0) && qFileName == ""){ + cout << "You didn't provide a quality file name, quality criteria will be ignored." << endl; + qAverage=0; + qThreshold=0; + } if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){ cout << "You didn't set any options... quiting command." << endl; abort = true; } - } } @@ -141,21 +144,21 @@ int TrimSeqsCommand::execute(){ try{ if (abort == true) { return 0; } - - vector groupFileNames; - vector fastaFileNames; - if(oligoFile != "") { getOligos(fastaFileNames, groupFileNames); } ifstream inFASTA; openInputFile(fastaFile, inFASTA); - + ofstream outFASTA; string trimSeqFile = getRootName(fastaFile) + "trim.fasta"; openOutputFile(trimSeqFile, outFASTA); ofstream outGroups; - string groupFile = getRootName(fastaFile) + "groups"; - openOutputFile(groupFile, outGroups); + vector fastaFileNames; + if(oligoFile != ""){ + string groupFile = getRootName(fastaFile) + "groups"; + openOutputFile(groupFile, outGroups); + getOligos(fastaFileNames); + } ofstream scrapFASTA; string scrapSeqFile = getRootName(fastaFile) + "scrap.fasta"; @@ -210,7 +213,6 @@ int TrimSeqsCommand::execute(){ outGroups << currSeq.getName() << '\t' << groupVector[group] << endl; if(allFiles){ - *groupFileNames[group] << currSeq.getName() << '\t' << groupVector[group] << endl; currSeq.printSequence(*fastaFileNames[group]); } } @@ -227,14 +229,29 @@ int TrimSeqsCommand::execute(){ scrapFASTA.close(); outGroups.close(); - for(int i=0;iclose(); - delete groupFileNames[i]; - + for(int i=0;iclose(); delete fastaFileNames[i]; + } + + for(int i=0;i'){ + inFASTA >> seqName; + outGroups << seqName << '\t' << groupVector[i] << endl; + } + while (!inFASTA.eof()) { char c = inFASTA.get(); if (c == 10 || c == 13){ break; } } + } + outGroups.close(); + inFASTA.close(); } + return 0; } catch(exception& e) { @@ -249,7 +266,7 @@ int TrimSeqsCommand::execute(){ //*************************************************************************************************************** -void TrimSeqsCommand::getOligos(vector& outFASTAVec, vector& outGroupsVec){ +void TrimSeqsCommand::getOligos(vector& outFASTAVec){ ifstream inOligos; openInputFile(oligoFile, inOligos); @@ -258,10 +275,10 @@ void TrimSeqsCommand::getOligos(vector& outFASTAVec, vector> type; - + if(type[0] == '#'){ while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there } @@ -286,7 +303,6 @@ void TrimSeqsCommand::getOligos(vector& outFASTAVec, vector&, vector&); + void getOligos(vector&); bool stripQualThreshold(Sequence&, ifstream&); bool cullQualAverage(Sequence&, ifstream&); bool stripBarcode(Sequence&, int&); -- 2.39.2