]> git.donarmstrong.com Git - mothur.git/commitdiff
added an alignreport option to screen seqs
authorpschloss <pschloss>
Thu, 18 Jun 2009 12:20:36 +0000 (12:20 +0000)
committerpschloss <pschloss>
Thu, 18 Jun 2009 12:20:36 +0000 (12:20 +0000)
aligncommand.cpp
libshuffcommand.cpp
screenseqscommand.cpp
screenseqscommand.h
trimseqscommand.cpp
trimseqscommand.h

index e628908e07f7994a117ff70585fc6c0e54d2377e..ccfd9bbec751e3000afd318d063a094c9745ec4a 100644 (file)
@@ -145,9 +145,6 @@ int AlignCommand::execute(){
        try {
                if (abort == true) {    return 0;       }
                
-               srand( (unsigned)time( NULL ) );  //needed to assign names to temporary files
-               
-               
                if(search == "kmer")                    {       templateDB = new KmerDB(templateFileName, kmerSize);    }
                else if(search == "suffix")             {       templateDB = new SuffixDB(templateFileName);                    }
                else if(search == "blast")              {       templateDB = new BlastDB(templateFileName, gapOpen, gapExtend, match, misMatch);        }
index 9a71a038cd4d0bd29d7eaeeeaae96d9847d81fa8..75e613719f9a819dc9cf4882b6442feb031cf296 100644 (file)
@@ -22,7 +22,6 @@
 
 LibShuffCommand::LibShuffCommand(string option){
        try {
-               srand( (unsigned)time( NULL ) );
                
                globaldata = GlobalData::getInstance();
                abort = false;
index bc375dee4e36de59fa2e8ae66d3516aa41c9339b..1c70a2b5a9723b64a4b9edd34cb2fc81e374e42d 100644 (file)
@@ -21,7 +21,8 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string AlignArray[] =  {"fasta", "start", "end", "maxambig", "maxhomop", "minlength", "maxlength", "name", "group"};
+                       string AlignArray[] =  {"fasta", "start", "end", "maxambig", "maxhomop", "minlength", "maxlength",
+                                                                       "name", "group", "alignreport"};
                        vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -47,7 +48,10 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option){
                        if (namefile == "not open") { abort = true; }
                        else if (namefile == "not found") { namefile = ""; }    
 
-               
+                       alignreport = validParameter.validFile(parameters, "alignreport", true);
+                       if (alignreport == "not open") { abort = true; }
+                       else if (alignreport == "not found") { namefile = ""; } 
+                       
                        //check for optional parameter and set defaults
                        // ...at some point should added some additional type checking...
                        string temp;
@@ -152,12 +156,9 @@ int ScreenSeqsCommand::execute(){
                        }
                        gobble(inFASTA);
                }       
-               if(namefile != ""){
-                       screenNameGroupFile(badSeqNames);
-               }
-               else if(groupfile != ""){
-                       screenGroupFile(badSeqNames);
-               }
+               if(namefile != "")              {       screenNameGroupFile(badSeqNames);       }
+               if(groupfile != "")             {       screenGroupFile(badSeqNames);           }
+               if(alignreport != "")   {       screenAlignReport(badSeqNames);         }
                
                return 0;
        }
@@ -282,4 +283,51 @@ void ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
 
 //***************************************************************************************************************
 
+void ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
+       
+       ifstream inputAlignReport;
+       openInputFile(alignreport, inputAlignReport);
+       string seqName, group;
+       set<string>::iterator it;
+       
+       string goodAlignReportFile = getRootName(alignreport) + "good" + getExtension(alignreport);
+       string badAlignReportFile = getRootName(alignreport) + "bad" + getExtension(alignreport);
+       
+       ofstream goodAlignReportOut;    openOutputFile(goodAlignReportFile, goodAlignReportOut);
+       ofstream badAlignReportOut;             openOutputFile(badAlignReportFile, badAlignReportOut);          
+
+       while (!inputAlignReport.eof()) {               //      need to copy header
+               char c = inputAlignReport.get();
+               goodAlignReportOut << c;
+               badAlignReportOut << c;
+               if (c == 10 || c == 13){        break;  }       
+       }
+
+       while(!inputAlignReport.eof()){
+               inputAlignReport >> seqName;
+               it = badSeqNames.find(seqName);
+               string line;            
+               while (!inputAlignReport.eof()) {               //      need to copy header
+                       char c = inputAlignReport.get();
+                       line += c;
+                       if (c == 10 || c == 13){        break;  }       
+               }
+               
+               if(it != badSeqNames.end()){
+                       badSeqNames.erase(it);
+                       badAlignReportOut << seqName << '\t' << line;;
+               }
+               else{
+                       goodAlignReportOut << seqName << '\t' << line;
+               }
+               gobble(inputAlignReport);
+       }
+       inputAlignReport.close();
+       goodAlignReportOut.close();
+       badAlignReportOut.close();
+       
+}
+
+//***************************************************************************************************************
+
 
index eab66ee23d5c0d07a6498b67ab130fb8a7622f4d..269275d38409c251ef3f9b0b1c2552602367f348 100644 (file)
@@ -23,9 +23,10 @@ public:
 private:
        void screenNameGroupFile(set<string>);
        void screenGroupFile(set<string>);
-
+       void screenAlignReport(set<string>);
+       
        bool abort;
-       string fastafile, namefile, groupfile;
+       string fastafile, namefile, groupfile, alignreport;
        int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
 };
 
index 132cc6b767956e2c5b522adb4facb9904fc30a78..e616b744b2ec04d52639eca24cef8fb2af65e783 100644 (file)
@@ -78,16 +78,19 @@ TrimSeqsCommand::TrimSeqsCommand(string option){
                        
                        temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
                        allFiles = isTrue(temp);
-
+                       
                        if(allFiles && oligoFile == ""){
                                cout << "You selected allfiles, but didn't enter an oligos file.  Ignoring the allfiles request." << endl;
                        }
-                       
+                       if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
+                               cout << "You didn't provide a quality file name, quality criteria will be ignored." << endl;
+                               qAverage=0;
+                               qThreshold=0;
+                       }
                        if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
                                cout << "You didn't set any options... quiting command." << endl;
                                abort = true;
                        }
-                       
                }
 
        }
@@ -141,21 +144,21 @@ int TrimSeqsCommand::execute(){
        try{
        
                if (abort == true) { return 0; }
-       
-               vector<ofstream*> groupFileNames;
-               vector<ofstream*> fastaFileNames;
-               if(oligoFile != "")     {       getOligos(fastaFileNames, groupFileNames);      }
 
                ifstream inFASTA;
                openInputFile(fastaFile, inFASTA);
-
+               
                ofstream outFASTA;
                string trimSeqFile = getRootName(fastaFile) + "trim.fasta";
                openOutputFile(trimSeqFile, outFASTA);
                
                ofstream outGroups;
-               string groupFile = getRootName(fastaFile) + "groups"; 
-               openOutputFile(groupFile, outGroups);
+               vector<ofstream*> fastaFileNames;
+               if(oligoFile != ""){
+                       string groupFile = getRootName(fastaFile) + "groups"; 
+                       openOutputFile(groupFile, outGroups);
+                       getOligos(fastaFileNames);
+               }
                
                ofstream scrapFASTA;
                string scrapSeqFile = getRootName(fastaFile) + "scrap.fasta";
@@ -210,7 +213,6 @@ int TrimSeqsCommand::execute(){
                                        outGroups << currSeq.getName() << '\t' << groupVector[group] << endl;
                                        
                                        if(allFiles){
-                                               *groupFileNames[group] << currSeq.getName() << '\t' << groupVector[group] << endl;                                      
                                                currSeq.printSequence(*fastaFileNames[group]);                                  
                                        }
                                }
@@ -227,14 +229,29 @@ int TrimSeqsCommand::execute(){
                scrapFASTA.close();
                outGroups.close();
                
-               for(int i=0;i<groupFileNames.size();i++){
-                       groupFileNames[i]->close();
-                       delete groupFileNames[i];
-
+               for(int i=0;i<fastaFileNames.size();i++){
                        fastaFileNames[i]->close();
                        delete fastaFileNames[i];
+               }               
+               
+               for(int i=0;i<fastaFileNames.size();i++){
+                       string seqName;
+                       openInputFile(getRootName(fastaFile) + groupVector[i] + ".fasta", inFASTA);
+                       ofstream outGroups;
+                       openOutputFile(getRootName(fastaFile) + groupVector[i] + ".groups", outGroups);
+                       
+                       while(!inFASTA.eof()){
+                               if(inFASTA.get() == '>'){
+                                       inFASTA >> seqName;
+                                       outGroups << seqName << '\t' << groupVector[i] << endl;
+                               }
+                               while (!inFASTA.eof())  {       char c = inFASTA.get(); if (c == 10 || c == 13){        break;  }       }
+                       }
+                       outGroups.close();
+                       inFASTA.close();
                }
                
+               
                return 0;               
        }
        catch(exception& e) {
@@ -249,7 +266,7 @@ int TrimSeqsCommand::execute(){
 
 //***************************************************************************************************************
 
-void TrimSeqsCommand::getOligos(vector<ofstream*>& outFASTAVec, vector<ofstream*>& outGroupsVec){
+void TrimSeqsCommand::getOligos(vector<ofstream*>& outFASTAVec){
        
        ifstream inOligos;
        openInputFile(oligoFile, inOligos);
@@ -258,10 +275,10 @@ void TrimSeqsCommand::getOligos(vector<ofstream*>& outFASTAVec, vector<ofstream*
        
        string type, oligo, group;
        int index=0;
-       
+
        while(!inOligos.eof()){
                inOligos >> type;
-               
+
                if(type[0] == '#'){
                        while (!inOligos.eof()) {       char c = inOligos.get(); if (c == 10 || c == 13){       break;  }       } // get rest of line if there's any crap there
                }
@@ -286,7 +303,6 @@ void TrimSeqsCommand::getOligos(vector<ofstream*>& outFASTAVec, vector<ofstream*
                                        
                                if(allFiles){
                                        outFASTAVec.push_back(new ofstream((getRootName(fastaFile) + group + ".fasta").c_str(), ios::ate));
-                                       outGroupsVec.push_back(new ofstream((getRootName(fastaFile) + group + ".groups").c_str(), ios::ate));
                                }
                        }
                }
index ee79d777a8d11ffaa2e279e69463314f9c3b8938..87b813ff9afa8575855c5cca69916de7287f9d9f 100644 (file)
@@ -22,7 +22,7 @@ public:
        void help();
        
 private:
-       void getOligos(vector<ofstream*>&, vector<ofstream*>&);
+       void getOligos(vector<ofstream*>&);
        bool stripQualThreshold(Sequence&, ifstream&);
        bool cullQualAverage(Sequence&, ifstream&);
        bool stripBarcode(Sequence&, int&);