]> git.donarmstrong.com Git - mothur.git/commitdiff
paralellized seq.error and dist.shared added some error checks to libshuff and dist...
authorwestcott <westcott>
Tue, 24 May 2011 19:39:22 +0000 (19:39 +0000)
committerwestcott <westcott>
Tue, 24 May 2011 19:39:22 +0000 (19:39 +0000)
20 files changed:
deconvolutecommand.cpp
distancecommand.cpp
getlineagecommand.cpp
getlineagecommand.h
getseqscommand.cpp
libshuffcommand.cpp
matrixoutputcommand.cpp
matrixoutputcommand.h
mothurout.cpp
mothurout.h
refchimeratest.cpp
refchimeratest.h
removelineagecommand.cpp
removelineagecommand.h
seqerrorcommand.cpp
seqerrorcommand.h
sequencedb.cpp
sequencedb.h
summarysharedcommand.cpp
trimseqscommand.cpp

index df97fd88ad471ba97daaa593233d4177f1fe21a2..6f7657f3da640514fb5125527b17cfd779a5cad0 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include "deconvolutecommand.h"
+#include "sequence.hpp"
 
 //**********************************************************************************************************************
 vector<string> DeconvoluteCommand::setParameters(){    
@@ -144,15 +145,95 @@ int DeconvoluteCommand::execute() {
                string outNameFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + "names";
                string outFastaFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + "unique" + m->getExtension(inFastaName);
                
-               FastaMap fastamap;
-       
-               if(oldNameMapFName == "")       {       fastamap.readFastaFile(inFastaName);                                    }
-               else                                            {       fastamap.readFastaFile(inFastaName, oldNameMapFName);   }
+               map<string, string> nameMap;
+               map<string, string>::iterator itNames;
+               if (oldNameMapFName != "")  {  m->readNames(oldNameMapFName, nameMap); }
                
                if (m->control_pressed) { return 0; }
                
-               fastamap.printCondensedFasta(outFastaFile);
-               fastamap.printNamesFile(outNameFile);
+               ifstream in; 
+               m->openInputFile(inFastaName, in);
+               
+               ofstream outFasta;
+               m->openOutputFile(outFastaFile, outFasta);
+               
+               map<string, string> sequenceStrings; //sequenceString -> list of names.  "atgc...." -> seq1,seq2,seq3.
+               map<string, string>::iterator itStrings;
+               set<string> nameInFastaFile; //for sanity checking
+               set<string>::iterator itname;
+               int count = 0;
+               while (!in.eof()) {
+                       
+                       if (m->control_pressed) { in.close(); outFasta.close(); remove(outFastaFile.c_str()); return 0; }
+                       
+                       Sequence seq(in);
+                       
+                       if (seq.getName() != "") {
+                               
+                               //sanity checks
+                               itname = nameInFastaFile.find(seq.getName());
+                               if (itname == nameInFastaFile.end()) { nameInFastaFile.insert(seq.getName());  }
+                               else { m->mothurOut("[ERROR]: You already have a sequence named " + seq.getName() + " in your fasta file, sequence names must be unique, please correct."); m->mothurOutEndLine(); }
+
+                               itStrings = sequenceStrings.find(seq.getAligned());
+                               
+                               if (itStrings == sequenceStrings.end()) { //this is a new unique sequence
+                                       //output to unique fasta file
+                                       seq.printSequence(outFasta);
+                                       
+                                       if (oldNameMapFName != "") {
+                                               itNames = nameMap.find(seq.getName());
+                                               
+                                               if (itNames == nameMap.end()) { //namefile and fastafile do not match
+                                                       m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file, and not in your namefile, please correct."); m->mothurOutEndLine();
+                                               }else {
+                                                       sequenceStrings[seq.getAligned()] = itNames->second;
+                                               }
+                                       }else { sequenceStrings[seq.getAligned()] = seq.getName();      }
+                               }else { //this is a dup
+                                       if (oldNameMapFName != "") {
+                                               itNames = nameMap.find(seq.getName());
+                                               
+                                               if (itNames == nameMap.end()) { //namefile and fastafile do not match
+                                                       m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file, and not in your namefile, please correct."); m->mothurOutEndLine();
+                                               }else {
+                                                       sequenceStrings[seq.getAligned()] += "," + itNames->second;
+                                               }
+                                       }else { sequenceStrings[seq.getAligned()] += "," + seq.getName();       }
+                               }
+                               
+                               count++;
+                       }
+                       
+                       m->gobble(in);
+                       
+                       if(count % 1000 == 0)   { m->mothurOut(toString(count) + "\t" + toString(sequenceStrings.size())); m->mothurOutEndLine();       }
+               }
+               
+               if(count % 1000 != 0)   { m->mothurOut(toString(count) + "\t" + toString(sequenceStrings.size())); m->mothurOutEndLine();       }
+               
+               in.close();
+               outFasta.close();
+               
+               if (m->control_pressed) { remove(outFastaFile.c_str()); return 0; }
+               
+               //print new names file
+               ofstream outNames;
+               m->openOutputFile(outNameFile, outNames);
+               
+               for (itStrings = sequenceStrings.begin(); itStrings != sequenceStrings.end(); itStrings++) {
+                       if (m->control_pressed) { outputTypes.clear(); remove(outFastaFile.c_str()); outNames.close(); remove(outNameFile.c_str()); return 0; }
+                       
+                       //get rep name
+                       int pos = (itStrings->second).find_first_of(',');
+                       
+                       if (pos == string::npos) { // only reps itself
+                               outNames << itStrings->second << '\t' << itStrings->second << endl;
+                       }else {
+                               outNames << (itStrings->second).substr(0, pos) << '\t' << itStrings->second << endl;
+                       }
+               }
+               outNames.close();
                
                if (m->control_pressed) { outputTypes.clear(); remove(outFastaFile.c_str()); remove(outNameFile.c_str()); return 0; }
                
index 34206a9460bf21eac543ee3d91031f8fe4af96ad..b73cd7c751176c386a95931b0887c6790f046583 100644 (file)
@@ -247,6 +247,8 @@ int DistanceCommand::execute(){
                int numSeqs = alignDB.getNumSeqs();
                cutoff += 0.005;
                
+               if (!alignDB.sameLength()) {  m->mothurOut("[ERROR]: your sequences are not the same length, aborting."); m->mothurOutEndLine(); return 0; }
+               
                string outputFile;
                                
                if (output == "lt") { //does the user want lower triangle phylip formatted file 
index c6671d1721c691f4a3fdf0e6faba5eb890fe6e1e..fc173100942199c9f5f5ea1b400d0cb0ad978303 100644 (file)
@@ -210,7 +210,7 @@ GetLineageCommand::GetLineageCommand(string option)  {
                                if (taxons[0] == '\'') {  taxons = taxons.substr(1); }
                                if (taxons[(taxons.length()-1)] == '\'') {  taxons = taxons.substr(0, (taxons.length()-1)); }
                        }
-                       
+                       m->splitAtChar(taxons, listOfTaxons, '-');
                        
                        if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
                }
@@ -550,15 +550,18 @@ int GetLineageCommand::readTax(){
                string name, tax;
                
                //bool wroteSomething = false;
-               
-               bool taxonsHasConfidence = false;
-               vector< map<string, float> > searchTaxons;
-               string noConfidenceTaxons = taxons;
-               int hasConPos = taxons.find_first_of('(');
-               if (hasConPos != string::npos) {  
-                       taxonsHasConfidence = true; 
-                       searchTaxons = getTaxons(taxons); 
-                       noConfidenceTaxons = removeConfidences(taxons);
+               vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
+               vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
+               vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
+               
+               for (int i = 0; i < listOfTaxons.size(); i++) {
+                       noConfidenceTaxons[i] = listOfTaxons[i];
+                       int hasConPos = listOfTaxons[i].find_first_of('(');
+                       if (hasConPos != string::npos) {  
+                               taxonsHasConfidence[i] = true; 
+                               searchTaxons[i] = getTaxons(listOfTaxons[i]); 
+                               noConfidenceTaxons[i] = removeConfidences(listOfTaxons[i]);
+                       }
                }
                
                
@@ -569,91 +572,96 @@ int GetLineageCommand::readTax(){
                        in >> name;                             //read from first column
                        in >> tax;                      //read from second column
                        
-                       string newtax = tax;
-                       
+                       for (int j = 0; j < listOfTaxons.size(); j++) {
+                                                       
+                               string newtax = tax;
                        
-                       //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
-                       if (!taxonsHasConfidence) {
-                               int hasConfidences = tax.find_first_of('(');
-                               if (hasConfidences != string::npos) { 
-                                       newtax = removeConfidences(tax);
-                               }
-                               
-                               int pos = newtax.find(taxons);
+                               //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
+                               if (!taxonsHasConfidence[j]) {
+                                       int hasConfidences = tax.find_first_of('(');
+                                       if (hasConfidences != string::npos) { 
+                                               newtax = removeConfidences(tax);
+                                       }
                                
-                               if (pos != string::npos) { //this sequence contains the taxon the user wants
-                                       names.insert(name);
-                                       out << name << '\t' << tax << endl;
-                               }
+                                       int pos = newtax.find(noConfidenceTaxons[j]);
                                
-                       }else{//if taxons has them and you don't them remove taxons
-                               int hasConfidences = tax.find_first_of('(');
-                               if (hasConfidences == string::npos) { 
-                                       
-                                       int pos = newtax.find(noConfidenceTaxons);
-                                       
                                        if (pos != string::npos) { //this sequence contains the taxon the user wants
-                                               names.insert(name);
+                                               names.insert(name); 
                                                out << name << '\t' << tax << endl;
+                                               //since you belong to at least one of the taxons we want you are included so no need to search for other
+                                               break;
                                        }
-                               }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
-                                       //first remove confidences from both and see if the taxonomy exists
-                                       
-                                       string noNewTax = tax;
+                               }else{//if listOfTaxons[i] has them and you don't them remove taxons
                                        int hasConfidences = tax.find_first_of('(');
-                                       if (hasConfidences != string::npos) { 
-                                               noNewTax = removeConfidences(tax);
-                                       }
+                                       if (hasConfidences == string::npos) { 
                                        
-                                       int pos = noNewTax.find(noConfidenceTaxons);
+                                               int pos = newtax.find(noConfidenceTaxons[j]);
                                        
-                                       if (pos != string::npos) { //if yes, then are the confidences okay
+                                               if (pos != string::npos) { //this sequence contains the taxon the user wants
+                                                       names.insert(name);
+                                                       out << name << '\t' << tax << endl;
+                                                       //since you belong to at least one of the taxons we want you are included so no need to search for other
+                                                       break;
+                                               }
+                                       }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
+                                               //first remove confidences from both and see if the taxonomy exists
+                                       
+                                               string noNewTax = tax;
+                                               int hasConfidences = tax.find_first_of('(');
+                                               if (hasConfidences != string::npos) { 
+                                                       noNewTax = removeConfidences(tax);
+                                               }
+                                       
+                                               int pos = noNewTax.find(noConfidenceTaxons[j]);
+                                       
+                                               if (pos != string::npos) { //if yes, then are the confidences okay
                                                
-                                               bool good = true;
-                                               vector< map<string, float> > usersTaxon = getTaxons(newtax);
+                                                       bool good = true;
+                                                       vector< map<string, float> > usersTaxon = getTaxons(newtax);
                                                
-                                               //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
-                                               //we want to "line them up", so we will find the the index where the searchstring starts
-                                               int index = 0;
-                                               for (int i = 0; i < usersTaxon.size(); i++) {
+                                                       //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
+                                                       //we want to "line them up", so we will find the the index where the searchstring starts
+                                                       int index = 0;
+                                                       for (int i = 0; i < usersTaxon.size(); i++) {
                                                        
-                                                       if (usersTaxon[i].begin()->first == searchTaxons[0].begin()->first) { 
-                                                               index = i;  
-                                                               int spot = 0;
-                                                               bool goodspot = true;
-                                                               //is this really the start, or are we dealing with a taxon of the same name?
-                                                               while ((spot < searchTaxons.size()) && ((i+spot) < usersTaxon.size())) {
-                                                                       if (usersTaxon[i+spot].begin()->first != searchTaxons[spot].begin()->first) { goodspot = false; break; }
-                                                                       else { spot++; }
-                                                               }
+                                                               if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) { 
+                                                                       index = i;  
+                                                                       int spot = 0;
+                                                                       bool goodspot = true;
+                                                                       //is this really the start, or are we dealing with a taxon of the same name?
+                                                                       while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
+                                                                               if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
+                                                                               else { spot++; }
+                                                                       }
                                                                
-                                                               if (goodspot) { break; }
+                                                                       if (goodspot) { break; }
+                                                               }
                                                        }
-                                               }
                                                
-                                               for (int i = 0; i < searchTaxons.size(); i++) {
+                                                       for (int i = 0; i < searchTaxons[j].size(); i++) {
                                                        
-                                                       if ((i+index) < usersTaxon.size()) { //just in case, should never be false
-                                                               if (usersTaxon[i+index].begin()->second < searchTaxons[i].begin()->second) { //is the users cutoff less than the search taxons
+                                                               if ((i+index) < usersTaxon.size()) { //just in case, should never be false
+                                                                       if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
+                                                                               good = false;
+                                                                               break;
+                                                                       }
+                                                               }else {
                                                                        good = false;
                                                                        break;
                                                                }
-                                                       }else {
-                                                               good = false;
-                                                               break;
                                                        }
-                                               }
                                                
-                                               //passed the test so add you
-                                               if (good) {
-                                                       names.insert(name);
-                                                       out << name << '\t' << tax << endl;
+                                                       //passed the test so add you
+                                                       if (good) {
+                                                               names.insert(name);
+                                                               out << name << '\t' << tax << endl;
+                                                               break;
+                                                       }
                                                }
                                        }
                                }
-                       }
-                       
                        
+                       }
                        
                        m->gobble(in);
                }
index d62c50f55d253b7e3fd15790e2a6d4b21cb35b8f..ca84bf2fad3e11e60808daa15230b5674a4a7849 100644 (file)
@@ -32,7 +32,7 @@ class GetLineageCommand : public Command {
        
        private:
                set<string> names;
-               vector<string> outputNames;
+               vector<string> outputNames, listOfTaxons;
                string fastafile, namefile, groupfile, alignfile, listfile, taxfile, outputDir, taxons;
                bool abort, dups;
                
index ad9698b695815cc77459cc85d2cca1ec7cd24f67..a4697ca3fded8c34ec5320630a588e243601373d 100644 (file)
@@ -25,7 +25,8 @@ vector<string> GetSeqsCommand::setParameters(){
                CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-               
+               CommandParameter paccnos2("accnos2", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos2);
+
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
@@ -232,6 +233,11 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        if (qualfile == "not open") { abort = true; }
                        else if (qualfile == "not found") {  qualfile = "";  }
                        
+                       accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
+                       if (accnosfile2 == "not open") { abort = true; }
+                       else if (accnosfile2 == "not found") {  accnosfile2 = "";  }
+                       
+                       
                        string usedDups = "true";
                        string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
                        dups = m->isTrue(temp);
@@ -824,7 +830,15 @@ int GetSeqsCommand::compareAccnos(){
                        in >> name;
                        
                        if (namesAccnos.count(name) == 0){ //name unique to accnos2
-                               namesAccnos2.insert(name);
+                               int pos = name.find_last_of('_');
+                               string tempName = name;
+                               if (pos != string::npos) {  tempName = tempName.substr(pos+1); cout << tempName << endl; }
+                               if (namesAccnos.count(tempName) == 0){
+                                       namesAccnos2.insert(name);
+                               }else { //you are in both so erase
+                                       namesAccnos.erase(name);
+                                       namesDups.insert(name);
+                               }
                        }else { //you are in both so erase
                                namesAccnos.erase(name);
                                namesDups.insert(name);
index 02104142c753b626f245f256e910c4ed3351e6bb..11430117dd1df9c9e2696010324eb3c51b4f2fe6 100644 (file)
@@ -243,7 +243,11 @@ int LibShuffCommand::execute(){
                
                        
                setGroups();                                                            //set the groups to be analyzed and sorts them
-                       
+               
+               if (numGroups < 2) { m->mothurOut("[ERROR]: libshuff requires at least 2 groups, you only have " + toString(numGroups) + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
+               
+               if (m->control_pressed) { delete groupMap; delete matrix; return 0; }
+               
                /********************************************************************************************/
                //this is needed because when we read the matrix we sort it into groups in alphabetical order
                //the rest of the command and the classes used in this command assume specific order
index 9c7a83681bb8208d1317a629947a3078084a351e..adb145aecfd388a9eee5941e30d7bc31a8404358 100644 (file)
@@ -327,7 +327,6 @@ int MatrixOutputCommand::execute(){
                for (int i = 0; i < processors; i++) {
                        lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
                        lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
-                       cout << i << '\t' << lines[i].start << '\t' << lines[i].end << endl;
                }       
                
                if (m->control_pressed) { delete input; for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->Groups.clear(); return 0;  }
index 2c403aa1570895546ea1cbeb8e6c2e46d406c7e3..b42cfb92c62a74fdb55b7902c3bfa9644c7359f7 100644 (file)
@@ -38,21 +38,30 @@ public:
        void help() { m->mothurOut(getHelpString()); }  
        
 private:
-       void printSims(ostream&);
+       struct linePair {
+               int start;
+               int end;
+       };
+       vector<linePair> lines;
+       
+       void printSims(ostream&, vector< vector<float> >&);
        int process(vector<SharedRAbundVector*>);
        
        vector<Calculator*> matrixCalculators;
-       vector< vector<float> > simMatrix;
+       //vector< vector<float> > simMatrix;
        InputData* input;
        vector<SharedRAbundVector*> lookup;
        string exportFileName, output, sharedfile;
-       int numGroups;
+       int numGroups, processors;
        ofstream out;
 
        bool abort, allLines;
        set<string> labels; //holds labels to be used
        string outputFile, calc, groups, label, outputDir;
        vector<string>  Estimators, Groups, outputNames; //holds estimators to be used
+       int process(vector<SharedRAbundVector*>, string, string);
+       int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
+
 };
        
        
index 5a0ea1281b3644e1c0b42035054f9f7f2e0cf0f6..a56a6217a8c136e1fcb9f3f3078c5418547220fa 100644 (file)
@@ -247,6 +247,53 @@ void MothurOut::mothurOutEndLine() {
        }
 }
 /*********************************************************************************************/
+void MothurOut::mothurOut(string output, ofstream& outputFile) {
+       try {
+               
+#ifdef USE_MPI
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+               
+               if (pid == 0) { //only one process should output to screen
+#endif
+                       
+                       cout << output;
+                       out << output;
+                       outputFile << output;
+                       
+#ifdef USE_MPI
+               }
+#endif
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "MothurOut");
+               exit(1);
+       }
+}
+/*********************************************************************************************/
+void MothurOut::mothurOutEndLine(ofstream& outputFile) {
+       try {
+#ifdef USE_MPI
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+               
+               if (pid == 0) { //only one process should output to screen
+#endif
+                       
+                       cout << endl;
+                       out << endl;
+                       outputFile << endl;
+                       
+#ifdef USE_MPI
+               }
+#endif
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "MothurOutEndLine");
+               exit(1);
+       }
+}
+/*********************************************************************************************/
 void MothurOut::mothurOutJustToLog(string output) {
        try {
                #ifdef USE_MPI
index 7b71fcfbd48c823db529f0bc8d1a54f78422543d..fb5b432f549de88a1fa519bc83541649a0882389 100644 (file)
@@ -21,8 +21,10 @@ class MothurOut {
                static MothurOut* getInstance();
                void setFileName(string);
                
-               void mothurOut(string);
-               void mothurOutEndLine();
+               void mothurOut(string); //writes to cout and the logfile
+               void mothurOutEndLine(); //writes to cout and the logfile
+               void mothurOut(string, ofstream&); //writes to the ofstream, cout and the logfile
+               void mothurOutEndLine(ofstream&); //writes to the ofstream, cout and the logfile
                void mothurOutJustToLog(string);
                void errorOut(exception&, string, string);
                void closeLog();
index 28e6728907470c85b97538b097180e0db71170dc..2f397e8f71ec0b8a1d47338b1f12cfc552642ed2 100644 (file)
@@ -14,11 +14,10 @@ int MAXINT = numeric_limits<int>::max();
 
 //***************************************************************************************************************
 
-RefChimeraTest::RefChimeraTest(vector<Sequence>& refs, string chimeraReportFileName){
+RefChimeraTest::RefChimeraTest(vector<Sequence>& refs){
 
        m = MothurOut::getInstance();
 
-       m->openOutputFile(chimeraReportFileName, chimeraReportFile);
        numRefSeqs = refs.size();
 
        referenceSeqs.resize(numRefSeqs);
@@ -30,14 +29,22 @@ RefChimeraTest::RefChimeraTest(vector<Sequence>& refs, string chimeraReportFileN
        
        alignLength = referenceSeqs[0].length();
 
-       chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\tdistToBestMera\tnumParents" << endl;
-//     chimeraReportFile << "leftParentTri,middleParentTri,rightParentTri\tbreakPointTriA,breakPointTriB\tminMismatchToTrimera\tdistToBestMera\tnMera" << endl;
 
 }
+//***************************************************************************************************************
 
+int RefChimeraTest::printHeader(ofstream& chimeraReportFile){
+       try {
+               chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\tdistToBestMera\tnumParents" << endl;
+               return 0; 
+       }catch(exception& e) {
+               m->errorOut(e, "RefChimeraTest", "execute");
+               exit(1);
+       }
+}
 //***************************************************************************************************************
 
-int RefChimeraTest::analyzeQuery(string queryName, string querySeq){
+int RefChimeraTest::analyzeQuery(string queryName, string querySeq, ofstream& chimeraReportFile){
        
        vector<vector<int> > left(numRefSeqs);
        vector<int> singleLeft, bestLeft;
index 6b648fb9703d255a690fec9924dd439efd28df17..d4983e05c07ca3b134811f5595290a472af477a4 100644 (file)
@@ -16,8 +16,9 @@
 class RefChimeraTest {
        
 public:
-       RefChimeraTest(vector<Sequence>&, string);
-       int analyzeQuery(string, string);
+       RefChimeraTest(vector<Sequence>&);
+       int printHeader(ofstream&);
+       int analyzeQuery(string, string, ofstream&);
        int getClosestRefIndex();
 private:
        int getMismatches(string&, vector<vector<int> >&, vector<vector<int> >&, int&);
@@ -32,7 +33,7 @@ private:
        int numRefSeqs;
        int alignLength;
        int bestMatch;
-       ofstream chimeraReportFile;
+       //ofstream chimeraReportFile;
        
        MothurOut* m;
 };
index 22453a46078a7e99b33c07351722913fbb88f854..5f5435f8679a9b65bb4799dae9dff30555818d3c 100644 (file)
@@ -211,7 +211,7 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                                if (taxons[0] == '\'') {  taxons = taxons.substr(1); }
                                if (taxons[(taxons.length()-1)] == '\'') {  taxons = taxons.substr(0, (taxons.length()-1)); }
                        }
-                       
+                       m->splitAtChar(taxons, listOfTaxons, '-');
                        
                        if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
                
@@ -541,14 +541,18 @@ int RemoveLineageCommand::readTax(){
                
                bool wroteSomething = false;
                
-               bool taxonsHasConfidence = false;
-               vector< map<string, float> > searchTaxons;
-               string noConfidenceTaxons = taxons;
-               int hasConPos = taxons.find_first_of('(');
-               if (hasConPos != string::npos) {  
-                       taxonsHasConfidence = true; 
-                       searchTaxons = getTaxons(taxons); 
-                       noConfidenceTaxons = removeConfidences(taxons);
+               vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
+               vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
+               vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
+               
+               for (int i = 0; i < listOfTaxons.size(); i++) {
+                       noConfidenceTaxons[i] = listOfTaxons[i];
+                       int hasConPos = listOfTaxons[i].find_first_of('(');
+                       if (hasConPos != string::npos) {  
+                               taxonsHasConfidence[i] = true; 
+                               searchTaxons[i] = getTaxons(listOfTaxons[i]); 
+                               noConfidenceTaxons[i] = removeConfidences(listOfTaxons[i]);
+                       }
                }
                
                
@@ -559,102 +563,107 @@ int RemoveLineageCommand::readTax(){
                        in >> name;                             //read from first column
                        in >> tax;                      //read from second column
                        
-                       string newtax = tax;
+                       bool remove = false;
                        
-                       //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
-                       if (!taxonsHasConfidence) {
-                               
-                               int hasConfidences = tax.find_first_of('(');
-                               if (hasConfidences != string::npos) { 
-                                       newtax = removeConfidences(tax);
-                               }
-                               
-                               int pos = newtax.find(taxons);
+                       for (int j = 0; j < listOfTaxons.size(); j++) {
+                               string newtax = tax;
                                
-                               if (pos == string::npos) { 
-                                       wroteSomething = true;
-                                       out << name << '\t' << tax << endl;
-                               }else{ //this sequence contains the taxon the user wants to remove
-                                       names.insert(name);
-                               }
-                               
-                       }else{//if taxons has them and you don't them remove taxons
-                               int hasConfidences = tax.find_first_of('(');
-                               if (hasConfidences == string::npos) { 
-                               
-                                       int pos = newtax.find(noConfidenceTaxons);
+                               //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
+                               if (!taxonsHasConfidence[j]) {
                                        
-                                       if (pos == string::npos) { 
-                                               wroteSomething = true;
-                                               out << name << '\t' << tax << endl;
-                                       }else{ //this sequence contains the taxon the user wants to remove
-                                               names.insert(name);
-                                       }
-                               }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
-                                       //first remove confidences from both and see if the taxonomy exists
-                               
-                                       string noNewTax = tax;
                                        int hasConfidences = tax.find_first_of('(');
                                        if (hasConfidences != string::npos) { 
-                                               noNewTax = removeConfidences(tax);
+                                               newtax = removeConfidences(tax);
                                        }
                                        
-                                       int pos = noNewTax.find(noConfidenceTaxons);
+                                       int pos = newtax.find(noConfidenceTaxons[j]);
                                        
-                                       if (pos != string::npos) { //if yes, then are the confidences okay
+                                       if (pos == string::npos) { 
+                                               //wroteSomething = true;
+                                               //out << name << '\t' << tax << endl;
+                                       }else{ //this sequence contains the taxon the user wants to remove
+                                               names.insert(name);
+                                               remove=true; break;
+                                       }
+                                       
+                               }else{//if taxons has them and you don't them remove taxons
+                                       int hasConfidences = tax.find_first_of('(');
+                                       if (hasConfidences == string::npos) { 
                                                
-                                               bool remove = false;
-                                               vector< map<string, float> > usersTaxon = getTaxons(newtax);
+                                               int pos = newtax.find(noConfidenceTaxons[j]);
                                                
-                                               //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
-                                               //we want to "line them up", so we will find the the index where the searchstring starts
-                                               int index = 0;
-                                               for (int i = 0; i < usersTaxon.size(); i++) {
+                                               if (pos == string::npos) { 
+                                                       //wroteSomething = true;
+                                                       //out << name << '\t' << tax << endl;
+                                               }else{ //this sequence contains the taxon the user wants to remove
+                                                       names.insert(name);
+                                                       remove=true; break;
+                                               }
+                                       }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
+                                               //first remove confidences from both and see if the taxonomy exists
+                                               
+                                               string noNewTax = tax;
+                                               int hasConfidences = tax.find_first_of('(');
+                                               if (hasConfidences != string::npos) { 
+                                                       noNewTax = removeConfidences(tax);
+                                               }
+                                               
+                                               int pos = noNewTax.find(noConfidenceTaxons[j]);
+                                               
+                                               if (pos != string::npos) { //if yes, then are the confidences okay
                                                        
-                                                       if (usersTaxon[i].begin()->first == searchTaxons[0].begin()->first) { 
-                                                               index = i;  
-                                                               int spot = 0;
-                                                               bool goodspot = true;
-                                                               //is this really the start, or are we dealing with a taxon of the same name?
-                                                               while ((spot < searchTaxons.size()) && ((i+spot) < usersTaxon.size())) {
-                                                                       if (usersTaxon[i+spot].begin()->first != searchTaxons[spot].begin()->first) { goodspot = false; break; }
-                                                                       else { spot++; }
-                                                               }
+                                                       vector< map<string, float> > usersTaxon = getTaxons(newtax);
+                                                       
+                                                       //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
+                                                       //we want to "line them up", so we will find the the index where the searchstring starts
+                                                       int index = 0;
+                                                       for (int i = 0; i < usersTaxon.size(); i++) {
                                                                
-                                                               if (goodspot) { break; }
+                                                               if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) { 
+                                                                       index = i;  
+                                                                       int spot = 0;
+                                                                       bool goodspot = true;
+                                                                       //is this really the start, or are we dealing with a taxon of the same name?
+                                                                       while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
+                                                                               if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
+                                                                               else { spot++; }
+                                                                       }
+                                                                       
+                                                                       if (goodspot) { break; }
+                                                               }
                                                        }
-                                               }
-                                               
-                                               for (int i = 0; i < searchTaxons.size(); i++) {
                                                        
-                                                       if ((i+index) < usersTaxon.size()) { //just in case, should never be false
-                                                               if (usersTaxon[i+index].begin()->second < searchTaxons[i].begin()->second) { //is the users cutoff less than the search taxons
+                                                       for (int i = 0; i < searchTaxons[j].size(); i++) {
+                                                               
+                                                               if ((i+index) < usersTaxon.size()) { //just in case, should never be false
+                                                                       if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
+                                                                               remove = true;
+                                                                               break;
+                                                                       }
+                                                               }else {
                                                                        remove = true;
                                                                        break;
                                                                }
+                                                       }
+                                                       
+                                                       //passed the test so remove you
+                                                       if (remove) {
+                                                               names.insert(name);
+                                                               remove=true; break;
                                                        }else {
-                                                               remove = true;
-                                                               break;
+                                                               //wroteSomething = true;
+                                                               //out << name << '\t' << tax << endl;
                                                        }
-                                               }
-                                               
-                                               //passed the test so remove you
-                                               if (remove) {
-                                                       names.insert(name);
                                                }else {
-                                                       wroteSomething = true;
-                                                       out << name << '\t' << tax << endl;
+                                                       //wroteSomething = true;
+                                                       //out << name << '\t' << tax << endl;
                                                }
-                                       }else {
-                                               wroteSomething = true;
-                                               out << name << '\t' << tax << endl;
                                        }
                                }
+                               
                        }
                        
-                       
-                       
-                       
+                       if (!remove) {  wroteSomething = true; out << name << '\t' << tax << endl; }
                        m->gobble(in);
                }
                in.close();
index 124f6e309f4b5e6a5842f88da7fdcaabbbf22eac..310a66a5a90ea24acf34d7a94d8f7ff7df5a154d 100644 (file)
@@ -31,7 +31,7 @@ class RemoveLineageCommand : public Command {
        
        private:
                set<string> names;
-               vector<string> outputNames;
+               vector<string> outputNames, listOfTaxons;
                string fastafile, namefile, groupfile, alignfile, listfile, taxfile, outputDir, taxons;
                bool abort, dups;
                
index d06a5addf90f74334224a4220e64ea4f0320dedf..2774922a78d43f08186d15d29e15632b2af76de8 100644 (file)
@@ -11,6 +11,7 @@
 #include "reportfile.h"
 #include "qualityscores.h"
 #include "refchimeratest.h"
+#include "filterseqscommand.h"
 
 //**********************************************************************************************************************
 vector<string> SeqErrorCommand::setParameters(){       
@@ -23,6 +24,7 @@ vector<string> SeqErrorCommand::setParameters(){
                CommandParameter pignorechimeras("ignorechimeras", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pignorechimeras);
                CommandParameter pthreshold("threshold", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pthreshold);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+               CommandParameter pfilter("filter", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pfilter);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
@@ -199,8 +201,11 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                        temp = validParameter.validFile(parameters, "threshold", false);        if (temp == "not found") { temp = "1.00"; }
                        convert(temp, threshold);  
                        
-                       temp = validParameter.validFile(parameters, "ignorechimeras", false);   if (temp == "not found") { temp = "1"; }
-                       convert(temp, ignoreChimeras);  
+                       temp = validParameter.validFile(parameters, "ignorechimeras", false);   if (temp == "not found") { temp = "T"; }
+                       ignoreChimeras = m->isTrue(temp);
+                       
+                       temp = validParameter.validFile(parameters, "filter", false);   if (temp == "not found") { temp = "T"; }
+                       filter = m->isTrue(temp);  
                        
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
@@ -220,96 +225,443 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
 int SeqErrorCommand::execute(){
        try{
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
-
+               
+               int start = time(NULL);
                maxLength = 2000;
+               totalBases = 0;
+               totalMatches = 0;
+
+               //run vertical filter on query and reference files.
+               if (filter) {
+                       string inputString = "fasta=" + queryFileName + "-" + referenceFileName;
+                       m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
+                       m->mothurOut("Running command: filter.seqs(" + inputString + ") to improve processing time."); m->mothurOutEndLine(); 
+                       
+                       Command* filterCommand = new FilterSeqsCommand(inputString);
+                       filterCommand->execute();
+                       
+                       map<string, vector<string> > filenames = filterCommand->getOutputFiles();
+                       
+                       delete filterCommand;
+                       
+                       m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
+                       
+                       queryFileName = filenames["fasta"][0];
+                       referenceFileName = filenames["fasta"][1];
+               }
                
                string errorSummaryFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.summary";
-               m->openOutputFile(errorSummaryFileName, errorSummaryFile);
                outputNames.push_back(errorSummaryFileName); outputTypes["error.summary"].push_back(errorSummaryFileName);
-               printErrorHeader();
-               
+                       
                string errorSeqFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq";
-               m->openOutputFile(errorSeqFileName, errorSeqFile);
                outputNames.push_back(errorSeqFileName); outputTypes["error.seq"].push_back(errorSeqFileName);
-
+               
+               string errorChimeraFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.chimera";
+               outputNames.push_back(errorChimeraFileName); outputTypes["error.chimera"].push_back(errorChimeraFileName);
+               
                getReferences();        //read in reference sequences - make sure there's no ambiguous bases
 
-               map<string, int> weights;
                if(namesFileName != ""){        weights = getWeights(); }
                
-               ifstream queryFile;
-               m->openInputFile(queryFileName, queryFile);
+               vector<unsigned long int> fastaFilePos;
+               vector<unsigned long int> qFilePos;
+               vector<unsigned long int> reportFilePos;
                
-               ifstream reportFile;
-               ifstream qualFile;
+               setLines(queryFileName, qualFileName, reportFileName, fastaFilePos, qFilePos, reportFilePos);
+               
+               if (m->control_pressed) { return 0; }
+               
+               for (int i = 0; i < (fastaFilePos.size()-1); i++) {
+                       lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
+                       if (qualFileName != "") {  qLines.push_back(linePair(qFilePos[i], qFilePos[(i+1)]));  }
+                       if (reportFileName != "") {  rLines.push_back(linePair(reportFilePos[i], reportFilePos[(i+1)]));  }
+               }       
+               if(qualFileName == "")  {       qLines = lines; rLines = lines; } //fills with duds
+               
+               int numSeqs = 0;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               if(processors == 1){
+                       numSeqs = driver(queryFileName, qualFileName, reportFileName, errorSummaryFileName, errorSeqFileName, errorChimeraFileName, lines[0], qLines[0], rLines[0]);
+               }else{
+                       numSeqs = createProcesses(queryFileName, qualFileName, reportFileName, errorSummaryFileName, errorSeqFileName, errorChimeraFileName);
+               }       
+#else
+               numSeqs = driver(queryFileName, qualFileName, reportFileName, errorSummaryFileName, errorSeqFileName, errorChimeraFileName, lines[0], qLines[0], rLines[0]);
+#endif
+               
+               if(qualFileName != "" && reportFileName != ""){         
+                       printErrorQuality(qScoreErrorMap);
+                       printQualityFR(qualForwardMap, qualReverseMap);
+               }
+               
+               printErrorFRFile(errorForward, errorReverse);
+               
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 
-               ReportFile report;
-               QualityScores quality;
-               vector<vector<int> > qualForwardMap;
-               vector<vector<int> > qualReverseMap;
+               string errorCountFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.count";
+               ofstream errorCountFile;
+               m->openOutputFile(errorCountFileName, errorCountFile);
+               outputNames.push_back(errorCountFileName);  outputTypes["error.count"].push_back(errorCountFileName);
+               m->mothurOut("Overall error rate:\t" + toString((double)(totalBases - totalMatches) / (double)totalBases) + "\n");
+               m->mothurOut("Errors\tSequences\n");
+               errorCountFile << "Errors\tSequences\n";                
+               for(int i=0;i<misMatchCounts.size();i++){
+                       m->mothurOut(toString(i) + '\t' + toString(misMatchCounts[i]) + '\n');
+                       errorCountFile << i << '\t' << misMatchCounts[i] << endl;
+               }
+               errorCountFile.close();
+               
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+
+               printSubMatrix();
+                               
+               string megAlignmentFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.ref-query";
+               ofstream megAlignmentFile;
+               m->openOutputFile(megAlignmentFileName, megAlignmentFile);
+               outputNames.push_back(megAlignmentFileName);  outputTypes["error.ref-query"].push_back(megAlignmentFileName);
+               
+               for(int i=0;i<numRefs;i++){
+                       megAlignmentFile << referenceSeqs[i].getInlineSeq() << endl;
+                       megAlignmentFile << megaAlignVector[i] << endl;
+               }
+               
+               m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.");
+               m->mothurOutEndLine();
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+               m->mothurOutEndLine();
+               
+               return 0;       
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SeqErrorCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SeqErrorCommand::createProcesses(string filename, string qFileName, string rFileName, string summaryFileName, string errorOutputFileName, string chimeraOutputFileName) {  
+       try {
+               int process = 1;
+               processIDS.clear();
+               map<char, vector<int> >::iterator it;
+               int num = 0;
                
-               if(qualFileName != "" && reportFileName != ""){
-                       m->openInputFile(qualFileName, qualFile);
-                       report = ReportFile(reportFile, reportFileName);
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
                        
-                       qualForwardMap.resize(maxLength);
-                       qualReverseMap.resize(maxLength);
-                       for(int i=0;i<maxLength;i++){
-                               qualForwardMap[i].assign(41,0);
-                               qualReverseMap[i].assign(41,0);
-                       }                               
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               
+                               num = driver(filename, qFileName, rFileName, summaryFileName + toString(getpid()) + ".temp", errorOutputFileName+ toString(getpid()) + ".temp", chimeraOutputFileName + toString(getpid()) + ".temp", lines[process], qLines[process], rLines[process]);
+                               
+                               //pass groupCounts to parent
+                               ofstream out;
+                               string tempFile = filename + toString(getpid()) + ".info.temp";
+                               m->openOutputFile(tempFile, out);
+                               
+                               //output totalBases and totalMatches
+                               out << num << '\t' << totalBases << '\t' << totalMatches << endl << endl;
+                               
+                               //output substitutionMatrix
+                               for(int i = 0; i < substitutionMatrix.size(); i++) {
+                                       for (int j = 0; j < substitutionMatrix[i].size(); j++) {
+                                               out << substitutionMatrix[i][j] << '\t';
+                                       }
+                                       out << endl;
+                               }
+                               out << endl;
+                               
+                               //output qScoreErrorMap
+                               for (it = qScoreErrorMap.begin(); it != qScoreErrorMap.end(); it++) {
+                                       vector<int> thisScoreErrorMap = it->second;
+                                       out << it->first << '\t';
+                                       for (int i = 0; i < thisScoreErrorMap.size(); i++) {
+                                               out << thisScoreErrorMap[i] << '\t';
+                                       }
+                                       out << endl;
+                               }
+                               out << endl;
+                               
+                               //output qualForwardMap
+                               for(int i = 0; i < qualForwardMap.size(); i++) {
+                                       for (int j = 0; j < qualForwardMap[i].size(); j++) {
+                                               out << qualForwardMap[i][j] << '\t';
+                                       }
+                                       out << endl;
+                               }
+                               out << endl;
+                               
+                               //output qualReverseMap
+                               for(int i = 0; i < qualReverseMap.size(); i++) {
+                                       for (int j = 0; j < qualReverseMap[i].size(); j++) {
+                                               out << qualReverseMap[i][j] << '\t';
+                                       }
+                                       out << endl;
+                               }
+                               out << endl;
+                               
+                               
+                               //output errorForward
+                               for (it = errorForward.begin(); it != errorForward.end(); it++) {
+                                       vector<int> thisErrorForward = it->second;
+                                       out << it->first << '\t';
+                                       for (int i = 0; i < thisErrorForward.size(); i++) {
+                                               out << thisErrorForward[i] << '\t';
+                                       }
+                                       out << endl;
+                               }
+                               out << endl;
+                               
+                               //output errorReverse
+                               for (it = errorReverse.begin(); it != errorReverse.end(); it++) {
+                                       vector<int> thisErrorReverse = it->second;
+                                       out << it->first << '\t';
+                                       for (int i = 0; i < thisErrorReverse.size(); i++) {
+                                               out << thisErrorReverse[i] << '\t';
+                                       }
+                                       out << endl;
+                               }
+                               out << endl;
+                               
+                               //output misMatchCounts
+                               out << misMatchCounts.size() << endl;
+                               for (int j = 0; j < misMatchCounts.size(); j++) {
+                                       out << misMatchCounts[j] << '\t';
+                               }
+                               out << endl;
+                               
+                               
+                               //output megaAlignVector
+                               for (int j = 0; j < megaAlignVector.size(); j++) {
+                                       out << megaAlignVector[j] << endl;
+                               }
+                               out << endl;
+                               
+                               out.close();
+                               
+                               exit(0);
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
+                       }
+               }
+               
+               //do my part
+               num = driver(filename, qFileName, rFileName, summaryFileName, errorOutputFileName, chimeraOutputFileName, lines[0], qLines[0], rLines[0]);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
                }
                
-               int totalBases = 0;
-               int totalMatches = 0;
+               //append files
+               for(int i=0;i<processIDS.size();i++){
+                       
+                       m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
+                       
+                       m->appendFiles((summaryFileName + toString(processIDS[i]) + ".temp"), summaryFileName);
+                       remove((summaryFileName + toString(processIDS[i]) + ".temp").c_str());
+                       m->appendFiles((errorOutputFileName + toString(processIDS[i]) + ".temp"), errorOutputFileName);
+                       remove((errorOutputFileName + toString(processIDS[i]) + ".temp").c_str());
+                       m->appendFiles((chimeraOutputFileName + toString(processIDS[i]) + ".temp"), chimeraOutputFileName);
+                       remove((chimeraOutputFileName + toString(processIDS[i]) + ".temp").c_str());
+                       
+                       ifstream in;
+                       string tempFile =  filename + toString(processIDS[i]) + ".info.temp";
+                       m->openInputFile(tempFile, in);
+                       
+                       //input totalBases and totalMatches
+                       int tempBases, tempMatches, tempNumSeqs;
+                       in >> tempNumSeqs >> tempBases >> tempMatches; m->gobble(in);
+                       totalBases += tempBases; totalMatches += tempMatches; num += tempNumSeqs;
+                       
+                       //input substitutionMatrix
+                       int tempNum;
+                       for(int i = 0; i < substitutionMatrix.size(); i++) {
+                               for (int j = 0; j < substitutionMatrix[i].size(); j++) {
+                                       in >> tempNum; substitutionMatrix[i][j] += tempNum;
+                               }
+                               m->gobble(in);
+                       }
+                       m->gobble(in);
+                       
+                       //input qScoreErrorMap
+                       char first;
+                       for (int i = 0; i < qScoreErrorMap.size(); i++) {
+                               in >> first;
+                               vector<int> thisScoreErrorMap = qScoreErrorMap[first];
+                               
+                               for (int i = 0; i < thisScoreErrorMap.size(); i++) {
+                                       in >> tempNum; thisScoreErrorMap[i] += tempNum;
+                               }
+                               qScoreErrorMap[first] = thisScoreErrorMap;
+                               m->gobble(in);
+                       }
+                       m->gobble(in);
+                       
+                       //input qualForwardMap
+                       for(int i = 0; i < qualForwardMap.size(); i++) {
+                               for (int j = 0; j < qualForwardMap[i].size(); j++) {
+                                       in >> tempNum; qualForwardMap[i][j] += tempNum;
+                               }
+                               m->gobble(in);
+                       }
+                       m->gobble(in);
+                       
+                       //input qualReverseMap
+                       for(int i = 0; i < qualReverseMap.size(); i++) {
+                               for (int j = 0; j < qualReverseMap[i].size(); j++) {
+                                       in >> tempNum; qualReverseMap[i][j] += tempNum;
+                               }
+                               m->gobble(in);
+                       }
+                       m->gobble(in);
+                       
+                       //input errorForward
+                       for (int i = 0; i < errorForward.size(); i++) {
+                               in >> first;
+                               vector<int> thisErrorForward = errorForward[first];
+                               
+                               for (int i = 0; i < thisErrorForward.size(); i++) {
+                                       in >> tempNum; thisErrorForward[i] += tempNum;
+                               }
+                               errorForward[first] = thisErrorForward;
+                               m->gobble(in);
+                       }
+                       m->gobble(in);
+                       
+                       //input errorReverse
+                       for (int i = 0; i < errorReverse.size(); i++) {
+                               in >> first;
+                               vector<int> thisErrorReverse = errorReverse[first];
+                               
+                               for (int i = 0; i < thisErrorReverse.size(); i++) {
+                                       in >> tempNum; thisErrorReverse[i] += tempNum;
+                               }
+                               errorReverse[first] = thisErrorReverse;
+                               m->gobble(in);
+                       }
+                       m->gobble(in);
+                       
+                       //input misMatchCounts
+                       int misMatchSize;
+                       in >> misMatchSize; m->gobble(in);
+                       if (misMatchSize > misMatchCounts.size()) {     misMatchCounts.resize(misMatchSize, 0); }
+                       for (int j = 0; j < misMatchCounts.size(); j++) {
+                               in >> tempNum; misMatchCounts[j] += tempNum;
+                       }
+                       m->gobble(in);
+                       
+                       //input megaAlignVector
+                       string thisLine;
+                       for (int j = 0; j < megaAlignVector.size(); j++) {
+                               thisLine = m->getline(in); m->gobble(in); megaAlignVector[j] += thisLine + '\n';
+                       }
+                       m->gobble(in);
+                       
+                       in.close(); remove(tempFile.c_str());
+                       
+               }
+               
+               return num;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SeqErrorCommand", "createProcesses");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SeqErrorCommand::driver(string filename, string qFileName, string rFileName, string summaryFileName, string errorOutputFileName, string chimeraOutputFileName, linePair line, linePair qline, linePair rline) {    
+       
+       try {
+               ReportFile report;
+               QualityScores quality;
                
-               vector<int> misMatchCounts(11, 0);
+               misMatchCounts.resize(11, 0);
                int maxMismatch = 0;
                int numSeqs = 0;
                
                map<string, int>::iterator it;
-               map<char, vector<int> > qScoreErrorMap;
                qScoreErrorMap['m'].assign(41, 0);
                qScoreErrorMap['s'].assign(41, 0);
                qScoreErrorMap['i'].assign(41, 0);
                qScoreErrorMap['a'].assign(41, 0);
                
-               map<char, vector<int> > errorForward;
                errorForward['m'].assign(maxLength,0);
                errorForward['s'].assign(maxLength,0);
                errorForward['i'].assign(maxLength,0);
                errorForward['d'].assign(maxLength,0);
                errorForward['a'].assign(maxLength,0);
                
-               map<char, vector<int> > errorReverse;
                errorReverse['m'].assign(maxLength,0);
                errorReverse['s'].assign(maxLength,0);
                errorReverse['i'].assign(maxLength,0);
                errorReverse['d'].assign(maxLength,0);
                errorReverse['a'].assign(maxLength,0);  
                
-
-               string errorChimeraFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.chimera";
-               RefChimeraTest chimeraTest(referenceSeqs, errorChimeraFileName);
-               outputNames.push_back(errorChimeraFileName); outputTypes["error.chimera"].push_back(errorChimeraFileName);
+               //open inputfiles and go to beginning place for this processor
+               ifstream queryFile;
+               m->openInputFile(filename, queryFile);
+               queryFile.seekg(line.start);
+               
+               ifstream reportFile;
+               ifstream qualFile;
+               if(qFileName != "" && rFileName != ""){
+                       m->openInputFile(qFileName, qualFile);
+                       qualFile.seekg(qline.start);  
+                       
+                       //gobble headers
+                       if (rline.start == 0) {  report = ReportFile(reportFile, rFileName); } 
+                       else{
+                               m->openInputFile(rFileName, reportFile);
+                               reportFile.seekg(rline.start); 
+                       }
+                       
+                       qualForwardMap.resize(maxLength);
+                       qualReverseMap.resize(maxLength);
+                       for(int i=0;i<maxLength;i++){
+                               qualForwardMap[i].assign(41,0);
+                               qualReverseMap[i].assign(41,0);
+                       }       
+               }
+               
+               ofstream outChimeraReport;
+               m->openOutputFile(chimeraOutputFileName, outChimeraReport);
+               RefChimeraTest chimeraTest(referenceSeqs);
+               if (line.start == 0) { chimeraTest.printHeader(outChimeraReport); }
+               
+               ofstream errorSummaryFile;
+               m->openOutputFile(summaryFileName, errorSummaryFile);
+               if (line.start == 0) { printErrorHeader(errorSummaryFile); }
+               
+               ofstream errorSeqFile;
+               m->openOutputFile(errorOutputFileName, errorSeqFile);
+               
+               megaAlignVector.resize(numRefs, "");
                
-               vector<string> megaAlignVector(numRefs, "");
-
                int index = 0;
                bool ignoreSeq = 0;
                
-               while(queryFile){
-
-                       if (m->control_pressed) { errorSummaryFile.close();     errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
-               
+               bool moreSeqs = 1;
+               while (moreSeqs) {
+                       
+                       if (m->control_pressed) { queryFile.close(); if(qFileName != "" && rFileName != ""){  reportFile.close(); qualFile.close(); } outChimeraReport.close(); errorSummaryFile.close();errorSeqFile.close(); return 0; }
+                       
                        Sequence query(queryFile);
                        
-                       int numParentSeqs = chimeraTest.analyzeQuery(query.getName(), query.getAligned());
+                       int numParentSeqs = chimeraTest.analyzeQuery(query.getName(), query.getAligned(), outChimeraReport);
                        int closestRefIndex = chimeraTest.getClosestRefIndex();
-
+                       
                        if(numParentSeqs > 1 && ignoreChimeras == 1)    {       ignoreSeq = 1;  }
                        else                                                                                    {       ignoreSeq = 0;  }
-
+                       
                        Compare minCompare = getErrors(query, referenceSeqs[closestRefIndex]);
                        
                        if(namesFileName != ""){
@@ -317,35 +669,35 @@ int SeqErrorCommand::execute(){
                                minCompare.weight = it->second;
                        }
                        else{   minCompare.weight = 1;  }
-
-                       printErrorData(minCompare, numParentSeqs);
-               
+                       
+                       printErrorData(minCompare, numParentSeqs, errorSummaryFile, errorSeqFile);
+                       
                        if(!ignoreSeq){
-
+                               
                                for(int i=0;i<minCompare.sequence.length();i++){
                                        char letter = minCompare.sequence[i];
-
+                                       
                                        errorForward[letter][i] += minCompare.weight;
                                        errorReverse[letter][minCompare.total-i-1] += minCompare.weight;                                
                                }
                        }
-
+                       
                        if(qualFileName != "" && reportFileName != ""){
                                report = ReportFile(reportFile);
                                
-//                             int origLength = report.getQueryLength();
+                               //                              int origLength = report.getQueryLength();
                                int startBase = report.getQueryStart();
                                int endBase = report.getQueryEnd();
-
+                               
                                quality = QualityScores(qualFile);
-
+                               
                                if(!ignoreSeq){
                                        quality.updateQScoreErrorMap(qScoreErrorMap, minCompare.sequence, startBase, endBase, minCompare.weight);
                                        quality.updateForwardMap(qualForwardMap, startBase, endBase, minCompare.weight);
                                        quality.updateReverseMap(qualReverseMap, startBase, endBase, minCompare.weight);
                                }
                        }                       
-
+                       
                        if(minCompare.errorRate < threshold && !ignoreSeq){
                                totalBases += (minCompare.total * minCompare.weight);
                                totalMatches += minCompare.matches * minCompare.weight;
@@ -358,65 +710,33 @@ int SeqErrorCommand::execute(){
                                
                                megaAlignVector[closestRefIndex] += query.getInlineSeq() + '\n';
                        }
-
+                       
                        index++;
                        
-                       if(index % 1000 == 0){  m->mothurOut(toString(index) + '\n');   }
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               unsigned long int pos = queryFile.tellg();
+                               if ((pos == -1) || (pos >= line.end)) { break; }
+                       #else
+                               if (queryFile.eof()) { break; }
+                       #endif
+                       
+                       if(index % 100 == 0){   m->mothurOut(toString(index) + '\n');   }
                }
                queryFile.close();
+               if(qFileName != "" && rFileName != ""){  reportFile.close(); qualFile.close(); }
                errorSummaryFile.close();       
                errorSeqFile.close();
-
-               if(qualFileName != "" && reportFileName != ""){         
-                       printErrorQuality(qScoreErrorMap);
-                       printQualityFR(qualForwardMap, qualReverseMap);
-               }
-               
-               printErrorFRFile(errorForward, errorReverse);
-               
-               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
-
-               string errorCountFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.count";
-               ofstream errorCountFile;
-               m->openOutputFile(errorCountFileName, errorCountFile);
-               outputNames.push_back(errorCountFileName);  outputTypes["error.count"].push_back(errorCountFileName);
-               m->mothurOut("Overall error rate:\t" + toString((double)(totalBases - totalMatches) / (double)totalBases) + "\n");
-               m->mothurOut("Errors\tSequences\n");
-               errorCountFile << "Errors\tSequences\n";                
-               for(int i=0;i<misMatchCounts.size();i++){
-                       m->mothurOut(toString(i) + '\t' + toString(misMatchCounts[i]) + '\n');
-                       errorCountFile << i << '\t' << misMatchCounts[i] << endl;
-               }
-               errorCountFile.close();
-               
-               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
-
-               printSubMatrix();
-                               
-               string megAlignmentFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.ref-query";
-               ofstream megAlignmentFile;
-               m->openOutputFile(megAlignmentFileName, megAlignmentFile);
-               outputNames.push_back(megAlignmentFileName);  outputTypes["error.ref-query"].push_back(megAlignmentFileName);
-               
-               for(int i=0;i<numRefs;i++){
-                       megAlignmentFile << referenceSeqs[i].getInlineSeq() << endl;
-                       megAlignmentFile << megaAlignVector[i] << endl;
-               }
                
+               //report progress
+               if(index % 100 != 0){   m->mothurOut(toString(index) + '\n');   }
                
-               m->mothurOutEndLine();
-               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
-               m->mothurOutEndLine();
-               
-               return 0;       
+               return index;
        }
        catch(exception& e) {
-               m->errorOut(e, "SeqErrorCommand", "execute");
+               m->errorOut(e, "SeqErrorCommand", "driver");
                exit(1);
        }
 }
-
 //***************************************************************************************************************
 
 void SeqErrorCommand::getReferences(){
@@ -572,7 +892,7 @@ map<string, int> SeqErrorCommand::getWeights(){
 
 //***************************************************************************************************************
 
-void SeqErrorCommand::printErrorHeader(){
+void SeqErrorCommand::printErrorHeader(ofstream& errorSummaryFile){
        try {
                errorSummaryFile << "query\treference\tweight\t";
                errorSummaryFile << "AA\tAT\tAG\tAC\tTA\tTT\tTG\tTC\tGA\tGT\tGG\tGC\tCA\tCT\tCG\tCC\tNA\tNT\tNG\tNC\tAi\tTi\tGi\tCi\tNi\tdA\tdT\tdG\tdC\t";
@@ -589,7 +909,7 @@ void SeqErrorCommand::printErrorHeader(){
 
 //***************************************************************************************************************
 
-void SeqErrorCommand::printErrorData(Compare error, int numParentSeqs){
+void SeqErrorCommand::printErrorData(Compare error, int numParentSeqs, ofstream& errorSummaryFile, ofstream& errorSeqFile){
        try {
 
                errorSummaryFile << error.queryName << '\t' << error.refName << '\t' << error.weight << '\t';
@@ -825,5 +1145,182 @@ void SeqErrorCommand::printQualityFR(vector<vector<int> > qualForwardMap, vector
        }
        
 }
+/**************************************************************************************************/
 
+int SeqErrorCommand::setLines(string filename, string qfilename, string rfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos, vector<unsigned long int>& rfileFilePos) {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               //set file positions for fasta file
+               fastaFilePos = m->divideFile(filename, processors);
+               
+               if (qfilename == "") { return processors; }
+               
+               //get name of first sequence in each chunk
+               map<string, int> firstSeqNames;
+               for (int i = 0; i < (fastaFilePos.size()-1); i++) {
+                       ifstream in;
+                       m->openInputFile(filename, in);
+                       in.seekg(fastaFilePos[i]);
+                       
+                       Sequence temp(in); 
+                       firstSeqNames[temp.getName()] = i;
+                       
+                       in.close();
+               }
+               
+               //make copy to use below
+               map<string, int> firstSeqNamesReport = firstSeqNames;
+               
+               //seach for filePos of each first name in the qfile and save in qfileFilePos
+               ifstream inQual;
+               m->openInputFile(qfilename, inQual);
+               
+               string input;
+               while(!inQual.eof()){   
+                       input = m->getline(inQual);
+                       
+                       if (input.length() != 0) {
+                               if(input[0] == '>'){ //this is a sequence name line
+                                       istringstream nameStream(input);
+                                       
+                                       string sname = "";  nameStream >> sname;
+                                       sname = sname.substr(1);
+                                       
+                                       map<string, int>::iterator it = firstSeqNames.find(sname);
+                                       
+                                       if(it != firstSeqNames.end()) { //this is the start of a new chunk
+                                               unsigned long int pos = inQual.tellg(); 
+                                               qfileFilePos.push_back(pos - input.length() - 1);       
+                                               firstSeqNames.erase(it);
+                                       }
+                               }
+                       }
+                       
+                       if (firstSeqNames.size() == 0) { break; }
+               }
+               inQual.close();
+               
+               if (firstSeqNames.size() != 0) { 
+                       for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
+                               m->mothurOut(it->first + " is in your fasta file and not in your quality file, aborting."); m->mothurOutEndLine();
+                       }
+                       m->control_pressed = true;
+                       return processors;
+               }
+               
+               //get last file position of qfile
+               FILE * pFile;
+               unsigned long int size;
+               
+               //get num bytes in file
+               pFile = fopen (qfilename.c_str(),"rb");
+               if (pFile==NULL) perror ("Error opening file");
+               else{
+                       fseek (pFile, 0, SEEK_END);
+                       size=ftell (pFile);
+                       fclose (pFile);
+               }
+               
+               qfileFilePos.push_back(size);
+               
+               //seach for filePos of each first name in the rfile and save in rfileFilePos
+               string junk;
+               ifstream inR;
+               m->openInputFile(rfilename, inR);
+               
+               //read column headers
+               for (int i = 0; i < 16; i++) {  
+                       if (!inR.eof()) {       inR >> junk;    }
+                       else                    {       break;                  }
+               }
+               
+               while(!inR.eof()){
+                       
+                       if (m->control_pressed) { inR.close();  return processors; }
+                       
+                       input = m->getline(inR);        
+                       
+                       if (input.length() != 0) {
+                               
+                               istringstream nameStream(input);
+                               string sname = "";  nameStream >> sname;
+                               
+                               map<string, int>::iterator it = firstSeqNamesReport.find(sname);
+                       
+                               if(it != firstSeqNamesReport.end()) { //this is the start of a new chunk
+                                       unsigned long int pos = inR.tellg(); 
+                                       rfileFilePos.push_back(pos - input.length() - 1);       
+                                       firstSeqNamesReport.erase(it);
+                               }
+                       }
+                       
+                       if (firstSeqNamesReport.size() == 0) { break; }
+                       m->gobble(inR);
+               }
+               inR.close();
+               
+               if (firstSeqNamesReport.size() != 0) { 
+                       for (map<string, int>::iterator it = firstSeqNamesReport.begin(); it != firstSeqNamesReport.end(); it++) {
+                               m->mothurOut(it->first + " is in your fasta file and not in your report file, aborting."); m->mothurOutEndLine();
+                       }
+                       m->control_pressed = true;
+                       return processors;
+               }
+               
+               //get last file position of qfile
+               FILE * rFile;
+               unsigned long int sizeR;
+               
+               //get num bytes in file
+               rFile = fopen (rfilename.c_str(),"rb");
+               if (rFile==NULL) perror ("Error opening file");
+               else{
+                       fseek (rFile, 0, SEEK_END);
+                       sizeR=ftell (rFile);
+                       fclose (rFile);
+               }
+               
+               rfileFilePos.push_back(sizeR);
+               
+               return processors;
+               
+#else
+               
+               fastaFilePos.push_back(0); qfileFilePos.push_back(0);
+               //get last file position of fastafile
+               FILE * pFile;
+               unsigned long int size;
+               
+               //get num bytes in file
+               pFile = fopen (filename.c_str(),"rb");
+               if (pFile==NULL) perror ("Error opening file");
+               else{
+                       fseek (pFile, 0, SEEK_END);
+                       size=ftell (pFile);
+                       fclose (pFile);
+               }
+               fastaFilePos.push_back(size);
+               
+               //get last file position of fastafile
+               FILE * qFile;
+               
+               //get num bytes in file
+               qFile = fopen (qfilename.c_str(),"rb");
+               if (qFile==NULL) perror ("Error opening file");
+               else{
+                       fseek (qFile, 0, SEEK_END);
+                       size=ftell (qFile);
+                       fclose (qFile);
+               }
+               qfileFilePos.push_back(size);
+               
+               return 1;
+               
+#endif
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SeqErrorCommand", "setLines");
+               exit(1);
+       }
+}
 //***************************************************************************************************************
index 7e87ec63eb209a184f7036fafeeb35184fef9b90..cb715ce05bc1edad8f2790cd78b67a356eafd7c3 100644 (file)
@@ -56,27 +56,51 @@ public:
        
 private:
        bool abort;
+       
+       struct linePair {
+               unsigned long int start;
+               unsigned long int end;
+               linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
+       };
+       
+       vector<int> processIDS;   //processid
+       vector<linePair> lines;
+       vector<linePair> qLines;
+       vector<linePair> rLines;
 
        void getReferences();
        map<string,int> getWeights();
        Compare getErrors(Sequence, Sequence);
-       void printErrorHeader();
-       void printErrorData(Compare, int);
+       void printErrorHeader(ofstream&);
+       void printErrorData(Compare, int, ofstream&, ofstream&);
        void printSubMatrix();
        void printErrorFRFile(map<char, vector<int> >, map<char, vector<int> >);
        void printErrorQuality(map<char, vector<int> >);
        void printQualityFR(vector<vector<int> >, vector<vector<int> >);
+       
+       int setLines(string, string, string, vector<unsigned long int>&, vector<unsigned long int>&, vector<unsigned long int>&);
+       int driver(string, string, string, string, string, string, linePair, linePair, linePair);
+       int createProcesses(string, string, string, string, string, string);
 
        string queryFileName, referenceFileName, qualFileName, reportFileName, namesFileName, outputDir;
        double threshold;
-       bool ignoreChimeras;
+       bool ignoreChimeras, filter;
        int numRefs, processors;
-       int maxLength;
-       ofstream errorSummaryFile, errorSeqFile;
+       int maxLength, totalBases, totalMatches;
+       //ofstream errorSummaryFile, errorSeqFile;
        vector<string> outputNames;
        
        vector<Sequence> referenceSeqs;
        vector<vector<int> > substitutionMatrix;
+       vector<vector<int> > qualForwardMap;
+       vector<vector<int> > qualReverseMap;
+       vector<int> misMatchCounts;
+       map<char, vector<int> > qScoreErrorMap;
+       map<char, vector<int> > errorForward;
+       map<char, vector<int> > errorReverse;
+       map<string, int> weights;
+       vector<string> megaAlignVector;
+
 };
 
 #endif
index 585b3b3d8347272cb7b47afc9a923fb0af28eaa9..e798ee14ac70837e795a7be3191a14f68788c234 100644 (file)
@@ -15,7 +15,7 @@
 
 /***********************************************************************/
 
-SequenceDB::SequenceDB() {  m = MothurOut::getInstance();  }
+SequenceDB::SequenceDB() {  m = MothurOut::getInstance();  length = 0; samelength = true; }
 /***********************************************************************/
 //the clear function free's the memory
 SequenceDB::~SequenceDB() { clear(); }
@@ -24,19 +24,25 @@ SequenceDB::~SequenceDB() { clear(); }
 
 SequenceDB::SequenceDB(int newSize) {
        data.resize(newSize, Sequence());
+       length = 0; samelength = true;
 }
 
 /***********************************************************************/
 
 SequenceDB::SequenceDB(ifstream& filehandle) {
        try{
+               length = 0; samelength = true;
                                
                //read through file
                while (!filehandle.eof()) {
                        //input sequence info into sequencedb
                        Sequence newSequence(filehandle);
                        
-                       if (newSequence.getName() != "") {   data.push_back(newSequence);  }
+                       if (newSequence.getName() != "") {   
+                               if (length == 0) { length = newSequence.getAligned().length(); }
+                               if (length != newSequence.getAligned().length()) { samelength = false; }
+                               data.push_back(newSequence);  
+                       }
                        
                        //takes care of white space
                        m->gobble(filehandle);
@@ -92,7 +98,10 @@ string SequenceDB::readSequence(ifstream& in) {
                                break;
                        }else {  sequence += line;      }
                }
-                       
+               
+               if (length == 0) { length = sequence.length(); }
+               if (length != sequence.length()) { samelength = false; }
+               
                return sequence;
        }
        catch(exception& e) {
@@ -111,6 +120,9 @@ int SequenceDB::getNumSeqs() {
 
 void SequenceDB::set(int index, string newUnaligned) {
        try {
+               if (length == 0) { length = newUnaligned.length(); }
+               if (length != newUnaligned.length()) { samelength = false; }
+               
                data[index] = Sequence(data[index].getName(), newUnaligned);
        }
        catch(exception& e) {
@@ -123,6 +135,9 @@ void SequenceDB::set(int index, string newUnaligned) {
 
 void SequenceDB::set(int index, Sequence newSeq) {
        try {
+               if (length == 0) { length = newSeq.getAligned().length(); }
+               if (length != newSeq.getAligned().length()) { samelength = false; }
+
                data[index] = newSeq;
        }
        catch(exception& e) {
@@ -185,6 +200,9 @@ void SequenceDB::print(ostream& out) {
 
 void SequenceDB::push_back(Sequence newSequence) {
        try {
+               if (length == 0) { length = newSequence.getAligned().length(); }
+               if (length != newSequence.getAligned().length()) { samelength = false; }
+
                data.push_back(newSequence);
        }
        catch(exception& e) {
index 367254949fa6a2a43a59589db159810a82499dc7..8f7640ed3f90f1feb827448120759911d2da8e21 100644 (file)
@@ -37,12 +37,15 @@ public:
        void clear();              //clears data - remeber to loop through and delete the sequences inside or you will have a memory leak
        int size();                //returns datas size
        void print(ostream&);      //loops through data using sequence class print
+       bool sameLength() { return samelength; }
                
 private:
        vector<Sequence> data;
        string readName(ifstream&);
        string readSequence(ifstream&);
        MothurOut* m;
+       bool samelength;
+       int length;
 
 };
 
index 30761e10d4a3048d1af36071471865bd793e28b7..4363eaa9523570cd56146a75805b9dfc8e65e3d7 100644 (file)
@@ -580,7 +580,7 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
                                                        ifstream intemp;
                                                        m->openInputFile(tempdistFileName, intemp);
                                                        
-                                                       for (int i = 0; i < calcDists.size(); i++) {
+                                                       for (int k = 0; k < calcDists.size(); k++) {
                                                                int size = 0;
                                                                intemp >> size; m->gobble(intemp);
                                                                        
@@ -592,7 +592,7 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
                                                                        intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
                                                                        
                                                                        seqDist tempDist(seq1, seq2, dist);
-                                                                       calcDists[i].push_back(tempDist);
+                                                                       calcDists[k].push_back(tempDist);
                                                                }
                                                        }
                                                        intemp.close();
index 64522a5f157820e529b3261d0acaf898f32136b6..dc87fff99489ebe9ebfc29bec6ff2328875d7030 100644 (file)
@@ -240,7 +240,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "name", true);      
                        if (temp == "not found")        {       nameFile = "";          }
-                       else if(temp == "not open")     {       abort = true;           }
+                       else if(temp == "not open")     {       nameFile = "";  abort = true;           }
                        else                                            {       nameFile = temp;        }
                        
                        temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
@@ -818,10 +818,14 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                ofstream temp;
                m->openOutputFile(trimFASTAFileName, temp);             temp.close();
                m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
-               m->openOutputFile(trimQualFileName, temp);              temp.close();
-               m->openOutputFile(scrapQualFileName, temp);             temp.close();
-               m->openOutputFile(trimNameFileName, temp);              temp.close();
-               m->openOutputFile(scrapNameFileName, temp);             temp.close();
+               if(qFileName != ""){
+                       m->openOutputFile(trimQualFileName, temp);              temp.close();
+                       m->openOutputFile(scrapQualFileName, temp);             temp.close();
+               }
+               if (nameFile != "") {
+                       m->openOutputFile(trimNameFileName, temp);              temp.close();
+                       m->openOutputFile(scrapNameFileName, temp);             temp.close();
+               }
 
                driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);