]> git.donarmstrong.com Git - mothur.git/blobdiff - trimseqscommand.cpp
changes while testing 1.27
[mothur.git] / trimseqscommand.cpp
index ae9f707aa2fe5ace59e59d1697a8bf6e50319421..ae8dfb7efbb7826f40459ef87198ab72480a65ea 100644 (file)
@@ -97,6 +97,29 @@ string TrimSeqsCommand::getHelpString(){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){        
+       try {
+        string outputFileName = "";
+               map<string, vector<string> >::iterator it;
+        
+        //is this a type this command creates
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+        else {
+            if (type == "qfile")            {   outputFileName =  "qual";   }
+            else if (type == "fasta")            {   outputFileName =  "fasta";   }
+            else if (type == "group")            {   outputFileName =  "groups";   }
+            else if (type == "name")            {   outputFileName =  "names";   }
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
+        }
+        return outputFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimSeqsCommand", "getOutputFileNameTag");
+               exit(1);
+       }
+}
 
 
 //**********************************************************************************************************************
@@ -329,19 +352,21 @@ int TrimSeqsCommand::execute(){
                
                numFPrimers = 0;  //this needs to be initialized
                numRPrimers = 0;
+        numSpacers = 0;
+        numLinkers = 0;
                createGroup = false;
                vector<vector<string> > fastaFileNames;
                vector<vector<string> > qualFileNames;
                vector<vector<string> > nameFileNames;
                
-               string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
+               string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("fasta");
                outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
                
-               string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
+               string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("fasta");
                outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
                
-               string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
-               string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
+               string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("qfile");
+               string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("qfile");
                
                if (qFileName != "") {
                        outputNames.push_back(trimQualFile);
@@ -350,8 +375,8 @@ int TrimSeqsCommand::execute(){
                        outputTypes["qfile"].push_back(scrapQualFile); 
                }
                
-               string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
-               string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
+               string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim." + getOutputFileNameTag("name");
+               string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap." + getOutputFileNameTag("name");
                
                if (nameFile != "") {
                        m->readNames(nameFile, nameMap);
@@ -367,31 +392,20 @@ int TrimSeqsCommand::execute(){
                if(oligoFile != ""){
                        createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
                        if (createGroup) {
-                               outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
+                               outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
                                outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
                        }
                }
+       
+        //fills lines and qlines
+               setLines(fastaFile, qFileName);
                
-               vector<unsigned long long> fastaFilePos;
-               vector<unsigned long long> qFilePos;
+        if(processors == 1){
+            driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
+        }else{
+            createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
+        }      
                
-               setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
-               
-               for (int i = 0; i < (fastaFilePos.size()-1); i++) {
-                       lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
-                       if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
-               }       
-               if(qFileName == "")     {       qLines = lines; } //files with duds
-               
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                               if(processors == 1){
-                                       driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
-                               }else{
-                                       createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
-                               }       
-               #else
-                               driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
-               #endif
                
                if (m->control_pressed) {  return 0; }                  
        
@@ -437,7 +451,7 @@ int TrimSeqsCommand::execute(){
                                m->openInputFile(it->first, in);
                                
                                ofstream out;
-                               string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
+                               string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + getOutputFileNameTag("group");
                                outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
                                m->openOutputFile(thisGroupName, out);
                                
@@ -446,6 +460,17 @@ int TrimSeqsCommand::execute(){
                                        
                                        Sequence currSeq(in); m->gobble(in);
                                        out << currSeq.getName() << '\t' << it->second << endl;
+                    
+                    if (nameFile != "") {
+                        map<string, string>::iterator itName = nameMap.find(currSeq.getName());
+                        if (itName != nameMap.end()) { 
+                            vector<string> thisSeqsNames; 
+                            m->splitAtChar(itName->second, thisSeqsNames, ',');
+                            for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
+                                out << thisSeqsNames[k] << '\t' << it->second << endl;
+                            }
+                        }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                  
+                    }
                                }
                                in.close();
                                out.close();
@@ -503,7 +528,7 @@ int TrimSeqsCommand::execute(){
                
 /**************************************************************************************/
 
-int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {   
+int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {     
                
        try {
                
@@ -550,17 +575,17 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                
                ifstream inFASTA;
                m->openInputFile(filename, inFASTA);
-               inFASTA.seekg(line->start);
+               inFASTA.seekg(line.start);
                
                ifstream qFile;
                if(qFileName != "")     {
                        m->openInputFile(qFileName, qFile);
-                       qFile.seekg(qline->start);  
+                       qFile.seekg(qline.start);  
                }
                
                int count = 0;
                bool moreSeqs = 1;
-               TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
+               TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer);
        
                while (moreSeqs) {
                                
@@ -582,6 +607,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
 
                        Sequence currSeq(inFASTA); m->gobble(inFASTA);
                        //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
+            
                        QualityScores currQual;
                        if(qFileName != ""){
                                currQual = QualityScores(qFile);  m->gobble(qFile);
@@ -595,7 +621,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                
                 if(numLinkers != 0){
                                        success = trimOligos.stripLinker(currSeq, currQual);
-                                       if(!success)                            {       trashCode += 'k';       }
+                                       if(success > ldiffs)            {       trashCode += 'k';       }
+                                       else{ currentSeqsDiffs += success;  }
+
                                }
                 
                                if(barcodes.size() != 0){
@@ -603,10 +631,18 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                        if(success > bdiffs)            {       trashCode += 'b';       }
                                        else{ currentSeqsDiffs += success;  }
                                }
+                
+                if(rbarcodes.size() != 0){
+                                       success = trimOligos.stripRBarcode(currSeq, currQual, barcodeIndex);
+                                       if(success > bdiffs)            {       trashCode += 'b';       }
+                                       else{ currentSeqsDiffs += success;  }
+                               }
                                
                 if(numSpacers != 0){
                                        success = trimOligos.stripSpacer(currSeq, currQual);
-                                       if(!success)                            {       trashCode += 's';       }
+                                       if(success > sdiffs)            {       trashCode += 's';       }
+                                       else{ currentSeqsDiffs += success;  }
+
                                }
                 
                                if(numFPrimers != 0){
@@ -675,6 +711,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                currQual.printQScores(trimQualFile);
                                        }
                                        
+                    
                                        if(nameFile != ""){
                                                map<string, string>::iterator itName = nameMap.find(currSeq.getName());
                                                if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
@@ -696,11 +733,13 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                        
                                                        outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
                                                        
+                            int numRedundants = 0;
                                                        if (nameFile != "") {
                                                                map<string, string>::iterator itName = nameMap.find(currSeq.getName());
                                                                if (itName != nameMap.end()) { 
                                                                        vector<string> thisSeqsNames; 
                                                                        m->splitAtChar(itName->second, thisSeqsNames, ',');
+                                    numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
                                                                        for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
                                                                                outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
                                                                        }
@@ -708,8 +747,8 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                        }
                                                        
                                                        map<string, int>::iterator it = groupCounts.find(thisGroup);
-                                                       if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
-                                                       else { groupCounts[it->first]++; }
+                                                       if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
+                                                       else { groupCounts[it->first] += (1 + numRedundants); }
                                                                
                                                }
                                        }
@@ -753,9 +792,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                count++;
                        }
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = inFASTA.tellg();
-                               if ((pos == -1) || (pos >= line->end)) { break; }
+                               if ((pos == -1) || (pos >= line.end)) { break; }
                        
                        #else
                                if (inFASTA.eof()) { break; }
@@ -788,12 +827,13 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
 
 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 1;
+        
+        int process = 1;
                int exitCommand = 1;
                processIDS.clear();
                
-               //loop through and create all the processes you want
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                               //loop through and create all the processes you want
                while (process != processors) {
                        int pid = fork();
                        
@@ -842,6 +882,8 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                                                 tempNameFileNames,
                                                                 lines[process],
                                                                 qLines[process]);
+                
+                if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
                                
                                //pass groupCounts to parent
                                if(createGroup){
@@ -884,8 +926,105 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                        int temp = processIDS[i];
                        wait(&temp);
                }
-               
-               //append files
+#else
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the trimData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<trimData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=0; i<processors-1; i++){
+                       
+            string extension = "";
+                       if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
+            vector<vector<string> > tempFASTAFileNames = fastaFileNames;
+            vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
+            vector<vector<string> > tempNameFileNames = nameFileNames;
+            
+            if(allFiles){
+                ofstream temp;
+                
+                for(int i=0;i<tempFASTAFileNames.size();i++){
+                    for(int j=0;j<tempFASTAFileNames[i].size();j++){
+                        if (tempFASTAFileNames[i][j] != "") {
+                            tempFASTAFileNames[i][j] += extension;
+                            m->openOutputFile(tempFASTAFileNames[i][j], temp);                 temp.close();
+                            
+                            if(qFileName != ""){
+                                tempPrimerQualFileNames[i][j] += extension;
+                                m->openOutputFile(tempPrimerQualFileNames[i][j], temp);                temp.close();
+                            }
+                            if(nameFile != ""){
+                                tempNameFileNames[i][j] += extension;
+                                m->openOutputFile(tempNameFileNames[i][j], temp);              temp.close();
+                            }
+                        }
+                    }
+                }
+            }
+
+            
+                       trimData* tempTrim = new trimData(filename,
+                                              qFileName, nameFile,
+                                              (trimFASTAFileName+extension),
+                                              (scrapFASTAFileName+extension),
+                                              (trimQualFileName+extension),
+                                              (scrapQualFileName+extension),
+                                              (trimNameFileName+extension),
+                                              (scrapNameFileName+extension),
+                                              (groupFile+extension),
+                                              tempFASTAFileNames,
+                                              tempPrimerQualFileNames,
+                                              tempNameFileNames,
+                                              lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
+                                              pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer, 
+                                             primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
+                                              qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
+                                             minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
+                       pDataArray.push_back(tempTrim);
+            
+                       hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+               }
+        
+        //parent do my part
+               ofstream temp;
+               m->openOutputFile(trimFASTAFileName, temp);             temp.close();
+               m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
+               if(qFileName != ""){
+                       m->openOutputFile(trimQualFileName, temp);              temp.close();
+                       m->openOutputFile(scrapQualFileName, temp);             temp.close();
+               }
+               if (nameFile != "") {
+                       m->openOutputFile(trimNameFileName, temp);              temp.close();
+                       m->openOutputFile(scrapNameFileName, temp);             temp.close();
+               }
+        
+               driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
+        processIDS.push_back(processors-1);
+
+        
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
+                map<string, int>::iterator it2 = groupCounts.find(it->first);
+                if (it2 == groupCounts.end()) {        groupCounts[it->first] = it->second; }
+                else { groupCounts[it->first] += it->second; }
+            }
+            CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+        
+#endif         
+        
+        
+        //append files
                for(int i=0;i<processIDS.size();i++){
                        
                        m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
@@ -936,6 +1075,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                }
                        }
                        
+            #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        if(createGroup){
                                ifstream in;
                                string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
@@ -948,7 +1088,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                if (tempNum != 0) {
                                        while (!in.eof()) { 
                                                in >> group >> tempNum; m->gobble(in);
-                               
+                        
                                                map<string, int>::iterator it = groupCounts.find(group);
                                                if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
                                                else { groupCounts[it->first] += tempNum; }
@@ -956,11 +1096,10 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                }
                                in.close(); m->mothurRemove(tempFile);
                        }
-                       
+            #endif
                }
-       
-               return exitCommand;
-#endif         
+
+        return exitCommand;
        }
        catch(exception& e) {
                m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
@@ -970,14 +1109,16 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
 
 /**************************************************************************************************/
 
-int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long long>& fastaFilePos, vector<unsigned long long>& qfileFilePos) {
+int TrimSeqsCommand::setLines(string filename, string qfilename) {
        try {
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+        
+        vector<unsigned long long> fastaFilePos;
+               vector<unsigned long long> qfileFilePos;
+               
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                //set file positions for fasta file
                fastaFilePos = m->divideFile(filename, processors);
                
-               if (qfilename == "") { return processors; }
-               
                //get name of first sequence in each chunk
                map<string, int> firstSeqNames;
                for (int i = 0; i < (fastaFilePos.size()-1); i++) {
@@ -990,66 +1131,102 @@ int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned
                
                        in.close();
                }
-                               
-               //seach for filePos of each first name in the qfile and save in qfileFilePos
-               ifstream inQual;
-               m->openInputFile(qfilename, inQual);
-               
-               string input;
-               while(!inQual.eof()){   
-                       input = m->getline(inQual);
-
-                       if (input.length() != 0) {
-                               if(input[0] == '>'){ //this is a sequence name line
-                                       istringstream nameStream(input);
-                                       
-                                       string sname = "";  nameStream >> sname;
-                                       sname = sname.substr(1);
-                                       
-                                       map<string, int>::iterator it = firstSeqNames.find(sname);
-                                       
-                                       if(it != firstSeqNames.end()) { //this is the start of a new chunk
-                                               unsigned long long pos = inQual.tellg(); 
-                                               qfileFilePos.push_back(pos - input.length() - 1);       
-                                               firstSeqNames.erase(it);
-                                       }
-                               }
-                       }
-                       
-                       if (firstSeqNames.size() == 0) { break; }
-               }
-               inQual.close();
-               
                
-               if (firstSeqNames.size() != 0) { 
-                       for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
-                               m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
-                       }
-                       qFileName = "";
-                       return processors;
-               }
-
-               //get last file position of qfile
-               FILE * pFile;
-               unsigned long long size;
-               
-               //get num bytes in file
-               pFile = fopen (qfilename.c_str(),"rb");
-               if (pFile==NULL) perror ("Error opening file");
-               else{
-                       fseek (pFile, 0, SEEK_END);
-                       size=ftell (pFile);
-                       fclose (pFile);
-               }
-               
-               qfileFilePos.push_back(size);
+               if(qfilename != "")     {
+            //seach for filePos of each first name in the qfile and save in qfileFilePos
+            ifstream inQual;
+            m->openInputFile(qfilename, inQual);
+            
+            string input;
+            while(!inQual.eof()){      
+                input = m->getline(inQual);
+                
+                if (input.length() != 0) {
+                    if(input[0] == '>'){ //this is a sequence name line
+                        istringstream nameStream(input);
+                        
+                        string sname = "";  nameStream >> sname;
+                        sname = sname.substr(1);
+                        
+                        map<string, int>::iterator it = firstSeqNames.find(sname);
+                        
+                        if(it != firstSeqNames.end()) { //this is the start of a new chunk
+                            unsigned long long pos = inQual.tellg(); 
+                            qfileFilePos.push_back(pos - input.length() - 1);  
+                            firstSeqNames.erase(it);
+                        }
+                    }
+                }
+                
+                if (firstSeqNames.size() == 0) { break; }
+            }
+            inQual.close();
+            
+            
+            if (firstSeqNames.size() != 0) { 
+                for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
+                    m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
+                }
+                qFileName = "";
+                return processors;
+            }
+            
+            //get last file position of qfile
+            FILE * pFile;
+            unsigned long long size;
+            
+            //get num bytes in file
+            pFile = fopen (qfilename.c_str(),"rb");
+            if (pFile==NULL) perror ("Error opening file");
+            else{
+                fseek (pFile, 0, SEEK_END);
+                size=ftell (pFile);
+                fclose (pFile);
+            }
+            
+            qfileFilePos.push_back(size);
+        }
+        
+        for (int i = 0; i < (fastaFilePos.size()-1); i++) {
+            if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
+                       lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
+                       if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
+               }       
+               if(qfilename == "")     {       qLines = lines; } //files with duds
                
                return processors;
                
                #else
-               
-                       fastaFilePos.push_back(0); qfileFilePos.push_back(0);
-                       fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
+            
+        if (processors == 1) { //save time
+                       //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
+                       //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
+            lines.push_back(linePair(0, 1000));
+            if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
+        }else{
+            int numFastaSeqs = 0;
+            fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
+            if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
+        
+            if (qfilename != "") { 
+                int numQualSeqs = 0;
+                qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
+                
+                if (numFastaSeqs != numQualSeqs) {
+                    m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
+                }
+            }
+        
+            //figure out how many sequences you have to process
+            int numSeqsPerProcessor = numFastaSeqs / processors;
+            for (int i = 0; i < processors; i++) {
+                int startIndex =  i * numSeqsPerProcessor;
+                if(i == (processors - 1)){     numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
+                lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
+                if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
+            }
+        }
+            if(qfilename == "")        {       qLines = lines; } //files with duds
                        return 1;
                
                #endif
@@ -1113,13 +1290,36 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                        primerNameVector.push_back(group);
                                }
                                else if(type == "REVERSE"){
-                                       Sequence oligoRC("reverse", oligo);
-                                       oligoRC.reverseComplement();
-                                       revPrimer.push_back(oligoRC.getUnaligned());
+                                       //Sequence oligoRC("reverse", oligo);
+                                       //oligoRC.reverseComplement();
+                    string oligoRC = reverseOligo(oligo);
+                                       revPrimer.push_back(oligoRC);
                                }
                                else if(type == "BARCODE"){
                                        inOligos >> group;
+                    
+                    //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
+                    //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
+                                       string temp = "";
+                    while (!inOligos.eof())    {       
+                                               char c = inOligos.get(); 
+                                               if (c == 10 || c == 13){        break;  }
+                                               else if (c == 32 || c == 9){;} //space or tab
+                                               else {  temp += c;  }
+                                       } 
                                        
+                    //then this is illumina data with 4 columns
+                    if (temp != "") {  
+                        string reverseBarcode = reverseOligo(group); //reverse barcode
+                        group = temp;
+                        
+                        //check for repeat barcodes
+                        map<string, int>::iterator itBar = rbarcodes.find(reverseBarcode);
+                        if (itBar != rbarcodes.end()) { m->mothurOut("barcode " + reverseBarcode + " is in your oligos file already."); m->mothurOutEndLine();  }
+                                               
+                        rbarcodes[reverseBarcode]=indexBarcode; 
+                    }
+                        
                                        //check for repeat barcodes
                                        map<string, int>::iterator itBar = barcodes.find(oligo);
                                        if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
@@ -1131,7 +1331,7 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                }else if(type == "SPACER"){
                                        spacer.push_back(oligo);
                                }
-                               else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
+                               else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
                        }
                        m->gobble(inOligos);
                }       
@@ -1338,6 +1538,46 @@ bool TrimSeqsCommand::cullHomoP(Sequence& seq){
        }
        
 }
+//********************************************************************/
+string TrimSeqsCommand::reverseOligo(string oligo){
+       try {
+        string reverse = "";
+        
+        for(int i=oligo.length()-1;i>=0;i--){
+            
+            if(oligo[i] == 'A')                {       reverse += 'T'; }
+            else if(oligo[i] == 'T'){  reverse += 'A'; }
+            else if(oligo[i] == 'U'){  reverse += 'A'; }
+            
+            else if(oligo[i] == 'G'){  reverse += 'C'; }
+            else if(oligo[i] == 'C'){  reverse += 'G'; }
+            
+            else if(oligo[i] == 'R'){  reverse += 'Y'; }
+            else if(oligo[i] == 'Y'){  reverse += 'R'; }
+            
+            else if(oligo[i] == 'M'){  reverse += 'K'; }
+            else if(oligo[i] == 'K'){  reverse += 'M'; }
+            
+            else if(oligo[i] == 'W'){  reverse += 'W'; }
+            else if(oligo[i] == 'S'){  reverse += 'S'; }
+            
+            else if(oligo[i] == 'B'){  reverse += 'V'; }
+            else if(oligo[i] == 'V'){  reverse += 'B'; }
+            
+            else if(oligo[i] == 'D'){  reverse += 'H'; }
+            else if(oligo[i] == 'H'){  reverse += 'D'; }
+            
+            else                                               {       reverse += 'N'; }
+        }
+        
+        
+        return reverse;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
+               exit(1);
+       }
+}
 
 //***************************************************************************************************************