]> git.donarmstrong.com Git - mothur.git/blobdiff - trimseqscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / trimseqscommand.cpp
index 3ab1236ed8c970d1afa331f06997344a540fb23e..9f8fafb24e14a807baa8dd5a981c93556c46e3b7 100644 (file)
@@ -329,6 +329,8 @@ int TrimSeqsCommand::execute(){
                
                numFPrimers = 0;  //this needs to be initialized
                numRPrimers = 0;
+        numSpacers = 0;
+        numLinkers = 0;
                createGroup = false;
                vector<vector<string> > fastaFileNames;
                vector<vector<string> > qualFileNames;
@@ -371,19 +373,16 @@ int TrimSeqsCommand::execute(){
                                outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
                        }
                }
-               
+       
         //fills lines and qlines
                setLines(fastaFile, qFileName);
                
-               //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                               if(processors == 1){
-                                       driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
-                               }else{
-                                       createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
-                               }       
-               //#else
-                       //      driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
-               //#endif
+        if(processors == 1){
+            driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
+        }else{
+            createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
+        }      
+               
                
                if (m->control_pressed) {  return 0; }                  
        
@@ -438,6 +437,17 @@ int TrimSeqsCommand::execute(){
                                        
                                        Sequence currSeq(in); m->gobble(in);
                                        out << currSeq.getName() << '\t' << it->second << endl;
+                    
+                    if (nameFile != "") {
+                        map<string, string>::iterator itName = nameMap.find(currSeq.getName());
+                        if (itName != nameMap.end()) { 
+                            vector<string> thisSeqsNames; 
+                            m->splitAtChar(itName->second, thisSeqsNames, ',');
+                            for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
+                                out << thisSeqsNames[k] << '\t' << it->second << endl;
+                            }
+                        }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                  
+                    }
                                }
                                in.close();
                                out.close();
@@ -749,7 +759,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                count++;
                        }
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = inFASTA.tellg();
                                if ((pos == -1) || (pos >= line.end)) { break; }
                        
@@ -789,7 +799,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                int exitCommand = 1;
                processIDS.clear();
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                //loop through and create all the processes you want
                while (process != processors) {
                        int pid = fork();
@@ -1030,7 +1040,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                }
                        }
                        
-            #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+            #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        if(createGroup){
                                ifstream in;
                                string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
@@ -1070,12 +1080,10 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) {
         vector<unsigned long long> fastaFilePos;
                vector<unsigned long long> qfileFilePos;
                
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                //set file positions for fasta file
                fastaFilePos = m->divideFile(filename, processors);
                
-               if (qfilename == "") { return processors; }
-               
                //get name of first sequence in each chunk
                map<string, int> firstSeqNames;
                for (int i = 0; i < (fastaFilePos.size()-1); i++) {
@@ -1088,59 +1096,61 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) {
                
                        in.close();
                }
-                               
-               //seach for filePos of each first name in the qfile and save in qfileFilePos
-               ifstream inQual;
-               m->openInputFile(qfilename, inQual);
-               
-               string input;
-               while(!inQual.eof()){   
-                       input = m->getline(inQual);
-
-                       if (input.length() != 0) {
-                               if(input[0] == '>'){ //this is a sequence name line
-                                       istringstream nameStream(input);
-                                       
-                                       string sname = "";  nameStream >> sname;
-                                       sname = sname.substr(1);
-                                       
-                                       map<string, int>::iterator it = firstSeqNames.find(sname);
-                                       
-                                       if(it != firstSeqNames.end()) { //this is the start of a new chunk
-                                               unsigned long long pos = inQual.tellg(); 
-                                               qfileFilePos.push_back(pos - input.length() - 1);       
-                                               firstSeqNames.erase(it);
-                                       }
-                               }
-                       }
-                       
-                       if (firstSeqNames.size() == 0) { break; }
-               }
-               inQual.close();
-               
-               
-               if (firstSeqNames.size() != 0) { 
-                       for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
-                               m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
-                       }
-                       qFileName = "";
-                       return processors;
-               }
-
-               //get last file position of qfile
-               FILE * pFile;
-               unsigned long long size;
-               
-               //get num bytes in file
-               pFile = fopen (qfilename.c_str(),"rb");
-               if (pFile==NULL) perror ("Error opening file");
-               else{
-                       fseek (pFile, 0, SEEK_END);
-                       size=ftell (pFile);
-                       fclose (pFile);
-               }
                
-               qfileFilePos.push_back(size);
+               if(qfilename != "")     {
+            //seach for filePos of each first name in the qfile and save in qfileFilePos
+            ifstream inQual;
+            m->openInputFile(qfilename, inQual);
+            
+            string input;
+            while(!inQual.eof()){      
+                input = m->getline(inQual);
+                
+                if (input.length() != 0) {
+                    if(input[0] == '>'){ //this is a sequence name line
+                        istringstream nameStream(input);
+                        
+                        string sname = "";  nameStream >> sname;
+                        sname = sname.substr(1);
+                        
+                        map<string, int>::iterator it = firstSeqNames.find(sname);
+                        
+                        if(it != firstSeqNames.end()) { //this is the start of a new chunk
+                            unsigned long long pos = inQual.tellg(); 
+                            qfileFilePos.push_back(pos - input.length() - 1);  
+                            firstSeqNames.erase(it);
+                        }
+                    }
+                }
+                
+                if (firstSeqNames.size() == 0) { break; }
+            }
+            inQual.close();
+            
+            
+            if (firstSeqNames.size() != 0) { 
+                for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
+                    m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
+                }
+                qFileName = "";
+                return processors;
+            }
+            
+            //get last file position of qfile
+            FILE * pFile;
+            unsigned long long size;
+            
+            //get num bytes in file
+            pFile = fopen (qfilename.c_str(),"rb");
+            if (pFile==NULL) perror ("Error opening file");
+            else{
+                fseek (pFile, 0, SEEK_END);
+                size=ftell (pFile);
+                fclose (pFile);
+            }
+            
+            qfileFilePos.push_back(size);
+        }
         
         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
                        lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
@@ -1160,6 +1170,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) {
         }else{
             int numFastaSeqs = 0;
             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
+            if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
         
             if (qfilename != "") { 
                 int numQualSeqs = 0;
@@ -1179,9 +1190,8 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) {
                 cout << fastaFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
             }
-        
-            if(qfilename == "")        {       qLines = lines; } //files with duds
         }
+            if(qfilename == "")        {       qLines = lines; } //files with duds
                        return 1;
                
                #endif
@@ -1245,9 +1255,10 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                        primerNameVector.push_back(group);
                                }
                                else if(type == "REVERSE"){
-                                       Sequence oligoRC("reverse", oligo);
-                                       oligoRC.reverseComplement();
-                                       revPrimer.push_back(oligoRC.getUnaligned());
+                                       //Sequence oligoRC("reverse", oligo);
+                                       //oligoRC.reverseComplement();
+                    string oligoRC = reverseOligo(oligo);
+                                       revPrimer.push_back(oligoRC);
                                }
                                else if(type == "BARCODE"){
                                        inOligos >> group;
@@ -1470,6 +1481,46 @@ bool TrimSeqsCommand::cullHomoP(Sequence& seq){
        }
        
 }
+//********************************************************************/
+string TrimSeqsCommand::reverseOligo(string oligo){
+       try {
+        string reverse = "";
+        
+        for(int i=oligo.length()-1;i>=0;i--){
+            
+            if(oligo[i] == 'A')                {       reverse += 'T'; }
+            else if(oligo[i] == 'T'){  reverse += 'A'; }
+            else if(oligo[i] == 'U'){  reverse += 'A'; }
+            
+            else if(oligo[i] == 'G'){  reverse += 'C'; }
+            else if(oligo[i] == 'C'){  reverse += 'G'; }
+            
+            else if(oligo[i] == 'R'){  reverse += 'Y'; }
+            else if(oligo[i] == 'Y'){  reverse += 'R'; }
+            
+            else if(oligo[i] == 'M'){  reverse += 'K'; }
+            else if(oligo[i] == 'K'){  reverse += 'M'; }
+            
+            else if(oligo[i] == 'W'){  reverse += 'W'; }
+            else if(oligo[i] == 'S'){  reverse += 'S'; }
+            
+            else if(oligo[i] == 'B'){  reverse += 'V'; }
+            else if(oligo[i] == 'V'){  reverse += 'B'; }
+            
+            else if(oligo[i] == 'D'){  reverse += 'H'; }
+            else if(oligo[i] == 'H'){  reverse += 'D'; }
+            
+            else                                               {       reverse += 'N'; }
+        }
+        
+        
+        return reverse;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
+               exit(1);
+       }
+}
 
 //***************************************************************************************************************