]> git.donarmstrong.com Git - mothur.git/blobdiff - trimseqscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / trimseqscommand.cpp
index f55272e5b08a0b6e84a3052be2eabbcc22b170f3..9f8fafb24e14a807baa8dd5a981c93556c46e3b7 100644 (file)
@@ -329,6 +329,8 @@ int TrimSeqsCommand::execute(){
                
                numFPrimers = 0;  //this needs to be initialized
                numRPrimers = 0;
+        numSpacers = 0;
+        numLinkers = 0;
                createGroup = false;
                vector<vector<string> > fastaFileNames;
                vector<vector<string> > qualFileNames;
@@ -371,7 +373,7 @@ int TrimSeqsCommand::execute(){
                                outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
                        }
                }
-               
+       
         //fills lines and qlines
                setLines(fastaFile, qFileName);
                
@@ -435,6 +437,17 @@ int TrimSeqsCommand::execute(){
                                        
                                        Sequence currSeq(in); m->gobble(in);
                                        out << currSeq.getName() << '\t' << it->second << endl;
+                    
+                    if (nameFile != "") {
+                        map<string, string>::iterator itName = nameMap.find(currSeq.getName());
+                        if (itName != nameMap.end()) { 
+                            vector<string> thisSeqsNames; 
+                            m->splitAtChar(itName->second, thisSeqsNames, ',');
+                            for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
+                                out << thisSeqsNames[k] << '\t' << it->second << endl;
+                            }
+                        }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                  
+                    }
                                }
                                in.close();
                                out.close();
@@ -746,7 +759,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                count++;
                        }
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = inFASTA.tellg();
                                if ((pos == -1) || (pos >= line.end)) { break; }
                        
@@ -786,7 +799,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                int exitCommand = 1;
                processIDS.clear();
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                //loop through and create all the processes you want
                while (process != processors) {
                        int pid = fork();
@@ -1027,7 +1040,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                }
                        }
                        
-            #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+            #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        if(createGroup){
                                ifstream in;
                                string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
@@ -1067,12 +1080,10 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) {
         vector<unsigned long long> fastaFilePos;
                vector<unsigned long long> qfileFilePos;
                
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                //set file positions for fasta file
                fastaFilePos = m->divideFile(filename, processors);
                
-               if (qfilename == "") { return processors; }
-               
                //get name of first sequence in each chunk
                map<string, int> firstSeqNames;
                for (int i = 0; i < (fastaFilePos.size()-1); i++) {
@@ -1085,59 +1096,61 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) {
                
                        in.close();
                }
-                               
-               //seach for filePos of each first name in the qfile and save in qfileFilePos
-               ifstream inQual;
-               m->openInputFile(qfilename, inQual);
-               
-               string input;
-               while(!inQual.eof()){   
-                       input = m->getline(inQual);
-
-                       if (input.length() != 0) {
-                               if(input[0] == '>'){ //this is a sequence name line
-                                       istringstream nameStream(input);
-                                       
-                                       string sname = "";  nameStream >> sname;
-                                       sname = sname.substr(1);
-                                       
-                                       map<string, int>::iterator it = firstSeqNames.find(sname);
-                                       
-                                       if(it != firstSeqNames.end()) { //this is the start of a new chunk
-                                               unsigned long long pos = inQual.tellg(); 
-                                               qfileFilePos.push_back(pos - input.length() - 1);       
-                                               firstSeqNames.erase(it);
-                                       }
-                               }
-                       }
-                       
-                       if (firstSeqNames.size() == 0) { break; }
-               }
-               inQual.close();
-               
                
-               if (firstSeqNames.size() != 0) { 
-                       for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
-                               m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
-                       }
-                       qFileName = "";
-                       return processors;
-               }
-
-               //get last file position of qfile
-               FILE * pFile;
-               unsigned long long size;
-               
-               //get num bytes in file
-               pFile = fopen (qfilename.c_str(),"rb");
-               if (pFile==NULL) perror ("Error opening file");
-               else{
-                       fseek (pFile, 0, SEEK_END);
-                       size=ftell (pFile);
-                       fclose (pFile);
-               }
-               
-               qfileFilePos.push_back(size);
+               if(qfilename != "")     {
+            //seach for filePos of each first name in the qfile and save in qfileFilePos
+            ifstream inQual;
+            m->openInputFile(qfilename, inQual);
+            
+            string input;
+            while(!inQual.eof()){      
+                input = m->getline(inQual);
+                
+                if (input.length() != 0) {
+                    if(input[0] == '>'){ //this is a sequence name line
+                        istringstream nameStream(input);
+                        
+                        string sname = "";  nameStream >> sname;
+                        sname = sname.substr(1);
+                        
+                        map<string, int>::iterator it = firstSeqNames.find(sname);
+                        
+                        if(it != firstSeqNames.end()) { //this is the start of a new chunk
+                            unsigned long long pos = inQual.tellg(); 
+                            qfileFilePos.push_back(pos - input.length() - 1);  
+                            firstSeqNames.erase(it);
+                        }
+                    }
+                }
+                
+                if (firstSeqNames.size() == 0) { break; }
+            }
+            inQual.close();
+            
+            
+            if (firstSeqNames.size() != 0) { 
+                for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
+                    m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
+                }
+                qFileName = "";
+                return processors;
+            }
+            
+            //get last file position of qfile
+            FILE * pFile;
+            unsigned long long size;
+            
+            //get num bytes in file
+            pFile = fopen (qfilename.c_str(),"rb");
+            if (pFile==NULL) perror ("Error opening file");
+            else{
+                fseek (pFile, 0, SEEK_END);
+                size=ftell (pFile);
+                fclose (pFile);
+            }
+            
+            qfileFilePos.push_back(size);
+        }
         
         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
                        lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
@@ -1157,6 +1170,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) {
         }else{
             int numFastaSeqs = 0;
             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
+            if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
         
             if (qfilename != "") { 
                 int numQualSeqs = 0;
@@ -1176,9 +1190,8 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) {
                 cout << fastaFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
             }
-        
-            if(qfilename == "")        {       qLines = lines; } //files with duds
         }
+            if(qfilename == "")        {       qLines = lines; } //files with duds
                        return 1;
                
                #endif
@@ -1242,9 +1255,10 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                        primerNameVector.push_back(group);
                                }
                                else if(type == "REVERSE"){
-                                       Sequence oligoRC("reverse", oligo);
-                                       oligoRC.reverseComplement();
-                                       revPrimer.push_back(oligoRC.getUnaligned());
+                                       //Sequence oligoRC("reverse", oligo);
+                                       //oligoRC.reverseComplement();
+                    string oligoRC = reverseOligo(oligo);
+                                       revPrimer.push_back(oligoRC);
                                }
                                else if(type == "BARCODE"){
                                        inOligos >> group;
@@ -1467,6 +1481,46 @@ bool TrimSeqsCommand::cullHomoP(Sequence& seq){
        }
        
 }
+//********************************************************************/
+string TrimSeqsCommand::reverseOligo(string oligo){
+       try {
+        string reverse = "";
+        
+        for(int i=oligo.length()-1;i>=0;i--){
+            
+            if(oligo[i] == 'A')                {       reverse += 'T'; }
+            else if(oligo[i] == 'T'){  reverse += 'A'; }
+            else if(oligo[i] == 'U'){  reverse += 'A'; }
+            
+            else if(oligo[i] == 'G'){  reverse += 'C'; }
+            else if(oligo[i] == 'C'){  reverse += 'G'; }
+            
+            else if(oligo[i] == 'R'){  reverse += 'Y'; }
+            else if(oligo[i] == 'Y'){  reverse += 'R'; }
+            
+            else if(oligo[i] == 'M'){  reverse += 'K'; }
+            else if(oligo[i] == 'K'){  reverse += 'M'; }
+            
+            else if(oligo[i] == 'W'){  reverse += 'W'; }
+            else if(oligo[i] == 'S'){  reverse += 'S'; }
+            
+            else if(oligo[i] == 'B'){  reverse += 'V'; }
+            else if(oligo[i] == 'V'){  reverse += 'B'; }
+            
+            else if(oligo[i] == 'D'){  reverse += 'H'; }
+            else if(oligo[i] == 'H'){  reverse += 'D'; }
+            
+            else                                               {       reverse += 'N'; }
+        }
+        
+        
+        return reverse;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
+               exit(1);
+       }
+}
 
 //***************************************************************************************************************