X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimseqscommand.cpp;h=9f8fafb24e14a807baa8dd5a981c93556c46e3b7;hb=e7ae6e6b27c45b5691c19f423ec56faae8e2f255;hp=f55272e5b08a0b6e84a3052be2eabbcc22b170f3;hpb=755185afe1c287b8c6eddf9eedd293a38fc9f998;p=mothur.git diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index f55272e..9f8fafb 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -329,6 +329,8 @@ int TrimSeqsCommand::execute(){ numFPrimers = 0; //this needs to be initialized numRPrimers = 0; + numSpacers = 0; + numLinkers = 0; createGroup = false; vector > fastaFileNames; vector > qualFileNames; @@ -371,7 +373,7 @@ int TrimSeqsCommand::execute(){ outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } } - + //fills lines and qlines setLines(fastaFile, qFileName); @@ -435,6 +437,17 @@ int TrimSeqsCommand::execute(){ Sequence currSeq(in); m->gobble(in); out << currSeq.getName() << '\t' << it->second << endl; + + if (nameFile != "") { + map::iterator itName = nameMap.find(currSeq.getName()); + if (itName != nameMap.end()) { + vector thisSeqsNames; + m->splitAtChar(itName->second, thisSeqsNames, ','); + for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self + out << thisSeqsNames[k] << '\t' << it->second << endl; + } + }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } + } } in.close(); out.close(); @@ -746,7 +759,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string count++; } - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) unsigned long long pos = inFASTA.tellg(); if ((pos == -1) || (pos >= line.end)) { break; } @@ -786,7 +799,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName int exitCommand = 1; processIDS.clear(); -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { int pid = fork(); @@ -1027,7 +1040,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName } } - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if(createGroup){ ifstream in; string tempFile = filename + toString(processIDS[i]) + ".num.temp"; @@ -1067,12 +1080,10 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) { vector fastaFilePos; vector qfileFilePos; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //set file positions for fasta file fastaFilePos = m->divideFile(filename, processors); - if (qfilename == "") { return processors; } - //get name of first sequence in each chunk map firstSeqNames; for (int i = 0; i < (fastaFilePos.size()-1); i++) { @@ -1085,59 +1096,61 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) { in.close(); } - - //seach for filePos of each first name in the qfile and save in qfileFilePos - ifstream inQual; - m->openInputFile(qfilename, inQual); - - string input; - while(!inQual.eof()){ - input = m->getline(inQual); - - if (input.length() != 0) { - if(input[0] == '>'){ //this is a sequence name line - istringstream nameStream(input); - - string sname = ""; nameStream >> sname; - sname = sname.substr(1); - - map::iterator it = firstSeqNames.find(sname); - - if(it != firstSeqNames.end()) { //this is the start of a new chunk - unsigned long long pos = inQual.tellg(); - qfileFilePos.push_back(pos - input.length() - 1); - firstSeqNames.erase(it); - } - } - } - - if (firstSeqNames.size() == 0) { break; } - } - inQual.close(); - - if (firstSeqNames.size() != 0) { - for (map::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) { - m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine(); - } - qFileName = ""; - return processors; - } - - //get last file position of qfile - FILE * pFile; - unsigned long long size; - - //get num bytes in file - pFile = fopen (qfilename.c_str(),"rb"); - if (pFile==NULL) perror ("Error opening file"); - else{ - fseek (pFile, 0, SEEK_END); - size=ftell (pFile); - fclose (pFile); - } - - qfileFilePos.push_back(size); + if(qfilename != "") { + //seach for filePos of each first name in the qfile and save in qfileFilePos + ifstream inQual; + m->openInputFile(qfilename, inQual); + + string input; + while(!inQual.eof()){ + input = m->getline(inQual); + + if (input.length() != 0) { + if(input[0] == '>'){ //this is a sequence name line + istringstream nameStream(input); + + string sname = ""; nameStream >> sname; + sname = sname.substr(1); + + map::iterator it = firstSeqNames.find(sname); + + if(it != firstSeqNames.end()) { //this is the start of a new chunk + unsigned long long pos = inQual.tellg(); + qfileFilePos.push_back(pos - input.length() - 1); + firstSeqNames.erase(it); + } + } + } + + if (firstSeqNames.size() == 0) { break; } + } + inQual.close(); + + + if (firstSeqNames.size() != 0) { + for (map::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) { + m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine(); + } + qFileName = ""; + return processors; + } + + //get last file position of qfile + FILE * pFile; + unsigned long long size; + + //get num bytes in file + pFile = fopen (qfilename.c_str(),"rb"); + if (pFile==NULL) perror ("Error opening file"); + else{ + fseek (pFile, 0, SEEK_END); + size=ftell (pFile); + fclose (pFile); + } + + qfileFilePos.push_back(size); + } for (int i = 0; i < (fastaFilePos.size()-1); i++) { lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)])); @@ -1157,6 +1170,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) { }else{ int numFastaSeqs = 0; fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); + if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); } if (qfilename != "") { int numQualSeqs = 0; @@ -1176,9 +1190,8 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) { cout << fastaFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl; if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); } } - - if(qfilename == "") { qLines = lines; } //files with duds } + if(qfilename == "") { qLines = lines; } //files with duds return 1; #endif @@ -1242,9 +1255,10 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< primerNameVector.push_back(group); } else if(type == "REVERSE"){ - Sequence oligoRC("reverse", oligo); - oligoRC.reverseComplement(); - revPrimer.push_back(oligoRC.getUnaligned()); + //Sequence oligoRC("reverse", oligo); + //oligoRC.reverseComplement(); + string oligoRC = reverseOligo(oligo); + revPrimer.push_back(oligoRC); } else if(type == "BARCODE"){ inOligos >> group; @@ -1467,6 +1481,46 @@ bool TrimSeqsCommand::cullHomoP(Sequence& seq){ } } +//********************************************************************/ +string TrimSeqsCommand::reverseOligo(string oligo){ + try { + string reverse = ""; + + for(int i=oligo.length()-1;i>=0;i--){ + + if(oligo[i] == 'A') { reverse += 'T'; } + else if(oligo[i] == 'T'){ reverse += 'A'; } + else if(oligo[i] == 'U'){ reverse += 'A'; } + + else if(oligo[i] == 'G'){ reverse += 'C'; } + else if(oligo[i] == 'C'){ reverse += 'G'; } + + else if(oligo[i] == 'R'){ reverse += 'Y'; } + else if(oligo[i] == 'Y'){ reverse += 'R'; } + + else if(oligo[i] == 'M'){ reverse += 'K'; } + else if(oligo[i] == 'K'){ reverse += 'M'; } + + else if(oligo[i] == 'W'){ reverse += 'W'; } + else if(oligo[i] == 'S'){ reverse += 'S'; } + + else if(oligo[i] == 'B'){ reverse += 'V'; } + else if(oligo[i] == 'V'){ reverse += 'B'; } + + else if(oligo[i] == 'D'){ reverse += 'H'; } + else if(oligo[i] == 'H'){ reverse += 'D'; } + + else { reverse += 'N'; } + } + + + return reverse; + } + catch(exception& e) { + m->errorOut(e, "TrimSeqsCommand", "reverseOligo"); + exit(1); + } +} //***************************************************************************************************************