X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimseqscommand.cpp;h=a90e7adc5c8cb870714c716b02bec5eb6bfddd73;hb=6c2b1e530a5c0bb87040e58a3e410097acdfcc3d;hp=c019a70e4a2a7d35374192b3f8cca11787e7ecef;hpb=e0ce7cbc93d7d2fbb753ca694182db092a0ea0e7;p=mothur.git diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index c019a70..a90e7ad 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -97,6 +97,29 @@ string TrimSeqsCommand::getHelpString(){ exit(1); } } +//********************************************************************************************************************** +string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "qfile") { outputFileName = "qual"; } + else if (type == "fasta") { outputFileName = "fasta"; } + else if (type == "group") { outputFileName = "groups"; } + else if (type == "name") { outputFileName = "names"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "TrimSeqsCommand", "getOutputFileNameTag"); + exit(1); + } +} //********************************************************************************************************************** @@ -336,14 +359,14 @@ int TrimSeqsCommand::execute(){ vector > qualFileNames; vector > nameFileNames; - string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta"; + string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("fasta"); outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile); - string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta"; + string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("fasta"); outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile); - string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual"; - string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual"; + string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("qfile"); + string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("qfile"); if (qFileName != "") { outputNames.push_back(trimQualFile); @@ -352,8 +375,8 @@ int TrimSeqsCommand::execute(){ outputTypes["qfile"].push_back(scrapQualFile); } - string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names"; - string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names"; + string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim." + getOutputFileNameTag("name"); + string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap." + getOutputFileNameTag("name"); if (nameFile != "") { m->readNames(nameFile, nameMap); @@ -369,7 +392,7 @@ int TrimSeqsCommand::execute(){ if(oligoFile != ""){ createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames); if (createGroup) { - outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; + outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group"); outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } } @@ -428,7 +451,7 @@ int TrimSeqsCommand::execute(){ m->openInputFile(it->first, in); ofstream out; - string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups"; + string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + getOutputFileNameTag("group"); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); m->openOutputFile(thisGroupName, out); @@ -584,9 +607,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string Sequence currSeq(inFASTA); m->gobble(inFASTA); //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl; + QualityScores currQual; if(qFileName != ""){ currQual = QualityScores(qFile); m->gobble(qFile); + //cout << currQual.getName() << endl; } string origSeq = currSeq.getUnaligned(); @@ -687,6 +712,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string currQual.printQScores(trimQualFile); } + if(nameFile != ""){ map::iterator itName = nameMap.find(currSeq.getName()); if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; } @@ -708,11 +734,13 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; + int numRedundants = 0; if (nameFile != "") { map::iterator itName = nameMap.find(currSeq.getName()); if (itName != nameMap.end()) { vector thisSeqsNames; m->splitAtChar(itName->second, thisSeqsNames, ','); + numRedundants = thisSeqsNames.size()-1; //we already include ourselves below for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl; } @@ -720,8 +748,8 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string } map::iterator it = groupCounts.find(thisGroup); - if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; } - else { groupCounts[it->first]++; } + if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; } + else { groupCounts[it->first] += (1 + numRedundants); } } } @@ -855,6 +883,8 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName tempNameFileNames, lines[process], qLines[process]); + + if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); } //pass groupCounts to parent if(createGroup){ @@ -1159,6 +1189,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) { } for (int i = 0; i < (fastaFilePos.size()-1); i++) { + if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); } lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)])); if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); } } @@ -1224,7 +1255,9 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< while(!inOligos.eof()){ inOligos >> type; - + + if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); } + if(type[0] == '#'){ while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there m->gobble(inOligos); @@ -1235,6 +1268,8 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< for(int i=0;i> oligo; + + if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); } for(int i=0;i >& fastaFileNames, vector< map::iterator itPrime = primers.find(oligo); if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } + if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } } + primers[oligo]=indexPrimer; indexPrimer++; primerNameVector.push_back(group); } @@ -1280,15 +1317,22 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< //then this is illumina data with 4 columns if (temp != "") { - string reverseBarcode = reverseOligo(group); //reverse barcode - group = temp; + for(int i=0;idebug) { m->mothurOut("[DEBUG]: reading reverse " + group + ", and group = " + temp + ".\n"); } + + string reverseBarcode = reverseOligo(group); //reverse barcode //check for repeat barcodes map::iterator itBar = rbarcodes.find(reverseBarcode); - if (itBar != rbarcodes.end()) { m->mothurOut("barcode " + reverseBarcode + " is in your oligos file already."); m->mothurOutEndLine(); } - + if (itBar != rbarcodes.end()) { m->mothurOut("reverse barcode " + group + " is in your oligos file already."); m->mothurOutEndLine(); } + + group = temp; rbarcodes[reverseBarcode]=indexBarcode; - } + }else { if (m->debug) { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); } } //check for repeat barcodes map::iterator itBar = barcodes.find(oligo); @@ -1301,7 +1345,7 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< }else if(type == "SPACER"){ spacer.push_back(oligo); } - else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); } + else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); } } m->gobble(inOligos); } @@ -1407,7 +1451,7 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< break; } } - + if (allBlank) { m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine(); allFiles = false;