X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimseqscommand.cpp;h=3c0d378f709042cc3e1c2ca2b87e730a62dbf359;hb=3247d888e7aafc4a65ec9062a94dfd166c2c5b1d;hp=4e7a12994f4854c487d036dd422aa1f3130e288c;hpb=3aa0b70bf057656ccc2bae1de9816583f2a91779;p=mothur.git diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 4e7a129..3c0d378 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -13,7 +13,7 @@ //********************************************************************************************************************** vector TrimSeqsCommand::getValidParameters(){ try { - string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", + string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); return myArray; @@ -74,7 +74,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { else { //valid paramters for this command - string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", + string AlignArray[] = {"fasta", "flip", "group","oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); @@ -124,6 +124,14 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["qfile"] = inputDir + it->second; } } + + it = parameters.find("group"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["group"] = inputDir + it->second; } + } } @@ -151,6 +159,11 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { else if(temp == "not open"){ abort = true; } else { oligoFile = temp; } + temp = validParameter.validFile(parameters, "group", true); + if (temp == "not found"){ groupfile = ""; } + else if(temp == "not open"){ abort = true; } + else { groupfile = temp; } + temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; } convert(temp, maxAmbig); @@ -206,8 +219,13 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; } convert(temp, processors); - if(allFiles && oligoFile == ""){ - m->mothurOut("You selected allfiles, but didn't enter an oligos file. Ignoring the allfiles request."); m->mothurOutEndLine(); + if ((oligoFile != "") && (groupfile != "")) { + m->mothurOut("You given both a oligos file and a groupfile, only one is allowed."); m->mothurOutEndLine(); abort = true; + } + + + if(allFiles && (oligoFile == "") && (groupfile == "")){ + m->mothurOut("You selected allfiles, but didn't enter an oligos or group file. Ignoring the allfiles request."); m->mothurOutEndLine(); } if((qAverage != 0 && qThreshold != 0) && qFileName == ""){ m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine(); @@ -231,8 +249,9 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { void TrimSeqsCommand::help(){ try { m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n"); - m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n"); + m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, group, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n"); m->mothurOut("The fasta parameter is required.\n"); + m->mothurOut("The group parameter allows you to enter a group file for your fasta file.\n"); m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n"); m->mothurOut("The oligos parameter .... The default is "".\n"); m->mothurOut("The maxambig parameter .... The default is -1.\n"); @@ -275,6 +294,8 @@ int TrimSeqsCommand::execute(){ numFPrimers = 0; //this needs to be initialized numRPrimers = 0; + vector fastaFileNames; + vector qualFileNames; string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta"; outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile); @@ -283,15 +304,40 @@ int TrimSeqsCommand::execute(){ string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual"; string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual"; if (qFileName != "") { outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile); outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); } - string groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; + string groupFile = ""; + if (groupfile == "") { groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; } + else{ + groupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "trim.groups"; + outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile); + groupMap = new GroupMap(groupfile); + groupMap->readMap(); + + if(allFiles){ + for (int i = 0; i < groupMap->namesOfGroups.size(); i++) { + groupToIndex[groupMap->namesOfGroups[i]] = i; + groupVector.push_back(groupMap->namesOfGroups[i]); + fastaFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupMap->namesOfGroups[i] + ".fasta")); + + //we append later, so we want to clear file + ofstream outRemove; + m->openOutputFile(fastaFileNames[i], outRemove); + outRemove.close(); + if(qFileName != ""){ + qualFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupMap->namesOfGroups[i] + ".qual")); + ofstream outRemove2; + m->openOutputFile(qualFileNames[i], outRemove2); + outRemove2.close(); + } + } + } + comboStarts = fastaFileNames.size()-1; + } - vector fastaFileNames; - vector qualFileNames; if(oligoFile != ""){ outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile); getOligos(fastaFileNames, qualFileNames); } - + cout << fastaFileNames.size() << '\t' << qualFileNames.size() << endl; vector fastaFilePos; vector qFilePos; @@ -305,89 +351,20 @@ int TrimSeqsCommand::execute(){ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors == 1){ - driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]); - - for (int j = 0; j < fastaFileNames.size(); j++) { - rename((fastaFileNames[j] + toString(getpid()) + ".temp").c_str(), fastaFileNames[j].c_str()); - } - if(qFileName != ""){ - for (int j = 0; j < qualFileNames.size(); j++) { - rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str()); - } - } - }else{ createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames); - - rename((trimSeqFile + toString(processIDS[0]) + ".temp").c_str(), trimSeqFile.c_str()); - rename((scrapSeqFile + toString(processIDS[0]) + ".temp").c_str(), scrapSeqFile.c_str()); - rename((groupFile + toString(processIDS[0]) + ".temp").c_str(), groupFile.c_str()); - - if(qFileName != ""){ - rename((trimQualFile + toString(processIDS[0]) + ".temp").c_str(), trimQualFile.c_str()); - rename((scrapQualFile + toString(processIDS[0]) + ".temp").c_str(), scrapQualFile.c_str()); - } - - - for (int j = 0; j < fastaFileNames.size(); j++) { - rename((fastaFileNames[j] + toString(processIDS[0]) + ".temp").c_str(), fastaFileNames[j].c_str()); - } - if(qFileName != ""){ - for (int j = 0; j < qualFileNames.size(); j++) { - rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str()); - } - } - - //append files - for(int i=1;iappendFiles((trimSeqFile + toString(processIDS[i]) + ".temp"), trimSeqFile); - remove((trimSeqFile + toString(processIDS[i]) + ".temp").c_str()); - m->appendFiles((scrapSeqFile + toString(processIDS[i]) + ".temp"), scrapSeqFile); - remove((scrapSeqFile + toString(processIDS[i]) + ".temp").c_str()); - - m->appendFiles((trimQualFile + toString(processIDS[i]) + ".temp"), trimQualFile); - remove((trimQualFile + toString(processIDS[i]) + ".temp").c_str()); - m->appendFiles((scrapQualFile + toString(processIDS[i]) + ".temp"), scrapQualFile); - remove((scrapQualFile + toString(processIDS[i]) + ".temp").c_str()); - - m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile); - remove((groupFile + toString(processIDS[i]) + ".temp").c_str()); - for (int j = 0; j < fastaFileNames.size(); j++) { - m->appendFiles((fastaFileNames[j] + toString(processIDS[i]) + ".temp"), fastaFileNames[j]); - remove((fastaFileNames[j] + toString(processIDS[i]) + ".temp").c_str()); - } - - if(qFileName != ""){ - for (int j = 0; j < qualFileNames.size(); j++) { - m->appendFiles((qualFileNames[j] + toString(processIDS[i]) + ".temp"), qualFileNames[j]); - remove((qualFileNames[j] + toString(processIDS[i]) + ".temp").c_str()); - } - } - - - } - } - - if (m->control_pressed) { return 0; } + } #else driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]); - - for (int j = 0; j < fastaFileNames.size(); j++) { - rename((fastaFileNames[j] + toString(j) + ".temp").c_str(), fastaFileNames[j].c_str()); - } - if(qFileName != ""){ - for (int j = 0; j < qualFileNames.size(); j++) { - rename((qualFileNames[j] + toString(j) + ".temp").c_str(), qualFileNames[j].c_str()); - } - } - - if (m->control_pressed) { return 0; } #endif - + + if (m->control_pressed) { return 0; } for(int i=0;iisBlank(fastaFileNames[i])) { remove(fastaFileNames[i].c_str()); } + cout << fastaFileNames[i] << endl; + + if (m->isBlank(fastaFileNames[i])) { remove(fastaFileNames[i].c_str()); } else if (filesToRemove.count(fastaFileNames[i]) > 0) { remove(fastaFileNames[i].c_str()); } else { ifstream inFASTA; @@ -405,7 +382,7 @@ int TrimSeqsCommand::execute(){ if(itCombo->second == i){ thisGroup = itCombo->first; combos.erase(itCombo); break; } } }else{ thisGroup = groupVector[i]; } - + while(!inFASTA.eof()){ if(inFASTA.get() == '>'){ inFASTA >> seqName; @@ -420,8 +397,9 @@ int TrimSeqsCommand::execute(){ if(qFileName != ""){ for(int i=0;iisBlank(qualFileNames[i])) { remove(qualFileNames[i].c_str()); } - else if (filesToRemove.count(qualFileNames[i]) > 0) { remove(qualFileNames[i].c_str()); } + cout << qualFileNames[i] << endl; + if (m->isBlank(qualFileNames[i])) { remove(qualFileNames[i].c_str()); } + else if (filesToRemove.count(qualFileNames[i]) > 0) { remove(qualFileNames[i].c_str()); } else { ifstream inQual; string seqName; @@ -482,43 +460,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string } ofstream outGroups; - //vector fastaFileNames; - //vector qualFileNames; if (oligoFile != "") { m->openOutputFile(groupFile, outGroups); - for (int i = 0; i < fastaNames.size(); i++) { - - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp"); - //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); - //clear old file if it exists - ofstream temp; - m->openOutputFile(fastaNames[i], temp); - temp.close(); - if(qFileName != ""){ - qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp"); - //qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); - //clear old file if it exists - ofstream temp2; - m->openOutputFile(qualNames[i], temp2); - temp2.close(); - } - #else - //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate)); - fastaNames[i] = (fastaNames[i] + toString(i) + ".temp"); - ofstream temp; - m->openOutputFile(fastaNames[i], temp); - temp.close(); - if(qFileName != ""){ - //qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate)); - qualNames[i] = (qualNames[i] + toString(i) + ".temp"); - ofstream temp2; - m->openOutputFile(qualNames[i], temp2); - temp2.close(); - } - #endif - } } ifstream inFASTA; @@ -536,12 +480,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if (m->control_pressed) { inFASTA.close(); outFASTA.close(); scrapFASTA.close(); if (oligoFile != "") { outGroups.close(); } - - //for(int i=0;iclose(); delete fastaFileNames[i]; } if(qFileName != ""){ qFile.close(); - //for(int i=0;iclose(); delete qualFileNames[i]; } } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } @@ -643,6 +584,32 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string } } } + + if (groupfile != "") { + string thisGroup = groupMap->getGroup(currSeq.getName()); + + if (thisGroup != "not found") { + outGroups << currSeq.getName() << '\t' << thisGroup << endl; + if (allFiles) { + ofstream outTemp; + m->openOutputFileAppend(fastaNames[groupToIndex[thisGroup]], outTemp); + currSeq.printSequence(outTemp); + outTemp.close(); + if(qFileName != ""){ + ofstream outTemp2; + m->openOutputFileAppend(qualNames[groupToIndex[thisGroup]], outTemp2); + currQual.printQScores(outTemp2); + outTemp2.close(); + } + } + }else{ + m->mothurOut(currSeq.getName() + " is not in your groupfile, adding to group XXX."); m->mothurOutEndLine(); + outGroups << currSeq.getName() << '\t' << "XXX" << endl; + if (allFiles) { + m->mothurOut("[ERROR]: " + currSeq.getName() + " will not be added to any .group.fasta or .group.qual file."); m->mothurOutEndLine(); + } + } + } } else{ currSeq.setName(currSeq.getName() + '|' + trashCode); @@ -675,18 +642,6 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if (oligoFile != "") { outGroups.close(); } if(qFileName != "") { qFile.close(); scrapQual.close(); outQual.close(); } - //for(int i=0;iclose(); - // delete fastaFileNames[i]; - //} - - //if(qFileName != ""){ - //for(int i=0;iclose(); - //delete qualFileNames[i]; - //} - //} - return count; } catch(exception& e) { @@ -700,7 +655,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector fastaNames, vector qualNames) { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - int process = 0; + int process = 1; int exitCommand = 1; processIDS.clear(); @@ -712,17 +667,93 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ + for (int i = 0; i < fastaNames.size(); i++) { + fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp"); + //clear old file if it exists + ofstream temp; + m->openOutputFile(fastaNames[i], temp); + temp.close(); + if(qFileName != ""){ + qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp"); + //clear old file if it exists + ofstream temp2; + m->openOutputFile(qualNames[i], temp2); + temp2.close(); + } + } + driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]); exit(0); - }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } + + //parent do my part + for (int i = 0; i < fastaNames.size(); i++) { + //clear old file if it exists + ofstream temp; + m->openOutputFile(fastaNames[i], temp); + temp.close(); + if(qFileName != ""){ + //clear old file if it exists + ofstream temp2; + m->openOutputFile(qualNames[i], temp2); + temp2.close(); + } } + driverCreateTrim(filename, qFileName, trimFile, scrapFile, trimQFile, scrapQFile, groupFile, fastaNames, qualNames, lines[0], qLines[0]); + + //force parent to wait until all the processes are done - for (int i=0;imothurOut("Appending files from process " + processIDS[i]); m->mothurOutEndLine(); + + m->appendFiles((trimFile + toString(processIDS[i]) + ".temp"), trimFile); + remove((trimFile + toString(processIDS[i]) + ".temp").c_str()); + m->appendFiles((scrapFile + toString(processIDS[i]) + ".temp"), scrapFile); + remove((scrapFile + toString(processIDS[i]) + ".temp").c_str()); + + m->mothurOut("Done with fasta files"); m->mothurOutEndLine(); + + if(qFileName != ""){ + m->appendFiles((trimQFile + toString(processIDS[i]) + ".temp"), trimQFile); + remove((trimQFile + toString(processIDS[i]) + ".temp").c_str()); + m->appendFiles((scrapQFile + toString(processIDS[i]) + ".temp"), scrapQFile); + remove((scrapQFile + toString(processIDS[i]) + ".temp").c_str()); + + m->mothurOut("Done with quality files"); m->mothurOutEndLine(); + } + + m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile); + remove((groupFile + toString(processIDS[i]) + ".temp").c_str()); + + m->mothurOut("Done with group file"); m->mothurOutEndLine(); + + for (int j = 0; j < fastaNames.size(); j++) { + m->appendFiles((fastaNames[j] + toString(processIDS[i]) + ".temp"), fastaNames[j]); + remove((fastaNames[j] + toString(processIDS[i]) + ".temp").c_str()); + } + + if(qFileName != ""){ + for (int j = 0; j < qualNames.size(); j++) { + m->appendFiles((qualNames[j] + toString(processIDS[i]) + ".temp"), qualNames[j]); + remove((qualNames[j] + toString(processIDS[i]) + ".temp").c_str()); + } + } + + if (allFiles) { m->mothurOut("Done with allfiles"); m->mothurOutEndLine(); } + } + return exitCommand; #endif }