//**********************************************************************************************************************
vector<string> TrimSeqsCommand::getValidParameters(){
try {
- string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile",
+ string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile",
"qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
return myArray;
//**********************************************************************************************************************
TrimSeqsCommand::TrimSeqsCommand(){
try {
+ abort = true;
//initialize outputTypes
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
else {
//valid paramters for this command
- string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile",
+ string AlignArray[] = {"fasta", "flip", "group","oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile",
"qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["qfile"] = inputDir + it->second; }
}
+
+ it = parameters.find("group");
+ //user has given a group file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["group"] = inputDir + it->second; }
+ }
}
else if(temp == "not open"){ abort = true; }
else { oligoFile = temp; }
+ temp = validParameter.validFile(parameters, "group", true);
+ if (temp == "not found"){ groupfile = ""; }
+ else if(temp == "not open"){ abort = true; }
+ else { groupfile = temp; }
+
temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
convert(temp, maxAmbig);
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; }
convert(temp, processors);
- if(allFiles && oligoFile == ""){
- m->mothurOut("You selected allfiles, but didn't enter an oligos file. Ignoring the allfiles request."); m->mothurOutEndLine();
+ if ((oligoFile != "") && (groupfile != "")) {
+ m->mothurOut("You given both a oligos file and a groupfile, only one is allowed."); m->mothurOutEndLine(); abort = true;
+ }
+
+
+ if(allFiles && (oligoFile == "") && (groupfile == "")){
+ m->mothurOut("You selected allfiles, but didn't enter an oligos or group file. Ignoring the allfiles request."); m->mothurOutEndLine();
}
if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
void TrimSeqsCommand::help(){
try {
m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
- m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n");
+ m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, group, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n");
m->mothurOut("The fasta parameter is required.\n");
+ m->mothurOut("The group parameter allows you to enter a group file for your fasta file.\n");
m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
m->mothurOut("The oligos parameter .... The default is "".\n");
m->mothurOut("The maxambig parameter .... The default is -1.\n");
numFPrimers = 0; //this needs to be initialized
numRPrimers = 0;
+ vector<string> fastaFileNames;
+ vector<string> qualFileNames;
string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
if (qFileName != "") { outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile); outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); }
- string groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
+ string groupFile = "";
+ if (groupfile == "") { groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; }
+ else{
+ groupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "trim.groups";
+ outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
+ groupMap = new GroupMap(groupfile);
+ groupMap->readMap();
+
+ if(allFiles){
+ for (int i = 0; i < groupMap->namesOfGroups.size(); i++) {
+ groupToIndex[groupMap->namesOfGroups[i]] = i;
+ groupVector.push_back(groupMap->namesOfGroups[i]);
+ fastaFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupMap->namesOfGroups[i] + ".fasta"));
+
+ //we append later, so we want to clear file
+ ofstream outRemove;
+ m->openOutputFile(fastaFileNames[i], outRemove);
+ outRemove.close();
+ if(qFileName != ""){
+ qualFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupMap->namesOfGroups[i] + ".qual"));
+ ofstream outRemove2;
+ m->openOutputFile(qualFileNames[i], outRemove2);
+ outRemove2.close();
+ }
+ }
+ }
+ comboStarts = fastaFileNames.size()-1;
+ }
- vector<string> fastaFileNames;
- vector<string> qualFileNames;
if(oligoFile != ""){
outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
getOligos(fastaFileNames, qualFileNames);
if (m->control_pressed) { return 0; }
#endif
-
+
for(int i=0;i<fastaFileNames.size();i++){
if (m->isBlank(fastaFileNames[i])) { remove(fastaFileNames[i].c_str()); }
}
ofstream outGroups;
- vector<ofstream*> fastaFileNames;
- vector<ofstream*> qualFileNames;
+ //vector<ofstream*> fastaFileNames;
+ //vector<ofstream*> qualFileNames;
if (oligoFile != "") {
m->openOutputFile(groupFile, outGroups);
for (int i = 0; i < fastaNames.size(); i++) {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate));
+ fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp");
+ //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate));
+ //clear old file if it exists
+ ofstream temp;
+ m->openOutputFile(fastaNames[i], temp);
+ temp.close();
if(qFileName != ""){
- qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate));
+ qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp");
+ //qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate));
+ //clear old file if it exists
+ ofstream temp2;
+ m->openOutputFile(qualNames[i], temp2);
+ temp2.close();
}
#else
- fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate));
+ //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate));
+ fastaNames[i] = (fastaNames[i] + toString(i) + ".temp");
+ ofstream temp;
+ m->openOutputFile(fastaNames[i], temp);
+ temp.close();
if(qFileName != ""){
- qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate));
+ //qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate));
+ qualNames[i] = (qualNames[i] + toString(i) + ".temp");
+ ofstream temp2;
+ m->openOutputFile(qualNames[i], temp2);
+ temp2.close();
}
#endif
}
inFASTA.close(); outFASTA.close(); scrapFASTA.close();
if (oligoFile != "") { outGroups.close(); }
- for(int i=0;i<fastaFileNames.size();i++){ fastaFileNames[i]->close(); delete fastaFileNames[i]; }
+ //for(int i=0;i<fastaFileNames.size();i++){ fastaFileNames[i]->close(); delete fastaFileNames[i]; }
if(qFileName != ""){
qFile.close();
- for(int i=0;i<qualFileNames.size();i++){ qualFileNames[i]->close(); delete qualFileNames[i]; }
+ //for(int i=0;i<qualFileNames.size();i++){ qualFileNames[i]->close(); delete qualFileNames[i]; }
}
for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
}
outGroups << currSeq.getName() << '\t' << thisGroup << endl;
if(allFiles){
- currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
+ ofstream outTemp;
+ m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp);
+ //currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
+ currSeq.printSequence(outTemp);
+ outTemp.close();
if(qFileName != ""){
- currQual.printQScores(*qualFileNames[indexToFastaFile]);
+ //currQual.printQScores(*qualFileNames[indexToFastaFile]);
+ ofstream outTemp2;
+ m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2);
+ currQual.printQScores(outTemp2);
+ outTemp2.close();
+ }
+ }
+ }
+
+ if (groupfile != "") {
+ string thisGroup = groupMap->getGroup(currSeq.getName());
+
+ if (thisGroup != "not found") {
+ outGroups << currSeq.getName() << '\t' << thisGroup << endl;
+ if (allFiles) {
+ ofstream outTemp;
+ m->openOutputFileAppend(fastaNames[groupToIndex[thisGroup]], outTemp);
+ currSeq.printSequence(outTemp);
+ outTemp.close();
+ if(qFileName != ""){
+ ofstream outTemp2;
+ m->openOutputFileAppend(qualNames[groupToIndex[thisGroup]], outTemp2);
+ currQual.printQScores(outTemp2);
+ outTemp2.close();
+ }
+ }
+ }else{
+ m->mothurOut(currSeq.getName() + " is not in your groupfile, adding to group XXX."); m->mothurOutEndLine();
+ outGroups << currSeq.getName() << '\t' << "XXX" << endl;
+ if (allFiles) {
+ m->mothurOut("[ERROR]: " + currSeq.getName() + " will not be added to any .group.fasta or .group.qual file."); m->mothurOutEndLine();
}
}
}
if (oligoFile != "") { outGroups.close(); }
if(qFileName != "") { qFile.close(); scrapQual.close(); outQual.close(); }
- for(int i=0;i<fastaFileNames.size();i++){
- fastaFileNames[i]->close();
- delete fastaFileNames[i];
- }
+ //for(int i=0;i<fastaFileNames.size();i++){
+ // fastaFileNames[i]->close();
+ // delete fastaFileNames[i];
+ //}
- if(qFileName != ""){
- for(int i=0;i<qualFileNames.size();i++){
- qualFileNames[i]->close();
- delete qualFileNames[i];
- }
- }
+ //if(qFileName != ""){
+ //for(int i=0;i<qualFileNames.size();i++){
+ //qualFileNames[i]->close();
+ //delete qualFileNames[i];
+ //}
+ //}
return count;
}
}else if (pid == 0){
driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]);
exit(0);
- }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
}
//force parent to wait until all the processes are done
//seach for filePos of each first name in the qfile and save in qfileFilePos
ifstream inQual;
m->openInputFile(qfilename, inQual);
-
+
string input;
while(!inQual.eof()){
input = m->getline(inQual);
}
inQual.close();
+
if (firstSeqNames.size() != 0) {
for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();