//**********************************************************************************************************************
vector<string> ChimeraPerseusCommand::setParameters(){
try {
- CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "NameCount", "NameCount", "none",false,false); parameters.push_back(pname);
- CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none",false,false); parameters.push_back(pcount);
- CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
- CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
- CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
- CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pcutoff);
- CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "",false,false); parameters.push_back(palpha);
- CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "",false,false); parameters.push_back(pbeta);
+ CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
+ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+ CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
+
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+ CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pcutoff);
+ CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "","",false,false); parameters.push_back(palpha);
+ CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "","",false,false); parameters.push_back(pbeta);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
try {
string helpString = "";
helpString += "The chimera.perseus command reads a fastafile and namefile or countfile and outputs potentially chimeric sequences.\n";
- helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, alpha and beta.\n";
+ helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, dereplicate, alpha and beta.\n";
helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
helpString += "The count parameter allows you to provide a count file associated with your fasta file. A count or name file is required. \n";
helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
helpString += "The group parameter allows you to provide a group file. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
+ helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
helpString += "The alpha parameter .... The default is -5.54. \n";
helpString += "The beta parameter .... The default is 0.33. \n";
helpString += "The cutoff parameter .... The default is 0.50. \n";
}
}
//**********************************************************************************************************************
-string ChimeraPerseusCommand::getOutputFileNameTag(string type, string inputName=""){
- try {
- string outputFileName = "";
- map<string, vector<string> >::iterator it;
+string ChimeraPerseusCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
- //is this a type this command creates
- it = outputTypes.find(type);
- if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
- else {
- if (type == "chimera") { outputFileName = "perseus.chimeras"; }
- else if (type == "accnos") { outputFileName = "perseus.accnos"; }
- else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
- }
- return outputFileName;
- }
- catch(exception& e) {
- m->errorOut(e, "ChimeraPerseusCommand", "getOutputFileNameTag");
- exit(1);
- }
+ if (type == "chimera") { pattern = "[filename],perseus.chimeras"; }
+ else if (type == "accnos") { pattern = "[filename],perseus.accnos"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ChimeraPerseusCommand", "getOutputPattern");
+ exit(1);
+ }
}
//**********************************************************************************************************************
ChimeraPerseusCommand::ChimeraPerseusCommand(){
bool ignore = false;
if (countfileNames[i] == "current") {
countfileNames[i] = m->getCountTableFile();
- if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
+ if (countfileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
else {
m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
//erase from file list
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found"){ temp = "0.33"; }
m->mothurConvert(temp, beta);
+
+ temp = validParameter.validFile(parameters, "dereplicate", false);
+ if (temp == "not found") {
+ if (groupfile != "") { temp = "false"; }
+ else { temp = "true"; }
+ }
+ dups = m->isTrue(temp);
}
}
catch(exception& e) {
m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
int start = time(NULL);
- if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
- string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
+ if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
+ string outputFileName = getOutputFileName("chimera", variables);
+ string accnosFileName = getOutputFileName("accnos", variables);
//string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
if (m->control_pressed) { delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
map<string, string> uniqueNames = cparser->getAllSeqsMap();
- numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
+ if (!dups) {
+ numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
+ }
delete cparser;
m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
if (m->control_pressed) { delete parser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
map<string, string> uniqueNames = parser->getAllSeqsMap();
- numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
+ if (!dups) {
+ numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
+ }
delete parser;
m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
it = counts.find(thisGroupsSeqs[i].getName());
if (it == counts.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your count file, please correct."); m->mothurOutEndLine(); }
else {
+ thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned()));
sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), it->second));
if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
}
if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
else {
int num = m->getNumNames(it->second);
+ thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned()));
sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
}
it = nameMap.find(temp.getName());
if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
else {
+ temp.setAligned(removeNs(temp.getUnaligned()));
sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second));
if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
}
}
}
//**********************************************************************************************************************
+string ChimeraPerseusCommand::removeNs(string seq){
+ try {
+ string newSeq = "";
+ for (int i = 0; i < seq.length(); i++) {
+ if (seq[i] != 'N') { newSeq += seq[i]; }
+ }
+ return newSeq;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ChimeraPerseusCommand", "removeNs");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, CountTable* ct){
try {
//read fasta file and create sequenceData structure - checking for file mismatches
int count = ct->getNumSeqs(temp.getName());
if (m->control_pressed) { break; }
else {
+ temp.setAligned(removeNs(temp.getUnaligned()));
sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), count));
if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
}
type = "chimera";
chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps);
}
- ;
- if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
+
+ if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq, dummyA, dummyB, correctModel);
//Close all thread handles and free memory allocations.
for(int i=0; i < pDataArray.size(); i++){
+ if (pDataArray[i]->count != pDataArray[i]->end) {
+ m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true;
+ }
num += pDataArray[i]->count;
CloseHandle(hThreadArray[i]);
delete pDataArray[i];