X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=chimeraperseuscommand.cpp;h=b4e478caea6bab2c3cf3a7d13a756f95a29f2223;hb=98ca1e6ab30ab49e4a78bb6012d34c9c2db22702;hp=0955f2dc662dc903c49b34d1fe70103ef8a2b5b0;hpb=c7e8c2d15bd7cedcfdf18675cb0ea1a0dcd0e3c0;p=mothur.git diff --git a/chimeraperseuscommand.cpp b/chimeraperseuscommand.cpp index 0955f2d..b4e478c 100644 --- a/chimeraperseuscommand.cpp +++ b/chimeraperseuscommand.cpp @@ -15,16 +15,18 @@ //********************************************************************************************************************** vector ChimeraPerseusCommand::setParameters(){ try { - CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname); - CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount); - CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); - CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pcutoff); - CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "",false,false); parameters.push_back(palpha); - CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "",false,false); parameters.push_back(pbeta); + CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups); + + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pcutoff); + CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "","",false,false); parameters.push_back(palpha); + CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "","",false,false); parameters.push_back(pbeta); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -40,13 +42,14 @@ string ChimeraPerseusCommand::getHelpString(){ try { string helpString = ""; helpString += "The chimera.perseus command reads a fastafile and namefile or countfile and outputs potentially chimeric sequences.\n"; - helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, alpha and beta.\n"; + helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, dereplicate, alpha and beta.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n"; helpString += "The count parameter allows you to provide a count file associated with your fasta file. A count or name file is required. \n"; helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n"; helpString += "The group parameter allows you to provide a group file. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; + helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n"; helpString += "The alpha parameter .... The default is -5.54. \n"; helpString += "The beta parameter .... The default is 0.33. \n"; helpString += "The cutoff parameter .... The default is 0.50. \n"; @@ -62,25 +65,20 @@ string ChimeraPerseusCommand::getHelpString(){ } } //********************************************************************************************************************** -string ChimeraPerseusCommand::getOutputFileNameTag(string type, string inputName=""){ - try { - string outputFileName = ""; - map >::iterator it; +string ChimeraPerseusCommand::getOutputPattern(string type) { + try { + string pattern = ""; - //is this a type this command creates - it = outputTypes.find(type); - if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } - else { - if (type == "chimera") { outputFileName = "perseus.chimeras"; } - else if (type == "accnos") { outputFileName = "perseus.accnos"; } - else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } - } - return outputFileName; - } - catch(exception& e) { - m->errorOut(e, "ChimeraPerseusCommand", "getOutputFileNameTag"); - exit(1); - } + if (type == "chimera") { pattern = "[filename],perseus.chimeras"; } + else if (type == "accnos") { pattern = "[filename],perseus.accnos"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "ChimeraPerseusCommand", "getOutputPattern"); + exit(1); + } } //********************************************************************************************************************** ChimeraPerseusCommand::ChimeraPerseusCommand(){ @@ -295,7 +293,7 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option) { bool ignore = false; if (countfileNames[i] == "current") { countfileNames[i] = m->getCountTableFile(); - if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); } + if (countfileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); } else { m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; //erase from file list @@ -466,6 +464,13 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option) { temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found"){ temp = "0.33"; } m->mothurConvert(temp, beta); + + temp = validParameter.validFile(parameters, "dereplicate", false); + if (temp == "not found") { + if (groupfile != "") { temp = "false"; } + else { temp = "true"; } + } + dups = m->isTrue(temp); } } catch(exception& e) { @@ -486,9 +491,11 @@ int ChimeraPerseusCommand::execute(){ m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine(); int start = time(NULL); - if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it - string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera"); - string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos"); + if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])); + string outputFileName = getOutputFileName("chimera", variables); + string accnosFileName = getOutputFileName("accnos", variables); //string newFasta = m->getRootName(fastaFileNames[s]) + "temp"; @@ -527,7 +534,9 @@ int ChimeraPerseusCommand::execute(){ if (m->control_pressed) { delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } map uniqueNames = cparser->getAllSeqsMap(); - numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName); + if (!dups) { + numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName); + } delete cparser; m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); @@ -563,7 +572,9 @@ int ChimeraPerseusCommand::execute(){ if (m->control_pressed) { delete parser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } map uniqueNames = parser->getAllSeqsMap(); - numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName); + if (!dups) { + numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName); + } delete parser; m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); @@ -695,6 +706,7 @@ vector ChimeraPerseusCommand::loadSequences(string group){ it = counts.find(thisGroupsSeqs[i].getName()); if (it == counts.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your count file, please correct."); m->mothurOutEndLine(); } else { + thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned())); sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), it->second)); if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); } } @@ -712,6 +724,7 @@ vector ChimeraPerseusCommand::loadSequences(string group){ if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); } else { int num = m->getNumNames(it->second); + thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned())); sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num)); if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); } } @@ -753,6 +766,7 @@ vector ChimeraPerseusCommand::readFiles(string inputFile, string name){ it = nameMap.find(temp.getName()); if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); } else { + temp.setAligned(removeNs(temp.getUnaligned())); sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second)); if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); } } @@ -772,6 +786,20 @@ vector ChimeraPerseusCommand::readFiles(string inputFile, string name){ } } //********************************************************************************************************************** +string ChimeraPerseusCommand::removeNs(string seq){ + try { + string newSeq = ""; + for (int i = 0; i < seq.length(); i++) { + if (seq[i] != 'N') { newSeq += seq[i]; } + } + return newSeq; + } + catch(exception& e) { + m->errorOut(e, "ChimeraPerseusCommand", "removeNs"); + exit(1); + } +} +//********************************************************************************************************************** vector ChimeraPerseusCommand::readFiles(string inputFile, CountTable* ct){ try { //read fasta file and create sequenceData structure - checking for file mismatches @@ -786,6 +814,7 @@ vector ChimeraPerseusCommand::readFiles(string inputFile, CountTable* c int count = ct->getNumSeqs(temp.getName()); if (m->control_pressed) { break; } else { + temp.setAligned(removeNs(temp.getUnaligned())); sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), count)); if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); } } @@ -896,8 +925,8 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector& seque type = "chimera"; chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps); } - ; - if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; } + + if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; } double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq, dummyA, dummyB, correctModel);