X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=chimeraperseuscommand.cpp;h=b4e478caea6bab2c3cf3a7d13a756f95a29f2223;hb=7aa301dfa67cfcb5b00c6b4e38a7ad56eb8337db;hp=76f0103c38ec64a5ebc21f3175df441a15ccae0e;hpb=d6c0a11d1cecfac18b323285e7ffadb7f58e848f;p=mothur.git diff --git a/chimeraperseuscommand.cpp b/chimeraperseuscommand.cpp index 76f0103..b4e478c 100644 --- a/chimeraperseuscommand.cpp +++ b/chimeraperseuscommand.cpp @@ -10,18 +10,23 @@ #include "chimeraperseuscommand.h" #include "deconvolutecommand.h" #include "sequence.hpp" +#include "counttable.h" +#include "sequencecountparser.h" //********************************************************************************************************************** vector ChimeraPerseusCommand::setParameters(){ try { - CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname); - CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); - CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pcutoff); - CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "",false,false); parameters.push_back(palpha); - CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "",false,false); parameters.push_back(pbeta); + CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups); + + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pcutoff); + CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "","",false,false); parameters.push_back(palpha); + CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "","",false,false); parameters.push_back(pbeta); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -36,13 +41,15 @@ vector ChimeraPerseusCommand::setParameters(){ string ChimeraPerseusCommand::getHelpString(){ try { string helpString = ""; - helpString += "The chimera.perseus command reads a fastafile and namefile and outputs potentially chimeric sequences.\n"; - helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, alpha and beta.\n"; + helpString += "The chimera.perseus command reads a fastafile and namefile or countfile and outputs potentially chimeric sequences.\n"; + helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, dereplicate, alpha and beta.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n"; - helpString += "The name parameter allows you to provide a name file associated with your fasta file. It is required. \n"; + helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n"; + helpString += "The count parameter allows you to provide a count file associated with your fasta file. A count or name file is required. \n"; helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n"; helpString += "The group parameter allows you to provide a group file. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; + helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n"; helpString += "The alpha parameter .... The default is -5.54. \n"; helpString += "The beta parameter .... The default is 0.33. \n"; helpString += "The cutoff parameter .... The default is 0.50. \n"; @@ -58,6 +65,22 @@ string ChimeraPerseusCommand::getHelpString(){ } } //********************************************************************************************************************** +string ChimeraPerseusCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "chimera") { pattern = "[filename],perseus.chimeras"; } + else if (type == "accnos") { pattern = "[filename],perseus.accnos"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "ChimeraPerseusCommand", "getOutputPattern"); + exit(1); + } +} +//********************************************************************************************************************** ChimeraPerseusCommand::ChimeraPerseusCommand(){ try { abort = true; calledHelp = true; @@ -75,6 +98,8 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(){ ChimeraPerseusCommand::ChimeraPerseusCommand(string option) { try { abort = false; calledHelp = false; + hasCount = false; + hasName = false; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } @@ -86,7 +111,7 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option) { OptionParser parser(option); map parameters = parser.getParameters(); - ValidParameters validParameter("chimera.uchime"); + ValidParameters validParameter("chimera.perseus"); map::iterator it; //check to make sure all parameters are valid for command @@ -182,15 +207,9 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option) { //check for required parameters - bool hasName = true; namefile = validParameter.validFile(parameters, "name", false); - if (namefile == "not found") { - //if there is a current fasta file, use it - string filename = m->getNameFile(); - if (filename != "") { nameFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the name parameter."); m->mothurOutEndLine(); } - else { m->mothurOut("You have no current namefile and the name parameter is required."); m->mothurOutEndLine(); abort = true; } - hasName = false; - }else { + if (namefile == "not found") { namefile = ""; } + else { m->splitAtDash(namefile, nameFileNames); //go through files and make sure they are good, if not, then disregard them @@ -256,12 +275,101 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option) { } } } + } + + if (nameFileNames.size() != 0) { hasName = true; } + + //check for required parameters + vector countfileNames; + countfile = validParameter.validFile(parameters, "count", false); + if (countfile == "not found") { + countfile = ""; + }else { + m->splitAtDash(countfile, countfileNames); - //make sure there is at least one valid file left - if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; } + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < countfileNames.size(); i++) { + + bool ignore = false; + if (countfileNames[i] == "current") { + countfileNames[i] = m->getCountTableFile(); + if (countfileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); } + else { + m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; + //erase from file list + countfileNames.erase(countfileNames.begin()+i); + i--; + } + } + + if (!ignore) { + + if (inputDir != "") { + string path = m->hasPath(countfileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; } + } + + int ableToOpen; + ifstream in; + + ableToOpen = m->openInputFile(countfileNames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]); + m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + countfileNames[i] = tryPath; + } + } + + if (ableToOpen == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]); + m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + countfileNames[i] = tryPath; + } + } + + in.close(); + + if (ableToOpen == 1) { + m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); + //erase from file list + countfileNames.erase(countfileNames.begin()+i); + i--; + }else { + m->setCountTableFile(countfileNames[i]); + } + } + } } - - if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; } + + if (countfileNames.size() != 0) { hasCount = true; } + + //make sure there is at least one valid file left + if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; } + + if (!hasName && !hasCount) { + //if there is a current name file, use it, else look for current count file + string filename = m->getNameFile(); + if (filename != "") { hasName = true; nameFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the name parameter."); m->mothurOutEndLine(); } + else { + filename = m->getCountTableFile(); + if (filename != "") { hasCount = true; countfileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the count parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("[ERROR]: You must provide a count or name file."); m->mothurOutEndLine(); abort = true; } + } + } + if (!hasName && hasCount) { nameFileNames = countfileNames; } + + if (nameFileNames.size() != fastaFileNames.size()) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; } bool hasGroup = true; groupfile = validParameter.validFile(parameters, "group", false); @@ -339,6 +447,7 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option) { if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; } + if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; } //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } @@ -355,6 +464,13 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option) { temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found"){ temp = "0.33"; } m->mothurConvert(temp, beta); + + temp = validParameter.validFile(parameters, "dereplicate", false); + if (temp == "not found") { + if (groupfile != "") { temp = "false"; } + else { temp = "true"; } + } + dups = m->isTrue(temp); } } catch(exception& e) { @@ -375,9 +491,12 @@ int ChimeraPerseusCommand::execute(){ m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine(); int start = time(NULL); - if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it - string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "perseus.chimera"; - string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "perseus.accnos"; + if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])); + string outputFileName = getOutputFileName("chimera", variables); + string accnosFileName = getOutputFileName("accnos", variables); + //string newFasta = m->getRootName(fastaFileNames[s]) + "temp"; //you provided a groupfile @@ -393,41 +512,86 @@ int ChimeraPerseusCommand::execute(){ int numSeqs = 0; int numChimeras = 0; - - if (groupFile != "") { - //Parse sequences by group - SequenceParser parser(groupFile, fastaFileNames[s], nameFile); - vector groups = parser.getNamesOfGroups(); - - if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } - - //clears files - ofstream out, out1, out2; - m->openOutputFile(outputFileName, out); out.close(); - m->openOutputFile(accnosFileName, out1); out1.close(); - - if(processors == 1) { numSeqs = driverGroups(parser, outputFileName, accnosFileName, 0, groups.size(), groups); } - else { numSeqs = createProcessesGroups(parser, outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile); } - - if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } - - numChimeras = deconvoluteResults(parser, outputFileName, accnosFileName); - - m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); - - if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } - - }else{ - if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; } - - //read sequences and store sorted by frequency - vector sequences = readFiles(fastaFileNames[s], nameFile); - - if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } - - numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); + + if (hasCount) { + CountTable* ct = new CountTable(); + ct->readTable(nameFile); + + if (ct->hasGroupInfo()) { + cparser = new SequenceCountParser(fastaFileNames[s], *ct); + + vector groups = cparser->getNamesOfGroups(); + + if (m->control_pressed) { delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + + //clears files + ofstream out, out1, out2; + m->openOutputFile(outputFileName, out); out.close(); + m->openOutputFile(accnosFileName, out1); out1.close(); + + if(processors == 1) { numSeqs = driverGroups(outputFileName, accnosFileName, 0, groups.size(), groups); } + else { numSeqs = createProcessesGroups(outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile); } + + if (m->control_pressed) { delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + map uniqueNames = cparser->getAllSeqsMap(); + if (!dups) { + numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName); + } + delete cparser; + + m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); + + if (m->control_pressed) { delete ct; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + + }else { + if (processors != 1) { m->mothurOut("Your count file does not contain group information, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; } + + //read sequences and store sorted by frequency + vector sequences = readFiles(fastaFileNames[s], ct); + + if (m->control_pressed) { delete ct; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + + numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); + } + delete ct; + }else { + if (groupFile != "") { + //Parse sequences by group + parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile); + vector groups = parser->getNamesOfGroups(); + + if (m->control_pressed) { delete parser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + + //clears files + ofstream out, out1, out2; + m->openOutputFile(outputFileName, out); out.close(); + m->openOutputFile(accnosFileName, out1); out1.close(); + + if(processors == 1) { numSeqs = driverGroups(outputFileName, accnosFileName, 0, groups.size(), groups); } + else { numSeqs = createProcessesGroups(outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile); } + + if (m->control_pressed) { delete parser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + map uniqueNames = parser->getAllSeqsMap(); + if (!dups) { + numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName); + } + delete parser; + + m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); + + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + }else{ + if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; } + + //read sequences and store sorted by frequency + vector sequences = readFiles(fastaFileNames[s], nameFile); + + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + + numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); + } } - + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine(); @@ -466,14 +630,15 @@ string ChimeraPerseusCommand::getNamesFile(string& inputFile){ string inputString = "fasta=" + inputFile; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); - + m->mothurCalling = true; + Command* uniqueCommand = new DeconvoluteCommand(inputString); uniqueCommand->execute(); map > filenames = uniqueCommand->getOutputFiles(); delete uniqueCommand; - + m->mothurCalling = false; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); nameFile = filenames["name"][0]; @@ -487,7 +652,7 @@ string ChimeraPerseusCommand::getNamesFile(string& inputFile){ } } //********************************************************************************************************************** -int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFName, string accnos, int start, int end, vector groups){ +int ChimeraPerseusCommand::driverGroups(string outputFName, string accnos, int start, int end, vector groups){ try { int totalSeqs = 0; @@ -499,7 +664,7 @@ int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFNa int start = time(NULL); if (m->control_pressed) { return 0; } - vector sequences = loadSequences(parser, groups[i]); + vector sequences = loadSequences(groups[i]); if (m->control_pressed) { return 0; } @@ -524,32 +689,50 @@ int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFNa } } //********************************************************************************************************************** -vector ChimeraPerseusCommand::loadSequences(SequenceParser& parser, string group){ +vector ChimeraPerseusCommand::loadSequences(string group){ try { - - vector thisGroupsSeqs = parser.getSeqs(group); - map nameMap = parser.getNameMap(group); - map::iterator it; - - vector sequences; - bool error = false; - alignLength = 0; - - for (int i = 0; i < thisGroupsSeqs.size(); i++) { - - if (m->control_pressed) { return sequences; } - - it = nameMap.find(thisGroupsSeqs[i].getName()); - if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); } - else { - int num = m->getNumNames(it->second); - sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num)); - if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); } - } + bool error = false; + alignLength = 0; + vector sequences; + if (hasCount) { + vector thisGroupsSeqs = cparser->getSeqs(group); + map counts = cparser->getCountTable(group); + map::iterator it; + + for (int i = 0; i < thisGroupsSeqs.size(); i++) { + + if (m->control_pressed) { return sequences; } + + it = counts.find(thisGroupsSeqs[i].getName()); + if (it == counts.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your count file, please correct."); m->mothurOutEndLine(); } + else { + thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned())); + sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), it->second)); + if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); } + } + } + }else{ + vector thisGroupsSeqs = parser->getSeqs(group); + map nameMap = parser->getNameMap(group); + map::iterator it; + + for (int i = 0; i < thisGroupsSeqs.size(); i++) { + + if (m->control_pressed) { return sequences; } + + it = nameMap.find(thisGroupsSeqs[i].getName()); + if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); } + else { + int num = m->getNumNames(it->second); + thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned())); + sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num)); + if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); } + } + } + } - if (error) { m->control_pressed = true; } - + if (error) { m->control_pressed = true; } //sort by frequency sort(sequences.rbegin(), sequences.rend()); @@ -583,6 +766,7 @@ vector ChimeraPerseusCommand::readFiles(string inputFile, string name){ it = nameMap.find(temp.getName()); if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); } else { + temp.setAligned(removeNs(temp.getUnaligned())); sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second)); if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); } } @@ -596,6 +780,52 @@ vector ChimeraPerseusCommand::readFiles(string inputFile, string name){ return sequences; } + catch(exception& e) { + m->errorOut(e, "ChimeraPerseusCommand", "readFiles"); + exit(1); + } +} +//********************************************************************************************************************** +string ChimeraPerseusCommand::removeNs(string seq){ + try { + string newSeq = ""; + for (int i = 0; i < seq.length(); i++) { + if (seq[i] != 'N') { newSeq += seq[i]; } + } + return newSeq; + } + catch(exception& e) { + m->errorOut(e, "ChimeraPerseusCommand", "removeNs"); + exit(1); + } +} +//********************************************************************************************************************** +vector ChimeraPerseusCommand::readFiles(string inputFile, CountTable* ct){ + try { + //read fasta file and create sequenceData structure - checking for file mismatches + vector sequences; + ifstream in; + m->openInputFile(inputFile, in); + alignLength = 0; + + while (!in.eof()) { + Sequence temp(in); m->gobble(in); + + int count = ct->getNumSeqs(temp.getName()); + if (m->control_pressed) { break; } + else { + temp.setAligned(removeNs(temp.getUnaligned())); + sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), count)); + if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); } + } + } + in.close(); + + //sort by frequency + sort(sequences.rbegin(), sequences.rend()); + + return sequences; + } catch(exception& e) { m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile"); exit(1); @@ -695,8 +925,8 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector& seque type = "chimera"; chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps); } - ; - if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; } + + if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; } double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq, dummyA, dummyB, correctModel); @@ -748,7 +978,7 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector& seque } } /**************************************************************************************************/ -int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string accnos, vector groups, string group, string fasta, string name) { +int ChimeraPerseusCommand::createProcessesGroups(string outputFName, string accnos, vector groups, string group, string fasta, string name) { try { vector processIDS; @@ -768,7 +998,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string lines.push_back(linePair(startIndex, endIndex)); } -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { @@ -778,7 +1008,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups); + num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups); //pass numSeqs to parent ofstream out; @@ -796,7 +1026,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string } //do my part - num = driverGroups(parser, outputFName, accnos, lines[0].start, lines[0].end, groups); + num = driverGroups(outputFName, accnos, lines[0].start, lines[0].end, groups); //force parent to wait until all the processes are done for (int i=0;i& uniqueNames, string outputFileName, string accnosFileName){ try { - map uniqueNames = parser.getAllSeqsMap(); map::iterator itUnique; int total = 0;