2 * chimeraperseuscommand.cpp
5 * Created by westcott on 10/26/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimeraperseuscommand.h"
11 #include "deconvolutecommand.h"
12 #include "sequence.hpp"
13 #include "counttable.h"
14 #include "sequencecountparser.h"
15 //**********************************************************************************************************************
// Declares every parameter chimera.perseus accepts and gathers their names.
// Each CommandParameter is appended to `parameters`; the names are then copied
// into myArray (presumably the value returned -- the return statement is not
// visible in this listing).
16 vector<string> ChimeraPerseusCommand::setParameters(){
// Input files: fasta, name, count and group.
18 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
19 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname);
20 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount);
21 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
// Run options. The fourth argument ("1", "F", "0.5", "-5.54", "0.33") matches the
// defaults quoted in getHelpString(); presumably it is the default value --
// confirm against the CommandParameter declaration.
22 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
23 CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
27 CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pcutoff);
28 CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "","",false,false); parameters.push_back(palpha);
29 CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "","",false,false); parameters.push_back(pbeta);
// Copy the parameter names out in registration order.
31 vector<string> myArray;
32 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Exceptions are reported together with the class and method name.
36 m->errorOut(e, "ChimeraPerseusCommand", "setParameters");
40 //**********************************************************************************************************************
// Builds the help text shown for chimera.perseus, one sentence per line.
// The parameter summary now lists `count`, which the command registers and
// describes below, and the misspelling of "sequence(s)" is corrected.
// NOTE(review): the alpha, beta and cutoff descriptions are still placeholders
// ("....") and need real text from someone who knows the Perseus model.
41 string ChimeraPerseusCommand::getHelpString(){
43 string helpString = "";
44 helpString += "The chimera.perseus command reads a fastafile and namefile or countfile and outputs potentially chimeric sequences.\n";
45 helpString += "The chimera.perseus command parameters are fasta, name, count, group, cutoff, processors, dereplicate, alpha and beta.\n";
46 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
47 helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
48 helpString += "The count parameter allows you to provide a count file associated with your fasta file. A count or name file is required. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing sequences after chimeras are removed.\n";
49 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
50 helpString += "The group parameter allows you to provide a group file. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
51 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
52 helpString += "If the dereplicate parameter is false, then if one group finds the sequence to be chimeric, then all groups find it to be chimeric, default=f.\n";
53 helpString += "The alpha parameter .... The default is -5.54. \n";
54 helpString += "The beta parameter .... The default is 0.33. \n";
55 helpString += "The cutoff parameter .... The default is 0.50. \n";
56 helpString += "The chimera.perseus command should be in the following format: \n";
57 helpString += "chimera.perseus(fasta=yourFastaFile, name=yourNameFile) \n";
58 helpString += "Example: chimera.perseus(fasta=AD.align, name=AD.names) \n";
59 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
// Exceptions are reported together with the class and method name.
63 m->errorOut(e, "ChimeraPerseusCommand", "getHelpString");
67 //**********************************************************************************************************************
// Maps an output type ("chimera", "accnos" or "count") to its file-name pattern.
// Any other type logs an error and sets m->control_pressed.
68 string ChimeraPerseusCommand::getOutputPattern(string type) {
72 if (type == "chimera") { pattern = "[filename],perseus.chimeras"; }
73 else if (type == "accnos") { pattern = "[filename],perseus.accnos"; }
74 else if (type == "count") { pattern = "[filename],perseus.pick.count_table"; }
75 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
// Exceptions are reported together with the class and method name.
80 m->errorOut(e, "ChimeraPerseusCommand", "getOutputPattern");
84 //**********************************************************************************************************************
// Default constructor: creates a command that will not run (abort and
// calledHelp are both true) but still exposes its three output types.
85 ChimeraPerseusCommand::ChimeraPerseusCommand(){
87 abort = true; calledHelp = true;
// Register each output type with an empty list of file names.
89 vector<string> tempOutNames;
90 outputTypes["chimera"] = tempOutNames;
91 outputTypes["accnos"] = tempOutNames;
92 outputTypes["count"] = tempOutNames;
// Exceptions are reported together with the class and method name.
95 m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
99 //***************************************************************************************************************
// Parses the option string for chimera.perseus and validates the inputs.
// "help" and "citation" print their text and set abort/calledHelp. Otherwise
// the fasta, name, count and group values are split at dashes into file lists;
// each entry is resolved ("current", inputdir, default path, output directory)
// and entries that cannot be opened are dropped. Then outputdir, processors,
// cutoff, alpha, beta and dereplicate are read. Validation failures set
// abort = true rather than throwing.
// beta is read from the "beta" parameter (it was previously read from
// "cutoff"), and the group-file loop reports a missing current groupfile
// (it previously said "namefile").
100 ChimeraPerseusCommand::ChimeraPerseusCommand(string option) {
102 abort = false; calledHelp = false;
106 //allow user to run help
107 if(option == "help") { help(); abort = true; calledHelp = true; }
108 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111 vector<string> myArray = setParameters();
113 OptionParser parser(option);
114 map<string,string> parameters = parser.getParameters();
116 ValidParameters validParameter("chimera.perseus");
117 map<string,string>::iterator it;
119 //check to make sure all parameters are valid for command
120 for (it = parameters.begin(); it != parameters.end(); it++) {
121 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
// Register each output type with an empty list of file names.
124 vector<string> tempOutNames;
125 outputTypes["chimera"] = tempOutNames;
126 outputTypes["accnos"] = tempOutNames;
127 outputTypes["count"] = tempOutNames;
129 //if the user changes the input directory command factory will send this info to us in the output parameter
130 string inputDir = validParameter.validFile(parameters, "inputdir", false);
131 if (inputDir == "not found"){ inputDir = ""; }
// ---- fasta files ----
133 //check for required parameters
134 fastafile = validParameter.validFile(parameters, "fasta", false);
135 if (fastafile == "not found") {
136 //if there is a current fasta file, use it
137 string filename = m->getFastaFile();
138 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
139 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
141 m->splitAtDash(fastafile, fastaFileNames);
143 //go through files and make sure they are good, if not, then disregard them
144 for (int i = 0; i < fastaFileNames.size(); i++) {
// "current" is replaced by the session's current fasta file, if there is one.
147 if (fastaFileNames[i] == "current") {
148 fastaFileNames[i] = m->getFastaFile();
149 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
151 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
152 //erase from file list
153 fastaFileNames.erase(fastaFileNames.begin()+i);
160 if (inputDir != "") {
161 string path = m->hasPath(fastaFileNames[i]);
162 //if the user has not given a path then, add inputdir. else leave path alone.
163 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
// Open as given; on failure (ableToOpen == 1) fall back to the default path, then the output directory.
169 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
171 //if you can't open it, try default location
172 if (ableToOpen == 1) {
173 if (m->getDefaultPath() != "") { //default path is set
174 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
175 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
177 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
179 fastaFileNames[i] = tryPath;
183 if (ableToOpen == 1) {
184 if (m->getOutputDir() != "") { //default path is set
185 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
186 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
188 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
190 fastaFileNames[i] = tryPath;
196 if (ableToOpen == 1) {
197 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
198 //erase from file list
199 fastaFileNames.erase(fastaFileNames.begin()+i);
202 m->setFastaFile(fastaFileNames[i]);
207 //make sure there is at least one valid file left
208 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
// ---- name files (same resolution steps as the fasta files) ----
212 //check for required parameters
213 namefile = validParameter.validFile(parameters, "name", false);
214 if (namefile == "not found") { namefile = ""; }
216 m->splitAtDash(namefile, nameFileNames);
218 //go through files and make sure they are good, if not, then disregard them
219 for (int i = 0; i < nameFileNames.size(); i++) {
222 if (nameFileNames[i] == "current") {
223 nameFileNames[i] = m->getNameFile();
224 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
226 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
227 //erase from file list
228 nameFileNames.erase(nameFileNames.begin()+i);
235 if (inputDir != "") {
236 string path = m->hasPath(nameFileNames[i]);
237 //if the user has not given a path then, add inputdir. else leave path alone.
238 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
244 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
246 //if you can't open it, try default location
247 if (ableToOpen == 1) {
248 if (m->getDefaultPath() != "") { //default path is set
249 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
250 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
252 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
254 nameFileNames[i] = tryPath;
258 if (ableToOpen == 1) {
259 if (m->getOutputDir() != "") { //default path is set
260 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
261 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
263 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
265 nameFileNames[i] = tryPath;
271 if (ableToOpen == 1) {
272 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
273 //erase from file list
274 nameFileNames.erase(nameFileNames.begin()+i);
277 m->setNameFile(nameFileNames[i]);
283 if (nameFileNames.size() != 0) { hasName = true; }
// ---- count files (same resolution steps as the fasta files) ----
285 //check for required parameters
286 vector<string> countfileNames;
287 countfile = validParameter.validFile(parameters, "count", false);
288 if (countfile == "not found") {
291 m->splitAtDash(countfile, countfileNames);
293 //go through files and make sure they are good, if not, then disregard them
294 for (int i = 0; i < countfileNames.size(); i++) {
297 if (countfileNames[i] == "current") {
298 countfileNames[i] = m->getCountTableFile();
299 if (countfileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
301 m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
302 //erase from file list
303 countfileNames.erase(countfileNames.begin()+i);
310 if (inputDir != "") {
311 string path = m->hasPath(countfileNames[i]);
312 //if the user has not given a path then, add inputdir. else leave path alone.
313 if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; }
319 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
321 //if you can't open it, try default location
322 if (ableToOpen == 1) {
323 if (m->getDefaultPath() != "") { //default path is set
324 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
325 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
327 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
329 countfileNames[i] = tryPath;
333 if (ableToOpen == 1) {
334 if (m->getOutputDir() != "") { //default path is set
335 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
336 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
338 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
340 countfileNames[i] = tryPath;
346 if (ableToOpen == 1) {
347 m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
348 //erase from file list
349 countfileNames.erase(countfileNames.begin()+i);
352 m->setCountTableFile(countfileNames[i]);
358 if (countfileNames.size() != 0) { hasCount = true; }
// name and count are mutually exclusive; when neither is given, fall back to the session's current files.
360 //make sure there is at least one valid file left
361 if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
363 if (!hasName && !hasCount) {
364 //if there is a current name file, use it, else look for current count file
365 string filename = m->getNameFile();
366 if (filename != "") { hasName = true; nameFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the name parameter."); m->mothurOutEndLine(); }
368 filename = m->getCountTableFile();
369 if (filename != "") { hasCount = true; countfileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the count parameter."); m->mothurOutEndLine(); }
370 else { m->mothurOut("[ERROR]: You must provide a count or name file."); m->mothurOutEndLine(); abort = true; }
// With only count files, nameFileNames carries the count files from here on.
373 if (!hasName && hasCount) { nameFileNames = countfileNames; }
375 if (nameFileNames.size() != fastaFileNames.size()) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
// ---- group files (same resolution steps as the fasta files) ----
377 bool hasGroup = true;
378 groupfile = validParameter.validFile(parameters, "group", false);
379 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
381 m->splitAtDash(groupfile, groupFileNames);
383 //go through files and make sure they are good, if not, then disregard them
384 for (int i = 0; i < groupFileNames.size(); i++) {
387 if (groupFileNames[i] == "current") {
388 groupFileNames[i] = m->getGroupFile();
389 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
// This loop handles group files, so the message names the groupfile.
391 m->mothurOut("You have no current groupfile, ignoring current."); m->mothurOutEndLine(); ignore=true;
392 //erase from file list
393 groupFileNames.erase(groupFileNames.begin()+i);
400 if (inputDir != "") {
401 string path = m->hasPath(groupFileNames[i]);
402 //if the user has not given a path then, add inputdir. else leave path alone.
403 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
409 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
411 //if you can't open it, try default location
412 if (ableToOpen == 1) {
413 if (m->getDefaultPath() != "") { //default path is set
414 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
415 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
417 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
419 groupFileNames[i] = tryPath;
423 if (ableToOpen == 1) {
424 if (m->getOutputDir() != "") { //default path is set
425 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
426 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
428 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
430 groupFileNames[i] = tryPath;
436 if (ableToOpen == 1) {
437 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
438 //erase from file list
439 groupFileNames.erase(groupFileNames.begin()+i);
442 m->setGroupFile(groupFileNames[i]);
447 //make sure there is at least one valid file left
448 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
451 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
453 if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
455 //if the user changes the output directory command factory will send this info to us in the output parameter
456 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
// ---- scalar options; each falls back to its default when not supplied ----
458 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
459 m->setProcessors(temp);
460 m->mothurConvert(temp, processors);
462 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found"){ temp = "0.50"; }
463 m->mothurConvert(temp, cutoff);
465 temp = validParameter.validFile(parameters, "alpha", false); if (temp == "not found"){ temp = "-5.54"; }
466 m->mothurConvert(temp, alpha);
// beta has its own parameter; looking up "cutoff" here made beta follow cutoff.
468 temp = validParameter.validFile(parameters, "beta", false); if (temp == "not found"){ temp = "0.33"; }
469 m->mothurConvert(temp, beta);
471 temp = validParameter.validFile(parameters, "dereplicate", false);
472 if (temp == "not found") { temp = "false"; }
473 dups = m->isTrue(temp);
// Exceptions are reported together with the class and method name.
476 catch(exception& e) {
477 m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
481 //***************************************************************************************************************
// Runs the chimera check for every fasta file collected by the constructor.
// Returns 0 straight away when help/citation was requested and 2 when the
// command was aborted for any other reason. Whenever m->control_pressed is
// seen, every file in outputNames is removed and 0 is returned.
// NOTE(review): the conditions selecting the count-table path versus the
// name/group path are on lines not visible in this listing.
483 int ChimeraPerseusCommand::execute(){
485 if (abort == true) { if (calledHelp) { return 0; } return 2; }
489 for (int s = 0; s < fastaFileNames.size(); s++) {
491 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
// Timestamp used for the elapsed-time message at the end of this iteration.
493 int start = time(NULL);
// outputDir is a member: once set from the first file's path it stays set for later files.
494 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
495 map<string, string> variables;
496 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
497 string outputFileName = getOutputFileName("chimera", variables);
498 string accnosFileName = getOutputFileName("accnos", variables);
499 string newCountFile = "";
501 //string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
503 //you provided a groupfile
504 string groupFile = "";
505 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
// Without a name/count list, getNamesFile() runs unique.seqs and may replace fastaFileNames[s].
507 string nameFile = "";
508 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
509 nameFile = nameFileNames[s];
510 }else { nameFile = getNamesFile(fastaFileNames[s]); }
512 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
// ---- count-table path: nameFile is read as a count table ----
518 CountTable* ct = new CountTable();
519 ct->readTable(nameFile, true);
// With group info, sequences are checked group by group.
521 if (ct->hasGroupInfo()) {
522 cparser = new SequenceCountParser(fastaFileNames[s], *ct);
523 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
524 newCountFile = getOutputFileName("count", variables);
526 vector<string> groups = cparser->getNamesOfGroups();
528 if (m->control_pressed) { delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
// Create (truncate) the combined output files before per-group results are appended.
531 ofstream out, out1, out2;
532 m->openOutputFile(outputFileName, out); out.close();
533 m->openOutputFile(accnosFileName, out1); out1.close();
535 if(processors == 1) { numSeqs = driverGroups(outputFileName, accnosFileName, newCountFile, 0, groups.size(), groups);
// newCountFile holds name/group pairs at this point; each pair's abundance is
// set to 0 in a fresh copy of the table, which then replaces newCountFile.
537 CountTable c; c.readTable(nameFile, true);
538 if (!m->isBlank(newCountFile)) {
540 m->openInputFile(newCountFile, in2);
544 in2 >> name >> group; m->gobble(in2);
545 c.setAbund(name, group, 0);
549 m->mothurRemove(newCountFile);
550 c.printTable(newCountFile);
554 else { numSeqs = createProcessesGroups(outputFileName, accnosFileName, newCountFile, groups, groupFile, fastaFileNames[s], nameFile); }
556 if (m->control_pressed) { delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
557 map<string, string> uniqueNames = cparser->getAllSeqsMap();
559 numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
// Sequences whose total count is 0 are dropped from the table; the rest must not appear in the accnos file.
561 set<string> doNotRemove;
562 CountTable c; c.readTable(newCountFile, true);
563 vector<string> namesInTable = c.getNamesOfSeqs();
564 for (int i = 0; i < namesInTable.size(); i++) {
565 int temp = c.getNumSeqs(namesInTable[i]);
566 if (temp == 0) { c.remove(namesInTable[i]); }
567 else { doNotRemove.insert((namesInTable[i])); }
569 //remove names we want to keep from accnos file.
570 set<string> accnosNames = m->readAccnos(accnosFileName);
572 m->openOutputFile(accnosFileName, out2);
573 for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
574 if (doNotRemove.count(*it) == 0) { out2 << (*it) << endl; }
577 c.printTable(newCountFile);
578 outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile);
583 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
585 if (m->control_pressed) { delete ct; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
// Count table without group info: single-processor run over the whole file.
588 if (processors != 1) { m->mothurOut("Your count file does not contain group information, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
590 //read sequences and store sorted by frequency
591 vector<seqData> sequences = readFiles(fastaFileNames[s], ct);
593 if (m->control_pressed) { delete ct; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
595 numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras);
// ---- name-file path: per group when a group file was given, otherwise the whole file ----
599 if (groupFile != "") {
600 //Parse sequences by group
601 parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
602 vector<string> groups = parser->getNamesOfGroups();
604 if (m->control_pressed) { delete parser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
// Create (truncate) the combined output files before per-group results are appended.
607 ofstream out, out1, out2;
608 m->openOutputFile(outputFileName, out); out.close();
609 m->openOutputFile(accnosFileName, out1); out1.close();
611 if(processors == 1) { numSeqs = driverGroups(outputFileName, accnosFileName, "", 0, groups.size(), groups); }
612 else { numSeqs = createProcessesGroups(outputFileName, accnosFileName, "", groups, groupFile, fastaFileNames[s], nameFile); }
614 if (m->control_pressed) { delete parser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
615 map<string, string> uniqueNames = parser->getAllSeqsMap();
617 numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
621 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
623 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
625 if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
627 //read sequences and store sorted by frequency
628 vector<seqData> sequences = readFiles(fastaFileNames[s], nameFile);
630 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
632 numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras);
636 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
// Per-file summary, then record the chimera and accnos outputs.
638 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
639 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
640 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
// The first accnos and first count output become the session's current files.
643 //set accnos file as new current accnosfile
645 itTypes = outputTypes.find("accnos");
646 if (itTypes != outputTypes.end()) {
647 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
650 itTypes = outputTypes.find("count");
651 if (itTypes != outputTypes.end()) {
652 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
// List every output file for the user.
655 m->mothurOutEndLine();
656 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
657 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
658 m->mothurOutEndLine();
// Exceptions are reported together with the class and method name.
663 catch(exception& e) {
664 m->errorOut(e, "ChimeraPerseusCommand", "execute");
668 //**********************************************************************************************************************
// Generates a names file by running unique.seqs on inputFile.
// inputFile is taken by reference and is overwritten with the fasta file that
// unique.seqs produced; the produced names file is stored in nameFile
// (presumably the return value -- the return statement is not visible here).
669 string ChimeraPerseusCommand::getNamesFile(string& inputFile){
671 string nameFile = "";
673 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
675 //use unique.seqs to create new name and fastafile
676 string inputString = "fasta=" + inputFile;
677 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
678 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
// mothurCalling is switched on only for the duration of the nested command.
679 m->mothurCalling = true;
681 Command* uniqueCommand = new DeconvoluteCommand(inputString);
682 uniqueCommand->execute();
// Copy the output-file map before the command object is deleted.
684 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
686 delete uniqueCommand;
687 m->mothurCalling = false;
688 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
// Take the first "name" and first "fasta" output of unique.seqs.
690 nameFile = filenames["name"][0];
691 inputFile = filenames["fasta"][0];
// Exceptions are reported together with the class and method name.
695 catch(exception& e) {
696 m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
700 //**********************************************************************************************************************
// Checks groups[start..end) one group at a time.
// For each group the sequences are loaded, driver() writes to per-group files
// (outputFName+group and accnos+group), and those files are appended to
// outputFName / accnos and then removed. The per-group sequence counts are
// accumulated in totalSeqs. Returns 0 as soon as m->control_pressed is seen.
// NOTE(review): the conditions choosing between the count-list branch and the
// name-map branch below are on lines not visible in this listing.
701 int ChimeraPerseusCommand::driverGroups(string outputFName, string accnos, string countlist, int start, int end, vector<string> groups){
// countlist is only opened when a count file is in use and dereplicate is on.
707 ofstream outCountList;
708 if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
710 for (int i = start; i < end; i++) {
712 m->mothurOutEndLine(); m->mothurOut("Checking sequences from group " + groups[i] + "..."); m->mothurOutEndLine();
// This local `start` shadows the parameter; here it is a timestamp for the elapsed-time message.
714 int start = time(NULL); if (m->control_pressed) { return 0; }
716 vector<seqData> sequences = loadSequences(groups[i]);
718 if (m->control_pressed) { return 0; }
720 int numSeqs = driver((outputFName + groups[i]), sequences, (accnos+groups[i]), numChimeras);
721 totalSeqs += numSeqs;
723 if (m->control_pressed) { return 0; }
// Count-list branch: each name in this group's accnos file is written as "name<TAB>group".
726 if (!m->isBlank(accnos+groups[i])) {
728 m->openInputFile(accnos+groups[i], in);
732 in >> name; m->gobble(in);
733 outCountList << name << '\t' << groups[i] << endl;
// Name-map branch: each accnos name is expanded to the comma-separated names it
// maps to, written one per line to a .temp file that then replaces the accnos file.
737 map<string, string> thisnamemap = parser->getNameMap(groups[i]);
738 map<string, string>::iterator itN;
740 m->openOutputFile(accnos+groups[i]+".temp", out);
742 in >> name; m->gobble(in);
743 itN = thisnamemap.find(name);
744 if (itN != thisnamemap.end()) {
745 vector<string> tempNames; m->splitAtComma(itN->second, tempNames);
746 for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
748 }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
752 m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
// Fold this group's results into the combined files and delete the per-group files.
759 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
760 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
762 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
765 if (hasCount && dups) { outCountList.close(); }
// Exceptions are reported together with the class and method name.
770 catch(exception& e) {
771 m->errorOut(e, "ChimeraPerseusCommand", "driverGroups");
775 //**********************************************************************************************************************
// Builds the seqData list for one group.
// Two variants are visible: one reads sequences and per-sequence counts from
// cparser, the other reads sequences and a name map from parser and uses the
// number of names as the abundance. Both raise alignLength to the longest
// unaligned sequence seen. A sequence missing from the count table / name map
// sets `error`, which in turn sets m->control_pressed.
// NOTE(review): the condition selecting the variant is not visible here.
776 vector<seqData> ChimeraPerseusCommand::loadSequences(string group){
780 vector<seqData> sequences;
// ---- count-table variant ----
782 vector<Sequence> thisGroupsSeqs = cparser->getSeqs(group);
783 map<string, int> counts = cparser->getCountTable(group);
784 map<string, int>::iterator it;
786 for (int i = 0; i < thisGroupsSeqs.size(); i++) {
788 if (m->control_pressed) { return sequences; }
790 it = counts.find(thisGroupsSeqs[i].getName());
791 if (it == counts.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your count file, please correct."); m->mothurOutEndLine(); }
// NOTE(review): it->second below is presumably guarded by an else on a line not shown -- confirm.
793 thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned()));
794 sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), it->second));
795 if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
// ---- name-map variant ----
799 vector<Sequence> thisGroupsSeqs = parser->getSeqs(group);
800 map<string, string> nameMap = parser->getNameMap(group);
801 map<string, string>::iterator it;
803 for (int i = 0; i < thisGroupsSeqs.size(); i++) {
805 if (m->control_pressed) { return sequences; }
807 it = nameMap.find(thisGroupsSeqs[i].getName());
808 if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
// NOTE(review): it->second below is presumably guarded by an else on a line not shown -- confirm.
810 int num = m->getNumNames(it->second);
811 thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned()));
812 sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
813 if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
819 if (error) { m->control_pressed = true; }
// Sorting over reverse iterators leaves the vector in descending order of seqData's operator<.
821 sort(sequences.rbegin(), sequences.rend());
// Exceptions are reported together with the class and method name.
825 catch(exception& e) {
826 m->errorOut(e, "ChimeraPerseusCommand", "loadSequences");
831 //**********************************************************************************************************************
// Reads a fasta file together with a names file and builds the seqData list.
// The abundance of each sequence is the value m->readNames() stored for its
// name. A sequence missing from the names file sets `error`, which in turn
// sets m->control_pressed. alignLength is raised to the longest unaligned
// sequence seen.
832 vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
834 map<string, int>::iterator it;
835 map<string, int> nameMap = m->readNames(name);
837 //read fasta file and create sequenceData structure - checking for file mismatches
838 vector<seqData> sequences;
841 m->openInputFile(inputFile, in);
// Stop early, closing the input, if the run was cancelled.
846 if (m->control_pressed) { in.close(); return sequences; }
848 Sequence temp(in); m->gobble(in);
850 it = nameMap.find(temp.getName());
851 if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
// NOTE(review): it->second below is presumably guarded by an else on a line not shown -- confirm.
853 temp.setAligned(removeNs(temp.getUnaligned()));
854 sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second));
855 if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
860 if (error) { m->control_pressed = true; }
// Sorting over reverse iterators leaves the vector in descending order of seqData's operator<.
863 sort(sequences.rbegin(), sequences.rend());
// Exceptions are reported together with the class and method name.
867 catch(exception& e) {
868 m->errorOut(e, "ChimeraPerseusCommand", "readFiles");
872 //**********************************************************************************************************************
873 string ChimeraPerseusCommand::removeNs(string seq){
876 for (int i = 0; i < seq.length(); i++) {
877 if (seq[i] != 'N') { newSeq += seq[i]; }
881 catch(exception& e) {
882 m->errorOut(e, "ChimeraPerseusCommand", "removeNs");
886 //**********************************************************************************************************************
887 vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, CountTable* ct){
889 //read fasta file and create sequenceData structure - checking for file mismatches
890 vector<seqData> sequences;
892 m->openInputFile(inputFile, in);
896 Sequence temp(in); m->gobble(in);
898 int count = ct->getNumSeqs(temp.getName());
899 if (m->control_pressed) { break; }
901 temp.setAligned(removeNs(temp.getUnaligned()));
902 sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), count));
903 if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
909 sort(sequences.rbegin(), sequences.rend());
913 catch(exception& e) {
914 m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
918 //**********************************************************************************************************************
919 int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& sequences, string accnosFileName, int& numChimeras){
922 vector<vector<double> > correctModel(4); //could be an option in the future to input own model matrix
923 for(int i=0;i<4;i++){ correctModel[i].resize(4); }
925 correctModel[0][0] = 0.000000; //AA
926 correctModel[1][0] = 11.619259; //CA
927 correctModel[2][0] = 11.694004; //TA
928 correctModel[3][0] = 7.748623; //GA
930 correctModel[1][1] = 0.000000; //CC
931 correctModel[2][1] = 7.619657; //TC
932 correctModel[3][1] = 12.852562; //GC
934 correctModel[2][2] = 0.000000; //TT
935 correctModel[3][2] = 10.964048; //TG
937 correctModel[3][3] = 0.000000; //GG
939 for(int i=0;i<4;i++){
940 for(int j=0;j<i;j++){
941 correctModel[j][i] = correctModel[i][j];
945 int numSeqs = sequences.size();
946 //int alignLength = sequences[0].sequence.size();
948 ofstream chimeraFile;
950 m->openOutputFile(chimeraFileName, chimeraFile);
951 m->openOutputFile(accnosFileName, accnosFile);
954 vector<vector<double> > binMatrix = myPerseus.binomial(alignLength);
956 chimeraFile << "SequenceIndex\tName\tDiffsToBestMatch\tBestMatchIndex\tBestMatchName\tDiffstToChimera\tIndexofLeftParent\tIndexOfRightParent\tNameOfLeftParent\tNameOfRightParent\tDistanceToBestMatch\tcIndex\t(cIndex - singleDist)\tloonIndex\tMismatchesToChimera\tMismatchToTrimera\tChimeraBreakPoint\tLogisticProbability\tTypeOfSequence\n";
958 vector<bool> chimeras(numSeqs, 0);
960 for(int i=0;i<numSeqs;i++){
961 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
963 vector<bool> restricted = chimeras;
965 vector<vector<int> > leftDiffs(numSeqs);
966 vector<vector<int> > leftMaps(numSeqs);
967 vector<vector<int> > rightDiffs(numSeqs);
968 vector<vector<int> > rightMaps(numSeqs);
970 vector<int> singleLeft, bestLeft;
971 vector<int> singleRight, bestRight;
973 int bestSingleIndex, bestSingleDiff;
974 vector<pwAlign> alignments(numSeqs);
976 int comparisons = myPerseus.getAlignments(i, sequences, alignments, leftDiffs, leftMaps, rightDiffs, rightMaps, bestSingleIndex, bestSingleDiff, restricted);
977 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
979 int minMismatchToChimera, leftParentBi, rightParentBi, breakPointBi;
981 string dummyA, dummyB;
983 if (sequences[i].sequence.size() < 3) {
984 chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
985 }else if(comparisons >= 2){
986 minMismatchToChimera = myPerseus.getChimera(sequences, leftDiffs, rightDiffs, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight, restricted);
987 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
989 int minMismatchToTrimera = numeric_limits<int>::max();
990 int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB;
992 if(minMismatchToChimera >= 3 && comparisons >= 3){
993 minMismatchToTrimera = myPerseus.getTrimera(sequences, leftDiffs, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight, restricted);
994 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
997 double singleDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, sequences[bestSingleIndex].sequence, dummyA, dummyB, correctModel);
999 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
1002 string chimeraRefSeq;
1004 if(minMismatchToChimera - minMismatchToTrimera >= 3){
1006 chimeraRefSeq = myPerseus.stitchTrimera(alignments, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, leftMaps, rightMaps);
1010 chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps);
1013 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
1015 double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq, dummyA, dummyB, correctModel);
1017 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
1019 double cIndex = chimeraDist;//modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq);
1020 double loonIndex = myPerseus.calcLoonIndex(sequences[i].sequence, sequences[leftParentBi].sequence, sequences[rightParentBi].sequence, breakPointBi, binMatrix);
1022 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
1024 chimeraFile << i << '\t' << sequences[i].seqName << '\t' << bestSingleDiff << '\t' << bestSingleIndex << '\t' << sequences[bestSingleIndex].seqName << '\t';
1025 chimeraFile << minMismatchToChimera << '\t' << leftParentBi << '\t' << rightParentBi << '\t' << sequences[leftParentBi].seqName << '\t' << sequences[rightParentBi].seqName << '\t';
1026 chimeraFile << singleDist << '\t' << cIndex << '\t' << (cIndex - singleDist) << '\t' << loonIndex << '\t';
1027 chimeraFile << minMismatchToChimera << '\t' << minMismatchToTrimera << '\t' << breakPointBi << '\t';
1029 double probability = myPerseus.classifyChimera(singleDist, cIndex, loonIndex, alpha, beta);
1031 chimeraFile << probability << '\t';
1033 if(probability > cutoff){
1034 chimeraFile << type << endl;
1035 accnosFile << sequences[i].seqName << endl;
1040 chimeraFile << "good" << endl;
1045 chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
1049 if((i+1) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(i+1) + "\n"); }
1052 if((numSeqs) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(numSeqs) + "\n"); }
1054 chimeraFile.close();
1059 catch(exception& e) {
1060 m->errorOut(e, "ChimeraPerseusCommand", "driver");
1064 /**************************************************************************************************/
1065 int ChimeraPerseusCommand::createProcessesGroups(string outputFName, string accnos, string newCountFile, vector<string> groups, string group, string fasta, string name) {
1068 vector<int> processIDS;
1072 CountTable newCount;
1073 if (hasCount && dups) { newCount.readTable(name, true); }
1076 if (groups.size() < processors) { processors = groups.size(); }
1078 //divide the groups between the processors
1079 vector<linePair> lines;
1080 int numGroupsPerProcessor = groups.size() / processors;
1081 for (int i = 0; i < processors; i++) {
1082 int startIndex = i * numGroupsPerProcessor;
1083 int endIndex = (i+1) * numGroupsPerProcessor;
1084 if(i == (processors - 1)){ endIndex = groups.size(); }
1085 lines.push_back(linePair(startIndex, endIndex));
1088 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1090 //loop through and create all the processes you want
1091 while (process != processors) {
1095 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1097 }else if (pid == 0){
1098 num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1100 //pass numSeqs to parent
1102 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1103 m->openOutputFile(tempFile, out);
1109 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1110 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1116 num = driverGroups(outputFName, accnos, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1118 //force parent to wait until all the processes are done
1119 for (int i=0;i<processIDS.size();i++) {
1120 int temp = processIDS[i];
1124 for (int i = 0; i < processIDS.size(); i++) {
1126 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1127 m->openInputFile(tempFile, in);
1128 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1129 in.close(); m->mothurRemove(tempFile);
1133 //////////////////////////////////////////////////////////////////////////////////////////////////////
1134 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1135 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1136 //////////////////////////////////////////////////////////////////////////////////////////////////////
1138 vector<perseusData*> pDataArray;
1139 DWORD dwThreadIdArray[processors-1];
1140 HANDLE hThreadArray[processors-1];
1142 //Create processor worker threads.
1143 for( int i=1; i<processors; i++ ){
1144 // Allocate memory for thread data.
1145 string extension = toString(i) + ".temp";
1147 perseusData* tempPerseus = new perseusData(dups, hasName, hasCount, alpha, beta, cutoff, outputFName+extension, fasta, name, group, accnos+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end, i);
1149 pDataArray.push_back(tempPerseus);
1150 processIDS.push_back(i);
1152 //MyPerseusThreadFunction is in header. It must be global or static to work with the threads.
1153 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1154 hThreadArray[i-1] = CreateThread(NULL, 0, MyPerseusThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1158 //using the main process as a worker saves time and memory
1159 num = driverGroups(outputFName, accnos, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1161 //Wait until all threads have terminated.
1162 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1164 //Close all thread handles and free memory allocations.
1165 for(int i=0; i < pDataArray.size(); i++){
1166 num += pDataArray[i]->count;
1167 CloseHandle(hThreadArray[i]);
1168 delete pDataArray[i];
1172 if (hasCount && dups) {
1173 if (!m->isBlank(accnos + ".byCount")) {
1175 m->openInputFile(accnos + ".byCount", in2);
1178 while (!in2.eof()) {
1179 in2 >> name >> group; m->gobble(in2);
1180 newCount.setAbund(name, group, 0);
1184 m->mothurRemove(accnos + ".byCount");
1188 //append output files
1189 for(int i=0;i<processIDS.size();i++){
1190 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1191 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1193 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1194 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1196 if (hasCount && dups) {
1197 if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
1199 m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
1202 while (!in2.eof()) {
1203 in2 >> name >> group; m->gobble(in2);
1204 newCount.setAbund(name, group, 0);
1208 m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
1213 //print new *.pick.count_table
1214 if (hasCount && dups) { newCount.printTable(newCountFile); }
1219 catch(exception& e) {
1220 m->errorOut(e, "ChimeraPerseusCommand", "createProcessesGroups");
1224 //**********************************************************************************************************************
1225 int ChimeraPerseusCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName){
1227 map<string, string>::iterator itUnique;
1232 m->openInputFile(accnosFileName, in2);
1235 m->openOutputFile(accnosFileName+".temp", out2);
1238 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
1239 set<string>::iterator itNames;
1240 set<string> chimerasInFile;
1241 set<string>::iterator itChimeras;
1244 while (!in2.eof()) {
1245 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
1247 in2 >> name; m->gobble(in2);
1250 itUnique = uniqueNames.find(name);
1252 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1254 itChimeras = chimerasInFile.find((itUnique->second));
1256 if (itChimeras == chimerasInFile.end()) {
1257 out2 << itUnique->second << endl;
1258 chimerasInFile.insert((itUnique->second));
1266 m->mothurRemove(accnosFileName);
1267 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
1271 m->openInputFile(outputFileName, in);
1274 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1276 int DiffsToBestMatch, BestMatchIndex, DiffstToChimera, IndexofLeftParent, IndexOfRightParent;
1277 float temp1,temp2, temp3, temp4, temp5, temp6, temp7, temp8;
1278 string index, BestMatchName, parent1, parent2, flag;
1280 namesInFile.clear();
1281 //assumptions - in file each read will always look like
1283 SequenceIndex Name DiffsToBestMatch BestMatchIndex BestMatchName DiffstToChimera IndexofLeftParent IndexOfRightParent NameOfLeftParent NameOfRightParent DistanceToBestMatch cIndex (cIndex - singleDist) loonIndex MismatchesToChimera MismatchToTrimera ChimeraBreakPoint LogisticProbability TypeOfSequence
1284 0 F01QG4L02JVBQY 0 0 Null 0 0 0 Null Null 0.0 0.0 0.0 0.0 0 0 0 0.0 0.0 good
1285 1 F01QG4L02ICTC6 0 0 Null 0 0 0 Null Null 0.0 0.0 0.0 0.0 0 0 0 0.0 0.0 good
1286 2 F01QG4L02JZOEC 48 0 F01QG4L02JVBQY 47 0 0 F01QG4L02JVBQY F01QG4L02JVBQY 2.0449 2.03545 -0.00944493 0 47 2147483647 138 0 good
1287 3 F01QG4L02G7JEC 42 0 F01QG4L02JVBQY 40 1 0 F01QG4L02ICTC6 F01QG4L02JVBQY 1.87477 1.81113 -0.0636404 5.80145 40 2147483647 25 0 good
1290 //get and print headers
1291 BestMatchName = m->getline(in); m->gobble(in);
1292 out << BestMatchName << endl;
1296 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
1299 in >> index; m->gobble(in);
1301 if (index != "SequenceIndex") { //if you are not a header line, there will be a header line for each group if group file is given
1302 in >> name; m->gobble(in);
1303 in >> DiffsToBestMatch; m->gobble(in);
1304 in >> BestMatchIndex; m->gobble(in);
1305 in >> BestMatchName; m->gobble(in);
1306 in >> DiffstToChimera; m->gobble(in);
1307 in >> IndexofLeftParent; m->gobble(in);
1308 in >> IndexOfRightParent; m->gobble(in);
1309 in >> parent1; m->gobble(in);
1310 in >> parent2; m->gobble(in);
1311 in >> temp1 >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> flag; m->gobble(in);
1314 itUnique = uniqueNames.find(name);
1316 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1318 name = itUnique->second;
1319 //is this name already in the file
1320 itNames = namesInFile.find((name));
1322 if (itNames == namesInFile.end()) { //no not in file
1323 if (flag == "good") { //are you really a no??
1324 //is this sequence really not chimeric??
1325 itChimeras = chimerasInFile.find(name);
1327 //then you really are a no so print, otherwise skip
1328 if (itChimeras == chimerasInFile.end()) { print = true; }
1329 }else{ print = true; }
1334 out << index << '\t' << name << '\t' << DiffsToBestMatch << '\t' << BestMatchIndex << '\t';
1335 namesInFile.insert(name);
1337 if (BestMatchName != "Null") {
1338 itUnique = uniqueNames.find(BestMatchName);
1339 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find BestMatchName "+ BestMatchName + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1340 else { out << itUnique->second << '\t'; }
1341 }else { out << "Null" << '\t'; }
1343 out << DiffstToChimera << '\t' << IndexofLeftParent << '\t' << IndexOfRightParent << '\t';
1345 if (parent1 != "Null") {
1346 itUnique = uniqueNames.find(parent1);
1347 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parent1 "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1348 else { out << itUnique->second << '\t'; }
1349 }else { out << "Null" << '\t'; }
1351 if (parent1 != "Null") {
1352 itUnique = uniqueNames.find(parent2);
1353 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parent2 "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1354 else { out << itUnique->second << '\t'; }
1355 }else { out << "Null" << '\t'; }
1357 out << temp1 << '\t' << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << flag << endl;
1359 }else { index = m->getline(in); m->gobble(in); }
1364 m->mothurRemove(outputFileName);
1365 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
1369 catch(exception& e) {
1370 m->errorOut(e, "ChimeraPerseusCommand", "deconvoluteResults");
1374 //**********************************************************************************************************************