*/
#include "chimeraseqscommand.h"
-#include "eachgapdist.h"
+#include "bellerophon.h"
+#include "pintail.h"
+#include "ccode.h"
+#include "chimeracheckrdp.h"
+#include "chimeraslayer.h"
+
//***************************************************************************************************************
else {
//valid paramters for this command
- string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment" };
+ string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask",
+ "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir" };
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
map<string,string> parameters = parser.getParameters();
ValidParameters validParameter;
+ map<string,string>::iterator it;
//check to make sure all parameters are valid for command
- for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
+ for (it = parameters.begin(); it != parameters.end(); it++) {
if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
}
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+ string path;
+ it = parameters.find("fasta");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["fasta"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("template");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["template"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("conservation");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["conservation"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("quantile");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["quantile"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("name");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["name"] = inputDir + it->second; }
+ }
+ }
+
+
//check for required parameters
fastafile = validParameter.validFile(parameters, "fasta", true);
if (fastafile == "not open") { abort = true; }
else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the chimera.seqs command."); mothurOutEndLine(); abort = true; }
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = "";
+ outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it
+ }
+
+ templatefile = validParameter.validFile(parameters, "template", true);
+ if (templatefile == "not open") { abort = true; }
+ else if (templatefile == "not found") { templatefile = ""; }
+
+ consfile = validParameter.validFile(parameters, "conservation", true);
+ if (consfile == "not open") { abort = true; }
+ else if (consfile == "not found") { consfile = ""; }
+
+ quanfile = validParameter.validFile(parameters, "quantile", true);
+ if (quanfile == "not open") { abort = true; }
+ else if (quanfile == "not found") { quanfile = ""; }
+
+ namefile = validParameter.validFile(parameters, "name", true);
+ if (namefile == "not open") { abort = true; }
+ else if (namefile == "not found") { namefile = ""; }
+
+ maskfile = validParameter.validFile(parameters, "mask", false);
+ if (maskfile == "not found") { maskfile = ""; }
+ else if (maskfile != "default") {
+ if (inputDir != "") {
+ string path = hasPath(maskfile);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { maskfile = inputDir + maskfile; }
+ }
+
+ ifstream in;
+ int ableToOpen = openInputFile(maskfile, in);
+ if (ableToOpen == 1) { abort = true; }
+ in.close();
+ }
+
+ method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "pintail"; }
+
string temp;
- temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "T"; }
+ temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
filter = isTrue(temp);
temp = validParameter.validFile(parameters, "correction", false); if (temp == "not found") { temp = "T"; }
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; }
convert(temp, processors);
- temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; }
+ temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found") { temp = "7"; }
+ convert(temp, ksize);
+
+ temp = validParameter.validFile(parameters, "svg", false); if (temp == "not found") { temp = "F"; }
+ svg = isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "window", false);
+ if ((temp == "not found") && (method == "chimeraslayer")) { temp = "50"; }
+ else if (temp == "not found") { temp = "0"; }
convert(temp, window);
-
- temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "10"; }
+
+ temp = validParameter.validFile(parameters, "match", false); if (temp == "not found") { temp = "5"; }
+ convert(temp, match);
+
+ temp = validParameter.validFile(parameters, "mismatch", false); if (temp == "not found") { temp = "-4"; }
+ convert(temp, mismatch);
+
+ temp = validParameter.validFile(parameters, "divergence", false); if (temp == "not found") { temp = "1.007"; }
+ convert(temp, divR);
+
+ temp = validParameter.validFile(parameters, "minsim", false); if (temp == "not found") { temp = "90"; }
+ convert(temp, minSimilarity);
+
+ temp = validParameter.validFile(parameters, "mincov", false); if (temp == "not found") { temp = "70"; }
+ convert(temp, minCoverage);
+
+ temp = validParameter.validFile(parameters, "minbs", false); if (temp == "not found") { temp = "90"; }
+ convert(temp, minBS);
+
+ temp = validParameter.validFile(parameters, "minsnp", false); if (temp == "not found") { temp = "10"; }
+ convert(temp, minSNP);
+
+ temp = validParameter.validFile(parameters, "parents", false); if (temp == "not found") { temp = "3"; }
+ convert(temp, parents);
+
+ temp = validParameter.validFile(parameters, "iters", false);
+ if ((temp == "not found") && (method == "chimeraslayer")) { temp = "100"; }
+ else if (temp == "not found") { temp = "1000"; }
+ convert(temp, iters);
+
+ temp = validParameter.validFile(parameters, "increment", false);
+ if ((temp == "not found") && (method == "chimeracheck")) { temp = "10"; }
+ else if ((temp == "not found") && (method == "chimeraslayer")) { temp = "5"; }
+ else if (temp == "not found") { temp = "25"; }
convert(temp, increment);
-
- method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "bellerophon"; }
- if (method != "bellerophon") { mothurOut(method + " is not a valid method."); mothurOutEndLine(); abort = true; }
+ temp = validParameter.validFile(parameters, "numwanted", false);
+ if ((temp == "not found") && (method == "chimeraslayer")) { temp = "15"; }
+ else if (temp == "not found") { temp = "20"; }
+ convert(temp, numwanted);
+
+
+
+ if (((method != "bellerophon")) && (templatefile == "")) { mothurOut("You must provide a template file with the pintail, ccode, chimeraslayer or chimeracheck methods."); mothurOutEndLine(); abort = true; }
+
}
}
+// Prints the usage text for the chimera.seqs command through mothurOut.
+// The per-method defaults quoted below must stay in sync with the defaults
+// assigned while parsing the parameters in the constructor.
void ChimeraSeqsCommand::help(){
try {
- mothurOut("The chimera.seqs command reads a fastafile and creates a sorted priority score list of potentially chimeric sequences (ideally, the sequences should already be aligned).\n");
- mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors and method. fasta is required.\n");
- mothurOut("The filter parameter allows you to specify if you would like to apply a 50% soft filter. The default is false. \n");
- mothurOut("The correction parameter allows you to ..... The default is true. \n");
+
+ //"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name"
+ //mothurOut("chimera.seqs ASSUMES that your sequences are ALIGNED and if using a template that the template file sequences are the same length as the fasta file sequences.\n\n");
+ mothurOut("The chimera.seqs command reads a fastafile and creates list of potentially chimeric sequences.\n");
+ //keep this list in step with the valid parameter array checked by the constructor
+ mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name, iters, match, mismatch, divergence, minsim, mincov, minbs, minsnp, parents, outputdir and inputdir.\n");
+ //every method except bellerophon is rejected without a template file
+ mothurOut("The fasta parameter is always required and template is required if using pintail, ccode, chimeraslayer or chimeracheck.\n");
+ mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
+ mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.\n");
mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
- mothurOut("The method parameter allows you to specify the method for finding chimeric sequences. The default is bellerophon. \n");
+ mothurOut("The method parameter allows you to specify the method for finding chimeric sequences. The default is pintail. Options include bellerophon, pintail, ccode, chimeracheck and chimeraslayer. \n");
+ mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n");
+ mothurOut("The window parameter allows you to specify the window size for searching for chimeras. \n");
+ mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences.\n");
+ mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences. \n");
+ mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
+ mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences.\n");
+ mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n");
+ mothurOut("The ksize parameter allows you to input kmersize. \n");
+ mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n");
+ mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
+ mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
+ mothurOut("The minsim parameter allows you to specify a minimum similarity with the parent fragments. Default is 90. \n");
+ mothurOut("The mincov parameter allows you to specify minimum coverage by closest matches found in template. Default is 70, meaning 70%. \n");
+ mothurOut("The minbs parameter allows you to specify minimum bootstrap support for calling a sequence chimeric. Default is 90, meaning 90%. \n");
+ mothurOut("The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n");
+ mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
+ mothurOut("Details for each method: \n");
+ mothurOut("\tpintail: \n");
+ mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=300, increment=25, conservation=not required, but will improve speed, quantile=not required, but will greatly improve speed. \n");
+ mothurOut("\t\tIf you have run chimera.seqs using pintail a .quan and .freq file will be created for your template, if you have not provided them for use in future command executions.\n");
+ mothurOut("\tbellerophon: \n");
+ mothurOut("\t\tparameters: fasta=required, filter=F, processors=1, window=1/4 length of seq, increment=25, correction=T. \n");
+ mothurOut("\tccode: \n");
+ mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=10% of length, numwanted=20\n");
+ mothurOut("\tchimeracheck: \n");
+ mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, ksize=7, svg=F, name=none\n\n");
+ mothurOut("\tchimeraslayer: \n");
+ //these values mirror the chimeraslayer-specific defaults applied during parameter parsing
+ mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=5, mask=no mask, numwanted=15, match=5, mismatch=-4, divergence=1.007, minsim=90, mincov=70, minbs=90, minsnp=10, parents=3, iters=100, window=50. \n\n");
mothurOut("The chimera.seqs command should be in the following format: \n");
mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
- mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, processors=2, method=yourMethod) \n");
+ mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n");
mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
}
catch(exception& e) {
exit(1);
}
}
-//********************************************************************************************************************
-//sorts highest score to lowest
-inline bool comparePref(Preference left, Preference right){
- return (left.score[0] > right.score[0]);
-}
//***************************************************************************************************************
if (abort == true) { return 0; }
+ int start = time(NULL);
- //do soft filter
- if (filter) {
- string optionString = "fasta=" + fastafile + ", soft=50, vertical=F";
- filterSeqs = new FilterSeqsCommand(optionString);
- filterSeqs->execute();
- delete filterSeqs;
-
- //reset fastafile to filtered file
- fastafile = getRootName(fastafile) + "filter.fasta";
- }
+ if (method == "bellerophon") { chimera = new Bellerophon(fastafile, outputDir); }
+ else if (method == "pintail") { chimera = new Pintail(fastafile, outputDir); }
+ else if (method == "ccode") { chimera = new Ccode(fastafile, outputDir); }
+ else if (method == "chimeracheck") { chimera = new ChimeraCheckRDP(fastafile, outputDir); }
+ else if (method == "chimeraslayer") { chimera = new ChimeraSlayer("blast"); }
+ else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0; }
- distCalculator = new eachGapDist();
+ //set user options
+ if (maskfile == "default") { mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); mothurOutEndLine(); }
- //read in sequences
- readSeqs();
-
- int numSeqs = seqs.size();
-
- if (numSeqs == 0) { mothurOut("Error in reading you sequences."); mothurOutEndLine(); return 0; }
+ chimera->setCons(consfile);
+ chimera->setQuantiles(quanfile);
+ chimera->setMask(maskfile);
+ chimera->setFilter(filter);
+ chimera->setCorrection(correction);
+ chimera->setProcessors(processors);
+ chimera->setWindow(window);
+ chimera->setIncrement(increment);
+ chimera->setNumWanted(numwanted);
+ chimera->setKmerSize(ksize);
+ chimera->setSVG(svg);
+ chimera->setName(namefile);
+ chimera->setMatch(match);
+ chimera->setMisMatch(mismatch);
+ chimera->setDivR(divR);
+ chimera->setParents(parents);
+ chimera->setMinSim(minSimilarity);
+ chimera->setMinCoverage(minCoverage);
+ chimera->setMinBS(minBS);
+ chimera->setMinSNP(minSNP);
+ chimera->setIters(iters);
+ chimera->setTemplateFile(templatefile);
+
- //set default window to 25% of sequence length
- string seq0 = seqs[0].getAligned();
- if (window == 0) { window = seq0.length() / 4; }
- else if (window > (seq0.length() / 2)) {
- mothurOut("Your sequence length is = " + toString(seq0.length()) + ". You have selected a window size greater than the length of half your aligned sequence. I will run it with a window size of " + toString((seq0.length() / 2))); mothurOutEndLine();
- window = (seq0.length() / 2);
- }
- if (increment > (seqs[0].getAlignLength() - (2*window))) {
- if (increment != 10) {
+ vector<Sequence*> templateSeqs;
+ if ((method != "bellerophon") && (method != "chimeracheck")) {
+ templateSeqs = chimera->readSeqs(templatefile);
+ if (chimera->getUnaligned()) {
+ mothurOut("Your sequences need to be aligned when you use the chimeraslayer method."); mothurOutEndLine();
+ //free memory
+ for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }
+ return 0;
+ }
- mothurOut("You have selected a increment that is too large. I will use the default."); mothurOutEndLine();
- increment = 10;
- if (increment > (seqs[0].getAlignLength() - (2*window))) { increment = 0; }
-
- }else{ increment = 0; }
+ //set options
+ chimera->setTemplateSeqs(templateSeqs);
+
+ }else if (method == "bellerophon") {//run bellerophon separately since you need to read entire fastafile to run it
+ chimera->getChimeras();
+
+ string outputFName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras";
+ ofstream out;
+ openOutputFile(outputFName, out);
+
+ chimera->print(out);
+ out.close();
+ return 0;
}
-cout << "increment = " << increment << endl;
- if (increment == 0) { iters = 1; }
- else { iters = ((seqs[0].getAlignLength() - (2*window)) / increment); }
- //initialize pref
- pref.resize(numSeqs);
+ //some methods need to do prep work before processing the chimeras
+ chimera->doPrep();
- for (int i = 0; i < numSeqs; i++ ) {
- pref[i].leftParent.resize(2); pref[i].rightParent.resize(2); pref[i].score.resize(2); pref[i].closestLeft.resize(2); pref[i].closestRight.resize(3);
- pref[i].name = seqs[i].getName();
- pref[i].score[0] = 0.0; pref[i].score[1] = 0.0;
- pref[i].closestLeft[0] = 100000.0; pref[i].closestLeft[1] = 100000.0;
- pref[i].closestRight[0] = 100000.0; pref[i].closestRight[1] = 100000.0;
- }
+ ofstream outHeader;
+ string tempHeader = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras.tempHeader";
+ openOutputFile(tempHeader, outHeader);
+
+ chimera->printHeader(outHeader);
+ outHeader.close();
+
+ string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras";
- int midpoint = window;
- int count = 0;
- while (count < iters) {
+ //break up file
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ if(processors == 1){
+ ifstream inFASTA;
+ openInputFile(fastafile, inFASTA);
+ numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+ inFASTA.close();
- //create 2 vectors of sequences, 1 for left side and one for right side
- vector<Sequence> left; vector<Sequence> right;
+ lines.push_back(new linePair(0, numSeqs));
- for (int i = 0; i < seqs.size(); i++) {
-//cout << "whole = " << seqs[i].getAligned() << endl;
- //save left side
- string seqLeft = seqs[i].getAligned().substr(midpoint-window, window);
- Sequence tempLeft;
- tempLeft.setName(seqs[i].getName());
- tempLeft.setAligned(seqLeft);
- left.push_back(tempLeft);
-//cout << "left = " << tempLeft.getAligned() << endl;
- //save right side
- string seqRight = seqs[i].getAligned().substr(midpoint, window);
- Sequence tempRight;
- tempRight.setName(seqs[i].getName());
- tempRight.setAligned(seqRight);
- right.push_back(tempRight);
-//cout << "right = " << seqRight << endl;
- }
+ driver(lines[0], outputFileName, fastafile);
- //adjust midpoint by increment
- midpoint += increment;
+ }else{
+ vector<int> positions;
+ processIDS.resize(0);
+ ifstream inFASTA;
+ openInputFile(fastafile, inFASTA);
- //this should be parallelized
- //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | )
- //create a matrix containing the distance from left to left and right to right
- //calculate distances
- SparseMatrix* SparseLeft = new SparseMatrix();
- SparseMatrix* SparseRight = new SparseMatrix();
+ string input;
+ while(!inFASTA.eof()){
+ input = getline(inFASTA);
+ if (input.length() != 0) {
+ if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
+ }
+ }
+ inFASTA.close();
- createSparseMatrix(0, left.size(), SparseLeft, left);
- createSparseMatrix(0, right.size(), SparseRight, right);
+ numSeqs = positions.size();
- vector<SeqMap> distMapRight;
- vector<SeqMap> distMapLeft;
+ int numSeqsPerProcessor = numSeqs / processors;
- // Create a data structure to quickly access the distance information.
- // It consists of a vector of distance maps, where each map contains
- // all distances of a certain sequence. Vector and maps are accessed
- // via the index of a sequence in the distance matrix
- distMapRight = vector<SeqMap>(numSeqs);
- distMapLeft = vector<SeqMap>(numSeqs);
- //cout << "left" << endl << endl;
- for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) {
- distMapLeft[currentCell->row][currentCell->column] = currentCell->dist;
- //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
- }
- //cout << "right" << endl << endl;
- for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) {
- distMapRight[currentCell->row][currentCell->column] = currentCell->dist;
- //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
+ for (int i = 0; i < processors; i++) {
+ long int startPos = positions[ i * numSeqsPerProcessor ];
+ if(i == processors - 1){
+ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+ }
+ lines.push_back(new linePair(startPos, numSeqsPerProcessor));
}
- delete SparseLeft;
- delete SparseRight;
+ createProcesses(outputFileName, fastafile);
- //fill preference structure
- generatePreferences(distMapLeft, distMapRight, midpoint);
-
- count++;
-
- }
-
- delete distCalculator;
-
- //find average pref score across windows
- //if (increment != 0) {
-
- //for (int i = 0; i < pref.size(); i++) {
- //pref[i].score[0] = pref[i].score[0] / iters;
- //}
- //}
+ rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+
+ //append alignment and report files
+ for(int i=1;i<processors;i++){
+ appendOutputFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
+ remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+ }
+ }
+
+ #else
+ ifstream inFASTA;
+ openInputFile(candidateFileNames[s], inFASTA);
+ numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+ inFASTA.close();
+ lines.push_back(new linePair(0, numSeqs));
+
+ driver(lines[0], outputFileName, fastafile);
+ #endif
- //sort Preferences highest to lowest
- sort(pref.begin(), pref.end(), comparePref);
+ //mothurOut("Output File Names: ");
+ //if ((filter) && (method == "bellerophon")) { mothurOut(
+ //if (outputDir == "") { fastafile = getRootName(fastafile) + "filter.fasta"; }
+ // else { fastafile = outputDir + getRootName(getSimpleName(fastafile)) + "filter.fasta"; }
- string outputFileName = getRootName(fastafile) + "chimeras";
- ofstream out;
- openOutputFile(outputFileName, out);
+ appendOutputFiles(tempHeader, outputFileName);
+ remove(tempHeader.c_str());
+
+ for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }
- int above1 = 0;
- out << "Name\tScore\tLeft\tRight\t" << endl;
- //output prefenence structure to .chimeras file
- for (int i = 0; i < pref.size(); i++) {
- out << pref[i].name << '\t' << pref[i].score[0] << '\t' << pref[i].leftParent[0] << '\t' << pref[i].rightParent[0] << endl;
-
- //calc # of seqs with preference above 1.0
- if (pref[i].score[0] > 1.0) {
- above1++;
- mothurOut(pref[i].name + " is a suspected chimera at breakpoint " + toString(pref[i].midpoint)); mothurOutEndLine();
- mothurOut("It's score is " + toString(pref[i].score[0]) + " with suspected left parent " + pref[i].leftParent[0] + " and right parent " + pref[i].rightParent[0]); mothurOutEndLine();
- }
-
-
- }
+ if (method == "chimeracheck") { mothurOutEndLine(); mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); mothurOutEndLine(); }
- //output results to screen
- mothurOutEndLine();
- mothurOut("Sequence with preference score above 1.0: " + toString(above1)); mothurOutEndLine();
- int spot;
- spot = pref.size()-1;
- mothurOut("Minimum:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
- spot = pref.size() * 0.975;
- mothurOut("2.5%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
- spot = pref.size() * 0.75;
- mothurOut("25%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
- spot = pref.size() * 0.50;
- mothurOut("Median: \t" + toString(pref[spot].score[0])); mothurOutEndLine();
- spot = pref.size() * 0.25;
- mothurOut("75%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
- spot = pref.size() * 0.025;
- mothurOut("97.5%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
- spot = 0;
- mothurOut("Maximum:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+ mothurOutEndLine(); mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); mothurOutEndLine();
return 0;
+
}
catch(exception& e) {
errorOut(e, "ChimeraSeqsCommand", "execute");
exit(1);
}
-}
+}//**********************************************************************************************************************
-//***************************************************************************************************************
-void ChimeraSeqsCommand::readSeqs(){
+int ChimeraSeqsCommand::driver(linePair* line, string outputFName, string filename){ // Checks line->numSeqs sequences read from filename, beginning at stream position line->start, and writes each result to outputFName; returns 1 when the loop completes.
try {
+ ofstream out;
+ openOutputFile(outputFName, out); // results for this slice of the input go to their own file
+
ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
+ openInputFile(filename, inFASTA);
+
+ inFASTA.seekg(line->start); // skip ahead to the first sequence assigned to this call
- //read in seqs and store in vector
- while(!inFASTA.eof()){
- Sequence current(inFASTA);
-
- if (current.getAligned() == "") { current.setAligned(current.getUnaligned()); }
-
- seqs.push_back(current);
+ for(int i=0;i<line->numSeqs;i++){
+
+ Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA); // allocated per iteration and deleted below, after the result is printed
+
+ if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+
+ //find chimeras
+ chimera->getChimeras(candidateSeq);
+
+ //print results
+ chimera->print(out);
+ }
+ delete candidateSeq;
- gobble(inFASTA);
+ //report progress
+ if((i+1) % 100 == 0){ mothurOut("Processing sequence: " + toString(i+1)); mothurOutEndLine(); } // one progress line per 100 sequences
}
+ //report progress
+ if((line->numSeqs) % 100 != 0){ mothurOut("Processing sequence: " + toString(line->numSeqs)); mothurOutEndLine(); } // final total, unless the loop already printed it on a multiple of 100
+
+ out.close();
inFASTA.close();
-
+
+ return 1; // the only value returned; any exception is reported and ends the program in the handler below
}
catch(exception& e) {
- errorOut(e, "ChimeraSeqsCommand", "readSeqs");
+ errorOut(e, "ChimeraSeqsCommand", "driver");
exit(1);
}
}
-/***************************************************************************************************************/
-int ChimeraSeqsCommand::createSparseMatrix(int startSeq, int endSeq, SparseMatrix* sparse, vector<Sequence> s){
- try {
+/**************************************************************************************************/
- for(int i=startSeq; i<endSeq; i++){
-
- for(int j=0;j<i;j++){
-
- distCalculator->calcDist(s[i], s[j]);
- float dist = distCalculator->getDist();
+// Forks one child per entry in `processors`; child number `process` runs
+// driver() on lines[process] and writes to outputFileName + <child pid> + ".temp".
+// The parent records each child pid in processIDS (in spawn order) and blocks
+// until every recorded child has finished. Compiled to a no-op on platforms
+// outside the #if guard.
+void ChimeraSeqsCommand::createProcesses(string outputFileName, string filename) {
+ try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ int process = 0;
+ // processIDS.resize(0);
+
+ //loop through and create all the processes you want
+ while (process != processors) {
+ int pid = fork();
- PCell temp(i, j, dist);
- sparse->addCell(temp);
-
- }
+ if (pid > 0) {
+ processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
+ process++;
+ }else if (pid == 0){
+ //child: process its share of the input, then leave without returning to the caller
+ driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename);
+ exit(0);
+ }else {
+ //fork failed: report it and exit with a failure status so callers do not mistake this for success
+ mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(1);
+ }
}
-
-
- return 1;
+
+ //force parent to wait until all the processes are done
+ for (int i=0;i<processors;i++) {
+ //wait on the specific child we spawned; status is an out-parameter, not the pid
+ int status = 0;
+ waitpid(processIDS[i], &status, 0);
+ }
+#endif
}
catch(exception& e) {
- errorOut(e, "ChimeraSeqsCommand", "createSparseMatrix");
+ errorOut(e, "ChimeraSeqsCommand", "createProcesses");
exit(1);
}
}
-/***************************************************************************************************************/
-void ChimeraSeqsCommand::generatePreferences(vector<SeqMap> left, vector<SeqMap> right, int mid){
- try {
-
- float dme = 0.0;
- SeqMap::iterator itR;
- SeqMap::iterator itL;
-
- //initialize pref[i]
- for (int i = 0; i < pref.size(); i++) {
- pref[i].score[1] = 0.0;
- pref[i].closestLeft[1] = 100000.0;
- pref[i].closestRight[1] = 100000.0;
- pref[i].leftParent[1] = "";
- pref[i].rightParent[1] = "";
- }
-
- for (int i = 0; i < left.size(); i++) {
-
- SeqMap currentLeft = left[i]; //example i = 3; currentLeft is a map of 0 to the distance of sequence 3 to sequence 0,
- // 1 to the distance of sequence 3 to sequence 1,
- // 2 to the distance of sequence 3 to sequence 2.
- SeqMap currentRight = right[i]; // same as left but with distances on the right side.
-
- for (int j = 0; j < i; j++) {
-
- itL = currentLeft.find(j);
- itR = currentRight.find(j);
-cout << " i = " << i << " j = " << j << " distLeft = " << itL->second << endl;
-cout << " i = " << i << " j = " << j << " distright = " << itR->second << endl;
-
- //if you can find this entry update the preferences
- if ((itL != currentLeft.end()) && (itR != currentRight.end())) {
-
- if (!correction) {
- pref[i].score[1] += abs((itL->second - itR->second));
- pref[j].score[1] += abs((itL->second - itR->second));
-cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
-cout << "abs = " << abs((itL->second - itR->second)) << endl;
-cout << i << " score = " << pref[i].score[1] << endl;
-cout << j << " score = " << pref[j].score[1] << endl;
- }else {
- pref[i].score[1] += abs((sqrt(itL->second) - sqrt(itR->second)));
- pref[j].score[1] += abs((sqrt(itL->second) - sqrt(itR->second)));
-cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
-cout << "abs = " << abs((sqrt(itL->second) - sqrt(itR->second))) << endl;
-cout << i << " score = " << pref[i].score[1] << endl;
-cout << j << " score = " << pref[j].score[1] << endl;
- }
-cout << "pref[" << i << "].closestLeft[1] = " << pref[i].closestLeft[1] << " parent = " << pref[i].leftParent[1] << endl;
- //are you the closest left sequence
- if (itL->second < pref[i].closestLeft[1]) {
- pref[i].closestLeft[1] = itL->second;
- pref[i].leftParent[1] = seqs[j].getName();
-cout << "updating closest left to " << pref[i].leftParent[1] << endl;
- }
-cout << "pref[" << j << "].closestLeft[1] = " << pref[j].closestLeft[1] << " parent = " << pref[j].leftParent[1] << endl;
- if (itL->second < pref[j].closestLeft[1]) {
- pref[j].closestLeft[1] = itL->second;
- pref[j].leftParent[1] = seqs[i].getName();
-cout << "updating closest left to " << pref[j].leftParent[1] << endl;
- }
-
- //are you the closest right sequence
- if (itR->second < pref[i].closestRight[1]) {
- pref[i].closestRight[1] = itR->second;
- pref[i].rightParent[1] = seqs[j].getName();
- }
- if (itR->second < pref[j].closestRight[1]) {
- pref[j].closestRight[1] = itR->second;
- pref[j].rightParent[1] = seqs[i].getName();
- }
-
- }
- }
-
- }
+/**************************************************************************************************/
+
+// Appends the full contents of `temp` to the end of `filename`.
+// Callers delete `temp` right after this returns, so `temp` must be the
+// source and `filename` the destination; `temp` itself is left untouched here.
+void ChimeraSeqsCommand::appendOutputFiles(string temp, string filename) {
+ try{
+ ofstream output;
+ ifstream input;
-
- //calculate the dme
- int count0 = 0;
- for (int i = 0; i < pref.size(); i++) { dme += pref[i].score[1]; if (pref[i].score[1] == 0.0) { count0++; } }
+ openOutputFileAppend(filename, output);
+ openInputFile(temp, input);
- float expectedPercent = 1 / (float) (pref.size() - count0);
-cout << endl << "dme = " << dme << endl;
- //recalculate prefernences based on dme
- for (int i = 0; i < pref.size(); i++) {
-cout << "unadjusted pref " << i << " = " << pref[i].score[1] << endl;
- // gives the actual percentage of the dme this seq adds
- pref[i].score[1] = pref[i].score[1] / dme;
-
- //how much higher or lower is this than expected
- pref[i].score[1] = pref[i].score[1] / expectedPercent;
-
- //so a non chimeric sequence would be around 1, and a chimeric would be signifigantly higher.
-cout << "adjusted pref " << i << " = " << pref[i].score[1] << endl;
+ //copy byte by byte; testing the stream state (not the character value) keeps a NUL byte from ending the copy early
+ char c;
+ while (input.get(c)) {
+ output << c;
}
- //is this score bigger then the last score
- for (int i = 0; i < pref.size(); i++) {
-
- //update biggest score
- if (pref[i].score[1] > pref[i].score[0]) {
- pref[i].score[0] = pref[i].score[1];
- pref[i].leftParent[0] = pref[i].leftParent[1];
- pref[i].rightParent[0] = pref[i].rightParent[1];
- pref[i].closestLeft[0] = pref[i].closestLeft[1];
- pref[i].closestRight[0] = pref[i].closestRight[1];
- pref[i].midpoint = mid;
- }
-
- //total of preference scores across windows
- //pref[i].score[0] += pref[i].score[1];
- }
-
+ input.close();
+ output.close();
}
catch(exception& e) {
- errorOut(e, "ChimeraSeqsCommand", "generatePreferences");
+ errorOut(e, "ChimeraSeqsCommand", "appendOutputFiles");
exit(1);
}
}
-/**************************************************************************************************/
+//**********************************************************************************************************************
-/**************************************************************************************************/