X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=chimeraseqscommand.cpp;h=e1eebf400c052f7f7926e0029f6090ae0e93ede3;hb=9ada98592a54c82d08f3d46c9b1d8c3e472a922d;hp=182c25e4b909ba2dd49fc1971beb7abee3b99188;hpb=6e63c5ff52bd2830b689417df8ba3db831e63a96;p=mothur.git diff --git a/chimeraseqscommand.cpp b/chimeraseqscommand.cpp index 182c25e..e1eebf4 100644 --- a/chimeraseqscommand.cpp +++ b/chimeraseqscommand.cpp @@ -8,6 +8,12 @@ */ #include "chimeraseqscommand.h" +#include "bellerophon.h" +#include "pintail.h" +#include "ccode.h" +#include "chimeracheckrdp.h" +#include "chimeraslayer.h" + //*************************************************************************************************************** @@ -20,24 +26,112 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){ else { //valid paramters for this command - string Array[] = {"fasta", "filter", "correction", "processors", "method" }; + string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", + "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir" }; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); map parameters = parser.getParameters(); ValidParameters validParameter; + map::iterator it; //check to make sure all parameters are valid for command - for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + for (it = parameters.begin(); it != parameters.end(); it++) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("fasta"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["fasta"] = inputDir + it->second; } + } + + it = parameters.find("template"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["template"] = inputDir + it->second; } + } + + it = parameters.find("conservation"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["conservation"] = inputDir + it->second; } + } + + it = parameters.find("quantile"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["quantile"] = inputDir + it->second; } + } + + it = parameters.find("name"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["name"] = inputDir + it->second; } + } + } + + //check for required parameters fastafile = validParameter.validFile(parameters, "fasta", true); if (fastafile == "not open") { abort = true; } else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the chimera.seqs command."); mothurOutEndLine(); abort = true; } + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = ""; + outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it + } + + templatefile = validParameter.validFile(parameters, "template", true); + if (templatefile == "not open") { abort = true; } + else if (templatefile == "not found") { templatefile = ""; } + + consfile = validParameter.validFile(parameters, "conservation", true); + if (consfile == "not open") { abort = true; } + else if (consfile == "not found") { consfile = ""; } + + quanfile = validParameter.validFile(parameters, "quantile", true); + if (quanfile == "not open") { abort = true; } + else if (quanfile == "not found") { quanfile = ""; } + + namefile = validParameter.validFile(parameters, "name", true); + if (namefile == "not open") { abort = true; } + else if (namefile == "not found") { namefile = ""; } + + maskfile = validParameter.validFile(parameters, "mask", false); + if (maskfile == "not found") { maskfile = ""; } + else if (maskfile != "default") { + if (inputDir != "") { + string path = hasPath(maskfile); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { maskfile = inputDir + maskfile; } + } + + ifstream in; + int ableToOpen = openInputFile(maskfile, in); + if (ableToOpen == 1) { abort = true; } + in.close(); + } + + method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "pintail"; } + string temp; temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; } filter = isTrue(temp); @@ -45,10 +139,64 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){ temp = validParameter.validFile(parameters, "correction", false); if (temp == "not found") { temp = "T"; } correction = isTrue(temp); - temp = validParameter.validFile(parameters, "processors", true); if (temp == "not found") { temp = "1"; } + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; } convert(temp, processors); - method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "bellerophon"; } + temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found") { temp = "7"; } + convert(temp, ksize); + + temp = validParameter.validFile(parameters, "svg", false); if (temp == "not found") { temp = "F"; } + svg = isTrue(temp); + + temp = validParameter.validFile(parameters, "window", false); + if ((temp == "not found") && (method == "chimeraslayer")) { temp = "50"; } + else if (temp == "not found") { temp = "0"; } + convert(temp, window); + + temp = validParameter.validFile(parameters, "match", false); if (temp == "not found") { temp = "5"; } + convert(temp, match); + + temp = validParameter.validFile(parameters, "mismatch", false); if (temp == "not found") { temp = "-4"; } + convert(temp, mismatch); + + temp = validParameter.validFile(parameters, "divergence", false); if (temp == "not found") { temp = "1.007"; } + convert(temp, divR); + + temp = validParameter.validFile(parameters, "minsim", false); if (temp == "not found") { temp = "90"; } + convert(temp, minSimilarity); + + temp = validParameter.validFile(parameters, "mincov", false); if (temp == "not found") { temp = "70"; } + convert(temp, minCoverage); + + temp = validParameter.validFile(parameters, "minbs", false); if (temp == "not found") { temp = "90"; } + convert(temp, minBS); + + temp = validParameter.validFile(parameters, "minsnp", false); if (temp == "not found") { temp = "10"; } + convert(temp, minSNP); + + temp = validParameter.validFile(parameters, "parents", false); if (temp == "not found") { temp = "3"; } + convert(temp, parents); + + temp = validParameter.validFile(parameters, "iters", false); + if ((temp == "not found") && (method == "chimeraslayer")) { temp = "100"; } + else if (temp == "not found") { temp = "1000"; } + convert(temp, iters); + + temp = validParameter.validFile(parameters, "increment", false); + if ((temp == "not found") && (method == "chimeracheck")) { temp = "10"; } + else if ((temp == "not found") && (method == "chimeraslayer")) { temp = "5"; } + else if (temp == "not found") { temp = "25"; } + convert(temp, increment); + + temp = validParameter.validFile(parameters, "numwanted", false); + if ((temp == "not found") && (method == "chimeraslayer")) { temp = "15"; } + else if (temp == "not found") { temp = "20"; } + convert(temp, numwanted); + + + + if (((method != "bellerophon")) && (templatefile == "")) { mothurOut("You must provide a template file with the pintail, ccode, chimeraslayer or chimeracheck methods."); mothurOutEndLine(); abort = true; } + } } @@ -61,15 +209,47 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){ void ChimeraSeqsCommand::help(){ try { - mothurOut("The chimera.seqs command reads a fastafile and creates a sorted priority score list of potentially chimeric sequences (ideally, the sequences should already be aligned).\n"); - mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors and method. fasta is required.\n"); - mothurOut("The filter parameter allows you to specify if you would like to apply a 50% soft filter. The default is false. \n"); - mothurOut("The correction parameter allows you to ..... The default is true. \n"); + + //"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name" + //mothurOut("chimera.seqs ASSUMES that your sequences are ALIGNED and if using a template that the template file sequences are the same length as the fasta file sequences.\n\n"); + mothurOut("The chimera.seqs command reads a fastafile and creates list of potentially chimeric sequences.\n"); + mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name, iters.\n"); + mothurOut("The fasta parameter is always required and template is required if using pintail, ccode or chimeracheck.\n"); + mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n"); + mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.\n"); mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"); - mothurOut("The method parameter allows you to specify the method for finding chimeric sequences. The default is bellerophon. \n"); + mothurOut("The method parameter allows you to specify the method for finding chimeric sequences. The default is pintail. Options include bellerophon, ccode and chimeracheck \n"); + mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n"); + mothurOut("The window parameter allows you to specify the window size for searching for chimeras. \n"); + mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences.\n"); + mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences. \n"); + mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n"); + mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences.\n"); + mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n"); + mothurOut("The ksize parameter allows you to input kmersize. \n"); + mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n"); + mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n"); + mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n"); + mothurOut("The minsim parameter allows you .... \n"); + mothurOut("The mincov parameter allows you to specify minimum coverage by closest matches found in template. Default is 70, meaning 70%. \n"); + mothurOut("The minbs parameter allows you to specify minimum bootstrap support for calling a sequence chimeric. Default is 90, meaning 90%. \n"); + mothurOut("The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n"); + mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n"); + mothurOut("Details for each method: \n"); + mothurOut("\tpintail: \n"); + mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=300, increment=25, conservation=not required, but will improve speed, quantile=not required, but will greatly improve speed. \n"); + mothurOut("\t\tIf you have run chimera.seqs using pintail a .quan and .freq file will be created for your template, if you have not provided them for use in future command executions.\n"); + mothurOut("\tbellerophon: \n"); + mothurOut("\t\tparameters: fasta=required, filter=F, processors=1, window=1/4 length of seq, increment=25, correction=T. \n"); + mothurOut("\tccode: \n"); + mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=10% of length, numwanted=20\n"); + mothurOut("\tchimeracheck: \n"); + mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, ksize=7, svg=F, name=none\n\n"); + mothurOut("\tchimeraslayer: \n"); + mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, mask=no mask, numwanted=10, match=5, mismatch=-4, divergence=1.0, minsim=90, parents=5, iters=1000, window=100. \n\n"); mothurOut("The chimera.seqs command should be in the following format: \n"); mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n"); - mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, processors=2, method=yourMethod) \n"); + mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n"); mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n"); } catch(exception& e) { @@ -89,215 +269,264 @@ int ChimeraSeqsCommand::execute(){ if (abort == true) { return 0; } - //do soft filter - if (filter) { - string optionString = "fasta=" + fastafile + ", soft=50.0, vertical=F"; - filterSeqs = new FilterSeqsCommand(optionString); - filterSeqs->execute(); - delete filterSeqs; - - //reset fastafile to filtered file - fastafile = getRootName(fastafile) + "filter.fasta"; - } + int start = time(NULL); - //read in sequences - readSeqs(); + if (method == "bellerophon") { chimera = new Bellerophon(fastafile, outputDir); } + else if (method == "pintail") { chimera = new Pintail(fastafile, outputDir); } + else if (method == "ccode") { chimera = new Ccode(fastafile, outputDir); } + else if (method == "chimeracheck") { chimera = new ChimeraCheckRDP(fastafile, outputDir); } + else if (method == "chimeraslayer") { chimera = new ChimeraSlayer("blast"); } + else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0; } - //int numSeqs = seqs.size(); + //set user options + if (maskfile == "default") { mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); mothurOutEndLine(); } - //find average midpoint of seqs - midpoint = findAverageMidPoint(); + chimera->setCons(consfile); + chimera->setQuantiles(quanfile); + chimera->setMask(maskfile); + chimera->setFilter(filter); + chimera->setCorrection(correction); + chimera->setProcessors(processors); + chimera->setWindow(window); + chimera->setIncrement(increment); + chimera->setNumWanted(numwanted); + chimera->setKmerSize(ksize); + chimera->setSVG(svg); + chimera->setName(namefile); + chimera->setMatch(match); + chimera->setMisMatch(mismatch); + chimera->setDivR(divR); + chimera->setParents(parents); + chimera->setMinSim(minSimilarity); + chimera->setMinCoverage(minCoverage); + chimera->setMinBS(minBS); + chimera->setMinSNP(minSNP); + chimera->setIters(iters); + chimera->setTemplateFile(templatefile); + - //create 2 vectors of sequences, 1 for left side and one for right side - vector left; vector right; - for (int i = 0; i < seqs.size(); i++) { - //save left side - string seqLeft = seqs[i].getAligned(); - seqLeft = seqLeft.substr(0, midpoint); - Sequence tempLeft(seqs[i].getName(), seqLeft); - left.push_back(tempLeft); - - //save right side - string seqRight = seqs[i].getAligned(); - seqRight = seqRight.substr(midpoint+1, (seqRight.length()-midpoint-1)); - Sequence tempRight(seqs[i].getName(), seqRight); - right.push_back(tempRight); + vector templateSeqs; + if ((method != "bellerophon") && (method != "chimeracheck")) { + templateSeqs = chimera->readSeqs(templatefile); + if (chimera->getUnaligned()) { + mothurOut("Your sequences need to be aligned when you use the chimeraslayer method."); mothurOutEndLine(); + //free memory + for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; } + return 0; + } + + //set options + chimera->setTemplateSeqs(templateSeqs); + + }else if (method == "bellerophon") {//run bellerophon separately since you need to read entire fastafile to run it + chimera->getChimeras(); + + string outputFName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras"; + ofstream out; + openOutputFile(outputFName, out); + + chimera->print(out); + out.close(); + return 0; } - - //this should be parallelized - //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | ) - //create a matrix containing the distance from left to left and right to right - //calculate distances - SparseMatrix* SparseLeft = new SparseMatrix(); - SparseMatrix* SparseRight = new SparseMatrix(); - createSparseMatrix(0, left.size(), SparseLeft, left); - createSparseMatrix(0, right.size(), SparseRight, right); + //some methods need to do prep work before processing the chimeras + chimera->doPrep(); + ofstream outHeader; + string tempHeader = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras.tempHeader"; + openOutputFile(tempHeader, outHeader); - //vector distMapRight; - //vector distMapLeft; + chimera->printHeader(outHeader); + outHeader.close(); - // Create a data structure to quickly access the distance information. - // It consists of a vector of distance maps, where each map contains - // all distances of a certain sequence. Vector and maps are accessed - // via the index of a sequence in the distance matrix - //distMapRight = vector(globaldata->gListVector->size()); - //distMapLeft = vector(globaldata->gListVector->size()); - for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) { - //distMapLeft[currentCell->row][currentCell->column] = currentCell->dist; - } - for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) { - //distMapRight[currentCell->row][currentCell->column] = currentCell->dist; - } + string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras"; + + //break up file + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ + ifstream inFASTA; + openInputFile(fastafile, inFASTA); + numSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); + inFASTA.close(); + + lines.push_back(new linePair(0, numSeqs)); + + driver(lines[0], outputFileName, fastafile); + + }else{ + vector positions; + processIDS.resize(0); + + ifstream inFASTA; + openInputFile(fastafile, inFASTA); + + string input; + while(!inFASTA.eof()){ + input = getline(inFASTA); + if (input.length() != 0) { + if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + } + } + inFASTA.close(); + + numSeqs = positions.size(); + + int numSeqsPerProcessor = numSeqs / processors; + + for (int i = 0; i < processors; i++) { + long int startPos = positions[ i * numSeqsPerProcessor ]; + if(i == processors - 1){ + numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; + } + lines.push_back(new linePair(startPos, numSeqsPerProcessor)); + } + + + createProcesses(outputFileName, fastafile); + + rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); + + //append alignment and report files + for(int i=1;i(inFASTA),istreambuf_iterator(), '>'); + inFASTA.close(); + lines.push_back(new linePair(0, numSeqs)); + + driver(lines[0], outputFileName, fastafile); + #endif - //fill preference structure - //generatePreferences(distMapLeft, distMapRight); + //mothurOut("Output File Names: "); + //if ((filter) && (method == "bellerophon")) { mothurOut( + //if (outputDir == "") { fastafile = getRootName(fastafile) + "filter.fasta"; } + // else { fastafile = outputDir + getRootName(getSimpleName(fastafile)) + "filter.fasta"; } - - //output results to screen - mothurOutEndLine(); - mothurOut("\t\t"); mothurOutEndLine(); - //mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0])); mothurOutEndLine(); - //mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25])); mothurOutEndLine(); - //mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25])); mothurOutEndLine(); - //mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50])); mothurOutEndLine(); - //mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75])); mothurOutEndLine(); - //mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5])); mothurOutEndLine(); - //mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100])); mothurOutEndLine(); - //mothurOut("# of Seqs:\t" + toString(numSeqs)); mothurOutEndLine(); + appendOutputFiles(tempHeader, outputFileName); + remove(tempHeader.c_str()); + + for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; } + + if (method == "chimeracheck") { mothurOutEndLine(); mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); mothurOutEndLine(); } + + mothurOutEndLine(); mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); mothurOutEndLine(); - //outSummary.close(); return 0; + } catch(exception& e) { errorOut(e, "ChimeraSeqsCommand", "execute"); exit(1); } -} +}//********************************************************************************************************************** -//*************************************************************************************************************** -void ChimeraSeqsCommand::readSeqs(){ +int ChimeraSeqsCommand::driver(linePair* line, string outputFName, string filename){ try { + ofstream out; + openOutputFile(outputFName, out); + ifstream inFASTA; - openInputFile(fastafile, inFASTA); + openInputFile(filename, inFASTA); + + inFASTA.seekg(line->start); - //read in seqs and store in vector - while(!inFASTA.eof()){ - Sequence current(inFASTA); - - seqs.push_back(current); + for(int i=0;inumSeqs;i++){ + + Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA); + + if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file + + //find chimeras + chimera->getChimeras(candidateSeq); + + //print results + chimera->print(out); + } + delete candidateSeq; - gobble(inFASTA); + //report progress + if((i+1) % 100 == 0){ mothurOut("Processing sequence: " + toString(i+1)); mothurOutEndLine(); } } + //report progress + if((line->numSeqs) % 100 != 0){ mothurOut("Processing sequence: " + toString(line->numSeqs)); mothurOutEndLine(); } + + out.close(); inFASTA.close(); - + + return 1; } catch(exception& e) { - errorOut(e, "ChimeraSeqsCommand", "readSeqs"); + errorOut(e, "ChimeraSeqsCommand", "driver"); exit(1); } } +/**************************************************************************************************/ -//*************************************************************************************************************** -int ChimeraSeqsCommand::findAverageMidPoint(){ +void ChimeraSeqsCommand::createProcesses(string outputFileName, string filename) { try { - int totalMids = 0; - int averageMid = 0; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + int process = 0; + // processIDS.resize(0); - //loop through the seqs and find midpoint - for (int i = 0; i < seqs.size(); i++) { - - //get unaligned sequence - seqs[i].setUnaligned(seqs[i].getUnaligned()); //if you read an aligned file the unaligned is really aligned, so we need to make sure its unaligned - - string unaligned = seqs[i].getUnaligned(); - string aligned = seqs[i].getAligned(); + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); - //find midpoint of this seq - int count = 0; - int thismid = 0; - for (int j = 0; j < aligned.length(); j++) { - - thismid++; - - //if you are part of the unaligned sequence increment - if (isalpha(aligned[j])) { count++; } - - //if you have reached the halfway point stop - if (count >= (unaligned.length() / 2)) { break; } - } - - //add this mid to total - totalMids += thismid; - + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename); + exit(0); + }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); } } - averageMid = (totalMids / seqs.size()); - - return averageMid; - - - } - catch(exception& e) { - errorOut(e, "ChimeraSeqsCommand", "findAverageMidPoint"); - exit(1); - } -} - -/***************************************************************************************************************/ -int ChimeraSeqsCommand::createSparseMatrix(int startSeq, int endSeq, SparseMatrix* sparse, vector s){ - try { - - for(int i=startSeq; icalcDist(s.get(i), s.get(j)); - float dist = distCalculator->getDist(); - - PCell temp(i, j, dist); - sparse->addCell(temp); - - } + //force parent to wait until all the processes are done + for (int i=0;i left, vector right){ - try { - for (int i = 0; i < left.size(); i++) { - - int iscore = 0; - float closestLeft = 100000.0; - float closestRight = 100000.0; - - for (int j = 0; j < left.size(); j++) { - - //iscore += abs(left - - } +/**************************************************************************************************/ + +void ChimeraSeqsCommand::appendOutputFiles(string temp, string filename) { + try{ + ofstream output; + ifstream input; + + openOutputFileAppend(temp, output); + openInputFile(filename, input); + + while(char c = input.get()){ + if(input.eof()) { break; } + else { output << c; } } - + + input.close(); + output.close(); } catch(exception& e) { - errorOut(e, "ChimeraSeqsCommand", "generatePreferences"); + errorOut(e, "ChimeraSeqsCommand", "appendOuputFiles"); exit(1); } } -/**************************************************************************************************/ +//********************************************************************************************************************** -/**************************************************************************************************/