*/
#include "chimeraseqscommand.h"
+#include "bellerophon.h"
+#include "pintail.h"
+#include "ccode.h"
+#include "chimeracheckrdp.h"
+#include "chimeraslayer.h"
+
//***************************************************************************************************************
else {
//valid paramters for this command
- string Array[] = {"fasta", "filter", "correction", "processors", "method" };
+ string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents", "iters" };
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
if (fastafile == "not open") { abort = true; }
else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the chimera.seqs command."); mothurOutEndLine(); abort = true; }
+ templatefile = validParameter.validFile(parameters, "template", true);
+ if (templatefile == "not open") { abort = true; }
+ else if (templatefile == "not found") { templatefile = ""; }
+
+ consfile = validParameter.validFile(parameters, "conservation", true);
+ if (consfile == "not open") { abort = true; }
+ else if (consfile == "not found") { consfile = ""; }
+
+ quanfile = validParameter.validFile(parameters, "quantile", true);
+ if (quanfile == "not open") { abort = true; }
+ else if (quanfile == "not found") { quanfile = ""; }
+
+ namefile = validParameter.validFile(parameters, "name", true);
+ if (namefile == "not open") { abort = true; }
+ else if (namefile == "not found") { namefile = ""; }
+
+ maskfile = validParameter.validFile(parameters, "mask", false);
+ if (maskfile == "not found") { maskfile = ""; }
+ else if (maskfile != "default") {
+ ifstream in;
+ int ableToOpen = openInputFile(maskfile, in);
+ if (ableToOpen == 1) { abort = true; }
+ in.close();
+ }
+
+ method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "pintail"; }
+
string temp;
temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
filter = isTrue(temp);
temp = validParameter.validFile(parameters, "correction", false); if (temp == "not found") { temp = "T"; }
correction = isTrue(temp);
- temp = validParameter.validFile(parameters, "processors", true); if (temp == "not found") { temp = "1"; }
+ temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; }
convert(temp, processors);
- method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "bellerophon"; }
+ temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found") { temp = "7"; }
+ convert(temp, ksize);
+
+ temp = validParameter.validFile(parameters, "svg", false); if (temp == "not found") { temp = "F"; }
+ svg = isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "window", false);
+ if ((temp == "not found") && (method == "chimeraslayer")) { temp = "100"; }
+ else if (temp == "not found") { temp = "0"; }
+ convert(temp, window);
+
+ temp = validParameter.validFile(parameters, "match", false); if (temp == "not found") { temp = "5"; }
+ convert(temp, match);
+
+ temp = validParameter.validFile(parameters, "mismatch", false); if (temp == "not found") { temp = "-4"; }
+ convert(temp, mismatch);
+
+ temp = validParameter.validFile(parameters, "divergence", false); if (temp == "not found") { temp = "1.0"; }
+ convert(temp, divR);
+
+ temp = validParameter.validFile(parameters, "minsim", false); if (temp == "not found") { temp = "90"; }
+ convert(temp, minSimilarity);
+
+ temp = validParameter.validFile(parameters, "parents", false); if (temp == "not found") { temp = "5"; }
+ convert(temp, parents);
+
+ temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
+ convert(temp, iters);
+
+ temp = validParameter.validFile(parameters, "increment", false);
+ if ((temp == "not found") && ((method == "chimeracheck") || (method == "chimeraslayer"))) { temp = "10"; }
+ else if (temp == "not found") { temp = "25"; }
+ convert(temp, increment);
+
+ temp = validParameter.validFile(parameters, "numwanted", false);
+ if ((temp == "not found") && (method == "chimeraslayer")) { temp = "10"; }
+ else if (temp == "not found") { temp = "20"; }
+ convert(temp, numwanted);
+
+
+
+ if (((method != "bellerophon")) && (templatefile == "")) { mothurOut("You must provide a template file with the pintail, ccode, chimeraslayer or chimeracheck methods."); mothurOutEndLine(); abort = true; }
+
}
}
void ChimeraSeqsCommand::help(){
try {
- mothurOut("The chimera.seqs command reads a fastafile and creates a sorted priority score list of potentially chimeric sequences (ideally, the sequences should already be aligned).\n");
- mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors and method. fasta is required.\n");
- mothurOut("The filter parameter allows you to specify if you would like to apply a 50% soft filter. The default is false. \n");
- mothurOut("The correction parameter allows you to ..... The default is true. \n");
+
+ //"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name"
+ //mothurOut("chimera.seqs ASSUMES that your sequences are ALIGNED and if using a template that the template file sequences are the same length as the fasta file sequences.\n\n");
+ mothurOut("The chimera.seqs command reads a fastafile and creates list of potentially chimeric sequences.\n");
+ mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name, iters.\n");
+ mothurOut("The fasta parameter is always required and template is required if using pintail, ccode or chimeracheck.\n");
+ mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
+ mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.\n");
mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
- mothurOut("The method parameter allows you to specify the method for finding chimeric sequences. The default is bellerophon. \n");
+ mothurOut("The method parameter allows you to specify the method for finding chimeric sequences. The default is pintail. Options include bellerophon, ccode and chimeracheck \n");
+ mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n");
+ mothurOut("The window parameter allows you to specify the window size for searching for chimeras. \n");
+ mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences.\n");
+ mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences. \n");
+ mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
+ mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences.\n");
+ mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n");
+ mothurOut("The ksize parameter allows you to input kmersize. \n");
+ mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n");
+ mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
+ //mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
+ mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
+ mothurOut("Details for each method: \n");
+ mothurOut("\tpintail: \n");
+ mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=300, increment=25, conservation=not required, but will improve speed, quantile=not required, but will greatly improve speed. \n");
+ mothurOut("\t\tIf you have run chimera.seqs using pintail a .quan and .freq file will be created for your template, if you have not provided them for use in future command executions.\n");
+ mothurOut("\tbellerophon: \n");
+ mothurOut("\t\tparameters: fasta=required, filter=F, processors=1, window=1/4 length of seq, increment=25, correction=T. \n");
+ mothurOut("\tccode: \n");
+ mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=10% of length, numwanted=20\n");
+ mothurOut("\tchimeracheck: \n");
+ mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, ksize=7, svg=F, name=none\n\n");
+ //mothurOut("\tchimeraslayer: \n");
+ //mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, mask=no mask, numwanted=10, match=5, mismatch=-4, divergence=1.0, minsim=90, parents=5, iters=1000, window=100. \n\n");
mothurOut("The chimera.seqs command should be in the following format: \n");
mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
- mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, processors=2, method=yourMethod) \n");
+ mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n");
mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
}
catch(exception& e) {
if (abort == true) { return 0; }
- //do soft filter
- if (filter) {
- string optionString = "fasta=" + fastafile + ", soft=50.0, vertical=F";
- filterSeqs = new FilterSeqsCommand(optionString);
- filterSeqs->execute();
- delete filterSeqs;
-
- //reset fastafile to filtered file
- fastafile = getRootName(fastafile) + "filter.fasta";
- }
+ if (method == "bellerophon") { chimera = new Bellerophon(fastafile); }
+ else if (method == "pintail") { chimera = new Pintail(fastafile, templatefile); }
+ else if (method == "ccode") { chimera = new Ccode(fastafile, templatefile); }
+ else if (method == "chimeracheck") { chimera = new ChimeraCheckRDP(fastafile, templatefile); }
+ else if (method == "chimeraslayer") { chimera = new ChimeraSlayer(fastafile, templatefile); }
+ else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0; }
- //read in sequences
- readSeqs();
+ //set user options
+ if (maskfile == "default") { mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); mothurOutEndLine(); }
- //int numSeqs = seqs.size();
+ //if the user supplied a conservation file, using it saves the time of generating one
+ chimera->setCons(consfile);
- //find average midpoint of seqs
- midpoint = findAverageMidPoint();
-
- //this should be parallelized
- //generatePreferences();
+ //if the user supplied a quantile file, using it saves the time of generating one
+ chimera->setQuantiles(quanfile);
+
+ chimera->setMask(maskfile);
+ chimera->setFilter(filter);
+ chimera->setCorrection(correction);
+ chimera->setProcessors(processors);
+ chimera->setWindow(window);
+ chimera->setIncrement(increment);
+ chimera->setNumWanted(numwanted);
+ chimera->setKmerSize(ksize);
+ chimera->setSVG(svg);
+ chimera->setName(namefile);
+ chimera->setMatch(match);
+ chimera->setMisMatch(mismatch);
+ chimera->setDivR(divR);
+ chimera->setParents(parents);
+ chimera->setMinSim(minSimilarity);
+ chimera->setIters(iters);
- //output results to screen
- mothurOutEndLine();
- mothurOut("\t\t"); mothurOutEndLine();
- //mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0])); mothurOutEndLine();
- //mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25])); mothurOutEndLine();
- //mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25])); mothurOutEndLine();
- //mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50])); mothurOutEndLine();
- //mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75])); mothurOutEndLine();
- //mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5])); mothurOutEndLine();
- //mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100])); mothurOutEndLine();
- //mothurOut("# of Seqs:\t" + toString(numSeqs)); mothurOutEndLine();
-
- //outSummary.close();
- return 0;
- }
- catch(exception& e) {
- errorOut(e, "ChimeraSeqsCommand", "execute");
- exit(1);
- }
-}
-
-//***************************************************************************************************************
-void ChimeraSeqsCommand::readSeqs(){
- try {
- ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
+ //find chimeras
+ int error = chimera->getChimeras();
- //read in seqs and store in vector
- while(!inFASTA.eof()){
- Sequence current(inFASTA);
-
- seqs.push_back(current);
-
- gobble(inFASTA);
- }
- inFASTA.close();
-
- }
- catch(exception& e) {
- errorOut(e, "ChimeraSeqsCommand", "readSeqs");
- exit(1);
- }
-}
-
-
-//***************************************************************************************************************
-int ChimeraSeqsCommand::findAverageMidPoint(){
- try {
- int totalMids = 0;
- int averageMid = 0;
+ //there was a problem
+ if (error == 1) { return 0; }
- //loop through the seqs and find midpoint
- for (int i = 0; i < seqs.size(); i++) {
-
- //get unaligned sequence
- seqs[i].setUnaligned(seqs[i].getUnaligned()); //if you read an aligned file the unaligned is really aligned, so we need to make sure its unaligned
-
- string unaligned = seqs[i].getUnaligned();
- string aligned = seqs[i].getAligned();
-
- //find midpoint of this seq
- int count = 0;
- int thismid = 0;
- for (int j = 0; j < aligned.length(); j++) {
-
- thismid++;
-
- //if you are part of the unaligned sequence increment
- if (isalpha(aligned[j])) { count++; }
-
- //if you have reached the halfway point stop
- if (count >= (unaligned.length() / 2)) { break; }
- }
-
- //add this mid to total
- totalMids += thismid;
+ string outputFileName = getRootName(fastafile) + method + maskfile + ".chimeras";
+ ofstream out;
+ openOutputFile(outputFileName, out);
- }
+ //print results
+ chimera->print(out);
- averageMid = (totalMids / seqs.size());
+ out.close();
+
+ delete chimera;
+
+ return 0;
- return averageMid;
-
-
- }
- catch(exception& e) {
- errorOut(e, "ChimeraSeqsCommand", "findAverageMidPoint");
- exit(1);
- }
-}
-
-/***************************************************************************************************************
-int ChimeraSeqsCommand::createSparseMatrix(int startLine, int endLine, SparseMatrix* sparse){
- try {
-
- for(int i=startLine; i<endLine; i++){
-
- for(int j=0;j<i;j++){
-
- distCalculator->calcDist(seqs.get(i), seqs.get(j));
- double dist = distCalculator->getDist();
-
-
-
- }
-
-
- return 1;
}
catch(exception& e) {
- errorOut(e, "ChimeraSeqsCommand", "createSparseMatrix");
+ errorOut(e, "ChimeraSeqsCommand", "execute");
exit(1);
}
}