else {
//valid paramters for this command
- string AlignArray[] = {"template","candidate","search","ksize","align","match","mismatch","gapopen","gapextend", "processors"};
+ string AlignArray[] = {"template","candidate","search","ksize","align","match","mismatch","gapopen","gapextend", "processors","flip","threshold"};
vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
OptionParser parser(option);
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
convert(temp, processors);
+ temp = validParameter.validFile(parameters, "flip", false); if (temp == "not found"){ temp = "f"; }
+ flip = isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found"){ temp = "0.80"; }
+ convert(temp, threshold);
+
search = validParameter.validFile(parameters, "search", false); if (search == "not found"){ search = "kmer"; }
align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; }
mothurOut("The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n");
mothurOut("The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -1.0.\n");
mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -2.0.\n");
+ mothurOut("The flip parameter is used to specify whether or not you want mothur to try the reverse compement if a sequence falls below the threshold. The default is false.\n");
+ mothurOut("The threshold is used to specify a cutoff at which an alignment is deemed 'bad' and the reverse complement may be tried. \n");
+ mothurOut("If the flip parameter is set to true the reverse complement of the sequence is aligned and the better alignment is reported.\n");
+ mothurOut("The default for the threshold parameter is 0.80, meaning at least 80% of the bases must remain or the sequence is reported as potentially reversed.\n");
mothurOut("The align.seqs command should be in the following format: \n");
mothurOut("align.seqs(template=yourTemplateFile, candidate=yourCandidateFile, align=yourAlignmentMethod, search=yourSearchmethod, ksize=yourKmerSize, match=yourMatchBonus, mismatch=yourMismatchpenalty, gapopen=yourGapopenPenalty, gapextend=yourGapExtendPenalty) \n");
mothurOut("Example align.seqs(candidate=candidate.fasta, template=core.filtered, align=kmer, search=gotoh, ksize=8, match=2.0, mismatch=3.0, gapopen=-2.0, gapextend=-1.0)\n");
string alignFileName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + "align";
string reportFileName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + "align.report";
+ string accnosFileName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + "flip.accnos";
int numFastaSeqs = 0;
int start = time(NULL);
lines.push_back(new linePair(0, numFastaSeqs));
- driver(lines[0], alignFileName, reportFileName);
+ driver(lines[0], alignFileName, reportFileName, accnosFileName);
+ //delete accnos file if its blank else report to user
+ if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); }
+ else {
+ mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
+ if (!flip) {
+ mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well.");
+ }else{ mothurOut(" If the reverse compliment proved to be better it was reported."); }
+ mothurOutEndLine();
+ }
}
else{
vector<int> positions;
}
lines.push_back(new linePair(startPos, numSeqsPerProcessor));
}
- createProcesses(alignFileName, reportFileName);
+ createProcesses(alignFileName, reportFileName, accnosFileName);
rename((alignFileName + toString(processIDS[0]) + ".temp").c_str(), alignFileName.c_str());
rename((reportFileName + toString(processIDS[0]) + ".temp").c_str(), reportFileName.c_str());
+ vector<string> nonBlankAccnosFiles;
+ //delete blank accnos files generated with multiple processes
+ for(int i=0;i<processors;i++){
+ if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
+ nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
+ }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str()); }
+ }
+
+ //append accnos files
+ if (nonBlankAccnosFiles.size() != 0) {
+ rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
+
+ for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
+ appendAlignFiles(nonBlankAccnosFiles[h], accnosFileName);
+ remove(nonBlankAccnosFiles[h].c_str());
+ }
+ mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
+ if (!flip) {
+ mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well.");
+ }else{ mothurOut(" If the reverse compliment proved to be better it was reported."); }
+ mothurOutEndLine();
+ }
+
+ //append other files
for(int i=1;i<processors;i++){
appendAlignFiles((alignFileName + toString(processIDS[i]) + ".temp"), alignFileName);
remove((alignFileName + toString(processIDS[i]) + ".temp").c_str());
lines.push_back(new linePair(0, numFastaSeqs));
- driver(lines[0], alignFileName, reportFileName);
+ driver(lines[0], alignFileName, reportFileName, accnosFileName);
+
+ //delete accnos file if its blank else report to user
+ if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); }
+ else {
+ mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
+ if (!flip) {
+ mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well.");
+ }else{ mothurOut(" If the reverse compliment proved to be better it was reported."); }
+ mothurOutEndLine();
+ }
+
#endif
+
+
mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences.");
mothurOutEndLine();
mothurOutEndLine();
//**********************************************************************************************************************
-int AlignCommand::driver(linePair* line, string alignFName, string reportFName){
+int AlignCommand::driver(linePair* line, string alignFName, string reportFName, string accnosFName){
try {
ofstream alignmentFile;
openOutputFile(alignFName, alignmentFile);
+
+ ofstream accnosFile;
+ openOutputFile(accnosFName, accnosFile);
+
NastReport report(reportFName);
ifstream inFASTA;
for(int i=0;i<line->numSeqs;i++){
Sequence* candidateSeq = new Sequence(inFASTA);
-
- if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
- alignment->resize(candidateSeq->getUnaligned().length()+1);
- }
-
- report.setCandidate(candidateSeq);
-
- Sequence temp = templateDB->findClosestSequence(candidateSeq);
-
- Sequence* templateSeq = &temp;
-
- report.setTemplate(templateSeq);
- report.setSearchParameters(search, templateDB->getSearchScore());
+ int origNumBases = candidateSeq->getNumBases();
+ string originalUnaligned = candidateSeq->getUnaligned();
+ int numBasesNeeded = origNumBases * threshold;
+
+ if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+ if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
+ alignment->resize(candidateSeq->getUnaligned().length()+1);
+ }
+
+ Sequence temp = templateDB->findClosestSequence(candidateSeq);
+ Sequence* templateSeq = &temp;
- Nast nast(alignment, candidateSeq, templateSeq);
-
- report.setAlignmentParameters(align, alignment);
-
- report.setNastParameters(nast);
-
- alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;
+ float searchScore = templateDB->getSearchScore();
+
+ Nast* nast = new Nast(alignment, candidateSeq, templateSeq);
+ Sequence* copy;
- report.print();
+ Nast* nast2;
+ bool needToDeleteCopy = false; //this is needed in case you have you enter the ifs below
+ //since nast does not make a copy of hte sequence passed, and it is used by the reporter below
+ //you can't delete the copy sequence til after you report, but you may choose not to create it in the first place
+ //so this bool tells you if you need to delete it
+
+ //if there is a possibility that this sequence should be reversed
+ if (candidateSeq->getNumBases() < numBasesNeeded) {
+
+ string wasBetter = "";
+ //if the user wants you to try the reverse
+ if (flip) {
+ //get reverse compliment
+ copy = new Sequence(candidateSeq->getName(), originalUnaligned);
+ copy->reverseComplement();
+
+ //rerun alignment
+ Sequence temp2 = templateDB->findClosestSequence(copy);
+ Sequence* templateSeq2 = &temp2;
+
+ searchScore = templateDB->getSearchScore();
+
+ nast2 = new Nast(alignment, copy, templateSeq2);
+ //check if any better
+ if (copy->getNumBases() > candidateSeq->getNumBases()) {
+ candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better
+ templateSeq = templateSeq2;
+ delete nast;
+ nast = nast2;
+ needToDeleteCopy = true;
+ }else{
+ wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence.";
+ delete nast2;
+ delete copy;
+ }
+ }
+
+ //create accnos file with names
+ accnosFile << candidateSeq->getName() << wasBetter << endl;
+ }
+
+ report.setCandidate(candidateSeq);
+ report.setTemplate(templateSeq);
+ report.setSearchParameters(search, searchScore);
+ report.setAlignmentParameters(align, alignment);
+ report.setNastParameters(*nast);
+
+ alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;
+
+ report.print();
+ delete nast;
+ if (needToDeleteCopy) { delete copy; }
+ }
delete candidateSeq;
-
}
alignmentFile.close();
inFASTA.close();
+ accnosFile.close();
return 1;
}
/**************************************************************************************************/
-void AlignCommand::createProcesses(string alignFileName, string reportFileName) {
+void AlignCommand::createProcesses(string alignFileName, string reportFileName, string accnosFName) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp");
+ driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp");
exit(0);
}else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
}
exit(1);
}
}
-
-/**************************************************************************************************/
+//**********************************************************************************************************************
void AlignCommand::appendReportFiles(string temp, string filename) {
try{
AlignmentDB* templateDB;
Alignment* alignment;
- int driver(linePair*, string, string);
- void createProcesses(string, string);
+ int driver(linePair*, string, string, string);
+ void createProcesses(string, string, string);
void appendAlignFiles(string, string);
void appendReportFiles(string, string);
string candidateFileName, templateFileName, distanceFileName, search, align;
- float match, misMatch, gapOpen, gapExtend;
+ float match, misMatch, gapOpen, gapExtend, threshold;
int processors, kmerSize;
- bool abort;
+ bool abort, flip;
};
#endif
mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush();
while (!fastaFile.eof()) {
- Sequence temp(fastaFile);
+ Sequence temp(fastaFile); gobble(fastaFile);
- templateSequences.push_back(temp);
-
- //save longest base
- if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length(); }
-
- gobble(fastaFile);
+ if (temp.getName() != "") {
+ templateSequences.push_back(temp);
+ //save longest base
+ if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length(); }
+ }
}
numSeqs = templateSequences.size();
//read in seqs and store in vector
while(!in.eof()){
- Sequence* current = new Sequence(in);
- container.push_back(current);
- gobble(in);
+ Sequence* current = new Sequence(in); gobble(in);
+
+ if (current->getName() != "") { container.push_back(current); }
}
in.close();
mothurOut("The ksize parameter allows you to input kmersize. \n");
mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n");
mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
- mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
+ //mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
mothurOut("Details for each method: \n");
mothurOut("\tpintail: \n");
mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=10% of length, numwanted=20\n");
mothurOut("\tchimeracheck: \n");
mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, ksize=7, svg=F, name=none\n\n");
- mothurOut("\tchimeraslayer: \n");
- mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, mask=no mask, numwanted=10, match=5, mismatch=-4, divergence=1.0, minsim=90, parents=5, iters=1000, window=100. \n\n");
+ //mothurOut("\tchimeraslayer: \n");
+ //mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, mask=no mask, numwanted=10, match=5, mismatch=-4, divergence=1.0, minsim=90, parents=5, iters=1000, window=100. \n\n");
mothurOut("The chimera.seqs command should be in the following format: \n");
mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n");
for(int i=0;i<line->numSeqs;i++){
Sequence* candidateSeq = new Sequence(inFASTA);
-
- taxonomy = classify->getTaxonomy(candidateSeq);
- if (taxonomy != "bad seq") {
- outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
- outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
- }
-
+ if (candidateSeq->getName() != "") {
+ taxonomy = classify->getTaxonomy(candidateSeq);
+
+ if (taxonomy != "bad seq") {
+ outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
+ outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
+ }
+ }
delete candidateSeq;
if((i+1) % 100 == 0){
else if(commandName == "merge.files") { command = new MergeFileCommand(optionString); }
else if(commandName == "system") { command = new SystemCommand(optionString); }
else if(commandName == "align.check") { command = new AlignCheckCommand(optionString); }
- else if(commandName == "get.sharedseqs") { command = new GetSharedOTUCommand(optionString); }
+ else if(commandName == "get.sharedseqs") { command = new GetSharedOTUCommand(optionString); }
else if(commandName == "get.otulist") { command = new GetListCountCommand(optionString); }
else if(commandName == "hcluster") { command = new HClusterCommand(optionString); }
else if(commandName == "classify.seqs") { command = new ClassifySeqsCommand(optionString); }
exit(1);
}
}
+/***********************************************************/
+//This function is used to interrupt a command
+Command* CommandFactory::getCommand(){
+ try {
+ delete command; //delete the old command
+ string s = "";
+ command = new NoCommand(s);
+
+ return command;
+ }
+ catch(exception& e) {
+ errorOut(e, "CommandFactory", "getCommand");
+ exit(1);
+ }
+}
/***********************************************************************/
bool CommandFactory::isValidCommand(string command) {
try {
CommandFactory();
~CommandFactory();
Command* getCommand(string, string);
+ Command* getCommand();
bool isValidCommand(string);
void printCommands(ostream&);
globaldata = GlobalData::getInstance();
globaldata->argv = path;
-
-
}
/***********************************************************************/
-InteractEngine::~InteractEngine(){
- }
+InteractEngine::~InteractEngine(){}
/***********************************************************************/
//This function allows the user to input commands one line at a time until they quit.
mothurOutEndLine();
mothurOut("mothur > ");
+
+ //get input char by char so you can check for use of arrow keys
+ //if (_kbhit()){
+ // _getch(); // edit : if you want to check the arrow-keys you must call getch twice because special-keys have two values
+ // return _getch();
+ //}
+ //return 0; // if no key is pressed
+ //setbuf(stdin, NULL); //no buffering
+/*if(ch==0)
+{
+ch=getch();
+if(ch==72) cout<<"up";
+else if(ch==75) cout<<"left";
+else if(ch==77) cout<<"right";
+else if(ch==80) cout<<"down";
+cout<<endl;
+}
+else break;
+}
+delay(2000);
+return 0;
+}*/
+
+ //int letter = 0;
+ //while ((letter != 10) && (letter != 13)) {
+ // letter = getch();
+
+ // cout << "char code = " << letter << endl;
+
+ // input += char(letter);
+ //}
+ // input = input.substr(0, input.length()-1); //cut off newline char
+
+ //cout << input << endl;
+
getline(cin, input);
if (cin.eof()) { input = "quit()"; }
+ //save command
+ //previousInputs.push_back(input);
+
mothurOutJustToLog(input);
mothurOutEndLine();
CommandOptionParser parser(input);
commandName = parser.getCommandString();
+ //cout << " command = " << commandName << endl;
options = parser.getOptionString();
if (commandName != "") {
//executes valid command
- CommandFactory cFactory;
- Command* command = cFactory.getCommand(commandName, options);
+ Command* command = cFactory->getCommand(commandName, options);
quitCommandCalled = command->execute();
}else {
exit(1);
}
}
-
+/***********************************************************************/
+void Engine::terminateCommand(int dummy) {
+ try {
+ mothurOut("Stopping command."); mothurOutEndLine();
+ cFactory->getCommand(); //terminates command
+ }
+ catch(exception& e) {
+ errorOut(e, "InteractEngine", "terminateCommand");
+ exit(1);
+ }
+}
/***********************************************************************/
//This function opens the batchfile to be used by BatchEngine::getInput.
BatchEngine::BatchEngine(string path, string batchFileName){
/***********************************************************************/
-BatchEngine::~BatchEngine(){
- }
+BatchEngine::~BatchEngine(){ }
/***********************************************************************/
//This Function allows the user to run a batchfile containing several commands on Dotur
if (commandName != "") {
//executes valid command
- CommandFactory cFactory;
- Command* command = cFactory.getCommand(commandName, options);
+ Command* command = cFactory->getCommand(commandName, options);
quitCommandCalled = command->execute();
}else {
mothurOut("Invalid.");
globaldata->argv = path;
-
-
}
catch(exception& e) {
errorOut(e, "ScriptEngine", "ScriptEngine");
/***********************************************************************/
-ScriptEngine::~ScriptEngine(){
- }
+ScriptEngine::~ScriptEngine(){ }
/***********************************************************************/
//This Function allows the user to run a batchfile containing several commands on mothur
if (commandName != "") {
//executes valid command
- CommandFactory cFactory;
- Command* command = cFactory.getCommand(commandName, options);
+ Command* command = cFactory->getCommand(commandName, options);
quitCommandCalled = command->execute();
}else {
mothurOut("Invalid.");
class Engine {
public:
- virtual ~Engine(){};
+ Engine() { cFactory = new CommandFactory(); }
+ virtual ~Engine(){ delete cFactory; }
virtual bool getInput() = 0;
// string getCommand() { return command; }
vector<string> getOptions() { return options; }
+ virtual void terminateCommand(int);
protected:
// string command;
vector<string> options;
+ CommandFactory* cFactory;
};
virtual bool getInput();
private:
GlobalData* globaldata;
+ vector<string> previousInputs; //this is used to make the arrow keys work
+
};
Sequence currSeq(in);
name = currSeq.getName();
- if(currSeq.getIsAligned()) { sequence = currSeq.getAligned(); }
- else { sequence = currSeq.getUnaligned(); }
-
- seqmap[name] = sequence;
- map<string,group>::iterator it = data.find(sequence);
- if (it == data.end()) { //it's unique.
- data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found.
-// data[sequence].groupnumber = 1;
- data[sequence].names = name;
- }else { // its a duplicate.
- data[sequence].names += "," + name;
-// data[sequence].groupnumber++;
- }
-
+ if (name != "") {
+ if(currSeq.getIsAligned()) { sequence = currSeq.getAligned(); }
+ else { sequence = currSeq.getUnaligned(); }
+
+ seqmap[name] = sequence;
+ map<string,group>::iterator it = data.find(sequence);
+ if (it == data.end()) { //it's unique.
+ data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found.
+ // data[sequence].groupnumber = 1;
+ data[sequence].names = name;
+ }else { // its a duplicate.
+ data[sequence].names += "," + name;
+ // data[sequence].groupnumber++;
+ }
+ }
gobble(in);
}
in.close();
Sequence currSeq(inFASTA);
name = currSeq.getName();
- if(currSeq.getIsAligned()) { sequence = currSeq.getAligned(); }
- else { sequence = currSeq.getUnaligned(); }
-
- seqmap[name] = sequence;
- map<string,group>::iterator it = data.find(sequence);
- if (it == data.end()) { //it's unique.
- data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found.
-// data[sequence].groupnumber = 1;
- data[sequence].names = oldNameMap[name];
- }else { // its a duplicate.
- data[sequence].names += "," + oldNameMap[name];
-// data[sequence].groupnumber++;
- }
-
+ if (name != "") {
+ if(currSeq.getIsAligned()) { sequence = currSeq.getAligned(); }
+ else { sequence = currSeq.getUnaligned(); }
+
+ seqmap[name] = sequence;
+ map<string,group>::iterator it = data.find(sequence);
+ if (it == data.end()) { //it's unique.
+ data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found.
+ // data[sequence].groupnumber = 1;
+ data[sequence].names = oldNameMap[name];
+ }else { // its a duplicate.
+ data[sequence].names += "," + oldNameMap[name];
+ // data[sequence].groupnumber++;
+ }
+ }
gobble(inFASTA);
}
if(trump != '*' || isTrue(vertical) || soft != 0){
while(!inFASTA.eof()){ //read through and create the filter...
Sequence seq(inFASTA);
- if(trump != '*'){ F.doTrump(seq); }
- if(isTrue(vertical) || soft != 0){ F.getFreqs(seq); }
- numSeqs++;
- cout.flush();
+ if (seq.getName() != "") {
+ if(trump != '*'){ F.doTrump(seq); }
+ if(isTrue(vertical) || soft != 0){ F.getFreqs(seq); }
+ numSeqs++;
+ cout.flush();
+ }
}
}
numSeqs = 0;
while(!inFasta2.eof()){
Sequence seq(inFasta2);
- string align = seq.getAligned();
- string filterSeq = "";
-
- for(int j=0;j<alignmentLength;j++){
- if(filter[j] == '1'){
- filterSeq += align[j];
+ if (seq.getName() != "") {
+ string align = seq.getAligned();
+ string filterSeq = "";
+
+ for(int j=0;j<alignmentLength;j++){
+ if(filter[j] == '1'){
+ filterSeq += align[j];
+ }
}
+
+ outFASTA << '>' << seq.getName() << endl << filterSeq << endl;
+ numSeqs++;
}
-
- outFASTA << '>' << seq.getName() << endl << filterSeq << endl;
- numSeqs++;
gobble(inFasta2);
}
outFASTA.close();
Sequence currSeq(in);
name = currSeq.getName();
- //if this name is in the accnos file
- if (names.count(name) == 1) {
- wroteSomething = true;
-
- currSeq.printSequence(out);
-
- names.erase(name);
+ if (name != "") {
+ //if this name is in the accnos file
+ if (names.count(name) == 1) {
+ wroteSomething = true;
+
+ currSeq.printSequence(out);
+
+ names.erase(name);
+ }
}
-
gobble(in);
}
in.close();
while(!inFasta.eof()) {
Sequence seq(inFasta);
- seqs.push_back(seq);
+ if (seq.getName() != "") { seqs.push_back(seq); }
}
inFasta.close();
}
Sequence currSeq(in);
name = currSeq.getName();
- names.push_back(name);
+ if (name != "") { names.push_back(name); }
gobble(in);
}
//srand(54321);
srand( (unsigned)time( NULL ) );
-
+
Engine* mothur;
bool bail = 0;
string input;
}
}
else{
- mothur = new InteractEngine(argv[0]);
+ mothur = new InteractEngine(argv[0]);
}
+
+ //used to intercept the terminate signal, so instead of terminating mothur it will end a command
+ //void (*prev_fn)(int);
+ //prev_fn = signal(SIGTERM, mothur->terminateCommand(0));
+
+ //if (prev_fn==SIG_IGN) signal (SIGTERM,SIG_IGN);
+
while(bail == 0) { bail = mothur->getInput(); }
delete mothur;
#include <iomanip>
#include <fstream>
#include <sstream>
+#include <signal.h>
//exception
#include <stdexcept>
}
-
-
-
/***********************************************************************/
inline bool isTrue(string f){
return extension;
}
-
+/***********************************************************************/
+inline bool isBlank(string fileName){
+
+ ifstream fileHandle;
+ fileHandle.open(fileName.c_str());
+ if(!fileHandle) {
+ mothurOut("Error: Could not open " + fileName); mothurOutEndLine();
+ return false;
+ }else {
+ //check for blank file
+ gobble(fileHandle);
+ if (fileHandle.eof()) { fileHandle.close(); return true; }
+ }
+ return false;
+}
/***********************************************************************/
inline int openInputFile(string fileName, ifstream& fileHandle){
errorOut(e, "Nast", "Nast");
exit(1);
}
-
}
/**************************************************************************************************/
try {
alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned());
-
+
string candAln = alignment->getSeqAAln();
string tempAln = alignment->getSeqBAln();
-
+
if(candAln == ""){
candidateSeq->setPairwise("");
candidateSeq->setPairwise(candAln); // set the pairwise sequences in the Sequence objects for
templateSeq->setPairwise(tempAln); // the candidate and template sequences
-
}
catch(exception& e) {
errorOut(e, "Nast", "pairwiseAlignSeqs");
public:
Nast(Alignment*, Sequence*, Sequence*);
+ ~Nast(){};
float getSimilarityScore();
int getMaxInsertLength();
int rowIndex = maxRow(alignment, band); // get the index for the row with the highest right hand side score
int colIndex = maxColumn(alignment, band); // get the index for the column with the highest bottom row score
-
+
int row = lB-1;
int column = lA-1;
Sequence currSeq(in);
name = currSeq.getName();
- //if this name is in the accnos file
- if (names.count(name) == 0) {
- wroteSomething = true;
-
- currSeq.printSequence(out);
- }else { names.erase(name); }
-
+ if (name != "") {
+ //if this name is in the accnos file
+ if (names.count(name) == 0) {
+ wroteSomething = true;
+
+ currSeq.printSequence(out);
+ }else { names.erase(name); }
+ }
gobble(in);
}
in.close();
openOutputFile(reverseFile, outFASTA);
while(!inFASTA.eof()){
- Sequence currSeq(inFASTA);
- currSeq.reverseComplement();
- currSeq.printSequence(outFASTA);
+ Sequence currSeq(inFASTA); gobble(inFASTA);
+ if (currSeq.getName() != "") {
+ currSeq.reverseComplement();
+ currSeq.printSequence(outFASTA);
+ }
}
inFASTA.close();
outFASTA.close();
while(!inFASTA.eof()){
Sequence currSeq(inFASTA);
- bool goodSeq = 1; // innocent until proven guilty
- if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos()) { goodSeq = 0; }
- if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos()) { goodSeq = 0; }
- if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases()) { goodSeq = 0; }
- if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = 0; }
- if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases()) { goodSeq = 0; }
- if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases()) { goodSeq = 0; }
-
- if(goodSeq == 1){
- currSeq.printSequence(goodSeqOut);
- }
- else{
- currSeq.printSequence(badSeqOut);
- badSeqNames.insert(currSeq.getName());
+ if (currSeq.getName() != "") {
+ bool goodSeq = 1; // innocent until proven guilty
+ if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos()) { goodSeq = 0; }
+ if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos()) { goodSeq = 0; }
+ if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases()) { goodSeq = 0; }
+ if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = 0; }
+ if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases()) { goodSeq = 0; }
+ if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases()) { goodSeq = 0; }
+
+ if(goodSeq == 1){
+ currSeq.printSequence(goodSeqOut);
+ }
+ else{
+ currSeq.printSequence(badSeqOut);
+ badSeqNames.insert(currSeq.getName());
+ }
}
gobble(inFASTA);
}
while(!in.eof()){
- Sequence seq(in);
- statData data = getStats(seq.getAligned());
-
- out << seq.getName() << '\t' << data.pound << '\t' << data.dash << '\t' << data.plus << '\t' << data.equal << '\t';
- out << data.loop << '\t' << data.tilde << '\t' << data.total << endl;
+ Sequence seq(in); gobble(in);
+ if (seq.getName() != "") {
+ statData data = getStats(seq.getAligned());
+
+ out << seq.getName() << '\t' << data.pound << '\t' << data.dash << '\t' << data.plus << '\t' << data.equal << '\t';
+ out << data.loop << '\t' << data.tilde << '\t' << data.total << endl;
+ }
}
in.close();
while(!inFASTA.eof()){
Sequence current(inFASTA);
- startPosition.push_back(current.getStartPos());
- endPosition.push_back(current.getEndPos());
- seqLength.push_back(current.getNumBases());
- ambigBases.push_back(current.getAmbigBases());
- longHomoPolymer.push_back(current.getLongHomoPolymer());
-
- outSummary << current.getName() << '\t';
- outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
- outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
- outSummary << current.getLongHomoPolymer() << endl;
-
- numSeqs++;
+ if (current.getName() != "") {
+ startPosition.push_back(current.getStartPos());
+ endPosition.push_back(current.getEndPos());
+ seqLength.push_back(current.getNumBases());
+ ambigBases.push_back(current.getAmbigBases());
+ longHomoPolymer.push_back(current.getLongHomoPolymer());
+
+ outSummary << current.getName() << '\t';
+ outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
+ outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
+ outSummary << current.getLongHomoPolymer() << endl;
+
+ numSeqs++;
+ }
gobble(inFASTA);
}
inFASTA.close();
}
//********************************************************************************************************************
-
+//this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
Sequence::Sequence(ifstream& fastaFile){
initialize();
fastaFile >> name;
name = name.substr(1);
-
- while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
-
- char letter;
string sequence;
- while(fastaFile){
- letter= fastaFile.get();
- if(letter == '>'){
- fastaFile.putback(letter);
+ //read comments
+ while ((name[0] == '#') && fastaFile) {
+ while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ sequence = getCommentString(fastaFile);
+
+ if (fastaFile) {
+ fastaFile >> name;
+ name = name.substr(1);
+ }else {
+ name = "";
break;
}
- else if(isprint(letter)){
- letter = toupper(letter);
- if(letter == 'U'){letter = 'T';}
- sequence += letter;
- }
}
-
+
+ //read real sequence
+ while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+
+ sequence = getSequenceString(fastaFile);
+
setAligned(sequence);
//setUnaligned removes any gap characters for us
setUnaligned(sequence);
}
+//********************************************************************************************************************
+string Sequence::getSequenceString(ifstream& fastaFile) {
+ try {
+ char letter;
+ string sequence = "";
+
+ while(fastaFile){
+ letter= fastaFile.get();
+ if(letter == '>'){
+ fastaFile.putback(letter);
+ break;
+ }
+ else if(isprint(letter)){
+ letter = toupper(letter);
+ if(letter == 'U'){letter = 'T';}
+ sequence += letter;
+ }
+ }
+
+ return sequence;
+ }
+ catch(exception& e) {
+ errorOut(e, "Sequence", "getSequenceString");
+ exit(1);
+ }
+}
+//********************************************************************************************************************
+//comment can contain '>' so we need to account for that
+string Sequence::getCommentString(ifstream& fastaFile) {
+ try {
+ char letter;
+ string sequence = "";
+
+ while(fastaFile){
+ letter=fastaFile.get();
+ if((letter == '\r') || (letter == '\n')){
+ gobble(fastaFile); //in case its a \r\n situation
+ break;
+ }
+ }
+
+ return sequence;
+ }
+ catch(exception& e) {
+ errorOut(e, "Sequence", "getCommentString");
+ exit(1);
+ }
+}
//********************************************************************************************************************
//if the alignment starts or ends with a gap, replace it with a period to indicate missing data
aligned = sequence;
alignmentLength = aligned.length();
+ setUnaligned(sequence);
if(aligned[0] == '-'){
for(int i=0;i<alignmentLength;i++){
private:
void initialize();
+ string getSequenceString(ifstream&);
+ string getCommentString(ifstream&);
string name;
string unaligned;
string aligned;
while (!filehandle.eof()) {
//input sequence info into sequencedb
Sequence newSequence(filehandle);
- data.push_back(newSequence);
+
+ if (newSequence.getName() != "") { data.push_back(newSequence); }
//takes care of white space
gobble(filehandle);
while(!inFASTA.eof()){
Sequence currSeq(inFASTA);
string origSeq = currSeq.getUnaligned();
- int group;
- string trashCode = "";
-
- if(qFileName != ""){
- if(qThreshold != 0) { success = stripQualThreshold(currSeq, qFile); }
- else if(qAverage != 0) { success = cullQualAverage(currSeq, qFile); }
- if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) {
- success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
+ if (origSeq != "") {
+ int group;
+ string trashCode = "";
+
+ if(qFileName != ""){
+ if(qThreshold != 0) { success = stripQualThreshold(currSeq, qFile); }
+ else if(qAverage != 0) { success = cullQualAverage(currSeq, qFile); }
+ if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) {
+ success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
+ }
+ if(!success) { trashCode += 'q'; }
}
- if(!success) { trashCode += 'q'; }
- }
- if(barcodes.size() != 0){
-
- success = stripBarcode(currSeq, group);
- if(!success){ trashCode += 'b'; }
- }
- if(numFPrimers != 0){
- success = stripForward(currSeq);
- if(!success){ trashCode += 'f'; }
- }
- if(numRPrimers != 0){
- success = stripReverse(currSeq);
- if(!success){ trashCode += 'r'; }
- }
- if(minLength > 0 || maxLength > 0){
- success = cullLength(currSeq);
- if ((currSeq.getUnaligned().length() > 300) && (success)) { cout << "too long " << currSeq.getUnaligned().length() << endl; }
- if(!success){ trashCode += 'l'; }
- }
- if(maxHomoP > 0){
- success = cullHomoP(currSeq);
- if(!success){ trashCode += 'h'; }
- }
- if(maxAmbig != -1){
- success = cullAmbigs(currSeq);
- if(!success){ trashCode += 'n'; }
- }
-
- if(flip){ currSeq.reverseComplement(); } // should go last
-
- if(trashCode.length() == 0){
- currSeq.setAligned(currSeq.getUnaligned()); //this is because of a modification we made to the sequence class to fix a bug. all seqs have an aligned version, which is the version that gets printed.
- currSeq.printSequence(outFASTA);
if(barcodes.size() != 0){
- outGroups << currSeq.getName() << '\t' << groupVector[group] << endl;
- if(allFiles){
- currSeq.printSequence(*fastaFileNames[group]);
+ success = stripBarcode(currSeq, group);
+ if(!success){ trashCode += 'b'; }
+ }
+ if(numFPrimers != 0){
+ success = stripForward(currSeq);
+ if(!success){ trashCode += 'f'; }
+ }
+ if(numRPrimers != 0){
+ success = stripReverse(currSeq);
+ if(!success){ trashCode += 'r'; }
+ }
+ if(minLength > 0 || maxLength > 0){
+ success = cullLength(currSeq);
+ if ((currSeq.getUnaligned().length() > 300) && (success)) { cout << "too long " << currSeq.getUnaligned().length() << endl; }
+ if(!success){ trashCode += 'l'; }
+ }
+ if(maxHomoP > 0){
+ success = cullHomoP(currSeq);
+ if(!success){ trashCode += 'h'; }
+ }
+ if(maxAmbig != -1){
+ success = cullAmbigs(currSeq);
+ if(!success){ trashCode += 'n'; }
+ }
+
+ if(flip){ currSeq.reverseComplement(); } // should go last
+
+ if(trashCode.length() == 0){
+ currSeq.setAligned(currSeq.getUnaligned()); //this is because of a modification we made to the sequence class to fix a bug. all seqs have an aligned version, which is the version that gets printed.
+ currSeq.printSequence(outFASTA);
+ if(barcodes.size() != 0){
+ outGroups << currSeq.getName() << '\t' << groupVector[group] << endl;
+
+ if(allFiles){
+ currSeq.printSequence(*fastaFileNames[group]);
+ }
}
}
- }
- else{
- currSeq.setName(currSeq.getName() + '|' + trashCode);
- currSeq.setUnaligned(origSeq);
- currSeq.printSequence(scrapFASTA);
+ else{
+ currSeq.setName(currSeq.getName() + '|' + trashCode);
+ currSeq.setUnaligned(origSeq);
+ currSeq.printSequence(scrapFASTA);
+ }
}
gobble(inFASTA);
}