2 * screenseqscommand.cpp
5 * Created by Pat Schloss on 6/3/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "screenseqscommand.h"
11 #include "sequence.hpp"
13 //***************************************************************************************************************
15 ScreenSeqsCommand::ScreenSeqsCommand(){
17 globaldata = GlobalData::getInstance();
18 if(globaldata->getFastaFile() == "") { cout << "you must provide a fasta formatted file" << endl; }
21 cout << "Standard Error: " << e.what() << " has occurred in the SeqCoordCommand class Function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
25 cout << "An unknown error has occurred in the SeqCoordCommand class function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
30 //***************************************************************************************************************
32 ScreenSeqsCommand::~ScreenSeqsCommand(){ /* do nothing */ }
34 //***************************************************************************************************************
36 int ScreenSeqsCommand::execute(){
38 int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
39 convert(globaldata->getStartPos(), startPos);
40 convert(globaldata->getEndPos(), endPos);
41 convert(globaldata->getMaxAmbig(), maxAmbig);
42 convert(globaldata->getMaxHomoPolymer(), maxHomoP);
43 convert(globaldata->getMinLength(), minLength);
44 convert(globaldata->getMaxLength(), maxLength);
47 openInputFile(globaldata->getFastaFile(), inFASTA);
49 set<string> badSeqNames;
51 string goodSeqFile = getRootName(globaldata->inputFileName) + "good" + getExtension(globaldata->inputFileName);
52 string badSeqFile = getRootName(globaldata->inputFileName) + "bad" + getExtension(globaldata->inputFileName);
54 ofstream goodSeqOut; openOutputFile(goodSeqFile, goodSeqOut);
55 ofstream badSeqOut; openOutputFile(badSeqFile, badSeqOut);
57 while(!inFASTA.eof()){
58 Sequence currSeq(inFASTA);
59 bool goodSeq = 1; // innocent until proven guilty
60 if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos()) { goodSeq = 0; }
61 if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos()) { goodSeq = 0; }
62 if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases()) { goodSeq = 0; }
63 if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = 0; }
64 if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases()) { goodSeq = 0; }
65 if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases()) { goodSeq = 0; }
68 currSeq.printSequence(goodSeqOut);
71 currSeq.printSequence(badSeqOut);
72 badSeqNames.insert(currSeq.getName());
80 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
84 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
90 //***************************************************************************************************************
92 void ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
95 openInputFile(globaldata->getNameFile(), inputNames);
96 set<string> badSeqGroups;
97 string seqName, seqList, group;
98 set<string>::iterator it;
100 string goodNameFile = getRootName(globaldata->getNameFile()) + "good" + getExtension(globaldata->getNameFile());
101 string badNameFile = getRootName(globaldata->getNameFile()) + "bad" + getExtension(globaldata->getNameFile());
103 ofstream goodNameOut; openOutputFile(goodNameFile, goodNameOut);
104 ofstream badNameOut; openOutputFile(badNameFile, badNameOut);
106 while(!inputNames.eof()){
107 inputNames >> seqName >> seqList;
108 it = badSeqNames.find(seqName);
110 if(it != badSeqNames.end()){
111 badSeqNames.erase(it);
112 badNameOut << seqName << '\t' << seqList << endl;
113 if(globaldata->getNameFile() != ""){
115 for(int i=0;i<seqList.length();i++){
116 if(seqList[i] == ','){
117 badSeqGroups.insert(seqList.substr(start,i-start));
121 badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
125 goodNameOut << seqName << '\t' << seqList << endl;
133 if(globaldata->getGroupFile() != ""){
135 ifstream inputGroups;
136 openInputFile(globaldata->getGroupFile(), inputGroups);
138 string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
139 string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
141 ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut);
142 ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut);
144 while(!inputGroups.eof()){
145 inputGroups >> seqName >> group;
147 it = badSeqGroups.find(seqName);
149 if(it != badSeqGroups.end()){
150 badSeqGroups.erase(it);
151 badGroupOut << seqName << '\t' << group << endl;
154 goodGroupOut << seqName << '\t' << group << endl;
159 goodGroupOut.close();
164 //***************************************************************************************************************
166 void ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
168 ifstream inputGroups;
169 openInputFile(globaldata->getGroupFile(), inputGroups);
170 string seqName, group;
171 set<string>::iterator it;
173 string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
174 string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
176 ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut);
177 ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut);
179 while(!inputGroups.eof()){
180 inputGroups >> seqName >> group;
181 it = badSeqNames.find(seqName);
183 if(it != badSeqNames.end()){
184 badSeqNames.erase(it);
185 badGroupOut << seqName << '\t' << group << endl;
188 goodGroupOut << seqName << '\t' << group << endl;
193 goodGroupOut.close();
198 //***************************************************************************************************************