2 * screenseqscommand.cpp
5 * Created by Pat Schloss on 6/3/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "screenseqscommand.h"
12 //***************************************************************************************************************
14 ScreenSeqsCommand::ScreenSeqsCommand(){
16 globaldata = GlobalData::getInstance();
17 if(globaldata->getFastaFile() == "") { cout << "you must provide a fasta formatted file" << endl; }
20 cout << "Standard Error: " << e.what() << " has occurred in the SeqCoordCommand class Function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
24 cout << "An unknown error has occurred in the SeqCoordCommand class function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
29 //***************************************************************************************************************
31 ScreenSeqsCommand::~ScreenSeqsCommand(){ /* do nothing */ }
33 //***************************************************************************************************************
35 int ScreenSeqsCommand::execute(){
37 int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
38 convert(globaldata->getStartPos(), startPos);
39 convert(globaldata->getEndPos(), endPos);
40 convert(globaldata->getMaxAmbig(), maxAmbig);
41 convert(globaldata->getMaxHomoPolymer(), maxHomoP);
42 convert(globaldata->getMinLength(), minLength);
43 convert(globaldata->getMaxLength(), maxLength);
46 openInputFile(globaldata->getFastaFile(), inFASTA);
48 set<string> badSeqNames;
50 string goodSeqFile = getRootName(globaldata->inputFileName) + "good" + getExtension(globaldata->inputFileName);
51 string badSeqFile = getRootName(globaldata->inputFileName) + "bad" + getExtension(globaldata->inputFileName);
53 ofstream goodSeqOut; openOutputFile(goodSeqFile, goodSeqOut);
54 ofstream badSeqOut; openOutputFile(badSeqFile, badSeqOut);
56 while(!inFASTA.eof()){
57 Sequence currSeq(inFASTA);
58 bool goodSeq = 1; // innocent until proven guilty
59 if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos()) { goodSeq = 0; }
60 if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos()) { goodSeq = 0; }
61 if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases()) { goodSeq = 0; }
62 if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = 0; }
63 if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases()) { goodSeq = 0; }
64 if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases()) { goodSeq = 0; }
67 currSeq.printSequence(goodSeqOut);
70 currSeq.printSequence(badSeqOut);
71 badSeqNames.insert(currSeq.getName());
79 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
83 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
89 //***************************************************************************************************************
91 void ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
94 openInputFile(globaldata->getNameFile(), inputNames);
95 set<string> badSeqGroups;
96 string seqName, seqList, group;
97 set<string>::iterator it;
99 string goodNameFile = getRootName(globaldata->getNameFile()) + "good" + getExtension(globaldata->getNameFile());
100 string badNameFile = getRootName(globaldata->getNameFile()) + "bad" + getExtension(globaldata->getNameFile());
102 ofstream goodNameOut; openOutputFile(goodNameFile, goodNameOut);
103 ofstream badNameOut; openOutputFile(badNameFile, badNameOut);
105 while(!inputNames.eof()){
106 inputNames >> seqName >> seqList;
107 it = badSeqNames.find(seqName);
109 if(it != badSeqNames.end()){
110 badSeqNames.erase(it);
111 badNameOut << seqName << '\t' << seqList << endl;
112 if(globaldata->getNameFile() != ""){
114 for(int i=0;i<seqList.length();i++){
115 if(seqList[i] == ','){
116 badSeqGroups.insert(seqList.substr(start,i-start));
120 badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
124 goodNameOut << seqName << '\t' << seqList << endl;
132 if(globaldata->getGroupFile() != ""){
134 ifstream inputGroups;
135 openInputFile(globaldata->getGroupFile(), inputGroups);
137 string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
138 string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
140 ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut);
141 ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut);
143 while(!inputGroups.eof()){
144 inputGroups >> seqName >> group;
146 it = badSeqGroups.find(seqName);
148 if(it != badSeqGroups.end()){
149 badSeqGroups.erase(it);
150 badGroupOut << seqName << '\t' << group << endl;
153 goodGroupOut << seqName << '\t' << group << endl;
158 goodGroupOut.close();
163 //***************************************************************************************************************
165 void ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
167 ifstream inputGroups;
168 openInputFile(globaldata->getGroupFile(), inputGroups);
169 string seqName, group;
170 set<string>::iterator it;
172 string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
173 string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
175 ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut);
176 ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut);
178 while(!inputGroups.eof()){
179 inputGroups >> seqName >> group;
180 it = badSeqNames.find(seqName);
182 if(it != badSeqNames.end()){
183 badSeqNames.erase(it);
184 badGroupOut << seqName << '\t' << group << endl;
187 goodGroupOut << seqName << '\t' << group << endl;
192 goodGroupOut.close();
197 //***************************************************************************************************************