2 * screenseqscommand.cpp
5 * Created by Pat Schloss on 6/3/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "screenseqscommand.h"
11 #include "sequence.hpp"
13 //***************************************************************************************************************
15 ScreenSeqsCommand::ScreenSeqsCommand(){
17 globaldata = GlobalData::getInstance();
18 if(globaldata->getFastaFile() == "") { cout << "you must provide a fasta formatted file" << endl; }
21 cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
25 cout << "An unknown error has occurred in the ScreenSeqsCommand class function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
30 //***************************************************************************************************************
32 ScreenSeqsCommand::~ScreenSeqsCommand(){ /* do nothing */ }
34 //***************************************************************************************************************
36 int ScreenSeqsCommand::execute(){
38 int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
39 convert(globaldata->getStartPos(), startPos);
40 convert(globaldata->getEndPos(), endPos);
41 convert(globaldata->getMaxAmbig(), maxAmbig);
42 convert(globaldata->getMaxHomoPolymer(), maxHomoP);
43 convert(globaldata->getMinLength(), minLength);
44 convert(globaldata->getMaxLength(), maxLength);
47 openInputFile(globaldata->getFastaFile(), inFASTA);
49 set<string> badSeqNames;
51 string goodSeqFile = getRootName(globaldata->getFastaFile()) + "good" + getExtension(globaldata->getFastaFile());
52 string badSeqFile = getRootName(globaldata->getFastaFile()) + "bad" + getExtension(globaldata->getFastaFile());
54 ofstream goodSeqOut; openOutputFile(goodSeqFile, goodSeqOut);
55 ofstream badSeqOut; openOutputFile(badSeqFile, badSeqOut);
57 while(!inFASTA.eof()){
58 Sequence currSeq(inFASTA);
59 bool goodSeq = 1; // innocent until proven guilty
60 if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos()) { goodSeq = 0; }
61 if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos()) { goodSeq = 0; }
62 if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases()) { goodSeq = 0; }
63 if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = 0; }
64 if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases()) { goodSeq = 0; }
65 if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases()) { goodSeq = 0; }
68 currSeq.printSequence(goodSeqOut);
71 currSeq.printSequence(badSeqOut);
72 badSeqNames.insert(currSeq.getName());
76 if(globaldata->getNameFile() != ""){
77 screenNameGroupFile(badSeqNames);
79 else if(globaldata->getGroupFile() != ""){
80 screenGroupFile(badSeqNames);
86 cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
90 cout << "An unknown error has occurred in the ScreenSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
96 //***************************************************************************************************************
98 void ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
101 openInputFile(globaldata->getNameFile(), inputNames);
102 set<string> badSeqGroups;
103 string seqName, seqList, group;
104 set<string>::iterator it;
106 string goodNameFile = getRootName(globaldata->getNameFile()) + "good" + getExtension(globaldata->getNameFile());
107 string badNameFile = getRootName(globaldata->getNameFile()) + "bad" + getExtension(globaldata->getNameFile());
109 ofstream goodNameOut; openOutputFile(goodNameFile, goodNameOut);
110 ofstream badNameOut; openOutputFile(badNameFile, badNameOut);
112 while(!inputNames.eof()){
113 inputNames >> seqName >> seqList;
114 it = badSeqNames.find(seqName);
116 if(it != badSeqNames.end()){
117 badSeqNames.erase(it);
118 badNameOut << seqName << '\t' << seqList << endl;
119 if(globaldata->getNameFile() != ""){
121 for(int i=0;i<seqList.length();i++){
122 if(seqList[i] == ','){
123 badSeqGroups.insert(seqList.substr(start,i-start));
127 badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
131 goodNameOut << seqName << '\t' << seqList << endl;
139 if(globaldata->getGroupFile() != ""){
141 ifstream inputGroups;
142 openInputFile(globaldata->getGroupFile(), inputGroups);
144 string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
145 string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
147 ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut);
148 ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut);
150 while(!inputGroups.eof()){
151 inputGroups >> seqName >> group;
153 it = badSeqGroups.find(seqName);
155 if(it != badSeqGroups.end()){
156 badSeqGroups.erase(it);
157 badGroupOut << seqName << '\t' << group << endl;
160 goodGroupOut << seqName << '\t' << group << endl;
165 goodGroupOut.close();
170 //***************************************************************************************************************
172 void ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
174 ifstream inputGroups;
175 openInputFile(globaldata->getGroupFile(), inputGroups);
176 string seqName, group;
177 set<string>::iterator it;
179 string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
180 string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
182 ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut);
183 ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut);
185 while(!inputGroups.eof()){
186 inputGroups >> seqName >> group;
187 it = badSeqNames.find(seqName);
189 if(it != badSeqNames.end()){
190 badSeqNames.erase(it);
191 badGroupOut << seqName << '\t' << group << endl;
194 goodGroupOut << seqName << '\t' << group << endl;
199 goodGroupOut.close();
204 //***************************************************************************************************************