]> git.donarmstrong.com Git - mothur.git/blob - seqsummarycommand.cpp
added checks for ^C to quit command instead of program
[mothur.git] / seqsummarycommand.cpp
1 /*
2  *  seqcoordcommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 5/30/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "seqsummarycommand.h"
11 #include "sequence.hpp"
12
13 //***************************************************************************************************************
14
15 SeqSummaryCommand::SeqSummaryCommand(string option)  {
16         try {
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"fasta","outputdir","inputdir"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string,string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                         map<string,string>::iterator it;
32                         
33                         //check to make sure all parameters are valid for command
34                         for (it = parameters.begin(); it != parameters.end(); it++) { 
35                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
36                         }
37                         
38                         //if the user changes the input directory command factory will send this info to us in the output parameter 
39                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
40                         if (inputDir == "not found"){   inputDir = "";          }
41                         else {
42                                 string path;
43                                 it = parameters.find("fasta");
44                                 //user has given a template file
45                                 if(it != parameters.end()){ 
46                                         path = hasPath(it->second);
47                                         //if the user has not given a path then, add inputdir. else leave path alone.
48                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
49                                 }
50                         }
51                         
52                         //check for required parameters
53                         fastafile = validParameter.validFile(parameters, "fasta", true);
54                         if (fastafile == "not open") { abort = true; }
55                         else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the summary.seqs command."); m->mothurOutEndLine(); abort = true;  }       
56                         
57                         //if the user changes the output directory command factory will send this info to us in the output parameter 
58                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
59                                 outputDir = ""; 
60                                 outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it  
61                         }
62
63                 }
64         }
65         catch(exception& e) {
66                 m->errorOut(e, "SeqSummaryCommand", "SeqSummaryCommand");
67                 exit(1);
68         }
69 }
70 //**********************************************************************************************************************
71
72 void SeqSummaryCommand::help(){
73         try {
74                 m->mothurOut("The summary.seqs command reads a fastafile and ....\n");
75                 m->mothurOut("The summary.seqs command parameter is fasta and it is required.\n");
76                 m->mothurOut("The summary.seqs command should be in the following format: \n");
77                 m->mothurOut("summary.seqs(fasta=yourFastaFile) \n");
78                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "SeqSummaryCommand", "help");
82                 exit(1);
83         }
84 }
85
86 //***************************************************************************************************************
87
88 SeqSummaryCommand::~SeqSummaryCommand(){        /*      do nothing      */      }
89
90 //***************************************************************************************************************
91
92 int SeqSummaryCommand::execute(){
93         try{
94                 
95                 if (abort == true) { return 0; }
96                 
97                 ifstream inFASTA;
98                 openInputFile(fastafile, inFASTA);
99                 int numSeqs = 0;
100
101                 ofstream outSummary;
102                 string summaryFile = outputDir + getSimpleName(fastafile) + ".summary";
103                 openOutputFile(summaryFile, outSummary);
104                 
105                 vector<int> startPosition;
106                 vector<int> endPosition;
107                 vector<int> seqLength;
108                 vector<int> ambigBases;
109                 vector<int> longHomoPolymer;
110                 
111                 outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer" << endl;                   
112
113                 while(!inFASTA.eof()){
114                         if (m->control_pressed) { inFASTA.close(); outSummary.close(); remove(summaryFile.c_str()); return 0; }
115                         
116                         Sequence current(inFASTA);
117                         if (current.getName() != "") {
118                                 startPosition.push_back(current.getStartPos());
119                                 endPosition.push_back(current.getEndPos());
120                                 seqLength.push_back(current.getNumBases());
121                                 ambigBases.push_back(current.getAmbigBases());
122                                 longHomoPolymer.push_back(current.getLongHomoPolymer());
123                                 
124                                 outSummary << current.getName() << '\t';
125                                 outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
126                                 outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
127                                 outSummary << current.getLongHomoPolymer() << endl;
128                                 
129                                 numSeqs++;
130                         }
131                         gobble(inFASTA);
132                 }
133                 inFASTA.close();
134                 
135                 sort(startPosition.begin(), startPosition.end());
136                 sort(endPosition.begin(), endPosition.end());
137                 sort(seqLength.begin(), seqLength.end());
138                 sort(ambigBases.begin(), ambigBases.end());
139                 sort(longHomoPolymer.begin(), longHomoPolymer.end());
140                 
141                 int ptile0_25   = int(numSeqs * 0.025);
142                 int ptile25             = int(numSeqs * 0.250);
143                 int ptile50             = int(numSeqs * 0.500);
144                 int ptile75             = int(numSeqs * 0.750);
145                 int ptile97_5   = int(numSeqs * 0.975);
146                 int ptile100    = numSeqs - 1;
147                 
148                 //to compensate for blank sequences that would result in startPosition and endPostion equalling -1
149                 if (startPosition[0] == -1) {  startPosition[0] = 0;    }
150                 if (endPosition[0] == -1)       {  endPosition[0] = 0;          }
151                 
152                 if (m->control_pressed) {  outSummary.close(); remove(summaryFile.c_str()); return 0; }
153                 
154                 m->mothurOutEndLine();
155                 m->mothurOut("\t\tStart\tEnd\tNBases\tAmbigs\tPolymer"); m->mothurOutEndLine();
156                 m->mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0])); m->mothurOutEndLine();
157                 m->mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25])); m->mothurOutEndLine();
158                 m->mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25])); m->mothurOutEndLine();
159                 m->mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50])); m->mothurOutEndLine();
160                 m->mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75])); m->mothurOutEndLine();
161                 m->mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5])); m->mothurOutEndLine();
162                 m->mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100])); m->mothurOutEndLine();
163                 m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine();
164                 
165                 outSummary.close();
166                 
167                 if (m->control_pressed) {  remove(summaryFile.c_str()); return 0; }
168                 
169                 m->mothurOutEndLine();
170                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
171                 m->mothurOut(summaryFile); m->mothurOutEndLine();       
172                 m->mothurOutEndLine();
173
174                 return 0;
175         }
176         catch(exception& e) {
177                 m->errorOut(e, "SeqSummaryCommand", "execute");
178                 exit(1);
179         }
180 }
181
182 //***************************************************************************************************************
183
184