]> git.donarmstrong.com Git - mothur.git/blob - seqsummarycommand.cpp
removed readseqs, readfasta, readnexus, readclustal, readseqsphylip and updated seque...
[mothur.git] / seqsummarycommand.cpp
1 /*
2  *  seqsummarycommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 5/30/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "seqsummarycommand.h"
11 #include "sequence.hpp"
12
13 //***************************************************************************************************************
14
15 SeqSummaryCommand::SeqSummaryCommand(){
16         try {
17                 globaldata = GlobalData::getInstance();
18                 if(globaldata->getFastaFile() == "")            {       cout << "you need to at least enter a fasta file name" << endl; }
19         }
20         catch(exception& e) {
21                 cout << "Standard Error: " << e.what() << " has occurred in the SeqCoordCommand class Function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
22                 exit(1);
23         }
24         catch(...) {
25                 cout << "An unknown error has occurred in the SeqCoordCommand class function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
26                 exit(1);
27         }       
28 }
29
30 //***************************************************************************************************************
31
32 SeqSummaryCommand::~SeqSummaryCommand(){        /*      do nothing      */      }
33
34 //***************************************************************************************************************
35
36 int SeqSummaryCommand::execute(){
37         try{
38
39                 ifstream inFASTA;
40                 openInputFile(globaldata->getFastaFile(), inFASTA);
41                 int numSeqs = 0;
42
43                 ofstream outSummary;
44                 string summaryFile = globaldata->getFastaFile() + ".summary";
45                 openOutputFile(summaryFile, outSummary);
46                 
47                 vector<int> startPosition;
48                 vector<int> endPosition;
49                 vector<int> seqLength;
50                 vector<int> ambigBases;
51                 vector<int> longHomoPolymer;
52                 
53                 outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer" << endl;                   
54
55                 while(!inFASTA.eof()){
56                         Sequence current(inFASTA);
57                         startPosition.push_back(current.getStartPos());
58                         endPosition.push_back(current.getEndPos());
59                         seqLength.push_back(current.getNumBases());
60                         ambigBases.push_back(current.getAmbigBases());
61                         longHomoPolymer.push_back(current.getLongHomoPolymer());
62
63                         outSummary << current.getName() << '\t';
64                         outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
65                         outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
66                         outSummary << current.getLongHomoPolymer() << endl;
67                         
68                         numSeqs++;
69                         gobble(inFASTA);
70                 }
71                 inFASTA.close();
72                 
73                 sort(startPosition.begin(), startPosition.end());
74                 sort(endPosition.begin(), endPosition.end());
75                 sort(seqLength.begin(), seqLength.end());
76                 sort(ambigBases.begin(), ambigBases.end());
77                 sort(longHomoPolymer.begin(), longHomoPolymer.end());
78                 
79                 int ptile0_25   = int(numSeqs * 0.025);
80                 int ptile25             = int(numSeqs * 0.250);
81                 int ptile50             = int(numSeqs * 0.500);
82                 int ptile75             = int(numSeqs * 0.750);
83                 int ptile97_5   = int(numSeqs * 0.975);
84                 int ptile100    = numSeqs - 1;
85                 
86                 cout << endl;
87                 cout << "\t\tStart\tEnd\tNBases\tAmbigs\tPolymer" << endl;
88                 cout << "Minimum:\t" << startPosition[0] << '\t' << endPosition[0] << '\t' << seqLength[0] << '\t' << ambigBases[0] << '\t' << longHomoPolymer[0] << endl;
89                 cout << "2.5%-tile:\t" << startPosition[ptile0_25] << '\t' << endPosition[ptile0_25] << '\t' << seqLength[ptile0_25] << '\t' << ambigBases[ptile0_25] << '\t' << longHomoPolymer[ptile0_25] << endl;
90                 cout << "25%-tile:\t" << startPosition[ptile25] << '\t' << endPosition[ptile25] << '\t' << seqLength[ptile25] << '\t' << ambigBases[ptile25] << '\t' << longHomoPolymer[ptile25] << endl;
91                 cout << "Median: \t" << startPosition[ptile50] << '\t' << endPosition[ptile50] << '\t' << seqLength[ptile50] << '\t' << ambigBases[ptile50] << '\t' << longHomoPolymer[ptile50] << endl;
92                 cout << "75%-tile:\t" << startPosition[ptile75] << '\t' << endPosition[ptile75] << '\t' << seqLength[ptile75] << '\t' << ambigBases[ptile75] << '\t' << longHomoPolymer[ptile75] << endl;
93                 cout << "97.5%-tile:\t" << startPosition[ptile97_5] << '\t' << endPosition[ptile97_5] << '\t' << seqLength[ptile97_5] << '\t' << ambigBases[ptile97_5] << '\t' << longHomoPolymer[ptile97_5] << endl;
94                 cout << "Maximum:\t" << startPosition[ptile100] << '\t' << endPosition[ptile100] << '\t' << seqLength[ptile100] << '\t' << ambigBases[ptile100] << '\t' << longHomoPolymer[ptile100] << endl;
95                 cout << "# of Seqs:\t" << numSeqs << endl;
96                 
97                 return 0;
98         }
99         catch(exception& e) {
100                 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
101                 exit(1);
102         }
103         catch(...) {
104                 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
105                 exit(1);
106         }
107         
108 }
109
110 //***************************************************************************************************************
111
112