]> git.donarmstrong.com Git - mothur.git/blob - seqsummarycommand.cpp
added screen.seqs command - pds
[mothur.git] / seqsummarycommand.cpp
1 /*
2  *  seqsummarycommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 5/30/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "seqsummarycommand.h"
11
12 //***************************************************************************************************************
13
14 SeqSummaryCommand::SeqSummaryCommand(){
15         try {
16                 globaldata = GlobalData::getInstance();
17                 
18                 if(globaldata->getFastaFile() != "")            {       readSeqs = new ReadFasta(globaldata->inputFileName);    }
19                 else if(globaldata->getNexusFile() != "")       {       readSeqs = new ReadNexus(globaldata->inputFileName);    }
20                 else if(globaldata->getClustalFile() != "") {   readSeqs = new ReadClustal(globaldata->inputFileName);  }
21                 else if(globaldata->getPhylipFile() != "")      {       readSeqs = new ReadPhylip(globaldata->inputFileName);   }
22                 
23                 readSeqs->read();
24                 db = readSeqs->getDB();
25                 numSeqs = db->size();
26         }
27         catch(exception& e) {
28                 cout << "Standard Error: " << e.what() << " has occurred in the SeqCoordCommand class Function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
29                 exit(1);
30         }
31         catch(...) {
32                 cout << "An unknown error has occurred in the SeqCoordCommand class function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
33                 exit(1);
34         }       
35 }
36
37 //***************************************************************************************************************
38
39 SeqSummaryCommand::~SeqSummaryCommand(){
40         delete readSeqs;
41 }
42
43 //***************************************************************************************************************
44
45 int SeqSummaryCommand::execute(){
46         try{
47                 
48                 ofstream outfile;
49                 string summaryFile = getRootName(globaldata->inputFileName) + "summary";
50                 openOutputFile(summaryFile, outfile);
51
52                 vector<int> startPosition(numSeqs, 0);
53                 vector<int> endPosition(numSeqs, 0);
54                 vector<int> seqLength(numSeqs, 0);
55                 vector<int> ambigBases(numSeqs, 0);
56                 vector<int> longHomoPolymer(numSeqs, 0);
57                 
58                 if(db->get(0).getIsAligned() == 1){
59                         outfile << "seqname\tstart\tend\tlength\tambiguities\tlonghomopolymer" << endl;                 
60                         for(int i = 0; i < numSeqs; i++) {
61                                 Sequence current = db->get(i);
62                                 startPosition[i] = current.getStartPos();
63                                 endPosition[i] = current.getEndPos();
64                                 seqLength[i] = current.getNumBases();
65                                 ambigBases[i] = current.getAmbigBases();
66                                 longHomoPolymer[i] = current.getLongHomoPolymer();
67                                 outfile << current.getName() << '\t' << startPosition[i] << '\t' << endPosition[i] << '\t' << seqLength[i] << '\t' << ambigBases[i] << '\t' << longHomoPolymer[i] << endl;
68                         }
69                 }
70                 else{
71                         outfile << "seqname\tlength\tambiguities\tlonghomopolymer" << endl;
72                         for(int i=0;i<numSeqs;i++){
73                                 Sequence current = db->get(i);
74                                 seqLength[i] = current.getNumBases();
75                                 ambigBases[i] = current.getAmbigBases();
76                                 longHomoPolymer[i] = current.getLongHomoPolymer();
77                                 outfile << current.getName() << '\t' << seqLength[i] << '\t' << ambigBases[i] << '\t' << longHomoPolymer[i] << endl;
78                         }
79                 }
80                 
81                 sort(seqLength.begin(), seqLength.end());
82                 sort(ambigBases.begin(), ambigBases.end());
83                 sort(longHomoPolymer.begin(), longHomoPolymer.end());
84                 
85                 int median                      = int(numSeqs * 0.500);
86                 int lowestPtile         = int(numSeqs * 0.025);
87                 int lowPtile            = int(numSeqs * 0.250);
88                 int highPtile           = int(numSeqs * 0.750);
89                 int highestPtile        = int(numSeqs * 0.975);
90                 int max                         = numSeqs - 1;
91                 
92                 cout << endl;
93                 if(db->get(0).getIsAligned() == 1){
94                         sort(startPosition.begin(), startPosition.end());
95                         sort(endPosition.begin(), endPosition.end());
96                                         
97                         cout << "\t\tStart\tEnd\tLength\tN's\tPolymer" << endl;
98                         cout << "Minimum:\t" << startPosition[0] << '\t' << endPosition[0] << '\t' << seqLength[0] << '\t' << ambigBases[0] << '\t' << longHomoPolymer[0] << endl;
99                         cout << "2.5%-tile:\t" << startPosition[lowestPtile] << '\t' << endPosition[lowestPtile] << '\t' << seqLength[lowestPtile] << '\t' << ambigBases[lowestPtile] << '\t' << longHomoPolymer[lowestPtile] << endl;
100                         cout << "25%-tile:\t" << startPosition[lowPtile] << '\t' << endPosition[lowPtile] << '\t' << seqLength[lowPtile] << '\t' << ambigBases[lowPtile] << '\t' << longHomoPolymer[lowPtile] << endl;
101                         cout << "Median: \t" << startPosition[median] << '\t' << endPosition[median] << '\t' << seqLength[median] << '\t' << ambigBases[median] << '\t' << longHomoPolymer[median] << endl;
102                         cout << "75%-tile:\t" << startPosition[highPtile] << '\t' << endPosition[highPtile] << '\t' << seqLength[highPtile] << '\t' << ambigBases[highPtile] << '\t' << longHomoPolymer[highPtile] << endl;
103                         cout << "97.5%-tile:\t" << startPosition[highestPtile] << '\t' << endPosition[highestPtile] << '\t' << seqLength[highestPtile] << '\t' << ambigBases[highestPtile] << '\t' << longHomoPolymer[highestPtile] << endl;
104                         cout << "Maximum:\t" << startPosition[max] << '\t' << endPosition[max] << '\t' << seqLength[max] << '\t' << ambigBases[max] << '\t' << longHomoPolymer[max] << endl;
105                 }
106                 else{
107                         cout << "\t\tLength\tN's\tPolymer" << endl;
108                         cout << "Minimum:\t" << seqLength[0] << '\t' << ambigBases[0] << '\t' << longHomoPolymer[0] << endl;
109                         cout << "2.5%-tile:\t" << seqLength[lowestPtile] << '\t' << ambigBases[lowestPtile] << '\t' << longHomoPolymer[lowestPtile] << endl;
110                         cout << "25%-tile:\t" << seqLength[lowPtile] << '\t' << ambigBases[lowPtile] << '\t' << longHomoPolymer[lowPtile] << endl;
111                         cout << "Median: \t" << seqLength[median] << '\t' << ambigBases[median] << '\t' << longHomoPolymer[median] << endl;
112                         cout << "75%-tile:\t"<< seqLength[highPtile] << '\t' << ambigBases[highPtile] << '\t' << longHomoPolymer[highPtile] << endl;
113                         cout << "97.5%-tile:\t"<< seqLength[highestPtile] << '\t' << ambigBases[highestPtile] << '\t' << longHomoPolymer[highestPtile] << endl;
114                         cout << "Maximum:\t" << seqLength[max] << '\t' << ambigBases[max] << '\t' << longHomoPolymer[max] << endl;
115                 }
116                 cout << "# of Seqs:\t" << numSeqs << endl;
117                 
118                 return 0;
119         }
120         catch(exception& e) {
121                 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
122                 exit(1);
123         }
124         catch(...) {
125                 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
126                 exit(1);
127         }
128         
129 }
130
131 //***************************************************************************************************************
132
133