]> git.donarmstrong.com Git - mothur.git/blob - fastamap.cpp
created mothurOut class to handle logfiles
[mothur.git] / fastamap.cpp
1 /*
2  *  fastamap.cpp
3  *  mothur
4  *
5  *  Created by Sarah Westcott on 1/16/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "fastamap.h"
11 #include "sequence.hpp"
12
13 /*******************************************************************************/
14
15 void FastaMap::readFastaFile(string inFileName) {
16         try {
17                 ifstream in;
18                 openInputFile(inFileName, in);
19                 string name, sequence, line;
20                 sequence = "";
21                 string temp;
22
23                 while(!in.eof()){
24                         Sequence currSeq(in);
25                         name = currSeq.getName();
26                         
27                         if (name != "") {
28                                 if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
29                                 else                                            {       sequence = currSeq.getUnaligned();      }
30                                 
31                                 seqmap[name] = sequence;  
32                                 map<string,group>::iterator it = data.find(sequence);
33                                 if (it == data.end()) {         //it's unique.
34                                         data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
35                                         //                              data[sequence].groupnumber = 1;
36                                         data[sequence].names = name;
37                                 }else { // its a duplicate.
38                                         data[sequence].names += "," + name;
39                                         //                              data[sequence].groupnumber++;
40                                 }       
41                         }
42                         gobble(in);
43                 }
44                 in.close();             
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "FastaMap", "readFastaFile");
48                 exit(1);
49         }
50 }
51
52 /*******************************************************************************/
53
54 void FastaMap::readFastaFile(string inFastaFile, string oldNameFileName){ //prints data
55         
56         ifstream oldNameFile;
57         openInputFile(oldNameFileName, oldNameFile);
58         
59         map<string,string> oldNameMap;
60         string name, list;
61         while(!oldNameFile.eof()){
62                 oldNameFile >> name >> list;
63                 oldNameMap[name] = list;
64                 gobble(oldNameFile);
65         }
66         oldNameFile.close();
67         
68         ifstream inFASTA;
69         openInputFile(inFastaFile, inFASTA);
70         string sequence;
71         while(!inFASTA.eof()){
72                 Sequence currSeq(inFASTA);
73                 name = currSeq.getName();
74                 
75                 if (name != "") {
76                         if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
77                         else                                            {       sequence = currSeq.getUnaligned();      }
78                         
79                         seqmap[name] = sequence;  
80                         map<string,group>::iterator it = data.find(sequence);
81                         if (it == data.end()) {         //it's unique.
82                                 data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
83                                 //                      data[sequence].groupnumber = 1;
84                                 data[sequence].names = oldNameMap[name];
85                         }else { // its a duplicate.
86                                 data[sequence].names += "," + oldNameMap[name];
87                                 //                      data[sequence].groupnumber++;
88                         }       
89                 }
90                 gobble(inFASTA);
91         }
92         
93         
94         inFASTA.close();
95 }
96
97 /*******************************************************************************/
98
99 string FastaMap::getGroupName(string seq) {  //pass a sequence name get its group
100         return data[seq].groupname;
101 }
102
103 /*******************************************************************************/
104
105 string FastaMap::getNames(string seq) { //pass a sequence get the string of names in the group separated by ','s.
106         return data[seq].names;
107 }
108
109 /*******************************************************************************/
110
111 string FastaMap::getSequence(string name) {
112         
113         map<string,string>::iterator it = seqmap.find(name);
114         if (it == seqmap.end()) {       return "not found";             }
115         else                                    {       return it->second;              }
116         
117 }       
118
119 /*******************************************************************************/
120
121 void FastaMap::push_back(string name, string seq) {
122         
123         map<string,group>::iterator it = data.find(seq);
124         if (it == data.end()) {         //it's unique.
125                 data[seq].groupname = name;  //group name will be the name of the first duplicate sequence found.
126                 data[seq].names = name;
127         }else { // its a duplicate.
128                 data[seq].names += "," + name;
129         }
130         seqmap[name] = seq;
131 }
132
133 /*******************************************************************************/
134
135 int FastaMap::sizeUnique(){ //returns datas size which is the number of unique sequences
136         return data.size();
137 }
138
139 /*******************************************************************************/
140
141 void FastaMap::printNamesFile(string outFileName){ //prints data
142         try {
143                 ofstream outFile;
144                 openOutputFile(outFileName, outFile);
145                 
146                 // two column file created with groupname and them list of identical sequence names
147                 for (map<string,group>::iterator it = data.begin(); it != data.end(); it++) {
148                         outFile << it->second.groupname << '\t' << it->second.names << endl;
149                 }
150                 outFile.close();
151         }
152         catch(exception& e) {
153                 m->errorOut(e, "FastaMap", "printNamesFile");
154                 exit(1);
155         }
156 }
157
158 /*******************************************************************************/
159
160 void FastaMap::printCondensedFasta(string outFileName){ //prints data
161         try {
162                 ofstream out;
163                 openOutputFile(outFileName, out);
164                 //creates a fasta file
165                 for (map<string,group>::iterator it = data.begin(); it != data.end(); it++) {
166                         out << ">" << it->second.groupname << endl;
167                         out << it->first << endl;
168                 }
169                 out.close();
170         }
171         catch(exception& e) {
172                 m->errorOut(e, "FastaMap", "printCondensedFasta");
173                 exit(1);
174         }
175 }
176
177 /*******************************************************************************/
178