]> git.donarmstrong.com Git - mothur.git/blob - fastamap.cpp
added pipeline commands which involved change to command factory and command class...
[mothur.git] / fastamap.cpp
1 /*
2  *  fastamap.cpp
3  *  mothur
4  *
5  *  Created by Sarah Westcott on 1/16/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "fastamap.h"
11 #include "sequence.hpp"
12
13 /*******************************************************************************/
14
15 void FastaMap::readFastaFile(string inFileName) {
16         try {
17                 ifstream in;
18                 m->openInputFile(inFileName, in);
19                 string name, sequence, line;
20                 sequence = "";
21                 string temp;
22                 map<string, string>::iterator itName;
23                 
24                 
25                 while(!in.eof()){
26                         if (m->control_pressed) { break; }
27                         
28                         Sequence currSeq(in);
29                         name = currSeq.getName();
30                         
31                         if (name != "") {
32                                 if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
33                                 else                                            {       sequence = currSeq.getUnaligned();      }
34                                 
35                                 itName = seqmap.find(name);
36                                 if (itName == seqmap.end()) { seqmap[name] = sequence;  }
37                                 else { m->mothurOut("You already have a sequence named " + name + ", sequence names must be unique, please correct."); m->mothurOutEndLine(); }
38                                 
39                                 map<string,group>::iterator it = data.find(sequence);
40                                 if (it == data.end()) {         //it's unique.
41                                         data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
42                                         //                              data[sequence].groupnumber = 1;
43                                         data[sequence].names = name;
44                                 }else { // its a duplicate.
45                                         data[sequence].names += "," + name;
46                                         //                              data[sequence].groupnumber++;
47                                 }       
48                         }
49                         m->gobble(in);
50                 }
51                 in.close();             
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "FastaMap", "readFastaFile");
55                 exit(1);
56         }
57 }
58
59 /*******************************************************************************/
60
61 void FastaMap::readFastaFile(string inFastaFile, string oldNameFileName){ //prints data
62         
63         ifstream oldNameFile;
64         m->openInputFile(oldNameFileName, oldNameFile);
65         
66         map<string,string> oldNameMap;
67         string name, list;
68         while(!oldNameFile.eof()){
69                 if (m->control_pressed) { break; }
70                 
71                 oldNameFile >> name >> list;
72                 oldNameMap[name] = list;
73                 m->gobble(oldNameFile);
74         }
75         oldNameFile.close();
76         
77         ifstream inFASTA;
78         m->openInputFile(inFastaFile, inFASTA);
79         string sequence;
80         while(!inFASTA.eof()){
81                 if (m->control_pressed) { break; }
82                 
83                 Sequence currSeq(inFASTA);
84                 name = currSeq.getName();
85                 
86                 if (name != "") {
87                         if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
88                         else                                            {       sequence = currSeq.getUnaligned();      }
89                         
90                         seqmap[name] = sequence;  
91                         map<string,group>::iterator it = data.find(sequence);
92                         if (it == data.end()) {         //it's unique.
93                                 data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
94                                 //                      data[sequence].groupnumber = 1;
95                                 data[sequence].names = oldNameMap[name];
96                         }else { // its a duplicate.
97                                 data[sequence].names += "," + oldNameMap[name];
98                                 //                      data[sequence].groupnumber++;
99                         }       
100                 }
101                 m->gobble(inFASTA);
102         }
103         
104         
105         inFASTA.close();
106 }
107
108 /*******************************************************************************/
109
110 string FastaMap::getGroupName(string seq) {  //pass a sequence name get its group
111         return data[seq].groupname;
112 }
113
114 /*******************************************************************************/
115
116 string FastaMap::getNames(string seq) { //pass a sequence get the string of names in the group separated by ','s.
117         return data[seq].names;
118 }
119
120 /*******************************************************************************/
121
122 string FastaMap::getSequence(string name) {
123         
124         map<string,string>::iterator it = seqmap.find(name);
125         if (it == seqmap.end()) {       return "not found";             }
126         else                                    {       return it->second;              }
127         
128 }       
129
130 /*******************************************************************************/
131
132 void FastaMap::push_back(string name, string seq) {
133         
134         map<string,group>::iterator it = data.find(seq);
135         if (it == data.end()) {         //it's unique.
136                 data[seq].groupname = name;  //group name will be the name of the first duplicate sequence found.
137                 data[seq].names = name;
138         }else { // its a duplicate.
139                 data[seq].names += "," + name;
140         }
141         seqmap[name] = seq;
142 }
143
144 /*******************************************************************************/
145
146 int FastaMap::sizeUnique(){ //returns datas size which is the number of unique sequences
147         return data.size();
148 }
149
150 /*******************************************************************************/
151
152 void FastaMap::printNamesFile(string outFileName){ //prints data
153         try {
154                 ofstream outFile;
155                 m->openOutputFile(outFileName, outFile);
156                 
157                 // two column file created with groupname and them list of identical sequence names
158                 for (map<string,group>::iterator it = data.begin(); it != data.end(); it++) {
159                         if (m->control_pressed) { break; }
160                         outFile << it->second.groupname << '\t' << it->second.names << endl;
161                 }
162                 outFile.close();
163         }
164         catch(exception& e) {
165                 m->errorOut(e, "FastaMap", "printNamesFile");
166                 exit(1);
167         }
168 }
169
170 /*******************************************************************************/
171
172 void FastaMap::printCondensedFasta(string outFileName){ //prints data
173         try {
174                 ofstream out;
175                 m->openOutputFile(outFileName, out);
176                 //creates a fasta file
177                 for (map<string,group>::iterator it = data.begin(); it != data.end(); it++) {
178                         if (m->control_pressed) { break; }
179                         out << ">" << it->second.groupname << endl;
180                         out << it->first << endl;
181                 }
182                 out.close();
183         }
184         catch(exception& e) {
185                 m->errorOut(e, "FastaMap", "printCondensedFasta");
186                 exit(1);
187         }
188 }
189
190 /*******************************************************************************/
191