5 * Created by westcott on 5/19/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "splitmatrix.h"
12 /***********************************************************************/
14 SplitMatrix::SplitMatrix(string distfile, string name, float c){
15 m = MothurOut::getInstance();
21 /***********************************************************************/
23 int SplitMatrix::split(){
26 vector<set<string> > groups;
31 openInputFile(distFile, dFile);
37 dFile >> seqA >> seqB >> dist;
40 //cout << "in cutoff: " << dist << endl;
46 for(int i=0;i<numGroups;i++){
47 set<string>::iterator aIt = groups[i].find(seqA);
48 set<string>::iterator bIt = groups[i].find(seqB);
50 if(groupIDA == -1 && aIt != groups[i].end()){//seqA is not already assigned to a group and is in group[i], so assign seqB to group[i]
51 groups[i].insert(seqB);
55 //cout << "in aIt: " << groupID << endl;
58 else if(groupIDB == -1 && bIt != groups[i].end()){//seqB is not already assigned to a group and is in group[i], so assign seqA to group[i]
59 groups[i].insert(seqA);
63 // cout << "in bIt: " << groupID << endl;
67 if(groupIDA != -1 && groupIDB != -1){//both ifs above have been executed, so we need to decide who to assign them to
68 if(groupIDA < groupIDB){
69 // cout << "A: " << groupIDA << "\t" << groupIDB << endl;
70 groups[groupIDA].insert(groups[groupIDB].begin(), groups[groupIDB].end()); //merge two groups into groupIDA
71 groups[groupIDB].clear();
75 // cout << "B: " << groupIDA << "\t" << groupIDB << endl;
76 groups[groupIDB].insert(groups[groupIDA].begin(), groups[groupIDA].end()); //merge two groups into groupIDB
77 groups[groupIDA].clear();
84 //windows is gonna gag on the reuse of outFile, will need to make it local...
86 if(groupIDA == -1 && groupIDB == -1){ //we need a new group
88 newGroup.insert(seqA);
89 newGroup.insert(seqB);
90 groups.push_back(newGroup);
93 string fileName = distFile + "." + toString(numGroups) + ".temp";
94 outFile.open(fileName.c_str(), ios::ate);
96 outFile << seqA << '\t' << seqB << '\t' << dist << endl;
100 string fileName = distFile + "." + toString(groupID) + ".temp";
101 if(groupID != prevGroupID){
103 outFile.open(fileName.c_str(), ios::app);
104 prevGroupID = groupID;
106 outFile << seqA << '\t' << seqB << '\t' << dist << endl;
108 if(groupIDA != -1 && groupIDB != -1){ //merge distance files of two groups you merged above
109 string row, column, distance;
110 if(groupIDA<groupIDB){
111 string fileName = distFile + "." + toString(groupIDB) + ".temp";
112 ifstream fileB(fileName.c_str());
114 fileB >> row >> column >> distance;
115 outFile << row << '\t' << column << '\t' << distance << endl;
119 remove(fileName.c_str());
122 string fileName = distFile + "." + toString(groupIDA) + ".temp";
123 ifstream fileA(fileName.c_str());
125 fileA >> row >> column >> distance;
126 outFile << row << '\t' << column << '\t' << distance << endl;
130 remove(fileName.c_str());
140 ifstream bigNameFile(namefile.c_str());
142 cerr << "Error: We can't open the name file\n";
146 map<string, string> nameMap;
147 string name, nameList;
149 bigNameFile >> name >> nameList;
150 nameMap[name] = nameList;
155 for(int i=0;i<numGroups;i++){ //parse names file to match distance files
156 int numSeqsInGroup = groups[i].size();
158 if(numSeqsInGroup > 0){
159 string fileName = namefile + "." + toString(i) + ".temp";
160 ofstream smallNameFile(fileName.c_str(), ios::ate);
162 for(set<string>::iterator gIt=groups[i].begin();gIt!=groups[i].end();gIt++){
163 map<string,string>::iterator nIt = nameMap.find(*gIt);
165 if (nIt != nameMap.end()) {
166 smallNameFile << nIt->first << '\t' << nIt->second << endl;
169 m->mothurOut((*gIt) + " is in your distance file and not in your namefile. Please correct."); m->mothurOutEndLine(); exit(1);
172 smallNameFile.close();
176 //names of singletons
177 if (nameMap.size() != 0) {
178 singleton = namefile + ".extra.temp";
179 ofstream remainingNames(singleton.c_str(), ios::ate);
180 for(map<string,string>::iterator nIt=nameMap.begin();nIt!=nameMap.end();nIt++){
181 remainingNames << nIt->first << '\t' << nIt->second << endl;
183 remainingNames.close();
184 }else { singleton = "none"; }
186 for(int i=0;i<numGroups;i++){
187 if(groups[i].size() > 0){
188 string tempNameFile = namefile + "." + toString(i) + ".temp";
189 string tempDistFile = distFile + "." + toString(i) + ".temp";
191 map<string, string> temp;
192 temp[tempDistFile] = tempNameFile;
193 dists.push_back(temp);
200 catch(exception& e) {
201 m->errorOut(e, "SplitMatrix", "split");
205 //********************************************************************************************************************
206 //sorts biggest to smallest
207 inline bool compareFileSizes(map<string, string> left, map<string, string> right){
212 //get num bytes in file
213 string filename = left.begin()->first;
214 pFile = fopen (filename.c_str(),"rb");
215 string error = "Error opening " + filename;
216 if (pFile==NULL) perror (error.c_str());
218 fseek (pFile, 0, SEEK_END);
219 leftsize=ftell (pFile);
226 //get num bytes in file
227 filename = right.begin()->first;
228 pFile2 = fopen (filename.c_str(),"rb");
229 error = "Error opening " + filename;
230 if (pFile2==NULL) perror (error.c_str());
232 fseek (pFile2, 0, SEEK_END);
233 rightsize=ftell (pFile2);
237 return (leftsize > rightsize);
239 /***********************************************************************/
240 //returns map of distance files -> namefile sorted by distance file size
241 vector< map< string, string> > SplitMatrix::getDistanceFiles(){
244 sort(dists.begin(), dists.end(), compareFileSizes);
248 catch(exception& e) {
249 m->errorOut(e, "SplitMatrix", "getDistanceFiles");
253 /***********************************************************************/
254 SplitMatrix::~SplitMatrix(){}
255 /***********************************************************************/