]> git.donarmstrong.com Git - mothur.git/blob - seqnoise.cpp
fixed bug with trim.flows that was adding flow files names to the .flow.files file...
[mothur.git] / seqnoise.cpp
1 /*
2  *  mySeqNoise.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 8/31/11.
6  *  Copyright 2011 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "seqnoise.h"
11 #include "sequence.hpp"
12
13 #define MIN_DELTA 1.0e-6
14 #define MIN_ITER 20
15 #define MAX_ITER 1000
16 #define MIN_COUNT 0.1
17 #define MIN_TAU   1.0e-4
18 #define MIN_WEIGHT 0.1
19
20
21 /**************************************************************************************************/
22 int seqNoise::getSequenceData(string sequenceFileName, vector<string>& sequences){
23         try {
24                 
25                 ifstream sequenceFile;
26                 m->openInputFile(sequenceFileName, sequenceFile);
27                 
28                 while(!sequenceFile.eof()){
29                         
30                         if (m->control_pressed) { break; }
31                         
32                         Sequence temp(sequenceFile); m->gobble(sequenceFile);
33                         
34                         if (temp.getName() != "") {
35                                 sequences.push_back(temp.getAligned());
36                         }
37                 }
38                 sequenceFile.close();
39                 
40                 return 0;
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "seqNoise", "getSequenceData");
44                 exit(1);
45         }
46 }
47 /**************************************************************************************************/
48 int seqNoise::addSeq(string seq, vector<string>& sequences){ 
49         try {
50                 sequences.push_back(seq);
51                 return 0;
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "seqNoise", "addSeq");
55                 exit(1);
56         }
57 }       
58 /**************************************************************************************************/
59 //no checks for file mismatches
60 int seqNoise::getRedundantNames(string namesFileName, vector<string>& uniqueNames, vector<string>& redundantNames, vector<int>& seqFreq){
61         try {
62                 string unique, redundant;
63                 ifstream namesFile;
64                 m->openInputFile(namesFileName, namesFile);
65                 
66                 for(int i=0;i<redundantNames.size();i++){
67                         
68                         if (m->control_pressed) { break; }
69                         
70                         namesFile >> uniqueNames[i]; m->gobble(namesFile);
71                         namesFile >> redundantNames[i]; m->gobble(namesFile);
72                         
73                         seqFreq[i] = m->getNumNames(redundantNames[i]);
74                 }
75                 namesFile.close();
76                 
77                 return 0;
78         }
79         catch(exception& e) {
80                 m->errorOut(e, "seqNoise", "getRedundantNames");
81                 exit(1);
82         }
83 }
84 /**************************************************************************************************/
85 int seqNoise::addRedundantName(string uniqueName, string redundantName, vector<string>& uniqueNames, vector<string>& redundantNames, vector<int>& seqFreq){ 
86         try {
87                 
88                 uniqueNames.push_back(uniqueName);
89                 redundantNames.push_back(redundantName);
90                 seqFreq.push_back(m->getNumNames(redundantName));
91                 
92                 return 0;
93         }
94         catch(exception& e) {
95                 m->errorOut(e, "seqNoise", "addRedundantName");
96                 exit(1);
97         }
98 }       
99 /**************************************************************************************************/
100 int seqNoise::getDistanceData(string distFileName, vector<double>& distances){
101         try {
102                 
103                 ifstream distFile;
104                 m->openInputFile(distFileName, distFile);
105                 
106                 int numSeqs = 0;
107                 string name = "";
108                 
109                 distFile >> numSeqs;
110                 
111                 for(int i=0;i<numSeqs;i++){
112                         
113                         if (m->control_pressed) {  break; }
114                         
115                         distances[i * numSeqs + i] = 0.0000;
116                         
117                         distFile >> name;
118                         
119                         for(int j=0;j<i;j++){
120                                 distFile >> distances[i * numSeqs + j];
121                                 distances[j * numSeqs + i] = distances[i * numSeqs + j];
122                         }
123                 }
124                 
125                 distFile.close();       
126                 
127                 return 0;
128         }
129         catch(exception& e) {
130                 m->errorOut(e, "seqNoise", "getDistanceData");
131                 exit(1);
132         }
133 }
134
135 /**************************************************************************************************/
136 int seqNoise::getListData(string listFileName, double cutOff, vector<int>& otuData, vector<int>& otuFreq, vector<vector<int> >& otuBySeqLookUp){
137         try {
138                 
139                 ifstream listFile;
140                 m->openInputFile(listFileName, listFile);
141                 double threshold;
142                 int numOTUs;
143                 
144                 if(listFile.peek() == 'u'){     m->getline(listFile);   }
145                 
146                 while(listFile){
147                         listFile >> threshold;
148                         
149                         if(threshold < cutOff){
150                                 m->getline(listFile);   
151                         }
152                         else{
153                                 listFile >> numOTUs;
154                                 otuFreq.resize(numOTUs, 0);
155                                 
156                                 for(int i=0;i<numOTUs;i++){
157                                         
158                                         if (m->control_pressed) { return 0; }
159                                         
160                                         string otu;
161                                         listFile >> otu;
162                                         
163                                         int count = 0;
164                                         
165                                         string number = "";
166                                         
167                                         for(int j=0;j<otu.size();j++){
168                                                 if(otu[j] != ','){
169                                                         number += otu[j];
170                                                 }
171                                                 else{
172                                                         int index = atoi(number.c_str());
173                                                         otuData[index] = i;
174                                                         count++;
175                                                         number = "";
176                                                 }
177                                         }
178                                         
179                                         int index = atoi(number.c_str());
180                                         otuData[index] = i;
181                                         count++;
182                                         
183                                         otuFreq[i] = count;
184                                 }
185                                 
186                                 otuBySeqLookUp.resize(numOTUs);
187                                 
188                                 int numSeqs = otuData.size();
189                                 
190                                 for(int i=0;i<numSeqs;i++){
191                                         if (m->control_pressed) { return 0; }
192                                         otuBySeqLookUp[otuData[i]].push_back(i);
193                                 }
194                                 for(int i=0;i<numOTUs;i++){
195                                         if (m->control_pressed) { return 0; }
196                                         for(int j=otuBySeqLookUp[i].size();j<numSeqs;j++){
197                                                 otuBySeqLookUp[i].push_back(0);
198                                         }
199                                 }
200                                 
201                                 break;
202                         }
203                 }
204                 return 0;
205         }
206         catch(exception& e) {
207                 m->errorOut(e, "seqNoise", "getListData");
208                 exit(1);
209         }
210 }
211
212 /**************************************************************************************************/
213 int seqNoise::updateOTUCountData(vector<int> otuFreq,
214                                                                  vector<vector<int> > otuBySeqLookUp,
215                                                                  vector<vector<int> > aanI,
216                                                                  vector<int>& anP,
217                                                                  vector<int>& anI,
218                                                                  vector<int>& cumCount
219                                                                  ){
220         try {
221                 int numOTUs = otuFreq.size();
222                 
223                 int count = 0;
224                 
225                 for(int i=0;i<numOTUs;i++){
226                         cumCount[i] = count;
227                         
228                         if (m->control_pressed) { return 0; }
229                         
230                         for(int j=0;j<otuFreq[i];j++){
231                                 anP[count] = otuBySeqLookUp[i][j];
232                                 anI[count] = aanI[i][j];
233                                 
234                                 count++;
235                         }
236                 }
237                 
238                 return 0;
239         }
240         catch(exception& e) {
241                 m->errorOut(e, "seqNoise", "updateOTUCountData");
242                 exit(1);
243         }
244 }
245 /**************************************************************************************************/
246 double seqNoise::calcNewWeights(
247                                           vector<double>& weights,      //
248                                           vector<int> seqFreq,          //
249                                           vector<int> anI,                      //
250                                           vector<int> cumCount,         //
251                                           vector<int> anP,                      //
252                                           vector<int> otuFreq,          //
253                                           vector<double> tau            //
254                                           ){
255         try {
256                 
257                 int numOTUs = weights.size();
258                 double maxChange = -1;
259                 
260                 cout.flush();
261                 
262                 for(int i=0;i<numOTUs;i++){
263                         
264                         if (m->control_pressed) { return 0; }
265                         
266                         double change = weights[i];
267                         
268                         weights[i] = 0.0000;
269                         
270                         for(int j=0;j<otuFreq[i];j++){
271                                 
272                                 int index1 = cumCount[i] + j;
273                                 int index2 = anI[index1];
274                                 
275                                 double currentTau = tau[anP[index1]];
276                                 double freq = double(seqFreq[index2]);
277                                 
278                                 weights[i] += currentTau * freq;
279                         }
280                         change = fabs(weights[i] - change);
281                         
282                         if(change > maxChange){ maxChange = change;     }
283                         cout.flush();
284                 }
285                 return maxChange;
286                 
287         }
288         catch(exception& e) {
289                 m->errorOut(e, "seqNoise", "calcNewWeights");
290                 exit(1);
291         }
292 }
293
294 /**************************************************************************************************/
295
296 int seqNoise::calcCentroids(
297                                    vector<int> anI,
298                                    vector<int> anP,
299                                    vector<int>& change, 
300                                    vector<int>& centroids, 
301                                    vector<int> cumCount,
302                                    vector<double> distances,///
303                                    vector<int> seqFreq, 
304                                    vector<int> otuFreq, 
305                                    vector<double> tau 
306                                    ){
307         try {
308                 int numOTUs = change.size();
309                 int numSeqs = seqFreq.size();
310                 
311                 for(int i=0;i<numOTUs;i++){
312                         
313                         if (m->control_pressed) { return 0; }
314                         
315                         int minFIndex = -1;
316                         double minFValue = 1e10;
317                         
318                         change[i] = 0;
319                         double count = 0.00000;
320                         
321                         int freqOfOTU = otuFreq[i];
322                         
323                         for(int j=0;j<freqOfOTU;j++){
324                                 int index = cumCount[i] + j;
325                                 count += seqFreq[anI[index]]*tau[anP[index]];
326                         }
327                         
328                         if(freqOfOTU > 0 && count > MIN_COUNT){
329                                 
330                                 vector<double> adF(freqOfOTU);
331                                 vector<int> anL(freqOfOTU);
332                                 
333                                 for(int j=0;j<freqOfOTU;j++){
334                                         anL[j] = anI[cumCount[i] + j];
335                                         adF[j] = 0.0000;
336                                 }
337                                 
338                                 for(int j=0;j<freqOfOTU;j++){           
339                                         int index = cumCount[i] + j;
340                                         double curTau = tau[anP[index]];
341                                         
342                                         for(int k=0;k<freqOfOTU;k++){
343                                                 double dist = distances[anL[j]*numSeqs + anL[k]];
344                                                 
345                                                 adF[k] += dist * curTau * seqFreq[anL[j]];
346                                         }
347                                 }
348                                 
349                                 for(int j=0;j<freqOfOTU;j++){
350                                         if(adF[j] < minFValue){
351                                                 minFIndex = j;
352                                                 minFValue = adF[j];
353                                         }
354                                 }
355                                 
356                                 if(centroids[i] != anL[minFIndex]){
357                                         change[i] = 1;
358                                         centroids[i] = anL[minFIndex];
359                                 }
360                         }
361                         else if(centroids[i] != -1){
362                                 change[i] = 1;
363                                 centroids[i] = -1;                      
364                         }
365                 }
366                 
367                 return 0;
368         }
369         catch(exception& e) {
370                 m->errorOut(e, "seqNoise", "calcCentroids");
371                 exit(1);
372         }
373 }
374
375 /**************************************************************************************************/
376
377 int seqNoise::checkCentroids(vector<double>& weights, vector<int> centroids){
378         try {
379                 int numOTUs = centroids.size();
380                 vector<int> unique(numOTUs, 1);
381                 
382                 double minWeight = MIN_WEIGHT;
383                 for(int i=0;i<numOTUs;i++){
384                         if (m->control_pressed) { return 0; }
385                         if(weights[i] < minWeight){     unique[i] = -1; }
386                 }
387                 
388                 for(int i=0;i<numOTUs;i++){
389                         if (m->control_pressed) { return 0; }
390                         if(unique[i] == 1){
391                                 for(int j=i+1; j<numOTUs;j++){
392                                         if(unique[j] == 1){
393                                                 if(centroids[i] == centroids[j]){
394                                                         unique[j] = 0;
395                                                         weights[i] += weights[j];
396                                                         weights[j] = 0;
397                                                 }
398                                         }
399                                 }
400                         }               
401                 }
402                 
403                 return 0;
404         }
405         catch(exception& e) {
406                 m->errorOut(e, "seqNoise", "checkCentroids");
407                 exit(1);
408         }
409 }
410
411 /**************************************************************************************************/
412
413 int seqNoise::setUpOTUData(vector<int>& otuData, vector<double>& percentage, vector<int> cumCount, vector<double> tau, vector<int> otuFreq, vector<int> anP, vector<int> anI){
414         try {
415
416                 int numOTUs = cumCount.size();
417                 int numSeqs = otuData.size();
418                 
419                 vector<double> bestTau(numSeqs, 0);
420                 vector<double> bestIndex(numSeqs, -1);
421                 
422                 for(int i=0;i<numOTUs;i++){
423                         if (m->control_pressed) { return 0; }
424                         for(int j=0;j<otuFreq[i];j++){
425                                 
426                                 int index1 = cumCount[i] + j;
427                                 double thisTau = tau[anP[index1]];
428                                 int index2 = anI[index1];
429                                 
430                                 if(thisTau > bestTau[index2]){
431                                         bestTau[index2] = thisTau;
432                                         bestIndex[index2] = i;
433                                 }
434                         }               
435                 }
436                 
437                 for(int i=0;i<numSeqs;i++){
438                         if (m->control_pressed) { return 0; }
439                         otuData[i] = bestIndex[i];
440                         percentage[i] = 1 - bestTau[i];
441                 }
442                 return 0;
443         }
444         catch(exception& e) {
445                 m->errorOut(e, "seqNoise", "setUpOTUData");
446                 exit(1);
447         }
448 }
449
450 /**************************************************************************************************/
451
452 int seqNoise::finishOTUData(vector<int> otuData, vector<int>& otuFreq, vector<int>& anP, vector<int>& anI, vector<int>& cumCount, vector<vector<int> >& otuBySeqLookUp, vector<vector<int> >& aanI, vector<double>& tau){
453         try {
454                 int numSeqs = otuData.size();
455                 int numOTUs = otuFreq.size();
456                 int total = numSeqs;
457                 
458                 otuFreq.assign(numOTUs, 0);
459                 tau.assign(numSeqs, 1);
460                 anP.assign(numSeqs, 0);
461                 anI.assign(numSeqs, 0);
462                 
463                 for(int i=0;i<numSeqs;i++){
464                         if (m->control_pressed) { return 0; }
465                         int otu = otuData[i];
466                         total++;
467                         
468                         otuBySeqLookUp[otu][otuFreq[otu]] = i;
469                         aanI[otu][otuFreq[otu]] = i;
470                         otuFreq[otu]++;
471                 }
472                 updateOTUCountData(otuFreq, otuBySeqLookUp, aanI, anP, anI, cumCount);
473                 return 0;
474         }
475         catch(exception& e) {
476                 m->errorOut(e, "seqNoise", "finishOTUData");
477                 exit(1);
478         }
479 }
480
481 /**************************************************************************************************/
482
483 int seqNoise::getLastMatch(char direction, vector<vector<char> >& alignMoves, int i, int j, vector<int>& seqA, vector<int>& seqB){
484         try{
485                 char nullReturn = -1;
486                 
487                 while(i>=1 && j>=1){
488                         if (m->control_pressed) { return nullReturn; }
489                         if(direction == 'd'){
490                                 if(seqA[i-1] == seqB[j-1])      {       return seqA[i-1];       }
491                                 else                                            {       return nullReturn;      }
492                         }
493                         
494                         else if(direction == 'l')               {       j--;                            }
495                         else                                                    {       i--;                            }
496                         
497                         direction = alignMoves[i][j];
498                 }
499                 
500                 return nullReturn;
501         }
502         catch(exception& e) {
503                 m->errorOut(e, "seqNoise", "getLastMatch");
504                 exit(1);
505         }
506 }
507
508 /**************************************************************************************************/
509
510 int seqNoise::countDiffs(vector<int> query, vector<int> ref){
511         try {
512                 //double MATCH = 5.0;
513                 //double MISMATCH = -2.0;
514                 //double GAP = -2.0;
515                 
516                 vector<vector<double> > correctMatrix(4);
517                 for(int i=0;i<4;i++){   correctMatrix[i].resize(4);     }
518                 
519                 correctMatrix[0][0] = 0.000000;         //AA
520                 correctMatrix[1][0] = 11.619259;        //CA
521                 correctMatrix[2][0] = 11.694004;        //TA
522                 correctMatrix[3][0] = 7.748623;         //GA
523                 
524                 correctMatrix[1][1] = 0.000000;         //CC
525                 correctMatrix[2][1] = 7.619657;         //TC
526                 correctMatrix[3][1] = 12.852562;        //GC
527                 
528                 correctMatrix[2][2] = 0.000000;         //TT
529                 correctMatrix[3][2] = 10.964048;        //TG
530                 
531                 correctMatrix[3][3] = 0.000000;         //GG
532                 
533                 for(int i=0;i<4;i++){
534                         for(int j=0;j<i;j++){
535                                 correctMatrix[j][i] = correctMatrix[i][j];
536                         }
537                 }
538                 
539                 int queryLength = query.size();
540                 int refLength = ref.size();
541                 
542                 vector<vector<double> > alignMatrix(queryLength + 1);
543                 vector<vector<char> > alignMoves(queryLength + 1);
544                 
545                 for(int i=0;i<=queryLength;i++){
546                         if (m->control_pressed) { return 0; }
547                         alignMatrix[i].resize(refLength + 1, 0);
548                         alignMoves[i].resize(refLength + 1, 'x');
549                 }
550                 
551                 for(int i=0;i<=queryLength;i++){
552                         if (m->control_pressed) { return 0; }
553                         alignMatrix[i][0] = 15.0 * i;
554                         alignMoves[i][0] = 'u';
555                 }
556                 
557                 for(int i=0;i<=refLength;i++){
558                         if (m->control_pressed) { return 0; }
559                         alignMatrix[0][i] = 15.0 * i;
560                         alignMoves[0][i] = 'l';
561                 }
562                 
563                 for(int i=1;i<=queryLength;i++){
564                         if (m->control_pressed) { return 0; }
565                         for(int j=1;j<=refLength;j++){
566                                 
567                                 double nogap;           
568                                 nogap = alignMatrix[i-1][j-1] + correctMatrix[query[i-1]][ref[j-1]];
569                                 
570                                 
571                                 double gap;
572                                 double left;
573                                 if(i == queryLength){ //terminal gap
574                                         left = alignMatrix[i][j-1];
575                                 }
576                                 else{
577                                         if(ref[j-1] == getLastMatch('l', alignMoves, i, j, query, ref)){
578                                                 gap = 4.0;
579                                         }
580                                         else{
581                                                 gap = 15.0;
582                                         }
583                                         
584                                         left = alignMatrix[i][j-1] + gap;
585                                 }
586                                 
587                                 
588                                 double up;
589                                 if(j == refLength){ //terminal gap
590                                         up = alignMatrix[i-1][j];
591                                 }
592                                 else{
593                                         
594                                         if(query[i-1] == getLastMatch('u', alignMoves, i, j, query, ref)){
595                                                 gap = 4.0;
596                                         }
597                                         else{
598                                                 gap = 15.0;
599                                         }
600                                         
601                                         up = alignMatrix[i-1][j] + gap;
602                                 }
603                                 
604                                 
605                                 
606                                 if(nogap < left){
607                                         if(nogap < up){
608                                                 alignMoves[i][j] = 'd';
609                                                 alignMatrix[i][j] = nogap;
610                                         }
611                                         else{
612                                                 alignMoves[i][j] = 'u';
613                                                 alignMatrix[i][j] = up;
614                                         }
615                                 }
616                                 else{
617                                         if(left < up){
618                                                 alignMoves[i][j] = 'l';
619                                                 alignMatrix[i][j] = left;
620                                         }
621                                         else{
622                                                 alignMoves[i][j] = 'u';
623                                                 alignMatrix[i][j] = up;
624                                         }
625                                 }
626                         }
627                 }
628                 
629                 int i = queryLength;
630                 int j = refLength;
631                 int diffs = 0;
632                 
633                 //      string alignA = "";
634                 //      string alignB = "";
635                 //      string bases = "ACTG";
636                 
637                 while(i > 0 && j > 0){
638                         if (m->control_pressed) { return 0; }
639                         if(alignMoves[i][j] == 'd'){
640                                 //                      alignA = bases[query[i-1]] + alignA;
641                                 //                      alignB = bases[ref[j-1]] + alignB;
642                                 
643                                 if(query[i-1] != ref[j-1])      {       diffs++;        }
644                                 
645                                 i--;
646                                 j--;
647                         }
648                         else if(alignMoves[i][j] == 'u'){
649                                 if(j != refLength){
650                                         //                              alignA = bases[query[i-1]] + alignA;
651                                         //                              alignB = '-' + alignB;
652                                         
653                                         diffs++;
654                                 }
655                                 
656                                 i--;
657                         }
658                         else if(alignMoves[i][j] == 'l'){
659                                 if(i != queryLength){
660                                         //                              alignA = '-' + alignA;
661                                         //                              alignB = bases[ref[j-1]] + alignB;
662                                         
663                                         diffs++;
664                                 }
665                                 
666                                 j--;
667                         }
668                 }
669                 
670                 //      cout << diffs << endl;
671                 //      cout << alignA << endl;
672                 //      cout << alignB << endl;
673                 //      cout << endl;
674                 
675                 return diffs;
676         }
677         catch(exception& e) {
678                 m->errorOut(e, "seqNoise", "countDiffs");
679                 exit(1);
680         }
681         
682 }
683
684 /**************************************************************************************************/
685
686 vector<int> seqNoise::convertSeq(string bases){
687         try {
688                 vector<int> numbers(bases.length(), -1);
689                 
690                 for(int i=0;i<bases.length();i++){
691                         if (m->control_pressed) { return numbers; }
692                         
693                         char b = bases[i];
694                         
695                         if(b == 'A')    {       numbers[i] = 0; }
696                         else if(b=='C') {       numbers[i] = 1; }
697                         else if(b=='T') {       numbers[i] = 2; }
698                         else if(b=='G') {       numbers[i] = 3; }
699                         else                    {       numbers[i] = 0; }
700                 }
701                 
702                 return numbers;
703         }
704         catch(exception& e) {
705                 m->errorOut(e, "seqNoise", "convertSeq");
706                 exit(1);
707         }
708 }
709
710 /**************************************************************************************************/
711
712 string seqNoise::degapSeq(string aligned){
713         try {
714                 string unaligned = "";
715                 
716                 for(int i=0;i<aligned.length();i++){
717                         
718                         if (m->control_pressed) { return ""; }
719                         
720                         if(aligned[i] != '-' && aligned[i] != '.'){
721                                 unaligned += aligned[i];
722                         }
723                 }
724                 
725                 return unaligned;
726         }
727         catch(exception& e) {
728                 m->errorOut(e, "seqNoise", "degapSeq");
729                 exit(1);
730         }
731 }
732
733 /**************************************************************************************************/
734
735 int seqNoise::writeOutput(string fastaFileName, string namesFileName, string uMapFileName, vector<int> finalTau, vector<int> centroids, vector<int> otuData, vector<string> sequences, vector<string> uniqueNames, vector<string> redundantNames, vector<int> seqFreq, vector<double>& distances){
736         try {
737                 int numOTUs = finalTau.size();
738                 int numSeqs = uniqueNames.size();
739                 
740                 ofstream fastaFile(fastaFileName.c_str());
741                 ofstream namesFile(namesFileName.c_str());
742                 ofstream uMapFile(uMapFileName.c_str());
743                 
744                 vector<int> maxSequenceAbund(numOTUs, 0);
745                 vector<int> maxSequenceIndex(numOTUs, 0);
746                 
747                 for(int i=0;i<numSeqs;i++){
748                         if (m->control_pressed) { return 0; }
749                         if(maxSequenceAbund[otuData[i]] < seqFreq[i]){
750                                 maxSequenceAbund[otuData[i]] = seqFreq[i];
751                                 maxSequenceIndex[otuData[i]] = i;
752                         }
753                 }
754                 
755                 int count = 1;
756                 
757                 for(int i=0;i<numOTUs;i++){
758                         if (m->control_pressed) { return 0; }
759                         
760                         if(finalTau[i] > 0){
761                                 
762                                 if(maxSequenceIndex[i] != centroids[i] && distances[maxSequenceIndex[i]*numSeqs + centroids[i]] == 0){
763                                         //                              cout << uniqueNames[centroids[i]] << '\t' << uniqueNames[maxSequenceIndex[i]] << '\t' << count << endl;
764                                         centroids[i] = maxSequenceIndex[i];
765                                 }
766                                 
767                                 int index = centroids[i];
768                                 
769                                 fastaFile << '>' << uniqueNames[index] << endl << sequences[index] << endl;
770                                 namesFile << uniqueNames[index] << '\t';
771                                 
772                                 string refSeq = sequences[index];
773                                 string redundantSeqs = redundantNames[index];;
774                                 
775                                 
776                                 vector<freqData> frequencyData;
777                                 
778                                 for(int j=0;j<numSeqs;j++){
779                                         if(otuData[j] == i && j != index){
780                                                 frequencyData.push_back(freqData(j, seqFreq[j]));
781                                         }
782                                 }
783                                 sort(frequencyData.rbegin(), frequencyData.rend());
784                                 
785                                 string refDegap = degapSeq(refSeq);
786                                 vector<int> rUnalign = convertSeq(refDegap);
787                                 
788                                 uMapFile << "ideal_seq_" << count << '\t' << finalTau[i] << endl;
789                                 uMapFile << uniqueNames[index] << '\t' << seqFreq[index] << "\t0\t" << refDegap << endl;
790                                 
791                                 
792                                 for(int j=0;j<frequencyData.size();j++){
793                                         if (m->control_pressed) { return 0; }
794                                         redundantSeqs += ',' + redundantNames[frequencyData[j].index];
795                                         
796                                         uMapFile << uniqueNames[frequencyData[j].index] << '\t' << seqFreq[frequencyData[j].index] << '\t';
797                                         
798                                         string querySeq = sequences[frequencyData[j].index];
799                                         
800                                         string queryDegap = degapSeq(querySeq);
801                                         vector<int> qUnalign = convertSeq(queryDegap);
802                                         
803                                         int udiffs = countDiffs(qUnalign, rUnalign);
804                                         uMapFile << udiffs << '\t' << queryDegap << endl;
805                                         
806                                 }                                       
807                                 
808                                 uMapFile << endl;
809                                 namesFile << redundantSeqs << endl;
810                                 count++;
811                                 
812                         }
813                 }
814                 fastaFile.close();
815                 namesFile.close();
816                 uMapFile.close();
817                 return 0;
818         }
819         catch(exception& e) {
820                 m->errorOut(e, "seqNoise", "writeOutput");
821                 exit(1);
822         }
823 }
824
825 /**************************************************************************************************
826
827 int main(int argc, char *argv[]){
828         
829         double sigma = 100;
830         sigma = atof(argv[5]);
831         
832         double cutOff = 0.08;
833         int minIter = 10;
834         int maxIter = 1000;
835         double minDelta = 1e-6;
836         
837         string sequenceFileName = argv[1];
838         string fileNameStub = sequenceFileName.substr(0,sequenceFileName.find_last_of('.')) + ".shhh";
839         
840         vector<string> sequences;
841         getSequenceData(sequenceFileName, sequences);
842         
843         int numSeqs = sequences.size();
844         
845         vector<string> uniqueNames(numSeqs);
846         vector<string> redundantNames(numSeqs);
847         vector<int> seqFreq(numSeqs);
848         
849         string namesFileName = argv[4];
850         getRedundantNames(namesFileName, uniqueNames, redundantNames, seqFreq);
851         
852         string distFileName = argv[2];
853         vector<double> distances(numSeqs * numSeqs);
854         getDistanceData(distFileName, distances);
855         
856         string listFileName = argv[3];
857         vector<int> otuData(numSeqs);
858         vector<int> otuFreq;
859         vector<vector<int> > otuBySeqLookUp;
860         
861         getListData(listFileName, cutOff, otuData, otuFreq, otuBySeqLookUp);
862         
863         int numOTUs = otuFreq.size();
864         
865         vector<double> weights(numOTUs, 0);
866         vector<int> change(numOTUs, 1);
867         vector<int> centroids(numOTUs, -1);
868         vector<int> cumCount(numOTUs, 0);
869         
870         vector<double> tau(numSeqs, 1);
871         vector<int> anP(numSeqs, 0);
872         vector<int> anI(numSeqs, 0);
873         vector<int> anN(numSeqs, 0);
874         vector<vector<int> > aanI = otuBySeqLookUp;
875         
876         int numIters = 0;
877         double maxDelta = 1e6;
878         
879         while(numIters < minIter || ((maxDelta > minDelta) && (numIters < maxIter))){
880                 
881                 updateOTUCountData(otuFreq, otuBySeqLookUp, aanI, anP, anI, cumCount);
882                 maxDelta = calcNewWeights(weights, seqFreq, anI, cumCount, anP, otuFreq, tau);
883                 
884                 calcCentroids(anI, anP, change, centroids, cumCount, distances, seqFreq, otuFreq, tau);
885                 checkCentroids(weights, centroids);
886                 
887                 otuFreq.assign(numOTUs, 0);
888                 
889                 int total = 0;
890                 
891                 for(int i=0;i<numSeqs;i++){
892                         double offset = 1e6;
893                         double norm = 0.0000;
894                         double minWeight = MIN_WEIGHT;
895                         vector<double> currentTau(numOTUs);
896                         
897                         for(int j=0;j<numOTUs;j++){
898                                 if(weights[j] > minWeight && distances[i * numSeqs+centroids[j]] < offset){
899                                         offset = distances[i * numSeqs+centroids[j]];
900                                 }
901                         }
902                         
903                         for(int j=0;j<numOTUs;j++){
904                                 if(weights[j] > minWeight){
905                                         currentTau[j] = exp(sigma * (-distances[(i * numSeqs + centroids[j])] + offset)) * weights[j];
906                                         norm += currentTau[j];
907                                 }
908                                 else{
909                                         currentTau[j] = 0.0000;
910                                 }
911                         }                       
912                         
913                         for(int j=0;j<numOTUs;j++){
914                                 currentTau[j] /= norm;
915                         }
916                         
917                         for(int j=0;j<numOTUs;j++){
918                                 
919                                 if(currentTau[j] > MIN_TAU){
920                                         int oldTotal = total;
921                                         total++;
922                                         
923                                         tau.resize(oldTotal+1);
924                                         tau[oldTotal] = currentTau[j];
925                                         otuBySeqLookUp[j][otuFreq[j]] = oldTotal;
926                                         aanI[j][otuFreq[j]] = i;
927                                         otuFreq[j]++;
928                                         
929                                 }
930                         }
931                         
932                         anP.resize(total);
933                         anI.resize(total);
934                 }
935                 
936                 numIters++;
937         }
938         
939         updateOTUCountData(otuFreq, otuBySeqLookUp, aanI, anP, anI, cumCount);
940         
941         vector<double> percentage(numSeqs);
942         setUpOTUData(otuData, percentage, cumCount, tau, otuFreq, anP, anI);
943         finishOTUData(otuData, otuFreq, anP, anI, cumCount, otuBySeqLookUp, aanI, tau);
944         
945         change.assign(numOTUs, 1);
946         calcCentroids(anI, anP, change, centroids, cumCount, distances, seqFreq, otuFreq, tau);
947         
948         
949         vector<int> finalTau(numOTUs, 0);
950         for(int i=0;i<numSeqs;i++){
951                 finalTau[otuData[i]] += int(seqFreq[i]);
952         }
953         
954         writeOutput(fileNameStub, finalTau, centroids, otuData, sequences, uniqueNames, redundantNames, seqFreq, distances);
955         
956         return 0;
957 }
958
959 /**************************************************************************************************/