+ }
+
+ if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
+ offset = dist[indexOffset + j];
+ }
+ }
+
+ for(int j=0;j<numOTUs;j++){
+ if(weight[j] > MIN_WEIGHT){
+ newTau[j] = exp(pDataArray->sigma * (-dist[indexOffset + j] + offset)) * weight[j];
+ norms[i] += newTau[j];
+ }
+ else{
+ newTau[j] = 0.0;
+ }
+ }
+
+ for(int j=0;j<numOTUs;j++){
+ newTau[j] /= norms[i];
+ }
+
+ for(int j=0;j<numOTUs;j++){
+ if(newTau[j] > MIN_TAU){
+
+ int oldTotal = total;
+
+ total++;
+
+ singleTau.resize(total, 0);
+ seqNumber.resize(total, 0);
+ seqIndex.resize(total, 0);
+
+ singleTau[oldTotal] = newTau[j];
+
+ aaP[j][nSeqsPerOTU[j]] = oldTotal;
+ aaI[j][nSeqsPerOTU[j]] = i;
+ nSeqsPerOTU[j]++;
+ }
+ }
+
+ }
+
+ /*****************************************************************************************************
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ iter++;
+
+ pDataArray->m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
+
+ }
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ pDataArray->m->mothurOut("\nFinalizing...\n");
+ //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
+ /*****************************************************************************************************
+ int indexFill = 0;
+ for(int i=0;i<numOTUs;i++){
+
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ cumNumSeqs[i] = indexFill;
+ for(int j=0;j<nSeqsPerOTU[i];j++){
+ seqNumber[indexFill] = aaP[i][j];
+ seqIndex[indexFill] = aaI[i][j];
+
+ indexFill++;
+ }
+ }
+ /*****************************************************************************************************
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ //setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
+ /*****************************************************************************************************
+ vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
+
+ for(int i=0;i<numOTUs;i++){
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ for(int j=0;j<nSeqsPerOTU[i];j++){
+ int index = cumNumSeqs[i] + j;
+ double tauValue = singleTau[seqNumber[index]];
+ int sIndex = seqIndex[index];
+ bigTauMatrix[sIndex * numOTUs + i] = tauValue;
+ }
+ }
+
+ for(int i=0;i<numSeqs;i++){
+ double maxTau = -1.0000;
+ int maxOTU = -1;
+ for(int j=0;j<numOTUs;j++){
+ if(bigTauMatrix[i * numOTUs + j] > maxTau){
+ maxTau = bigTauMatrix[i * numOTUs + j];
+ maxOTU = j;
+ }
+ }
+
+ otuData[i] = maxOTU;
+ }
+
+ nSeqsPerOTU.assign(numOTUs, 0);
+
+ for(int i=0;i<numSeqs;i++){
+ int index = otuData[i];
+
+ singleTau[i] = 1.0000;
+ dist[i] = 0.0000;
+
+ aaP[index][nSeqsPerOTU[index]] = i;
+ aaI[index][nSeqsPerOTU[index]] = i;
+
+ nSeqsPerOTU[index]++;
+ }
+
+ //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
+ /*****************************************************************************************************
+ indexFill = 0;
+ for(int i=0;i<numOTUs;i++){
+
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ cumNumSeqs[i] = indexFill;
+ for(int j=0;j<nSeqsPerOTU[i];j++){
+ seqNumber[indexFill] = aaP[i][j];
+ seqIndex[indexFill] = aaI[i][j];
+
+ indexFill++;
+ }
+ }
+ /*****************************************************************************************************/
+
+ /*****************************************************************************************************
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ vector<int> otuCounts(numOTUs, 0);
+ for(int i=0;i<numSeqs;i++) { otuCounts[otuData[i]]++; }
+
+ //calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
+ /*****************************************************************************************************
+ for(int i=0;i<numOTUs;i++){
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ double count = 0;
+ int position = 0;
+ int minFlowGram = 100000000;
+ double minFlowValue = 1e8;
+ change[i] = 0; //FALSE
+
+ for(int j=0;j<nSeqsPerOTU[i];j++){
+ count += singleTau[seqNumber[cumNumSeqs[i] + j]];
+ }
+
+ if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
+ vector<double> adF(nSeqsPerOTU[i]);
+ vector<int> anL(nSeqsPerOTU[i]);
+
+ for(int j=0;j<nSeqsPerOTU[i];j++){
+ int index = cumNumSeqs[i] + j;
+ int nI = seqIndex[index];
+ int nIU = mapSeqToUnique[nI];
+
+ int k;
+ for(k=0;k<position;k++){
+ if(nIU == anL[k]){
+ break;
+ }
+ }
+ if(k == position){
+ anL[position] = nIU;
+ adF[position] = 0.0000;
+ position++;
+ }
+ }
+
+ for(int j=0;j<nSeqsPerOTU[i];j++){
+ int index = cumNumSeqs[i] + j;
+ int nI = seqIndex[index];
+
+ double tauValue = singleTau[seqNumber[index]];
+
+ for(int k=0;k<position;k++){
+ // double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
+ /*****************************************************************************************************
+ int flowAValue = anL[k] * numFlowCells;
+ int flowBValue = nI * numFlowCells;
+
+ double dist = 0;
+
+ for(int l=0;l<lengths[nI];l++){
+ dist += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
+ flowAValue++;
+ flowBValue++;
+ }
+
+ dist = dist / (double)lengths[nI];
+ /*****************************************************************************************************
+ adF[k] += dist * tauValue;
+ }
+ }
+
+ for(int j=0;j<position;j++){
+ if(adF[j] < minFlowValue){
+ minFlowGram = j;
+ minFlowValue = adF[j];
+ }
+ }
+
+ if(centroids[i] != anL[minFlowGram]){
+ change[i] = 1;
+ centroids[i] = anL[minFlowGram];
+ }
+ }
+ else if(centroids[i] != -1){
+ change[i] = 1;
+ centroids[i] = -1;
+ }
+ }
+
+ /*****************************************************************************************************
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ //writeQualities(numOTUs, numFlowCells, flowFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI);
+ if (pDataArray->m->control_pressed) { break; }
+ /*****************************************************************************************************
+ string thisOutputDir = pDataArray->outputDir;
+ if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); }
+ string qualityFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.qual";
+
+ ofstream qualityFile;
+ pDataArray->m->openOutputFile(qualityFileName, qualityFile);
+
+ qualityFile.setf(ios::fixed, ios::floatfield);
+ qualityFile.setf(ios::showpoint);
+ qualityFile << setprecision(6);
+
+ vector<vector<int> > qualities(numOTUs);
+ vector<double> pr(HOMOPS, 0);
+
+
+ for(int i=0;i<numOTUs;i++){
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ int index = 0;
+ int base = 0;
+
+ if(nSeqsPerOTU[i] > 0){
+ qualities[i].assign(1024, -1);
+
+ while(index < numFlowCells){
+ double maxPrValue = 1e8;
+ short maxPrIndex = -1;
+ double count = 0.0000;
+
+ pr.assign(HOMOPS, 0);
+
+ for(int j=0;j<nSeqsPerOTU[i];j++){
+ int lIndex = cumNumSeqs[i] + j;
+ double tauValue = singleTau[seqNumber[lIndex]];
+ int sequenceIndex = aaI[i][j];
+ short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
+
+ count += tauValue;
+
+ for(int s=0;s<HOMOPS;s++){
+ pr[s] += tauValue * pDataArray->singleLookUp[s * NUMBINS + intensity];
+ }
+ }
+
+ maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
+ maxPrValue = pr[maxPrIndex];
+
+ if(count > MIN_COUNT){
+ double U = 0.0000;
+ double norm = 0.0000;
+
+ for(int s=0;s<HOMOPS;s++){
+ norm += exp(-(pr[s] - maxPrValue));
+ }
+
+ for(int s=1;s<=maxPrIndex;s++){
+ int value = 0;
+ double temp = 0.0000;
+
+ U += exp(-(pr[s-1]-maxPrValue))/norm;
+
+ if(U>0.00){
+ temp = log10(U);
+ }
+ else{
+ temp = -10.1;
+ }
+ temp = floor(-10 * temp);
+ value = (int)floor(temp);
+ if(value > 100){ value = 100; }
+
+ qualities[i][base] = (int)value;
+ base++;
+ }
+ }
+
+ index++;
+ }
+ }
+
+
+ if(otuCounts[i] > 0){
+ qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
+
+ int j=4; //need to get past the first four bases
+ while(qualities[i][j] != -1){
+ qualityFile << qualities[i][j] << ' ';
+ j++;
+ }
+ qualityFile << endl;
+ }
+ }
+ qualityFile.close();
+ pDataArray->outputNames.push_back(qualityFileName);
+ /*****************************************************************************************************
+
+ // writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, flowFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);
+ if (pDataArray->m->control_pressed) { break; }
+ /*****************************************************************************************************
+ thisOutputDir = pDataArray->outputDir;
+ if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); }
+ string fastaFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.fasta";
+ ofstream fastaFile;
+ pDataArray->m->openOutputFile(fastaFileName, fastaFile);
+
+ vector<string> names(numOTUs, "");
+
+ for(int i=0;i<numOTUs;i++){
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ int index = centroids[i];
+
+ if(otuCounts[i] > 0){
+ fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
+
+ string newSeq = "";
+
+ for(int j=0;j<numFlowCells;j++){
+
+ char base = pDataArray->flowOrder[j % 4];
+ for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
+ newSeq += base;
+ }
+ }
+
+ fastaFile << newSeq.substr(4) << endl;
+ }
+ }
+ fastaFile.close();
+
+ pDataArray->outputNames.push_back(fastaFileName);
+
+ if(pDataArray->thisCompositeFASTAFileName != ""){
+ pDataArray->m->appendFiles(fastaFileName, pDataArray->thisCompositeFASTAFileName);
+ }
+
+ /*****************************************************************************************************
+
+ //writeNames(thisCompositeNamesFileName, numOTUs, flowFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);
+ if (pDataArray->m->control_pressed) { break; }
+ /*****************************************************************************************************
+ thisOutputDir = pDataArray->outputDir;
+ if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); }
+ string nameFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.names";
+ ofstream nameFileOut;
+ pDataArray->m->openOutputFile(nameFileName, nameFileOut);
+
+ for(int i=0;i<numOTUs;i++){
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ if(otuCounts[i] > 0){
+ nameFileOut << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
+
+ for(int j=1;j<nSeqsPerOTU[i];j++){
+ nameFileOut << ',' << seqNameVector[aaI[i][j]];
+ }
+
+ nameFileOut << endl;
+ }
+ }
+ nameFileOut.close();
+ pDataArray->outputNames.push_back(nameFileName);
+
+
+ if(pDataArray->thisCompositeNameFileName != ""){
+ pDataArray->m->appendFiles(nameFileName, pDataArray->thisCompositeNameFileName);
+ }
+ /*****************************************************************************************************
+
+ //writeClusters(flowFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);
+ if (pDataArray->m->control_pressed) { break; }
+ /*****************************************************************************************************
+ thisOutputDir = pDataArray->outputDir;
+ if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); }
+ string otuCountsFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.counts";
+ ofstream otuCountsFile;
+ pDataArray->m->openOutputFile(otuCountsFileName, otuCountsFile);
+
+ string bases = pDataArray->flowOrder;
+
+ for(int i=0;i<numOTUs;i++){
+
+ if (pDataArray->m->control_pressed) {
+ break;
+ }
+ //output the translated version of the centroid sequence for the otu
+ if(otuCounts[i] > 0){
+ int index = centroids[i];
+
+ otuCountsFile << "ideal\t";
+ for(int j=8;j<numFlowCells;j++){
+ char base = bases[j % 4];
+ for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
+ otuCountsFile << base;
+ }
+ }
+ otuCountsFile << endl;
+
+ for(int j=0;j<nSeqsPerOTU[i];j++){
+ int sequence = aaI[i][j];
+ otuCountsFile << seqNameVector[sequence] << '\t';
+
+ string newSeq = "";
+
+ for(int k=0;k<lengths[sequence];k++){
+ char base = bases[k % 4];
+ int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
+
+ for(int s=0;s<freq;s++){
+ newSeq += base;
+ //otuCountsFile << base;
+ }
+ }
+ otuCountsFile << newSeq.substr(4) << endl;
+ }
+ otuCountsFile << endl;
+ }
+ }
+ otuCountsFile.close();
+ pDataArray->outputNames.push_back(otuCountsFileName)
+ /*****************************************************************************************************
+
+ //writeGroups(flowFileName, numSeqs, seqNameVector);
+ if (pDataArray->m->control_pressed) { break; }
+ /*****************************************************************************************************
+ thisOutputDir = pDataArray->outputDir;
+ if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); }
+ string fileRoot = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName));
+ string groupFileName = fileRoot + "shhh.groups";
+ ofstream groupFile;
+ pDataArray->m->openOutputFile(groupFileName, groupFile);
+
+ for(int i=0;i<numSeqs;i++){
+ if (pDataArray->m->control_pressed) { break; }
+ groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
+ }
+ groupFile.close();
+ pDataArray->outputNames.push_back(groupFileName);
+ /*****************************************************************************************************
+
+ pDataArray->m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
+ }
+
+ if (pDataArray->m->control_pressed) { for (int i = 0; i < pDataArray->outputNames.size(); i++) { pDataArray->m->mothurRemove(pDataArray->outputNames[i]); } return 0; }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "ShhherCommand", "ShhhFlowsThreadFunction");
+ exit(1);
+ }
+}
+#endif
+*/