+++ /dev/null
-#ifndef SHHHSEQSCOMMAND_H
-#define SHHHSEQSCOMMAND_H
-
-/*
- * shhhseqscommand.h
- * Mothur
- *
- * Created by westcott on 11/8/11.
- * Copyright 2011 Schloss Lab. All rights reserved.
- *
- */
-
-#include "command.hpp"
-#include "myseqdist.h"
-#include "seqnoise.h"
-#include "sequenceparser.h"
-#include "deconvolutecommand.h"
-#include "clustercommand.h"
-
-//**********************************************************************************************************************
-
-class ShhhSeqsCommand : public Command {
-
-public:
- ShhhSeqsCommand(string);
- ShhhSeqsCommand();
- ~ShhhSeqsCommand() {}
-
- vector<string> setParameters();
- string getCommandName() { return "shhh.seqs"; }
- string getCommandCategory() { return "Sequence Processing"; }
- string getHelpString();
- string getCitation() { return "http://www.mothur.org/wiki/Shhh.seqs"; }
- string getDescription() { return "shhh.seqs"; }
-
-
- int execute();
- void help() { m->mothurOut(getHelpString()); }
-
-private:
-
- struct linePair {
- int start;
- int end;
- linePair(int i, int j) : start(i), end(j) {}
- };
-
- bool abort;
- string outputDir, fastafile, namefile, groupfile;
- int processors;
- double sigma;
- vector<string> outputNames;
-
- int readData(correctDist*, seqNoise&, vector<string>&, vector<string>&, vector<string>&, vector<int>&);
- int loadData(correctDist*, seqNoise&, vector<string>&, vector<string>&, vector<string>&, vector<int>&, map<string, string>&, vector<Sequence>&);
-
- int driver(seqNoise&, vector<string>&, vector<string>&, vector<string>&, vector<int>&, string, string, string, string);
- vector<string> driverGroups(SequenceParser&, string, string, string, int, int, vector<string>);
- vector<string> createProcessesGroups(SequenceParser&, string, string, string, vector<string>);
- int deconvoluteResults(string, string);
-
-
-
-};
-
-/**************************************************************************************************/
-//custom data structure for threads to use.
-// This is passed by void pointer so it can be any data type
-// that can be passed using a single void pointer (LPVOID).
-struct shhhseqsData {
- string fastafile;
- string namefile;
- string groupfile;
- string newFName, newNName, newMName;
- MothurOut* m;
- int start;
- int end;
- int sigma, threadID;
- vector<string> groups;
- vector<string> mapfileNames;
-
- shhhseqsData(){}
- shhhseqsData(string f, string n, string g, string nff, string nnf, string nmf, vector<string> gr, MothurOut* mout, int st, int en, int s, int tid) {
- fastafile = f;
- namefile = n;
- groupfile = g;
- newFName = nff;
- newNName = nnf;
- newMName = nmf;
- m = mout;
- start = st;
- end = en;
- sigma = s;
- threadID = tid;
- groups = gr;
- }
-};
-
-/**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-#else
-static DWORD WINAPI MyShhhSeqsThreadFunction(LPVOID lpParam){
- shhhseqsData* pDataArray;
- pDataArray = (shhhseqsData*)lpParam;
-
- try {
-
- //parse fasta and name file by group
- SequenceParser parser(pDataArray->groupfile, pDataArray->fastafile, pDataArray->namefile);
-
- //precluster each group
- for (int k = pDataArray->start; k < pDataArray->end; k++) {
-
- int start = time(NULL);
-
- if (pDataArray->m->control_pressed) { return 0; }
-
- pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Processing group " + pDataArray->groups[k] + ":"); pDataArray->m->mothurOutEndLine();
-
- map<string, string> thisNameMap;
- thisNameMap = parser.getNameMap(pDataArray->groups[k]);
- vector<Sequence> thisSeqs = parser.getSeqs(pDataArray->groups[k]);
-
- if (pDataArray->m->control_pressed) { return 0; }
-
- vector<string> sequences;
- vector<string> uniqueNames;
- vector<string> redundantNames;
- vector<int> seqFreq;
-
- seqNoise noise;
- correctDist* correct = new correctDist(1); //we use one processor since we already split up the work load.
-
- //load this groups info in order
- //loadData(correct, noise, sequences, uniqueNames, redundantNames, seqFreq, thisNameMap, thisSeqs);
- //////////////////////////////////////////////////////////////////////////////////////////////////
- bool error = false;
- map<string, string>::iterator it;
-
- for (int i = 0; i < thisSeqs.size(); i++) {
-
- if (pDataArray->m->control_pressed) { return 0; }
-
- if (thisSeqs[i].getName() != "") {
- correct->addSeq(thisSeqs[i].getName(), thisSeqs[i].getAligned());
-
- it = thisNameMap.find(thisSeqs[i].getName());
- if (it != thisNameMap.end()) {
- noise.addSeq(thisSeqs[i].getAligned(), sequences);
- noise.addRedundantName(it->first, it->second, uniqueNames, redundantNames, seqFreq);
- }else {
- pDataArray->m->mothurOut("[ERROR]: " + thisSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct.");
- error = true;
- }
- }
- }
-
- if (error) { return 0; }
- //////////////////////////////////////////////////////////////////////////////////////////////////
-
- if (pDataArray->m->control_pressed) { return 0; }
-
- //calc distances for cluster
- string distFileName = pDataArray->m->getRootName(pDataArray->m->getSimpleName(pDataArray->fastafile)) + pDataArray->groups[k] + ".shhh.dist";
- correct->execute(distFileName);
- delete correct;
-
- if (pDataArray->m->control_pressed) { pDataArray->m->mothurRemove(distFileName); return 0; }
-
- //driver(noise, sequences, uniqueNames, redundantNames, seqFreq, distFileName, newFFile+groups[i], newNFile+groups[i], newMFile+groups[i]+".map");
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- double cutOff = 0.08;
- int minIter = 10;
- int maxIter = 1000;
- double minDelta = 1e-6;
- int numIters = 0;
- double maxDelta = 1e6;
- int numSeqs = sequences.size();
-
- //run cluster command
- string inputString = "phylip=" + distFileName + ", method=furthest, cutoff=0.08";
- pDataArray->m->mothurOut("/******************************************/"); pDataArray->m->mothurOutEndLine();
- pDataArray->m->mothurOut("Running command: cluster(" + inputString + ")"); pDataArray->m->mothurOutEndLine();
-
- Command* clusterCommand = new ClusterCommand(inputString);
- clusterCommand->execute();
-
- map<string, vector<string> > filenames = clusterCommand->getOutputFiles();
- string listFileName = filenames["list"][0];
- string rabundFileName = filenames["rabund"][0]; pDataArray->m->mothurRemove(rabundFileName);
- string sabundFileName = filenames["sabund"][0]; pDataArray->m->mothurRemove(sabundFileName);
-
- delete clusterCommand;
- pDataArray->m->mothurOut("/******************************************/"); pDataArray->m->mothurOutEndLine();
-
- if (pDataArray->m->control_pressed) { pDataArray->m->mothurRemove(distFileName); pDataArray->m->mothurRemove(listFileName); return 0; }
-
- vector<double> distances(numSeqs * numSeqs);
- noise.getDistanceData(distFileName, distances);
- pDataArray->m->mothurRemove(distFileName);
- if (pDataArray->m->control_pressed) { pDataArray->m->mothurRemove(listFileName); return 0; }
-
- vector<int> otuData(numSeqs);
- vector<int> otuFreq;
- vector<vector<int> > otuBySeqLookUp;
- noise.getListData(listFileName, cutOff, otuData, otuFreq, otuBySeqLookUp);
- pDataArray->m->mothurRemove(listFileName);
- if (pDataArray->m->control_pressed) { return 0; }
-
- int numOTUs = otuFreq.size();
-
- vector<double> weights(numOTUs, 0);
- vector<int> change(numOTUs, 1);
- vector<int> centroids(numOTUs, -1);
- vector<int> cumCount(numOTUs, 0);
-
- vector<double> tau(numSeqs, 1);
- vector<int> anP(numSeqs, 0);
- vector<int> anI(numSeqs, 0);
- vector<int> anN(numSeqs, 0);
- vector<vector<int> > aanI = otuBySeqLookUp;
-
- while(numIters < minIter || ((maxDelta > minDelta) && (numIters < maxIter))){
-
- if (pDataArray->m->control_pressed) { return 0; }
-
- noise.updateOTUCountData(otuFreq, otuBySeqLookUp, aanI, anP, anI, cumCount); if (pDataArray->m->control_pressed) { return 0; }
- maxDelta = noise.calcNewWeights(weights, seqFreq, anI, cumCount, anP, otuFreq, tau); if (pDataArray->m->control_pressed) { return 0; }
-
- noise.calcCentroids(anI, anP, change, centroids, cumCount, distances, seqFreq, otuFreq, tau); if (pDataArray->m->control_pressed) { return 0; }
- noise.checkCentroids(weights, centroids); if (pDataArray->m->control_pressed) { return 0; }
-
- otuFreq.assign(numOTUs, 0);
-
- int total = 0;
-
- for(int i=0;i<numSeqs;i++){
- if (pDataArray->m->control_pressed) { return 0; }
-
- double offset = 1e6;
- double norm = 0.0000;
- double minWeight = 0.1;
- vector<double> currentTau(numOTUs);
-
- for(int j=0;j<numOTUs;j++){
- if (pDataArray->m->control_pressed) { return 0; }
- if(weights[j] > minWeight && distances[i * numSeqs+centroids[j]] < offset){
- offset = distances[i * numSeqs+centroids[j]];
- }
- }
-
- for(int j=0;j<numOTUs;j++){
- if (pDataArray->m->control_pressed) { return 0; }
- if(weights[j] > minWeight){
- currentTau[j] = exp(pDataArray->sigma * (-distances[(i * numSeqs + centroids[j])] + offset)) * weights[j];
- norm += currentTau[j];
- }
- else{
- currentTau[j] = 0.0000;
- }
- }
-
- for(int j=0;j<numOTUs;j++){
- if (pDataArray->m->control_pressed) { return 0; }
- currentTau[j] /= norm;
- }
-
- for(int j=0;j<numOTUs;j++){
- if (pDataArray->m->control_pressed) { return 0; }
-
- if(currentTau[j] > 1.0e-4){
- int oldTotal = total;
- total++;
-
- tau.resize(oldTotal+1);
- tau[oldTotal] = currentTau[j];
- otuBySeqLookUp[j][otuFreq[j]] = oldTotal;
- aanI[j][otuFreq[j]] = i;
- otuFreq[j]++;
-
- }
- }
-
- anP.resize(total);
- anI.resize(total);
- }
-
- numIters++;
- }
-
- noise.updateOTUCountData(otuFreq, otuBySeqLookUp, aanI, anP, anI, cumCount); if (pDataArray->m->control_pressed) { return 0; }
-
- vector<double> percentage(numSeqs);
- noise.setUpOTUData(otuData, percentage, cumCount, tau, otuFreq, anP, anI); if (pDataArray->m->control_pressed) { return 0; }
- noise.finishOTUData(otuData, otuFreq, anP, anI, cumCount, otuBySeqLookUp, aanI, tau); if (pDataArray->m->control_pressed) { return 0; }
-
- change.assign(numOTUs, 1);
- noise.calcCentroids(anI, anP, change, centroids, cumCount, distances, seqFreq, otuFreq, tau); if (pDataArray->m->control_pressed) { return 0; }
-
-
- vector<int> finalTau(numOTUs, 0);
- for(int i=0;i<numSeqs;i++){
- if (pDataArray->m->control_pressed) { return 0; }
- finalTau[otuData[i]] += int(seqFreq[i]);
- }
-
- noise.writeOutput(pDataArray->newFName+pDataArray->groups[k], pDataArray->newNName+pDataArray->groups[k], pDataArray->newMName+pDataArray->groups[k]+".map", finalTau, centroids, otuData, sequences, uniqueNames, redundantNames, seqFreq, distances);
-
- ///////////////////////////////////////////////////////////////////////////////////////////////////
-
- if (pDataArray->m->control_pressed) { return 0; }
-
- pDataArray->m->appendFiles(pDataArray->newFName+pDataArray->groups[k], pDataArray->newFName); pDataArray->m->mothurRemove(pDataArray->newFName+pDataArray->groups[k]);
- pDataArray->m->appendFiles(pDataArray->newNName+pDataArray->groups[k], pDataArray->newNName); pDataArray->m->mothurRemove(pDataArray->newNName+pDataArray->groups[k]);
- pDataArray->mapfileNames.push_back(pDataArray->newMName+pDataArray->groups[k]+".map");
-
- pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process group " + pDataArray->groups[k] + "."); pDataArray->m->mothurOutEndLine();
- }
-
- return 0;
-
-
- }
- catch(exception& e) {
- pDataArray->m->errorOut(e, "ShhhSeqsCommand", "MyShhhSeqsThreadFunction");
- exit(1);
- }
-}
-#endif
-/**************************************************************************************************/
-
-#endif