1 #ifndef GETOTUREPCOMMAND_H
2 #define GETOTUREPCOMMAND_H
7 * Created by Sarah Westcott on 4/6/09.
8 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
12 /* The get.oturep command outputs a .fastarep file for each distance you specify, selecting one OTU representative for each bin. */
14 #include "command.hpp"
15 #include "listvector.hpp"
16 #include "inputdata.h"
19 #include "readmatrix.hpp"
20 #include "formatmatrix.h"
21 #include "counttable.h"
// SeqMap: map from int keys to float values. Per the comment on seqVec further down in this
// header, the key is a sequence index and the value is a distance involving that sequence.
23 typedef map<int, float> SeqMap;
33 repStruct(string n, string b, int sb, int s, string g) : name(n), bin(b), size(s), group(g), simpleBin(sb) { }
37 class GetOTURepCommand : public Command {
// Constructor taking a single string argument.
// NOTE(review): presumably the raw option string for the command — confirm against the definition.
40 GetOTURepCommand(string);
// Returns a vector of strings; defined outside this header.
// NOTE(review): presumably the names of the parameters this command accepts — confirm.
44 vector<string> setParameters();
// Returns this command's name, "get.oturep".
string getCommandName() {
    return string("get.oturep");
}
// Returns the category label for this command, "OTU-Based Approaches".
string getCommandCategory() {
    return string("OTU-Based Approaches");
}
// Returns the help text as a string; help() in this class prints it via m->mothurOut().
48 string getHelpString();
// Takes a string and returns a string; defined outside this header.
// NOTE(review): presumably maps an output type to a file-name pattern — confirm against the definition.
49 string getOutputPattern(string);
// Returns the citation for this command: the URL of its mothur wiki page.
string getCitation() {
    return string("http://www.mothur.org/wiki/Get.oturep");
}
// Returns a one-line description of what this command does.
string getDescription() {
    return string("gets a representative sequence for each OTU");
}
55 void help() { m->mothurOut(getHelpString()); }
// Data members. Only labels, nameToIndex and seqVec carry descriptions in this header;
// how the remaining members are used is established in the implementation file, not here.
// NOTE(review): ownership of the raw pointers below is not visible from this header — confirm in the .cpp.
61 ReadMatrix* readMatrix;
62 FormatMatrix* formatMatrix;
63 NameAssignment* nameMap;
65 string filename, fastafile, listfile, namefile, groupfile, label, sorted, phylipfile, countfile, columnfile, distFile, format, outputDir, groups, method;
67 ifstream in, inNames, inRow;
68 bool abort, allLines, groupError, large, weighted, hasGroups;
69 set<string> labels; //holds labels to be used
70 map<string, int> nameToIndex; //maps sequence name to index in sparsematrix
71 map<string, string> nameFileMap;
72 vector<string> outputNames, Groups;
73 map<string, string> outputNameFiles;
76 vector<SeqMap> seqVec; // contains maps with sequence index and distance
77 // for all distances related to a certain sequence
78 vector<int> rowPositions;
// Helper member functions; all are only declared here, with definitions outside this header.
// NOTE(review): apart from findRep, the descriptions below are inferred from names and
// signatures only — confirm against the implementation.

// Two overloads of readNamesFile: one takes a reference to a FastaMap pointer, the other a bool.
80 void readNamesFile(FastaMap*&);
81 void readNamesFile(bool);
// Takes a ListVector pointer and returns an int (presumably a status code — TODO confirm).
82 int process(ListVector*);
84 string findRep(vector<string>, string); // returns the name of the "representative" sequence of given bin or subset of a bin, for groups
// Same signature as findRep; presumably an abundance-based variant — TODO confirm.
85 string findRepAbund(vector<string>, string);
// Take two strings (processFastaNames additionally a FastaMap pointer reference) and return an int.
86 int processNames(string, string);
87 int processFastaNames(string, string, FastaMap*&);