A78782AA10A1B1CB0086103D /* alignmentdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignmentdb.cpp; sourceTree = SOURCE_ROOT; };
A787844310A1EBDD0086103D /* knn.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = knn.h; sourceTree = SOURCE_ROOT; };
A787844410A1EBDD0086103D /* knn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = knn.cpp; sourceTree = SOURCE_ROOT; };
- A794200F11107897003AECCD /* distancedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = distancedb.cpp; sourceTree = "<group>"; };
- A794201011107897003AECCD /* distancedb.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = distancedb.hpp; sourceTree = "<group>"; };
+ A794200F11107897003AECCD /* distancedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = distancedb.cpp; sourceTree = SOURCE_ROOT; };
+ A794201011107897003AECCD /* distancedb.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = distancedb.hpp; sourceTree = SOURCE_ROOT; };
A7B04491106CC3E60046FC83 /* chimeraslayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraslayer.h; sourceTree = SOURCE_ROOT; };
A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeraslayer.cpp; sourceTree = SOURCE_ROOT; };
A7B0450C106CEEC90046FC83 /* slayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = slayer.h; sourceTree = SOURCE_ROOT; };
namefile = validParameter.validFile(parameters, "name", true);
if (namefile == "not found") { namefile = ""; }
else if (namefile == "not open") { abort = true; }
-// else { readNameFile(); }
+ else { readNameFile(); }
string temp = validParameter.validFile(parameters, "diffs", false); if(temp == "not found"){ temp = "1"; }
convert(temp, diffs);
if (abort == true) { return 0; }
//reads fasta file and return number of seqs
- int numSeqs = readNamesFASTA(); //fills alignSeqs and makes all seqs active
+ int numSeqs = readFASTA(); //fills alignSeqs and makes all seqs active
if (numSeqs == 0) { mothurOut("Error reading fasta file...please correct."); mothurOutEndLine(); return 0; }
if (diffs > length) { mothurOut("Error: diffs is greater than your sequence length."); mothurOutEndLine(); return 0; }
exit(1);
}
}
-/**************************************************************************************************/
+/**************************************************************************************************
int PreClusterCommand::readFASTA(){
try {
// ifstream inFasta;
}
/**************************************************************************************************/
//this seems to require the names and fasta file to be in the same order???
-int PreClusterCommand::readNamesFASTA(){
+//reads every sequence in fastafile into alignSeqs and returns how many were stored.
+//with a names file, each sequence's group size and name list are looked up by sequence name
+//in the sizes and names maps (filled by readNameFile); without one, each sequence is a group of 1.
+//length ends up as the longest aligned sequence length seen.
+//exits if a fasta sequence is missing from the names file.
+int PreClusterCommand::readFASTA(){
try {
- ifstream inNames;
+ //ifstream inNames;
ifstream inFasta;
- openInputFile(namefile, inNames);
+ //openInputFile(namefile, inNames);
openInputFile(fastafile, inFasta);
- string firstCol, secondCol, nameString;
+ //string firstCol, secondCol, nameString;
length = 0;
- while (inFasta && inNames) {
+ while (!inFasta.eof()) {
- inNames >> firstCol >> secondCol;
- nameString = secondCol;
+ //inNames >> firstCol >> secondCol;
+ //nameString = secondCol;
- gobble(inNames);
- int size = 1;
- while (secondCol.find_first_of(',') != -1) {
- size++;
- secondCol = secondCol.substr(secondCol.find_first_of(',')+1, secondCol.length());
- }
+ //gobble(inNames);
+ //int size = 1;
+ //while (secondCol.find_first_of(',') != -1) {
+ // size++;
+ // secondCol = secondCol.substr(secondCol.find_first_of(',')+1, secondCol.length());
+ //}
Sequence seq(inFasta); gobble(inFasta);
- if (seq.getName() != firstCol) { mothurOut(seq.getName() + " is not in your names file, please correct."); mothurOutEndLine(); exit(1); }
- else{
- seqPNode tempNode(size, seq, nameString);
- alignSeqs.push_back(tempNode);
- if (seq.getAligned().length() > length) { length = alignSeqs[0].seq.getAligned().length(); }
- }
+
+ if (seq.getName() != "") { //can get "" if commented line is at end of fasta file
+ if (namefile != "") {
+ itSize = sizes.find(seq.getName());
+
+ if (itSize == sizes.end()) { mothurOut(seq.getName() + " is not in your names file, please correct."); mothurOutEndLine(); exit(1); }
+ else{
+ seqPNode tempNode(itSize->second, seq, names[seq.getName()]);
+ alignSeqs.push_back(tempNode);
+ //record this sequence's own length; reading alignSeqs[0] here could never raise length past the first sequence
+ if (seq.getAligned().length() > length) { length = seq.getAligned().length(); }
+ }
+ }else { //no names file, you are identical to yourself
+ seqPNode tempNode(1, seq, seq.getName());
+ alignSeqs.push_back(tempNode);
+ //record this sequence's own length; reading alignSeqs[0] here could never raise length past the first sequence
+ if (seq.getAligned().length() > length) { length = seq.getAligned().length(); }
+ }
+ }
}
inFasta.close();
- inNames.close();
+ //inNames.close();
return alignSeqs.size();
}
catch(exception& e) {
- errorOut(e, "PreClusterCommand", "readNamesFASTA");
+ errorOut(e, "PreClusterCommand", "readFASTA");
exit(1);
}
}
exit(1);
}
}
+/**************************************************************************************************/
+//reads the names file (two whitespace separated columns per record) and fills:
+//  names[firstCol] = the second column exactly as read (comma separated list of names)
+//  sizes[firstCol] = how many names are in that list (number of commas + 1)
+//exits through errorOut if an exception is thrown.
+void PreClusterCommand::readNameFile(){
+	try {
+		ifstream in;
+		openInputFile(namefile, in);
+		string firstCol, secondCol;
+
+		//the extraction itself is the loop test, so a failed read (empty file, lone trailing token)
+		//ends the loop instead of storing an empty or stale record
+		while (in >> firstCol >> secondCol) {
+			gobble(in);
+			names[firstCol] = secondCol;
+
+			//one pass over the list; no repeated substr copies
+			int size = 1;
+			for (string::size_type i = 0; i < secondCol.length(); i++) {
+				if (secondCol[i] == ',') { size++; }
+			}
+			sizes[firstCol] = size;
+		}
+		in.close();
+	}
+	catch(exception& e) {
+		errorOut(e, "PreClusterCommand", "readNameFile");
+		exit(1);
+	}
+}
/**************************************************************************************************/
bool abort;
string fastafile, namefile, outputDir;
vector<seqPNode> alignSeqs; //maps the number of identical seqs to a sequence
-// map<string, string> names; //represents the names file first column maps to second column
-// map<string, int> sizes; //this map a seq name to the number of identical seqs in the names file
+ map<string, string> names; //represents the names file first column maps to second column
+ map<string, int> sizes; //this maps a seq name to the number of identical seqs in the names file
+ map<string, int>::iterator itSize;
// map<string, bool> active; //maps sequence name to whether it has already been merged or not.
int readFASTA();
- int readNamesFASTA();
+ void readNameFile();
+ //int readNamesFASTA();
int calcMisMatches(string, string);
void printData(string, string); //fasta filename, names file name
};
AlignCheckCommand::AlignCheckCommand(string option){
try {
abort = false;
+ haderror = 0;
//allow user to run help
if(option == "help") { help(); abort = true; }
if (seq.getName() != "") {
statData data = getStats(seq.getAligned());
+ if (haderror == 1) { break; }
+
out << seq.getName() << '\t' << data.pound << '\t' << data.dash << '\t' << data.plus << '\t' << data.equal << '\t';
out << data.loop << '\t' << data.tilde << '\t' << data.total << endl;
}
statData data;
sequence = "*" + sequence; // need to pad the sequence so we can index it by 1
- int seqLength = sequence.length();
- for(int i=1;i<seqLength;i++){
+ int length = sequence.length();
+
+ if (length != seqLength) { mothurOut("your sequences are " + toString(length) + " long, but your map file only contains " + toString(seqLength) + " entries. please correct."); mothurOutEndLine(); haderror = 1; return data; }
+
+ for(int i=1;i<length;i++){
if(structMap[i] != 0){
if(sequence[i] == 'A'){
if(sequence[structMap[i]] == 'T') { data.tilde++; }