src/gnFastTranslator.cpp

Go to the documentation of this file.
00001 
00002 // File:            gnFastTranslator.cpp
00003 // Purpose:         Filter for all Sequences
00004 // Description:     translates, converts sequence
00005 // Changes:        
00006 // Version:         libGenome 0.5.1 
00007 // Author:          Aaron Darling 
00008 // Modified by:     
00009 // Copyright:       (c) Aaron Darling 
00010 // Licenses:        See COPYING file for details
00012 #include "gn/gnFastTranslator.h"
00013 #include <iostream>
00014 
00015 //      static data access, avoids static initialization order fiasco
00016 const gnFastTranslator *gnFastTranslator::ProteinDNATranslator(){
00017         const static gnFastTranslator* t_trans = new gnFastTranslator(ProteinDNATranslatorType);
00018         return t_trans;
00019 }
00020 const gnFastTranslator *gnFastTranslator::DNAProteinTranslator(){
00021         const static gnFastTranslator* t_trans = new gnFastTranslator(DNAProteinTranslatorType);
00022         return t_trans;
00023 }
00024 
00025 //      public:
00026 gnFastTranslator::gnFastTranslator()
00027 {
00028         use_default = false;
00029         m_defaultChar = 0;
00030 }
00031 
00032 gnFastTranslator::gnFastTranslator( const gnFastTranslator &sf )
00033 {
00034         m_name = sf.m_name;
00035         use_default = sf.use_default;
00036         m_defaultChar = sf.m_defaultChar;
00037         m_transCache = sf.m_transCache;
00038 }
00039 gnFastTranslator::gnFastTranslator( gnTranslatorType t_type )
00040 {
00041         use_default = false;
00042         m_defaultChar = 0;
00043         switch(t_type){
00044                 case ProteinDNATranslatorType:
00045                         CacheTranslator(gnTranslator::ProteinDNATranslator(), "FLIMVPTAY.HQNKDECGSR", 1);
00046                         break;
00047                 case DNAProteinTranslatorType:
00048                         CacheTranslator(gnTranslator::DNAProteinTranslator(), "ACGTRYKMBVDHSWNX", 3);
00049                         break;
00050         }
00051 }
00052 
00053         // gnSeqC 
00054 gnSeqC gnFastTranslator::Filter( const gnSeqC ch ) const{
00055 /*      for(uint32 i=0; i < m_inputTable.size(); i++){
00056                 if(m_inputTable[i].length() == 1)
00057                         if(compare->Contains(m_inputTable[i][0], ch))
00058                                 return m_outputTable[i][0];
00059         }
00060 */      return m_defaultChar;
00061 }
00062 
00063 void gnFastTranslator::Filter( gnSeqC** seq, gnSeqI& len ) const{
00064 /*      uint32 curpos = 0;
00065         string output;
00066         while(curpos < len){
00067                 uint32 i=0;
00068                 for(; i < m_inputTable.size(); i++){
00069                         //don't compare if there aren't enough chars
00070                         uint32 curlen = m_inputTable[i].length();
00071                         if(len - curpos < curlen)
00072                                 continue;
00073                         if(compare->Contains(m_inputTable[i].data(), *seq + curpos, curlen)){
00074                                 output += m_outputTable[i];
00075                                 curpos += curlen;
00076                                 break;
00077                         }
00078                 }
00079                 if(i == m_inputTable.size()){
00080                         //no match was found.  
00081                         if(use_default)  //fill with the default char?
00082                                 output += m_defaultChar;
00083                         curpos++;
00084                 }
00085         }
00086         if(output.length() > len){
00087                 delete[] *seq;
00088                 *seq = new gnSeqC[output.length()];
00089         }
00090         len = output.length();
00091         memcpy(*seq, output.data(), len);
00092 */}
00093         // string
00094 void gnFastTranslator::Filter( string &seq ) const{
00095         uint32 curpos = 0, outpos = 0;
00096         uint32 len = seq.length();
00097         uint32 width = m_transCache.begin()->first.length();
00098         uint32 out_width = m_transCache.begin()->second.length();
00099         uint32 out_size = (seq.length() / width) * out_width + seq.length() % width + 1;
00100         gnSeqC* output_array = new gnSeqC[out_size];
00101         output_array[out_size-1] = 0;
00102         string seq_upper;
00103         while(curpos < len){
00104                 //transform to upper case
00105                 seq_upper = seq.substr(curpos, width);
00106                 for(uint32 i=0; i < seq_upper.size(); i++)
00107                         seq_upper[i] = toupper(seq_upper[i]);
00108                 
00109                 map<string, string>::const_iterator iter = m_transCache.find(seq_upper);
00110                 
00111                 if(iter == m_transCache.end()){
00112                         //no match was found.  
00113                         if(use_default)  //fill with the default char?
00114                                 output_array[curpos] = m_defaultChar;
00115                         curpos++;
00116                 }else{
00117                         iter->second.copy(output_array + outpos, out_width);
00118                         curpos += width;
00119                         outpos += out_width;
00120                 }
00121         }
00122         seq = output_array;
00123 }
00124 
00125 void gnFastTranslator::CacheTranslator(const gnTranslator* tranny, string inputs, const gnSeqI input_width){
00126         string cur_input;
00127         string cur_trans;
00128         vector<gnSeqI> index;
00129         gnSeqI cur_index = input_width;
00130         
00131         //fill the index array with input_width 0's
00132         for(gnSeqI curI = 0; curI < input_width; curI++)
00133                 index.push_back(0);
00134 
00135         while(true){
00136                 //ensure the validity of our indices
00137                 cur_index = input_width - 1;
00138                 while(index[cur_index] == inputs.length()){
00139                         if(cur_index == 0){
00140                                 return;
00141                         }
00142                         index[cur_index] = 0;
00143                         cur_index--;
00144                         index[cur_index]++;
00145                         continue;
00146                 }
00147                 
00148                 //create a sequence to cache.
00149                 for(gnSeqI i = 0; i < input_width; i++){
00150                         cur_input += inputs[index[i]];
00151                 }
00152                 cur_trans = cur_input;
00153                 tranny->Filter(cur_trans);
00154                 m_transCache[cur_input] = cur_trans;
00155 //              m_transCache.insert(map<string, string>::value_type(cur_input, cur_trans));
00156                 // prepare for next time thru the loop
00157                 cur_input = "";
00158                 index[input_width - 1]++;
00159         }
00160 }

Generated on Mon Mar 28 06:00:21 2005 for libGenome by doxygen 1.3.6