src/gnFilter.cpp

Go to the documentation of this file.
00001 
00002 // File:            gnFilter.cpp
00003 // Purpose:         Filter for all Sequences
00004 // Description:     Filters sequences, translates, reverse complement, converts
00005 //                   additions, etc.
00006 // Changes:        
00007 // Version:         libGenome 0.5.1 
00008 // Author:          Aaron Darling 
00009 // Modified by:     
00010 // Copyright:       (c) Aaron Darling 
00011 // Licenses:        See COPYING file for details
00013 #include "gn/gnFilter.h"
00014 #include "gn/gnDebug.h"
00015 
00016 //      public:
00017 const gnFilter *gnFilter::alphabetCharacterFilter(){
00018         const static gnFilter* t_filt = new gnFilter(alphabetCharacterFilterType);
00019         return t_filt;
00020 }
00021 
00022 const gnFilter *gnFilter::numberCharacterFilter(){
00023         const static gnFilter* t_filt = new gnFilter(numberCharacterFilterType);
00024         return t_filt;
00025 }
00026 
00027 
00028 const gnFilter *gnFilter::proteinSeqFilter(){
00029         const static gnFilter* t_filt = new gnFilter(proteinSeqFilterType);
00030         return t_filt;
00031 }
00032 
00033 const gnFilter *gnFilter::basicDNASeqFilter(){
00034         const static gnFilter* t_filt = new gnFilter(basicDNASeqFilterType);
00035         return t_filt;
00036 }
00037 
00038 const gnFilter *gnFilter::fullDNASeqFilter(){
00039         const static gnFilter* t_filt = new gnFilter(fullDNASeqFilterType);
00040         return t_filt;
00041 }
00042 
00043 const gnFilter *gnFilter::basicRNASeqFilter(){
00044         const static gnFilter* t_filt = new gnFilter(basicRNASeqFilterType);
00045         return t_filt;
00046 }
00047 
00048 const gnFilter *gnFilter::fullRNASeqFilter(){
00049         const static gnFilter* t_filt = new gnFilter(fullRNASeqFilterType);
00050         return t_filt;
00051 }
00052 
00053 const gnFilter *gnFilter::DNAtoRNAFilter(){
00054         const static gnFilter* t_filt = new gnFilter(DNAtoRNAFilterType);
00055         return t_filt;
00056 }
00057 
00058 const gnFilter *gnFilter::RNAtoDNAFilter(){
00059         const static gnFilter* t_filt = new gnFilter(RNAtoDNAFilterType);
00060         return t_filt;
00061 }
00062 
00063 const gnFilter *gnFilter::DNAComplementFilter(){
00064         const static gnFilter* t_filt = new gnFilter(DNAComplementFilterType);
00065         return t_filt;
00066 }
00067 
00068 const gnFilter *gnFilter::RNAComplementFilter(){
00069         const static gnFilter* t_filt = new gnFilter(RNAComplementFilterType);
00070         return t_filt;
00071 }
00072 
00073 
00074 //      public:
00075 gnFilter::gnFilter()
00076 {
00077         m_defaultChar = 'n';
00078         m_rDefaultChar = 'n';
00079         for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00080                 m_pairArray[i] = NO_REVCOMP_CHAR;
00081 }
00082 gnFilter::gnFilter( const gnSeqC defaultChar, const gnSeqC rdefaultChar )
00083 {
00084         m_defaultChar = defaultChar; 
00085         m_rDefaultChar = rdefaultChar;
00086         for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00087                 m_pairArray[i] = NO_REVCOMP_CHAR;
00088 }
00089 
00090 gnFilter::gnFilter( const gnFilter &sf )
00091 {
00092         m_name = sf.m_name;
00093         for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00094                 m_pairArray[i] = sf.m_pairArray[i];
00095         m_defaultChar = sf.m_defaultChar;
00096         m_rDefaultChar = sf.m_rDefaultChar;
00097 }
00098 
00099 gnFilter::gnFilter( const gnFilterType f_type ){
00100         for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00101                 m_pairArray[i] = NO_REVCOMP_CHAR;
00102         switch(f_type){
00103                 case alphabetCharacterFilterType:
00104                         CreateAlphabetCharacterFilter();
00105                         break;
00106                 case numberCharacterFilterType:
00107                         CreateNumberCharacterFilter();
00108                         break;
00109                 case proteinSeqFilterType:
00110                         CreateProteinFilter();
00111                         break;
00112                 case basicDNASeqFilterType:
00113                         CreateBasicDNAFilter();
00114                         break;
00115                 case fullDNASeqFilterType:
00116                         CreateFullDNAFilter();
00117                         break;
00118                 case basicRNASeqFilterType:
00119                         CreateBasicRNAFilter();
00120                         break;
00121                 case fullRNASeqFilterType:
00122                         CreateFullRNAFilter();
00123                         break;
00124                 case DNAtoRNAFilterType:
00125                         CreateDNAtoRNAFilter();
00126                         break;
00127                 case RNAtoDNAFilterType:
00128                         CreateRNAtoDNAFilter();
00129                         break;
00130                 case DNAComplementFilterType:
00131                         CreateDNAComplementFilter();
00132                         break;          
00133                 case RNAComplementFilterType:
00134                         CreateRNAComplementFilter();
00135                         break;
00136         }
00137 }
00138 
00139 
00140 gnFilter::~gnFilter()
00141 {
00142 }
00143 
00144 inline
00145 void gnFilter::Filter( gnSeqC** seq, gnSeqI& len ) const
00146 {
00147         Array<gnSeqC> array_buf( len );
00148         gnSeqC* tmp = array_buf.data;
00149         gnSeqI c=0;
00150         for(uint32 i=0; i < len; i++)
00151                 if(IsValid((*seq)[i]))
00152                         tmp[c++] = m_pairArray[(*seq)[i]];
00153         len = c;
00154         memcpy(*seq, tmp, len);
00155 }
00156 
00157 void gnFilter::ReverseFilter( gnSeqC** seq, gnSeqI& len ) const
00158 {
00159         gnSeqC tmp, dum;
00160         uint32 halfLen = len/2;
00161         uint32 end = len - 1;
00162         uint32 curB = 0;
00163         uint32 curE = end;
00164         for( uint32 i=0; i < halfLen ; ++i )
00165         {
00166                 tmp = m_pairArray[(*seq)[i]];
00167                 dum = m_pairArray[(*seq)[ end - i ]];
00168                 if(dum != NO_REVCOMP_CHAR)
00169                         (*seq)[ curB++ ] = dum;
00170                 if(tmp != NO_REVCOMP_CHAR)
00171                         (*seq)[ curE-- ] = tmp;
00172         }
00173         if(len&0x1){
00174                 tmp = m_pairArray[(*seq)[halfLen]];
00175                 if(tmp != NO_REVCOMP_CHAR)
00176                         (*seq)[curB++] = tmp;
00177         }
00178         // now for the memmove
00179         if(curE >= curB){
00180                 memmove(*seq+curB, *seq+curE+1, end - curE);
00181                 len = end - curE + curB;
00182         }
00183 
00184 }
00185 
00186 void gnFilter::Filter( string &seq ) const
00187 {
00188         gnSeqI c=0;
00189         for(uint32 i=0; i < seq.length(); i++)
00190                 if(IsValid(seq[i]))
00191                         seq[c++] = m_pairArray[seq[i]];
00192 }
00193 
00194 void gnFilter::ReverseFilter( string &seq ) const
00195 {
00196         gnSeqC tmp, dum;
00197         uint32 halfLen = seq.length()/2;
00198         uint32 end = seq.length() - 1;
00199         uint32 curB = 0;
00200         uint32 curE = end;
00201         for( uint32 i=0; i < halfLen ; ++i )
00202         {
00203                 tmp = m_pairArray[seq[i]];
00204                 dum = m_pairArray[seq[ end - i ]];
00205                 if(dum != NO_REVCOMP_CHAR)
00206                         seq[ curB++ ] = dum;
00207                 if(tmp != NO_REVCOMP_CHAR)
00208                         seq[ curE-- ] = tmp;
00209         }
00210         if(seq.length()&0x1){
00211                 tmp = m_pairArray[seq[halfLen]];
00212                 if(tmp != NO_REVCOMP_CHAR)
00213                         seq[curB++] = tmp;
00214         }
00215         // now for the memmove
00216         if(curE >= curB){
00217                 seq.erase(curB, curE-curB);
00218         }
00219 }
00220 
00221 // standard filters
00222 void gnFilter::CreateAlphabetCharacterFilter()
00223 {
00224         SetDefaultChar( 0, 0 );
00225         SetName( "Alphabet Character Filter" );
00226         SetPair( 'A', 'a' );
00227         SetPair( 'B', 'b' );
00228         SetPair( 'C', 'c' );
00229         SetPair( 'D', 'd' );
00230         SetPair( 'E', 'e' );
00231         SetPair( 'F', 'f' );
00232         SetPair( 'G', 'g' );
00233         SetPair( 'H', 'h' );
00234         SetPair( 'I', 'i' );
00235         SetPair( 'J', 'j' );
00236         SetPair( 'K', 'k' );
00237         SetPair( 'L', 'l' );
00238         SetPair( 'M', 'm' );
00239         SetPair( 'N', 'n' );
00240         SetPair( 'O', 'o' );
00241         SetPair( 'P', 'p' );
00242         SetPair( 'Q', 'q' );
00243         SetPair( 'R', 'r' );
00244         SetPair( 'S', 's' );
00245         SetPair( 'T', 't' );
00246         SetPair( 'U', 'u' );
00247         SetPair( 'V', 'v' );
00248         SetPair( 'W', 'w' );
00249         SetPair( 'X', 'x' );
00250         SetPair( 'Y', 'y' );
00251         SetPair( 'Z', 'z' );
00252 }
00253 
00254 void gnFilter::CreateNumberCharacterFilter()
00255 {
00256         SetDefaultChar( 0, 0 );
00257         SetName( "Number Character Filter" );
00258         SetSingle( '0' );
00259         SetSingle( '1' );
00260         SetSingle( '2' );
00261         SetSingle( '3' );
00262         SetSingle( '4' );
00263         SetSingle( '5' );
00264         SetSingle( '6' );
00265         SetSingle( '7' );
00266         SetSingle( '8' );
00267         SetSingle( '9' );
00268 }
00269 
00270 void gnFilter::CreateProteinFilter()
00271 {
00272         SetDefaultChar( 'x', 'x' );
00273         SetName( "Protein Filter" );
00274         SetSingle( 'A' );
00275         SetSingle( 'B' );
00276         SetSingle( 'C' );
00277         SetSingle( 'D' );
00278         SetSingle( 'E' );
00279         SetSingle( 'F' );
00280         SetSingle( 'G' );
00281         SetSingle( 'H' );
00282         SetSingle( 'I' );
00283         SetSingle( 'K' );
00284         SetSingle( 'L' );
00285         SetSingle( 'M' );
00286         SetSingle( 'N' );
00287         SetSingle( 'P' );
00288         SetSingle( 'Q' );
00289         SetSingle( 'R' );
00290         SetSingle( 'S' );
00291         SetSingle( 'T' );
00292         SetSingle( 'U' );       // don't forget selenocystine!
00293         SetSingle( 'V' );
00294         SetSingle( 'W' );
00295         SetSingle( 'X' );
00296         SetSingle( 'Y' );
00297         SetSingle( 'Z' );
00298         
00299         SetSingle( 'a' );
00300         SetSingle( 'b' );
00301         SetSingle( 'c' );
00302         SetSingle( 'd' );
00303         SetSingle( 'e' );
00304         SetSingle( 'f' );
00305         SetSingle( 'g' );
00306         SetSingle( 'h' );
00307         SetSingle( 'i' );
00308         SetSingle( 'k' );
00309         SetSingle( 'l' );
00310         SetSingle( 'm' );
00311         SetSingle( 'n' );
00312         SetSingle( 'p' );
00313         SetSingle( 'q' );
00314         SetSingle( 'r' );
00315         SetSingle( 's' );
00316         SetSingle( 't' );
00317         SetSingle( 'u' );       // don't forget selenocystine!
00318         SetSingle( 'v' );
00319         SetSingle( 'w' );
00320         SetSingle( 'x' );
00321         SetSingle( 'y' );
00322         SetSingle( 'z' );
00323 
00324         SetSingle( 'a' );
00325         SetSingle( 'r' );
00326         SetSingle( 'n' );
00327         SetSingle( 'd' );
00328         SetSingle( 'c' );
00329         SetSingle( 'q' );
00330         SetSingle( 'e' );
00331         SetSingle( 'g' );
00332         SetSingle( 'h' );
00333         SetSingle( 'i' );
00334         SetSingle( 'l' );
00335         SetSingle( 'k' );
00336         SetSingle( 'm' );
00337         SetSingle( 'f' );
00338         SetSingle( 'p' );
00339         SetSingle( 's' );
00340         SetSingle( 't' );
00341         SetSingle( 'w' );
00342         SetSingle( 'y' );
00343         SetSingle( 'v' );
00344         SetSingle( 'u' );       // don't forget selenocystine!
00345         SetSingle( 'z' );
00346         
00347         SetSingle( '.' );
00348 }
00349 
00350 void gnFilter::CreateBasicDNAFilter()
00351 {
00352         SetDefaultChar( 'n', 'n' );
00353         SetName( "Basic DNA Filter" );
00354         SetSingle( 'a' );
00355         SetSingle( 'c' );
00356         SetSingle( 'g' );
00357         SetSingle( 't' );
00358         SetSingle( 'A' );
00359         SetSingle( 'C' );
00360         SetSingle( 'G' );
00361         SetSingle( 'T' );
00362         SetSingle( 'n' );
00363         SetSingle( 'N' );
00364         SetSingle( 'x' );
00365         SetSingle( 'X' );
00366         SetSingle( '-' );
00367 }
00368 void gnFilter::CreateFullDNAFilter()
00369 {       
00370         SetDefaultChar( 'n', 'n' );
00371         SetName( "Full DNA Filter" );
00372         SetSingle( 'a' );
00373         SetSingle( 'c' );
00374         SetSingle( 'g' );
00375         SetSingle( 't' );
00376         SetSingle( 'A' );
00377         SetSingle( 'C' );
00378         SetSingle( 'G' );
00379         SetSingle( 'T' );
00380         SetSingle( 'r' );
00381         SetSingle( 'y' );
00382         SetSingle( 'k' );
00383         SetSingle( 'm' );
00384         SetSingle( 'b' );
00385         SetSingle( 'v' );
00386         SetSingle( 'd' );
00387         SetSingle( 'h' );
00388         SetSingle( 'R' );
00389         SetSingle( 'Y' );
00390         SetSingle( 'K' );
00391         SetSingle( 'M' );
00392         SetSingle( 'B' );
00393         SetSingle( 'V' );
00394         SetSingle( 'D' );
00395         SetSingle( 'H' );
00396         SetSingle( 's' );
00397         SetSingle( 'S' );
00398         SetSingle( 'w' );
00399         SetSingle( 'W' );
00400         SetSingle( 'n' );
00401         SetSingle( 'N' );
00402         SetSingle( 'x' );
00403         SetSingle( 'X' );
00404         SetSingle( '-' );
00405 }
00406 void gnFilter::CreateBasicRNAFilter()
00407 {
00408         SetDefaultChar( 'n', 'n' );
00409         SetName( "Basic RNA Filter" );
00410         SetSingle( 'a' );
00411         SetSingle( 'c' );
00412         SetSingle( 'g' );
00413         SetSingle( 'u' );
00414         SetSingle( 'A' );
00415         SetSingle( 'C' );
00416         SetSingle( 'G' );
00417         SetSingle( 'U' );
00418         SetSingle( 'n' );
00419         SetSingle( 'N' );
00420         SetSingle( '-' );
00421 }
00422 void gnFilter::CreateFullRNAFilter()
00423 {
00424         SetDefaultChar( 'n', 'n' );
00425         SetName( "Full RNA Filter" );
00426         SetSingle( 'a' );
00427         SetSingle( 'c' );
00428         SetSingle( 'g' );
00429         SetSingle( 'u' );
00430         SetSingle( 'A' );
00431         SetSingle( 'C' );
00432         SetSingle( 'G' );
00433         SetSingle( 'U' );
00434         SetSingle( 'r' );
00435         SetSingle( 'y' );
00436         SetSingle( 'k' );
00437         SetSingle( 'm' );
00438         SetSingle( 'b' );
00439         SetSingle( 'v' );
00440         SetSingle( 'd' );
00441         SetSingle( 'h' );
00442         SetSingle( 'R' );
00443         SetSingle( 'Y' );
00444         SetSingle( 'K' );
00445         SetSingle( 'M' );
00446         SetSingle( 'B' );
00447         SetSingle( 'V' );
00448         SetSingle( 'D' );
00449         SetSingle( 'H' );
00450         SetSingle( 's' );
00451         SetSingle( 'S' );
00452         SetSingle( 'w' );
00453         SetSingle( 'W' );
00454         SetSingle( 'n' );
00455         SetSingle( 'N' );
00456         SetSingle( '-' );
00457 }
00458 
00459 
00460 void gnFilter::CreateDNAtoRNAFilter(){
00461         SetDefaultChar( 'n', 'n' );
00462         SetName( "Full DNA to RNA Filter" );
00463         SetSingle( 'a' );
00464         SetSingle( 'c' );
00465         SetSingle( 'g' );
00466         SetPair( 't', 'u' );
00467         SetSingle( 'A' );
00468         SetSingle( 'C' );
00469         SetSingle( 'G' );
00470         SetPair( 'T', 'U' );
00471         SetSingle( 'r' );
00472         SetSingle( 'y' );
00473         SetSingle( 'k' );
00474         SetSingle( 'm' );
00475         SetSingle( 'b' );
00476         SetSingle( 'v' );
00477         SetSingle( 'd' );
00478         SetSingle( 'h' );
00479         SetSingle( 'R' );
00480         SetSingle( 'Y' );
00481         SetSingle( 'K' );
00482         SetSingle( 'M' );
00483         SetSingle( 'B' );
00484         SetSingle( 'V' );
00485         SetSingle( 'D' );
00486         SetSingle( 'H' );
00487         SetSingle( 's' );
00488         SetSingle( 'S' );
00489         SetSingle( 'w' );
00490         SetSingle( 'W' );
00491         SetSingle( 'n' );
00492         SetSingle( 'N' );
00493         SetSingle( '-' );
00494 }
00495 
00496 void gnFilter::CreateRNAtoDNAFilter(){
00497         SetDefaultChar( 'n', 'n' );
00498         SetName( "Full RNA to DNA Filter" );
00499         SetSingle( 'a' );
00500         SetSingle( 'c' );
00501         SetSingle( 'g' );
00502         SetPair( 'u', 't' );
00503         SetSingle( 'A' );
00504         SetSingle( 'C' );
00505         SetSingle( 'G' );
00506         SetPair( 'U', 'T' );
00507         SetSingle( 'r' );
00508         SetSingle( 'y' );
00509         SetSingle( 'k' );
00510         SetSingle( 'm' );
00511         SetSingle( 'b' );
00512         SetSingle( 'v' );
00513         SetSingle( 'd' );
00514         SetSingle( 'h' );
00515         SetSingle( 'R' );
00516         SetSingle( 'Y' );
00517         SetSingle( 'K' );
00518         SetSingle( 'M' );
00519         SetSingle( 'B' );
00520         SetSingle( 'V' );
00521         SetSingle( 'D' );
00522         SetSingle( 'H' );
00523         SetSingle( 's' );
00524         SetSingle( 'S' );
00525         SetSingle( 'w' );
00526         SetSingle( 'W' );
00527         SetSingle( 'n' );
00528         SetSingle( 'N' );
00529         SetSingle( '-' );
00530 }
00531 
00532 void gnFilter::CreateDNAComplementFilter(){
00533         SetDefaultChar( 'n', 'n' );
00534         SetName( "Full DNA Complement Filter" );
00535         SetPair( 'a', 't' );
00536         SetPair( 'A', 'T' );
00537         SetPair( 't', 'a' );
00538         SetPair( 'T', 'A' );
00539         SetPair( 'c', 'g' );
00540         SetPair( 'C', 'G' );
00541         SetPair( 'g', 'c' );
00542         SetPair( 'G', 'C' );
00543         SetPair( 'r', 'y' );
00544         SetPair( 'R', 'Y' );
00545         SetPair( 'y', 'r' );
00546         SetPair( 'Y', 'R' );
00547         SetPair( 'k', 'm' );
00548         SetPair( 'K', 'M' );
00549         SetPair( 'm', 'k' );
00550         SetPair( 'M', 'K' );
00551         SetSingle( 's' );
00552         SetSingle( 'S' );
00553         SetSingle( 'w' );
00554         SetSingle( 'W' );
00555         SetPair( 'b', 'v' );
00556         SetPair( 'B', 'V' );
00557         SetPair( 'v', 'b' );
00558         SetPair( 'V', 'B' );
00559         SetPair( 'd', 'h' );
00560         SetPair( 'D', 'H' );
00561         SetPair( 'h', 'd' );
00562         SetPair( 'H', 'D' );
00563         SetSingle( 'n' );
00564         SetSingle( 'N' );
00565         SetSingle( 'x' );
00566         SetSingle( 'X' );
00567         SetSingle( '-' );
00568 }
00569 
00570 void gnFilter::CreateRNAComplementFilter(){
00571         SetDefaultChar( 'n', 'n' );
00572         SetName( "Full RNA Complement Filter" );
00573         SetPair( 'a', 'u' );
00574         SetPair( 'A', 'U' );
00575         SetPair( 'u', 'a' );
00576         SetPair( 'U', 'A' );
00577         SetPair( 'c', 'g' );
00578         SetPair( 'C', 'G' );
00579         SetPair( 'g', 'c' );
00580         SetPair( 'G', 'C' );
00581         SetPair( 'r', 'y' );
00582         SetPair( 'R', 'Y' );
00583         SetPair( 'y', 'r' );
00584         SetPair( 'Y', 'R' );
00585         SetPair( 'k', 'm' );
00586         SetPair( 'K', 'M' );
00587         SetPair( 'm', 'k' );
00588         SetPair( 'M', 'K' );
00589         SetSingle( 's' );
00590         SetSingle( 'S' );
00591         SetSingle( 'w' );
00592         SetSingle( 'W' );
00593         SetPair( 'b', 'v' );
00594         SetPair( 'B', 'V' );
00595         SetPair( 'v', 'b' );
00596         SetPair( 'V', 'B' );
00597         SetPair( 'd', 'h' );
00598         SetPair( 'D', 'H' );
00599         SetPair( 'h', 'd' );
00600         SetPair( 'H', 'D' );
00601         SetSingle( 'n' );
00602         SetSingle( 'N' );
00603         SetSingle( '-' );
00604 }

Generated on Mon Mar 28 06:00:21 2005 for libGenome by doxygen 1.3.6