00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00013 #include "gn/gnFilter.h"
00014 #include "gn/gnDebug.h"
00015
00016
00017 const gnFilter *gnFilter::alphabetCharacterFilter(){
00018 const static gnFilter* t_filt = new gnFilter(alphabetCharacterFilterType);
00019 return t_filt;
00020 }
00021
00022 const gnFilter *gnFilter::numberCharacterFilter(){
00023 const static gnFilter* t_filt = new gnFilter(numberCharacterFilterType);
00024 return t_filt;
00025 }
00026
00027
00028 const gnFilter *gnFilter::proteinSeqFilter(){
00029 const static gnFilter* t_filt = new gnFilter(proteinSeqFilterType);
00030 return t_filt;
00031 }
00032
00033 const gnFilter *gnFilter::basicDNASeqFilter(){
00034 const static gnFilter* t_filt = new gnFilter(basicDNASeqFilterType);
00035 return t_filt;
00036 }
00037
00038 const gnFilter *gnFilter::fullDNASeqFilter(){
00039 const static gnFilter* t_filt = new gnFilter(fullDNASeqFilterType);
00040 return t_filt;
00041 }
00042
00043 const gnFilter *gnFilter::basicRNASeqFilter(){
00044 const static gnFilter* t_filt = new gnFilter(basicRNASeqFilterType);
00045 return t_filt;
00046 }
00047
00048 const gnFilter *gnFilter::fullRNASeqFilter(){
00049 const static gnFilter* t_filt = new gnFilter(fullRNASeqFilterType);
00050 return t_filt;
00051 }
00052
00053 const gnFilter *gnFilter::DNAtoRNAFilter(){
00054 const static gnFilter* t_filt = new gnFilter(DNAtoRNAFilterType);
00055 return t_filt;
00056 }
00057
00058 const gnFilter *gnFilter::RNAtoDNAFilter(){
00059 const static gnFilter* t_filt = new gnFilter(RNAtoDNAFilterType);
00060 return t_filt;
00061 }
00062
00063 const gnFilter *gnFilter::DNAComplementFilter(){
00064 const static gnFilter* t_filt = new gnFilter(DNAComplementFilterType);
00065 return t_filt;
00066 }
00067
00068 const gnFilter *gnFilter::RNAComplementFilter(){
00069 const static gnFilter* t_filt = new gnFilter(RNAComplementFilterType);
00070 return t_filt;
00071 }
00072
00073
00074
00075 gnFilter::gnFilter()
00076 {
00077 m_defaultChar = 'n';
00078 m_rDefaultChar = 'n';
00079 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00080 m_pairArray[i] = NO_REVCOMP_CHAR;
00081 }
00082 gnFilter::gnFilter( const gnSeqC defaultChar, const gnSeqC rdefaultChar )
00083 {
00084 m_defaultChar = defaultChar;
00085 m_rDefaultChar = rdefaultChar;
00086 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00087 m_pairArray[i] = NO_REVCOMP_CHAR;
00088 }
00089
00090 gnFilter::gnFilter( const gnFilter &sf )
00091 {
00092 m_name = sf.m_name;
00093 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00094 m_pairArray[i] = sf.m_pairArray[i];
00095 m_defaultChar = sf.m_defaultChar;
00096 m_rDefaultChar = sf.m_rDefaultChar;
00097 }
00098
00099 gnFilter::gnFilter( const gnFilterType f_type ){
00100 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i )
00101 m_pairArray[i] = NO_REVCOMP_CHAR;
00102 switch(f_type){
00103 case alphabetCharacterFilterType:
00104 CreateAlphabetCharacterFilter();
00105 break;
00106 case numberCharacterFilterType:
00107 CreateNumberCharacterFilter();
00108 break;
00109 case proteinSeqFilterType:
00110 CreateProteinFilter();
00111 break;
00112 case basicDNASeqFilterType:
00113 CreateBasicDNAFilter();
00114 break;
00115 case fullDNASeqFilterType:
00116 CreateFullDNAFilter();
00117 break;
00118 case basicRNASeqFilterType:
00119 CreateBasicRNAFilter();
00120 break;
00121 case fullRNASeqFilterType:
00122 CreateFullRNAFilter();
00123 break;
00124 case DNAtoRNAFilterType:
00125 CreateDNAtoRNAFilter();
00126 break;
00127 case RNAtoDNAFilterType:
00128 CreateRNAtoDNAFilter();
00129 break;
00130 case DNAComplementFilterType:
00131 CreateDNAComplementFilter();
00132 break;
00133 case RNAComplementFilterType:
00134 CreateRNAComplementFilter();
00135 break;
00136 }
00137 }
00138
00139
00140 gnFilter::~gnFilter()
00141 {
00142 }
00143
00144 inline
00145 void gnFilter::Filter( gnSeqC** seq, gnSeqI& len ) const
00146 {
00147 Array<gnSeqC> array_buf( len );
00148 gnSeqC* tmp = array_buf.data;
00149 gnSeqI c=0;
00150 for(uint32 i=0; i < len; i++)
00151 if(IsValid((*seq)[i]))
00152 tmp[c++] = m_pairArray[(*seq)[i]];
00153 len = c;
00154 memcpy(*seq, tmp, len);
00155 }
00156
00157 void gnFilter::ReverseFilter( gnSeqC** seq, gnSeqI& len ) const
00158 {
00159 gnSeqC tmp, dum;
00160 uint32 halfLen = len/2;
00161 uint32 end = len - 1;
00162 uint32 curB = 0;
00163 uint32 curE = end;
00164 for( uint32 i=0; i < halfLen ; ++i )
00165 {
00166 tmp = m_pairArray[(*seq)[i]];
00167 dum = m_pairArray[(*seq)[ end - i ]];
00168 if(dum != NO_REVCOMP_CHAR)
00169 (*seq)[ curB++ ] = dum;
00170 if(tmp != NO_REVCOMP_CHAR)
00171 (*seq)[ curE-- ] = tmp;
00172 }
00173 if(len&0x1){
00174 tmp = m_pairArray[(*seq)[halfLen]];
00175 if(tmp != NO_REVCOMP_CHAR)
00176 (*seq)[curB++] = tmp;
00177 }
00178
00179 if(curE >= curB){
00180 memmove(*seq+curB, *seq+curE+1, end - curE);
00181 len = end - curE + curB;
00182 }
00183
00184 }
00185
00186 void gnFilter::Filter( string &seq ) const
00187 {
00188 gnSeqI c=0;
00189 for(uint32 i=0; i < seq.length(); i++)
00190 if(IsValid(seq[i]))
00191 seq[c++] = m_pairArray[seq[i]];
00192 }
00193
00194 void gnFilter::ReverseFilter( string &seq ) const
00195 {
00196 gnSeqC tmp, dum;
00197 uint32 halfLen = seq.length()/2;
00198 uint32 end = seq.length() - 1;
00199 uint32 curB = 0;
00200 uint32 curE = end;
00201 for( uint32 i=0; i < halfLen ; ++i )
00202 {
00203 tmp = m_pairArray[seq[i]];
00204 dum = m_pairArray[seq[ end - i ]];
00205 if(dum != NO_REVCOMP_CHAR)
00206 seq[ curB++ ] = dum;
00207 if(tmp != NO_REVCOMP_CHAR)
00208 seq[ curE-- ] = tmp;
00209 }
00210 if(seq.length()&0x1){
00211 tmp = m_pairArray[seq[halfLen]];
00212 if(tmp != NO_REVCOMP_CHAR)
00213 seq[curB++] = tmp;
00214 }
00215
00216 if(curE >= curB){
00217 seq.erase(curB, curE-curB);
00218 }
00219 }
00220
00221
00222 void gnFilter::CreateAlphabetCharacterFilter()
00223 {
00224 SetDefaultChar( 0, 0 );
00225 SetName( "Alphabet Character Filter" );
00226 SetPair( 'A', 'a' );
00227 SetPair( 'B', 'b' );
00228 SetPair( 'C', 'c' );
00229 SetPair( 'D', 'd' );
00230 SetPair( 'E', 'e' );
00231 SetPair( 'F', 'f' );
00232 SetPair( 'G', 'g' );
00233 SetPair( 'H', 'h' );
00234 SetPair( 'I', 'i' );
00235 SetPair( 'J', 'j' );
00236 SetPair( 'K', 'k' );
00237 SetPair( 'L', 'l' );
00238 SetPair( 'M', 'm' );
00239 SetPair( 'N', 'n' );
00240 SetPair( 'O', 'o' );
00241 SetPair( 'P', 'p' );
00242 SetPair( 'Q', 'q' );
00243 SetPair( 'R', 'r' );
00244 SetPair( 'S', 's' );
00245 SetPair( 'T', 't' );
00246 SetPair( 'U', 'u' );
00247 SetPair( 'V', 'v' );
00248 SetPair( 'W', 'w' );
00249 SetPair( 'X', 'x' );
00250 SetPair( 'Y', 'y' );
00251 SetPair( 'Z', 'z' );
00252 }
00253
00254 void gnFilter::CreateNumberCharacterFilter()
00255 {
00256 SetDefaultChar( 0, 0 );
00257 SetName( "Number Character Filter" );
00258 SetSingle( '0' );
00259 SetSingle( '1' );
00260 SetSingle( '2' );
00261 SetSingle( '3' );
00262 SetSingle( '4' );
00263 SetSingle( '5' );
00264 SetSingle( '6' );
00265 SetSingle( '7' );
00266 SetSingle( '8' );
00267 SetSingle( '9' );
00268 }
00269
00270 void gnFilter::CreateProteinFilter()
00271 {
00272 SetDefaultChar( 'x', 'x' );
00273 SetName( "Protein Filter" );
00274 SetSingle( 'A' );
00275 SetSingle( 'B' );
00276 SetSingle( 'C' );
00277 SetSingle( 'D' );
00278 SetSingle( 'E' );
00279 SetSingle( 'F' );
00280 SetSingle( 'G' );
00281 SetSingle( 'H' );
00282 SetSingle( 'I' );
00283 SetSingle( 'K' );
00284 SetSingle( 'L' );
00285 SetSingle( 'M' );
00286 SetSingle( 'N' );
00287 SetSingle( 'P' );
00288 SetSingle( 'Q' );
00289 SetSingle( 'R' );
00290 SetSingle( 'S' );
00291 SetSingle( 'T' );
00292 SetSingle( 'U' );
00293 SetSingle( 'V' );
00294 SetSingle( 'W' );
00295 SetSingle( 'X' );
00296 SetSingle( 'Y' );
00297 SetSingle( 'Z' );
00298
00299 SetSingle( 'a' );
00300 SetSingle( 'b' );
00301 SetSingle( 'c' );
00302 SetSingle( 'd' );
00303 SetSingle( 'e' );
00304 SetSingle( 'f' );
00305 SetSingle( 'g' );
00306 SetSingle( 'h' );
00307 SetSingle( 'i' );
00308 SetSingle( 'k' );
00309 SetSingle( 'l' );
00310 SetSingle( 'm' );
00311 SetSingle( 'n' );
00312 SetSingle( 'p' );
00313 SetSingle( 'q' );
00314 SetSingle( 'r' );
00315 SetSingle( 's' );
00316 SetSingle( 't' );
00317 SetSingle( 'u' );
00318 SetSingle( 'v' );
00319 SetSingle( 'w' );
00320 SetSingle( 'x' );
00321 SetSingle( 'y' );
00322 SetSingle( 'z' );
00323
00324 SetSingle( 'a' );
00325 SetSingle( 'r' );
00326 SetSingle( 'n' );
00327 SetSingle( 'd' );
00328 SetSingle( 'c' );
00329 SetSingle( 'q' );
00330 SetSingle( 'e' );
00331 SetSingle( 'g' );
00332 SetSingle( 'h' );
00333 SetSingle( 'i' );
00334 SetSingle( 'l' );
00335 SetSingle( 'k' );
00336 SetSingle( 'm' );
00337 SetSingle( 'f' );
00338 SetSingle( 'p' );
00339 SetSingle( 's' );
00340 SetSingle( 't' );
00341 SetSingle( 'w' );
00342 SetSingle( 'y' );
00343 SetSingle( 'v' );
00344 SetSingle( 'u' );
00345 SetSingle( 'z' );
00346
00347 SetSingle( '.' );
00348 }
00349
00350 void gnFilter::CreateBasicDNAFilter()
00351 {
00352 SetDefaultChar( 'n', 'n' );
00353 SetName( "Basic DNA Filter" );
00354 SetSingle( 'a' );
00355 SetSingle( 'c' );
00356 SetSingle( 'g' );
00357 SetSingle( 't' );
00358 SetSingle( 'A' );
00359 SetSingle( 'C' );
00360 SetSingle( 'G' );
00361 SetSingle( 'T' );
00362 SetSingle( 'n' );
00363 SetSingle( 'N' );
00364 SetSingle( 'x' );
00365 SetSingle( 'X' );
00366 SetSingle( '-' );
00367 }
00368 void gnFilter::CreateFullDNAFilter()
00369 {
00370 SetDefaultChar( 'n', 'n' );
00371 SetName( "Full DNA Filter" );
00372 SetSingle( 'a' );
00373 SetSingle( 'c' );
00374 SetSingle( 'g' );
00375 SetSingle( 't' );
00376 SetSingle( 'A' );
00377 SetSingle( 'C' );
00378 SetSingle( 'G' );
00379 SetSingle( 'T' );
00380 SetSingle( 'r' );
00381 SetSingle( 'y' );
00382 SetSingle( 'k' );
00383 SetSingle( 'm' );
00384 SetSingle( 'b' );
00385 SetSingle( 'v' );
00386 SetSingle( 'd' );
00387 SetSingle( 'h' );
00388 SetSingle( 'R' );
00389 SetSingle( 'Y' );
00390 SetSingle( 'K' );
00391 SetSingle( 'M' );
00392 SetSingle( 'B' );
00393 SetSingle( 'V' );
00394 SetSingle( 'D' );
00395 SetSingle( 'H' );
00396 SetSingle( 's' );
00397 SetSingle( 'S' );
00398 SetSingle( 'w' );
00399 SetSingle( 'W' );
00400 SetSingle( 'n' );
00401 SetSingle( 'N' );
00402 SetSingle( 'x' );
00403 SetSingle( 'X' );
00404 SetSingle( '-' );
00405 }
00406 void gnFilter::CreateBasicRNAFilter()
00407 {
00408 SetDefaultChar( 'n', 'n' );
00409 SetName( "Basic RNA Filter" );
00410 SetSingle( 'a' );
00411 SetSingle( 'c' );
00412 SetSingle( 'g' );
00413 SetSingle( 'u' );
00414 SetSingle( 'A' );
00415 SetSingle( 'C' );
00416 SetSingle( 'G' );
00417 SetSingle( 'U' );
00418 SetSingle( 'n' );
00419 SetSingle( 'N' );
00420 SetSingle( '-' );
00421 }
00422 void gnFilter::CreateFullRNAFilter()
00423 {
00424 SetDefaultChar( 'n', 'n' );
00425 SetName( "Full RNA Filter" );
00426 SetSingle( 'a' );
00427 SetSingle( 'c' );
00428 SetSingle( 'g' );
00429 SetSingle( 'u' );
00430 SetSingle( 'A' );
00431 SetSingle( 'C' );
00432 SetSingle( 'G' );
00433 SetSingle( 'U' );
00434 SetSingle( 'r' );
00435 SetSingle( 'y' );
00436 SetSingle( 'k' );
00437 SetSingle( 'm' );
00438 SetSingle( 'b' );
00439 SetSingle( 'v' );
00440 SetSingle( 'd' );
00441 SetSingle( 'h' );
00442 SetSingle( 'R' );
00443 SetSingle( 'Y' );
00444 SetSingle( 'K' );
00445 SetSingle( 'M' );
00446 SetSingle( 'B' );
00447 SetSingle( 'V' );
00448 SetSingle( 'D' );
00449 SetSingle( 'H' );
00450 SetSingle( 's' );
00451 SetSingle( 'S' );
00452 SetSingle( 'w' );
00453 SetSingle( 'W' );
00454 SetSingle( 'n' );
00455 SetSingle( 'N' );
00456 SetSingle( '-' );
00457 }
00458
00459
00460 void gnFilter::CreateDNAtoRNAFilter(){
00461 SetDefaultChar( 'n', 'n' );
00462 SetName( "Full DNA to RNA Filter" );
00463 SetSingle( 'a' );
00464 SetSingle( 'c' );
00465 SetSingle( 'g' );
00466 SetPair( 't', 'u' );
00467 SetSingle( 'A' );
00468 SetSingle( 'C' );
00469 SetSingle( 'G' );
00470 SetPair( 'T', 'U' );
00471 SetSingle( 'r' );
00472 SetSingle( 'y' );
00473 SetSingle( 'k' );
00474 SetSingle( 'm' );
00475 SetSingle( 'b' );
00476 SetSingle( 'v' );
00477 SetSingle( 'd' );
00478 SetSingle( 'h' );
00479 SetSingle( 'R' );
00480 SetSingle( 'Y' );
00481 SetSingle( 'K' );
00482 SetSingle( 'M' );
00483 SetSingle( 'B' );
00484 SetSingle( 'V' );
00485 SetSingle( 'D' );
00486 SetSingle( 'H' );
00487 SetSingle( 's' );
00488 SetSingle( 'S' );
00489 SetSingle( 'w' );
00490 SetSingle( 'W' );
00491 SetSingle( 'n' );
00492 SetSingle( 'N' );
00493 SetSingle( '-' );
00494 }
00495
00496 void gnFilter::CreateRNAtoDNAFilter(){
00497 SetDefaultChar( 'n', 'n' );
00498 SetName( "Full RNA to DNA Filter" );
00499 SetSingle( 'a' );
00500 SetSingle( 'c' );
00501 SetSingle( 'g' );
00502 SetPair( 'u', 't' );
00503 SetSingle( 'A' );
00504 SetSingle( 'C' );
00505 SetSingle( 'G' );
00506 SetPair( 'U', 'T' );
00507 SetSingle( 'r' );
00508 SetSingle( 'y' );
00509 SetSingle( 'k' );
00510 SetSingle( 'm' );
00511 SetSingle( 'b' );
00512 SetSingle( 'v' );
00513 SetSingle( 'd' );
00514 SetSingle( 'h' );
00515 SetSingle( 'R' );
00516 SetSingle( 'Y' );
00517 SetSingle( 'K' );
00518 SetSingle( 'M' );
00519 SetSingle( 'B' );
00520 SetSingle( 'V' );
00521 SetSingle( 'D' );
00522 SetSingle( 'H' );
00523 SetSingle( 's' );
00524 SetSingle( 'S' );
00525 SetSingle( 'w' );
00526 SetSingle( 'W' );
00527 SetSingle( 'n' );
00528 SetSingle( 'N' );
00529 SetSingle( '-' );
00530 }
00531
00532 void gnFilter::CreateDNAComplementFilter(){
00533 SetDefaultChar( 'n', 'n' );
00534 SetName( "Full DNA Complement Filter" );
00535 SetPair( 'a', 't' );
00536 SetPair( 'A', 'T' );
00537 SetPair( 't', 'a' );
00538 SetPair( 'T', 'A' );
00539 SetPair( 'c', 'g' );
00540 SetPair( 'C', 'G' );
00541 SetPair( 'g', 'c' );
00542 SetPair( 'G', 'C' );
00543 SetPair( 'r', 'y' );
00544 SetPair( 'R', 'Y' );
00545 SetPair( 'y', 'r' );
00546 SetPair( 'Y', 'R' );
00547 SetPair( 'k', 'm' );
00548 SetPair( 'K', 'M' );
00549 SetPair( 'm', 'k' );
00550 SetPair( 'M', 'K' );
00551 SetSingle( 's' );
00552 SetSingle( 'S' );
00553 SetSingle( 'w' );
00554 SetSingle( 'W' );
00555 SetPair( 'b', 'v' );
00556 SetPair( 'B', 'V' );
00557 SetPair( 'v', 'b' );
00558 SetPair( 'V', 'B' );
00559 SetPair( 'd', 'h' );
00560 SetPair( 'D', 'H' );
00561 SetPair( 'h', 'd' );
00562 SetPair( 'H', 'D' );
00563 SetSingle( 'n' );
00564 SetSingle( 'N' );
00565 SetSingle( 'x' );
00566 SetSingle( 'X' );
00567 SetSingle( '-' );
00568 }
00569
00570 void gnFilter::CreateRNAComplementFilter(){
00571 SetDefaultChar( 'n', 'n' );
00572 SetName( "Full RNA Complement Filter" );
00573 SetPair( 'a', 'u' );
00574 SetPair( 'A', 'U' );
00575 SetPair( 'u', 'a' );
00576 SetPair( 'U', 'A' );
00577 SetPair( 'c', 'g' );
00578 SetPair( 'C', 'G' );
00579 SetPair( 'g', 'c' );
00580 SetPair( 'G', 'C' );
00581 SetPair( 'r', 'y' );
00582 SetPair( 'R', 'Y' );
00583 SetPair( 'y', 'r' );
00584 SetPair( 'Y', 'R' );
00585 SetPair( 'k', 'm' );
00586 SetPair( 'K', 'M' );
00587 SetPair( 'm', 'k' );
00588 SetPair( 'M', 'K' );
00589 SetSingle( 's' );
00590 SetSingle( 'S' );
00591 SetSingle( 'w' );
00592 SetSingle( 'W' );
00593 SetPair( 'b', 'v' );
00594 SetPair( 'B', 'V' );
00595 SetPair( 'v', 'b' );
00596 SetPair( 'V', 'B' );
00597 SetPair( 'd', 'h' );
00598 SetPair( 'D', 'H' );
00599 SetPair( 'h', 'd' );
00600 SetPair( 'H', 'D' );
00601 SetSingle( 'n' );
00602 SetSingle( 'N' );
00603 SetSingle( '-' );
00604 }