src/Mutator.cpp

Go to the documentation of this file.
00001 #ifdef HAVE_CONFIG_H
00002 #include "config.h"
00003 #endif
00004 
00005 #include "Mutator.h"
00006 #include <vector>
00007 #include <cmath>
00008 
00009 extern "C"{
00010 #include "twister.h"
00011 }
00012 
00013 using namespace std;
00014 using namespace genome;
00015 using namespace mems;
00016 
00022 gnSeqI uniformSample( gnSeqI min, gnSeqI max );
00023 gnSeqI uniformSample( gnSeqI min, gnSeqI max ){
00024         if( max == min )
00025                 return 0;
00026 
00027         gnSeqI sample = (gnSeqI)(rndu() * (max - min));
00028         return sample + min;
00029 }
00030 
00035 double exponentialSample( double theta );
00036 double exponentialSample( double theta ){
00037         double z_samp = rndu();
00038         z_samp = -log( z_samp ) * theta;
00039         return z_samp;
00040 }
00041 
00046 gnSeqI poissonSample( double p );
00047 gnSeqI poissonSample( double p ){
00048         gnSeqI count = 0;
00049         double lambda = 1;
00050         double sum = 0;
00051         for( ;; count++ ){
00052                 sum += exponentialSample( lambda );
00053                 if( sum >= p )
00054                         break;
00055         }
00056         return count;
00057 }
00058 
00059 void IndelInserter::getLocation( gnSeqI& source_start, gnSeqI& source_len, gnSeqI& dest, gnSeqI dest_len ) {
00060         do{
00061                 source_len = poissonSample( size );     // don't let it be 0
00062         }while( source_len == 0 );
00063         source_len = source_len < donor.alignedSeqsSize() ? source_len : donor.alignedSeqsSize() - 1;
00064         source_start = uniformSample( 0, donor.alignedSeqsSize() - source_len - 1 );
00065         dest = uniformSample( 0, dest_len );
00066 }
00067 
00068 void SmallHTInserter::getLocation( gnSeqI& source_start, gnSeqI& source_len, gnSeqI& dest, gnSeqI dest_len ) {
00069         do{
00070                 source_len = (gnSeqI)exponentialSample( size ) + 1;     // force it between 1 and sequence length
00071         }while( source_len == 0 );
00072         source_len = source_len < donor.alignedSeqsSize() ? source_len : donor.alignedSeqsSize() - 1;
00073         source_start = uniformSample( 0, donor.alignedSeqsSize() - source_len - 1 );
00074         dest = uniformSample( 0, dest_len );
00075 }
00076 
00077 void LargeHTInserter::getLocation( gnSeqI& source_start, gnSeqI& source_len, gnSeqI& dest, gnSeqI dest_len ) {
00078         source_len = uniformSample( min_size, max_size );
00079         source_len = source_len < donor.alignedSeqsSize() ? source_len : donor.alignedSeqsSize() - 1;
00080         source_len = source_len == 0 ? 1 : source_len;
00081         source_start = uniformSample( 0, donor.alignedSeqsSize() - source_len - 1 );
00082         dest = uniformSample( 0, dest_len );
00083 }
00084 
00085 void IndelDeleter::getLocation( gnSeqI& start, gnSeqI& len, gnSeqI dest_len ) {
00086         do{
00087                 len = poissonSample( size );
00088         }while( len == 0 );
00089         len = len < dest_len ? len : dest_len;
00090         start = uniformSample( 0, dest_len - len );
00091 }
00092 
00093 void SmallHTDeleter::getLocation( gnSeqI& start, gnSeqI& len, gnSeqI dest_len ) {
00094         do{
00095                 len = (gnSeqI)exponentialSample( size );        // don't let it be 0
00096         }while( len == 0 );
00097         len = len < dest_len ? len : dest_len;
00098         start = uniformSample( 0, dest_len - len );
00099 }
00100 
00101 void LargeHTDeleter::getLocation( gnSeqI& start, gnSeqI& len, gnSeqI dest_len ) {
00102         len = uniformSample( min_size, max_size );
00103         len = len < dest_len ? len : dest_len;
00104         start = uniformSample( 0, dest_len - len );
00105 }
00106 
00107 void Inverter::getLocation( gnSeqI& start, gnSeqI& len, gnSeqI dest_len ) {
00108         do{
00109                 len = (gnSeqI)exponentialSample( size );        // don't let it be 0
00110         }while( len == 0 );
00111         // treat as circular
00112         gnSeqI s = uniformSample( 0, dest_len );
00113         gnSeqI circ = (s+len)%dest_len;
00114         start = s < circ ? s : circ;
00115         len = abs((int64)circ-(int64)s);
00116 }
00117 
00118 
00119 // take sequence from donor and insert it into sequences below nodeI
00120 void Inserter::mutate( node_id_t nodeI, const PhyloTree<TreeNode>& tree, Alignment& evolved_alignment ) {
00121         gnSeqI source_start;
00122         gnSeqI source_length;
00123         gnSeqI dest, dest_col;
00124         // nodeI and all sequences below it in the tree should have the same length
00125         gnSeqI dest_len = evolved_alignment.sequenceLength( nodeI );
00126         getLocation( source_start, source_length, dest, dest_len );
00127         dest_col = evolved_alignment.getColumnIndex( nodeI, dest );
00128         recursiveInsert( tree.root, nodeI, tree, evolved_alignment, dest_col, source_start, source_length );
00129         if( debugChecks() )
00130                 evolved_alignment.checkLengths();
00131 }
00132 
00133 void Inserter::recursiveInsert( node_id_t cur_node, node_id_t insert_node, const PhyloTree<TreeNode>& t, 
00134                 Alignment& evolved_alignment, gnSeqI point, gnSeqI source_left, gnSeqI source_length ){
00135         if( cur_node == insert_node ){
00136                 // insert the actual sequence in this one
00137                 evolved_alignment.applyInsertion( cur_node, point, source_left, source_length );
00138         }else
00139                 evolved_alignment.applyGapInsertion( cur_node, point, source_length );
00140                 
00141         for( uint childI = 0; childI < t[ cur_node ].children.size(); childI++ ){
00142                 // make the insert node follow through the tree down from the initial insertion
00143                 node_id_t new_ins_node = cur_node == insert_node ? t[ cur_node ].children[ childI ] : insert_node;
00144                 recursiveInsert( t[ cur_node ].children[ childI ], new_ins_node, t, evolved_alignment, point, source_left, source_length );
00145         }
00146 }
00147 
00148 void Deleter::mutate( node_id_t nodeI, const PhyloTree<TreeNode>& tree, Alignment& evolved_alignment ) {
00149         gnSeqI start;
00150         gnSeqI length;
00151         // nodeI and all sequences below it in the tree should have the same length
00152         gnSeqI dest_len = evolved_alignment.sequenceLength( nodeI );
00153         getLocation( start, length, dest_len );
00154         if( length == 0 )
00155                 return; // there isn't anything left to delete
00156 
00157         // convert sequence position to alignment column
00158         evolved_alignment.getDeletionColumns( nodeI, start, length );
00159         recursiveDelete( tree.root, nodeI, tree, evolved_alignment, start, length );
00160         if( debugChecks() )
00161                 evolved_alignment.checkLengths();
00162 }
00163 
00164 void Deleter::recursiveDelete( node_id_t cur_node, node_id_t insert_node, const PhyloTree<TreeNode>& t, Alignment& evolved_alignment, 
00165                 gnSeqI left_end, gnSeqI length ){
00166         if( cur_node == insert_node )
00167                 // delete sequence in this one
00168                 evolved_alignment.applyDeletion( cur_node, left_end, length );
00169                 
00170         for( uint childI = 0; childI < t[ cur_node ].children.size(); childI++ ){
00171                 // make the insert node follow through the tree down from the initial insertion
00172                 node_id_t new_ins_node = cur_node == insert_node ? t[ cur_node ].children[ childI ] : insert_node;
00173                 recursiveDelete( t[ cur_node ].children[ childI ], new_ins_node, t, evolved_alignment, left_end, length );
00174         }
00175 }
00176 
00177 void Inverter::mutate( node_id_t nodeI, const PhyloTree<TreeNode>& tree, Alignment& evolved_alignment ) {
00178         gnSeqI start;
00179         gnSeqI length;
00180         // nodeI and all sequences below it in the tree should have the same length
00181         gnSeqI dest_len = evolved_alignment.sequenceLength( nodeI );
00182         getLocation( start, length, dest_len );
00183         if( length == 0 )
00184                 return; // there isn't anything left to invert
00185 
00186         // convert sequence position to alignment column
00187         evolved_alignment.getDeletionColumns( nodeI, start, length );
00188         evolved_alignment.addInversion( nodeI, start, length );
00189         cerr << "Inversion\t" << start << "\t" << start+length << "\n";
00190 // don't do recursive inversion -- applyInversions() will take care of applying it
00191 // to each sequence     
00192 }

Generated on Mon Aug 19 06:00:51 2013 for sgEvolver by doxygen 1.3.6