src/org/gel/mauve/summary/output/OverviewFileWriter.java

Go to the documentation of this file.
00001 package org.gel.mauve.summary.output;
00002 
00003 import java.util.Arrays;
00004 import java.util.Hashtable;
00005 import java.util.Vector;
00006 
00007 import org.gel.mauve.MauveConstants;
00008 import org.gel.mauve.MauveHelperFunctions;
00009 import org.gel.mauve.analysis.Segment;
00010 
00011 public class OverviewFileWriter extends AbstractTabbedDataWriter implements MauveConstants {
00012         
00013         protected SegmentDataProcessor processor;
00014         
00015         public final static String NUMBER_GENES = "num_genes";
00016         public final static String NUMBER_ISLANDS = "num_islands";
00017         public final static String NUM_BASE_PAIRS = "num_bp";
00018         public final static String PERCENT_TOTAL = "percent";
00019         public final static String UNKNOWN = "unknown";
00020         
00021         protected int [][] gene_data;
00022         protected int [] island_data;
00023         protected long [] bp_data;
00024         protected long [] long_totals;
00025         protected double [] double_totals;
00026         
00027         protected int [] num_genes;
00028         protected int [] num_segments;
00029         protected long [] lengths;
00030         protected Segment [] firsts;
00031         
00032         protected int sequence;
00033         protected int total;
00034         protected long cur_multiplicity;
00035         protected int min_size;
00036         
00037         public OverviewFileWriter (SegmentDataProcessor proc) {
00038                 super (proc.get (SegmentDataProcessor.FILE_STUB) + "_overview.tab", proc);
00039         }
00040         
00041         protected void initSubClassParticulars (Hashtable args) {
00042                 processor = (SegmentDataProcessor) args;
00043                 setMinSize ();
00044                 firsts = (Segment []) processor.get (FIRSTS);
00045                 if (processor.get (GENOME_LENGTHS) != null)
00046                         lengths = (long []) processor.get (GENOME_LENGTHS);
00047                 num_genes = (int []) args.get (TOTAL_GENES);
00048                 num_segments = new int [num_genes.length];
00049                 gene_data = (int [][]) processor.get (NUM_GENES_PER_MULT);
00050                 System.out.println ("num_genes: " + gene_data [0].length);
00051                 island_data = new int [gene_data [0].length];
00052                 bp_data = new long [gene_data [0].length];
00053                 row_number = gene_data [0].length + 2;
00054                 sequence = -1;
00055                 cur_multiplicity = ((Long) args.get (ALL_MULTIPLICITY)).longValue () + 1;
00056                 super.initSubClassParticulars (args);
00057                 writeHeaderInfo ();
00058                 printGeneInformation ();
00059                 doneWritingFile ();
00060         }
00061         
00062         protected void setMinSize () {
00063                 int isl_min = ((Integer) processor.get (ISLAND_MIN)).intValue();
00064                 int bb_min = ((Integer) processor.get (BACKBONE_MIN)).intValue();
00065                 min_size = Math.min(isl_min, bb_min);
00066         }
00067         
00068         protected void performCalculations () {
00069                 Segment seg = firsts [sequence];
00070                 do {
00071                         num_segments [sequence] += 1;
00072                         if (seg.getSegmentLength (sequence) > min_size) {
00073                                 island_data [(int) seg.multiplicityType () - 1] += 1;
00074                                 bp_data [(int) seg.multiplicityType () - 1] += seg.getSegmentLength (sequence);
00075                         }
00076                         seg = seg.nexts [sequence];
00077                 } while (seg != Segment.END);
00078         }
00079 
00080         public void writeHeaderInfo () {
00081                 try {
00082                         out.println ("Sequence " + processor.get (REFERENCE) + 
00083                                         " is the reference sequence.");
00084                         out.println ("Island minimum: " + processor.get (ISLAND_MIN));
00085                         out.println ("Backbone minimum: " + processor.get (BACKBONE_MIN));
00086                         out.println ("Minimum length ratio considered a problem: " + 
00087                                         processor.get (MAX_LENGTH_RATIO));
00088                         out.println ("Ratio represents the difference in length between the " +
00089                                         "longest and shortest pieces over the average length.");
00090                         out.println ("Minimum percent of gene that must be on island: " + 
00091                                         processor.get (MINIMUM_PERCENT_CONTAINED));
00092                         out.println ("File explanations: ");
00093                         out.println ("_islandscoords.mo contains island id and coordinate information " +
00094                                         "for all islands in all sequences");
00095                         out.println ("_problembb.mo contains backbone segments whose lengths vary" +
00096                                         "widely between sequences.");
00097                         out.println ("_islands contains information on all the islands in a particular sequence." +
00098                                         "\nIt can be loaded into Mauve as features.  A file is generated per sequence");
00099                         out.println ("_island_genes contains similar information as _islands, but"
00100                                         + " by gene\n");
00101                 } catch (Exception e) {
00102                         System.out.println ("Couldn't write overview file.");
00103                         e.printStackTrace ();
00104                 }
00105         }
00106         
00107         public void printGeneInformation () {
00108                 printHeaders ();
00109                 moreRowsToPrint ();
00110                 printData ();
00111                 out.println (IslandGeneFeatureWriter.buffer_count);
00112                 out.println (IslandGeneFeatureWriter.ids);
00113         }
00114         
00115         protected String getData (int column, int row) {
00116                 if (row < island_data.length) {
00117                         double percent = -1;
00118                         long count = -1;
00119                         switch (column) {
00120                                 case 0:
00121                                         return MauveHelperFunctions.getReadableMultiplicity (row + 1, 
00122                                                         num_genes.length);
00123                                 case 1:
00124                                         if (row == island_data.length - 1 && gene_data [sequence][row] == 0)
00125                                                 gene_data [sequence][row] = num_genes [sequence] - total;
00126                                         else
00127                                                 total += gene_data [sequence][row];
00128                                         count = gene_data [sequence][row];
00129                                         break;
00130                                 case 2:
00131                                         percent = gene_data [sequence][row] / (double) num_genes [sequence];
00132                                         break;
00133                                 case 3:
00134                                         count = island_data [row];
00135                                         break;
00136                                 case 4:
00137                                         percent = island_data [row] / (double) num_segments [sequence];
00138                                         break;
00139                                 case 5:
00140                                         count = bp_data [row];
00141                                         break;
00142                                 case 6:
00143                                         percent = bp_data [row] / (double) lengths [sequence];
00144                                         break;
00145                         }
00146                         if (percent != -1) {
00147                                 double_totals [column] += percent;
00148                                 return MauveHelperFunctions.doubleToString (percent * 100, 1);
00149                         }
00150                         else if (count != -1) {
00151                                 long_totals [column] += count;
00152                                 return count + "";
00153                         }
00154                 }
00155                 else if (row == island_data.length){
00156                         switch (column) {
00157                                 case 0:
00158                                         return TOTALS;
00159                                 case 1:
00160                                         return num_genes [sequence] + "";
00161                                 case 2:
00162                                         return "100";
00163                                 case 3:
00164                                         return num_segments [sequence] + "";
00165                                 case 4:
00166                                         return "100";
00167                                 case 5:
00168                                         return lengths [sequence] + "";
00169                                 case 6:
00170                                         return "100";
00171                                 default:
00172                                         return null;
00173                         }
00174                 }
00175                 else {
00176                         if (column == 0)
00177                                 return "unknown";
00178                         if (column % 2 == 1) {
00179                                 long tot = (column == 1) ? num_genes [sequence] : 
00180                                         ((column == 3) ? num_segments [sequence] : lengths [sequence]);
00181                                 return (tot - long_totals [column]) + "";
00182                         }
00183                         else
00184                                 return MauveHelperFunctions.doubleToString (100 - (
00185                                                 double_totals [column] * 100), 1);
00186                 }
00187                 return null;
00188         }
00189 
00190         protected boolean moreRowsToPrint () {
00191                 if (row_number == island_data.length + 2) {
00192                         if (sequence == num_genes.length - 1) 
00193                                 return false;
00194                         else {
00195                                 total = 0;
00196                                 sequence++;
00197                                 cur_multiplicity >>= 1;
00198                                 row_number = 0;
00199                                 if (sequence > 0) {
00200                                         Arrays.fill (island_data, 0);
00201                                         Arrays.fill (bp_data, 0);
00202                                         Arrays.fill (long_totals, 0);
00203                                         Arrays.fill (double_totals, 0);
00204                                 }
00205                                 performCalculations ();
00206                                 out.println ("Sequence " + sequence + ":");
00207                                 return true;
00208                         }
00209                 }
00210                 else
00211                         return true;
00212         }
00213 
00214         protected Vector setColumnHeaders () {
00215                 Vector titles = new Vector ();
00216                 titles.add (Segment.MULTIPLICITY_STRING);
00217                 titles.add (NUMBER_GENES);
00218                 titles.add (PERCENT_TOTAL);
00219                 titles.add (NUMBER_ISLANDS);
00220                 titles.add (PERCENT_TOTAL);
00221                 titles.add (NUM_BASE_PAIRS);
00222                 titles.add (PERCENT_TOTAL);
00223                 long_totals = new long [titles.size()];
00224                 double_totals = new double [long_totals.length];
00225                 return titles;
00226         }
00227 
00228         protected boolean shouldPrintRow (int row) {
00229                 if (row == island_data.length + 1 || row == island_data.length || (/*gene_data [sequence][row] != 0 &&*/
00230                                 ((row + 1) & cur_multiplicity) == cur_multiplicity)) {
00231                         return true;
00232                 }
00233                 else {
00234                         return false;
00235                 }
00236         }
00237 
00238 }

Generated on Mon Aug 19 06:03:43 2013 for Mauve by doxygen 1.3.6