00001 package org.gel.mauve.summary.output;
00002
00003 import java.util.Arrays;
00004 import java.util.Hashtable;
00005 import java.util.Vector;
00006
00007 import org.gel.mauve.MauveConstants;
00008 import org.gel.mauve.MauveHelperFunctions;
00009 import org.gel.mauve.analysis.Segment;
00010
00011 public class OverviewFileWriter extends AbstractTabbedDataWriter implements MauveConstants {
00012
00013 protected SegmentDataProcessor processor;
00014
00015 public final static String NUMBER_GENES = "num_genes";
00016 public final static String NUMBER_ISLANDS = "num_islands";
00017 public final static String NUM_BASE_PAIRS = "num_bp";
00018 public final static String PERCENT_TOTAL = "percent";
00019 public final static String UNKNOWN = "unknown";
00020
00021 protected int [][] gene_data;
00022 protected int [] island_data;
00023 protected long [] bp_data;
00024 protected long [] long_totals;
00025 protected double [] double_totals;
00026
00027 protected int [] num_genes;
00028 protected int [] num_segments;
00029 protected long [] lengths;
00030 protected Segment [] firsts;
00031
00032 protected int sequence;
00033 protected int total;
00034 protected long cur_multiplicity;
00035 protected int min_size;
00036
00037 public OverviewFileWriter (SegmentDataProcessor proc) {
00038 super (proc.get (SegmentDataProcessor.FILE_STUB) + "_overview.tab", proc);
00039 }
00040
00041 protected void initSubClassParticulars (Hashtable args) {
00042 processor = (SegmentDataProcessor) args;
00043 setMinSize ();
00044 firsts = (Segment []) processor.get (FIRSTS);
00045 if (processor.get (GENOME_LENGTHS) != null)
00046 lengths = (long []) processor.get (GENOME_LENGTHS);
00047 num_genes = (int []) args.get (TOTAL_GENES);
00048 num_segments = new int [num_genes.length];
00049 gene_data = (int [][]) processor.get (NUM_GENES_PER_MULT);
00050 System.out.println ("num_genes: " + gene_data [0].length);
00051 island_data = new int [gene_data [0].length];
00052 bp_data = new long [gene_data [0].length];
00053 row_number = gene_data [0].length + 2;
00054 sequence = -1;
00055 cur_multiplicity = ((Long) args.get (ALL_MULTIPLICITY)).longValue () + 1;
00056 super.initSubClassParticulars (args);
00057 writeHeaderInfo ();
00058 printGeneInformation ();
00059 doneWritingFile ();
00060 }
00061
00062 protected void setMinSize () {
00063 int isl_min = ((Integer) processor.get (ISLAND_MIN)).intValue();
00064 int bb_min = ((Integer) processor.get (BACKBONE_MIN)).intValue();
00065 min_size = Math.min(isl_min, bb_min);
00066 }
00067
00068 protected void performCalculations () {
00069 Segment seg = firsts [sequence];
00070 do {
00071 num_segments [sequence] += 1;
00072 if (seg.getSegmentLength (sequence) > min_size) {
00073 island_data [(int) seg.multiplicityType () - 1] += 1;
00074 bp_data [(int) seg.multiplicityType () - 1] += seg.getSegmentLength (sequence);
00075 }
00076 seg = seg.nexts [sequence];
00077 } while (seg != Segment.END);
00078 }
00079
00080 public void writeHeaderInfo () {
00081 try {
00082 out.println ("Sequence " + processor.get (REFERENCE) +
00083 " is the reference sequence.");
00084 out.println ("Island minimum: " + processor.get (ISLAND_MIN));
00085 out.println ("Backbone minimum: " + processor.get (BACKBONE_MIN));
00086 out.println ("Minimum length ratio considered a problem: " +
00087 processor.get (MAX_LENGTH_RATIO));
00088 out.println ("Ratio represents the difference in length between the " +
00089 "longest and shortest pieces over the average length.");
00090 out.println ("Minimum percent of gene that must be on island: " +
00091 processor.get (MINIMUM_PERCENT_CONTAINED));
00092 out.println ("File explanations: ");
00093 out.println ("_islandscoords.mo contains island id and coordinate information " +
00094 "for all islands in all sequences");
00095 out.println ("_problembb.mo contains backbone segments whose lengths vary" +
00096 "widely between sequences.");
00097 out.println ("_islands contains information on all the islands in a particular sequence." +
00098 "\nIt can be loaded into Mauve as features. A file is generated per sequence");
00099 out.println ("_island_genes contains similar information as _islands, but"
00100 + " by gene\n");
00101 } catch (Exception e) {
00102 System.out.println ("Couldn't write overview file.");
00103 e.printStackTrace ();
00104 }
00105 }
00106
00107 public void printGeneInformation () {
00108 printHeaders ();
00109 moreRowsToPrint ();
00110 printData ();
00111 out.println (IslandGeneFeatureWriter.buffer_count);
00112 out.println (IslandGeneFeatureWriter.ids);
00113 }
00114
00115 protected String getData (int column, int row) {
00116 if (row < island_data.length) {
00117 double percent = -1;
00118 long count = -1;
00119 switch (column) {
00120 case 0:
00121 return MauveHelperFunctions.getReadableMultiplicity (row + 1,
00122 num_genes.length);
00123 case 1:
00124 if (row == island_data.length - 1 && gene_data [sequence][row] == 0)
00125 gene_data [sequence][row] = num_genes [sequence] - total;
00126 else
00127 total += gene_data [sequence][row];
00128 count = gene_data [sequence][row];
00129 break;
00130 case 2:
00131 percent = gene_data [sequence][row] / (double) num_genes [sequence];
00132 break;
00133 case 3:
00134 count = island_data [row];
00135 break;
00136 case 4:
00137 percent = island_data [row] / (double) num_segments [sequence];
00138 break;
00139 case 5:
00140 count = bp_data [row];
00141 break;
00142 case 6:
00143 percent = bp_data [row] / (double) lengths [sequence];
00144 break;
00145 }
00146 if (percent != -1) {
00147 double_totals [column] += percent;
00148 return MauveHelperFunctions.doubleToString (percent * 100, 1);
00149 }
00150 else if (count != -1) {
00151 long_totals [column] += count;
00152 return count + "";
00153 }
00154 }
00155 else if (row == island_data.length){
00156 switch (column) {
00157 case 0:
00158 return TOTALS;
00159 case 1:
00160 return num_genes [sequence] + "";
00161 case 2:
00162 return "100";
00163 case 3:
00164 return num_segments [sequence] + "";
00165 case 4:
00166 return "100";
00167 case 5:
00168 return lengths [sequence] + "";
00169 case 6:
00170 return "100";
00171 default:
00172 return null;
00173 }
00174 }
00175 else {
00176 if (column == 0)
00177 return "unknown";
00178 if (column % 2 == 1) {
00179 long tot = (column == 1) ? num_genes [sequence] :
00180 ((column == 3) ? num_segments [sequence] : lengths [sequence]);
00181 return (tot - long_totals [column]) + "";
00182 }
00183 else
00184 return MauveHelperFunctions.doubleToString (100 - (
00185 double_totals [column] * 100), 1);
00186 }
00187 return null;
00188 }
00189
00190 protected boolean moreRowsToPrint () {
00191 if (row_number == island_data.length + 2) {
00192 if (sequence == num_genes.length - 1)
00193 return false;
00194 else {
00195 total = 0;
00196 sequence++;
00197 cur_multiplicity >>= 1;
00198 row_number = 0;
00199 if (sequence > 0) {
00200 Arrays.fill (island_data, 0);
00201 Arrays.fill (bp_data, 0);
00202 Arrays.fill (long_totals, 0);
00203 Arrays.fill (double_totals, 0);
00204 }
00205 performCalculations ();
00206 out.println ("Sequence " + sequence + ":");
00207 return true;
00208 }
00209 }
00210 else
00211 return true;
00212 }
00213
00214 protected Vector setColumnHeaders () {
00215 Vector titles = new Vector ();
00216 titles.add (Segment.MULTIPLICITY_STRING);
00217 titles.add (NUMBER_GENES);
00218 titles.add (PERCENT_TOTAL);
00219 titles.add (NUMBER_ISLANDS);
00220 titles.add (PERCENT_TOTAL);
00221 titles.add (NUM_BASE_PAIRS);
00222 titles.add (PERCENT_TOTAL);
00223 long_totals = new long [titles.size()];
00224 double_totals = new double [long_totals.length];
00225 return titles;
00226 }
00227
00228 protected boolean shouldPrintRow (int row) {
00229 if (row == island_data.length + 1 || row == island_data.length || (
00230 ((row + 1) & cur_multiplicity) == cur_multiplicity)) {
00231 return true;
00232 }
00233 else {
00234 return false;
00235 }
00236 }
00237
00238 }