00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
#include <volume_io/internal_volume_io.h>
00016
#include <bicpl/numerical.h>
00017
#include <bicpl/data_structures.h>
00018
00019
#ifndef lint
00020
static char rcsid[] =
"$Header: /software/source//libraries/bicpl/Numerical/statistics.c,v 1.14 2000/02/06 15:30:41 stever Exp $";
00021
#endif
00022
00023 #define DEFAULT_N_MEDIAN_BOXES 100000
00024
00025 #define MAX_SAMPLES_RECORDED 100000
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045 public void compute_statistics(
00046
int n,
00047 Real samples[],
00048 Real *min_value,
00049 Real *max_value,
00050 Real *mean_value,
00051 Real *std_dev,
00052 Real *median )
00053 {
00054
int i;
00055 Real x, min_median, max_median, median_error;
00056
statistics_struct stats;
00057 BOOLEAN done;
00058
00059
if( median != NULL )
00060 {
00061 min_median = 0.0;
00062 max_median = 0.0;
00063
00064 for_less( i, 0, n )
00065 {
00066 x = samples[i];
00067
if( i == 0 )
00068 {
00069 min_median = x;
00070 max_median = x;
00071 }
00072
else if( x < min_median )
00073 min_median = x;
00074
else if( x > max_median )
00075 max_median = x;
00076 }
00077 }
00078
else
00079 {
00080 min_median = 0.0;
00081 max_median = -1.0;
00082 }
00083
00084
initialize_statistics( &stats, min_median, max_median );
00085
00086 done =
FALSE;
00087
00088
while( !done )
00089 {
00090 for_less( i, 0, n )
00091
add_sample_to_statistics( &stats, samples[i] );
00092
00093
get_statistics( &stats, NULL, mean_value, median, &median_error,
00094 min_value, max_value, std_dev );
00095
00096
if( median != NULL && median_error > 0.0 )
00097
restart_statistics_with_narrower_median_range( &stats );
00098
else
00099 done =
TRUE;
00100 }
00101
00102
terminate_statistics( &stats );
00103 }
00104
00105 public void initialize_statistics(
00106
statistics_struct *stats,
00107 Real median_lower_bound,
00108 Real median_upper_bound )
00109 {
00110
int i;
00111
00112 stats->
n_samples = 0;
00113 stats->
sum_x = 0.0;
00114 stats->
sum_xx = 0.0;
00115
00116 stats->
min_median_range = median_lower_bound;
00117 stats->
max_median_range = median_upper_bound;
00118
00119
if( median_lower_bound < median_upper_bound )
00120 {
00121 stats->
n_median_boxes =
DEFAULT_N_MEDIAN_BOXES;
00122 ALLOC( stats->
median_box_counts, stats->
n_median_boxes );
00123 ALLOC( stats->
median_box_values, stats->
n_median_boxes );
00124
00125 for_less( i, 0, stats->
n_median_boxes )
00126 stats->
median_box_counts[i] = 0;
00127
00128 stats->
n_below_median_range = 0;
00129 stats->
n_above_median_range = 0;
00130 }
00131 }
00132
00133 public void add_sample_to_statistics(
00134
statistics_struct *stats,
00135 Real sample )
00136 {
00137
int median_box;
00138
00139
if( stats->
n_samples == 0 )
00140 {
00141 stats->
min_value = sample;
00142 stats->
max_value = sample;
00143 }
00144
else if( sample < stats->
min_value )
00145 stats->
min_value = sample;
00146
else if( sample > stats->
max_value )
00147 stats->
max_value = sample;
00148
00149
if( stats->
n_samples <
MAX_SAMPLES_RECORDED )
00150 {
00151 SET_ARRAY_SIZE( stats->
samples, stats->
n_samples,
00152 stats->
n_samples+1, DEFAULT_CHUNK_SIZE );
00153 stats->
samples[stats->
n_samples] = sample;
00154 }
00155
else if( stats->
n_samples ==
MAX_SAMPLES_RECORDED )
00156 FREE( stats->
samples );
00157
00158 stats->
n_samples += 1;
00159
00160 stats->
sum_x += sample;
00161 stats->
sum_xx += sample * sample;
00162
00163
if( stats->
min_median_range < stats->
max_median_range )
00164 {
00165
if( sample < stats->
min_median_range )
00166 ++stats->
n_below_median_range;
00167
else if( sample >= stats->
max_median_range )
00168 ++stats->
n_above_median_range;
00169
else
00170 {
00171 median_box = (
int) ((Real) stats->
n_median_boxes *
00172 (sample - stats->
min_median_range)/
00173 (stats->
max_median_range - stats->
min_median_range));
00174
00175 ++stats->
median_box_counts[median_box];
00176
if( stats->
median_box_counts[median_box] == 1 )
00177 stats->
median_box_values[median_box] = sample;
00178 }
00179 }
00180 }
00181
00182 private void get_median(
00183
statistics_struct *stats,
00184 Real *min_range,
00185 Real *max_range )
00186 {
00187
int box_index, median_index;
00188
00189 median_index = stats->
n_samples / 2;
00190
00191
if( stats->
n_samples <=
MAX_SAMPLES_RECORDED )
00192 {
00193
int i, j, best_index;
00194 Real tmp;
00195
00196 for_less( i, 0, stats->
n_samples-1 )
00197 {
00198 best_index = i;
00199 for_less( j, i+1, stats->
n_samples )
00200 {
00201
if( stats->
samples[j] < stats->
samples[best_index] )
00202 best_index = j;
00203 }
00204
00205 tmp = stats->
samples[best_index];
00206 stats->
samples[best_index] = stats->
samples[i];
00207 stats->
samples[i] = tmp;
00208 }
00209
00210 *min_range = stats->
samples[median_index];
00211 *max_range = stats->
samples[median_index];
00212
00213
return;
00214 }
00215
00216
if( stats->
min_median_range >= stats->
max_median_range )
00217 {
00218 *min_range = -1.0e30;
00219 *max_range = 1.0e30;
00220
return;
00221 }
00222
00223
if( median_index < stats->
n_below_median_range )
00224 {
00225 *min_range = stats->
min_value;
00226 *max_range = stats->
min_median_range;
00227
return;
00228 }
00229
00230 median_index -= stats->
n_below_median_range;
00231
00232 box_index = 0;
00233
while( box_index < stats->
n_median_boxes &&
00234 median_index >= stats->
median_box_counts[box_index] )
00235 {
00236 median_index -= stats->
median_box_counts[box_index];
00237 ++box_index;
00238 }
00239
00240
if( box_index == stats->
n_median_boxes )
00241 {
00242 *min_range = stats->
max_median_range;
00243 *max_range = stats->
max_value;
00244 }
00245
else
00246 {
00247
if( stats->
median_box_counts[box_index] == 1 )
00248 {
00249 *min_range = stats->
median_box_values[box_index];
00250 *max_range = stats->
median_box_values[box_index];
00251 }
00252
else
00253 {
00254 *min_range = stats->
min_median_range +
00255 (stats->
max_median_range - stats->
min_median_range) *
00256 (Real) box_index / (Real) stats->
n_median_boxes;
00257 *max_range = stats->
min_median_range +
00258 (stats->
max_median_range - stats->
min_median_range) *
00259 (Real) (box_index+1) / (Real) stats->
n_median_boxes;
00260 }
00261 }
00262 }
00263
00264 public void restart_statistics_with_narrower_median_range(
00265
statistics_struct *stats )
00266 {
00267 Real min_median_range, max_median_range;
00268
00269
get_median( stats, &min_median_range, &max_median_range );
00270
00271
if( min_median_range == max_median_range )
00272 {
00273 min_median_range = stats->
min_median_range;
00274 max_median_range = stats->
max_median_range;
00275 print_error(
"Median range already narrow enough.\n" );
00276 }
00277
00278
terminate_statistics( stats );
00279
00280
initialize_statistics( stats, min_median_range, max_median_range );
00281 }
00282
00283 public void get_statistics(
00284
statistics_struct *stats,
00285
int *n_samples,
00286 Real *mean,
00287 Real *median,
00288 Real *median_error,
00289 Real *min_value,
00290 Real *max_value,
00291 Real *std_deviation )
00292 {
00293
int n;
00294 Real sum_x, sum_xx, min_median_range, max_median_range, variance;
00295
00296
if( n_samples != NULL )
00297 *n_samples = stats->
n_samples;
00298
00299
if( stats->
n_samples <= 0 )
00300 {
00301
if( median_error != NULL )
00302 *median_error = 0.0;
00303
return;
00304 }
00305
00306
if( median != NULL )
00307 {
00308
get_median( stats, &min_median_range, &max_median_range );
00309
00310
if( min_median_range == max_median_range )
00311 {
00312 *median = min_median_range;
00313
if( median_error != NULL )
00314 *median_error = 0.0;
00315 }
00316
else
00317 {
00318 *median = (min_median_range + max_median_range) / 2.0;
00319
if( median_error != NULL )
00320 *median_error = (max_median_range - min_median_range) / 2.0;
00321 }
00322 }
00323
00324
if( min_value != NULL )
00325 *min_value = stats->
min_value;
00326
00327
if( max_value != NULL )
00328 *max_value = stats->
max_value;
00329
00330 n = stats->
n_samples;
00331 sum_x = stats->
sum_x;
00332 sum_xx = stats->
sum_xx;
00333
00334
if( mean != NULL )
00335 *mean = sum_x / (Real) n;
00336
00337
if( n == 1 )
00338 variance = 0.0;
00339
else
00340 variance = (sum_xx - sum_x * sum_x / (Real) n) / (Real) (n - 1);
00341
00342
if( std_deviation != NULL )
00343 {
00344
if( variance <= 0.0 )
00345 *std_deviation = 0.0;
00346
else
00347 *std_deviation = sqrt( variance );
00348 }
00349 }
00350
00351 public void terminate_statistics(
00352
statistics_struct *stats )
00353 {
00354
if( stats->
n_samples > 0 && stats->
n_samples <=
MAX_SAMPLES_RECORDED )
00355 FREE( stats->
samples );
00356
00357
if( stats->
min_median_range < stats->
max_median_range )
00358 {
00359 FREE( stats->
median_box_counts );
00360 FREE( stats->
median_box_values );
00361 }
00362 }
00363
00364 public void compute_mean_and_variance(
00365
int n,
00366 Real samples[],
00367 Real *mean,
00368 Real *variance )
00369 {
00370
int i;
00371 Real sum_x, sum_xx;
00372
00373 sum_x = 0.0;
00374 sum_xx = 0.0;
00375
00376 for_less( i, 0, n )
00377 {
00378 sum_x += samples[i];
00379 sum_xx += samples[i] * samples[i];
00380 }
00381
00382 *mean = sum_x / (Real) n;
00383
00384
if( n == 1 )
00385 *variance = 0.0;
00386
else
00387 *variance = (sum_xx - sum_x * sum_x / (Real) n) / (Real) (n - 1);
00388 }
00389
00390 public Real
compute_two_means_t_statistic(
00391
int n1,
00392 Real samples1[],
00393
int n2,
00394 Real samples2[] )
00395 {
00396 Real mean1, mean2, variance1, variance2, std_dev, std_err;
00397 Real t;
00398
00399
compute_mean_and_variance( n1, samples1, &mean1, &variance1 );
00400
compute_mean_and_variance( n2, samples2, &mean2, &variance2 );
00401
00402 std_dev = sqrt( ((Real) n1 * variance1 + (Real) n2 * variance2) /
00403 (Real) (n1 + n2 - 2) );
00404
00405 std_err = std_dev * sqrt( 1.0 / (Real) n1 + 1.0 / (Real) n2 );
00406
00407
if( std_err == 0.0 )
00408 t = 0.0;
00409
else
00410 t = (mean1 - mean2) / std_err;
00411
00412
return( t );
00413 }