Main Page | Modules | Data Structures | File List | Data Fields | Globals | Related Pages

statistics.c

Go to the documentation of this file.
00001 /* ---------------------------------------------------------------------------- 00002 @COPYRIGHT : 00003 Copyright 1993,1994,1995 David MacDonald, 00004 McConnell Brain Imaging Centre, 00005 Montreal Neurological Institute, McGill University. 00006 Permission to use, copy, modify, and distribute this 00007 software and its documentation for any purpose and without 00008 fee is hereby granted, provided that the above copyright 00009 notice appear in all copies. The author and McGill University 00010 make no representations about the suitability of this 00011 software for any purpose. It is provided "as is" without 00012 express or implied warranty. 00013 ---------------------------------------------------------------------------- */ 00014 00015 #include <volume_io/internal_volume_io.h> 00016 #include <bicpl/numerical.h> 00017 #include <bicpl/data_structures.h> 00018 00019 #ifndef lint 00020 static char rcsid[] = "$Header: /software/source//libraries/bicpl/Numerical/statistics.c,v 1.14 2000/02/06 15:30:41 stever Exp $"; 00021 #endif 00022 00023 #define DEFAULT_N_MEDIAN_BOXES 100000 00024 00025 #define MAX_SAMPLES_RECORDED 100000 00026 00027 /* ----------------------------- MNI Header ----------------------------------- 00028 @NAME : compute_statistics 00029 @INPUT : n 00030 samples 00031 @OUTPUT : min_value 00032 max_value 00033 mean_value 00034 std_dev 00035 median - may be null pointer if median not desired 00036 @RETURNS : 00037 @DESCRIPTION: Computes the most basic statistics on a set of n samples. 00038 @METHOD : 00039 @GLOBALS : 00040 @CALLS : 00041 @CREATED : 1993 David MacDonald 00042 @MODIFIED : 00043 ---------------------------------------------------------------------------- */ 00044 00045 public void compute_statistics( 00046 int n, 00047 Real samples[], 00048 Real *min_value, 00049 Real *max_value, 00050 Real *mean_value, 00051 Real *std_dev, 00052 Real *median ) 00053 { 00054 int i; 00055 Real x, min_median, max_median, median_error; 00056 statistics_struct stats; 00057 BOOLEAN done; 00058 00059 if( median != NULL ) 00060 { 00061 min_median = 0.0; 00062 max_median = 0.0; 00063 00064 for_less( i, 0, n ) 00065 { 00066 x = samples[i]; 00067 if( i == 0 ) 00068 { 00069 min_median = x; 00070 max_median = x; 00071 } 00072 else if( x < min_median ) 00073 min_median = x; 00074 else if( x > max_median ) 00075 max_median = x; 00076 } 00077 } 00078 else 00079 { 00080 min_median = 0.0; 00081 max_median = -1.0; 00082 } 00083 00084 initialize_statistics( &stats, min_median, max_median ); 00085 00086 done = FALSE; 00087 00088 while( !done ) 00089 { 00090 for_less( i, 0, n ) 00091 add_sample_to_statistics( &stats, samples[i] ); 00092 00093 get_statistics( &stats, NULL, mean_value, median, &median_error, 00094 min_value, max_value, std_dev ); 00095 00096 if( median != NULL && median_error > 0.0 ) 00097 restart_statistics_with_narrower_median_range( &stats ); 00098 else 00099 done = TRUE; 00100 } 00101 00102 terminate_statistics( &stats ); 00103 } 00104 00105 public void initialize_statistics( 00106 statistics_struct *stats, 00107 Real median_lower_bound, 00108 Real median_upper_bound ) 00109 { 00110 int i; 00111 00112 stats->n_samples = 0; 00113 stats->sum_x = 0.0; 00114 stats->sum_xx = 0.0; 00115 00116 stats->min_median_range = median_lower_bound; 00117 stats->max_median_range = median_upper_bound; 00118 00119 if( median_lower_bound < median_upper_bound ) 00120 { 00121 stats->n_median_boxes = DEFAULT_N_MEDIAN_BOXES; 00122 ALLOC( stats->median_box_counts, stats->n_median_boxes ); 00123 ALLOC( stats->median_box_values, stats->n_median_boxes ); 00124 00125 for_less( i, 0, stats->n_median_boxes ) 00126 stats->median_box_counts[i] = 0; 00127 00128 stats->n_below_median_range = 0; 00129 stats->n_above_median_range = 0; 00130 } 00131 } 00132 00133 public void add_sample_to_statistics( 00134 statistics_struct *stats, 00135 Real sample ) 00136 { 00137 int median_box; 00138 00139 if( stats->n_samples == 0 ) 00140 { 00141 stats->min_value = sample; 00142 stats->max_value = sample; 00143 } 00144 else if( sample < stats->min_value ) 00145 stats->min_value = sample; 00146 else if( sample > stats->max_value ) 00147 stats->max_value = sample; 00148 00149 if( stats->n_samples < MAX_SAMPLES_RECORDED ) 00150 { 00151 SET_ARRAY_SIZE( stats->samples, stats->n_samples, 00152 stats->n_samples+1, DEFAULT_CHUNK_SIZE ); 00153 stats->samples[stats->n_samples] = sample; 00154 } 00155 else if( stats->n_samples == MAX_SAMPLES_RECORDED ) 00156 FREE( stats->samples ); 00157 00158 stats->n_samples += 1; 00159 00160 stats->sum_x += sample; 00161 stats->sum_xx += sample * sample; 00162 00163 if( stats->min_median_range < stats->max_median_range ) 00164 { 00165 if( sample < stats->min_median_range ) 00166 ++stats->n_below_median_range; 00167 else if( sample >= stats->max_median_range ) 00168 ++stats->n_above_median_range; 00169 else 00170 { 00171 median_box = (int) ((Real) stats->n_median_boxes * 00172 (sample - stats->min_median_range)/ 00173 (stats->max_median_range - stats->min_median_range)); 00174 00175 ++stats->median_box_counts[median_box]; 00176 if( stats->median_box_counts[median_box] == 1 ) 00177 stats->median_box_values[median_box] = sample; 00178 } 00179 } 00180 } 00181 00182 private void get_median( 00183 statistics_struct *stats, 00184 Real *min_range, 00185 Real *max_range ) 00186 { 00187 int box_index, median_index; 00188 00189 median_index = stats->n_samples / 2; 00190 00191 if( stats->n_samples <= MAX_SAMPLES_RECORDED ) 00192 { 00193 int i, j, best_index; 00194 Real tmp; 00195 00196 for_less( i, 0, stats->n_samples-1 ) 00197 { 00198 best_index = i; 00199 for_less( j, i+1, stats->n_samples ) 00200 { 00201 if( stats->samples[j] < stats->samples[best_index] ) 00202 best_index = j; 00203 } 00204 00205 tmp = stats->samples[best_index]; 00206 stats->samples[best_index] = stats->samples[i]; 00207 stats->samples[i] = tmp; 00208 } 00209 00210 *min_range = stats->samples[median_index]; 00211 *max_range = stats->samples[median_index]; 00212 00213 return; 00214 } 00215 00216 if( stats->min_median_range >= stats->max_median_range ) 00217 { 00218 *min_range = -1.0e30; 00219 *max_range = 1.0e30; 00220 return; 00221 } 00222 00223 if( median_index < stats->n_below_median_range ) 00224 { 00225 *min_range = stats->min_value; 00226 *max_range = stats->min_median_range; 00227 return; 00228 } 00229 00230 median_index -= stats->n_below_median_range; 00231 00232 box_index = 0; 00233 while( box_index < stats->n_median_boxes && 00234 median_index >= stats->median_box_counts[box_index] ) 00235 { 00236 median_index -= stats->median_box_counts[box_index]; 00237 ++box_index; 00238 } 00239 00240 if( box_index == stats->n_median_boxes ) 00241 { 00242 *min_range = stats->max_median_range; 00243 *max_range = stats->max_value; 00244 } 00245 else 00246 { 00247 if( stats->median_box_counts[box_index] == 1 ) 00248 { 00249 *min_range = stats->median_box_values[box_index]; 00250 *max_range = stats->median_box_values[box_index]; 00251 } 00252 else 00253 { 00254 *min_range = stats->min_median_range + 00255 (stats->max_median_range - stats->min_median_range) * 00256 (Real) box_index / (Real) stats->n_median_boxes; 00257 *max_range = stats->min_median_range + 00258 (stats->max_median_range - stats->min_median_range) * 00259 (Real) (box_index+1) / (Real) stats->n_median_boxes; 00260 } 00261 } 00262 } 00263 00264 public void restart_statistics_with_narrower_median_range( 00265 statistics_struct *stats ) 00266 { 00267 Real min_median_range, max_median_range; 00268 00269 get_median( stats, &min_median_range, &max_median_range ); 00270 00271 if( min_median_range == max_median_range ) 00272 { 00273 min_median_range = stats->min_median_range; 00274 max_median_range = stats->max_median_range; 00275 print_error( "Median range already narrow enough.\n" ); 00276 } 00277 00278 terminate_statistics( stats ); 00279 00280 initialize_statistics( stats, min_median_range, max_median_range ); 00281 } 00282 00283 public void get_statistics( 00284 statistics_struct *stats, 00285 int *n_samples, 00286 Real *mean, 00287 Real *median, 00288 Real *median_error, 00289 Real *min_value, 00290 Real *max_value, 00291 Real *std_deviation ) 00292 { 00293 int n; 00294 Real sum_x, sum_xx, min_median_range, max_median_range, variance; 00295 00296 if( n_samples != NULL ) 00297 *n_samples = stats->n_samples; 00298 00299 if( stats->n_samples <= 0 ) 00300 { 00301 if( median_error != NULL ) 00302 *median_error = 0.0; 00303 return; 00304 } 00305 00306 if( median != NULL ) 00307 { 00308 get_median( stats, &min_median_range, &max_median_range ); 00309 00310 if( min_median_range == max_median_range ) 00311 { 00312 *median = min_median_range; 00313 if( median_error != NULL ) 00314 *median_error = 0.0; 00315 } 00316 else 00317 { 00318 *median = (min_median_range + max_median_range) / 2.0; 00319 if( median_error != NULL ) 00320 *median_error = (max_median_range - min_median_range) / 2.0; 00321 } 00322 } 00323 00324 if( min_value != NULL ) 00325 *min_value = stats->min_value; 00326 00327 if( max_value != NULL ) 00328 *max_value = stats->max_value; 00329 00330 n = stats->n_samples; 00331 sum_x = stats->sum_x; 00332 sum_xx = stats->sum_xx; 00333 00334 if( mean != NULL ) 00335 *mean = sum_x / (Real) n; 00336 00337 if( n == 1 ) 00338 variance = 0.0; 00339 else 00340 variance = (sum_xx - sum_x * sum_x / (Real) n) / (Real) (n - 1); 00341 00342 if( std_deviation != NULL ) 00343 { 00344 if( variance <= 0.0 ) 00345 *std_deviation = 0.0; 00346 else 00347 *std_deviation = sqrt( variance ); 00348 } 00349 } 00350 00351 public void terminate_statistics( 00352 statistics_struct *stats ) 00353 { 00354 if( stats->n_samples > 0 && stats->n_samples <= MAX_SAMPLES_RECORDED ) 00355 FREE( stats->samples ); 00356 00357 if( stats->min_median_range < stats->max_median_range ) 00358 { 00359 FREE( stats->median_box_counts ); 00360 FREE( stats->median_box_values ); 00361 } 00362 } 00363 00364 public void compute_mean_and_variance( 00365 int n, 00366 Real samples[], 00367 Real *mean, 00368 Real *variance ) 00369 { 00370 int i; 00371 Real sum_x, sum_xx; 00372 00373 sum_x = 0.0; 00374 sum_xx = 0.0; 00375 00376 for_less( i, 0, n ) 00377 { 00378 sum_x += samples[i]; 00379 sum_xx += samples[i] * samples[i]; 00380 } 00381 00382 *mean = sum_x / (Real) n; 00383 00384 if( n == 1 ) 00385 *variance = 0.0; 00386 else 00387 *variance = (sum_xx - sum_x * sum_x / (Real) n) / (Real) (n - 1); 00388 } 00389 00390 public Real compute_two_means_t_statistic( 00391 int n1, 00392 Real samples1[], 00393 int n2, 00394 Real samples2[] ) 00395 { 00396 Real mean1, mean2, variance1, variance2, std_dev, std_err; 00397 Real t; 00398 00399 compute_mean_and_variance( n1, samples1, &mean1, &variance1 ); 00400 compute_mean_and_variance( n2, samples2, &mean2, &variance2 ); 00401 00402 std_dev = sqrt( ((Real) n1 * variance1 + (Real) n2 * variance2) / 00403 (Real) (n1 + n2 - 2) ); 00404 00405 std_err = std_dev * sqrt( 1.0 / (Real) n1 + 1.0 / (Real) n2 ); 00406 00407 if( std_err == 0.0 ) 00408 t = 0.0; 00409 else 00410 t = (mean1 - mean2) / std_err; 00411 00412 return( t ); 00413 }

Generated on Wed Jul 28 09:10:58 2004 for BICPL by doxygen 1.3.7