00001 //---------------------------------------------------------------------------- 00002 /** @file SgStatistics.h 00003 Classes for incremental computation of statistical properties. 00004 The classes SgStatisticsBase (mean), SgStatistics (mean, variance) and 00005 SgStatisticsExt (mean, variance, min, max) are extensions of each other, 00006 but don't derive from each other, to avoid the overhead of virtual 00007 functions and because a base class has can (or could in the future) have 00008 functions that are not supported in derived classes (like removing a 00009 sample, which is easy in SgStatisticsBase, but not possible in 00010 SgStatisticsExt). However, member functions with the same meaning have the 00011 same name, so that the classes are easily replacable in user code and can 00012 be used as template arguments. 00013 */ 00014 //---------------------------------------------------------------------------- 00015 00016 #ifndef SG_STATISTICS_H 00017 #define SG_STATISTICS_H 00018 00019 #include <cmath> 00020 #include <iostream> 00021 #include <limits> 00022 #include <map> 00023 #include <sstream> 00024 #include <string> 00025 #include <vector> 00026 #include "SgException.h" 00027 #include "SgWrite.h" 00028 00029 //---------------------------------------------------------------------------- 00030 00031 /** Computes mean of a statistical variable. 00032 The template parameters are the floating point type and the counter type, 00033 depending on the precision-memory tradeoff. 00034 */ 00035 template<typename VALUE, typename COUNT> 00036 class SgStatisticsBase 00037 { 00038 public: 00039 SgStatisticsBase(); 00040 00041 /** Create statistics initialized with values. 00042 Note that value must be initialized to 0 if count is 0. 00043 Equivalent to creating a statistics and calling @c count times 00044 Add(val) 00045 */ 00046 SgStatisticsBase(VALUE val, COUNT count); 00047 00048 void Add(VALUE val); 00049 00050 void Remove(VALUE val); 00051 00052 /** Add a value n times */ 00053 void Add(VALUE val, COUNT n); 00054 00055 /** Remove a value n times. */ 00056 void Remove(VALUE val, COUNT n); 00057 00058 void Clear(); 00059 00060 const COUNT& Count() const; 00061 00062 /** Initialize with values. 00063 Equivalent to calling Clear() and calling @c count times 00064 Add(val) 00065 */ 00066 void Initialize(VALUE val, COUNT count); 00067 00068 /** Check if the mean value is defined. 00069 The mean value is defined, if the count if greater than zero. The 00070 result of this function is equivalent to <tt>Count() > 0</tt>, for 00071 integer count types and <tt>Count() > epsilon()</tt> for floating 00072 point count types. 00073 */ 00074 bool IsDefined() const; 00075 00076 const VALUE& Mean() const; 00077 00078 /** Write in human readable format. */ 00079 void Write(std::ostream& out) const; 00080 00081 /** Save in a compact platform-independent text format. 00082 The data is written in a single line, without trailing newline. 00083 */ 00084 void SaveAsText(std::ostream& out) const; 00085 00086 /** Load from text format. 00087 See SaveAsText() 00088 */ 00089 void LoadFromText(std::istream& in); 00090 00091 private: 00092 COUNT m_count; 00093 00094 VALUE m_mean; 00095 }; 00096 00097 template<typename VALUE, typename COUNT> 00098 inline SgStatisticsBase<VALUE,COUNT>::SgStatisticsBase() 00099 { 00100 Clear(); 00101 } 00102 00103 template<typename VALUE, typename COUNT> 00104 inline SgStatisticsBase<VALUE,COUNT>::SgStatisticsBase(VALUE val, COUNT count) 00105 : m_count(count), 00106 m_mean(val) 00107 { 00108 } 00109 00110 template<typename VALUE, typename COUNT> 00111 void SgStatisticsBase<VALUE,COUNT>::Add(VALUE val) 00112 { 00113 // Write order dependency: at least one class (SgUctSearch in lock-free 00114 // mode) uses SgStatisticsBase concurrently without locking and assumes 00115 // that m_mean is valid, if m_count is greater zero 00116 COUNT count = m_count; 00117 ++count; 00118 SG_ASSERT(! std::numeric_limits<COUNT>::is_exact 00119 || count > 0); // overflow 00120 val -= m_mean; 00121 m_mean += val / count; 00122 m_count = count; 00123 } 00124 00125 template<typename VALUE, typename COUNT> 00126 void SgStatisticsBase<VALUE,COUNT>::Remove(VALUE val) 00127 { 00128 // Write order dependency: at least on class (SgUctSearch in lock-free 00129 // mode) uses SgStatisticsBase concurrently without locking and assumes 00130 // that m_mean is valid, if m_count is greater zero 00131 COUNT count = m_count; 00132 if (count > 1) 00133 { 00134 --count; 00135 m_mean += (m_mean - val) / count; 00136 m_count = count; 00137 } 00138 else 00139 Clear(); 00140 } 00141 00142 template<typename VALUE, typename COUNT> 00143 void SgStatisticsBase<VALUE,COUNT>::Remove(VALUE val, COUNT n) 00144 { 00145 SG_ASSERT(m_count >= n); 00146 // Write order dependency: at least on class (SgUctSearch in lock-free 00147 // mode) uses SgStatisticsBase concurrently without locking and assumes 00148 // that m_mean is valid, if m_count is greater zero 00149 COUNT count = m_count; 00150 if (count > n) 00151 { 00152 count -= n; 00153 m_mean += n * (m_mean - val) / count; 00154 m_count = count; 00155 } 00156 else 00157 Clear(); 00158 } 00159 00160 template<typename VALUE, typename COUNT> 00161 void SgStatisticsBase<VALUE,COUNT>::Add(VALUE val, COUNT n) 00162 { 00163 // Write order dependency: at least one class (SgUctSearch in lock-free 00164 // mode) uses SgStatisticsBase concurrently without locking and assumes 00165 // that m_mean is valid, if m_count is greater zero 00166 COUNT count = m_count; 00167 count += n; 00168 SG_ASSERT(! std::numeric_limits<COUNT>::is_exact 00169 || count > 0); // overflow 00170 val -= m_mean; 00171 m_mean += n * val / count; 00172 m_count = count; 00173 } 00174 00175 template<typename VALUE, typename COUNT> 00176 inline void SgStatisticsBase<VALUE,COUNT>::Clear() 00177 { 00178 m_count = 0; 00179 m_mean = 0; 00180 } 00181 00182 template<typename VALUE, typename COUNT> 00183 inline const COUNT& SgStatisticsBase<VALUE,COUNT>::Count() const 00184 { 00185 return m_count; 00186 } 00187 00188 template<typename VALUE, typename COUNT> 00189 inline void SgStatisticsBase<VALUE,COUNT>::Initialize(VALUE val, COUNT count) 00190 { 00191 SG_ASSERT(count > 0); 00192 m_count = count; 00193 m_mean = val; 00194 } 00195 00196 template<typename VALUE, typename COUNT> 00197 inline bool SgStatisticsBase<VALUE,COUNT>::IsDefined() const 00198 { 00199 if (std::numeric_limits<COUNT>::is_exact) 00200 return m_count > 0; 00201 else 00202 return m_count > std::numeric_limits<COUNT>::epsilon(); 00203 } 00204 00205 template<typename VALUE, typename COUNT> 00206 void SgStatisticsBase<VALUE,COUNT>::LoadFromText(std::istream& in) 00207 { 00208 in >> m_count >> m_mean; 00209 } 00210 00211 template<typename VALUE, typename COUNT> 00212 inline const VALUE& SgStatisticsBase<VALUE,COUNT>::Mean() const 00213 { 00214 SG_ASSERT(IsDefined()); 00215 return m_mean; 00216 } 00217 00218 template<typename VALUE, typename COUNT> 00219 void SgStatisticsBase<VALUE,COUNT>::Write(std::ostream& out) const 00220 { 00221 if (IsDefined()) 00222 out << Mean(); 00223 else 00224 out << '-'; 00225 } 00226 00227 template<typename VALUE, typename COUNT> 00228 void SgStatisticsBase<VALUE,COUNT>::SaveAsText(std::ostream& out) const 00229 { 00230 out << m_count << ' ' << m_mean; 00231 } 00232 00233 //---------------------------------------------------------------------------- 00234 00235 /** Computes mean and variance of a statistical variable. 00236 The template parameters are the floating point type and the counter type, 00237 depending on the precision-memory tradeoff. 00238 */ 00239 template<typename VALUE, typename COUNT> 00240 class SgStatistics 00241 { 00242 public: 00243 SgStatistics(); 00244 00245 /** Create statistics initialized with values. 00246 Equivalent to creating a statistics and calling @c count times 00247 Add(val) 00248 */ 00249 SgStatistics(VALUE val, COUNT count); 00250 00251 void Add(VALUE val); 00252 00253 void Clear(); 00254 00255 bool IsDefined() const; 00256 00257 const VALUE& Mean() const; 00258 00259 const COUNT& Count() const; 00260 00261 VALUE Deviation() const; 00262 00263 VALUE Variance() const; 00264 00265 /** Write in human readable format. */ 00266 void Write(std::ostream& out) const; 00267 00268 /** Save in a compact platform-independent text format. 00269 The data is written in a single line, without trailing newline. 00270 */ 00271 void SaveAsText(std::ostream& out) const; 00272 00273 /** Load from text format. 00274 See SaveAsText() 00275 */ 00276 void LoadFromText(std::istream& in); 00277 00278 private: 00279 SgStatisticsBase<VALUE,COUNT> m_statisticsBase; 00280 00281 VALUE m_variance; 00282 }; 00283 00284 template<typename VALUE, typename COUNT> 00285 inline SgStatistics<VALUE,COUNT>::SgStatistics() 00286 { 00287 Clear(); 00288 } 00289 00290 template<typename VALUE, typename COUNT> 00291 inline SgStatistics<VALUE,COUNT>::SgStatistics(VALUE val, COUNT count) 00292 : m_statisticsBase(val, count) 00293 { 00294 m_variance = 0; 00295 } 00296 00297 template<typename VALUE, typename COUNT> 00298 void SgStatistics<VALUE,COUNT>::Add(VALUE val) 00299 { 00300 if (IsDefined()) 00301 { 00302 COUNT countOld = Count(); 00303 VALUE meanOld = Mean(); 00304 m_statisticsBase.Add(val); 00305 VALUE mean = Mean(); 00306 COUNT count = Count(); 00307 m_variance = (countOld * (m_variance + meanOld * meanOld) 00308 + val * val) / count - mean * mean; 00309 } 00310 else 00311 { 00312 m_statisticsBase.Add(val); 00313 m_variance = 0; 00314 } 00315 } 00316 00317 template<typename VALUE, typename COUNT> 00318 inline void SgStatistics<VALUE,COUNT>::Clear() 00319 { 00320 m_statisticsBase.Clear(); 00321 m_variance = 0; 00322 } 00323 00324 template<typename VALUE, typename COUNT> 00325 inline const COUNT& SgStatistics<VALUE,COUNT>::Count() const 00326 { 00327 return m_statisticsBase.Count(); 00328 } 00329 00330 template<typename VALUE, typename COUNT> 00331 inline VALUE SgStatistics<VALUE,COUNT>::Deviation() const 00332 { 00333 return std::sqrt(m_variance); 00334 } 00335 00336 template<typename VALUE, typename COUNT> 00337 inline bool SgStatistics<VALUE,COUNT>::IsDefined() const 00338 { 00339 return m_statisticsBase.IsDefined(); 00340 } 00341 00342 template<typename VALUE, typename COUNT> 00343 void SgStatistics<VALUE,COUNT>::LoadFromText(std::istream& in) 00344 { 00345 m_statisticsBase.LoadFromText(in); 00346 in >> m_variance; 00347 } 00348 00349 template<typename VALUE, typename COUNT> 00350 inline const VALUE& SgStatistics<VALUE,COUNT>::Mean() const 00351 { 00352 return m_statisticsBase.Mean(); 00353 } 00354 00355 template<typename VALUE, typename COUNT> 00356 inline VALUE SgStatistics<VALUE,COUNT>::Variance() const 00357 { 00358 return m_variance; 00359 } 00360 00361 template<typename VALUE, typename COUNT> 00362 void SgStatistics<VALUE,COUNT>::Write(std::ostream& out) const 00363 { 00364 if (IsDefined()) 00365 out << Mean() << " dev=" << Deviation(); 00366 else 00367 out << '-'; 00368 } 00369 00370 template<typename VALUE, typename COUNT> 00371 void SgStatistics<VALUE,COUNT>::SaveAsText(std::ostream& out) const 00372 { 00373 m_statisticsBase.SaveAsText(out); 00374 out << ' ' << m_variance; 00375 } 00376 00377 //---------------------------------------------------------------------------- 00378 00379 /** Extended version of SgStatistics. 00380 Also stores minimum and maximum values. 00381 The template parameters are the floating point type and the counter type, 00382 depending on the precision-memory tradeoff. 00383 */ 00384 template<typename VALUE, typename COUNT> 00385 class SgStatisticsExt 00386 { 00387 public: 00388 SgStatisticsExt(); 00389 00390 void Add(VALUE val); 00391 00392 void Clear(); 00393 00394 bool IsDefined() const; 00395 00396 const VALUE& Mean() const; 00397 00398 const COUNT& Count() const; 00399 00400 VALUE Max() const; 00401 00402 VALUE Min() const; 00403 00404 VALUE Deviation() const; 00405 00406 VALUE Variance() const; 00407 00408 void Write(std::ostream& out) const; 00409 00410 private: 00411 SgStatistics<VALUE,COUNT> m_statistics; 00412 00413 VALUE m_max; 00414 00415 VALUE m_min; 00416 }; 00417 00418 template<typename VALUE, typename COUNT> 00419 inline SgStatisticsExt<VALUE,COUNT>::SgStatisticsExt() 00420 { 00421 Clear(); 00422 } 00423 00424 template<typename VALUE, typename COUNT> 00425 void SgStatisticsExt<VALUE,COUNT>::Add(VALUE val) 00426 { 00427 m_statistics.Add(val); 00428 if (val > m_max) 00429 m_max = val; 00430 if (val < m_min) 00431 m_min = val; 00432 } 00433 00434 template<typename VALUE, typename COUNT> 00435 inline void SgStatisticsExt<VALUE,COUNT>::Clear() 00436 { 00437 m_statistics.Clear(); 00438 m_min = std::numeric_limits<VALUE>::max(); 00439 m_max = -std::numeric_limits<VALUE>::max(); 00440 } 00441 00442 template<typename VALUE, typename COUNT> 00443 inline const COUNT& SgStatisticsExt<VALUE,COUNT>::Count() const 00444 { 00445 return m_statistics.Count(); 00446 } 00447 00448 template<typename VALUE, typename COUNT> 00449 inline VALUE SgStatisticsExt<VALUE,COUNT>::Deviation() const 00450 { 00451 return m_statistics.Deviation(); 00452 } 00453 00454 template<typename VALUE, typename COUNT> 00455 inline bool SgStatisticsExt<VALUE,COUNT>::IsDefined() const 00456 { 00457 return m_statistics.IsDefined(); 00458 } 00459 00460 template<typename VALUE, typename COUNT> 00461 inline VALUE SgStatisticsExt<VALUE,COUNT>::Max() const 00462 { 00463 return m_max; 00464 } 00465 00466 template<typename VALUE, typename COUNT> 00467 inline const VALUE& SgStatisticsExt<VALUE,COUNT>::Mean() const 00468 { 00469 return m_statistics.Mean(); 00470 } 00471 00472 template<typename VALUE, typename COUNT> 00473 inline VALUE SgStatisticsExt<VALUE,COUNT>::Min() const 00474 { 00475 return m_min; 00476 } 00477 00478 template<typename VALUE, typename COUNT> 00479 inline VALUE SgStatisticsExt<VALUE,COUNT>::Variance() const 00480 { 00481 return m_statistics.Variance(); 00482 } 00483 00484 template<typename VALUE, typename COUNT> 00485 void SgStatisticsExt<VALUE,COUNT>::Write(std::ostream& out) const 00486 { 00487 if (IsDefined()) 00488 { 00489 m_statistics.Write(out); 00490 out << " min=" << m_min << " max=" << m_max; 00491 } 00492 else 00493 out << '-'; 00494 } 00495 00496 //---------------------------------------------------------------------------- 00497 00498 /** Set of named statistical variables. 00499 The template parameters are the floating point type and the counter type, 00500 depending on the precision-memory tradeoff. 00501 */ 00502 template<typename VALUE, typename COUNT> 00503 class SgStatisticsCollection 00504 { 00505 public: 00506 /** Add the statistics of another collection. 00507 The collections must contain the same entries. 00508 */ 00509 void Add(const SgStatisticsCollection<VALUE,COUNT>& collection); 00510 00511 void Clear(); 00512 00513 bool Contains(const std::string& name) const; 00514 00515 /** Create a new variable. */ 00516 void Create(const std::string& name); 00517 00518 const SgStatistics<VALUE,COUNT>& Get(const std::string& name) const; 00519 00520 SgStatistics<VALUE,COUNT>& Get(const std::string& name); 00521 00522 void Write(std::ostream& o) const; 00523 00524 private: 00525 typedef std::map<std::string,SgStatistics<VALUE,COUNT> > Map; 00526 00527 typedef typename Map::iterator Iterator; 00528 00529 typedef typename Map::const_iterator ConstIterator; 00530 00531 Map m_map; 00532 }; 00533 00534 template<typename VALUE, typename COUNT> 00535 void 00536 SgStatisticsCollection<VALUE,COUNT> 00537 ::Add(const SgStatisticsCollection<VALUE,COUNT>& collection) 00538 { 00539 if (m_map.size() != collection.m_map.size()) 00540 throw SgException("Incompatible statistics collections"); 00541 for (Iterator p = m_map.begin(); p != m_map.end(); ++p) 00542 { 00543 ConstIterator k = collection.m_map.find(p->first); 00544 if (k == collection.m_map.end()) 00545 throw SgException("Incompatible statistics collections"); 00546 p->second.Add(k->second); 00547 } 00548 } 00549 00550 template<typename VALUE, typename COUNT> 00551 void SgStatisticsCollection<VALUE,COUNT>::Clear() 00552 { 00553 for (Iterator p = m_map.begin(); p != m_map.end(); ++p) 00554 p->second.Clear(); 00555 } 00556 00557 template<typename VALUE, typename COUNT> 00558 bool SgStatisticsCollection<VALUE,COUNT>::Contains(const std::string& name) 00559 const 00560 { 00561 return (m_map.find(name) != m_map.end()); 00562 } 00563 00564 template<typename VALUE, typename COUNT> 00565 void SgStatisticsCollection<VALUE,COUNT>::Create(const std::string& name) 00566 { 00567 m_map[name] = SgStatistics<VALUE,COUNT>(); 00568 } 00569 00570 template<typename VALUE, typename COUNT> 00571 const SgStatistics<VALUE,COUNT>& 00572 SgStatisticsCollection<VALUE,COUNT>::Get(const std::string& name) const 00573 { 00574 ConstIterator p = m_map.find(name); 00575 if (p == m_map.end()) 00576 { 00577 std::ostringstream o; 00578 o << "Unknown statistics name " << name << '.'; 00579 throw SgException(o.str()); 00580 } 00581 return p->second; 00582 } 00583 00584 template<typename VALUE, typename COUNT> 00585 SgStatistics<VALUE,COUNT>& 00586 SgStatisticsCollection<VALUE,COUNT>::Get(const std::string& name) 00587 { 00588 Iterator p = m_map.find(name); 00589 if (p == m_map.end()) 00590 { 00591 std::ostringstream o; 00592 o << "Unknown statistics name " << name << '.'; 00593 throw SgException(o.str()); 00594 } 00595 return p->second; 00596 } 00597 00598 template<typename VALUE, typename COUNT> 00599 void SgStatisticsCollection<VALUE,COUNT>::Write(std::ostream& o) const 00600 { 00601 for (ConstIterator p = m_map.begin(); p != m_map.end(); ++p) 00602 o << p->first << ": " << p->second.Write(o) << '\n'; 00603 } 00604 00605 //---------------------------------------------------------------------------- 00606 00607 /** Histogram. 00608 The template parameters are the floating point type and the counter type, 00609 depending on the precision-memory tradeoff. 00610 */ 00611 template<typename VALUE, typename COUNT> 00612 class SgHistogram 00613 { 00614 public: 00615 SgHistogram(); 00616 00617 SgHistogram(VALUE min, VALUE max, int bins); 00618 00619 /** Reinitialize and clear histogram. */ 00620 void Init(VALUE min, VALUE max, int bins); 00621 00622 void Add(VALUE value); 00623 00624 void Clear(); 00625 00626 int Bins() const; 00627 00628 COUNT Count() const; 00629 00630 /** Get count in a certain bin. */ 00631 COUNT Count(int i) const; 00632 00633 /** Write as x,y-table. 00634 Writes the historgram in a format that likely can be used by other 00635 programs. Writes one x,y pair per line. The separator is TAB. 00636 The x-values are the left border values of the bins, the y-values 00637 are the counts of the bins. 00638 */ 00639 void Write(std::ostream& out) const; 00640 00641 /** Write with labels. 00642 Example output with label "Value", the numbers in brackets are the 00643 left border of each bin: 00644 @verbatim 00645 Value[0] 100 00646 Value[10] 2000 00647 Value[20] 500 00648 @endverbatim 00649 */ 00650 void WriteWithLabels(std::ostream& out, const std::string& label) const; 00651 00652 private: 00653 typedef std::vector<COUNT> Vector; 00654 00655 int m_bins; 00656 00657 COUNT m_count; 00658 00659 VALUE m_binSize; 00660 00661 VALUE m_min; 00662 00663 VALUE m_max; 00664 00665 Vector m_array; 00666 }; 00667 00668 template<typename VALUE, typename COUNT> 00669 SgHistogram<VALUE,COUNT>::SgHistogram() 00670 { 00671 Init(0, 1, 1); 00672 } 00673 00674 template<typename VALUE, typename COUNT> 00675 SgHistogram<VALUE,COUNT>::SgHistogram(VALUE min, VALUE max, int bins) 00676 { 00677 Init(min, max, bins); 00678 } 00679 00680 template<typename VALUE, typename COUNT> 00681 void SgHistogram<VALUE,COUNT>::Add(VALUE value) 00682 { 00683 ++m_count; 00684 int i = static_cast<int>((value - m_min) / m_binSize); 00685 if (i < 0) 00686 i = 0; 00687 if (i >= m_bins) 00688 i = m_bins - 1; 00689 ++m_array[i]; 00690 } 00691 00692 template<typename VALUE, typename COUNT> 00693 int SgHistogram<VALUE,COUNT>::Bins() const 00694 { 00695 return m_bins; 00696 } 00697 00698 template<typename VALUE, typename COUNT> 00699 void SgHistogram<VALUE,COUNT>::Clear() 00700 { 00701 m_count = 0; 00702 for (typename Vector::iterator it = m_array.begin(); it != m_array.end(); 00703 ++ it) 00704 *it = 0; 00705 } 00706 00707 template<typename VALUE, typename COUNT> 00708 COUNT SgHistogram<VALUE,COUNT>::Count() const 00709 { 00710 return m_count; 00711 } 00712 00713 template<typename VALUE, typename COUNT> 00714 COUNT SgHistogram<VALUE,COUNT>::Count(int i) const 00715 { 00716 SG_ASSERT(i >= 0); 00717 SG_ASSERT(i < m_bins); 00718 return m_array[i]; 00719 } 00720 00721 template<typename VALUE, typename COUNT> 00722 void SgHistogram<VALUE,COUNT>::Init(VALUE min, VALUE max, int bins) 00723 { 00724 m_array.resize(bins); 00725 m_min = min; 00726 m_max = max; 00727 m_bins = bins; 00728 m_binSize = (m_max - m_min) / m_bins; 00729 Clear(); 00730 } 00731 00732 template<typename VALUE, typename COUNT> 00733 void SgHistogram<VALUE,COUNT>::Write(std::ostream& out) const 00734 { 00735 for (int i = 0; i < m_bins; ++i) 00736 out << (m_min + i * m_binSize) << '\t' << m_array[i] << '\n'; 00737 00738 } 00739 00740 template<typename VALUE, typename COUNT> 00741 void SgHistogram<VALUE,COUNT>::WriteWithLabels(std::ostream& out, 00742 const std::string& label) const 00743 { 00744 for (int i = 0; i < m_bins; ++i) 00745 { 00746 std::ostringstream binLabel; 00747 binLabel << label << '[' << (m_min + i * m_binSize) << ']'; 00748 out << SgWriteLabel(binLabel.str()) << m_array[i] << '\n'; 00749 } 00750 } 00751 00752 //---------------------------------------------------------------------------- 00753 00754 #endif // SG_STATISTICS_H