#pragma once #include #include #include //! See more details here http://en.wikipedia.org/wiki/Kahan_summation_algorithm template class TKahanAccumulator { public: using TValueType = TAccumulateType; template explicit TKahanAccumulator(const TFloatType x) : Sum_(x) , Compensation_() { } TKahanAccumulator() : Sum_() , Compensation_() { } template TKahanAccumulator& operator=(const TFloatType& rhs) { Sum_ = TValueType(rhs); Compensation_ = TValueType(); return *this; } TValueType Get() const { return Sum_ + Compensation_; } template inline operator TFloatType() const { return Get(); } template inline bool operator<(const TKahanAccumulator& other) const { return Get() < other.Get(); } template inline bool operator<=(const TKahanAccumulator& other) const { return !(other < *this); } template inline bool operator>(const TKahanAccumulator& other) const { return other < *this; } template inline bool operator>=(const TKahanAccumulator& other) const { return !(*this < other); } template inline TKahanAccumulator& operator+=(const TFloatType x) { const TValueType y = TValueType(x) - Compensation_; const TValueType t = Sum_ + y; Compensation_ = (t - Sum_) - y; Sum_ = t; return *this; } template inline TKahanAccumulator& operator-=(const TFloatType x) { return *this += -TValueType(x); } template inline TKahanAccumulator& operator*=(const TFloatType x) { return *this = TValueType(*this) * TValueType(x); } template inline TKahanAccumulator& operator/=(const TFloatType x) { return *this = TValueType(*this) / TValueType(x); } Y_SAVELOAD_DEFINE(Sum_, Compensation_); private: TValueType Sum_; TValueType Compensation_; }; template inline const TKahanAccumulator operator+(TKahanAccumulator lhs, const TFloatType rhs) { return lhs += rhs; } template inline const TKahanAccumulator operator-(TKahanAccumulator lhs, const TFloatType rhs) { return lhs -= rhs; } template inline const TKahanAccumulator operator*(TKahanAccumulator lhs, const TFloatType rhs) { return lhs *= rhs; } template inline const TKahanAccumulator operator/(TKahanAccumulator lhs, const TFloatType rhs) { return lhs /= rhs; } template static inline TAccumulatorType TypedFastAccumulate(It begin, It end) { TAccumulatorType accumulator = TAccumulatorType(); for (; begin + 15 < end; begin += 16) { accumulator += *(begin + 0) + *(begin + 1) + *(begin + 2) + *(begin + 3) + *(begin + 4) + *(begin + 5) + *(begin + 6) + *(begin + 7) + *(begin + 8) + *(begin + 9) + *(begin + 10) + *(begin + 11) + *(begin + 12) + *(begin + 13) + *(begin + 14) + *(begin + 15); } for (; begin != end; ++begin) { accumulator += *begin; } return accumulator; } template static inline TAccumulatorType TypedFastInnerOperation(It1 begin1, It1 end1, It2 begin2) { TAccumulatorType accumulator = TAccumulatorType(); const TOperation op; for (; begin1 + 15 < end1; begin1 += 16, begin2 += 16) { accumulator += op(*(begin1 + 0), *(begin2 + 0)) + op(*(begin1 + 1), *(begin2 + 1)) + op(*(begin1 + 2), *(begin2 + 2)) + op(*(begin1 + 3), *(begin2 + 3)) + op(*(begin1 + 4), *(begin2 + 4)) + op(*(begin1 + 5), *(begin2 + 5)) + op(*(begin1 + 6), *(begin2 + 6)) + op(*(begin1 + 7), *(begin2 + 7)) + op(*(begin1 + 8), *(begin2 + 8)) + op(*(begin1 + 9), *(begin2 + 9)) + op(*(begin1 + 10), *(begin2 + 10)) + op(*(begin1 + 11), *(begin2 + 11)) + op(*(begin1 + 12), *(begin2 + 12)) + op(*(begin1 + 13), *(begin2 + 13)) + op(*(begin1 + 14), *(begin2 + 14)) + op(*(begin1 + 15), *(begin2 + 15)); } for (; begin1 != end1; ++begin1, ++begin2) { accumulator += op(*begin1, *begin2); } return accumulator; } template static inline TAccumulatorType TypedFastInnerProduct(It1 begin1, It1 end1, It2 begin2) { return TypedFastInnerOperation, TAccumulatorType>(begin1, end1, begin2); } template static inline double FastAccumulate(It begin, It end) { return TypedFastAccumulate(begin, end); } template static inline double FastAccumulate(const TVector& sequence) { return FastAccumulate(sequence.begin(), sequence.end()); } template static inline double FastKahanAccumulate(It begin, It end) { return TypedFastAccumulate>(begin, end); } template static inline double FastKahanAccumulate(const TVector& sequence) { return FastKahanAccumulate(sequence.begin(), sequence.end()); } template static inline double FastInnerProduct(It1 begin1, It1 end1, It2 begin2) { return TypedFastInnerProduct(begin1, end1, begin2); } template static inline double FastInnerProduct(const TVector& lhs, const TVector& rhs) { Y_ASSERT(lhs.size() == rhs.size()); return FastInnerProduct(lhs.begin(), lhs.end(), rhs.begin()); } template static inline double FastKahanInnerProduct(It1 begin1, It1 end1, It2 begin2) { return TypedFastInnerProduct>(begin1, end1, begin2); } template static inline double FastKahanInnerProduct(const TVector& lhs, const TVector& rhs) { Y_ASSERT(lhs.size() == rhs.size()); return FastKahanInnerProduct(lhs.begin(), lhs.end(), rhs.begin()); }