http://d.hatena.ne.jp/makiyamakoji/20090512/p1のコードを参考に(というか、ほとんど一緒ですが)C++で書き直してみた。
#include <algorithm> #include <stdexcept> #include <cmath> double calcNormalDistPercentile(double percent) { if((percent <= 0) || (1 <= percent)) { throw std::range_error("INVALID PERCENT"); } if(percent == 0.5) { return 0; } double b[] = { 1.570796288, 0.03706987906, -0.8364353589e-3, -0.2250947176e-3, 0.6841218299e-5, 0.5824238515e-5, -0.104527497e-5, 0.8360937017e-7, -0.3231081277e-8, 0.3657763036e-10, 0.6936233982e-12 }; int b_length = sizeof(b)/sizeof(*b); double w3 = -std::log(4 * percent * (1 - percent)); double w1 = b[0]; for(int i = 1; i < b_length; ++i) { w1 += b[i] * std::pow(w3, i); } if(percent > 0.5) { return std::sqrt(w1 * w3); } else { return -std::sqrt(w1 * w3); } } double calcScore(int posCount, int negCount, double percent) { int n = posCount + negCount; double p = static_cast<double>(posCount) / n; double z = calcNormalDistPercentile(percent); double z2 = z * z; double temp = p * (1 - p) + z2 / (4 * n); double numerator = p + (z2 / (2 * n)) - z * std::sqrt(temp / n); double denominator = 1 + z2 / n; return numerator / denominator; } template<int N> double calcScore5(int (&ratings)[N], double percent) { int poscount = 0; int negcount = 0; for(int i = 0; i < N; ++i) { if (ratings[i] == 5) { poscount += 4; } else if(ratings[i] == 4) { poscount += 3; negcount += 1; } else if(ratings[i] == 3) { poscount += 2; negcount += 2; } else if(ratings[i] == 2) { poscount += 1; negcount += 3; } else if(ratings[i] == 1) { negcount += 4; } } return calcScore(poscount, negcount, percent); } template<int N> double calcScore(int (&ratings)[N], double percent) { int poscount = std::count(ratings, ratings + N, 1); int negcount = std::count(ratings, ratings + N, 0); return calcScore(poscount, negcount, percent); } #include <iostream> #include <iomanip> int main(int, char* []) { std::cout << std::setprecision(15); int rating1[] = { 0, 1, 0, 1, 1, 1, 0, 1 }; int rating2[] = { 1 }; std::cout << calcScore(rating1, 0.95) << std::endl; std::cout << calcScore(rating2, 0.95) << std::endl; int rating3[] = { 4, 4, 5, 5, 5, 5, 5, 5, 5, 5 }; int rating4[] = { 5 }; std::cout << calcScore5(rating3, 0.95) << std::endl; std::cout << calcScore5(rating4, 0.95) << std::endl; }
実行結果。
0.34799143561252 0.269865944074627 0.859668176956633 0.596521369044698
で、こういう計算こそ関数型言語の出番でしょ、ってことでHaskellでも書いてみた。直訳なのでまだちょっと不細工。
module Score where b = [ 1.570796288, 0.03706987906, -0.8364353589e-3, -0.2250947176e-3, 0.6841218299e-5, 0.5824238515e-5, -0.104527497e-5, 0.8360937017e-7, -0.3231081277e-8, 0.3657763036e-10, 0.6936233982e-12 ] count s n = length [x | x <- s, x == n] calcNormalDistPercentile percent | (percent <= 0) || (1 <= percent) = 0/0 -- NaN | percent == 0.5 = 0 | otherwise = sqrt $ (((w (tail b) 1) + (head b))) * w3 where w3 = - (log (4 * percent * (1 - percent))) w (b:bs) i = (b * (w3 ^ i)) + (w bs (i + 1)) w [] _ = 0 calcScore' posCount negCount percent = numerator / denominator where n = posCount + negCount p = posCount / n z = calcNormalDistPercentile percent z2 = z * z temp = p * (1 - p) + z2 / (4 * n) numerator = p + (z2 / (2 * n)) - z * (sqrt (temp / n)) denominator = 1 + z2 / n calcScore ratings percent = calcScore' posCount negCount percent where posCount = toEnum $ count ratings 1 negCount = toEnum $ count ratings 0 calcScore5 ratings percent = calcScore' posCount negCount percent where r5 = count ratings 5 r4 = count ratings 4 r3 = count ratings 3 r2 = count ratings 2 r1 = count ratings 1 posCount = toEnum $ (r5 * 4) + (r4 * 3) + (r3 * 2) + (r2 * 1) + (r1 * 0) negCount = toEnum $ (r5 * 0) + (r4 * 1) + (r3 * 2) + (r2 * 3) + (r1 * 4)
module Main where import Score rating1 = [ 0, 1, 0, 1, 1, 1, 0, 1 ] rating2 = [ 1 ] rating3 = [ 4, 4, 5, 5, 5, 5, 5, 5, 5, 5 ] rating4 = [ 5 ] main = do print $ calcScore rating1 0.95 print $ calcScore rating2 0.95 print $ calcScore5 rating3 0.95 print $ calcScore5 rating4 0.95
実行結果。
0.3479914356125201 0.2698659440746267 0.8596681769566334 0.5965213690446981