[[ノート/ノート]]~
訪問者数 &counter(); 最終更新 &lastmod();~
&contents();
*混合分布 [#n2e11266]
**正規分布 [#k8eb44b4]
***正規分布に従う乱数の生成 [#ib7540f1]
一様乱数を生成したのち、ボックス・ミュラー法で正規分布に加工する。
-[[Wikipedia 乱数列:http://ja.wikipedia.org/wiki/%E4%B9%B1%E6%95%B0%E5%88%97]]
-[[C言語による乱数生成:http://www.ton.scphys.kyoto-u.ac.jp/~omi/random_variables_generation.html#Gauss]]
-[[C言語 乱数:http://kodamaforest.blog112.fc2.com/blog-entry-50.html]]
-[[エクセルを用いた標準正規分布に従う乱数(正規乱数)の発生とヒストグラム:http://homepage1.nifty.com/gfk/Norm-Rand.htm]]
-[[標準正規分布に従う乱数:http://hp.vector.co.jp/authors/VA008683/RNormal.htm]]
***ボックス・ミュラー法による生成プログラム例(1次元1分布) [#fd5385df]
$ cat randgen1.c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
double *gauss_rand(int n, double av, double dist) {
/* Box^-Muller */
double pi = 3.14159265358979;
int j = 0;
double a, b, r1, r2, r3, r4;
double *ret;
ret = (double *) malloc(n*sizeof(double));
double sd = sqrt(dist);
while (j<n) {
a=(double)rand()/(double)RAND_MAX;
b=(double)rand()/(double)RAND_MAX;
r1 = sqrt((-2.0)*log(a)) * sin(2.0*pi*b); /*Box-Mullarは同時に2つ無関係*/
r2 = sqrt((-2.0)*log(a)) * cos(2.0*pi*b); /*な0〜1の乱数を生成する */
r3 = r1 * sd + av; /* 0〜1の乱数を平均μ=av、標準偏差σ=sdに変換*/
r4 = r2 * sd + av;
ret[j] = r3; /*2つ生成したので2つ追加してリターン*/
ret[j+1] = r4;
j = j+2;
//printf("%f %f\n", r3, r4);
}
return ret;
}
int main(int argc, char *argv[]) {
int num;
double av, dist;
double *sample;
int i;
if (argc != 4) {
fprintf(stderr, "usage: %s <num of nums> <double average> <double distribution(=sd*sd)>\n", argv[0]);
exit(-1);
}
num = atoi(argv[1]);
av = atof(argv[2]); dist = atof(argv[3]);
sample = gauss_rand(num, av, dist);
for (i = 0; i<num; i++) {
printf("%f\n", sample[i]);
}
/* Check average and standard deviation */ /* 平均値と標準偏差を計算確認*/
double sum = 0.0; double squaresum = 0.0;
for (i = 0; i<num; i++) {
sum += sample[i];
}
double average = sum/num;
for (i = 0; i<num; i++) {
squaresum += ((sample[i]-average)*(sample[i]-average));
}
fprintf(stderr, "av = %5.2f, sd = %5.2f\n", sum/num, sqrt(squaresum/num));
exit(0);
}
***生成例2(1次元で2つの正規分布の混合分布) [#ua8f87e0]
混合分布は、混合の仕方がいろいろ考えられるが、一番の手抜きは、最初に第1分布、次に第2分布、のように順番に生成する。~
また、Box-Muller法で2つずつ生成されることを利用して、一方を第1分布に、他方を第2分布にすることもできる。この場合は両社の個数が同じになる。
以下の例では、後者(2つずつのペアの生成)で試している。同数できることが欠点。
$ cat randgen2.c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
double *gauss_rand2(int n, double av1, double dist1, double av2, double dist2) {
/*第1分布はμ=av1, σ**2=dist1、第2分布はμ=av2, σ**2=dist2で、n個ずつ生成する*/
/* Box^-Muller */
double pi = 3.14159265358979;
int j = 0;
double a, b, r1, r2, r3, r4;
double *ret;
ret = (double *) malloc(n*sizeof(double));
double sd1 = sqrt(dist1);
double sd2 = sqrt(dist2);
while (j<(2*n)) {
a=(double)rand()/(double)RAND_MAX;
b=(double)rand()/(double)RAND_MAX;
r1 = sqrt((-2.0)*log(a)) * sin(2.0*pi*b);
r2 = sqrt((-2.0)*log(a)) * cos(2.0*pi*b);
r3 = r1 * sd1 + av1; /* Box-Mullerは2つずつ生成するので一方を第1に、*/
r4 = r2 * sd2 + av2; /* 他方を第2に宛ててみた。こうしなくてもよい。*/
ret[j] = r3;
ret[j+1] = r4;
j = j+2;
//printf("%f %f\n", r3, r4);
}
return ret;
}
int main(int argc, char *argv[]) {
int num;
double av1, av2, dist1, dist2;
double *sample;
int i;
if (argc != 6) {
fprintf(stderr, "usage: %s <num of nums> <double average1> <double distribution1(=sd*sd)> <double average2> <double distribution2>\n", argv[0]);
exit(-1);
}
num = atoi(argv[1]);
av1 = atof(argv[2]); dist1 = atof(argv[3]);
av2 = atof(argv[4]); dist2 = atof(argv[5]);
sample = gauss_rand2(num, av1, dist1, av2, dist2);
for (i = 0; i<num; i++) {
printf("%f, %f\n", sample[i*2], sample[i*2+1]);
}
/* Check average and standard deviation */
double sum1 = 0.0; double squaresum1 = 0.0;
double sum2 = 0.0; double squaresum2 = 0.0;
for (i = 0; i<num; i++) {
sum1 += sample[i*2]; sum2 += sample[i*2+1];
}
double average1 = sum1/num; double average2 = sum2/num;
for (i = 0; i<num; i++) {
squaresum1 += ((sample[i*2]-average1)*(sample[i*2]-average1));
squaresum2 += ((sample[i*2+1]-average2)*(sample[i*2+1]-average2));
}
fprintf(stderr, "av1 = %5.2f, sd1 = %5.2f\n", sum1/num, sqrt(squaresum1/num));
fprintf(stderr, "av2 = %5.2f, sd2 = %5.2f\n", sum2/num, sqrt(squaresum2/num));
exit(0);
}
***生成例3(2次元で2つの正規分布の混合分布) [#nc7cf906]
この例では、2次元の正規分布(x, y)を2つ生成する。~
第1の分布は、xは正規分布(av11, dist11)に従い、yは正規分布(av12, dist12)に従うn1個の点からなる。~
第2の分布は、xは正規分布(av21, dist21)に従い、yは正規分布(av22, dist22)に従うn1個の点からなる。~
2つの分布は混ざっていない。
$ cat randgen22.c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
$ cat randgen22.c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
double *gauss_rand22(int n1, int n2, double av11, double dist11, double av12, double dist12, double av21, double dist21, double av22, double dist22) {
/* Box^-Muller */
double pi = 3.14159265358979;
int j = 0;
double a, b, r1, r2, r3, r4;
double *ret;
ret = (double *) malloc(2*(n1+n2)*sizeof(double));
double sd11 = sqrt(dist11);
double sd12 = sqrt(dist12);
double sd21 = sqrt(dist21);
double sd22 = sqrt(dist22);
while (j<(2*n1)) { /* 始めにnum1個の第1分布を生成する*/
a=(double)rand()/(double)RAND_MAX;
b=(double)rand()/(double)RAND_MAX;
r1 = sqrt((-2.0)*log(a)) * sin(2.0*pi*b);
r2 = sqrt((-2.0)*log(a)) * cos(2.0*pi*b);
r3 = r1 * sd11 + av11;
r4 = r2 * sd12 + av12;
ret[j] = r3;
ret[j+1] = r4;
j = j+2;
//printf("%f %f\n", r3, r4);
}
while (j<(2*(n1+n2))) { /* 次にnum2個の第2分布を生成する*/
a=(double)rand()/(double)RAND_MAX;
b=(double)rand()/(double)RAND_MAX;
r1 = sqrt((-2.0)*log(a)) * sin(2.0*pi*b);
r2 = sqrt((-2.0)*log(a)) * cos(2.0*pi*b);
r3 = r1 * sd21 + av21;
r4 = r2 * sd22 + av22;
ret[j] = r3;
ret[j+1] = r4;
j = j+2;
//printf("%f %f\n", r3, r4);
}
return ret;
}
int main(int argc, char *argv[]) {
int num1, num2;
double av11, av12, dist11, dist12, av21, av22, dist21, dist22;
double *sample;
int i;
if (argc != 11) {
fprintf(stderr, "usage: %s <num of nums1> <double av11> <double dis11(=sd*sd)> <double av12> <double dis12> <double av21> <double dis21> <double av22> <dboule dis22>\n", argv[0]);
exit(-1);
}
num1 = atoi(argv[1]); num2 = atoi(argv[2]);
av11 = atof(argv[3]); dist11 = atof(argv[4]);
av12 = atof(argv[5]); dist12 = atof(argv[6]);
av21 = atof(argv[7]); dist21 = atof(argv[8]);
av22 = atof(argv[9]); dist22 = atof(argv[10]);
sample = gauss_rand22(num1, num2, av11, dist11, av12, dist12, av21, dist21, av22, dist22);
for (i = 0; i<num1; i++) {
printf("%f, %f\n", sample[i*2], sample[i*2+1]);
}
/* Check average and standard deviation */
double sum1 = 0.0; double squaresum1 = 0.0;
double sum2 = 0.0; double squaresum2 = 0.0;
for (i = 0; i<num1; i++) {
sum1 += sample[i*2]; sum2 += sample[i*2+1];
}
double average1 = sum1/num1; double average2 = sum2/num1;
for (i = 0; i<num1; i++) {
squaresum1 += ((sample[i*2]-average1)*(sample[i*2]-average1));
squaresum2 += ((sample[i*2+1]-average2)*(sample[i*2+1]-average2));
}
fprintf(stderr, "av1 = %5.2f, sd1 = %5.2f\n", sum1/num1, sqrt(squaresum1/num1));
fprintf(stderr, "av2 = %5.2f, sd2 = %5.2f\n", sum2/num1, sqrt(squaresum2/num1));
for (i=num1+1; i<num1+num2; i++) {
printf("%f, %f\n", sample[i*2], sample[i*2+1]);
}
/* Check average and standard deviation */
sum1 = 0.0; squaresum1 = 0.0;
sum2 = 0.0; squaresum2 = 0.0;
for (i = num1+1; i<num1+num2; i++) {
sum1 += sample[i*2]; sum2 += sample[i*2+1];
}
average1 = sum1/num2; average2 = sum2/num2;
for (i = num1+1; i<num1+num2; i++) {
squaresum1 += ((sample[i*2]-average1)*(sample[i*2]-average1));
squaresum2 += ((sample[i*2+1]-average2)*(sample[i*2+1]-average2));
}
fprintf(stderr, "av1 = %5.2f, sd1 = %5.2f\n", sum1/num2, sqrt(squaresum1/num2));
fprintf(stderr, "av2 = %5.2f, sd2 = %5.2f\n", sum2/num2, sqrt(squaresum2/num2));
exit(0);
}
double *gauss_rand22(int n1, int n2, double av11, double dist11, double av12, double dist12, double av21, double dist21, double av22, double dist22) {
/* Box^-Muller */
double pi = 3.14159265358979;
int j = 0;
double a, b, r1, r2, r3, r4;
double *ret;
ret = (double *) malloc(2*(n1+n2)*sizeof(double));
double sd11 = sqrt(dist11);
double sd12 = sqrt(dist12);
double sd21 = sqrt(dist21);
double sd22 = sqrt(dist22);
while (j<(2*n1)) {
a=(double)rand()/(double)RAND_MAX;
b=(double)rand()/(double)RAND_MAX;
r1 = sqrt((-2.0)*log(a)) * sin(2.0*pi*b);
r2 = sqrt((-2.0)*log(a)) * cos(2.0*pi*b);
r3 = r1 * sd11 + av11;
r4 = r2 * sd12 + av12;
ret[j] = r3;
ret[j+1] = r4;
j = j+2;
//printf("%f %f\n", r3, r4);
}
while (j<(2*(n1+n2))) {
a=(double)rand()/(double)RAND_MAX;
b=(double)rand()/(double)RAND_MAX;
r1 = sqrt((-2.0)*log(a)) * sin(2.0*pi*b);
r2 = sqrt((-2.0)*log(a)) * cos(2.0*pi*b);
r3 = r1 * sd21 + av21;
r4 = r2 * sd22 + av22;
ret[j] = r3;
ret[j+1] = r4;
j = j+2;
//printf("%f %f\n", r3, r4);
}
return ret;
}
int main(int argc, char *argv[]) {
int num1, num2;
double av11, av12, dist11, dist12;
double av21, av22, dist21, dist22;
double *sample;
int i;
if (argc != 11) {
fprintf(stderr, "usage: %s <num of nums1> <double av11> <double dis11(=sd*sd)> <double av12> <double dis12> <double av21> <double dis21> <double av22> <dboule dis22>\n", argv[0]);
exit(-1);
}
num1 = atoi(argv[1]); num2 = atoi(argv[2]);
av11 = atof(argv[3]); dist11 = atof(argv[4]);
av12 = atof(argv[5]); dist12 = atof(argv[6]);
av21 = atof(argv[7]); dist21 = atof(argv[8]);
av22 = atof(argv[9]); dist22 = atof(argv[10]);
sample = gauss_rand22(num1, num2, av11, dist11, av12, dist12, av21, dist21, av22, dist22);
for (i = 0; i<num1; i++) {
printf("%f, %f\n", sample[i*2], sample[i*2+1]);
}
/* Check average and standard deviation */
double sum1 = 0.0; double squaresum1 = 0.0;
double sum2 = 0.0; double squaresum2 = 0.0;
for (i = 0; i<num1; i++) {
sum1 += sample[i*2]; sum2 += sample[i*2+1];
}
double average1 = sum1/num1; double average2 = sum2/num1;
for (i = 0; i<num1; i++) {
squaresum1 += ((sample[i*2]-average1)*(sample[i*2]-average1));
squaresum2 += ((sample[i*2+1]-average2)*(sample[i*2+1]-average2));
}
fprintf(stderr, "av1 = %5.2f, sd1 = %5.2f\n", sum1/num1, sqrt(squaresum1/num1));
fprintf(stderr, "av2 = %5.2f, sd2 = %5.2f\n", sum2/num1, sqrt(squaresum2/num1));
for (i=num1+1; i<num1+num2; i++) {
printf("%f, %f\n", sample[i*2], sample[i*2+1]);
}
/* Check average and standard deviation */
sum1 = 0.0; squaresum1 = 0.0;
sum2 = 0.0; squaresum2 = 0.0;
for (i = num1+1; i<num1+num2; i++) {
sum1 += sample[i*2]; sum2 += sample[i*2+1];
}
average1 = sum1/num2; average2 = sum2/num2;
for (i = num1+1; i<num1+num2; i++) {
squaresum1 += ((sample[i*2]-average1)*(sample[i*2]-average1));
squaresum2 += ((sample[i*2+1]-average2)*(sample[i*2+1]-average2));
}
fprintf(stderr, "av1 = %5.2f, sd1 = %5.2f\n", sum1/num2, sqrt(squaresum1/num2));
fprintf(stderr, "av2 = %5.2f, sd2 = %5.2f\n", sum2/num2, sqrt(squaresum2/num2));
exit(0);
}
**EM法による混合分布判定 [#r189ce2c]
**EM方の初期値生成にk-means法を使う [#j34ee617]