pca.c 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. /*
  2. * principal component analysis (PCA)
  3. * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file libavutil/pca.c
  23. * principal component analysis (PCA)
  24. */
  25. #include "common.h"
  26. #include "pca.h"
  27. typedef struct PCA{
  28. int count;
  29. int n;
  30. double *covariance;
  31. double *mean;
  32. }PCA;
  33. PCA *ff_pca_init(int n){
  34. PCA *pca;
  35. if(n<=0)
  36. return NULL;
  37. pca= av_mallocz(sizeof(PCA));
  38. pca->n= n;
  39. pca->count=0;
  40. pca->covariance= av_mallocz(sizeof(double)*n*n);
  41. pca->mean= av_mallocz(sizeof(double)*n);
  42. return pca;
  43. }
  44. void ff_pca_free(PCA *pca){
  45. av_freep(&pca->covariance);
  46. av_freep(&pca->mean);
  47. av_free(pca);
  48. }
  49. void ff_pca_add(PCA *pca, double *v){
  50. int i, j;
  51. const int n= pca->n;
  52. for(i=0; i<n; i++){
  53. pca->mean[i] += v[i];
  54. for(j=i; j<n; j++)
  55. pca->covariance[j + i*n] += v[i]*v[j];
  56. }
  57. pca->count++;
  58. }
  59. int ff_pca(PCA *pca, double *eigenvector, double *eigenvalue){
  60. int i, j, pass;
  61. int k=0;
  62. const int n= pca->n;
  63. double z[n];
  64. memset(eigenvector, 0, sizeof(double)*n*n);
  65. for(j=0; j<n; j++){
  66. pca->mean[j] /= pca->count;
  67. eigenvector[j + j*n] = 1.0;
  68. for(i=0; i<=j; i++){
  69. pca->covariance[j + i*n] /= pca->count;
  70. pca->covariance[j + i*n] -= pca->mean[i] * pca->mean[j];
  71. pca->covariance[i + j*n] = pca->covariance[j + i*n];
  72. }
  73. eigenvalue[j]= pca->covariance[j + j*n];
  74. z[j]= 0;
  75. }
  76. for(pass=0; pass < 50; pass++){
  77. double sum=0;
  78. for(i=0; i<n; i++)
  79. for(j=i+1; j<n; j++)
  80. sum += fabs(pca->covariance[j + i*n]);
  81. if(sum == 0){
  82. for(i=0; i<n; i++){
  83. double maxvalue= -1;
  84. for(j=i; j<n; j++){
  85. if(eigenvalue[j] > maxvalue){
  86. maxvalue= eigenvalue[j];
  87. k= j;
  88. }
  89. }
  90. eigenvalue[k]= eigenvalue[i];
  91. eigenvalue[i]= maxvalue;
  92. for(j=0; j<n; j++){
  93. double tmp= eigenvector[k + j*n];
  94. eigenvector[k + j*n]= eigenvector[i + j*n];
  95. eigenvector[i + j*n]= tmp;
  96. }
  97. }
  98. return pass;
  99. }
  100. for(i=0; i<n; i++){
  101. for(j=i+1; j<n; j++){
  102. double covar= pca->covariance[j + i*n];
  103. double t,c,s,tau,theta, h;
  104. if(pass < 3 && fabs(covar) < sum / (5*n*n)) //FIXME why pass < 3
  105. continue;
  106. if(fabs(covar) == 0.0) //FIXME should not be needed
  107. continue;
  108. if(pass >=3 && fabs((eigenvalue[j]+z[j])/covar) > (1LL<<32) && fabs((eigenvalue[i]+z[i])/covar) > (1LL<<32)){
  109. pca->covariance[j + i*n]=0.0;
  110. continue;
  111. }
  112. h= (eigenvalue[j]+z[j]) - (eigenvalue[i]+z[i]);
  113. theta=0.5*h/covar;
  114. t=1.0/(fabs(theta)+sqrt(1.0+theta*theta));
  115. if(theta < 0.0) t = -t;
  116. c=1.0/sqrt(1+t*t);
  117. s=t*c;
  118. tau=s/(1.0+c);
  119. z[i] -= t*covar;
  120. z[j] += t*covar;
  121. #define ROTATE(a,i,j,k,l) {\
  122. double g=a[j + i*n];\
  123. double h=a[l + k*n];\
  124. a[j + i*n]=g-s*(h+g*tau);\
  125. a[l + k*n]=h+s*(g-h*tau); }
  126. for(k=0; k<n; k++) {
  127. if(k!=i && k!=j){
  128. ROTATE(pca->covariance,FFMIN(k,i),FFMAX(k,i),FFMIN(k,j),FFMAX(k,j))
  129. }
  130. ROTATE(eigenvector,k,i,k,j)
  131. }
  132. pca->covariance[j + i*n]=0.0;
  133. }
  134. }
  135. for (i=0; i<n; i++) {
  136. eigenvalue[i] += z[i];
  137. z[i]=0.0;
  138. }
  139. }
  140. return -1;
  141. }
  142. #ifdef TEST
  143. #undef printf
  144. #undef random
  145. #include <stdio.h>
  146. #include <stdlib.h>
  147. int main(void){
  148. PCA *pca;
  149. int i, j, k;
  150. #define LEN 8
  151. double eigenvector[LEN*LEN];
  152. double eigenvalue[LEN];
  153. pca= ff_pca_init(LEN);
  154. for(i=0; i<9000000; i++){
  155. double v[2*LEN+100];
  156. double sum=0;
  157. int pos= random()%LEN;
  158. int v2= (random()%101) - 50;
  159. v[0]= (random()%101) - 50;
  160. for(j=1; j<8; j++){
  161. if(j<=pos) v[j]= v[0];
  162. else v[j]= v2;
  163. sum += v[j];
  164. }
  165. /* for(j=0; j<LEN; j++){
  166. v[j] -= v[pos];
  167. }*/
  168. // sum += random()%10;
  169. /* for(j=0; j<LEN; j++){
  170. v[j] -= sum/LEN;
  171. }*/
  172. // lbt1(v+100,v+100,LEN);
  173. ff_pca_add(pca, v);
  174. }
  175. ff_pca(pca, eigenvector, eigenvalue);
  176. for(i=0; i<LEN; i++){
  177. pca->count= 1;
  178. pca->mean[i]= 0;
  179. // (0.5^|x|)^2 = 0.5^2|x| = 0.25^|x|
  180. // pca.covariance[i + i*LEN]= pow(0.5, fabs
  181. for(j=i; j<LEN; j++){
  182. printf("%f ", pca->covariance[i + j*LEN]);
  183. }
  184. printf("\n");
  185. }
  186. #if 1
  187. for(i=0; i<LEN; i++){
  188. double v[LEN];
  189. double error=0;
  190. memset(v, 0, sizeof(v));
  191. for(j=0; j<LEN; j++){
  192. for(k=0; k<LEN; k++){
  193. v[j] += pca->covariance[FFMIN(k,j) + FFMAX(k,j)*LEN] * eigenvector[i + k*LEN];
  194. }
  195. v[j] /= eigenvalue[i];
  196. error += fabs(v[j] - eigenvector[i + j*LEN]);
  197. }
  198. printf("%f ", error);
  199. }
  200. printf("\n");
  201. #endif
  202. for(i=0; i<LEN; i++){
  203. for(j=0; j<LEN; j++){
  204. printf("%9.6f ", eigenvector[i + j*LEN]);
  205. }
  206. printf(" %9.1f %f\n", eigenvalue[i], eigenvalue[i]/eigenvalue[0]);
  207. }
  208. return 0;
  209. }
  210. #endif