Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
Svm.cc
Go to the documentation of this file.
1 #include <math.h>
2 #include <cstdio>
3 #include <stdlib.h>
4 #include <string.h>
5 #include <stdarg.h>
6 #include <limits.h>
7 #include <locale.h>
8 #include <iostream>
9 #include <cassert>
10 
11 #include "Svm.hh"
13 typedef float Qfloat;
14 typedef signed char schar;
15 #ifndef min
16 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
17 #endif
18 #ifndef max
19 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
20 #endif
21 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
22 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
23 {
24  dst = new T[n];
25  memcpy((void *)dst,(void *)src,sizeof(T)*n);
26 }
27 static inline double powi(double base, int times)
28 {
29  double tmp = base, ret = 1.0;
30 
31  for ( int t=times; t>0; t/=2 ) {
32  if ( t%2==1 ) ret*=tmp;
33  tmp = tmp * tmp;
34  }
35  return ret;
36 }
37 #define INF HUGE_VAL
38 #define TAU 1e-12
39 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
40 
41 static void print_string_stdout(const char *s)
42 {
43  fputs(s,stdout);
44  fflush(stdout);
45 }
46 static void (*svm_print_string) (const char *) = &print_string_stdout;
47 #if 1
48 static void info(const char *fmt,...)
49 {
50  char buf[BUFSIZ];
51  va_list ap;
52  va_start(ap,fmt);
53  vsprintf(buf,fmt,ap);
54  va_end(ap);
55  (*svm_print_string)(buf);
56 }
57 #else
58 static void info(const char *fmt,...) {}
59 #endif
60 
61 
62 // Kernel Cache
63 //
64 // l is the number of total data items
65 // size is the cache size limit in bytes
66 //
67 class Cache
68 {
69 public:
70  Cache(int l,long int size);
71  ~Cache();
72 
73  // request data [0,len)
74  // return some position p where [p,len) need to be filled
75  // (p >= len if nothing needs to be filled)
76  int get_data(const int index, Qfloat **data, int len);
77  void swap_index(int i, int j);
78 private:
79  int l;
80  long int size;
81  struct head_t
82  {
83  head_t *prev, *next; // a circular list
85  int len; // data[0,len) is cached in this entry
86  };
87 
90  void lru_delete(head_t *h);
91  void lru_insert(head_t *h);
92 };
93 
94 Cache::Cache(int l_,long int size_):l(l_),size(size_)
95 {
96  head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0
97  size /= sizeof(Qfloat);
98  size -= l * sizeof(head_t) / sizeof(Qfloat);
99  size = max(size, 2 * (long int) l); // cache must be large enough for two columns
101 }
102 
104 {
105  for ( head_t *h = lru_head.next; h != &lru_head; h=h->next ) {
106  free(h->data);
107  }
108  free(head);
109 }
110 
112 {
113  // delete from current location
114  h->prev->next = h->next;
115  h->next->prev = h->prev;
116 }
117 
119 {
120  // insert to last position
121  h->next = &lru_head;
122  h->prev = lru_head.prev;
123  h->prev->next = h;
124  h->next->prev = h;
125 }
126 
127 int Cache::get_data(const int index, Qfloat **data, int len)
128 {
129  head_t *h = &head[index];
130  if ( h->len ) lru_delete(h);
131  int more = len - h->len;
132 
133  if ( more > 0 ) {
134  // free old space
135  while ( size < more )
136  {
137  head_t *old = lru_head.next;
138  lru_delete(old);
139  free(old->data);
140  size += old->len;
141  old->data = 0;
142  old->len = 0;
143  }
144 
145  // allocate new space
146  h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
147  size -= more;
148  swap(h->len,len);
149  }
150 
151  lru_insert(h);
152  *data = h->data;
153  return len;
154 }
155 
156 void Cache::swap_index(int i, int j)
157 {
158  if ( i==j ) return;
159 
160  if ( head[i].len ) lru_delete(&head[i]);
161  if ( head[j].len ) lru_delete(&head[j]);
162  swap(head[i].data,head[j].data);
163  swap(head[i].len,head[j].len);
164  if ( head[i].len ) lru_insert(&head[i]);
165  if ( head[j].len ) lru_insert(&head[j]);
166 
167  if ( i>j ) swap(i,j);
168  for ( head_t *h = lru_head.next; h!=&lru_head; h=h->next ) {
169  if ( h->len > i ) {
170  if ( h->len > j ) {
171  swap(h->data[i],h->data[j]);
172  } else {
173  // give up
174  lru_delete(h);
175  free(h->data);
176  size += h->len;
177  h->data = 0;
178  h->len = 0;
179  }
180  }
181  }
182 }
183 
184 
185 // Kernel evaluation
186 //
187 // the static method k_function is for doing single kernel evaluation
188 // the constructor of Kernel prepares to calculate the l*l kernel matrix
189 // the member function get_Q is for getting one column from the Q Matrix
190 //
191 class QMatrix {
192 public:
193  virtual Qfloat *get_Q(int column, int len) const = 0;
194  virtual double *get_QD() const = 0;
195  virtual void swap_index(int i, int j) const = 0;
196  virtual ~QMatrix() {}
197 };
198 
199 class Kernel: public QMatrix {
200 public:
201  Kernel(int l, svm_node * const * x, const svm_parameter& param);
202  virtual ~Kernel();
203 
204  static double k_function(const svm_node *x, const svm_node *y,
205  const svm_parameter& param);
206  virtual Qfloat *get_Q(int column, int len) const = 0;
207  virtual double *get_QD() const = 0;
208  virtual void swap_index(int i, int j) const // no so const...
209  {
210  swap(x[i],x[j]);
211  if ( x_square ) swap(x_square[i],x_square[j]);
212  }
213 protected:
214 
215  double (Kernel::*kernel_function)(int i, int j) const;
216 
217 private:
218  const svm_node **x;
219  double *x_square;
220 
221  // svm_parameter
222  const int kernel_type;
223  const int degree;
224  const double gamma;
225  const double coef0;
226 
227  static double dot(const svm_node *px, const svm_node *py);
228  double kernel_linear(int i, int j) const
229  {
230  return dot(x[i],x[j]);
231  }
232  double kernel_poly(int i, int j) const
233  {
234  return powi(gamma*dot(x[i],x[j])+coef0,degree);
235  }
236  double kernel_rbf(int i, int j) const
237  {
238  return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
239  }
240  double kernel_sigmoid(int i, int j) const
241  {
242  return tanh(gamma*dot(x[i],x[j])+coef0);
243  }
244  double kernel_precomputed(int i, int j) const
245  {
246  return x[i][(int)(x[j][0].value)].value;
247  }
248 };
249 
250 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
251 :kernel_type(param.kernel_type), degree(param.degree),
252  gamma(param.gamma), coef0(param.coef0)
253 {
254  switch(kernel_type)
255  {
256  case LINEAR :
258  break;
259  case POLY :
261  break;
262  case RBF :
264  break;
265  case SIGMOID :
267  break;
268  case PRECOMPUTED :
270  break;
271  }
272 
273  clone(x,x_,l);
274 
275  if ( kernel_type == RBF ) {
276  x_square = new double[l];
277  for ( int i=0; i<l; i++ ) {
278  x_square[i] = dot(x[i],x[i]);
279  }
280  } else {
281  x_square = 0;
282  }
283 }
284 
286 {
287  delete[] x;
288  delete[] x_square;
289 }
290 
291 double Kernel::dot(const svm_node *px, const svm_node *py)
292 {
293  double sum = 0;
294  while ( px->index != -1 && py->index != -1 )
295  {
296  if ( px->index == py->index ) {
297  sum += px->value * py->value;
298  ++px;
299  ++py;
300  } else {
301  if ( px->index > py->index ) {
302  ++py;
303  } else {
304  ++px;
305  }
306  }
307  }
308  return sum;
309 }
310 
311 double Kernel::k_function(const svm_node *x, const svm_node *y,
312  const svm_parameter& param)
313 {
314  switch(param.kernel_type)
315  {
316  case LINEAR :
317  return dot(x,y);
318  case POLY :
319  return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
320  case RBF :
321  {
322  double sum = 0;
323  while ( x->index != -1 && y->index !=-1 )
324  {
325  if ( x->index == y->index ) {
326  double d = x->value - y->value;
327  sum += d*d;
328  ++x;
329  ++y;
330  } else {
331  if ( x->index > y->index ) {
332  sum += y->value * y->value;
333  ++y;
334  } else {
335  sum += x->value * x->value;
336  ++x;
337  }
338  }
339  }
340 
341  while ( x->index != -1 )
342  {
343  sum += x->value * x->value;
344  ++x;
345  }
346 
347  while ( y->index != -1 )
348  {
349  sum += y->value * y->value;
350  ++y;
351  }
352 
353  return exp(-param.gamma*sum);
354  }
355  case SIGMOID :
356  return tanh(param.gamma*dot(x,y)+param.coef0);
357  case PRECOMPUTED : //x: test (validation), y: SV
358  return x[(int)(y->value)].value;
359  default :
360  return 0; // Unreachable
361  }
362 }
363 
364 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
365 // Solves:
366 //
367 // min 0.5(\alpha^T Q \alpha) + p^T \alpha
368 //
369 // y^T \alpha = \delta
370 // y_i = +1 or -1
371 // 0 <= alpha_i <= Cp for y_i = 1
372 // 0 <= alpha_i <= Cn for y_i = -1
373 //
374 // Given:
375 //
376 // Q, p, y, Cp, Cn, and an initial feasible point \alpha
377 // l is the size of vectors and matrices
378 // eps is the stopping tolerance
379 //
380 // solution will be put in \alpha, objective value will be put in obj
381 //
382 class Solver {
383 public:
384  Solver() {};
385  virtual ~Solver() {};
386 
387  struct SolutionInfo {
388  double obj;
389  double rho;
392  double r; // for Solver_NU
393  };
394 
395  void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
396  double *alpha_, double Cp, double Cn, double eps,
397  SolutionInfo* si, int shrinking);
398 protected:
401  double *G; // gradient of objective function
403  char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
404  double *alpha;
405  const QMatrix *Q;
406  const double *QD;
407  double eps;
408  double Cp,Cn;
409  double *p;
411  double *G_bar; // gradient, if we treat free variables as 0
412  int l;
413  bool unshrink; // XXX
414 
415  double get_C(int i)
416  {
417  return (y[i] > 0)? Cp : Cn;
418  }
420  {
421  if ( alpha[i] >= get_C(i) ) {
423  } else if ( alpha[i] <= 0 ) {
425  } else alpha_status[i] = FREE;
426  }
427  bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
428  bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
429  bool is_free(int i) { return alpha_status[i] == FREE; }
430  void swap_index(int i, int j);
431  void reconstruct_gradient();
432  virtual int select_working_set(int &i, int &j);
433  virtual double calculate_rho();
434  virtual void do_shrinking();
435 private:
436  bool be_shrunk(int i, double Gmax1, double Gmax2);
437 };
438 
439 void Solver::swap_index(int i, int j)
440 {
441  Q->swap_index(i,j);
442  swap(y[i],y[j]);
443  swap(G[i],G[j]);
445  swap(alpha[i],alpha[j]);
446  swap(p[i],p[j]);
447  swap(active_set[i],active_set[j]);
448  swap(G_bar[i],G_bar[j]);
449 }
450 
452 {
453  // reconstruct inactive elements of G from G_bar and free variables
454 
455  if ( active_size == l ) return;
456 
457  int i,j;
458  int nr_free = 0;
459 
460  for ( j=active_size; j<l; j++ ) {
461  G[j] = G_bar[j] + p[j];
462  }
463 
464  for ( j=0; j<active_size; j++ ) {
465  if ( is_free(j) ) {
466  nr_free++;
467  }
468  }
469 
470  if ( 2*nr_free < active_size ) {
471  info("\nWARNING: using -h 0 may be faster\n");
472  }
473 
474  if ( nr_free*l > 2*active_size*(l-active_size) ) {
475  for ( i=active_size; i<l; i++ ) {
476  const Qfloat *Q_i = Q->get_Q(i,active_size);
477  for ( j=0; j<active_size; j++ ) {
478  if ( is_free(j) ) {
479  G[i] += alpha[j] * Q_i[j];
480  }
481  }
482  }
483  } else {
484  for ( i=0; i<active_size; i++ ) {
485  if ( is_free(i) ) {
486  const Qfloat *Q_i = Q->get_Q(i,l);
487  double alpha_i = alpha[i];
488  for ( j=active_size; j<l; j++ ) {
489  G[j] += alpha_i * Q_i[j];
490  }
491  }
492  }
493  }
494 }
495 
496 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
497  double *alpha_, double Cp, double Cn, double eps,
498  SolutionInfo* si, int shrinking)
499 {
500  this->l = l;
501  this->Q = &Q;
502  QD=Q.get_QD();
503  clone(p, p_,l);
504  clone(y, y_,l);
505  clone(alpha,alpha_,l);
506  this->Cp = Cp;
507  this->Cn = Cn;
508  this->eps = eps;
509  unshrink = false;
510 
511  // initialize alpha_status
512  {
513  alpha_status = new char[l];
514  for ( int i=0; i<l; i++ ) {
516  }
517  }
518 
519  // initialize active set (for shrinking)
520  {
521  active_set = new int[l];
522  for ( int i=0; i<l; i++ ) {
523  active_set[i] = i;
524  }
525  active_size = l;
526  }
527 
528  // initialize gradient
529  {
530  G = new double[l];
531  G_bar = new double[l];
532  int i;
533  for ( i=0; i<l; i++ ) {
534  G[i] = p[i];
535  G_bar[i] = 0;
536  }
537  for ( i=0; i<l; i++ ) {
538  if ( !is_lower_bound(i) ) {
539  const Qfloat *Q_i = Q.get_Q(i,l);
540  double alpha_i = alpha[i];
541  int j;
542  for ( j=0; j<l; j++ ) {
543  G[j] += alpha_i*Q_i[j];
544  }
545  if ( is_upper_bound(i) ) {
546  for ( j=0; j<l; j++ ) {
547  G_bar[j] += get_C(i) * Q_i[j];
548  }
549  }
550  }
551  }
552  }
553 
554  // optimization step
555 
556  int iter = 0;
557  int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
558  int counter = min(l,1000)+1;
559 
560  while ( iter < max_iter )
561  {
562  // show progress and do shrinking
563 
564  if ( --counter == 0 ) {
565  counter = min(l,1000);
566  if ( shrinking ) do_shrinking();
567  info(".");
568  }
569 
570  int i,j;
571  if ( select_working_set(i,j)!=0 ) {
572  // reconstruct the whole gradient
574  // reset active set size and check
575  active_size = l;
576  info("*");
577  if ( select_working_set(i,j)!=0 ) {
578  break;
579  } else {
580  counter = 1; // do shrinking next iteration
581  }
582  }
583 
584  ++iter;
585 
586  // update alpha[i] and alpha[j], handle bounds carefully
587 
588  const Qfloat *Q_i = Q.get_Q(i,active_size);
589  const Qfloat *Q_j = Q.get_Q(j,active_size);
590 
591  double C_i = get_C(i);
592  double C_j = get_C(j);
593 
594  double old_alpha_i = alpha[i];
595  double old_alpha_j = alpha[j];
596 
597  if ( y[i]!=y[j] ) {
598  double quad_coef = QD[i]+QD[j]+2*Q_i[j];
599  if ( quad_coef <= 0 ) {
600  quad_coef = TAU;
601  }
602  double delta = (-G[i]-G[j])/quad_coef;
603  double diff = alpha[i] - alpha[j];
604  alpha[i] += delta;
605  alpha[j] += delta;
606 
607  if ( diff > 0 ) {
608  if ( alpha[j] < 0 ) {
609  alpha[j] = 0;
610  alpha[i] = diff;
611  }
612  } else {
613  if ( alpha[i] < 0 ) {
614  alpha[i] = 0;
615  alpha[j] = -diff;
616  }
617  }
618  if ( diff > C_i - C_j ) {
619  if ( alpha[i] > C_i ) {
620  alpha[i] = C_i;
621  alpha[j] = C_i - diff;
622  }
623  } else {
624  if ( alpha[j] > C_j ) {
625  alpha[j] = C_j;
626  alpha[i] = C_j + diff;
627  }
628  }
629  } else {
630  double quad_coef = QD[i]+QD[j]-2*Q_i[j];
631  if ( quad_coef <= 0 ) {
632  quad_coef = TAU;
633  }
634  double delta = (G[i]-G[j])/quad_coef;
635  double sum = alpha[i] + alpha[j];
636  alpha[i] -= delta;
637  alpha[j] += delta;
638 
639  if ( sum > C_i ) {
640  if ( alpha[i] > C_i ) {
641  alpha[i] = C_i;
642  alpha[j] = sum - C_i;
643  }
644  } else {
645  if ( alpha[j] < 0 ) {
646  alpha[j] = 0;
647  alpha[i] = sum;
648  }
649  }
650  if ( sum > C_j ) {
651  if ( alpha[j] > C_j ) {
652  alpha[j] = C_j;
653  alpha[i] = sum - C_j;
654  }
655  } else {
656  if ( alpha[i] < 0 ) {
657  alpha[i] = 0;
658  alpha[j] = sum;
659  }
660  }
661  }
662 
663  // update G
664 
665  double delta_alpha_i = alpha[i] - old_alpha_i;
666  double delta_alpha_j = alpha[j] - old_alpha_j;
667 
668  for ( int k=0; k<active_size; k++ ) {
669  G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
670  }
671 
672  // update alpha_status and G_bar
673 
674  {
675  bool ui = is_upper_bound(i);
676  bool uj = is_upper_bound(j);
679  int k;
680  if ( ui != is_upper_bound(i) ) {
681  Q_i = Q.get_Q(i,l);
682  if ( ui ) {
683  for ( k=0; k<l; k++ ) {
684  G_bar[k] -= C_i * Q_i[k];
685  }
686  } else {
687  for ( k=0; k<l; k++ ) {
688  G_bar[k] += C_i * Q_i[k];
689  }
690  }
691  }
692 
693  if ( uj != is_upper_bound(j) ) {
694  Q_j = Q.get_Q(j,l);
695  if ( uj ) {
696  for ( k=0; k<l; k++ ) {
697  G_bar[k] -= C_j * Q_j[k];
698  }
699  } else {
700  for ( k=0; k<l; k++ ) {
701  G_bar[k] += C_j * Q_j[k];
702  }
703  }
704  }
705  }
706  }
707 
708  if ( iter >= max_iter ) {
709  if ( active_size < l ) {
710  // reconstruct the whole gradient to calculate objective value
712  active_size = l;
713  info("*");
714  }
715  fprintf(stderr,"\nWARNING: reaching max number of iterations\n");
716  }
717 
718  // calculate rho
719 
720  si->rho = calculate_rho();
721 
722  // calculate objective value
723  {
724  double v = 0;
725  int i;
726  for ( i=0; i<l; i++ ) {
727  v += alpha[i] * (G[i] + p[i]);
728  }
729 
730  si->obj = v/2;
731  }
732 
733  // put back the solution
734  {
735  for ( int i=0; i<l; i++ ) {
736  alpha_[active_set[i]] = alpha[i];
737  }
738  }
739 
740  // juggle everything back
741  /*{
742  for(int i=0;i<l;i++)
743  while(active_set[i] != i)
744  swap_index(i,active_set[i]);
745  // or Q.swap_index(i,active_set[i]);
746  }*/
747 
748  si->upper_bound_p = Cp;
749  si->upper_bound_n = Cn;
750 
751  info("\noptimization finished, #iter = %d\n",iter);
752 
753  delete[] p;
754  delete[] y;
755  delete[] alpha;
756  delete[] alpha_status;
757  delete[] active_set;
758  delete[] G;
759  delete[] G_bar;
760 }
761 
762 // return 1 if already optimal, return 0 otherwise
763 int Solver::select_working_set(int &out_i, int &out_j)
764 {
765  // return i,j such that
766  // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
767  // j: minimizes the decrease of obj value
768  // (if quadratic coefficeint <= 0, replace it with tau)
769  // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
770 
771  double Gmax = -INF;
772  double Gmax2 = -INF;
773  int Gmax_idx = -1;
774  int Gmin_idx = -1;
775  double obj_diff_min = INF;
776 
777  for ( int t=0; t<active_size; t++ ) {
778  if ( y[t]==+1 ) {
779  if ( !is_upper_bound(t) ) {
780  if ( -G[t] >= Gmax ) {
781  Gmax = -G[t];
782  Gmax_idx = t;
783  }
784  }
785  } else {
786  if ( !is_lower_bound(t) ) {
787  if ( G[t] >= Gmax ) {
788  Gmax = G[t];
789  Gmax_idx = t;
790  }
791  }
792  }
793  }
794 
795  int i = Gmax_idx;
796  const Qfloat *Q_i = NULL;
797  if ( i != -1 ) { // NULL Q_i not accessed: Gmax=-INF if i=-1
798  Q_i = Q->get_Q(i,active_size);
799  }
800 
801  for ( int j=0; j<active_size; j++ ) {
802  if ( y[j]==+1 ) {
803  if ( !is_lower_bound(j) ) {
804  double grad_diff=Gmax+G[j];
805  if ( G[j] >= Gmax2 ) {
806  Gmax2 = G[j];
807  }
808  if ( grad_diff > 0 ) {
809  double obj_diff;
810  double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
811  if ( quad_coef > 0 ) {
812  obj_diff = -(grad_diff*grad_diff)/quad_coef;
813  } else {
814  obj_diff = -(grad_diff*grad_diff)/TAU;
815  }
816 
817  if ( obj_diff <= obj_diff_min ) {
818  Gmin_idx=j;
819  obj_diff_min = obj_diff;
820  }
821  }
822  }
823  } else {
824  if ( !is_upper_bound(j) ) {
825  double grad_diff= Gmax-G[j];
826  if ( -G[j] >= Gmax2 ) {
827  Gmax2 = -G[j];
828  }
829  if ( grad_diff > 0 ) {
830  double obj_diff;
831  double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
832  if ( quad_coef > 0 ) {
833  obj_diff = -(grad_diff*grad_diff)/quad_coef;
834  } else {
835  obj_diff = -(grad_diff*grad_diff)/TAU;
836  }
837 
838  if ( obj_diff <= obj_diff_min ) {
839  Gmin_idx=j;
840  obj_diff_min = obj_diff;
841  }
842  }
843  }
844  }
845  }
846 
847  if ( Gmax+Gmax2 < eps ) {
848  return 1;
849  }
850 
851  out_i = Gmax_idx;
852  out_j = Gmin_idx;
853  return 0;
854 }
855 
856 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
857 {
858  if ( is_upper_bound(i) ) {
859  if ( y[i]==+1 ) {
860  return(-G[i] > Gmax1);
861  } else {
862  return(-G[i] > Gmax2);
863  }
864  } else if ( is_lower_bound(i) ) {
865  if ( y[i]==+1 ) {
866  return(G[i] > Gmax2);
867  } else {
868  return(G[i] > Gmax1);
869  }
870  } else {
871  return(false);
872  }
873 }
874 
876 {
877  int i;
878  double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) }
879  double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) }
880 
881  // find maximal violating pair first
882  for ( i=0; i<active_size; i++ ) {
883  if ( y[i]==+1 ) {
884  if ( !is_upper_bound(i) ) {
885  if ( -G[i] >= Gmax1 ) {
886  Gmax1 = -G[i];
887  }
888  }
889  if ( !is_lower_bound(i) ) {
890  if ( G[i] >= Gmax2 ) {
891  Gmax2 = G[i];
892  }
893  }
894  } else {
895  if ( !is_upper_bound(i) ) {
896  if ( -G[i] >= Gmax2 ) {
897  Gmax2 = -G[i];
898  }
899  }
900  if ( !is_lower_bound(i) ) {
901  if ( G[i] >= Gmax1 ) {
902  Gmax1 = G[i];
903  }
904  }
905  }
906  }
907 
908  if ( unshrink == false && Gmax1 + Gmax2 <= eps*10 ) {
909  unshrink = true;
911  active_size = l;
912  info("*");
913  }
914 
915  for ( i=0; i<active_size; i++ ) {
916  if ( be_shrunk(i, Gmax1, Gmax2) ) {
917  active_size--;
918  while ( active_size > i )
919  {
920  if ( !be_shrunk(active_size, Gmax1, Gmax2) ) {
921  swap_index(i,active_size);
922  break;
923  }
924  active_size--;
925  }
926  }
927  }
928 }
929 
931 {
932  double r;
933  int nr_free = 0;
934  double ub = INF, lb = -INF, sum_free = 0;
935  for ( int i=0; i<active_size; i++ ) {
936  double yG = y[i]*G[i];
937 
938  if ( is_upper_bound(i) ) {
939  if ( y[i]==-1 ) {
940  ub = min(ub,yG);
941  } else {
942  lb = max(lb,yG);
943  }
944  } else if ( is_lower_bound(i) ) {
945  if ( y[i]==+1 ) {
946  ub = min(ub,yG);
947  } else {
948  lb = max(lb,yG);
949  }
950  } else {
951  ++nr_free;
952  sum_free += yG;
953  }
954  }
955 
956  if ( nr_free>0 ) {
957  r = sum_free/nr_free;
958  } else {
959  r = (ub+lb)/2;
960  }
961 
962  return r;
963 }
964 
965 
966 // Solver for nu-svm classification and regression
967 //
968 // additional constraint: e^T \alpha = constant
969 //
970 class Solver_NU: public Solver
971 {
972 public:
974  void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
975  double *alpha, double Cp, double Cn, double eps,
976  SolutionInfo* si, int shrinking)
977  {
978  this->si = si;
979  Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
980  }
981 private:
983  int select_working_set(int &i, int &j);
984  double calculate_rho();
985  bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
986  void do_shrinking();
987 };
988 
989 // return 1 if already optimal, return 0 otherwise
990 int Solver_NU::select_working_set(int &out_i, int &out_j)
991 {
992  // return i,j such that y_i = y_j and
993  // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
994  // j: minimizes the decrease of obj value
995  // (if quadratic coefficeint <= 0, replace it with tau)
996  // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
997 
998  double Gmaxp = -INF;
999  double Gmaxp2 = -INF;
1000  int Gmaxp_idx = -1;
1001 
1002  double Gmaxn = -INF;
1003  double Gmaxn2 = -INF;
1004  int Gmaxn_idx = -1;
1005 
1006  int Gmin_idx = -1;
1007  double obj_diff_min = INF;
1008 
1009  for ( int t=0; t<active_size; t++ ) {
1010  if ( y[t]==+1 ) {
1011  if ( !is_upper_bound(t) ) {
1012  if ( -G[t] >= Gmaxp ) {
1013  Gmaxp = -G[t];
1014  Gmaxp_idx = t;
1015  }
1016  }
1017  } else {
1018  if ( !is_lower_bound(t) ) {
1019  if ( G[t] >= Gmaxn ) {
1020  Gmaxn = G[t];
1021  Gmaxn_idx = t;
1022  }
1023  }
1024  }
1025  }
1026 
1027  int ip = Gmaxp_idx;
1028  int in = Gmaxn_idx;
1029  const Qfloat *Q_ip = NULL;
1030  const Qfloat *Q_in = NULL;
1031  if ( ip != -1 ) { // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
1032  Q_ip = Q->get_Q(ip,active_size);
1033  }
1034  if ( in != -1 ) {
1035  Q_in = Q->get_Q(in,active_size);
1036  }
1037 
1038  for ( int j=0; j<active_size; j++ ) {
1039  if ( y[j]==+1 ) {
1040  if ( !is_lower_bound(j) ) {
1041  double grad_diff=Gmaxp+G[j];
1042  if ( G[j] >= Gmaxp2 ) {
1043  Gmaxp2 = G[j];
1044  }
1045  if ( grad_diff > 0 ) {
1046  double obj_diff;
1047  double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
1048  if ( quad_coef > 0 ) {
1049  obj_diff = -(grad_diff*grad_diff)/quad_coef;
1050  } else {
1051  obj_diff = -(grad_diff*grad_diff)/TAU;
1052  }
1053 
1054  if ( obj_diff <= obj_diff_min ) {
1055  Gmin_idx=j;
1056  obj_diff_min = obj_diff;
1057  }
1058  }
1059  }
1060  } else {
1061  if ( !is_upper_bound(j) ) {
1062  double grad_diff=Gmaxn-G[j];
1063  if ( -G[j] >= Gmaxn2 ) {
1064  Gmaxn2 = -G[j];
1065  }
1066  if ( grad_diff > 0 ) {
1067  double obj_diff;
1068  double quad_coef = QD[in]+QD[j]-2*Q_in[j];
1069  if ( quad_coef > 0 ) {
1070  obj_diff = -(grad_diff*grad_diff)/quad_coef;
1071  } else {
1072  obj_diff = -(grad_diff*grad_diff)/TAU;
1073  }
1074 
1075  if ( obj_diff <= obj_diff_min ) {
1076  Gmin_idx=j;
1077  obj_diff_min = obj_diff;
1078  }
1079  }
1080  }
1081  }
1082  }
1083 
1084  if ( max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps ) {
1085  return 1;
1086  }
1087 
1088  if ( y[Gmin_idx] == +1 ) {
1089  out_i = Gmaxp_idx;
1090  } else {
1091  out_i = Gmaxn_idx;
1092  }
1093  out_j = Gmin_idx;
1094 
1095  return 0;
1096 }
1097 
1098 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1099 {
1100  if ( is_upper_bound(i) ) {
1101  if ( y[i]==+1 ) {
1102  return(-G[i] > Gmax1);
1103  } else {
1104  return(-G[i] > Gmax4);
1105  }
1106  } else if ( is_lower_bound(i) ) {
1107  if ( y[i]==+1 ) {
1108  return(G[i] > Gmax2);
1109  } else {
1110  return(G[i] > Gmax3);
1111  }
1112  } else {
1113  return(false);
1114  }
1115 }
1116 
1118 {
1119  double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
1120  double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
1121  double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
1122  double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
1123 
1124  // find maximal violating pair first
1125  int i;
1126  for ( i=0; i<active_size; i++ ) {
1127  if ( !is_upper_bound(i) ) {
1128  if ( y[i]==+1 ) {
1129  if ( -G[i] > Gmax1 ) Gmax1 = -G[i];
1130  } else if ( -G[i] > Gmax4 ) Gmax4 = -G[i];
1131  }
1132  if ( !is_lower_bound(i) ) {
1133  if ( y[i]==+1 ) {
1134  if ( G[i] > Gmax2 ) Gmax2 = G[i];
1135  } else if ( G[i] > Gmax3 ) Gmax3 = G[i];
1136  }
1137  }
1138 
1139  if ( unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10 ) {
1140  unshrink = true;
1142  active_size = l;
1143  }
1144 
1145  for ( i=0; i<active_size; i++ ) {
1146  if ( be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4) ) {
1147  active_size--;
1148  while ( active_size > i )
1149  {
1150  if ( !be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4) ) {
1151  swap_index(i,active_size);
1152  break;
1153  }
1154  active_size--;
1155  }
1156  }
1157  }
1158 }
1159 
1161 {
1162  int nr_free1 = 0,nr_free2 = 0;
1163  double ub1 = INF, ub2 = INF;
1164  double lb1 = -INF, lb2 = -INF;
1165  double sum_free1 = 0, sum_free2 = 0;
1166 
1167  for ( int i=0; i<active_size; i++ ) {
1168  if ( y[i]==+1 ) {
1169  if ( is_upper_bound(i) ) {
1170  lb1 = max(lb1,G[i]);
1171  } else if ( is_lower_bound(i) ) {
1172  ub1 = min(ub1,G[i]);
1173  } else {
1174  ++nr_free1;
1175  sum_free1 += G[i];
1176  }
1177  } else {
1178  if ( is_upper_bound(i) ) {
1179  lb2 = max(lb2,G[i]);
1180  } else if ( is_lower_bound(i) ) {
1181  ub2 = min(ub2,G[i]);
1182  } else {
1183  ++nr_free2;
1184  sum_free2 += G[i];
1185  }
1186  }
1187  }
1188 
1189  double r1,r2;
1190  if ( nr_free1 > 0 ) {
1191  r1 = sum_free1/nr_free1;
1192  } else {
1193  r1 = (ub1+lb1)/2;
1194  }
1195 
1196  if ( nr_free2 > 0 ) {
1197  r2 = sum_free2/nr_free2;
1198  } else {
1199  r2 = (ub2+lb2)/2;
1200  }
1201 
1202  si->r = (r1+r2)/2;
1203  return (r1-r2)/2;
1204 }
1205 
1206 
1207 // Q matrices for various formulations
1208 //
1209 class SVC_Q: public Kernel
1210 {
1211 public:
1212  SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
1213  :Kernel(prob.l, prob.x, param)
1214  {
1215  clone(y,y_,prob.l);
1216  cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1217  QD = new double[prob.l];
1218  for ( int i=0; i<prob.l; i++ ) {
1219  QD[i] = (this->*kernel_function)(i,i);
1220  }
1221  }
1222 
1223  Qfloat *get_Q(int i, int len) const
1224  {
1225  Qfloat *data;
1226  int start;
1227  if ( (start = cache->get_data( i, &data, len ) ) < len ) {
1228  for ( int j = start; j < len; j++ ) {
1229  data[ j ] = ( Qfloat )( y[ i ] * y[ j ] * ( this->*kernel_function )( i, j ) );
1230  }
1231  }
1232  return data;
1233  }
1234 
1235  double *get_QD() const
1236  {
1237  return QD;
1238  }
1239 
1240  void swap_index(int i, int j) const
1241  {
1242  cache->swap_index(i,j);
1243  Kernel::swap_index(i,j);
1244  swap(y[i],y[j]);
1245  swap(QD[i],QD[j]);
1246  }
1247 
1249  {
1250  delete[] y;
1251  delete cache;
1252  delete[] QD;
1253  }
1254 private:
1257  double *QD;
1258 };
1259 
1260 class ONE_CLASS_Q: public Kernel
1261 {
1262 public:
1263  ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
1264  :Kernel(prob.l, prob.x, param)
1265  {
1266  cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1267  QD = new double[prob.l];
1268  for ( int i=0; i<prob.l; i++ ) {
1269  QD[i] = (this->*kernel_function)(i,i);
1270  }
1271  }
1272 
1273  Qfloat *get_Q(int i, int len) const
1274  {
1275  Qfloat *data;
1276  int start;
1277  if ( ( start = cache->get_data(i,&data,len)) < len ) {
1278  for ( int j=start; j<len; j++ ) {
1279  data[j] = (Qfloat)(this->*kernel_function)(i,j);
1280  }
1281  }
1282  return data;
1283  }
1284 
1285  double *get_QD() const
1286  {
1287  return QD;
1288  }
1289 
1290  void swap_index(int i, int j) const
1291  {
1292  cache->swap_index(i,j);
1293  Kernel::swap_index(i,j);
1294  swap(QD[i],QD[j]);
1295  }
1296 
1298  {
1299  delete cache;
1300  delete[] QD;
1301  }
1302 private:
1304  double *QD;
1305 };
1306 
1307 class SVR_Q: public Kernel
1308 {
1309 public:
1310  SVR_Q(const svm_problem& prob, const svm_parameter& param)
1311  :Kernel(prob.l, prob.x, param)
1312  {
1313  l = prob.l;
1314  cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
1315  QD = new double[2*l];
1316  sign = new schar[2*l];
1317  index = new int[2*l];
1318  for ( int k=0; k<l; k++ ) {
1319  sign[k] = 1;
1320  sign[k+l] = -1;
1321  index[k] = k;
1322  index[k+l] = k;
1323  QD[k] = (this->*kernel_function)(k,k);
1324  QD[k+l] = QD[k];
1325  }
1326  buffer[0] = new Qfloat[2*l];
1327  buffer[1] = new Qfloat[2*l];
1328  next_buffer = 0;
1329  }
1330 
1331  void swap_index(int i, int j) const
1332  {
1333  swap(sign[i],sign[j]);
1334  swap(index[i],index[j]);
1335  swap(QD[i],QD[j]);
1336  }
1337 
1338  Qfloat *get_Q(int i, int len) const
1339  {
1340  Qfloat *data;
1341  int j, real_i = index[i];
1342  if ( cache->get_data(real_i,&data,l) < l ) {
1343  for ( j=0; j<l; j++ ) {
1344  data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
1345  }
1346  }
1347 
1348  // reorder and copy
1349  Qfloat *buf = buffer[next_buffer];
1350  next_buffer = 1 - next_buffer;
1351  schar si = sign[i];
1352  for ( j=0; j<len; j++ ) {
1353  buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
1354  }
1355  return buf;
1356  }
1357 
1358  double *get_QD() const
1359  {
1360  return QD;
1361  }
1362 
1364  {
1365  delete cache;
1366  delete[] sign;
1367  delete[] index;
1368  delete[] buffer[0];
1369  delete[] buffer[1];
1370  delete[] QD;
1371  }
1372 private:
1373  int l;
1376  int *index;
1377  mutable int next_buffer;
1379  double *QD;
1380 };
1381 
1382 
1383 // construct and solve various formulations
1384 //
1385 static void solve_c_svc(
1386  const svm_problem *prob, const svm_parameter* param,
1387  double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
1388 {
1389  int l = prob->l;
1390  double *minus_ones = new double[l];
1391  schar *y = new schar[l];
1392 
1393  int i;
1394 
1395  for ( i=0; i<l; i++ ) {
1396  alpha[i] = 0;
1397  minus_ones[i] = -1;
1398  if ( prob->y[i] > 0 ) y[i] = +1; else y[i] = -1;
1399  }
1400 
1401  Solver s;
1402  s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1403  alpha, Cp, Cn, param->eps, si, param->shrinking);
1404 
1405  double sum_alpha=0;
1406  for ( i=0; i<l; i++ ) {
1407  sum_alpha += alpha[i];
1408  }
1409 
1410  if ( Cp==Cn ) {
1411  info("nu = %f\n", sum_alpha/(Cp*prob->l));
1412  }
1413 
1414  for ( i=0; i<l; i++ ) {
1415  alpha[i] *= y[i];
1416  }
1417 
1418  delete[] minus_ones;
1419  delete[] y;
1420 }
1421 
1422 static void solve_nu_svc(
1423  const svm_problem *prob, const svm_parameter *param,
1424  double *alpha, Solver::SolutionInfo* si)
1425 {
1426  int i;
1427  int l = prob->l;
1428  double nu = param->nu;
1429 
1430  schar *y = new schar[l];
1431 
1432  for ( i=0; i<l; i++ ) {
1433  if ( prob->y[i]>0 ) {
1434  y[i] = +1;
1435  } else {
1436  y[i] = -1;
1437  }
1438  }
1439 
1440  double sum_pos = nu*l/2;
1441  double sum_neg = nu*l/2;
1442 
1443  for ( i=0; i<l; i++ ) {
1444  if ( y[i] == +1 ) {
1445  alpha[i] = min(1.0,sum_pos);
1446  sum_pos -= alpha[i];
1447  } else {
1448  alpha[i] = min(1.0,sum_neg);
1449  sum_neg -= alpha[i];
1450  }
1451  }
1452 
1453  double *zeros = new double[l];
1454 
1455  for ( i=0; i<l; i++ ) {
1456  zeros[i] = 0;
1457  }
1458 
1459  Solver_NU s;
1460  s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1461  alpha, 1.0, 1.0, param->eps, si, param->shrinking);
1462  double r = si->r;
1463 
1464  info("C = %f\n",1/r);
1465 
1466  for ( i=0; i<l; i++ ) {
1467  alpha[i] *= y[i]/r;
1468  }
1469 
1470  si->rho /= r;
1471  si->obj /= (r*r);
1472  si->upper_bound_p = 1/r;
1473  si->upper_bound_n = 1/r;
1474 
1475  delete[] y;
1476  delete[] zeros;
1477 }
1478 
1479 static void solve_one_class(
1480  const svm_problem *prob, const svm_parameter *param,
1481  double *alpha, Solver::SolutionInfo* si)
1482 {
1483  int l = prob->l;
1484  double *zeros = new double[l];
1485  schar *ones = new schar[l];
1486  int i;
1487 
1488  int n = (int)(param->nu*prob->l); // # of alpha's at upper bound
1489 
1490  for ( i=0; i<n; i++ ) {
1491  alpha[i] = 1;
1492  }
1493  if ( n<prob->l ) {
1494  alpha[n] = param->nu * prob->l - n;
1495  }
1496  for ( i=n+1; i<l; i++ ) {
1497  alpha[i] = 0;
1498  }
1499 
1500  for ( i=0; i<l; i++ ) {
1501  zeros[i] = 0;
1502  ones[i] = 1;
1503  }
1504 
1505  Solver s;
1506  s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
1507  alpha, 1.0, 1.0, param->eps, si, param->shrinking);
1508 
1509  delete[] zeros;
1510  delete[] ones;
1511 }
1512 
1513 static void solve_epsilon_svr(
1514  const svm_problem *prob, const svm_parameter *param,
1515  double *alpha, Solver::SolutionInfo* si)
1516 {
1517  int l = prob->l;
1518  double *alpha2 = new double[2*l];
1519  double *linear_term = new double[2*l];
1520  schar *y = new schar[2*l];
1521  int i;
1522 
1523  for ( i=0; i<l; i++ ) {
1524  alpha2[i] = 0;
1525  linear_term[i] = param->p - prob->y[i];
1526  y[i] = 1;
1527 
1528  alpha2[i+l] = 0;
1529  linear_term[i+l] = param->p + prob->y[i];
1530  y[i+l] = -1;
1531  }
1532 
1533  Solver s;
1534  s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1535  alpha2, param->C, param->C, param->eps, si, param->shrinking);
1536 
1537  double sum_alpha = 0;
1538  for ( i=0; i<l; i++ ) {
1539  alpha[i] = alpha2[i] - alpha2[i+l];
1540  sum_alpha += fabs(alpha[i]);
1541  }
1542  info("nu = %f\n",sum_alpha/(param->C*l));
1543 
1544  delete[] alpha2;
1545  delete[] linear_term;
1546  delete[] y;
1547 }
1548 
1549 static void solve_nu_svr(
1550  const svm_problem *prob, const svm_parameter *param,
1551  double *alpha, Solver::SolutionInfo* si)
1552 {
1553  int l = prob->l;
1554  double C = param->C;
1555  double *alpha2 = new double[2*l];
1556  double *linear_term = new double[2*l];
1557  schar *y = new schar[2*l];
1558  int i;
1559 
1560  double sum = C * param->nu * l / 2;
1561  for ( i=0; i<l; i++ ) {
1562  alpha2[i] = alpha2[i+l] = min(sum,C);
1563  sum -= alpha2[i];
1564 
1565  linear_term[i] = - prob->y[i];
1566  y[i] = 1;
1567 
1568  linear_term[i+l] = prob->y[i];
1569  y[i+l] = -1;
1570  }
1571 
1572  Solver_NU s;
1573  s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1574  alpha2, C, C, param->eps, si, param->shrinking);
1575 
1576  info("epsilon = %f\n",-si->r);
1577 
1578  for ( i=0; i<l; i++ ) {
1579  alpha[i] = alpha2[i] - alpha2[i+l];
1580  }
1581 
1582  delete[] alpha2;
1583  delete[] linear_term;
1584  delete[] y;
1585 }
1586 
1587 
1588 // decision_function
1589 //
1591 {
1592  double *alpha;
1593  double rho;
1594 };
1595 
1597  const svm_problem *prob, const svm_parameter *param,
1598  double Cp, double Cn)
1599 {
1600  double *alpha = Malloc(double,prob->l);
1602  switch(param->svm_type)
1603  {
1604  case C_SVC :
1605  solve_c_svc(prob,param,alpha,&si,Cp,Cn);
1606  break;
1607  case NU_SVC :
1608  solve_nu_svc(prob,param,alpha,&si);
1609  break;
1610  case ONE_CLASS :
1611  solve_one_class(prob,param,alpha,&si);
1612  break;
1613  case EPSILON_SVR :
1614  solve_epsilon_svr(prob,param,alpha,&si);
1615  break;
1616  case NU_SVR :
1617  solve_nu_svr(prob,param,alpha,&si);
1618  break;
1619  }
1620 
1621  info("obj = %f, rho = %f\n",si.obj,si.rho);
1622 
1623  // output SVs
1624 
1625  int nSV = 0;
1626  int nBSV = 0;
1627  for ( int i=0; i<prob->l; i++ ) {
1628  if ( fabs(alpha[i]) > 0 ) {
1629  ++nSV;
1630  if ( prob->y[i] > 0 ) {
1631  if ( fabs(alpha[i]) >= si.upper_bound_p ) {
1632  ++nBSV;
1633  }
1634  } else {
1635  if ( fabs(alpha[i]) >= si.upper_bound_n ) {
1636  ++nBSV;
1637  }
1638  }
1639  }
1640  }
1641 
1642  info("nSV = %d, nBSV = %d\n",nSV,nBSV);
1643 
1645  f.alpha = alpha;
1646  f.rho = si.rho;
1647  return f;
1648 }
1649 
1650 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
1651 static void sigmoid_train(
1652  int l, const double *dec_values, const double *labels,
1653  double& A, double& B)
1654 {
1655  double prior1=0, prior0 = 0;
1656  int i;
1657 
1658  for ( i=0; i<l; i++ ) {
1659  if ( labels[i] > 0 ) prior1+=1;
1660  else prior0+=1;
1661  }
1662 
1663  int max_iter=100; // Maximal number of iterations
1664  double min_step=1e-10; // Minimal step taken in line search
1665  double sigma=1e-12; // For numerically strict PD of Hessian
1666  double eps=1e-5;
1667  double hiTarget=(prior1+1.0)/(prior1+2.0);
1668  double loTarget=1/(prior0+2.0);
1669  double *t=Malloc(double,l);
1670  double fApB,p,q;
1671  double newA,newB,newf,d1,d2;
1672  int iter;
1673 
1674  // Initial Point and Initial Fun Value
1675  A=0.0; B=log((prior0+1.0)/(prior1+1.0));
1676  double fval = 0.0;
1677 
1678  for ( i=0; i<l; i++ ) {
1679  if ( labels[i]>0 ) t[i]=hiTarget;
1680  else t[i]=loTarget;
1681  fApB = dec_values[i]*A+B;
1682  if ( fApB>=0 ) {
1683  fval += t[i]*fApB + log(1+exp(-fApB));
1684  } else {
1685  fval += (t[i] - 1)*fApB +log(1+exp(fApB));
1686  }
1687  }
1688  for ( iter=0; iter<max_iter; iter++ ) {
1689  // Update Gradient and Hessian (use H' = H + sigma I)
1690  double h11=sigma; // numerically ensures strict PD
1691  double h22=sigma;
1692  double h21=0.0,g1=0.0,g2=0.0;
1693  for ( i=0; i<l; i++ ) {
1694  fApB = dec_values[i]*A+B;
1695  if ( fApB >= 0 ) {
1696  p=exp(-fApB)/(1.0+exp(-fApB));
1697  q=1.0/(1.0+exp(-fApB));
1698  } else {
1699  p=1.0/(1.0+exp(fApB));
1700  q=exp(fApB)/(1.0+exp(fApB));
1701  }
1702  d2=p*q;
1703  h11+=dec_values[i]*dec_values[i]*d2;
1704  h22+=d2;
1705  h21+=dec_values[i]*d2;
1706  d1=t[i]-p;
1707  g1+=dec_values[i]*d1;
1708  g2+=d1;
1709  }
1710 
1711  // Stopping Criteria
1712  if ( fabs(g1)<eps && fabs(g2)<eps ) {
1713  break;
1714  }
1715 
1716  // Finding Newton direction: -inv(H') * g
1717  double det=h11*h22-h21*h21;
1718  double dA=-(h22*g1 - h21 * g2) / det;
1719  double dB=-(-h21*g1+ h11 * g2) / det;
1720  double gd=g1*dA+g2*dB;
1721 
1722 
1723  double stepsize = 1; // Line Search
1724  while ( stepsize >= min_step )
1725  {
1726  newA = A + stepsize * dA;
1727  newB = B + stepsize * dB;
1728 
1729  // New function value
1730  newf = 0.0;
1731  for ( i=0; i<l; i++ ) {
1732  fApB = dec_values[i]*newA+newB;
1733  if ( fApB >= 0 ) {
1734  newf += t[i]*fApB + log(1+exp(-fApB));
1735  } else {
1736  newf += (t[i] - 1)*fApB +log(1+exp(fApB));
1737  }
1738  }
1739  // Check sufficient decrease
1740  if ( newf<fval+0.0001*stepsize*gd ) {
1741  A=newA;B=newB;fval=newf;
1742  break;
1743  } else {
1744  stepsize = stepsize / 2.0;
1745  }
1746  }
1747 
1748  if ( stepsize < min_step ) {
1749  info("Line search fails in two-class probability estimates\n");
1750  break;
1751  }
1752  }
1753 
1754  if ( iter>=max_iter ) {
1755  info("Reaching maximal iterations in two-class probability estimates\n");
1756  }
1757  free(t);
1758 }
1759 
1760 static double sigmoid_predict(double decision_value, double A, double B)
1761 {
1762  double fApB = decision_value*A+B;
1763  // 1-p used later; avoid catastrophic cancellation
1764  if ( fApB >= 0 ) {
1765  return exp(-fApB)/(1.0+exp(-fApB));
1766  } else {
1767  return 1.0/(1+exp(fApB)) ;
1768  }
1769 }
1770 
1771 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
1772 static void multiclass_probability(int k, double **r, double *p)
1773 {
1774  int t,j;
1775  int iter = 0, max_iter=max(100,k);
1776  double **Q=Malloc(double *,k);
1777  double *Qp=Malloc(double,k);
1778  double eps=0.005/k;
1779 
1780  for ( t=0; t<k; t++ ) {
1781  p[t]=1.0/k; // Valid if k = 1
1782  Q[t]=Malloc(double,k);
1783  Q[t][t]=0;
1784  for ( j=0; j<t; j++ ) {
1785  Q[t][t]+=r[j][t]*r[j][t];
1786  Q[t][j]=Q[j][t];
1787  }
1788  for ( j=t+1; j<k; j++ ) {
1789  Q[t][t]+=r[j][t]*r[j][t];
1790  Q[t][j]=-r[j][t]*r[t][j];
1791  }
1792  }
1793  for ( iter=0; iter<max_iter; iter++ ) {
1794  // stopping condition, recalculate QP,pQP for numerical accuracy
1795  double pQp=0;
1796  for ( t=0; t<k; t++ ) {
1797  Qp[t]=0;
1798  for ( j=0; j<k; j++ ) {
1799  Qp[t]+=Q[t][j]*p[j];
1800  }
1801  pQp+=p[t]*Qp[t];
1802  }
1803  double max_error=0;
1804  for ( t=0; t<k; t++ ) {
1805  double error=fabs(Qp[t]-pQp);
1806  if ( error>max_error ) {
1807  max_error=error;
1808  }
1809  }
1810  if ( max_error<eps ) break;
1811 
1812  for ( t=0; t<k; t++ ) {
1813  double diff=(-Qp[t]+pQp)/Q[t][t];
1814  p[t]+=diff;
1815  pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
1816  for ( j=0; j<k; j++ ) {
1817  Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
1818  p[j]/=(1+diff);
1819  }
1820  }
1821  }
1822  if ( iter>=max_iter ) {
1823  info("Exceeds max_iter in multiclass_prob\n");
1824  }
1825  for ( t=0; t<k; t++ ) free(Q[t]);
1826  free(Q);
1827  free(Qp);
1828 }
1829 
1830 // Cross-validation decision values for probability estimates
1832  const svm_problem *prob, const svm_parameter *param,
1833  double Cp, double Cn, double& probA, double& probB)
1834 {
1835  int i;
1836  int nr_fold = 5;
1837  int *perm = Malloc(int,prob->l);
1838  double *dec_values = Malloc(double,prob->l);
1839 
1840  // random shuffle
1841  for ( i=0; i<prob->l; i++ ) perm[i]=i;
1842  for ( i=0; i<prob->l; i++ ) {
1843  int j = i+rand()%(prob->l-i);
1844  swap(perm[i],perm[j]);
1845  }
1846  for ( i=0; i<nr_fold; i++ ) {
1847  int begin = i*prob->l/nr_fold;
1848  int end = (i+1)*prob->l/nr_fold;
1849  int j,k;
1850  struct svm_problem subprob;
1851 
1852  subprob.l = prob->l-(end-begin);
1853  subprob.x = Malloc(struct svm_node*,subprob.l);
1854  subprob.y = Malloc(double,subprob.l);
1855 
1856  k=0;
1857  for ( j=0; j<begin; j++ ) {
1858  subprob.x[k] = prob->x[perm[j]];
1859  subprob.y[k] = prob->y[perm[j]];
1860  ++k;
1861  }
1862  for ( j=end; j<prob->l; j++ ) {
1863  subprob.x[k] = prob->x[perm[j]];
1864  subprob.y[k] = prob->y[perm[j]];
1865  ++k;
1866  }
1867  int p_count=0,n_count=0;
1868  for ( j=0; j<k; j++ ) {
1869  if ( subprob.y[j]>0 ) {
1870  p_count++;
1871  } else {
1872  n_count++;
1873  }
1874  }
1875 
1876  if ( p_count==0 && n_count==0 ) {
1877  for ( j=begin; j<end; j++ ) {
1878  dec_values[perm[j]] = 0;
1879  }
1880  } else if ( p_count > 0 && n_count == 0 ) {
1881  for ( j=begin; j<end; j++ ) {
1882  dec_values[perm[j]] = 1;
1883  }
1884  } else if ( p_count == 0 && n_count > 0 ) {
1885  for ( j=begin; j<end; j++ ) {
1886  dec_values[perm[j]] = -1;
1887  }
1888  } else {
1889  svm_parameter subparam = *param;
1890  subparam.probability=0;
1891  subparam.C=1.0;
1892  subparam.nr_weight=2;
1893  subparam.weight_label = Malloc(int,2);
1894  subparam.weight = Malloc(double,2);
1895  subparam.weight_label[0]=+1;
1896  subparam.weight_label[1]=-1;
1897  subparam.weight[0]=Cp;
1898  subparam.weight[1]=Cn;
1899  struct svm_model *submodel = svm_train(&subprob,&subparam);
1900  for ( j=begin; j<end; j++ ) {
1901  svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
1902  // ensure +1 -1 order; reason not using CV subroutine
1903  dec_values[perm[j]] *= submodel->label[0];
1904  }
1905  svm_free_and_destroy_model(&submodel);
1906  svm_destroy_param(&subparam);
1907  }
1908  free(subprob.x);
1909  free(subprob.y);
1910  }
1911  sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
1912  free(dec_values);
1913  free(perm);
1914 }
1915 
1916 // Return parameter of a Laplace distribution
1917 static double svm_svr_probability(
1918  const svm_problem *prob, const svm_parameter *param)
1919 {
1920  int i;
1921  int nr_fold = 5;
1922  double *ymv = Malloc(double,prob->l);
1923  double mae = 0;
1924 
1925  svm_parameter newparam = *param;
1926  newparam.probability = 0;
1927  svm_cross_validation(prob,&newparam,nr_fold,ymv);
1928  for ( i=0; i<prob->l; i++ ) {
1929  ymv[i]=prob->y[i]-ymv[i];
1930  mae += fabs(ymv[i]);
1931  }
1932  mae /= prob->l;
1933  double std=sqrt(2*mae*mae);
1934  int count=0;
1935  mae=0;
1936  for ( i=0; i<prob->l; i++ ) {
1937  if ( fabs(ymv[i]) > 5*std ) {
1938  count=count+1;
1939  } else {
1940  mae+=fabs(ymv[i]);
1941  }
1942  }
1943  mae /= (prob->l-count);
1944  info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
1945  free(ymv);
1946  return mae;
1947 }
1948 
1949 
1950 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
1951 // perm, length l, must be allocated before calling this subroutine
1952 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
1953 {
1954  int l = prob->l;
1955  int max_nr_class = 16;
1956  int nr_class = 0;
1957  int *label = Malloc(int,max_nr_class);
1958  int *count = Malloc(int,max_nr_class);
1959  int *data_label = Malloc(int,l);
1960  int i;
1961 
1962  for ( i = 0; i < l; i++ ) {
1963  int this_label = ( int )prob->y[ i ];
1964  int j;
1965  for ( j = 0; j < nr_class; j++ ) {
1966  if ( this_label == label[ j ] ) {
1967  ++count[j];
1968  break;
1969  }
1970  }
1971  data_label[ i ] = j;
1972  if ( j == nr_class ) {
1973  if ( nr_class == max_nr_class ) {
1974  max_nr_class *= 2;
1975  // AMW cppcheck notes that we need to check for realloc success
1976  int *new_label_data = static_cast< int * >( realloc( label, max_nr_class * sizeof( int ) ) );
1977  if ( new_label_data == NULL ) {
1978  info( "\n\n!!! Critical memory error in svm_group_classes, label allocation !!!\n\n" );
1979  } else {
1980  label = new_label_data;
1981  }
1982 
1983  int *new_count_data = static_cast< int * >( realloc( count, max_nr_class * sizeof( int ) ) );
1984  if ( new_count_data == NULL ) {
1985  info( "\n\n!!! Critical memory error in svm_group_classes, count allocation !!!\n\n" );
1986  } else {
1987  count = new_count_data;
1988  }
1989  }
1990  label[ nr_class ] = this_label;
1991  count[ nr_class ] = 1;
1992  ++nr_class;
1993  }
1994  }
1995 
1996  int *start = Malloc(int,nr_class);
1997  start[0] = 0;
1998  for ( i = 1; i < nr_class; i++ ) {
1999  start[ i ] = start[ i - 1 ] + count[ i - 1 ];
2000  }
2001  for ( i = 0; i < l; i++ ) {
2002  perm[ start[ data_label[ i ] ] ] = i;
2003  ++start[ data_label[ i ] ];
2004  }
2005  start[ 0 ] = 0;
2006  for ( i = 1; i < nr_class; i++ ) {
2007  start[ i ] = start[ i - 1 ] + count[ i - 1 ];
2008  }
2009 
2010  *nr_class_ret = nr_class;
2011  *label_ret = label;
2012  *start_ret = start;
2013  *count_ret = count;
2014  free( data_label );
2015 }
2016 
2017 
2018 // Interface functions
2019 //
2021 {
2022  svm_model *model = Malloc(svm_model,1);
2023  model->param = *param;
2024  model->free_sv = 0; // XXX
2025 
2026  if ( param->svm_type == ONE_CLASS ||
2027  param->svm_type == EPSILON_SVR ||
2028  param->svm_type == NU_SVR ) {
2029  // regression or one-class-svm
2030  model->nr_class = 2;
2031  model->label = NULL;
2032  model->nSV = NULL;
2033  model->probA = NULL; model->probB = NULL;
2034  model->sv_coef = Malloc(double *,1);
2035 
2036  if ( param->probability &&
2037  (param->svm_type == EPSILON_SVR ||
2038  param->svm_type == NU_SVR) ) {
2039  model->probA = Malloc(double,1);
2040  model->probA[0] = svm_svr_probability(prob,param);
2041  }
2042 
2043  decision_function f = svm_train_one(prob,param,0,0);
2044  model->rho = Malloc(double,1);
2045  model->rho[0] = f.rho;
2046 
2047  int nSV = 0;
2048  int i;
2049  for ( i=0; i<prob->l; i++ ) {
2050  if ( fabs(f.alpha[i]) > 0 ) ++nSV;
2051  }
2052  model->l = nSV;
2053  model->SV = Malloc(svm_node *,nSV);
2054  model->sv_coef[0] = Malloc(double,nSV);
2055  model->sv_indices = Malloc(int,nSV);
2056  int j = 0;
2057  for ( i=0; i<prob->l; i++ ) {
2058  if ( fabs(f.alpha[i]) > 0 ) {
2059  model->SV[j] = prob->x[i];
2060  model->sv_coef[0][j] = f.alpha[i];
2061  model->sv_indices[j] = i+1;
2062  ++j;
2063  }
2064  }
2065 
2066  free(f.alpha);
2067  } else {
2068  // classification
2069  int l = prob->l;
2070  int nr_class;
2071  int *label = NULL;
2072  int *start = NULL;
2073  int *count = NULL;
2074  int *perm = Malloc(int,l);
2075 
2076  // group training data of the same class
2077  svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2078  if ( nr_class == 1 ) {
2079  info("WARNING: training data in only one class. See README for details.\n");
2080  }
2081 
2082  svm_node **x = Malloc(svm_node *,l);
2083  int i;
2084  for ( i=0; i<l; i++ ) {
2085  x[i] = prob->x[perm[i]];
2086  }
2087 
2088  // calculate weighted C
2089 
2090  double *weighted_C = Malloc(double, nr_class);
2091  for ( i=0; i<nr_class; i++ ) {
2092  weighted_C[i] = param->C;
2093  }
2094  for ( i=0; i<param->nr_weight; i++ ) {
2095  int j;
2096  for ( j=0; j<nr_class; j++ ) {
2097  if ( param->weight_label[i] == label[j] ) {
2098  break;
2099  }
2100  }
2101  if ( j == nr_class ) {
2102  fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
2103  } else {
2104  weighted_C[j] *= param->weight[i];
2105  }
2106  }
2107 
2108  // train k*(k-1)/2 models
2109 
2110  bool *nonzero = Malloc(bool,l);
2111  for ( i=0; i<l; i++ ) {
2112  nonzero[i] = false;
2113  }
2114  decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
2115 
2116  double *probA=NULL,*probB=NULL;
2117  if ( param->probability ) {
2118  probA=Malloc(double,nr_class*(nr_class-1)/2);
2119  probB=Malloc(double,nr_class*(nr_class-1)/2);
2120  }
2121 
2122  int p = 0;
2123  for ( i=0; i<nr_class; i++ ) {
2124  for ( int j=i+1; j<nr_class; j++ ) {
2125  svm_problem sub_prob;
2126  int si = start[i], sj = start[j];
2127  int ci = count[i], cj = count[j];
2128  sub_prob.l = ci+cj;
2129  sub_prob.x = Malloc(svm_node *,sub_prob.l);
2130  sub_prob.y = Malloc(double,sub_prob.l);
2131  int k;
2132  for ( k=0; k<ci; k++ ) {
2133  sub_prob.x[k] = x[si+k];
2134  sub_prob.y[k] = +1;
2135  }
2136  for ( k=0; k<cj; k++ ) {
2137  sub_prob.x[ci+k] = x[sj+k];
2138  sub_prob.y[ci+k] = -1;
2139  }
2140 
2141  if ( param->probability ) {
2142  svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
2143  }
2144 
2145  f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
2146  for ( k=0; k<ci; k++ ) {
2147  if ( !nonzero[si+k] && fabs(f[p].alpha[k]) > 0 ) {
2148  nonzero[si+k] = true;
2149  }
2150  }
2151  for ( k=0; k<cj; k++ ) {
2152  if ( !nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0 ) {
2153  nonzero[sj+k] = true;
2154  }
2155  }
2156  free(sub_prob.x);
2157  free(sub_prob.y);
2158  ++p;
2159  }
2160  }
2161 
2162  // build output
2163 
2164  model->nr_class = nr_class;
2165 
2166  model->label = Malloc(int,nr_class);
2167  for ( i=0; i<nr_class; i++ ) {
2168  model->label[i] = label[i];
2169  }
2170 
2171  model->rho = Malloc(double,nr_class*(nr_class-1)/2);
2172  for ( i=0; i<nr_class*(nr_class-1)/2; i++ ) {
2173  model->rho[i] = f[i].rho;
2174  }
2175 
2176  if ( param->probability ) {
2177  model->probA = Malloc(double,nr_class*(nr_class-1)/2);
2178  model->probB = Malloc(double,nr_class*(nr_class-1)/2);
2179  for ( i=0; i<nr_class*(nr_class-1)/2; i++ ) {
2180  model->probA[i] = probA[i];
2181  model->probB[i] = probB[i];
2182  }
2183  } else {
2184  model->probA=NULL;
2185  model->probB=NULL;
2186  }
2187 
2188  int total_sv = 0;
2189  int *nz_count = Malloc(int,nr_class);
2190  model->nSV = Malloc(int,nr_class);
2191  for ( i=0; i<nr_class; i++ ) {
2192  int nSV = 0;
2193  for ( int j=0; j<count[i]; j++ ) {
2194  if ( nonzero[start[i]+j] ) {
2195  ++nSV;
2196  ++total_sv;
2197  }
2198  }
2199  model->nSV[i] = nSV;
2200  nz_count[i] = nSV;
2201  }
2202 
2203  info("Total nSV = %d\n",total_sv);
2204 
2205  model->l = total_sv;
2206  model->SV = Malloc(svm_node *,total_sv);
2207  model->sv_indices = Malloc(int,total_sv);
2208  p = 0;
2209  for ( i=0; i<l; i++ ) {
2210  if ( nonzero[i] ) {
2211  model->SV[p] = x[i];
2212  model->sv_indices[p++] = perm[i] + 1;
2213  }
2214  }
2215 
2216  int *nz_start = Malloc(int,nr_class);
2217  nz_start[0] = 0;
2218  for ( i=1; i<nr_class; i++ ) {
2219  nz_start[i] = nz_start[i-1]+nz_count[i-1];
2220  }
2221 
2222  model->sv_coef = Malloc(double *,nr_class-1);
2223  for ( i=0; i<nr_class-1; i++ ) {
2224  model->sv_coef[i] = Malloc(double,total_sv);
2225  }
2226 
2227  p = 0;
2228  for ( i=0; i<nr_class; i++ ) {
2229  for ( int j=i+1; j<nr_class; j++ ) {
2230  // classifier (i,j): coefficients with
2231  // i are in sv_coef[j-1][nz_start[i]...],
2232  // j are in sv_coef[i][nz_start[j]...]
2233 
2234  int si = start[i];
2235  int sj = start[j];
2236  int ci = count[i];
2237  int cj = count[j];
2238 
2239  int q = nz_start[i];
2240  int k;
2241  for ( k=0; k<ci; k++ ) {
2242  if ( nonzero[si+k] ) {
2243  model->sv_coef[j-1][q++] = f[p].alpha[k];
2244  }
2245  }
2246  q = nz_start[j];
2247  for ( k=0; k<cj; k++ ) {
2248  if ( nonzero[sj+k] ) {
2249  model->sv_coef[i][q++] = f[p].alpha[ci+k];
2250  }
2251  }
2252  ++p;
2253  }
2254  }
2255 
2256  free(label);
2257  free(probA);
2258  free(probB);
2259  free(count);
2260  free(perm);
2261  free(start);
2262  free(x);
2263  free(weighted_C);
2264  free(nonzero);
2265  for ( i=0; i<nr_class*(nr_class-1)/2; i++ ) {
2266  free(f[i].alpha);
2267  }
2268  free(f);
2269  free(nz_count);
2270  free(nz_start);
2271  }
2272  return model;
2273 }
2274 
2275 // Stratified cross validation
2276 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
2277 {
2278  int i;
2279  int *fold_start = Malloc(int,nr_fold+1);
2280  int l = prob->l;
2281  int *perm = Malloc(int,l);
2282  int nr_class;
2283 
2284  // stratified cv may not give leave-one-out rate
2285  // Each class to l folds -> some folds may have zero elements
2286  if ( (param->svm_type == C_SVC ||
2287  param->svm_type == NU_SVC) && nr_fold < l ) {
2288  int *start = NULL;
2289  int *label = NULL;
2290  int *count = NULL;
2291  svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2292 
2293  // random shuffle and then data grouped by fold using the array perm
2294  int *fold_count = Malloc(int,nr_fold);
2295  int c;
2296  int *index = Malloc(int,l);
2297  for ( i=0; i<l; i++ ) {
2298  index[i]=perm[i];
2299  }
2300  for ( c=0; c<nr_class; c++ ) {
2301  for ( i=0; i<count[c]; i++ ) {
2302  int j = i+rand()%(count[c]-i);
2303  swap(index[start[c]+j],index[start[c]+i]);
2304  }
2305  }
2306  for ( i=0; i<nr_fold; i++ ) {
2307  fold_count[i] = 0;
2308  for ( c=0; c<nr_class; c++ ) {
2309  fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
2310  }
2311  }
2312  fold_start[0]=0;
2313  for ( i=1; i<=nr_fold; i++ ) {
2314  fold_start[i] = fold_start[i-1]+fold_count[i-1];
2315  }
2316  for ( c=0; c<nr_class; c++ ) {
2317  for ( i=0; i<nr_fold; i++ ) {
2318  int begin = start[c]+i*count[c]/nr_fold;
2319  int end = start[c]+(i+1)*count[c]/nr_fold;
2320  for ( int j=begin; j<end; j++ ) {
2321  perm[fold_start[i]] = index[j];
2322  fold_start[i]++;
2323  }
2324  }
2325  }
2326  fold_start[0]=0;
2327  for ( i=1; i<=nr_fold; i++ ) {
2328  fold_start[i] = fold_start[i-1]+fold_count[i-1];
2329  }
2330  free(start);
2331  free(label);
2332  free(count);
2333  free(index);
2334  free(fold_count);
2335  } else {
2336  for ( i=0; i<l; i++ ) perm[i]=i;
2337  for ( i=0; i<l; i++ ) {
2338  int j = i+rand()%(l-i);
2339  swap(perm[i],perm[j]);
2340  }
2341  for ( i=0; i<=nr_fold; i++ ) {
2342  fold_start[i]=i*l/nr_fold;
2343  }
2344  }
2345 
2346  for ( i=0; i<nr_fold; i++ ) {
2347  int begin = fold_start[i];
2348  int end = fold_start[i+1];
2349  int j,k;
2350  struct svm_problem subprob;
2351 
2352  subprob.l = l-(end-begin);
2353  subprob.x = Malloc(struct svm_node*,subprob.l);
2354  subprob.y = Malloc(double,subprob.l);
2355 
2356  k=0;
2357  for ( j=0; j<begin; j++ ) {
2358  subprob.x[k] = prob->x[perm[j]];
2359  subprob.y[k] = prob->y[perm[j]];
2360  ++k;
2361  }
2362  for ( j=end; j<l; j++ ) {
2363  subprob.x[k] = prob->x[perm[j]];
2364  subprob.y[k] = prob->y[perm[j]];
2365  ++k;
2366  }
2367  struct svm_model *submodel = svm_train(&subprob,param);
2368  if ( param->probability &&
2369  (param->svm_type == C_SVC || param->svm_type == NU_SVC) ) {
2370  double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
2371  for ( j=begin; j<end; j++ ) {
2372  target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
2373  }
2374  free(prob_estimates);
2375  } else {
2376  for ( j=begin; j<end; j++ ) {
2377  target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
2378  }
2379  }
2380  svm_free_and_destroy_model(&submodel);
2381  free(subprob.x);
2382  free(subprob.y);
2383  }
2384  free(fold_start);
2385  free(perm);
2386 }
2387 
2388 
2389 int svm_get_svm_type(const svm_model *model)
2390 {
2391  return model->param.svm_type;
2392 }
2393 
2394 int svm_get_nr_class(const svm_model *model)
2395 {
2396  return model->nr_class;
2397 }
2398 
2399 void svm_get_labels(const svm_model *model, int* label)
2400 {
2401  if ( model->label != NULL ) {
2402  for ( int i=0; i<model->nr_class; i++ ) {
2403  label[i] = model->label[i];
2404  }
2405  }
2406 }
2407 
2408 void svm_get_sv_indices(const svm_model *model, int* indices)
2409 {
2410  if ( model->sv_indices != NULL ) {
2411  for ( int i=0; i<model->l; i++ ) {
2412  indices[i] = model->sv_indices[i];
2413  }
2414  }
2415 }
2416 
2417 int svm_get_nr_sv(const svm_model *model)
2418 {
2419  return model->l;
2420 }
2421 
2423 {
2424  if ( (model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
2425  model->probA!=NULL ) {
2426  return model->probA[0];
2427  } else {
2428  fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
2429  return 0;
2430  }
2431 }
2432 
2433 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
2434 {
2435  int i;
2436  if ( model->param.svm_type == ONE_CLASS ||
2437  model->param.svm_type == EPSILON_SVR ||
2438  model->param.svm_type == NU_SVR ) {
2439  double *sv_coef = model->sv_coef[0];
2440  double sum = 0;
2441  for ( i=0; i<model->l; i++ ) {
2442  sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
2443  }
2444  sum -= model->rho[0];
2445  *dec_values = sum;
2446 
2447  if ( model->param.svm_type == ONE_CLASS ) {
2448  return (sum>0)?1:-1;
2449  } else {
2450  return sum;
2451  }
2452  } else {
2453  int nr_class = model->nr_class;
2454  int l = model->l;
2455 
2456  double *kvalue = Malloc(double,l);
2457  for ( i=0; i<l; i++ ) {
2458  kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
2459  }
2460 
2461  int *start = Malloc(int,nr_class);
2462  start[0] = 0;
2463  for ( i=1; i<nr_class; i++ ) {
2464  start[i] = start[i-1]+model->nSV[i-1];
2465  }
2466 
2467  int *vote = Malloc(int,nr_class);
2468  for ( i=0; i<nr_class; i++ ) {
2469  vote[i] = 0;
2470  }
2471 
2472  int p=0;
2473  for ( i=0; i<nr_class; i++ ) {
2474  for ( int j=i+1; j<nr_class; j++ ) {
2475  double sum = 0;
2476  int si = start[i];
2477  int sj = start[j];
2478  int ci = model->nSV[i];
2479  int cj = model->nSV[j];
2480 
2481  int k;
2482  double *coef1 = model->sv_coef[j-1];
2483  double *coef2 = model->sv_coef[i];
2484  for ( k=0; k<ci; k++ ) {
2485  sum += coef1[si+k] * kvalue[si+k];
2486  }
2487  for ( k=0; k<cj; k++ ) {
2488  sum += coef2[sj+k] * kvalue[sj+k];
2489  }
2490  sum -= model->rho[p];
2491  dec_values[p] = sum;
2492 
2493  if ( dec_values[p] > 0 ) {
2494  ++vote[i];
2495  } else {
2496  ++vote[j];
2497  }
2498  p++;
2499  }
2500  }
2501 
2502  int vote_max_idx = 0;
2503  for ( i=1; i<nr_class; i++ ) {
2504  if ( vote[i] > vote[vote_max_idx] ) {
2505  vote_max_idx = i;
2506  }
2507  }
2508 
2509  free(kvalue);
2510  free(start);
2511  free(vote);
2512  return model->label[vote_max_idx];
2513  }
2514 }
2515 
2516 double svm_predict(const svm_model *model, const svm_node *x)
2517 {
2518  int nr_class = model->nr_class;
2519  double *dec_values;
2520  if ( model->param.svm_type == ONE_CLASS ||
2521  model->param.svm_type == EPSILON_SVR ||
2522  model->param.svm_type == NU_SVR ) {
2523  dec_values = Malloc(double, 1);
2524  } else {
2525  dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2526  }
2527  double pred_result = svm_predict_values(model, x, dec_values);
2528  free(dec_values);
2529  return pred_result;
2530 }
2531 
2533  const svm_model *model, const svm_node *x, double *prob_estimates)
2534 {
2535  if ( (model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
2536  model->probA!=NULL && model->probB!=NULL ) {
2537  int i;
2538  int nr_class = model->nr_class;
2539  double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2540  svm_predict_values(model, x, dec_values);
2541 
2542  double min_prob=1e-7;
2543  double **pairwise_prob=Malloc(double *,nr_class);
2544  for ( i=0; i<nr_class; i++ ) {
2545  pairwise_prob[i]=Malloc(double,nr_class);
2546  }
2547  int k=0;
2548  for ( i=0; i<nr_class; i++ ) {
2549  for ( int j=i+1; j<nr_class; j++ ) {
2550  pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
2551  pairwise_prob[j][i]=1-pairwise_prob[i][j];
2552  k++;
2553  }
2554  }
2555  multiclass_probability(nr_class,pairwise_prob,prob_estimates);
2556 
2557  int prob_max_idx = 0;
2558  for ( i=1; i<nr_class; i++ ) {
2559  if ( prob_estimates[i] > prob_estimates[prob_max_idx] ) {
2560  prob_max_idx = i;
2561  }
2562  }
2563  for ( i=0; i<nr_class; i++ ) {
2564  free(pairwise_prob[i]);
2565  }
2566  free(dec_values);
2567  free(pairwise_prob);
2568  return model->label[prob_max_idx];
2569  } else {
2570  return svm_predict(model, x);
2571  }
2572 }
2573 
2574 static const char *svm_type_table[] =
2575 {
2576 "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
2577 };
2578 
2579 static const char *kernel_type_table[]=
2580 {
2581 "linear","polynomial","rbf","sigmoid","precomputed",NULL
2582 };
2583 
2584 int svm_save_model(const char *model_file_name, const svm_model *model)
2585 {
2586  FILE *fp = fopen(model_file_name,"w");
2587  if ( fp==NULL ) return -1;
2588 
2589  char *old_locale = strdup(setlocale(LC_ALL, NULL));
2590  setlocale(LC_ALL, "C");
2591 
2592  const svm_parameter& param = model->param;
2593 
2594  fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
2595  fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
2596 
2597  if ( param.kernel_type == POLY ) {
2598  fprintf(fp,"degree %d\n", param.degree);
2599  }
2600 
2601  if ( param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID ) {
2602  fprintf(fp,"gamma %g\n", param.gamma);
2603  }
2604 
2605  if ( param.kernel_type == POLY || param.kernel_type == SIGMOID ) {
2606  fprintf(fp,"coef0 %g\n", param.coef0);
2607  }
2608 
2609  int nr_class = model->nr_class;
2610  int l = model->l;
2611  fprintf(fp, "nr_class %d\n", nr_class);
2612  fprintf(fp, "total_sv %d\n",l);
2613 
2614  {
2615  fprintf(fp, "rho");
2616  for ( int i=0; i<nr_class*(nr_class-1)/2; i++ ) {
2617  fprintf(fp," %g",model->rho[i]);
2618  }
2619  fprintf(fp, "\n");
2620  }
2621 
2622  if ( model->label ) {
2623  fprintf(fp, "label");
2624  for ( int i=0; i<nr_class; i++ ) {
2625  fprintf(fp," %d",model->label[i]);
2626  }
2627  fprintf(fp, "\n");
2628  }
2629 
2630  if ( model->probA ) { // regression has probA only
2631  fprintf(fp, "probA");
2632  for ( int i=0; i<nr_class*(nr_class-1)/2; i++ ) {
2633  fprintf(fp," %g",model->probA[i]);
2634  }
2635  fprintf(fp, "\n");
2636  }
2637  if ( model->probB ) {
2638  fprintf(fp, "probB");
2639  for ( int i=0; i<nr_class*(nr_class-1)/2; i++ ) {
2640  fprintf(fp," %g",model->probB[i]);
2641  }
2642  fprintf(fp, "\n");
2643  }
2644 
2645  if ( model->nSV ) {
2646  fprintf(fp, "nr_sv");
2647  for ( int i=0; i<nr_class; i++ ) {
2648  fprintf(fp," %d",model->nSV[i]);
2649  }
2650  fprintf(fp, "\n");
2651  }
2652 
2653  fprintf(fp, "SV\n");
2654  const double * const *sv_coef = model->sv_coef;
2655  const svm_node * const *SV = model->SV;
2656 
2657  for ( int i=0; i<l; i++ ) {
2658  for ( int j=0; j<nr_class-1; j++ ) {
2659  fprintf(fp, "%.16g ",sv_coef[j][i]);
2660  }
2661 
2662  const svm_node *p = SV[i];
2663 
2664  if ( param.kernel_type == PRECOMPUTED ) {
2665  fprintf(fp,"0:%d ",(int)(p->value));
2666  } else {
2667  while ( p->index != -1 )
2668  {
2669  fprintf(fp,"%d:%.8g ",p->index,p->value);
2670  p++;
2671  }
2672  }
2673  fprintf(fp, "\n");
2674  }
2675 
2676  setlocale(LC_ALL, old_locale);
2677  free(old_locale);
2678 
2679  if ( ferror(fp) != 0 || fclose(fp) != 0 ) return -1;
2680  else return 0;
2681 }
2682 
2683 static char *line = NULL;
2684 static int max_line_len;
2685 
2686 static char* readline(FILE *input)
2687 {
2688  if ( fgets(line,max_line_len,input) == NULL ) {
2689  return NULL;
2690  }
2691 
2692  while ( strrchr(line,'\n') == NULL )
2693  {
2694  max_line_len *= 2;
2695  char * newline = (char *) realloc(line,max_line_len);
2696  assert( newline ); // realloc returns a null pointer and doesn't free the memory on failure.
2697  line = newline;
2698  int len = (int) strlen(line);
2699  if ( fgets(line+len,max_line_len-len,input) == NULL ) {
2700  break;
2701  }
2702  }
2703  return line;
2704 }
2705 
2706 svm_model *svm_load_model(const char *model_file_name)
2707 {
2708  FILE *fp = fopen(model_file_name,"rb");
2709  if ( fp==NULL ) return NULL;
2710 
2711  char *old_locale = strdup(setlocale(LC_ALL, NULL));
2712  setlocale(LC_ALL, "C");
2713 
2714  // read parameters
2715 
2716  svm_model *model = Malloc(svm_model,1);
2717  svm_parameter& param = model->param;
2718  model->rho = NULL;
2719  model->probA = NULL;
2720  model->probB = NULL;
2721  model->label = NULL;
2722  model->nSV = NULL;
2723 
2724  char cmd[81];
2725  while ( 1 )
2726  {
2727  if ( fscanf(fp,"%80s",cmd) == EOF ) {
2728  std::cout << "Warning! End of file reached without any matches." << std::endl;
2729  }
2730 
2731  if ( strcmp(cmd,"svm_type")==0 ) {
2732  if ( fscanf(fp,"%80s",cmd) == EOF ) {
2733  std::cout << "Warning! End of file reached without any matches." << std::endl;
2734  }
2735  int i;
2736  for ( i=0; svm_type_table[i]; i++ ) {
2737  if ( strcmp(svm_type_table[i],cmd)==0 ) {
2738  param.svm_type=i;
2739  break;
2740  }
2741  }
2742  if ( svm_type_table[i] == NULL ) {
2743  fprintf(stderr,"unknown svm type.\n");
2744 
2745  setlocale(LC_ALL, old_locale);
2746  free(old_locale);
2747  free(model->rho);
2748  free(model->label);
2749  free(model->nSV);
2750  free(model);
2751  return NULL;
2752  }
2753  } else if ( strcmp(cmd,"kernel_type")==0 ) {
2754  if ( fscanf(fp,"%80s",cmd) == EOF ) {
2755  std::cout << "Warning! End of file reached without any matches." << std::endl;
2756  }
2757  int i;
2758  for ( i=0; kernel_type_table[i]; i++ ) {
2759  if ( strcmp(kernel_type_table[i],cmd)==0 ) {
2760  param.kernel_type=i;
2761  break;
2762  }
2763  }
2764  if ( kernel_type_table[i] == NULL ) {
2765  fprintf(stderr,"unknown kernel function.\n");
2766 
2767  setlocale(LC_ALL, old_locale);
2768  free(old_locale);
2769  free(model->rho);
2770  free(model->label);
2771  free(model->nSV);
2772  free(model);
2773  return NULL;
2774  }
2775  } else if ( strcmp(cmd,"degree")==0 ) {
2776  if ( fscanf(fp,"%d",&param.degree) == EOF ) {
2777  std::cout << "Warning! End of file reached without any matches." << std::endl;
2778  }
2779  } else if ( strcmp(cmd,"gamma")==0 ) {
2780  if ( fscanf(fp,"%lf",&param.gamma) == EOF ) {
2781  std::cout << "Warning! End of file reached without any matches." << std::endl;
2782  }
2783  } else if ( strcmp(cmd,"coef0")==0 ) {
2784  if ( fscanf(fp,"%lf",&param.coef0) == EOF ) {
2785  std::cout << "Warning! End of file reached without any matches." << std::endl;
2786  }
2787  } else if ( strcmp(cmd,"nr_class")==0 ) {
2788  if ( fscanf(fp,"%d",&model->nr_class) == EOF ) {
2789  std::cout << "Warning! End of file reached without any matches." << std::endl;
2790  }
2791  } else if ( strcmp(cmd,"total_sv")==0 ) {
2792  if ( fscanf(fp,"%d",&model->l) == EOF ) {
2793  std::cout << "Warning! End of file reached without any matches." << std::endl;
2794  }
2795  } else if ( strcmp(cmd,"rho")==0 ) {
2796  int n = model->nr_class * (model->nr_class-1)/2;
2797  model->rho = Malloc(double,n);
2798  for ( int i=0; i<n; i++ ) {
2799  if ( fscanf(fp,"%lf",&model->rho[i]) == EOF ) {
2800  std::cout << "Warning! End of file reached without any matches." << std::endl;
2801  }
2802  }
2803  } else if ( strcmp(cmd,"label")==0 ) {
2804  int n = model->nr_class;
2805  model->label = Malloc(int,n);
2806  for ( int i=0; i<n; i++ ) {
2807  if ( fscanf(fp,"%d",&model->label[i]) == EOF ) {
2808  std::cout << "Warning! End of file reached without any matches." << std::endl;
2809  }
2810  }
2811  } else if ( strcmp(cmd,"probA")==0 ) {
2812  int n = model->nr_class * (model->nr_class-1)/2;
2813  model->probA = Malloc(double,n);
2814  for ( int i=0; i<n; i++ ) {
2815  if ( fscanf(fp,"%lf",&model->probA[i]) == EOF ) {
2816  std::cout << "Warning! End of file reached without any matches." << std::endl;
2817  }
2818  }
2819  } else if ( strcmp(cmd,"probB")==0 ) {
2820  int n = model->nr_class * (model->nr_class-1)/2;
2821  model->probB = Malloc(double,n);
2822  for ( int i=0; i<n; i++ ) {
2823  if ( fscanf(fp,"%lf",&model->probB[i]) == EOF ) {
2824  std::cout << "Warning! End of file reached without any matches." << std::endl;
2825  }
2826  }
2827  } else if ( strcmp(cmd,"nr_sv")==0 ) {
2828  int n = model->nr_class;
2829  model->nSV = Malloc(int,n);
2830  for ( int i=0; i<n; i++ ) {
2831  if ( fscanf(fp,"%d",&model->nSV[i]) == EOF ) {
2832  std::cout << "Warning! End of file reached without any matches." << std::endl;
2833  }
2834  }
2835  } else if ( strcmp(cmd,"SV")==0 ) {
2836  while ( 1 )
2837  {
2838  int c = getc(fp);
2839  if ( c==EOF || c=='\n' ) break;
2840  }
2841  break;
2842  } else {
2843  fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2844 
2845  setlocale(LC_ALL, old_locale);
2846  free(old_locale);
2847  free(model->rho);
2848  free(model->label);
2849  free(model->nSV);
2850  free(model);
2851  return NULL;
2852  }
2853  }
2854 
2855  // read sv_coef and SV
2856 
2857  int elements = 0;
2858  long pos = ftell(fp);
2859 
2860  max_line_len = 1024;
2861  line = Malloc(char,max_line_len);
2862  char *p,*endptr,*idx,*val;
2863 
2864  while ( readline(fp)!=NULL )
2865  {
2866  p = strtok(line,":");
2867  while ( 1 )
2868  {
2869  p = strtok(NULL,":");
2870  if ( p == NULL ) {
2871  break;
2872  }
2873  ++elements;
2874  }
2875  }
2876  elements += model->l;
2877 
2878  fseek(fp,pos,SEEK_SET);
2879 
2880  int m = model->nr_class - 1;
2881  int l = model->l;
2882  model->sv_coef = Malloc(double *,m);
2883  int i;
2884  for ( i=0; i<m; i++ ) {
2885  model->sv_coef[i] = Malloc(double,l);
2886  }
2887  model->SV = Malloc(svm_node*,l);
2888  svm_node *x_space = NULL;
2889  if ( l>0 ) x_space = Malloc(svm_node,elements);
2890 
2891  int j=0;
2892  for ( i=0; i<l; i++ ) {
2893  readline(fp);
2894  model->SV[i] = &x_space[j];
2895 
2896  p = strtok(line, " \t");
2897  model->sv_coef[0][i] = strtod(p,&endptr);
2898  for ( int k=1; k<m; k++ ) {
2899  p = strtok(NULL, " \t");
2900  model->sv_coef[k][i] = strtod(p,&endptr);
2901  }
2902 
2903  while ( 1 )
2904  {
2905  idx = strtok(NULL, ":");
2906  val = strtok(NULL, " \t");
2907 
2908  if ( val == NULL ) {
2909  break;
2910  }
2911  x_space[j].index = (int) strtol(idx,&endptr,10);
2912  x_space[j].value = strtod(val,&endptr);
2913 
2914  ++j;
2915  }
2916  x_space[j++].index = -1;
2917  }
2918  free(line);
2919 
2920  setlocale(LC_ALL, old_locale);
2921  free(old_locale);
2922 
2923  if ( ferror(fp) != 0 || fclose(fp) != 0 ) {
2924  return NULL;
2925  }
2926 
2927  model->free_sv = 1; // XXX
2928  return model;
2929 }
2930 
2932 {
2933  if ( model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL ) {
2934  free((void *)(model_ptr->SV[0]));
2935  }
2936  if ( model_ptr->sv_coef ) {
2937  for ( int i=0; i<model_ptr->nr_class-1; i++ ) {
2938  free(model_ptr->sv_coef[i]);
2939  }
2940  }
2941 
2942  free(model_ptr->SV);
2943  model_ptr->SV = NULL;
2944 
2945  free(model_ptr->sv_coef);
2946  model_ptr->sv_coef = NULL;
2947 
2948  free(model_ptr->rho);
2949  model_ptr->rho = NULL;
2950 
2951  free(model_ptr->label);
2952  model_ptr->label= NULL;
2953 
2954  free(model_ptr->probA);
2955  model_ptr->probA = NULL;
2956 
2957  free(model_ptr->probB);
2958  model_ptr->probB= NULL;
2959 
2960  free(model_ptr->nSV);
2961  model_ptr->nSV = NULL;
2962 }
2963 
2965 {
2966  if ( model_ptr_ptr != NULL && *model_ptr_ptr != NULL ) {
2967  svm_free_model_content(*model_ptr_ptr);
2968  free(*model_ptr_ptr);
2969  *model_ptr_ptr = NULL;
2970  }
2971 }
2972 
2974 {
2975  free(param->weight_label);
2976  free(param->weight);
2977 }
2978 
2979 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
2980 {
2981  // svm_type
2982 
2983  int svm_type = param->svm_type;
2984  if ( svm_type != C_SVC &&
2985  svm_type != NU_SVC &&
2986  svm_type != ONE_CLASS &&
2987  svm_type != EPSILON_SVR &&
2988  svm_type != NU_SVR ) {
2989  return "unknown svm type";
2990  }
2991 
2992  // kernel_type, degree
2993 
2994  int kernel_type = param->kernel_type;
2995  if ( kernel_type != LINEAR &&
2996  kernel_type != POLY &&
2997  kernel_type != RBF &&
2998  kernel_type != SIGMOID &&
2999  kernel_type != PRECOMPUTED ) {
3000  return "unknown kernel type";
3001  }
3002 
3003  if ( param->gamma < 0 ) {
3004  return "gamma < 0";
3005  }
3006 
3007  if ( param->degree < 0 ) {
3008  return "degree of polynomial kernel < 0";
3009  }
3010 
3011  // cache_size,eps,C,nu,p,shrinking
3012 
3013  if ( param->cache_size <= 0 ) {
3014  return "cache_size <= 0";
3015  }
3016 
3017  if ( param->eps <= 0 ) {
3018  return "eps <= 0";
3019  }
3020 
3021  if ( svm_type == C_SVC ||
3022  svm_type == EPSILON_SVR ||
3023  svm_type == NU_SVR ) {
3024  if ( param->C <= 0 ) {
3025  return "C <= 0";
3026  }
3027  }
3028 
3029  if ( svm_type == NU_SVC ||
3030  svm_type == ONE_CLASS ||
3031  svm_type == NU_SVR ) {
3032  if ( param->nu <= 0 || param->nu > 1 ) {
3033  return "nu <= 0 or nu > 1";
3034  }
3035  }
3036 
3037  if ( svm_type == EPSILON_SVR ) {
3038  if ( param->p < 0 ) {
3039  return "p < 0";
3040  }
3041  }
3042 
3043  if ( param->shrinking != 0 &&
3044  param->shrinking != 1 ) {
3045  return "shrinking != 0 and shrinking != 1";
3046  }
3047 
3048  if ( param->probability != 0 &&
3049  param->probability != 1 ) {
3050  return "probability != 0 and probability != 1";
3051  }
3052 
3053  if ( param->probability == 1 &&
3054  svm_type == ONE_CLASS ) {
3055  return "one-class SVM probability output not supported yet";
3056  }
3057 
3058 
3059  // check whether nu-svc is feasible
3060 
3061  if ( svm_type == NU_SVC ) {
3062  int l = prob->l;
3063  int max_nr_class = 16;
3064  int nr_class = 0;
3065  int *label = Malloc(int,max_nr_class);
3066  int *count = Malloc(int,max_nr_class);
3067 
3068  int i;
3069  for ( i=0; i<l; i++ ) {
3070  int this_label = (int)prob->y[i];
3071  int j;
3072  for ( j=0; j<nr_class; j++ ) {
3073  if ( this_label == label[j] ) {
3074  ++count[j];
3075  break;
3076  }
3077  }
3078  if ( j == nr_class ) {
3079  if ( nr_class == max_nr_class ) {
3080  max_nr_class *= 2;
3081  // AMW cppcheck notes that we need to check for realloc success
3082  int *new_label_data = static_cast< int * >( realloc( label, max_nr_class * sizeof( int ) ) );
3083  if ( new_label_data == NULL ) {
3084  info( "\n\n!!! Critical memory error in svm_group_classes, label allocation !!!\n\n" );
3085  } else {
3086  label = new_label_data;
3087  }
3088 
3089  int *new_count_data = static_cast< int * >( realloc( count, max_nr_class * sizeof( int ) ) );
3090  if ( new_count_data == NULL ) {
3091  info( "\n\n!!! Critical memory error in svm_group_classes, count allocation !!!\n\n" );
3092  } else {
3093  count = new_count_data;
3094  }
3095  }
3096  label[ nr_class ] = this_label;
3097  count[ nr_class ] = 1;
3098  ++nr_class;
3099  }
3100  }
3101 
3102  for ( i=0; i<nr_class; i++ ) {
3103  int n1 = count[i];
3104  for ( int j=i+1; j<nr_class; j++ ) {
3105  int n2 = count[j];
3106  if ( param->nu*(n1+n2)/2 > min(n1,n2) ) {
3107  free(label);
3108  free(count);
3109  return "specified nu is infeasible";
3110  }
3111  }
3112  }
3113  free(label);
3114  free(count);
3115  }
3116 
3117  return NULL;
3118 }
3119 
3121 {
3122  return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3123  model->probA!=NULL && model->probB!=NULL) ||
3124  ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3125  model->probA!=NULL);
3126 }
3127 
3128 void svm_set_print_string_function(void (*print_func)(const char *))
3129 {
3130  if ( print_func == NULL ) {
3132  } else {
3133  svm_print_string = print_func;
3134  }
3135 }
double * x_square
Definition: Svm.cc:219
void update_alpha_status(int i)
Definition: Svm.cc:419
#define LIBSVM_VERSION
Definition: Svm.hh:4
static T min(T x, T y)
Definition: Svm.cc:16
bool unshrink
Definition: Svm.cc:413
SVC_Q(const svm_problem &prob, const svm_parameter &param, const schar *y_)
Definition: Svm.cc:1212
double eps
Definition: Svm.cc:407
int len
Definition: Svm.cc:85
const QMatrix * Q
Definition: Svm.cc:405
int * nSV
Definition: Svm.hh:67
Definition: Svm.hh:25
static void sigmoid_train(int l, const double *dec_values, const double *labels, double &A, double &B)
Definition: Svm.cc:1651
const char * svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
Definition: Svm.cc:2979
double * QD
Definition: Svm.cc:1379
virtual ~QMatrix()
Definition: Svm.cc:196
static double dot(const svm_node *px, const svm_node *py)
Definition: Svm.cc:291
tuple gamma
Definition: loops.py:78
Definition: Svm.cc:191
void svm_destroy_param(svm_parameter *param)
Definition: Svm.cc:2973
virtual void do_shrinking()
Definition: Svm.cc:875
double kernel_rbf(int i, int j) const
Definition: Svm.cc:236
double get_C(int i)
Definition: Svm.cc:415
static decision_function svm_train_one(const svm_problem *prob, const svm_parameter *param, double Cp, double Cn)
Definition: Svm.cc:1596
const svm_node ** x
Definition: Svm.cc:218
Qfloat * data
Definition: Svm.cc:84
void svm_get_sv_indices(const svm_model *model, int *indices)
Definition: Svm.cc:2408
static double powi(double base, int times)
Definition: Svm.cc:27
Kernel(int l, svm_node *const *x, const svm_parameter &param)
Definition: Svm.cc:250
void Solve(int l, const QMatrix &Q, const double *p_, const schar *y_, double *alpha_, double Cp, double Cn, double eps, SolutionInfo *si, int shrinking)
Definition: Svm.cc:496
SolutionInfo * si
Definition: Svm.cc:982
dictionary size
Definition: amino_acids.py:44
virtual void swap_index(int i, int j) const
Definition: Svm.cc:208
virtual void swap_index(int i, int j) const =0
int svm_check_probability_model(const svm_model *model)
Definition: Svm.cc:3120
~SVR_Q()
Definition: Svm.cc:1363
int l
Definition: Svm.cc:1373
head_t * prev
Definition: Svm.cc:83
static void swap(T &x, T &y)
Definition: Svm.cc:21
void swap_index(int i, int j) const
Definition: Svm.cc:1240
static double svm_svr_probability(const svm_problem *prob, const svm_parameter *param)
Definition: Svm.cc:1917
double * G_bar
Definition: Svm.cc:411
double value
Definition: Svm.hh:15
const double gamma
Definition: Svm.cc:224
virtual Qfloat * get_Q(int column, int len) const =0
double Cp
Definition: Svm.cc:408
ONE_CLASS_Q(const svm_problem &prob, const svm_parameter &param)
Definition: Svm.cc:1263
def x
int l
Definition: Svm.hh:56
static void solve_c_svc(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si, double Cp, double Cn)
Definition: Svm.cc:1385
int nr_weight
Definition: Svm.hh:40
utility::keys::lookup::end< KeyType > const end
struct svm_node ** SV
Definition: Svm.hh:57
void svm_free_and_destroy_model(svm_model **model_ptr_ptr)
Definition: Svm.cc:2964
double * get_QD() const
Definition: Svm.cc:1358
double * G
Definition: Svm.cc:401
double * probB
Definition: Svm.hh:61
double kernel_precomputed(int i, int j) const
Definition: Svm.cc:244
double Cn
Definition: Svm.cc:408
schar * y
Definition: Svm.cc:400
double svm_predict_values(const svm_model *model, const svm_node *x, double *dec_values)
Definition: Svm.cc:2433
void swap_index(int i, int j) const
Definition: Svm.cc:1290
Qfloat * get_Q(int i, int len) const
Definition: Svm.cc:1338
void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
Definition: Svm.cc:2276
int svm_save_model(const char *model_file_name, const svm_model *model)
Definition: Svm.cc:2584
double svm_get_svr_probability(const svm_model *model)
Definition: Svm.cc:2422
int * weight_label
Definition: Svm.hh:41
virtual int select_working_set(int &i, int &j)
Definition: Svm.cc:763
double * alpha
Definition: Svm.cc:1592
static void info(const char *fmt,...)
Definition: Svm.cc:48
double log(double x, double base)
Computes log(x) in the given base.
Definition: util.hh:41
static double k_function(const svm_node *x, const svm_node *y, const svm_parameter &param)
Definition: Svm.cc:311
double * probA
Definition: Svm.hh:60
static void solve_nu_svr(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: Svm.cc:1549
int nr_class
Definition: Svm.hh:55
Definition: Svm.hh:52
double p
Definition: Svm.hh:44
Fstring::size_type len(Fstring const &s)
Length.
Definition: Fstring.hh:2207
Fstring::size_type index(Fstring const &s, Fstring const &ss)
First Index Position of a Substring in an Fstring.
Definition: Fstring.hh:2180
signed char schar
Definition: Svm.cc:14
virtual ~Kernel()
Definition: Svm.cc:285
int l
Definition: Svm.cc:79
double * alpha
Definition: Svm.cc:404
static void solve_nu_svc(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: Svm.cc:1422
double cache_size
Definition: Svm.hh:37
int * index
Definition: Svm.cc:1376
svm_model * svm_load_model(const char *model_file_name)
Definition: Svm.cc:2706
virtual double calculate_rho()
Definition: Svm.cc:930
member1 value
Definition: Tag.cc:296
bool be_shrunk(int i, double Gmax1, double Gmax2)
Definition: Svm.cc:856
void svm_set_print_string_function(void(*print_func)(const char *))
Definition: Svm.cc:3128
void swap_index(int i, int j)
Definition: Svm.cc:156
Qfloat * get_Q(int i, int len) const
Definition: Svm.cc:1223
#define C(a, b)
Definition: functions.cc:27
void swap_index(int i, int j)
Definition: Svm.cc:439
Real sum(ddGs &scores_to_sum)
double upper_bound_n
Definition: Svm.cc:391
tuple p
Definition: docking.py:9
Definition: Svm.cc:382
SVR_Q(const svm_problem &prob, const svm_parameter &param)
Definition: Svm.cc:1310
double svm_predict(const svm_model *model, const svm_node *x)
Definition: Svm.cc:2516
void lru_insert(head_t *h)
Definition: Svm.cc:118
double calculate_rho()
Definition: Svm.cc:1160
static void multiclass_probability(int k, double **r, double *p)
Definition: Svm.cc:1772
Cache(int l, long int size)
Definition: Svm.cc:94
void reconstruct_gradient()
Definition: Svm.cc:451
~Cache()
Definition: Svm.cc:103
double eps
Definition: Svm.hh:38
const int degree
Definition: Svm.cc:223
#define INF
Definition: Svm.cc:37
double upper_bound_p
Definition: Svm.cc:390
static void svm_binary_svc_probability(const svm_problem *prob, const svm_parameter *param, double Cp, double Cn, double &probA, double &probB)
Definition: Svm.cc:1831
int get_data(const int index, Qfloat **data, int len)
Definition: Svm.cc:127
static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
Definition: Svm.cc:1952
double * QD
Definition: Svm.cc:1257
static double sigmoid_predict(double decision_value, double A, double B)
Definition: Svm.cc:1760
int select_working_set(int &i, int &j)
Definition: Svm.cc:990
static const char * kernel_type_table[]
Definition: Svm.cc:2579
head_t * head
Definition: Svm.cc:88
head_t lru_head
Definition: Svm.cc:89
Solver_NU()
Definition: Svm.cc:973
int shrinking
Definition: Svm.hh:45
double(Kernel::* kernel_function)(int i, int j) const
Definition: Svm.cc:215
bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
Definition: Svm.cc:1098
int svm_get_nr_class(const svm_model *model)
Definition: Svm.cc:2394
int svm_get_svm_type(const svm_model *model)
Definition: Svm.cc:2389
struct svm_node ** x
Definition: Svm.hh:22
int * label
Definition: Svm.hh:66
Definition: Svm.hh:26
int libsvm_version
Definition: Svm.cc:12
static int max_line_len
Definition: Svm.cc:2684
struct svm_parameter param
Definition: Svm.hh:54
void svm_get_labels(const svm_model *model, int *label)
Definition: Svm.cc:2399
Cache * cache
Definition: Svm.cc:1256
Cache * cache
Definition: Svm.cc:1374
int * sv_indices
Definition: Svm.hh:62
Definition: Svm.hh:25
double kernel_linear(int i, int j) const
Definition: Svm.cc:228
svm_model * svm_train(const svm_problem *prob, const svm_parameter *param)
Definition: Svm.cc:2020
static char * readline(FILE *input)
Definition: Svm.cc:2686
static char * line
Definition: Svm.cc:2683
double * rho
Definition: Svm.hh:59
Definition: Svm.cc:1209
Solver()
Definition: Svm.cc:384
int index
Definition: Svm.hh:14
~SVC_Q()
Definition: Svm.cc:1248
double * p
Definition: Svm.cc:409
static void(* svm_print_string)(const char *)
Definition: Svm.cc:46
void lru_delete(head_t *h)
Definition: Svm.cc:111
int * active_set
Definition: Svm.cc:410
double * QD
Definition: Svm.cc:1304
ocstream cout(std::cout)
Wrapper around std::cout.
Definition: ocstream.hh:287
#define TAU
Definition: Svm.cc:38
double ** sv_coef
Definition: Svm.hh:58
double svm_predict_probability(const svm_model *model, const svm_node *x, double *prob_estimates)
Definition: Svm.cc:2532
virtual ~Solver()
Definition: Svm.cc:385
double rho
Definition: Svm.cc:1593
Definition: Svm.hh:26
static const char * svm_type_table[]
Definition: Svm.cc:2574
virtual Qfloat * get_Q(int column, int len) const =0
Definition: Svm.hh:25
const int kernel_type
Definition: Svm.cc:222
Definition: Svm.cc:199
utility::keys::lookup::begin< KeyType > const begin
schar * sign
Definition: Svm.cc:1375
char * alpha_status
Definition: Svm.cc:403
static void solve_one_class(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: Svm.cc:1479
int probability
Definition: Svm.hh:46
int degree
Definition: Svm.hh:32
int free_sv
Definition: Svm.hh:70
virtual double * get_QD() const =0
int next_buffer
Definition: Svm.cc:1377
double kernel_sigmoid(int i, int j) const
Definition: Svm.cc:240
Definition: Svm.cc:1307
Definition: Svm.hh:12
Qfloat * get_Q(int i, int len) const
Definition: Svm.cc:1273
bool is_free(int i)
Definition: Svm.cc:429
int svm_get_nr_sv(const svm_model *model)
Definition: Svm.cc:2417
const double coef0
Definition: Svm.cc:225
void Solve(int l, const QMatrix &Q, const double *p, const schar *y, double *alpha, double Cp, double Cn, double eps, SolutionInfo *si, int shrinking)
Definition: Svm.cc:974
virtual double * get_QD() const =0
double * y
Definition: Svm.hh:21
bool is_upper_bound(int i)
Definition: Svm.cc:427
const double * QD
Definition: Svm.cc:406
double * get_QD() const
Definition: Svm.cc:1235
Cache * cache
Definition: Svm.cc:1303
double gamma
Definition: Svm.hh:33
int active_size
Definition: Svm.cc:399
Definition: Svm.hh:25
int l
Definition: Svm.cc:412
int l
Definition: Svm.hh:20
double * weight
Definition: Svm.hh:42
void swap_index(int i, int j) const
Definition: Svm.cc:1331
Definition: Svm.hh:26
double * get_QD() const
Definition: Svm.cc:1285
double C
Definition: Svm.hh:39
Qfloat * buffer[2]
Definition: Svm.cc:1378
long int size
Definition: Svm.cc:80
static void clone(T *&dst, S *src, int n)
Definition: Svm.cc:22
void do_shrinking()
Definition: Svm.cc:1117
int svm_type
Definition: Svm.hh:30
static T max(T x, T y)
Definition: Svm.cc:19
double nu
Definition: Svm.hh:43
float Qfloat
Definition: Svm.cc:13
head_t * next
Definition: Svm.cc:83
double coef0
Definition: Svm.hh:34
int kernel_type
Definition: Svm.hh:31
static void solve_epsilon_svr(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: Svm.cc:1513
def y
~ONE_CLASS_Q()
Definition: Svm.cc:1297
bool is_lower_bound(int i)
Definition: Svm.cc:428
Definition: Svm.cc:67
Definition: Svm.hh:26
void svm_free_model_content(svm_model *model_ptr)
Definition: Svm.cc:2931
schar * y
Definition: Svm.cc:1255
double kernel_poly(int i, int j) const
Definition: Svm.cc:232
#define Malloc(type, n)
Definition: Svm.cc:39
static void print_string_stdout(const char *s)
Definition: Svm.cc:41