## This file is part of mlpy.
## Support Vector Machines (SVM) based on SVM
## C-libraries developed by Stefano Merler.

## For feature weights see:
## C. Furlanello, M. Serafini, S. Merler, and G. Jurman.
## Advances in Neural Network Research: IJCNN 2003.
## An accelerated procedure for recursive feature ranking
## on microarray data.
## Elsevier, 2003.
    
## This code is written by Davide Albanese, <albanese@fbk.eu>.
## (C) 2007 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY.

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.

__all__ = ['Svm']

from numpy import *
import svmcore


## def KernelGaussian (x1, x2, kp):
##     """
##     Gaussian kernel
##     K(x1,x2,kp) = exp^(-(||x1-x2||)^2 / kp)
##     """
    
##     sub = x1 - x2
##     norm = (sum(abs(sub)**2))**(0.5)
##     return exp(-norm**2 / kp)

## def MatrixKernelGaussian(x, kp):
##     """
##     Create the matrix K
##     K[i, j] = KernelGaussian(x[i], x[j], kp)
##     """
    
##     K = empty((x.shape[0], x.shape[0]))
##     for i in xrange(x.shape[0]):
##         for j in xrange(i, x.shape[0]):
##             K[i,j] = KernelGaussian(x[i], x[j], kp)
##             K[j,i] = K[i,j]          
##     return K


def err(y, p):
    """
    Compute the classification error.

    error = (fp + fn) / ts

    that is, the number of entries where the prediction differs from
    the class, divided by the number of samples.

    Input

      * *y* - classes    (two classes) [1D numpy array integer]
      * *p* - prediction (two classes) [1D numpy array integer]

    Output

      * error [float]

    Raises ValueError when y and p differ in length, or when either
    of them holds more than two distinct values.
    """

    n_samples = y.shape[0]
    if p.shape[0] != n_samples:
        raise ValueError("y and p have different length")

    for labels in (y, p):
        if unique(labels).shape[0] > 2:
            raise ValueError("err() works only for two-classes")

    # Count the positions where class and prediction disagree.
    agree = (y == p)
    n_wrong = int(count_nonzero(logical_not(agree)))
    return n_wrong / float(n_samples)


def MatrixKernelGaussian(X, kp):
    """
    Create the Gaussian kernel matrix K of the rows of X:

    K[i, j] = exp(-||X[i] - X[j]||^2 / kp)

    The squared distances are obtained from the identity
    ||u - v||^2 = u.u + v.v - 2 u.v, so no explicit loop over
    sample pairs is needed.
    """

    # Squared norm of every row, forced to float like the original
    # multiplication by a vector of ones did.
    sq_norms = asarray((X**2).sum(axis=1), dtype=float)
    gram = dot(X, X.T)

    # exponent[i, j] = -(sq_norms[i] + sq_norms[j] - 2 * gram[i, j]) / kp
    exponent = (2 * gram - sq_norms[newaxis, :] - sq_norms[:, newaxis]) / kp

    return exp(exponent)

 
def MatrixKernelTversky(X, alpha_tversky, beta_tversky):
    """
    Create the Tversky kernel matrix K of the rows of X:

    K[i, j] = s12 / (alpha * s11 + beta * s22 + (1 - alpha - beta) * s12)

    with s11 = X[i].X[i], s22 = X[j].X[j] and s12 = X[i].X[j].
    """
    n_samples = X.shape[0]

    # Row self-products and the weight of the cross term do not depend
    # on the pair (i, j), so they are evaluated once.
    self_dots = [dot(row, row) for row in X]
    cross_weight = 1.0 - alpha_tversky - beta_tversky

    K = empty((n_samples, n_samples))
    for i, row_i in enumerate(X):
        for j, row_j in enumerate(X):
            cross = dot(row_i, row_j)
            denominator = (alpha_tversky * self_dots[i]
                           + beta_tversky * self_dots[j]
                           + cross_weight * cross)
            K[i, j] = cross / denominator

    return K


def computeZ(K, y):
    """
    Compute the matrix Z[i,j] = y[i]*y[j]*K[i,j]

    See Maria Serafini Thesis, p. 29.

    Input

      * *K* - kernel matrix, at least len(y) x len(y) [2D numpy array]
      * *y* - classes                                 [1D numpy array]

    Output

      * Z [2D numpy array float, len(y) x len(y)]
    """

    n = y.shape[0]

    # The result is always a float matrix, whatever the dtype of K and y.
    Z = empty((n, n))

    # outer(y, y)[i, j] == y[i] * y[j]; only the leading n x n part of K
    # is read, exactly like the former element-by-element loop did.
    Z[...] = outer(y, y) * asarray(K)[:n, :n]
    return Z


def computeZ_k(Z, k, x, kp):
    """
    Compute Z_k[i, j] = Z[i, j] * exp((x[i, k]-x[j, k])**2 / kp)

    See Maria Serafini Thesis, p. 30.

    Input

      * *Z*  - square matrix (n x n)                     [2D numpy array]
      * *k*  - index of the feature (column of x)        [integer]
      * *x*  - data, at least n rows (samples x feats)   [2D numpy array]
      * *kp* - kernel parameter                          [float]

    Output

      * Z_k [2D numpy array, same shape and dtype as Z]

    Raises ValueError when one of the exponential factors overflows to
    infinity. (On Python 2 ValueError is a subclass of StandardError,
    so handlers written for the former StandardError keep working.)
    """

    n = Z.shape[0]
    feature = x[:n, k]

    # delta[i, j] = x[i, k] - x[j, k]
    delta = feature[:, newaxis] - feature[newaxis, :]

    # An overflow is reported through the exception below, so the numpy
    # overflow warning is silenced while the factors are evaluated.
    old_settings = seterr(over='ignore')
    try:
        factor = exp(delta**2 / kp)
    finally:
        seterr(**old_settings)

    if isinf(factor).any():
        raise ValueError("kp is too small or the data is not standardized")

    Z_k = empty_like(Z)
    Z_k[...] = Z * factor
    return Z_k


def computeZ_tversky(Z, k, x, alpha_tversky, beta_tversky):
    """
    Compute Z_k[i, j] = Z[i, j] * tversky(x[i, k],x[j, k],alpha_tversky,beta_tversky)

    where, with s11 = x[i, k]**2, s22 = x[j, k]**2 and s12 = x[i, k]*x[j, k],

    tversky = s12 / (alpha_tversky * s11 + beta_tversky * s22
                     + (1 - alpha_tversky - beta_tversky) * s12)

    See Maria Serafini Thesis, p. 30.

    Input

      * *Z* - square matrix (n x n)                     [2D numpy array]
      * *k* - index of the feature (column of x)        [integer]
      * *x* - data, at least n rows (samples x feats)   [2D numpy array]
      * *alpha_tversky*, *beta_tversky* - Tversky weights [float]

    Output

      * Z_k [2D numpy array, same shape and dtype as Z]

    Raises ValueError when one of the Tversky factors is infinite
    (non-zero numerator over a zero denominator). A 0/0 factor is not
    treated as an error and shows up as NaN in the result, as before.
    (On Python 2 ValueError is a subclass of StandardError, so handlers
    written for the former StandardError keep working.)
    """

    n = Z.shape[0]
    feature = x[:n, k]

    squares = feature * feature
    s11 = squares[:, newaxis]        # x[i, k]**2, constant along a row
    s22 = squares[newaxis, :]        # x[j, k]**2, constant along a column
    s12 = outer(feature, feature)    # x[i, k] * x[j, k]

    # A division by zero is reported through the exception below, so
    # the corresponding numpy warning is silenced for this expression.
    old_settings = seterr(divide='ignore')
    try:
        factor = s12 / (alpha_tversky * s11 + beta_tversky * s22
                        + (1.0 - alpha_tversky - beta_tversky) * s12)
    finally:
        seterr(**old_settings)

    if isinf(factor).any():
        raise ValueError("Tversky weights problem")

    Z_k = empty_like(Z)
    Z_k[...] = Z * factor
    return Z_k


class Svm:
    """
    Support Vector Machines (SVM).

    Wrapper around the svmcore extension module: compute() fits a
    two-class model, predict() evaluates it on new points and
    weights() derives one weight per feature.

    :Example:
    
    >>> import numpy as np
    >>> import mlpy
    >>> xtr = np.array([[1.0, 2.0, 3.0, 1.0],  # first sample
    ...                 [1.0, 2.0, 3.0, 2.0],  # second sample
    ...                 [1.0, 2.0, 3.0, 1.0]]) # third sample
    >>> ytr = np.array([1, -1, 1])             # classes
    >>> mysvm = mlpy.Svm()                     # initialize Svm class
    >>> mysvm.compute(xtr, ytr)                # compute SVM
    1
    >>> mysvm.predict(xtr)                     # predict SVM model on training data
    array([ 1, -1,  1])
    >>> xts = np.array([4.0, 5.0, 6.0, 7.0])   # test point
    >>> mysvm.predict(xts)                     # predict SVM model on test point
    -1
    >>> mysvm.realpred                         # real-valued prediction
    -5.5
    >>> mysvm.weights(xtr, ytr)                # compute weights on training data
    array([ 0.,  0.,  0.,  1.])
    """

    # Integer kernel identifiers handed to the svmcore routines.
    _LINEAR     = 1
    _GAUSSIAN   = 2
    _POLYNOMIAL = 3
    _TVERSKY    = 4
    _TR         = 5

    def __init__(self, kernel = 'linear', kp = 0.1, C = 1.0, tol = 0.001,
                 eps = 0.001, maxloops = 1000, cost = 0.0, alpha_tversky = 1.0,
                 beta_tversky = 1.0, opt_offset=True):
        """
        Initialize the Svm class
        
        :Parameters:
        
          kernel : string ['linear', 'gaussian', 'polynomial', 'tr', 'tversky']
            kernel
          kp : float
            kernel parameter (two sigma squared) for gaussian and polynomial kernel
          C : float
            regularization parameter
          tol : float
            tolerance for testing KKT conditions
          eps : float
            convergence parameter
          maxloops : integer
            maximum number of optimization loops
          cost : float [-1.0, ..., 1.0]
            for cost-sensitive classification
          alpha_tversky : float
            positive multiplicative parameter for the norm of the first vector
          beta_tversky : float
            positive multiplicative parameter for the norm of the second vector
          opt_offset : bool
            compute the optimal offset

        :Raises:
          KeyError
            if *kernel* is not one of the names listed above
        """

        SVM_KERNELS = {'linear': self._LINEAR,
                       'gaussian': self._GAUSSIAN,
                       'polynomial': self._POLYNOMIAL,
                       'tversky': self._TVERSKY,
                       'tr': self._TR}
        
        self.__kernel   = SVM_KERNELS[kernel]
        self.__kp       = kp
        self.__C        = C
        self.__tol      = tol
        self.__eps      = eps
        self.__maxloops = maxloops
        self.__cost     = cost
        self.__alpha_tversky = alpha_tversky
        self.__beta_tversky = beta_tversky
        self.__opt_offset = opt_offset

        # Model state, filled in by compute()
        self.__x    = None # copy of the training data
        self.__y    = None # copy of the training classes
        self.__w    = None
        self.__a    = None
        self.__b    = None # offset returned by svmcore
        self.__bopt = None # offset actually used by predict()
        self.__conv = None # svm convergence
        self.realpred = None # real-valued output of the last predict()

        self.__computed = False

        # For 'terminated ramps' (tr) only
        self.__sf_w  = None
        self.__sf_b  = None
        self.__sf_i  = None
        self.__sf_j  = None
        self.__nsf   = None
        self.__svm_x = None
        ################################     


    def compute(self, x, y):
        """Compute SVM model

        :Parameters:
          x : 2d ndarray float (samples x feats)
            training data
          y : 1d ndarray integer (-1 or 1)
            classes

        :Returns:     
          conv : integer
            svm convergence (0: false, 1: true)

        :Raises:
          ValueError
            if y does not contain exactly two distinct classes
        """

        classes = unique(y)
        if classes.shape[0] != 2:
            raise ValueError("Svm works only for two-classes problems")
     
        # Store x and y
        self.__x = x.copy()
        self.__y = y.copy()
       
        # Kernel 'tr'
        if self.__kernel == self._TR:
            res = svmcore.computesvmtr(self.__x, self.__y, self.__C, self.__tol,
                                       self.__eps, self.__maxloops, self.__cost)
            self.__w        = res[0]
            self.__a        = res[1]
            self.__b        = res[2]
            self.__conv     = res[3]
            self.__sf_w     = res[4]
            self.__sf_b     = res[5]
            self.__sf_i     = res[6]
            self.__sf_j     = res[7]
            self.__svm_x    = res[8]
            self.__nsf      = self.__sf_w.shape[0]
            
        # Kernel 'linear', 'gaussian', 'polynomial', 'tversky'
        else:
            res = svmcore.computesvm(self.__x, self.__y, self.__kernel, self.__kp, self.__C,
                                     self.__tol, self.__eps, self.__maxloops, self.__cost,
                                     self.__alpha_tversky, self.__beta_tversky)
            
            self.__w = res[0]
            self.__a = res[1]
            self.__b = res[2]
            self.__conv = res[3]

        self.__computed = True

        # Optimal offset: start from the svmcore offset and, if requested,
        # shift it by the threshold on the training outputs that gives the
        # lowest training error.
        # NOTE(review): adding the shift to the offset presumes svmcore
        # subtracts the offset in its decision function - confirm there.
        self.__bopt = self.__b
        if self.__opt_offset:
            self.predict(x) # fills self.realpred using the svmcore offset
            scores = self.realpred
            ordered = sort(scores)

            # Candidate thresholds: midpoints between consecutive outputs
            candidates = ordered[:-1] + (diff(ordered) / 2.0)

            best_error = inf
            labels = empty(x.shape[0], dtype=int)
            for shift in candidates:
                labels[scores >= shift] = 1
                labels[scores < shift] = -1
                error = err(y, labels)
                # Strict comparison: the first of equally good shifts wins
                if error < best_error:
                    best_error = error
                    self.__bopt = self.__b + shift

            # The training outputs were only needed for the search
            self.realpred = None
       

        # Return convergence
        return self.__conv


    def __decision(self, point):
        """
        Return the real-valued svmcore prediction for one sample
        (1d array), using the offset currently stored in the model.
        """

        # Kernel 'tr'
        if self.__kernel == self._TR:
            return svmcore.predictsvmtr(self.__x, self.__y, point, self.__w, self.__bopt,
                                        self.__sf_w, self.__sf_b, self.__sf_i, self.__sf_j)

        # Kernel 'linear', 'gaussian', 'polynomial', 'tversky'
        return svmcore.predictsvm(self.__x, self.__y, point, self.__w, self.__a,
                                  self.__bopt, self.__kp, self.__kernel,
                                  self.__alpha_tversky, self.__beta_tversky)

    
    def predict(self, p):
        """
        Predict svm model on a test point(s)

        :Parameters:
          p : 1d or 2d ndarray float (samples x feats)
             test point(s)
        
        :Returns:     
          cl : integer or 1d ndarray integer
            class(es) predicted: 1 or -1 according to the sign of the
            real valued prediction, 0 when it is exactly zero

        :Attributes:
          Svm.realpred : float or 1d ndarray float
            real valued prediction

        :Raises:
          RuntimeError
            if no model has been computed yet (RuntimeError is a
            subclass of the StandardError raised formerly on Python 2)
          ValueError
            if p is neither 1d nor 2d
        """
        
        if not self.__computed:
            raise RuntimeError("No SVM model computed")

        # Reject unsupported shapes up front instead of failing later on
        # an unassigned result.
        p = asarray(p)
        if p.ndim not in (1, 2):
            raise ValueError("p must be a 1d or 2d array")

        # Single test point
        if p.ndim == 1:
            self.realpred = self.__decision(p)
            if self.realpred > 0.0:
                return 1
            elif self.realpred < 0.0:
                return -1
            return 0

        # Several test points, one per row
        self.realpred = empty(p.shape[0], dtype = float)
        for i in range(p.shape[0]):
            self.realpred[i] = self.__decision(p[i])

        pred = zeros(p.shape[0], dtype = int)
        pred[self.realpred > 0.0] = 1
        pred[self.realpred < 0.0] = -1
                   
        return pred


    def __half_quadratic(self, Z):
        """
        Return the scalar 0.5 * a * Z * aT, where a is the coefficient
        vector returned by svmcore (presumably the Lagrange multipliers).
        """

        a = self.__a
        aT = a.reshape(-1, 1)
        # item() turns the one-element result into a plain scalar, so it
        # can be stored in an array slot without relying on the implicit
        # array-to-scalar conversion.
        return 0.5 * dot(dot(a, Z), aT).item()


    def weights(self, x, y):
        """
        Return feature weights

        The model is (re)computed on x, y first, so any previously
        computed model is replaced.

        :Parameters:
          x : 2d ndarray float (samples x feats)
            training data
          y : 1d ndarray integer (-1 or 1)
            classes

        :Returns:     
          fw : 1d ndarray float
            feature weights
        """

        self.compute(x, y)
        n_feats = self.__x.shape[1]

        # Linear case
        if self.__kernel == self._LINEAR:
            return self.__w**2

        # Gaussian and polynomial case
        # NOTE(review): the polynomial kernel is handled with the Gaussian
        # kernel matrix as well - confirm this is intended.
        elif self.__kernel in (self._GAUSSIAN, self._POLYNOMIAL):
            K = MatrixKernelGaussian(self.__x, self.__kp)
            Z = computeZ(K, self.__y)          
            # Compute dJ[i] = 0.5*a*Z*aT - 0.5*a*Z*(-i)*aT
            dJ1 = self.__half_quadratic(Z)
            dJ2 = empty(n_feats)
            for i in range(n_feats):
                Z_i = computeZ_k(Z, i, self.__x, self.__kp) # Compute Z_i                
                dJ2[i] = self.__half_quadratic(Z_i)
            return dJ1 - dJ2


        # Tversky case
        # NOTE(review): the sign of the difference is the opposite of the
        # Gaussian case (dJ2 - dJ1); kept as found.
        elif self.__kernel == self._TVERSKY:
            K = MatrixKernelTversky(self.__x, self.__alpha_tversky, self.__beta_tversky)
            Z = computeZ(K, self.__y)
            dJ1 = self.__half_quadratic(Z)
            dJ2 = empty(n_feats)
            
            for i in range(n_feats):
                Z_i = computeZ_tversky(Z, i, self.__x, self.__alpha_tversky, self.__beta_tversky) # Compute Z_i                
                dJ2[i] = self.__half_quadratic(Z_i)
            return dJ2 - dJ1
            
        # Tr case
        elif self.__kernel == self._TR:

            w      = empty((self.__nsf, x.shape[1]))
            norm_w = zeros((self.__nsf,))
            a      = zeros((self.__nsf,))
            h      = zeros((x.shape[1],))
                       
            for t in range(self.__nsf):
                it = self.__sf_i[t]
                jt = self.__sf_j[t]

                # Absolute per-feature contribution of the t-th 'tr' term
                # and its L1 norm
                w[t] = abs(self.__sf_w[t] * (self.__y[it] * self.__x[it] + self.__y[jt] * self.__x[jt]))
                norm_w[t] = abs(w[t]).sum()

                for i in range(x.shape[0]):
                    a[t] += self.__y[i] * self.__a[i] * self.__svm_x[i][t]

            # h[j] sums, over all terms, |a[t]| times the share of
            # feature j in the L1 norm of w[t]
            for j in range(x.shape[1]):
                for t in range(self.__nsf):
                    h[j] += abs(a[t]) * w[t][j] / norm_w[t]

            return h
        
