BayesOpt
student_t_process_nig.cpp
1 /*
2 -------------------------------------------------------------------------
3  This file is part of BayesOpt, an efficient C++ library for
4  Bayesian optimization.
5 
6  Copyright (C) 2011-2015 Ruben Martinez-Cantin <rmcantin@unizar.es>
7 
8  BayesOpt is free software: you can redistribute it and/or modify it
9  under the terms of the GNU Affero General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  BayesOpt is distributed in the hope that it will be useful, but
14  WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU Affero General Public License for more details.
17 
18  You should have received a copy of the GNU Affero General Public License
19  along with BayesOpt. If not, see <http://www.gnu.org/licenses/>.
20 ------------------------------------------------------------------------
21 */
22 
23 #include <cstdlib>
24 #include <boost/math/special_functions/fpclassify.hpp>
25 #include <boost/numeric/ublas/banded.hpp>
26 #include "log.hpp"
28 #include "ublas_trace.hpp"
29 #include "ublas_elementwise.hpp"
31 
32 namespace bayesopt
33 {
34 
35  namespace ublas = boost::numeric::ublas;
36 
37  StudentTProcessNIG::StudentTProcessNIG(size_t dim, Parameters params,
38  const Dataset& data,
39  MeanModel& mean, randEngine& eng):
40  HierarchicalGaussianProcess(dim,params,data, mean, eng),
41  mAlpha(params.alpha), mBeta (params.beta),
42  mW0(params.mean.coef_mean.size()), mInvVarW(params.mean.coef_mean.size()),
43  mD(params.mean.coef_mean.size(),params.mean.coef_mean.size())
44  {
45  mW0 = params.mean.coef_mean;
46  for (size_t ii = 0; ii < params.mean.coef_mean.size(); ++ii)
47  {
48  double varii = params.mean.coef_std[ii] * params.mean.coef_std[ii];
49  mInvVarW(ii) = 1/varii;
50  }
51  d_ = new StudentTDistribution(eng);
52  } // Constructor
53 
54 
55 
56  StudentTProcessNIG::~StudentTProcessNIG()
57  {
58  delete d_;
59  } // Default destructor
60 
61 
63  {
64  double kq = computeSelfCorrelation(query);
65  vectord kn = computeCrossCorrelation(query);
66  vectord phi = mMean.getFeatures(query);
67 
68  vectord v(kn);
69  inplace_solve(mL,v,ublas::lower_tag());
70 
71  vectord rq = phi - prod(v,mKF);
72 
73  vectord rho(rq);
74  inplace_solve(mD,rho,ublas::lower_tag());
75 
76  double yPred = inner_prod(phi,mWMap) + inner_prod(v,mVf);
77  double sPred = sqrt( mSigma * (kq - inner_prod(v,v)
78  + inner_prod(rho,rho)));
79 
80  if ((boost::math::isnan(yPred)) || (boost::math::isnan(sPred)))
81  {
82  throw std::runtime_error("Error in prediction. NaN found.");
83  }
84 
85 
86  d_->setMeanAndStd(yPred,sPred);
87  return d_;
88  }
89 
90 
92  {
93  matrixd KK = computeCorrMatrix();
94  const size_t n = KK.size1();
95  const size_t p = mMean.nFeatures();
96  const size_t nalpha = (n+2*mAlpha);
97 
98  vectord v0 = mData.mY - prod(trans(mMean.mFeatM),mW0);
99  matrixd WW = zmatrixd(p,p); //TODO: diagonal matrix
100  utils::add_to_diagonal(WW,mInvVarW);
101  matrixd FW = prod(trans(mMean.mFeatM),WW);
102  KK += prod(FW,mMean.mFeatM);
103  matrixd BB(n,n);
105  inplace_solve(BB,v0,ublas::lower_tag());
106  double zz = inner_prod(v0,v0);
107  double sigmaMap = (mBeta/mAlpha + zz)/nalpha;
108 
109  double lik = nalpha/2 * std::log(1+zz/(2*mBeta*sigmaMap));
110  lik += utils::log_trace(BB);
111  lik += n/2 * std::log(sigmaMap);
112  return lik;
113  }
114 
115 
116 
118  {
119  size_t n = mData.getNSamples();
120  size_t p = mMean.nFeatures();
121 
122  mKF = trans(mMean.mFeatM);
123  inplace_solve(mL,mKF,ublas::lower_tag());
124  //TODO: make one line
125  matrixd DD(p,p);
126  DD = prod(trans(mKF),mKF);
127  utils::add_to_diagonal(DD,mInvVarW);
129 
130  vectord vn = mData.mY;
131  inplace_solve(mL,vn,ublas::lower_tag());
132  mWMap = prod(mMean.mFeatM,vn) + utils::ublas_elementwise_prod(mInvVarW,mW0);
133  utils::cholesky_solve(mD,mWMap,ublas::lower());
134 
135  mVf = mData.mY - prod(trans(mMean.mFeatM),mWMap);
136  inplace_solve(mL,mVf,ublas::lower_tag());
137 
138  vectord v0 = mData.mY - prod(trans(mMean.mFeatM),mW0);
139  //TODO: check for "cheaper" version
140  //matrixd KK = prod(mL,trans(mL));
141  matrixd KK = computeCorrMatrix();
142  matrixd WW = zmatrixd(p,p); //TODO: diagonal matrix
143  utils::add_to_diagonal(WW,mInvVarW);
144  const matrixd FW = prod(trans(mMean.mFeatM),WW);
145  KK += prod(FW,mMean.mFeatM);
146  matrixd BB(n,n);
148  inplace_solve(BB,v0,ublas::lower_tag());
149  mSigma = (mBeta/mAlpha + inner_prod(v0,v0))/(n+2*mAlpha);
150 
151  int dof = static_cast<int>(n+2*mAlpha);
152 
153  if ((boost::math::isnan(mWMap(0))) || (boost::math::isnan(mSigma)))
154  {
155  throw std::runtime_error("Error in precomputed prediction. NaN found.");
156  }
157 
158 
159  if (dof <= 0)
160  {
161  dof = n;
162  FILE_LOG(logERROR) << "ERROR: Incorrect alpha. Dof invalid."
163  << "Forcing Dof <= num of points.";
164  }
165 
166  d_->setDof(dof);
167  }
168 
169 } //namespace bayesopt
matrixd computeCorrMatrix()
Computes the Correlation (Kernel or Gram) matrix.
Namespace of the library interface.
Definition: using.dox:1
Student&#39;s t process with Normal-Inverse-Gamma hyperprior on mean and signal variance parameters...
StudentTDistribution * d_
Predictive distributions.
ProbabilityDistribution * prediction(const vectord &query)
Function that returns the prediction of the GP for a query point in the hypercube [0...
void precomputePrediction()
Precompute some values of the prediction that do not depends on the query.
Elementwise operations for ublas vector/matrix.
void setMeanAndStd(double mean, double std)
Sets the mean and std of the distribution.
void setDof(size_t dof)
Sets the degrees of freedom (dof) the distribution.
matrixd mL
Cholesky decomposition of the Correlation matrix.
v1 ublas_elementwise_prod(const v1 &a, const v2 &b)
Computes the elementwise product of two vectors or matrices.
size_t cholesky_decompose(const MATRIX &A, TRIA &L)
decompose the symmetric positive definit matrix A into product L L^T.
void cholesky_solve(const TRIA &L, MATRIX &x, ublas::lower)
solve system L L^T x = b inplace
Modules and helper macros for logging.
matrixd mFeatM
Value of the mean features at the input points.
double negativeLogLikelihood()
Computes the negative log likelihood and its gradient of the data.
vectord mY
Data values.
Definition: dataset.hpp:64
Student&#39;s t probability distribution.