// Highly flexible atomic Newton() solver and Laplace() approximation.

// Create a sparse matrix with the sparsity pattern of 'S' but a (possibly
// new) value type and values 'x'. Zero filled if 'x' is not supplied.
template<class NewType, class OldType>
Eigen::SparseMatrix<NewType> pattern(const Eigen::SparseMatrix<OldType> &S,
                                     std::vector<NewType> x = std::vector<NewType>(0)) {
  if (S.nonZeros() > 0 && x.size() == 0) {
    x.resize(S.nonZeros());
  }
  return Eigen::Map<const Eigen::SparseMatrix<NewType> > (S.rows(),
                                                          S.cols(),
                                                          S.nonZeros(),
                                                          S.outerIndexPtr(),
                                                          S.innerIndexPtr(),
                                                          x.data());
}
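// Usage sketch for 'pattern' (illustrative only; the demo function name is
// hypothetical): copy a double matrix's structure into an int valued matrix,
// zero filled. Requires <Eigen/SparseCore>.
inline void pattern_demo() {
  Eigen::SparseMatrix<double> S(2, 2);
  S.insert(0, 0) = 4.; S.insert(1, 1) = 3.;
  S.makeCompressed(); // 'pattern' maps the compressed storage directly
  Eigen::SparseMatrix<int> P = pattern<int>(S); // same (i,j) structure, values 0
  (void) P;
}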
// Select a sparse factorization back end. Both branches define the macro
// INVERSE_SUBSET_TEMPLATE and a matching logDeterminant() helper.
#ifdef TMBAD_SUPERNODAL
#include "supernodal_inverse_subset.hpp"
#define INVERSE_SUBSET_TEMPLATE Eigen::SupernodalInverseSubset
// ... (typedef of DEFAULT_SPARSE_FACTORIZATION for the supernodal case elided)
inline double logDeterminant(const DEFAULT_SPARSE_FACTORIZATION &llt) {
  return llt.logDeterminant();
}
#else
#include "simplicial_inverse_subset.hpp"
#define INVERSE_SUBSET_TEMPLATE Eigen::SimplicialInverseSubset
typedef Eigen::SimplicialLLT<Eigen::SparseMatrix<double> > DEFAULT_SPARSE_FACTORIZATION;
inline double logDeterminant(const DEFAULT_SPARSE_FACTORIZATION &llt) {
  // H = L * L^T  =>  log|H| = 2 * sum(log(diag(L)))
  return 2. * llt.matrixL().nestedExpression().diagonal().array().log().sum();
}
#endif
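// Quick numerical check of the simplicial branch formula above (a minimal
// sketch, not part of the original source; needs <Eigen/SparseCholesky>,
// <cmath>, <iostream>). For H = [[4,1],[1,3]], det(H) = 11.
inline void logDeterminant_demo() {
  Eigen::SparseMatrix<double> H(2, 2);
  H.insert(0, 0) = 4.; H.insert(0, 1) = 1.;
  H.insert(1, 0) = 1.; H.insert(1, 1) = 3.;
  H.makeCompressed();
  Eigen::SimplicialLLT<Eigen::SparseMatrix<double> > llt(H);
  double ld = 2. * llt.matrixL().nestedExpression().diagonal().array().log().sum();
  std::cout << ld << " == " << std::log(11.) << "\n";
}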
// Vector class used by TMB: an Eigen array with std::vector interoperability.
template<class Type>
struct vector : Eigen::Array<Type, Eigen::Dynamic, 1> {
  typedef Type value_type;
  typedef Eigen::Array<Type, Eigen::Dynamic, 1> Base;
  vector(void) : Base() {}
  template<class Derived>
  vector(const Eigen::ArrayBase<Derived> &x) : Base(x) {}
  template<class Derived>
  vector(const Eigen::MatrixBase<Derived> &x) : Base(x) {}
  vector(size_t n) : Base(n) {}
  // Convert to std::vector
  operator std::vector<Type>() const {
    return std::vector<Type> (Base::data(),
                              Base::data() + Base::size());
  }
  // Construct from std::vector
  vector(const std::vector<Type> &x) :
    Base(Eigen::Map<const Base> (x.data(), x.size())) { }
};
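// 'vector' interoperates with std::vector in both directions. In-file sketch
// (illustrative only; the demo function name is hypothetical):
inline void vector_interop_demo() {
  std::vector<double> s = {1., 2., 3.};
  vector<double> v(s);          // Eigen array from std::vector
  std::vector<double> s2 = v;   // and back, via the conversion operator
  v = v * 2.;                   // usual Eigen array arithmetic applies
  (void) s2;
}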
// Matrix class used by TMB.
template<class Type>
struct matrix : Eigen::Matrix<Type, Eigen::Dynamic, Eigen::Dynamic> {
  typedef Eigen::Matrix<Type, Eigen::Dynamic, Eigen::Dynamic> Base;
  matrix(void) : Base() {}
  template<class Derived>
  matrix(const Eigen::ArrayBase<Derived> &x) : Base(x) {}
  template<class Derived>
  matrix(const Eigen::MatrixBase<Derived> &x) : Base(x) {}
  // Flatten to a column vector
  vector<Type> vec() const {
    Base a = (*this);
    a.resize(a.size(), 1);
    return a;
  }
};
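// 'vec()' stacks columns, matching R's as.vector() on a matrix. In-file
// sketch (illustrative only):
inline void matrix_vec_demo() {
  matrix<double> m;
  m.resize(2, 2);
  m << 1., 3.,
       2., 4.;
  vector<double> v = m.vec(); // v = (1, 2, 3, 4): columns stacked
  (void) v;
}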
// Operator (H, x) -> solve(H, x)
template <class Hessian_Type>
struct HessianSolveVector : TMBad::global::DynamicOperator< -1, -1 > {
  static const bool have_input_size_output_size = true;
  static const bool add_forward_replay_copy = true;
  std::shared_ptr<Hessian_Type> hessian;
  size_t nnz, x_rows, x_cols; // Dim(x)
  HessianSolveVector(std::shared_ptr<Hessian_Type> hessian, size_t x_cols = 1) :
    hessian ( hessian ),
    nnz     ( hessian->Range() ),
    x_rows  ( hessian->n ),
    x_cols  ( x_cols ) {}
  TMBad::Index input_size() const {
    return nnz + x_rows * x_cols;
  }
  TMBad::Index output_size() const {
    return x_rows * x_cols;
  }
  vector<TMBad::Scalar> solve(const vector<TMBad::Scalar> &h,
                              const vector<TMBad::Scalar> &x) {
    typename Hessian_Type::template MatrixResult<TMBad::Scalar>::type
      H = hessian -> as_matrix(h);
    hessian -> llt_factorize(H); // Assumes analyzePattern has been done once
    matrix<TMBad::Scalar> xm = x.matrix();
    xm.resize(x_rows, x_cols);
    vector<TMBad::Scalar> y = hessian -> llt_solve(H, xm).vec();
    return y;
  }
  vector<TMBad::Replay> solve(const vector<TMBad::Replay> &h,
                              const vector<TMBad::Replay> &x) {
    std::vector<TMBad::ad_plain> hx;
    hx.insert(hx.end(), h.data(), h.data() + h.size());
    hx.insert(hx.end(), x.data(), x.data() + x.size());
    TMBad::global::Complete<HessianSolveVector> Op(*this);
    std::vector<TMBad::ad_plain> ans = Op(hx);
    std::vector<TMBad::ad_aug> ans2(ans.begin(), ans.end());
    return ans2;
  }
  void forward(TMBad::ForwardArgs<TMBad::Scalar> &args) {
    size_t n = output_size();
    vector<TMBad::Scalar>
      h = args.x_segment(0, nnz),
      x = args.x_segment(nnz, n);
    args.y_segment(0, n) = solve(h, x);
  }
  template <class T>
  void reverse(TMBad::ReverseArgs<T> &args) {
    size_t n = output_size();
    vector<T>
      h  = args. x_segment(0, nnz),
      y  = args. y_segment(0, n),
      dy = args.dy_segment(0, n);
    vector<T> y2 = solve(h, dy);
    for (size_t j=0; j < x_cols; j++) {
      vector<T> y_j   = y .segment(j * x_rows, x_rows);
      vector<T> y2_j  = y2.segment(j * x_rows, x_rows);
      vector<T> y2y_j = hessian -> crossprod(y2_j, y_j);
      args.dx_segment(0, nnz) -= y2y_j;
      args.dx_segment(nnz + j * x_rows, x_rows) += y2_j;
    }
  }
  const char* op_name() { return "JSolve"; }
};
// Methods specific for a dense hessian.
template<class Factorization=Eigen::LLT<Eigen::Matrix<double, -1, -1> > >
struct jacobian_dense_t : TMBad::ADFun<> {
  typedef TMBad::ADFun<> Base;
  template<class T>
  struct MatrixResult {
    typedef matrix<T> type;
  };
  size_t n;
  std::shared_ptr<Factorization> llt;
  jacobian_dense_t() {}
  // Construct from a taped hessian 'H'
  jacobian_dense_t(TMBad::ADFun<> H, size_t n) :
    n(n), llt(std::make_shared<Factorization>()) {
    Base::operator= ( H );
  }
  // Construct as the jacobian of a taped gradient 'G' ('F' is unused in the
  // dense case; kept for interface compatibility)
  jacobian_dense_t(TMBad::ADFun<> &F, TMBad::ADFun<> &G, size_t n) :
    n(n), llt(std::make_shared<Factorization>()) {
    std::vector<bool> keep_x(n, true); // inner
    keep_x.resize(G.Domain(), false);  // outer
    std::vector<bool> keep_y(n, true); // inner
    Base::operator= ( G.JacFun(keep_x, keep_y) );
  }
  template<class V>
  matrix<typename V::value_type> as_matrix(const V &Hx) {
    typedef typename V::value_type T;
    return Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> > (Hx.data(), n, n);
  }
  template<class T>
  std::vector<T> eval(const std::vector<T> &x) {
    return Base::operator()(x);
  }
  template<class T>
  matrix<T> operator()(const std::vector<T> &x) {
    return as_matrix(Base::operator()(x));
  }
  // Sparsity restricted cross product; in the dense case this is the usual
  // cross product
  template<class T>
  vector<T> crossprod(const vector<T> &y2, const vector<T> &y) {
    matrix<T> ans = ( y2.matrix() * y.matrix().transpose() ).array();
    return ans.vec();
  }
  // Sparse.factorize() corresponds to Dense.compute()
  void llt_factorize(const matrix<TMBad::Scalar> &h) {
    llt->compute(h);
  }
  Eigen::ComputationInfo llt_info() {
    return llt->info();
  }
  // Assumes a prior call to llt_factorize with the same H
  matrix<TMBad::Scalar> llt_solve(const matrix<TMBad::Scalar> &H,
                                  const matrix<TMBad::Scalar> &x) {
    return llt->solve(x);
  }
  template<class T>
  vector<T> solve(std::shared_ptr<jacobian_dense_t> ptr,
                  const vector<T> &h,
                  const vector<T> &x) {
    return HessianSolveVector<jacobian_dense_t>(ptr).solve(h, x);
  }
};
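// All hessian back ends share this interface (as_matrix, llt_factorize,
// llt_info, llt_solve, crossprod, solve), so an undamped newton step can be
// written generically. A minimal sketch, assuming 'hessian' points to any of
// the back ends in this file (the function name is hypothetical):
template<class Hessian_Type>
vector<TMBad::Scalar> newton_step_demo(std::shared_ptr<Hessian_Type> hessian,
                                       const vector<TMBad::Scalar> &x,
                                       const vector<TMBad::Scalar> &g) {
  typename Hessian_Type::template MatrixResult<TMBad::Scalar>::type
    H = (*hessian)(std::vector<TMBad::Scalar>(x));
  hessian->llt_factorize(H);                  // factorize current hessian
  matrix<TMBad::Scalar> gm = g.matrix();
  return x - hessian->llt_solve(H, gm).vec(); // x - H^{-1} g
}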
// Methods specific for a sparse hessian.
template<class Factorization=DEFAULT_SPARSE_FACTORIZATION >
struct jacobian_sparse_t : TMBad::Sparse<TMBad::ADFun<> > {
  typedef TMBad::Sparse<TMBad::ADFun<> > Base;
  template<class T>
  struct MatrixResult {
    typedef Eigen::SparseMatrix<T> type;
  };
  size_t n;
  std::shared_ptr<Factorization> llt;
  // Copying must re-initialize the factorization
  jacobian_sparse_t& operator=(const jacobian_sparse_t &other) {
    Base::operator=(other);
    n = other.n;
    init_llt();
    return *this;
  }
  jacobian_sparse_t(const jacobian_sparse_t &other) : Base(other), n(other.n) {
    init_llt();
  }
  jacobian_sparse_t() {}
  void init_llt() {
    llt = std::make_shared<Factorization>();
    // Analyze the pattern once
    std::vector<TMBad::Scalar> dummy(this->Range(), 0);
    Eigen::SparseMatrix<TMBad::Scalar> H_dummy = as_matrix(dummy);
    llt->analyzePattern(H_dummy);
  }
  // Construct from a taped sparse hessian 'H'
  jacobian_sparse_t(TMBad::Sparse<TMBad::ADFun<> > H, size_t n) : n(n) {
    Base::operator= ( H );
    init_llt();
  }
  // Construct as the sparse jacobian of a taped gradient 'G' ('F' unused;
  // kept for interface compatibility)
  jacobian_sparse_t(TMBad::ADFun<> &F, TMBad::ADFun<> &G, size_t n) : n(n) {
    std::vector<bool> keep_x(n, true); // inner
    keep_x.resize(G.Domain(), false);  // outer
    std::vector<bool> keep_y(n, true); // inner
    Base::operator= (G.SpJacFun(keep_x, keep_y));
    init_llt();
  }
  template<class V>
  Eigen::SparseMatrix<typename V::value_type> as_matrix(const V &Hx) {
    typedef typename V::value_type T;
    typedef Eigen::Triplet<T> T3;
    std::vector<T3> tripletList(n);
    // The diagonal must always be part of the pattern
    for (size_t i=0; i<n; i++) {
      tripletList[i] = T3(i, i, 0);
    }
    size_t K = Hx.size();
    for (size_t k=0; k<K; k++) {
      tripletList.push_back( T3( Base::i[k],
                                 Base::j[k],
                                 Hx[k] ) );
    }
    Eigen::SparseMatrix<T> mat(n, n);
    mat.setFromTriplets(tripletList.begin(), tripletList.end());
    return mat;
  }
  template<class T>
  std::vector<T> eval(const std::vector<T> &x) {
    return Base::operator()(x);
  }
  template<class T>
  Eigen::SparseMatrix<T> operator()(const std::vector<T> &x) {
    std::vector<T> Hx = Base::operator()(x);
    return as_matrix(Hx);
  }
  // Sparsity restricted cross product: one value per stored (i,j) pair
  template<class T>
  vector<T> crossprod(const vector<T> &y2, const vector<T> &y) {
    size_t nnz = Base::Range();
    vector<T> ans(nnz);
    for (size_t k=0; k<nnz; k++) {
      ans[k] = y2[ Base::i [k] ] * y[ Base::j [k] ];
    }
    return ans;
  }
  void llt_factorize(const Eigen::SparseMatrix<TMBad::Scalar> &h) {
    llt->factorize(h);
  }
  Eigen::ComputationInfo llt_info() {
    return llt->info();
  }
  // Assumes a prior call to llt_factorize with the same H
  matrix<TMBad::Scalar> llt_solve(const Eigen::SparseMatrix<TMBad::Scalar> &H,
                                  const matrix<TMBad::Scalar> &x) {
    return llt->solve(x);
  }
  template<class T>
  vector<T> solve(std::shared_ptr<jacobian_sparse_t> ptr,
                  const vector<T> &h,
                  const vector<T> &x) {
    return HessianSolveVector<jacobian_sparse_t>(ptr).solve(h, x);
  }
};
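// The sparse 'crossprod' above equals the dense cross product y2 * y^T
// evaluated only at the stored (i,j) pairs. Standalone illustration (not
// part of the original source; needs <Eigen/Dense>, <cassert>):
inline void crossprod_demo() {
  int I[3] = {0, 1, 1};                 // pattern rows
  int J[3] = {0, 0, 1};                 // pattern cols
  Eigen::Vector2d y2(1., 2.), y(3., 4.);
  Eigen::Matrix2d dense = y2 * y.transpose();
  for (int k = 0; k < 3; k++) {
    assert( y2[I[k]] * y[J[k]] == dense(I[k], J[k]) );
  }
}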
// Operator to mark intermediate variables on the tape.
template<class dummy=void>
struct TagOp : TMBad::global::Operator<1> {
  static const bool have_eval = true;
  static const bool add_forward_replay_copy = true;
  template<class Type> Type eval(Type x0) {
    return x0;
  }
  template<class Type> void reverse(TMBad::ReverseArgs<Type> &args) {
    args.dx(0) += args.dy(0);
  }
  const char* op_name() { return "TagOp"; }
};
// Mark a variable during taping.
TMBad::ad_plain Tag(const TMBad::ad_plain &x) CSKIP( {
  return TMBad::get_glob()->add_to_stack<TagOp<> >(x);
} )
// Scalar case: nothing is on a tape, so this is a no-op.
TMBad::Scalar Tag(const TMBad::Scalar &x) CSKIP( {
  return x;
} )
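// 'Tag' is the identity at the value level; its only effect is the named
// 'TagOp' node it leaves on the tape, which the graph decomposition later
// uses to locate the low rank contribution (see
// jacobian_sparse_plus_lowrank_t below). In-file sketch (illustrative only):
inline TMBad::ad_plain tag_demo(const TMBad::ad_plain &u) {
  return Tag(u); // same value as 'u', but marked on the tape
}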
// Methods specific for a sparse plus low rank hessian, represented as
// H + G * H0 * G^T with H sparse (n x n), G dense (n x k), H0 dense (k x k).
template<class dummy=void>
struct jacobian_sparse_plus_lowrank_t {
  // Component tapes
  std::shared_ptr<jacobian_sparse_t<> > H;
  std::shared_ptr<TMBad::ADFun<> > G;
  std::shared_ptr<jacobian_dense_t<> > H0;
  // ADFun methods that must apply to all components
  // ... (optimize, SwapInner, SwapOuter, print; elided)
  void DomainVecSet(const std::vector<TMBad::Scalar> &x) {
    H -> DomainVecSet(x);
    G -> DomainVecSet(x);
    H0 -> DomainVecSet(x);
  }
  // Value representation of the composite matrix
  template<class T>
  struct sparse_plus_lowrank {
    Eigen::SparseMatrix<T> H;
    vector<T> Hvec; // H values in pattern order
    matrix<T> G;
    matrix<T> H0;
    // Diagonal of the sparse part (used by the newton regularization)
    Eigen::Diagonal<Eigen::SparseMatrix<T> > diagonal() {
      return H.diagonal();
    }
  };
  template<class T>
  struct MatrixResult {
    typedef sparse_plus_lowrank<T> type;
  };
  size_t n;
  // Split the hessian into a sparse part and a low rank part by decomposing
  // the tape at the Tag()-marked nodes
  jacobian_sparse_plus_lowrank_t(TMBad::ADFun<> &F,
                                 TMBad::ADFun<> &, // gradient tape (unused here)
                                 size_t n) : n(n) {
    TMBad::Decomp2<TMBad::ADFun<> > F2 = F.decompose("TagOp");
    size_t k = F2.first.Range();
    std::vector<bool> keep_rc(n, true); // inner
    keep_rc.resize(F.Domain(), false);  // outer
    TMBad::Decomp3<TMBad::ADFun<> >
      F3 = F2.HesFun(keep_rc, true, false, false);
    H = std::make_shared<jacobian_sparse_t<> >(F3.first, n);
    G = std::make_shared<TMBad::ADFun<> >(F3.second);
    H0 = std::make_shared<jacobian_dense_t<> >(F3.third, k);
  }
  // Collect the values of all three components into one structure
  template<class V>
  sparse_plus_lowrank<typename V::value_type> as_matrix(const V &Hx) {
    typedef typename V::value_type T;
    const T* start = Hx.data();
    std::vector<T> v1(start, start + H -> Range());
    start += H -> Range();
    std::vector<T> v2(start, start + G -> Range());
    start += G -> Range();
    std::vector<T> v3(start, start + H0 -> Range());
    sparse_plus_lowrank<T> ans;
    ans.H = H -> as_matrix(v1);
    ans.Hvec = v1;
    ans.G = vector<T>(v2);
    ans.G.resize(n, v2.size() / n);
    ans.H0 = H0 -> as_matrix(v3);
    return ans;
  }
  template<class T>
  std::vector<T> eval(const std::vector<T> &x) {
    std::vector<T> ans = H -> eval(x);
    std::vector<T> ans2 = (*G)(x);
    std::vector<T> ans3 = H0 -> eval(x);
    ans.insert(ans.end(), ans2.begin(), ans2.end());
    ans.insert(ans.end(), ans3.begin(), ans3.end());
    return ans;
  }
  template<class T>
  sparse_plus_lowrank<T> operator()(const std::vector<T> &x) {
    return as_matrix(eval(x));
  }
  // Only the sparse part is factorized; the low rank part is handled by the
  // Woodbury identity in llt_solve
  void llt_factorize(const sparse_plus_lowrank<TMBad::Scalar> &h) {
    H -> llt_factorize(h.H);
  }
  Eigen::ComputationInfo llt_info() {
    // Note: only the sparse part
    return H -> llt_info();
  }
  // Woodbury: (H + G H0 G^T)^{-1} x = y1 - W (I + H0 G^T W)^{-1} H0 W^T x,
  // where W = H^{-1} G and y1 = H^{-1} x
  matrix<TMBad::Scalar> llt_solve(const sparse_plus_lowrank<TMBad::Scalar> &h,
                                  const matrix<TMBad::Scalar> &x) {
    matrix<TMBad::Scalar> W = H -> llt_solve(h.H, h.G);     // n x k
    matrix<TMBad::Scalar> H0M = h.H0 * h.G.transpose() * W; // k x k
    H0M.diagonal().array() += TMBad::Scalar(1.);
    matrix<TMBad::Scalar> y1 = H -> llt_solve(h.H, x);
    matrix<TMBad::Scalar> y2 = W * H0M.ldlt().solve(h.H0 * W.transpose() * x);
    return y1 - y2;
  }
  // Taped version of the Woodbury solve, written with the atomic matrix
  // algebra ('matmul', 'matinv') so derivatives can propagate
  template<class T>
  vector<T> solve(std::shared_ptr<jacobian_sparse_plus_lowrank_t> ptr,
                  const vector<T> &hvec,
                  const vector<T> &xvec) {
    using atomic::matmul;
    using atomic::matinv;
    sparse_plus_lowrank<T> h = as_matrix(hvec);
    vector<T> s =
      HessianSolveVector<jacobian_sparse_t<> >(ptr -> H, h.G.cols()).
      solve(h.Hvec, h.G.vec());
    matrix<T> W = s.matrix();
    W.resize(n, W.size() / n);
    matrix<T> H0M = matmul(matrix<T>(h.H0),
                           matmul(matrix<T>(h.G.transpose()), W));
    H0M.diagonal().array() += T(1.);
    // ... (finish as in 'llt_solve': H^{-1} x minus the low rank correction
    //      W * H0M^{-1} * H0 * W^T * x, using 'matinv' and 'matmul'; elided)
  }
  // Scalar case: factorize, then apply the Woodbury solve
  vector<TMBad::Scalar> solve(const vector<TMBad::Scalar> &h,
                              const vector<TMBad::Scalar> &x) {
    sparse_plus_lowrank<TMBad::Scalar> H = as_matrix(h);
    llt_factorize(H);
    return llt_solve(H, x.matrix()).array();
  }
  // The small k x k system of the Woodbury identity,
  // M := I + H0 * G^T * H^{-1} * G, also needed by the log determinant
  template<class T>
  matrix<T> getH0M(std::shared_ptr<jacobian_sparse_plus_lowrank_t> ptr,
                   const sparse_plus_lowrank<T> &h) {
    vector<T> s =
      HessianSolveVector<jacobian_sparse_t<> >(ptr -> H, h.G.cols()).
      solve(h.Hvec, h.G.vec());
    matrix<T> W = s.matrix();
    W.resize(n, W.size() / n);
    matrix<T> H0M = atomic::matmul(matrix<T>(h.H0),
                                   atomic::matmul(matrix<T>(h.G.transpose()), W));
    H0M.diagonal().array() += T(1.);
    return H0M;
  }
};
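// Dense numerical check of the Woodbury solve used above (a minimal sketch,
// not part of the original source; needs <Eigen/Dense>, <iostream>). A
// general LU solve replaces ldlt here, since the small k x k system need not
// be symmetric:
inline void woodbury_demo() {
  Eigen::Matrix3d H = Eigen::Matrix3d::Identity() * 5.;  // sparse part stand-in
  Eigen::Matrix<double, 3, 2> G; G << 1, 0, 0, 1, 1, 1;  // low rank factor
  Eigen::Matrix2d H0; H0 << 2, 0, 0, 3;
  Eigen::Vector3d x(1., 2., 3.);
  Eigen::Matrix<double, 3, 2> W = H.ldlt().solve(G);
  Eigen::Matrix2d H0M = H0 * G.transpose() * W + Eigen::Matrix2d::Identity();
  Eigen::Vector3d y = H.ldlt().solve(x)
                    - W * H0M.partialPivLu().solve(H0 * W.transpose() * x);
  Eigen::Vector3d yref = (H + G * H0 * G.transpose()).ldlt().solve(x);
  std::cout << (y - yref).norm() << "\n"; // ~ 0
}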
// Newton configuration parameters.
struct newton_config {
  int maxit;                    // Max number of iterations
  int max_reject;               // Max number of allowed rejections
  int ok_exit_if_pdhess;        // Exceeding 'max_reject' counts as success if hessian is PD?
  int trace;                    // Print trace info?
  double grad_tol;              // Convergence tolerance of max gradient component
  double step_tol;              // Step tolerance (not yet used)
  double tol10;                 // Tolerance of consecutive function evaluations (not yet used)
  double mgcmax;                // Initial guess is invalid if max gradient component exceeds this
  double ustep;                 // Initial step size between 0 and 1
  double power, u0;             // Internal parameters controlling ustep updates
  bool sparse;                  // Use sparse rather than dense hessian?
  bool lowrank;                 // Detect an additional low rank contribution in the sparse case?
  bool decompose;               // Detect and move expressions not depending on the inner parameters?
  bool simplify;                // Detect and apply 'dead gradients' simplification
  bool on_failure_return_nan;   // Behaviour on convergence failure: report NaN solution?
  bool on_failure_give_warning; // Behaviour on convergence failure: throw warning?
  double signif_abs_reduction;  // Consider this absolute reduction 'significant'
  double signif_rel_reduction;  // Consider this relative reduction 'significant'
  bool SPA;                     // Modify Laplace approximation to return saddlepoint approximation?
  newton_config() {
    set_defaults();
  }
  newton_config(SEXP x) {
    set_defaults(x);
  }
  void set_defaults(SEXP x = R_NilValue) {
#define SET_DEFAULT(name, value) set_from_real(x, name, #name, value)
    SET_DEFAULT(maxit, 1000);
    SET_DEFAULT(max_reject, 10);
    SET_DEFAULT(ok_exit_if_pdhess, 1);
    SET_DEFAULT(trace, 0);
    SET_DEFAULT(grad_tol, 1e-8);
    SET_DEFAULT(step_tol, 1e-8);
    SET_DEFAULT(tol10, 0.001);
    SET_DEFAULT(mgcmax, 1e+60);
    SET_DEFAULT(ustep, 1);
    SET_DEFAULT(power, 0.5);
    SET_DEFAULT(u0, 1e-04);
    SET_DEFAULT(sparse, false);
    SET_DEFAULT(lowrank, false);
    SET_DEFAULT(decompose, true);
    SET_DEFAULT(simplify, true);
    SET_DEFAULT(on_failure_return_nan, true);
    SET_DEFAULT(on_failure_give_warning, true);
    SET_DEFAULT(signif_abs_reduction, 1e-6);
    SET_DEFAULT(signif_rel_reduction, .5);
    SET_DEFAULT(SPA, false);
#undef SET_DEFAULT
  }
  template<class T>
  void set_from_real(SEXP x, T &target, const char* name, double value) {
    SEXP y = getListElement(x, name);
    target = (T) ( y != R_NilValue ? REAL(y)[0] : value );
  }
};
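// Note: each SET_DEFAULT(name, value) above expands to
//   set_from_real(x, name, "name", value);
// so any field present in the R list 'x' (e.g. list(maxit = 500, trace = 1)
// passed from the R side) overrides the hard coded default.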
// Type-tagged version of the configuration object
template <class Type>
struct newton_config_t : newton_config {
  newton_config_t() : newton_config() {}
  newton_config_t(SEXP x) : newton_config(x) {}
};
// Generalized newton solver similar to the TMB R function 'newton'.
template<class Functor, class Hessian_Type=jacobian_dense_t<> >
struct NewtonOperator : TMBad::global::SharedDynamicOperator {
  static const bool have_input_size_output_size = true;
  static const bool add_forward_replay_copy = true;
  typedef TMBad::Scalar Scalar;
  TMBad::ADFun<> function, gradient;
  std::shared_ptr<Hessian_Type> hessian;
  newton_config cfg;                    // Convergence control
  std::vector<TMBad::ad_aug> par_outer; // Outer parameters
  NewtonOperator(Functor &F, vector<TMBad::ad_aug> start, newton_config cfg)
    : cfg(cfg)
  {
    // ... (tape the user functor and optimize the tape; elided)
    if (cfg.decompose) {
      // Move expressions not depending on the inner parameters to the
      // parent context
      function.decompose_refs();
    }
    // The previous step does not change the domain size
    size_t n_inner = function.Domain();
    TMBAD_ASSERT(n_inner == (size_t) start.size());
    // Turn remaining references to parent contexts into outer parameters
    par_outer = function.resolve_refs();
    // Mark the inner parameter subset
    std::vector<bool> keep_inner(n_inner, true);
    keep_inner.resize(function.Domain(), false);
    // Gradient function
    gradient = function.JacFun(keep_inner);
    if (cfg.simplify) {
      // Find inputs the gradient does not depend on ('dead gradient args')
      std::vector<bool> active = gradient.activeDomain();
      for (size_t i=0; i<n_inner; i++) active[i] = true; // never drop inner parameters
      size_t num_inactive = std::count(active.begin(), active.end(), false);
      if (cfg.trace) {
        std::cout << "Dead gradient args to 'simplify': ";
        std::cout << num_inactive << "\n";
      }
      if (num_inactive > 0) {
        function.DomainReduce(active);
        gradient.DomainReduce(active);
        std::vector<bool> active_outer(active.begin() + n_inner, active.end());
        par_outer = TMBad::subset(par_outer, active_outer);
        TMBAD_ASSERT(n_inner == (size_t) function.inner_inv_index.size());
      }
    }
    // ...
    hessian = std::make_shared<Hessian_Type>(function, gradient, n_inner);
    hessian -> optimize();
  }
  // Swap inner/outer parameters on all component tapes
  void SwapInner() {
    function.SwapInner();
    gradient.SwapInner();
    hessian -> SwapInner();
  }
  void SwapOuter() {
    function.SwapOuter();
    gradient.SwapOuter();
    hessian -> SwapOuter();
  }
  // Put the solution on the tape as a function of the outer parameters
  vector<TMBad::ad_aug> add_to_tape() {
    TMBad::global::Complete<NewtonOperator> solver(*this);
    std::vector<TMBad::ad_aug> sol = solver(par_outer);
    // Append the outer parameters to the solution
    sol.insert(sol.end(), par_outer.begin(), par_outer.end());
    return sol;
  }
  // Step size control (from TMB::newton): 'ustep' in (0, 1] maps to a ridge
  // penalty phi(ustep) added to the hessian diagonal
  double phi(double u) {
    return 1. / u - 1.;
  }
  double invphi(double x) {
    return 1. / (x + 1.);
  }
  double increase(double u) {
    return cfg.u0 + (1. - cfg.u0) * std::pow(u, cfg.power);
  }
  double decrease(double u) {
    return (u > 1e-10 ?
            1. - increase(1. - u) :
            (1. - cfg.u0) * cfg.power * u);
  }
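  // 'phi' and 'invphi' are exact inverses (assuming phi(u) = 1/u - 1 as
  // reconstructed above): ustep = 1 gives phi = 0, an undamped newton step,
  // while ustep -> 0 gives an increasingly damped, gradient-like step.
  // 'invphi' caps ustep when the hessian diagonal has negative entries.
  // Quick check (illustrative only; needs <cassert>, <cmath>):
  static void phi_demo() {
    double u = 0.25;
    double p = 1. / u - 1.;                        // phi(0.25) = 3
    assert(std::fabs(1. / (p + 1.) - u) < 1e-12);  // invphi(phi(u)) == u
  }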
  vector<Scalar> x_start; // Cached initial guess
  const char* convergence_fail(const char* msg,
                               vector<Scalar> &x) {
    if (cfg.trace) {
      Rcout << "Newton convergence failure: " << msg << "\n";
    }
    if (cfg.on_failure_give_warning) {
      Rf_warning("Newton convergence failure: %s",
                 msg);
    }
    if (cfg.on_failure_return_nan) {
      x.fill(NAN);
    }
    return msg;
  }
  const char* newton_iterate(vector<Scalar> &x) {
    int reject_counter = 0;
    const char* msg = NULL;
    Scalar f_x = function(x)[0];
    if (x_start.size() == x.size()) {
      Scalar f_x_start = function(x_start)[0];
      if ( ! std::isfinite(f_x_start) &&
           ! std::isfinite(f_x) ) {
        return
          convergence_fail("Invalid initial guess", x);
      }
      // Restart from the cached solution if it is better
      if (f_x_start < f_x || ! std::isfinite(f_x)) {
        x = x_start;
        f_x = f_x_start;
      }
    }
    Scalar f_previous = f_x;
    if (cfg.trace) std::cout << "f_start=" << f_x << "\n";
    for (int i=0; i < cfg.maxit; i++) {
      vector<Scalar> g = gradient(x);
      Scalar mgc = g.abs().maxCoeff();
      if ( ! std::isfinite(mgc) ||
           mgc > cfg.mgcmax) {
        return
          convergence_fail("Inner gradient had non-finite components", x);
      }
      if (cfg.trace) std::cout << "mgc=" << mgc << " ";
      if (mgc < cfg.grad_tol) {
        x_start = x; // Cache the solution
        if (cfg.trace) std::cout << "\n";
        return msg;  // Converged
      }
      typename Hessian_Type::template MatrixResult<TMBad::Scalar>::type
        H = (*hessian)(std::vector<Scalar>(x));
      vector<Scalar> diag_cpy = H.diagonal().array();
      // Quick ustep reduction based on the hessian diagonal
      Scalar m = diag_cpy.minCoeff();
      if (std::isfinite(m) && m < 0) {
        Scalar ustep_max = invphi(-m);
        cfg.ustep = std::min(cfg.ustep, ustep_max);
      }
      while (true) {
        if (cfg.trace) std::cout << "ustep=" << cfg.ustep << " ";
        // Regularize: H := H + phi(ustep) * I
        H.diagonal().array() = diag_cpy;
        H.diagonal().array() += phi( cfg.ustep );
        // Try to factorize
        hessian -> llt_factorize(H);
        if (hessian -> llt_info() == 0) break;
        // Not positive definite: increase the damping
        cfg.ustep = decrease(cfg.ustep);
      }
      // Hessian is now PD: take a newton step and check for improvement
      vector<Scalar> x_new =
        x - hessian -> llt_solve(H, g).array();
      Scalar f = function(x_new)[0];
      bool accept = std::isfinite(f);
      // ... (accept/reject bookkeeping elided: update 'x', 'f_previous',
      //      'ustep' and 'reject_counter' using cfg.signif_abs_reduction
      //      and cfg.signif_rel_reduction)
      if (reject_counter > cfg.max_reject) {
        if (cfg.ok_exit_if_pdhess) {
          H = (*hessian)(std::vector<Scalar>(x));
          hessian -> llt_factorize(H);
          bool PD = (hessian -> llt_info() == 0);
          if (cfg.trace) std::cout << "Trying early exit - PD Hess? " << PD << "\n";
          if (PD) return msg;
        }
        return
          convergence_fail("Max number of rejections exceeded", x);
      }
      if (cfg.trace) std::cout << "f=" << f << " ";
      if (cfg.trace) std::cout << "reject=" << reject_counter << " ";
      if (cfg.trace) std::cout << "\n";
    }
    return
      convergence_fail("Iteration limit exceeded", x);
  }
  TMBad::Index input_size() const {
    return function.DomainOuter();
  }
  TMBad::Index output_size() const {
    return function.DomainInner();
  }
  void forward(TMBad::ForwardArgs<Scalar> &args) {
    size_t n = function.DomainOuter();
    std::vector<Scalar> x = args.x_segment(0, n);
    // Set the outer parameters on all component tapes
    SwapOuter();
    function.DomainVecSet(x);
    gradient.DomainVecSet(x);
    hessian -> DomainVecSet(x);
    SwapOuter();
    // Run the inner solver, warm starting from the tape's parameter vector
    vector<Scalar> sol = function.DomainVec();
    newton_iterate(sol);
    args.y_segment(0, output_size()) = sol;
  }
  template<class T>
  void reverse(TMBad::ReverseArgs<T> &args) {
    vector<T> w = args.dy_segment(0, output_size());
    vector<T> sol = args.y_segment(0, output_size());
    // Note: 'hessian' expects the full (inner, outer) parameter vector
    size_t n = function.DomainOuter();
    std::vector<T> x = args.x_segment(0, n);
    std::vector<T> sol_x = sol; sol_x.insert(sol_x.end(), x.begin(), x.end());
    vector<T> hv = hessian -> eval(sol_x);
    vector<T> w2 = - hessian -> solve(hessian, hv, w);
    vector<T> g = gradient.Jacobian(sol_x, w2);
    // Accumulate the outer part of the weighted gradient jacobian
    vector<T> g_outer = g.tail(n);
    args.dx_segment(0, n) += g_outer;
  }
  const char* op_name() { return "Newton"; }
  void print(TMBad::global::print_config cfg) {
    Rcout << cfg.prefix << "======== function:\n";
    function.print(cfg);
    Rcout << cfg.prefix << "======== gradient:\n";
    gradient.print(cfg);
    Rcout << cfg.prefix << "======== hessian:\n";
    hessian -> print(cfg);
  }
};
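// The reverse pass above is the implicit function theorem in adjoint form:
// at the inner optimum g(u*, p) = 0, so du*/dp = -H^{-1} dg/dp, and for an
// output adjoint w the parameter adjoint is (dg/dp)^T w2 with w2 = -H^{-1} w,
// which is exactly 'gradient.Jacobian(sol_x, w2)' restricted to the outer
// parameters. Scalar check (illustrative only; needs <cassert>, <cmath>):
// minimize f(u, p) = 0.5 * a * u^2 - p * u, so u*(p) = p / a.
inline void newton_adjoint_demo() {
  double a = 3., p = 2.;
  double ustar = p / a;    // inner optimum
  double H = a;            // inner hessian d2f/du2
  double dgdp = -1.;       // d/dp of the inner gradient g = a*u - p
  double dudp = -dgdp / H; // implicit function theorem
  assert(std::fabs(dudp - 1. / a) < 1e-12);
  (void) ustar;
}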
// Operator: sparse inverse subset, i.e. the entries of H^{-1} on the
// sparsity pattern of H.
template<class Factorization=DEFAULT_SPARSE_FACTORIZATION >
struct InvSubOperator : TMBad::global::DynamicOperator< -1, -1 > {
  static const bool have_input_size_output_size = true;
  static const bool add_forward_replay_copy = true;
  typedef TMBad::Scalar Scalar;
  Eigen::SparseMatrix<Scalar> hessian; // Pattern
  std::shared_ptr< Factorization > llt;
  INVERSE_SUBSET_TEMPLATE<double> ihessian;
  typedef atomic::tiny_ad::variable<1,1> ad1;
  INVERSE_SUBSET_TEMPLATE<ad1> D_ihessian; // First order derivatives
  InvSubOperator(const Eigen::SparseMatrix<Scalar> &hessian,
                 std::shared_ptr< Factorization > llt) :
    hessian(hessian), llt(llt), ihessian(llt), D_ihessian(llt) { }
  TMBad::Index input_size() const {
    return hessian.nonZeros();
  }
  TMBad::Index output_size() const {
    return hessian.nonZeros();
  }
  void forward(TMBad::ForwardArgs<Scalar> &args) {
    size_t n = input_size();
    std::vector<Scalar> x = args.x_segment(0, n);
    Eigen::SparseMatrix<Scalar> h = pattern(hessian, x);
    llt->factorize(h); // Assumes analyzePattern has been done once
    Eigen::SparseMatrix<Scalar> ih = ihessian(h);
    vector<Scalar> y(n);
    for (size_t i=0; i<n; i++) y[i] = ih.valuePtr()[i];
    args.y_segment(0, n) = y;
  }
  void reverse(TMBad::ReverseArgs<Scalar> &args) {
    size_t n = input_size();
    std::vector<Scalar> x  = args. x_segment(0, n);
    std::vector<Scalar> dy = args.dy_segment(0, n);
    Eigen::SparseMatrix<Scalar> dy_mat = pattern(hessian, dy);
    // Symmetry correction: an off-diagonal pattern entry represents two
    // symmetric matrix entries
    dy_mat.diagonal() *= 2.; dy_mat *= .5;
    std::vector<ad1> x_ (n);
    for (size_t i=0; i<n; i++) {
      x_[i].value = x[i];
      x_[i].deriv[0] = dy_mat.valuePtr()[i];
    }
    Eigen::SparseMatrix<ad1> h_ = pattern(hessian, x_);
    // Inverse subset with first order derivatives propagated
    h_ = D_ihessian(h_);
    // Symmetry correction (inverse scaling)
    h_.diagonal() *= .5; h_ *= 2.;
    std::vector<Scalar> dx(n);
    for (size_t i=0; i<n; i++) {
      dx[i] = h_.valuePtr()[i].deriv[0];
    }
    args.dx_segment(0, n) += dx;
  }
  template<class T>
  void reverse(TMBad::ReverseArgs<T> &args) {
    Rf_error("Inverse subset: order 2 not yet implemented (try changing config())");
  }
  const char* op_name() { return "InvSub"; }
};
// Operator: log determinant of a sparse matrix.
template<class Factorization=DEFAULT_SPARSE_FACTORIZATION >
struct LogDetOperator : TMBad::global::DynamicOperator< -1, 1 > {
  static const bool have_input_size_output_size = true;
  static const bool add_forward_replay_copy = true;
  typedef TMBad::Scalar Scalar;
  Eigen::SparseMatrix<Scalar> hessian; // Pattern
  std::shared_ptr< Factorization > llt;
  INVERSE_SUBSET_TEMPLATE<double> ihessian;
  LogDetOperator(const Eigen::SparseMatrix<Scalar> &hessian,
                 std::shared_ptr< Factorization > llt) :
    hessian(hessian), llt(llt), ihessian(llt) { }
  TMBad::Index input_size() const {
    return hessian.nonZeros();
  }
  TMBad::Index output_size() const {
    return 1;
  }
  void forward(TMBad::ForwardArgs<Scalar> &args) {
    size_t n = input_size();
    std::vector<Scalar> x = args.x_segment(0, n);
    Eigen::SparseMatrix<Scalar> h = pattern(hessian, x);
    llt->factorize(h);
    // Take care of the non positive definite case
    if (llt->info() != 0) {
      args.y(0) = R_NaN;
      return;
    }
    args.y(0) = logDeterminant(*llt);
  }
  void reverse(TMBad::ReverseArgs<Scalar> &args) {
    size_t n = input_size();
    // Non positive definite case
    if (llt->info() != 0) {
      for (size_t i=0; i<n; i++) args.dx(i) = R_NaN;
      return;
    }
    std::vector<Scalar> x = args.x_segment(0, n);
    Eigen::SparseMatrix<Scalar> ih = pattern(hessian, x);
    // d log|H| / dH = H^{-1} restricted to the pattern
    ih = ihessian(ih);
    // Symmetry correction
    ih.diagonal() *= .5; ih *= 2.;
    // ... (scale by args.dy(0) and accumulate into args.dx_segment; elided)
  }
  void reverse(TMBad::ReverseArgs<TMBad::ad_aug> &args) {
    size_t n = input_size();
    TMBad::global::Complete< InvSubOperator<Factorization> > IS(hessian, llt);
    std::vector<TMBad::ad_aug> x = args.x_segment(0, n);
    std::vector<TMBad::ad_aug> y = IS(x);
    Eigen::SparseMatrix<TMBad::ad_aug> ih = pattern(hessian, y);
    // Symmetry correction
    ih.diagonal() *= .5; ih *= 2.;
    // ... (scale by args.dy(0) and accumulate into args.dx_segment; elided)
  }
  const char* op_name() { return "logDet"; }
};
typedef jacobian_sparse_t<> jacobian_sparse_default_t;
// Plain (non-atomic) sparse log determinant via LDLT
template<class Type>
Type log_determinant_simple(const Eigen::SparseMatrix<Type> &H) {
  Eigen::SimplicialLDLT< Eigen::SparseMatrix<Type> > ldl(H);
  vector<Type> D = ldl.vectorD();
  return D.log().sum();
}
template<class Type>
Type log_determinant(const Eigen::SparseMatrix<Type> &H,
                     std::shared_ptr< jacobian_sparse_default_t > ptr) {
  if (!config.atomic_sparse_log_determinant)
    return log_determinant_simple(H);
  const Type* vptr = H.valuePtr();
  size_t n = H.nonZeros();
  std::vector<Type> x(vptr, vptr + n);
  TMBad::global::Complete< LogDetOperator<> > LD(pattern<double>(H), ptr->llt);
  std::vector<Type> y = LD(x);
  return y[0];
}
template<class Type>
Type log_determinant(const Eigen::SparseMatrix<Type> &H) {
  const Type* vptr = H.valuePtr();
  size_t n = H.nonZeros();
  std::vector<Type> x(vptr, vptr + n);
  Eigen::SparseMatrix<double> H_pattern = pattern<double>(H);
  std::shared_ptr< DEFAULT_SPARSE_FACTORIZATION > llt =
    std::make_shared< DEFAULT_SPARSE_FACTORIZATION > (H_pattern);
  TMBad::global::Complete< LogDetOperator<> > LD(H_pattern, llt);
  std::vector<Type> y = LD(x);
  return y[0];
}
inline double log_determinant(const Eigen::SparseMatrix<double> &H) {
  DEFAULT_SPARSE_FACTORIZATION llt(H);
  return logDeterminant(llt);
}
inline double log_determinant(const Eigen::SparseMatrix<double> &H,
                              std::shared_ptr<jacobian_sparse_default_t> ptr) {
  DEFAULT_SPARSE_FACTORIZATION llt(H);
  return logDeterminant(llt);
}
// Dense case
template<class Type, class PTR>
Type log_determinant(const matrix<Type> &H, PTR ptr) {
  // ... (delegate to TMB's atomic dense 'logdet'; elided)
}
// Sparse plus low rank case, by the matrix determinant lemma:
//   log|H + G*H0*G^T| = log|H| + log|I + H0*G^T*H^{-1}*G|
template<class Type>
Type log_determinant(const typename jacobian_sparse_plus_lowrank_t<>::template sparse_plus_lowrank<Type> &H,
                     std::shared_ptr<jacobian_sparse_plus_lowrank_t<> > ptr) {
  matrix<Type> H0M = (ptr -> getH0M(ptr, H)).array();
  return
    log_determinant(H.H, ptr->H) +
    log_determinant(H0M, NULL);
}
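// Dense numeric check of the matrix determinant lemma used above (a minimal
// sketch, not part of the original source; needs <Eigen/Dense>, <cmath>,
// <iostream>):
inline void det_lemma_demo() {
  Eigen::Matrix3d H = Eigen::Matrix3d::Identity() * 5.;
  Eigen::Matrix<double, 3, 2> G; G << 1, 0, 0, 1, 1, 1;
  Eigen::Matrix2d H0; H0 << 2, 0, 0, 3;
  Eigen::Matrix2d H0M = H0 * G.transpose() * H.ldlt().solve(G)
                      + Eigen::Matrix2d::Identity();
  double lhs = std::log((H + G * H0 * G.transpose()).determinant());
  double rhs = std::log(H.determinant()) + std::log(H0M.determinant());
  std::cout << lhs << " ~ " << rhs << "\n";
}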
// Constructor hooks: run the solver directly in the numeric case, or record
// it on the tape in the AD case
template<class Newton>
void Newton_CTOR_Hook(Newton &F, const vector<double> &start) {
  F.sol = start;
  F.newton_iterate(F.sol);
}
template<class Newton>
void Newton_CTOR_Hook(Newton &F, const vector<TMBad::ad_aug> &start) {
  F.sol = F.add_to_tape();
}
// Evaluate the user functor; in the double case the evaluation is wrapped so
// stray AD expressions inside the functor are tolerated
template<class Functor, class Type>
struct safe_eval {
  Type operator()(Functor &F, vector<Type> x) {
    return F(x);
  }
};
template<class Functor>
struct safe_eval<Functor, double> {
  double operator()(Functor &F, vector<double> x) {
    // ... (open a temporary AD context; elided)
    double ans = asDouble(F(x));
    // ... (close the temporary AD context; elided)
    return ans;
  }
};
// Newton solver and Laplace approximation bundled together
template<class Functor, class Type, class Hessian_Type=jacobian_dense_t<> >
struct NewtonSolver : NewtonOperator<Functor, Hessian_Type> {
  typedef NewtonOperator<Functor, Hessian_Type> Base;
  typedef typename Hessian_Type::template MatrixResult<Type>::type hessian_t;
  vector<Type> sol; // c(solution, par_outer)
  size_t n;         // Number of inner parameters
  Functor &F;
  NewtonSolver(Functor &F, vector<Type> start, newton_config cfg) :
    Base(F, start, cfg), n(start.size()), F(F) {
    Newton_CTOR_Hook(*this, start);
  }
  // Get the solution (inner parameters only)
  operator vector<Type>() const {
    return sol.head(n);
  }
  vector<Type> solution() {
    return sol.head(n);
  }
  Type value() {
    if (Base::cfg.simplify) {
      // The tapes may have been domain-reduced: evaluate the functor directly
      return safe_eval<Functor, Type>()(F, solution());
    }
    return Base::function(std::vector<Type>(sol))[0];
  }
  hessian_t hessian() {
    return (*Base::hessian)(std::vector<Type>(sol));
  }
  Type Laplace() {
    double sign = (Base::cfg.SPA ? -1 : 1);
    return
      sign * value() +
      .5 * log_determinant( hessian(),
                            Base::hessian ) -
      sign * .5 * log(2. * M_PI) * n;
  }
};
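// 'Laplace()' evaluates the negative log of the Laplace approximation
//   -log \int exp(-f(u, p)) du
//     ~= f(u*, p) + 0.5 * log det H(u*, p) - 0.5 * n * log(2*pi),
// where u* is the inner optimum, H the inner hessian and n the number of
// inner parameters; cfg.SPA flips the signs of the value and normalizing
// constant to produce a saddlepoint approximation instead.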
// Tape a functor and return the solver object, sparse hessian back end
template<class Functor, class Type>
NewtonSolver<Functor, Type, jacobian_sparse_t<> > NewtonSparse(
  Functor &F,
  Eigen::Array<Type, Eigen::Dynamic, 1> start,
  newton_config cfg = newton_config() ) {
  NewtonSolver<Functor, Type, jacobian_sparse_t<> > ans(F, start, cfg);
  return ans;
}
// Dense hessian back end
template<class Functor, class Type>
NewtonSolver<Functor, Type, jacobian_dense_t<> > NewtonDense(
  Functor &F,
  Eigen::Array<Type, Eigen::Dynamic, 1> start,
  newton_config cfg = newton_config() ) {
  NewtonSolver<Functor, Type, jacobian_dense_t<> > ans(F, start, cfg);
  return ans;
}
// Sparse plus low rank hessian back end
template<class Functor, class Type>
NewtonSolver<Functor, Type, jacobian_sparse_plus_lowrank_t<> > NewtonSparsePlusLowrank(
  Functor &F,
  Eigen::Array<Type, Eigen::Dynamic, 1> start,
  newton_config cfg = newton_config() ) {
  NewtonSolver<Functor, Type, jacobian_sparse_plus_lowrank_t<> > ans(F, start, cfg);
  return ans;
}
// Tape a functor and return the solution of the inner problem, dispatching
// on cfg.sparse and cfg.lowrank
template<class Functor, class Type>
vector<Type> Newton(Functor &F,
                    Eigen::Array<Type, Eigen::Dynamic, 1> start,
                    newton_config cfg = newton_config() ) {
  if (cfg.sparse) {
    if (!cfg.lowrank)
      return NewtonSparse(F, start, cfg);
    else
      return NewtonSparsePlusLowrank(F, start, cfg);
  } else
    return NewtonDense(F, start, cfg);
}
// Tape a functor and return the Laplace approximation. On return, 'start'
// holds the inner solution.
template<class Functor, class Type>
Type Laplace(Functor &F,
             Eigen::Array<Type, Eigen::Dynamic, 1> &start,
             newton_config cfg = newton_config() ) {
  if (cfg.sparse) {
    if (!cfg.lowrank) {
      auto opt = NewtonSparse(F, start, cfg);
      start = opt.solution();
      return opt.Laplace();
    } else {
      auto opt = NewtonSparsePlusLowrank(F, start, cfg);
      start = opt.solution();
      return opt.Laplace();
    }
  } else {
    auto opt = NewtonDense(F, start, cfg);
    start = opt.solution();
    return opt.Laplace();
  }
}
// Helper: view a full-parameter tape as a function of its 'random' subset
// only, for use with Laplace()
template <class ADFun = TMBad::ADFun<> >
struct slice {
  ADFun &F;
  std::vector<TMBad::Index> random;
  slice(ADFun &F,
        std::vector<TMBad::Index> random) :
    F(F), random(random) {
    TMBAD_ASSERT2(F.Range() == 1,
                  "Laplace approximation is for scalar valued functions");
  }
  typedef TMBad::ad_aug T;
  std::vector<TMBad::ad_aug> x;
  T operator()(const std::vector<T> &x_random) {
    for (size_t i=0; i<random.size(); i++)
      x[random[i]] = x_random[i];
    return F(x)[0];
  }
  ADFun Laplace_(newton_config cfg = newton_config()) {
    ADFun ans;
    std::vector<double> xd = F.DomainVec();
    x = std::vector<T> (xd.begin(), xd.end());
    ans.glob.ad_start();
    TMBad::Independent(x);
    // Gather the start values at the 'random' indices
    vector<T> start(random.size());
    for (size_t i=0; i<random.size(); i++) start[i] = x[random[i]];
    T y = Laplace(*this, start, cfg);
    // ... (mark 'y' as the dependent variable of 'ans'; elided)
    ans.glob.ad_stop();
    return ans;
  }
};
// Tape the Laplace approximation with respect to the non-random parameters
TMBad::ADFun<> Laplace_(TMBad::ADFun<> &F,
                        const std::vector<TMBad::Index> &random,
                        newton_config cfg = newton_config() ) CSKIP( {
  slice<> S(F, random);
  return S.Laplace_(cfg);
} )
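// End-to-end usage sketch (hypothetical functor, illustrative only; the
// taped calls below are shown as comments because they require an active
// TMBad context):
struct demo_quadratic {
  // f(u) = 0.5 * sum(u^2) - sum(u), minimized at u = 1
  template<class T>
  T operator()(const vector<T> &u) {
    return (T(0.5) * u * u - u).sum();
  }
};
// Inside a taped context one would then write, for example:
//   demo_quadratic F;
//   vector<TMBad::ad_aug> start(5);
//   start.fill(0.);
//   newton_config cfg;                                  // defaults: dense hessian
//   vector<TMBad::ad_aug> uhat = Newton(F, start, cfg); // taped inner solve
//   TMBad::ad_aug nll = Laplace(F, start, cfg);         // Laplace approximation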