void start_parallel();

/* OpenMP callbacks handed to CppAD's thread allocator */
bool in_parallel() {
  return static_cast<bool>(omp_in_parallel());
}
size_t thread_num() {
  return static_cast<size_t>(omp_get_thread_num());
}

void start_parallel() {
#ifdef CPPAD_FRAMEWORK
  CppAD::thread_alloc::free_all();
#endif // CPPAD_FRAMEWORK
  int nthreads = config.nthreads;
  std::cout << "Using " << nthreads << " threads\n";
#ifdef CPPAD_FRAMEWORK
  /* Register the callbacks and prepare the tape statics for every AD level */
  CppAD::thread_alloc::parallel_setup(nthreads, in_parallel, thread_num);
  CppAD::parallel_ad<AD<AD<AD<double> > > >();
  CppAD::parallel_ad<AD<AD<double> > >();
  CppAD::parallel_ad<AD<double> >();
  CppAD::parallel_ad<double>();
#endif // CPPAD_FRAMEWORK
}

/* Sparse Hessian: a tape together with the row/column indices of its non-zeros */
template<class ADFunType>
struct sphess_t {
  sphess_t(ADFunType* pf_, vector<int> i_, vector<int> j_)
    : pf(pf_), i(i_), j(j_) {}
  ADFunType* pf;
  vector<int> i;
  vector<int> j;
};

/* The underlying ADFun type depends on the selected AD framework */
#ifdef CPPAD_FRAMEWORK
#define ADFUN ADFun<double>
#endif // CPPAD_FRAMEWORK
#ifdef TMBAD_FRAMEWORK
#define ADFUN TMBad::ADFun<TMBad::ad_aug>
#endif // TMBAD_FRAMEWORK
typedef sphess_t<ADFUN> sphess;
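/* Illustration (not part of the TMB source): a minimal standalone sketch of the
   callback pattern used by start_parallel() above. CppAD::thread_alloc::parallel_setup()
   takes the thread count plus two function pointers that report whether execution is
   currently inside a parallel region and which thread is running. The sketch below
   shows the same two OpenMP callbacks in isolation; it only assumes OpenMP
   (compile with -fopenmp), the names demo_* are made up for the example. */
#include <cstdio>
#include <omp.h>

static bool demo_in_parallel() { return static_cast<bool>(omp_in_parallel()); }
static size_t demo_thread_num() { return static_cast<size_t>(omp_get_thread_num()); }

int main() {
  const int nthreads = 4;
  /* Sequential region: the callbacks report "not parallel", thread 0 */
  std::printf("before: in_parallel=%d thread=%zu\n",
              (int) demo_in_parallel(), demo_thread_num());
#pragma omp parallel num_threads(nthreads)
  {
    /* Inside the parallel region each thread sees its own index, which is
       what allows a library to keep per-thread memory pools / tape statics */
#pragma omp critical
    std::printf("inside: in_parallel=%d thread=%zu\n",
                (int) demo_in_parallel(), demo_thread_num());
  }
  return 0;
}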
/* Parallel ADFun object: one tape per thread, evaluated in parallel and combined */
template <class Base>
struct parallelADFun : ADFUN {
  vector<Base*> vecpf;             /* Pointers to the individual tapes */
  vector<vector<size_t> > vecind;  /* For each tape: positions of its outputs in the combined result */
  vector<int> veci, vecj;          /* Row/column indices of the combined sparsity pattern */
  size_t domain, range;            /* Common domain/range dimension of the tapes */
  int ntapes;                      /* Number of tapes */

  /* Construct from a vector of pointers to existing tapes */
  parallelADFun(const std::vector<Base*> &vecpf_) {
    size_t n = vecpf_.size();
    ntapes = n; vecpf.resize(n); vecind.resize(n);
    for (size_t i=0; i<n; i++) vecpf[i] = vecpf_[i];
    domain = vecpf[0]->Domain();
    range  = vecpf[0]->Range();
    for (size_t i=0; i<n; i++) {
      vecind(i).resize(range);
      for (size_t j=0; j<range; j++) vecind(i)[j] = j;  /* Every tape contributes to every range component */
    }
  }

  /* Construct by copying a vector of tapes, in parallel, so each thread owns its own copy */
  parallelADFun(const std::vector<Base> &vecf) {
    ntapes = vecf.size(); vecpf.resize(ntapes);
#pragma omp parallel for num_threads(config.nthreads)
    for (int i=0; i<vecpf.size(); i++) {
      vecpf[i] = new Base(vecf[i]);
    }
  }
  /* Join a vector of sparse Hessians (one per thread) into a single sparse
     Hessian with a common row/column index set */
  parallelADFun(const std::vector<sphess*> &H) {
    size_t n = H.size();
    ntapes = n; vecpf.resize(n); vecind.resize(n);
    domain = H[0]->pf->Domain();
    for (int i=0; i<n; i++) vecpf[i] = H[i]->pf;
    /* Column-major linear index of every non-zero of every tape */
    size_t kmax = 0;
    for (int i=0; i<n; i++) {
      vecind[i] = ((H[i]->i).cast<size_t>()) + ((H[i]->j).cast<size_t>()) * domain;
      kmax += vecind[i].size();
    }
    veci.resize(kmax); vecj.resize(kmax);
    vector<size_t> pos(n);
    for (int i=0; i<n; i++) pos(i) = 0;
    if (config.trace.parallel) std::cout << "Hessian number of non-zeros:\n";
    for (int i=0; i<n; i++) {
      if (config.trace.parallel) std::cout << "nnz = " << vecind(i).size() << "\n";
    }
    /* Merge the sorted per-tape index sets: repeatedly take the smallest pending
       linear index; entries shared between tapes collapse to a single slot */
    const size_t inf = std::numeric_limits<size_t>::max();
    vector<size_t> value(n);
    size_t k = 0, m, rowk = 0, colk = 0;
    while (true) {
      for (int i=0; i<n; i++) {
        if (pos(i) < vecind(i).size()) value(i) = vecind(i)[pos(i)];
        else value(i) = inf;
      }
      m = inf;
      for (int i=0; i<n; i++) { if (value(i) < m) m = value(i); }
      if (m == inf) break;
      for (int i=0; i<n; i++) {
        if (value(i) == m) {
          rowk = (H[i]->i)[pos(i)];
          colk = (H[i]->j)[pos(i)];
          vecind(i)[pos(i)] = k;   /* Remap to position in the merged pattern */
          pos(i)++;
        }
      }
      veci[k] = rowk; vecj[k] = colk;
      k++;
    }
    veci.conservativeResize(k); vecj.conservativeResize(k);
    range = k;                     /* One output per merged non-zero */
  }
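/* Illustration (not part of the TMB source): the constructor above merges the sorted
   non-zero patterns of several sparse Hessians. Each (row, col) pair is encoded as the
   column-major linear index row + col * domain; the merge repeatedly takes the smallest
   index still pending in any tape, so entries shared between tapes collapse to a single
   slot, and each tape records which merged slot its k-th non-zero went to. A minimal
   standalone sketch of that kind of merge (plain std::vector, made-up data): */
#include <cstdio>
#include <cstddef>
#include <vector>
#include <limits>

int main() {
  const size_t domain = 4;
  /* Non-zero (row, col) pairs of two "tapes", already sorted by linear index */
  std::vector<std::vector<size_t> > rows(2), cols(2), lin(2), slot(2);
  size_t r0[] = {0, 1, 2}, c0[] = {0, 1, 2};   /* tape 0: (0,0) (1,1) (2,2) */
  size_t r1[] = {0, 0, 1}, c1[] = {0, 1, 1};   /* tape 1: (0,0) (0,1) (1,1) */
  rows[0].assign(r0, r0+3); cols[0].assign(c0, c0+3);
  rows[1].assign(r1, r1+3); cols[1].assign(c1, c1+3);
  for (int t=0; t<2; t++) {
    lin[t].resize(rows[t].size());
    slot[t].resize(rows[t].size());
    for (size_t k=0; k<rows[t].size(); k++) lin[t][k] = rows[t][k] + cols[t][k]*domain;
  }
  const size_t inf = std::numeric_limits<size_t>::max();
  std::vector<size_t> pos(2, 0), veci, vecj;
  while (true) {
    /* Smallest pending linear index over all tapes */
    size_t m = inf;
    for (int t=0; t<2; t++)
      if (pos[t] < lin[t].size() && lin[t][pos[t]] < m) m = lin[t][pos[t]];
    if (m == inf) break;
    size_t k = veci.size();                 /* slot in the merged pattern */
    veci.push_back(m % domain);             /* recover row */
    vecj.push_back(m / domain);             /* recover col */
    for (int t=0; t<2; t++)
      if (pos[t] < lin[t].size() && lin[t][pos[t]] == m) { slot[t][pos[t]] = k; pos[t]++; }
  }
  for (size_t k=0; k<veci.size(); k++)
    std::printf("merged nnz %zu: (%zu,%zu)\n", k, veci[k], vecj[k]);
  /* slot[t][n] now says where tape t's n-th non-zero lives in the merged vector */
  return 0;
}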
  /* Destructor: free the individual tapes */
  ~parallelADFun() {
    if (config.trace.parallel) std::cout << "Free parallelADFun object.\n";
    for (int i=0; i<vecpf.size(); i++) {
      delete vecpf[i];
    }
  }

  /* Wrap this object together with its sparsity pattern as an sphess_t */
  sphess_t<parallelADFun<double> > convert() {
    sphess_t<parallelADFun<double> > ans(this, veci, vecj);
    return ans;
  }
  /* Gather from x (the combined vector) the entries belonging to tape 'tapeid'.
     Each index refers to a block of p consecutive values. */
  template <typename VectorBase>
  VectorBase subset(const VectorBase& x, size_t tapeid, int p=1) {
    VectorBase y;
    y.resize(vecind(tapeid).size() * p);
    for (int i=0; i<(int)y.size()/p; i++)
      for (int j=0; j<p; j++)
        { y[i*p+j] = x[vecind(tapeid)[i]*p+j]; }
    return y;
  }

  /* Scatter-add the tape's result y into the combined vector x */
  template <typename VectorBase>
  void addinsert(VectorBase& x, const VectorBase& y, size_t tapeid, int p=1) {
    for (int i=0; i<(int)y.size()/p; i++)
      for (int j=0; j<p; j++)
        { x[vecind(tapeid)[i]*p+j] += y[i*p+j]; }
  }

  size_t Domain() { return domain; }
  size_t Range()  { return range; }
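/* Illustration (not part of the TMB source): subset() gathers the entries of a combined
   vector that belong to one tape, and addinsert() scatters a tape's result back, adding
   into the combined vector; with p > 1 every index refers to a block of p consecutive
   values. A standalone sketch of the same gather / scatter-add pair on plain std::vector
   (the names gather / scatter_add are made up for the example): */
#include <cstdio>
#include <cstddef>
#include <vector>

/* ind maps local entry i of one tape to its position in the combined vector */
static std::vector<double> gather(const std::vector<double>& x,
                                  const std::vector<size_t>& ind, int p = 1) {
  std::vector<double> y(ind.size() * p);
  for (size_t i = 0; i < ind.size(); i++)
    for (int j = 0; j < p; j++) y[i*p + j] = x[ind[i]*p + j];
  return y;
}
static void scatter_add(std::vector<double>& x, const std::vector<double>& y,
                        const std::vector<size_t>& ind, int p = 1) {
  for (size_t i = 0; i < ind.size(); i++)
    for (int j = 0; j < p; j++) x[ind[i]*p + j] += y[i*p + j];
}

int main() {
  /* Combined vector with 5 slots; this tape owns slots 1, 3 and 4 */
  std::vector<double> combined(5, 0.0);
  size_t own[] = {1, 3, 4};
  std::vector<size_t> ind(own, own + 3);
  double vals[] = {10.0, 20.0, 30.0};
  std::vector<double> tape_result(vals, vals + 3);
  scatter_add(combined, tape_result, ind);          /* combined = 0 10 0 20 30 */
  std::vector<double> back = gather(combined, ind); /* back = 10 20 30 */
  for (size_t i = 0; i < combined.size(); i++) std::printf("%g ", combined[i]);
  std::printf("\n");
  for (size_t i = 0; i < back.size(); i++) std::printf("%g ", back[i]);
  std::printf("\n");
  return 0;
}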
#ifdef CPPAD_FRAMEWORK
  /* p'th order forward mode: evaluate all tapes in parallel and scatter-add
     the per-tape results into the combined range vector */
  template <typename VectorBase>
  VectorBase Forward(size_t p, const VectorBase& x, std::ostream& s = std::cout) {
    vector<VectorBase> ans(ntapes);
#pragma omp parallel for num_threads(config.nthreads)
    for (int i=0; i<ntapes; i++) ans(i) = vecpf(i)->Forward(p, x);
    VectorBase out(range);
    for (size_t i=0; i<range; i++) out(i) = 0;
    for (int i=0; i<ntapes; i++) addinsert(out, ans(i), i);
    return out;
  }

  /* p'th order reverse mode: each tape receives only the weights for its own
     range components (subset), and the per-tape results are summed */
  template <typename VectorBase>
  VectorBase Reverse(size_t p, const VectorBase &v) {
    vector<VectorBase> ans(ntapes);
#pragma omp parallel for num_threads(config.nthreads)
    for (int i=0; i<ntapes; i++) ans(i) = vecpf(i)->Reverse(p, subset(v, i));
    VectorBase out(p * domain);
    for (size_t i=0; i<p*domain; i++) out(i) = 0;
    for (int i=0; i<ntapes; i++) out = out + ans(i);
    return out;
  }
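/* Illustration (not part of the TMB source): in Reverse() above each tape only receives,
   via subset(v, i), the weights belonging to its own range components, while every tape
   returns a full gradient of length 'domain', and those gradients are summed. The
   standalone sketch below reproduces that bookkeeping with plain vectors; the per-tape
   "gradient" is a mock value just to keep the example self-contained. */
#include <cstdio>
#include <cstddef>
#include <vector>

int main() {
  const size_t domain = 3;
  /* Combined weight vector over 4 outputs */
  double w[] = {1.0, 2.0, 3.0, 4.0};
  /* Output components owned by each of two tapes */
  std::vector<std::vector<size_t> > vecind(2);
  size_t own0[] = {0, 2}, own1[] = {1, 3};
  vecind[0].assign(own0, own0 + 2);
  vecind[1].assign(own1, own1 + 2);
  std::vector<double> grad(domain, 0.0);
  for (int t = 0; t < 2; t++) {
    /* subset(): pick out this tape's weights */
    std::vector<double> wt(vecind[t].size());
    for (size_t i = 0; i < wt.size(); i++) wt[i] = w[vecind[t][i]];
    /* Stand-in for vecpf(t)->Reverse(1, wt): a mock domain-length gradient
       scaled by the first weight */
    std::vector<double> gt(domain);
    for (size_t j = 0; j < domain; j++) gt[j] = wt[0] * (j + 1);
    /* Sum the per-tape gradients */
    for (size_t j = 0; j < domain; j++) grad[j] += gt[j];
  }
  for (size_t j = 0; j < domain; j++) std::printf("grad[%zu] = %g\n", j, grad[j]);
  return 0;
}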
  /* Dense Jacobian: each tape's rows are scattered into the combined
     Jacobian in blocks of length 'domain' */
  template <typename VectorBase>
  VectorBase Jacobian(const VectorBase &x) {
    vector<VectorBase> ans(ntapes);
#pragma omp parallel for num_threads(config.nthreads)
    for (int i=0; i<ntapes; i++) ans(i) = vecpf(i)->Jacobian(x);
    VectorBase out( domain * range );
    for (size_t i=0; i<domain*range; i++) out(i) = 0;
    for (int i=0; i<ntapes; i++) addinsert(out, ans(i), i, domain);
    return out;
  }

  /* Dense Hessian of a single range component, accumulated over tapes */
  template <typename VectorBase>
  VectorBase Hessian(const VectorBase &x, size_t rangecomponent) {
    vector<VectorBase> ans(ntapes);
#pragma omp parallel for num_threads(config.nthreads)
    for (int i=0; i<ntapes; i++) ans(i) = vecpf(i)->Hessian(x, rangecomponent);
    VectorBase out( domain * domain );
    for (size_t i=0; i<domain*domain; i++) out(i) = 0;
    for (int i=0; i<ntapes; i++) addinsert(out, ans(i), i, domain*domain);
    return out;
  }

  /* Optimize the individual tapes, optionally in parallel */
  void optimize() {
    if (config.trace.optimize) std::cout << "Optimizing parallel tape... ";
#pragma omp parallel for num_threads(config.nthreads) if (config.optimize.parallel)
    for (int i=0; i<ntapes; i++) vecpf(i)->optimize();
    if (config.trace.optimize) std::cout << "Done\n";
  }
#endif // CPPAD_FRAMEWORK

#ifdef TMBAD_FRAMEWORK
  void unset_tail() {
    for (int i=0; i<ntapes; i++) vecpf(i) -> unset_tail();
  }
  void set_tail(const std::vector<TMBad::Index> &r) {
#pragma omp parallel for num_threads(config.nthreads)
    for (int i=0; i<ntapes; i++) vecpf(i) -> set_tail(r);
  }
  void force_update() {
#pragma omp parallel for num_threads(config.nthreads)
    for (int i=0; i<ntapes; i++) vecpf(i) -> force_update();
  }

  /* Parallel evaluation: per-tape results are scattered into the combined range */
  // ...
#pragma omp parallel for num_threads(config.nthreads)
    for (int i=0; i<ntapes; i++) {
      // ...
    }
    for (int i=0; i<ntapes; i++) addinsert(out, ans(i), i);
  // ...

  /* Parallel dense Jacobian: per-tape blocks of length 'domain' */
  // ...
#pragma omp parallel for num_threads(config.nthreads)
    for (int i=0; i<ntapes; i++) {
      // ...
    }
    for (int i=0; i<ntapes; i++) addinsert(out, ans(i), i, domain);
  // ...

  /* Parallel Jacobian restricted to the inputs/outputs flagged in keep_x / keep_y */
  // ...
               const std::vector<bool> &keep_x,
               const std::vector<bool> &keep_y ) {
#pragma omp parallel for num_threads(config.nthreads)
    for (int i=0; i<ntapes; i++) {
      // ...
    }
    /* Renumber each tape's output indices to positions among the kept outputs */
    std::vector<size_t> remap = TMBad::cumsum0<size_t> (keep_y);
    for (int i=0; i<ntapes; i++) {
      // ...
    }
    int dim_x = std::count(keep_x.begin(), keep_x.end(), true);
    int dim_y = std::count(keep_y.begin(), keep_y.end(), true);
    // ...
    /* Temporarily swap in the renumbered index sets while combining */
    std::swap(vecind, vecind2);
    for (int i=0; i<ntapes; i++) addinsert(out, ans(i), i, dim_x);
    std::swap(vecind, vecind2);
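/* Illustration (not part of the TMB source): restricting a sparsity pattern to the
   outputs flagged in keep_y requires renumbering the surviving indices. An exclusive
   prefix sum over the boolean mask (which is presumably what TMBad::cumsum0 provides
   here) gives, for each old index, its new position among the kept entries. A
   standalone sketch of that renumbering (cumsum0_mask is a made-up helper name): */
#include <cstdio>
#include <cstddef>
#include <vector>

/* remap[i] = number of kept entries strictly before i (exclusive prefix sum) */
static std::vector<size_t> cumsum0_mask(const std::vector<bool>& keep) {
  std::vector<size_t> remap(keep.size(), 0);
  size_t count = 0;
  for (size_t i = 0; i < keep.size(); i++) { remap[i] = count; if (keep[i]) count++; }
  return remap;
}

int main() {
  bool k[] = {true, false, true, true, false};
  std::vector<bool> keep_y(k, k + 5);
  std::vector<size_t> remap = cumsum0_mask(keep_y);
  /* Old output indices of one tape's non-zeros */
  size_t oldind[] = {0, 2, 3};
  for (int n = 0; n < 3; n++) {
    /* Only kept outputs survive; their new index is remap[old] */
    if (keep_y[oldind[n]])
      std::printf("old %zu -> new %zu\n", oldind[n], remap[oldind[n]]);
  }
  return 0;
}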
  /* Parallel evaluation with summed per-tape results */
  // ...
#pragma omp parallel for num_threads(config.nthreads)
    for (int i=0; i<ntapes; i++) {
      // ...
    }
    for (int i=0; i<ntapes; i++) out = out + ans(i);
  // ...

  /* TMBad forward sweep: every tape is run on the same input and the
     per-tape results are summed */
  template<class Vector>
  Vector forward(const Vector &x) {
    vector<Vector> ans(ntapes);
#pragma omp parallel for num_threads(config.nthreads)
    for (int i=0; i<ntapes; i++) ans(i) = vecpf(i)->forward(x);
    // ...
    for (int i=0; i<ntapes; i++) out = out + ans(i);
    return out;
  }

  /* TMBad reverse sweep: every tape is run with the same weights and the
     per-tape results are summed */
  template<class Vector>
  Vector reverse(const Vector &w) {
    vector<Vector> ans(ntapes);
#pragma omp parallel for num_threads(config.nthreads)
    for (int i=0; i<ntapes; i++) ans(i) = vecpf(i)->reverse(w);
    // ...
    for (int i=0; i<ntapes; i++) out = out + ans(i);
    return out;
  }
#endif // TMBAD_FRAMEWORK
};
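/* Illustration (not part of the TMB source): the methods above all follow the same
   pattern as the sketch below - each thread evaluates its own "tape" (here just a
   plain function object) on the shared input, and the per-thread results are combined
   afterwards, either by summing (Reverse, forward, reverse) or by scattering into the
   slots recorded in vecind (Forward, Jacobian). Only OpenMP is assumed (compile with
   -fopenmp); ToyTape is a made-up stand-in for an ADFun tape. */
#include <cstdio>
#include <cstddef>
#include <vector>
#include <omp.h>

/* Stand-in for one tape: contributes the partial sum over a slice of x */
struct ToyTape {
  size_t begin, end;
  double eval(const std::vector<double>& x) const {
    double s = 0;
    for (size_t i = begin; i < end; i++) s += x[i] * x[i];
    return s;
  }
};

int main() {
  const int ntapes = 4;
  std::vector<double> x(100);
  for (size_t i = 0; i < x.size(); i++) x[i] = 0.01 * i;
  /* Split the work evenly over the tapes */
  std::vector<ToyTape> tapes(ntapes);
  for (int t = 0; t < ntapes; t++) {
    tapes[t].begin = t * x.size() / ntapes;
    tapes[t].end   = (t + 1) * x.size() / ntapes;
  }
  std::vector<double> ans(ntapes);
#pragma omp parallel for num_threads(ntapes)
  for (int t = 0; t < ntapes; t++) ans[t] = tapes[t].eval(x);
  /* Combine: here a plain sum, as in the Reverse / forward / reverse methods */
  double out = 0;
  for (int t = 0; t < ntapes; t++) out += ans[t];
  std::printf("parallel result: %g\n", out);
  return 0;
}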