7 #ifndef __solve_BCGThreaded_h__ 8 #define __solve_BCGThreaded_h__ 10 #define BCGT_DEBUG FALSE 11 #define BCGT_TRACE FALSE 12 #define BCGT_THREAD_DEBUG FALSE 13 #define BCGT_VERIFY (FE_CODEGEN<=FE_DEBUG) 32 template <
typename MATRIX,
typename VECTOR>
37 class PointerCounted:
public Counted 41 { m_pointer=pointer; }
50 m_spJobQueue(spJobQueue) {}
52 void operator()(
void);
56 m_spPointerCounted=spObject;
57 FEASSERT(m_spPointerCounted.isValid());
58 m_pBCGThreaded=m_spPointerCounted->m_pointer;
75 new PointerCounted(
this));
77 m_spGang->start(2,spPointerCounted);
86 void solve(VECTOR& x,
const MATRIX& A,
const VECTOR& b);
87 void setThreshold(F64 threshold) { m_threshold=threshold; }
90 void solve(U32 thread);
91 void solve(U32 thread, VECTOR& x,
const MATRIX& A,
const VECTOR& b);
115 template <
typename MATRIX,
typename VECTOR>
121 #if BCGT_THREAD_DEBUG 122 feLog(
"BCGThreaded<>::Worker::operator %p thread %d wait\n",
123 m_spJobQueue.raw(),m_id);
125 m_spJobQueue->waitForJob(job);
128 #if BCGT_THREAD_DEBUG 129 feLog(
"BCGThreaded<>::Worker::operator %p thread %d break\n",
130 m_spJobQueue.raw(),m_id);
134 #if BCGT_THREAD_DEBUG 135 feLog(
"BCGThreaded<>::Worker::operator %p thread %d solve %d\n",
136 m_spJobQueue.raw(),m_id,job);
138 m_pBCGThreaded->solve(job);
139 #if BCGT_THREAD_DEBUG 140 feLog(
"BCGThreaded<>::Worker::operator %p thread %d deliver %d\n",
141 m_spJobQueue.raw(),m_id,job);
143 m_spJobQueue->deliver(job);
147 template <
typename MATRIX,
typename VECTOR>
149 const MATRIX& A,
const VECTOR& b)
155 #if BCGT_THREAD_DEBUG 156 feLog(
"BCGThreaded<>::solve %p post\n",m_spGang.raw());
163 if(m_spGang->acceptDelivery(job))
165 #if BCGT_THREAD_DEBUG 166 feLog(
"BCGThreaded<>::solve %p acceptDelivery %d\n",
177 #if BCGT_THREAD_DEBUG 178 feLog(
"BCGThreaded<>::solve %p complete\n",m_spGang.raw());
182 template <
typename MATRIX,
typename VECTOR>
185 solve(thread,*m_x,*m_A,*m_b);
188 template <
typename MATRIX,
typename VECTOR>
190 const MATRIX& A,
const VECTOR& b)
208 feLog(
"\nA\n%s\nb=<%s>\n",c_print(A),c_print(b));
211 if(magnitudeSquared(b)<m_threshold)
214 feLog(
"BCGThreaded::solve has trivial solution\n");
220 m_spGang->synchronize();
226 for(U32 k=1;k<=m_N;k++)
244 m_dot_r_1=dot(rb_1,r_1);
257 m_dot_r_1=dot(rb_1,r_1);
258 m_beta=m_dot_r_1/dot(rb_2,r_2);
274 m_spGang->synchronize();
277 transformVector(A,p,Ap);
278 m_alpha=m_dot_r_1/dot(pb,Ap);
280 BWORD zero_mag=(magnitudeSquared(p)==0.0f);
282 #if BCGT_TRACE==FALSE 286 feLog(
"\n%d m_alpha=%.6G beta=%.6G\n",k,m_alpha,m_beta);
287 feLog(
"x<%s>\n",c_print(x));
288 feLog(
"r<%s>\n",c_print(r));
289 feLog(
"r_1<%s>\n",c_print(r_1));
290 feLog(
"r_2<%s>\n",c_print(r_2));
291 feLog(
"rb_1<%s>\n",c_print(rb_1));
292 feLog(
"rb_2<%s>\n",c_print(rb_2));
293 feLog(
"p<%s>\n",c_print(p));
294 feLog(
"p_1<%s>\n",c_print(p_1));
295 feLog(
"A*p<%s>\n",c_print(A*p));
296 feLog(
"pb<%s>\n",c_print(pb));
297 feLog(
"pb_1<%s>\n",c_print(pb_1));
302 feX(
"BCGThreaded::solve",
"direction lost its magnitude");
316 if(magnitudeSquared(r)<m_threshold)
319 feLog(
"BCGThreaded::solve early solve %d/%d\n",k,m_N);
322 m_spGang->synchronize();
328 feLog(
"BCGThreaded::solve ran %d/%d\n",k,m_N);
331 m_spGang->synchronize();
338 feLog(
"temp[1]<%s>\n",c_print(temp[1]));
339 feLog(
"rb<%s>\n",c_print(rb));
345 transposeTransformVector(A,pb,temp[1]);
347 m_spGang->synchronize();
359 feLog(
"\nx=<%s>\nA*x=<%s>\n",c_print(x),c_print(A*x));
364 for(U32 k=0;k<m_N;k++)
366 if(FE_INVALID_SCALAR(x[k]))
372 F64 distance=magnitude(Ax-b);
373 if(invalid || distance>1.0f)
375 feLog(
"BCGThreaded::solve failed to converge (dist=%.6G)\n",
379 feLog(
" collecting state ...\n");
380 feLog(
"A=\n%s\nx=<%s>\nA*x=<%s>\nb=<%s>\n",
381 c_print(A),c_print(x),
382 c_print(Ax),c_print(b));
Heap-based support for classes participating in fe::ptr <>
Definition: Counted.h:35
kernel
Definition: namespace.dox:3
Intrusive Smart Pointer.
Definition: src/core/ptr.h:53
solve Ax=b for x
Definition: BCGThreaded.h:33