00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef __TBB_parallel_scan_H
00022 #define __TBB_parallel_scan_H
00023
00024 #include "task.h"
00025 #include "aligned_space.h"
00026 #include <new>
00027 #include "partitioner.h"
00028
00029 namespace tbb {
00030
00032
00033 struct pre_scan_tag {
00034 static bool is_final_scan() {return false;}
00035 };
00036
00038
00039 struct final_scan_tag {
00040 static bool is_final_scan() {return true;}
00041 };
00042
00044 namespace internal {
00045
00047
00048 template<typename Range, typename Body>
00049 class final_sum: public task {
00050 public:
00051 Body body;
00052 private:
00053 aligned_space<Range,1> range;
00055 Body* stuff_last;
00056 public:
00057 final_sum( Body& body_ ) :
00058 body(body_,split())
00059 {
00060 poison_pointer(stuff_last);
00061 }
00062 ~final_sum() {
00063 range.begin()->~Range();
00064 }
00065 void finish_construction( const Range& range_, Body* stuff_last_ ) {
00066 new( range.begin() ) Range(range_);
00067 stuff_last = stuff_last_;
00068 }
00069 private:
00070 task* execute() {
00071 body( *range.begin(), final_scan_tag() );
00072 if( stuff_last )
00073 stuff_last->assign(body);
00074 return NULL;
00075 }
00076 };
00077
00079
00080 template<typename Range, typename Body>
00081 class sum_node: public task {
00082 typedef final_sum<Range,Body> final_sum_type;
00083 public:
00084 final_sum_type *incoming;
00085 final_sum_type *body;
00086 Body *stuff_last;
00087 private:
00088 final_sum_type *left_sum;
00089 sum_node *left;
00090 sum_node *right;
00091 bool left_is_final;
00092 Range range;
00093 sum_node( const Range range_, bool left_is_final_ ) :
00094 left_sum(NULL),
00095 left(NULL),
00096 right(NULL),
00097 left_is_final(left_is_final_),
00098 range(range_)
00099 {
00100
00101 poison_pointer(body);
00102 poison_pointer(incoming);
00103 }
00104 task* create_child( const Range& range, final_sum_type& f, sum_node* n, final_sum_type* incoming, Body* stuff_last ) {
00105 if( !n ) {
00106 f.recycle_as_child_of( *this );
00107 f.finish_construction( range, stuff_last );
00108 return &f;
00109 } else {
00110 n->body = &f;
00111 n->incoming = incoming;
00112 n->stuff_last = stuff_last;
00113 return n;
00114 }
00115 }
00116 task* execute() {
00117 if( body ) {
00118 if( incoming )
00119 left_sum->body.reverse_join( incoming->body );
00120 recycle_as_continuation();
00121 sum_node& c = *this;
00122 task* b = c.create_child(Range(range,split()),*left_sum,right,left_sum,stuff_last);
00123 task* a = left_is_final ? NULL : c.create_child(range,*body,left,incoming,NULL);
00124 set_ref_count( (a!=NULL)+(b!=NULL) );
00125 body = NULL;
00126 if( a ) spawn(*b);
00127 else a = b;
00128 return a;
00129 } else {
00130 return NULL;
00131 }
00132 }
00133 template<typename Range_,typename Body_,typename Partitioner_>
00134 friend class start_scan;
00135
00136 template<typename Range_,typename Body_>
00137 friend class finish_scan;
00138 };
00139
00141
00142 template<typename Range, typename Body>
00143 class finish_scan: public task {
00144 typedef sum_node<Range,Body> sum_node_type;
00145 typedef final_sum<Range,Body> final_sum_type;
00146 final_sum_type** const sum;
00147 sum_node_type*& return_slot;
00148 public:
00149 final_sum_type* right_zombie;
00150 sum_node_type& result;
00151
00152 task* execute() {
00153 __TBB_ASSERT( result.ref_count()==(result.left!=NULL)+(result.right!=NULL), NULL );
00154 if( result.left )
00155 result.left_is_final = false;
00156 if( right_zombie && sum )
00157 ((*sum)->body).reverse_join(result.left_sum->body);
00158 __TBB_ASSERT( !return_slot, NULL );
00159 if( right_zombie || result.right ) {
00160 return_slot = &result;
00161 } else {
00162 destroy( result );
00163 }
00164 if( right_zombie && !sum && !result.right ) destroy(*right_zombie);
00165 return NULL;
00166 }
00167
00168 finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
00169 sum(sum_),
00170 return_slot(return_slot_),
00171 right_zombie(NULL),
00172 result(result_)
00173 {
00174 __TBB_ASSERT( !return_slot, NULL );
00175 }
00176 };
00177
00179
00180 template<typename Range, typename Body, typename Partitioner=simple_partitioner>
00181 class start_scan: public task {
00182 typedef sum_node<Range,Body> sum_node_type;
00183 typedef final_sum<Range,Body> final_sum_type;
00184 final_sum_type* body;
00186 final_sum_type** sum;
00187 sum_node_type** return_slot;
00189 sum_node_type* parent_sum;
00190 bool is_final;
00191 bool is_right_child;
00192 Range range;
00193 typename Partitioner::partition_type partition;
00194 task* execute();
00195 public:
00196 start_scan( sum_node_type*& return_slot_, start_scan& parent, sum_node_type* parent_sum_ ) :
00197 body(parent.body),
00198 sum(parent.sum),
00199 return_slot(&return_slot_),
00200 parent_sum(parent_sum_),
00201 is_final(parent.is_final),
00202 is_right_child(false),
00203 range(parent.range,split()),
00204 partition(parent.partition,split())
00205 {
00206 __TBB_ASSERT( !*return_slot, NULL );
00207 }
00208
00209 start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
00210 body(&body_),
00211 sum(NULL),
00212 return_slot(&return_slot_),
00213 parent_sum(NULL),
00214 is_final(true),
00215 is_right_child(false),
00216 range(range_),
00217 partition(partitioner_)
00218 {
00219 __TBB_ASSERT( !*return_slot, NULL );
00220 }
00221
00222 static void run( const Range& range, Body& body, const Partitioner& partitioner ) {
00223 if( !range.empty() ) {
00224 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
00225 internal::sum_node<Range,Body>* root = NULL;
00226 typedef internal::final_sum<Range,Body> final_sum_type;
00227 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body );
00228 start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
00229 root,
00230 range,
00231 *temp_body,
00232 partitioner );
00233 task::spawn_root_and_wait( pass1 );
00234 if( root ) {
00235 root->body = temp_body;
00236 root->incoming = NULL;
00237 root->stuff_last = &body;
00238 task::spawn_root_and_wait( *root );
00239 } else {
00240 body.assign(temp_body->body);
00241 temp_body->finish_construction( range, NULL );
00242 temp_body->destroy(*temp_body);
00243 }
00244 }
00245 }
00246 };
00247
00248 template<typename Range, typename Body, typename Partitioner>
00249 task* start_scan<Range,Body,Partitioner>::execute() {
00250 typedef internal::finish_scan<Range,Body> finish_pass1_type;
00251 finish_pass1_type* p = parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
00252
00253
00254
00255 bool treat_as_stolen = is_right_child && (is_stolen_task() || body!=p->result.left_sum);
00256 if( treat_as_stolen ) {
00257
00258 p->right_zombie = body = new( allocate_root() ) final_sum_type(body->body);
00259 is_final = false;
00260 }
00261 task* next_task = NULL;
00262 if( (is_right_child && !treat_as_stolen) || !range.is_divisible() || partition.should_execute_range(*this) ) {
00263 if( is_final )
00264 (body->body)( range, final_scan_tag() );
00265 else if( sum )
00266 (body->body)( range, pre_scan_tag() );
00267 if( sum )
00268 *sum = body;
00269 __TBB_ASSERT( !*return_slot, NULL );
00270 } else {
00271 sum_node_type* result;
00272 if( parent_sum )
00273 result = new(allocate_additional_child_of(*parent_sum)) sum_node_type(range,is_final);
00274 else
00275 result = new(task::allocate_root()) sum_node_type(range,is_final);
00276 finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*return_slot,sum,*result);
00277
00278 start_scan& b = *new( c.allocate_child() ) start_scan( result->right, *this, result );
00279 b.is_right_child = true;
00280
00281
00282
00283 recycle_as_child_of(c);
00284 c.set_ref_count(2);
00285 c.spawn(b);
00286 sum = &result->left_sum;
00287 return_slot = &result->left;
00288 is_right_child = false;
00289 next_task = this;
00290 parent_sum = result;
00291 __TBB_ASSERT( !*return_slot, NULL );
00292 }
00293 return next_task;
00294 }
00295 }
00297
00298
00299
00317
00319
00320 template<typename Range, typename Body>
00321 void parallel_scan( const Range& range, Body& body ) {
00322 internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
00323 }
00324
00326
00327 template<typename Range, typename Body>
00328 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
00329 internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
00330 }
00331
00333
00334 template<typename Range, typename Body>
00335 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
00336 internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
00337 }
00339
00340 }
00341
00342 #endif
00343