parallel_scan.h

00001 /*
00002     Copyright 2005-2009 Intel Corporation.  All Rights Reserved.
00003 
00004     The source code contained or described herein and all documents related
00005     to the source code ("Material") are owned by Intel Corporation or its
00006     suppliers or licensors.  Title to the Material remains with Intel
00007     Corporation or its suppliers and licensors.  The Material is protected
00008     by worldwide copyright laws and treaty provisions.  No part of the
00009     Material may be used, copied, reproduced, modified, published, uploaded,
00010     posted, transmitted, distributed, or disclosed in any way without
00011     Intel's prior express written permission.
00012 
00013     No license under any patent, copyright, trade secret or other
00014     intellectual property right is granted to or conferred upon you by
00015     disclosure or delivery of the Materials, either expressly, by
00016     implication, inducement, estoppel or otherwise.  Any license under such
00017     intellectual property rights must be express and approved by Intel in
00018     writing.
00019 */
00020 
00021 #ifndef __TBB_parallel_scan_H
00022 #define __TBB_parallel_scan_H
00023 
00024 #include "task.h"
00025 #include "aligned_space.h"
00026 #include <new>
00027 #include "partitioner.h"
00028 
00029 namespace tbb {
00030 
00032 
00033 struct pre_scan_tag {
00034     static bool is_final_scan() {return false;}
00035 };
00036 
00038 
00039 struct final_scan_tag {
00040     static bool is_final_scan() {return true;}
00041 };
00042 
00044 namespace internal {
00045 
00047 
00048     template<typename Range, typename Body>
00049     class final_sum: public task {
00050     public:
00051         Body body;
00052     private:
00053         aligned_space<Range,1> range;
00055         Body* stuff_last;
00056     public:
00057         final_sum( Body& body_ ) :
00058             body(body_,split())
00059         {
00060             poison_pointer(stuff_last);
00061         }
00062         ~final_sum() {
00063             range.begin()->~Range();
00064         }     
00065         void finish_construction( const Range& range_, Body* stuff_last_ ) {
00066             new( range.begin() ) Range(range_);
00067             stuff_last = stuff_last_;
00068         }
00069     private:
00070         /*override*/ task* execute() {
00071             body( *range.begin(), final_scan_tag() );
00072             if( stuff_last )
00073                 stuff_last->assign(body);
00074             return NULL;
00075         }
00076     };       
00077 
00079 
00080     template<typename Range, typename Body>
00081     class sum_node: public task {
00082         typedef final_sum<Range,Body> final_sum_type;
00083     public:
00084         final_sum_type *incoming; 
00085         final_sum_type *body;
00086         Body *stuff_last;
00087     private:
00088         final_sum_type *left_sum;
00089         sum_node *left;
00090         sum_node *right;     
00091         bool left_is_final;
00092         Range range;
00093         sum_node( const Range range_, bool left_is_final_ ) : 
00094             left_sum(NULL), 
00095             left(NULL), 
00096             right(NULL), 
00097             left_is_final(left_is_final_), 
00098             range(range_)
00099         {
00100             // Poison fields that will be set by second pass.
00101             poison_pointer(body);
00102             poison_pointer(incoming);
00103         }
00104         task* create_child( const Range& range, final_sum_type& f, sum_node* n, final_sum_type* incoming, Body* stuff_last ) {
00105             if( !n ) {
00106                 f.recycle_as_child_of( *this );
00107                 f.finish_construction( range, stuff_last );
00108                 return &f;
00109             } else {
00110                 n->body = &f;
00111                 n->incoming = incoming;
00112                 n->stuff_last = stuff_last;
00113                 return n;
00114             }
00115         }
00116         /*override*/ task* execute() {
00117             if( body ) {
00118                 if( incoming )
00119                     left_sum->body.reverse_join( incoming->body );
00120                 recycle_as_continuation();
00121                 sum_node& c = *this;
00122                 task* b = c.create_child(Range(range,split()),*left_sum,right,left_sum,stuff_last);
00123                 task* a = left_is_final ? NULL : c.create_child(range,*body,left,incoming,NULL);
00124                 set_ref_count( (a!=NULL)+(b!=NULL) );
00125                 body = NULL; 
00126                 if( a ) spawn(*b);
00127                 else a = b;
00128                 return a;
00129             } else {
00130                 return NULL;
00131             }
00132         }
00133         template<typename Range_,typename Body_,typename Partitioner_>
00134         friend class start_scan;
00135 
00136         template<typename Range_,typename Body_>
00137         friend class finish_scan;
00138     };
00139 
00141 
00142     template<typename Range, typename Body>
00143     class finish_scan: public task {
00144         typedef sum_node<Range,Body> sum_node_type;
00145         typedef final_sum<Range,Body> final_sum_type;
00146         final_sum_type** const sum;
00147         sum_node_type*& return_slot;
00148     public:
00149         final_sum_type* right_zombie;
00150         sum_node_type& result;
00151 
00152         /*override*/ task* execute() {
00153             __TBB_ASSERT( result.ref_count()==(result.left!=NULL)+(result.right!=NULL), NULL );
00154             if( result.left )
00155                 result.left_is_final = false;
00156             if( right_zombie && sum ) 
00157                 ((*sum)->body).reverse_join(result.left_sum->body);
00158             __TBB_ASSERT( !return_slot, NULL );
00159             if( right_zombie || result.right ) {
00160                 return_slot = &result;
00161             } else {
00162                 destroy( result );
00163             }
00164             if( right_zombie && !sum && !result.right ) destroy(*right_zombie);
00165             return NULL;
00166         }
00167 
00168         finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) : 
00169             sum(sum_),
00170             return_slot(return_slot_), 
00171             right_zombie(NULL),
00172             result(result_)
00173         {
00174             __TBB_ASSERT( !return_slot, NULL );
00175         }
00176     };
00177 
00179 
00180     template<typename Range, typename Body, typename Partitioner=simple_partitioner>
00181     class start_scan: public task {
00182         typedef sum_node<Range,Body> sum_node_type;
00183         typedef final_sum<Range,Body> final_sum_type;
00184         final_sum_type* body;
00186         final_sum_type** sum; 
00187         sum_node_type** return_slot;
00189         sum_node_type* parent_sum;
00190         bool is_final;
00191         bool is_right_child;
00192         Range range;
00193         typename Partitioner::partition_type partition;
00194         /*override*/ task* execute();
00195     public:
00196         start_scan( sum_node_type*& return_slot_, start_scan& parent, sum_node_type* parent_sum_ ) :
00197             body(parent.body),
00198             sum(parent.sum),
00199             return_slot(&return_slot_),
00200             parent_sum(parent_sum_),
00201             is_final(parent.is_final),
00202             is_right_child(false),
00203             range(parent.range,split()),
00204             partition(parent.partition,split())
00205         {
00206             __TBB_ASSERT( !*return_slot, NULL );
00207         }
00208 
00209         start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
00210             body(&body_),
00211             sum(NULL),
00212             return_slot(&return_slot_),
00213             parent_sum(NULL),
00214             is_final(true),
00215             is_right_child(false),
00216             range(range_),
00217             partition(partitioner_)
00218         {
00219             __TBB_ASSERT( !*return_slot, NULL );
00220         }
00221 
00222         static void run(  const Range& range, Body& body, const Partitioner& partitioner ) {
00223             if( !range.empty() ) {
00224                 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
00225                 internal::sum_node<Range,Body>* root = NULL;
00226                 typedef internal::final_sum<Range,Body> final_sum_type;
00227                 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body );
00228                 start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
00229                     /*return_slot=*/root,
00230                     range,
00231                     *temp_body,
00232                     partitioner );
00233                 task::spawn_root_and_wait( pass1 );
00234                 if( root ) {
00235                     root->body = temp_body;
00236                     root->incoming = NULL;
00237                     root->stuff_last = &body;
00238                     task::spawn_root_and_wait( *root );
00239                 } else {
00240                     body.assign(temp_body->body);
00241                     temp_body->finish_construction( range, NULL );
00242                     temp_body->destroy(*temp_body);
00243                 }
00244             }
00245         }
00246     };
00247 
00248     template<typename Range, typename Body, typename Partitioner>
00249     task* start_scan<Range,Body,Partitioner>::execute() {
00250         typedef internal::finish_scan<Range,Body> finish_pass1_type;
00251         finish_pass1_type* p = parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
00252         // Inspecting p->result.left_sum would ordinarily be a race condition.
00253         // But we inspect it only if we are not a stolen task, in which case we
00254         // know that task assigning to p->result.left_sum has completed.
00255         bool treat_as_stolen = is_right_child && (is_stolen_task() || body!=p->result.left_sum);
00256         if( treat_as_stolen ) {
00257             // Invocation is for right child that has been really stolen or needs to be virtually stolen
00258             p->right_zombie = body = new( allocate_root() ) final_sum_type(body->body);
00259             is_final = false;
00260         }
00261         task* next_task = NULL;
00262         if( (is_right_child && !treat_as_stolen) || !range.is_divisible() || partition.should_execute_range(*this) ) {
00263             if( is_final )
00264                 (body->body)( range, final_scan_tag() );
00265             else if( sum )
00266                 (body->body)( range, pre_scan_tag() );
00267             if( sum ) 
00268                 *sum = body;
00269             __TBB_ASSERT( !*return_slot, NULL );
00270         } else {
00271             sum_node_type* result;
00272             if( parent_sum ) 
00273                 result = new(allocate_additional_child_of(*parent_sum)) sum_node_type(range,/*left_is_final=*/is_final);
00274             else
00275                 result = new(task::allocate_root()) sum_node_type(range,/*left_is_final=*/is_final);
00276             finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*return_slot,sum,*result);
00277             // Split off right child
00278             start_scan& b = *new( c.allocate_child() ) start_scan( /*return_slot=*/result->right, *this, result );
00279             b.is_right_child = true;    
00280             // Left child is recycling of *this.  Must recycle this before spawning b, 
00281             // otherwise b might complete and decrement c.ref_count() to zero, which
00282             // would cause c.execute() to run prematurely.
00283             recycle_as_child_of(c);
00284             c.set_ref_count(2);
00285             c.spawn(b);
00286             sum = &result->left_sum;
00287             return_slot = &result->left;
00288             is_right_child = false;
00289             next_task = this;
00290             parent_sum = result; 
00291             __TBB_ASSERT( !*return_slot, NULL );
00292         }
00293         return next_task;
00294     } 
00295 } // namespace internal
00297 
00298 // Requirements on Range concept are documented in blocked_range.h
00299 
00317 
00319 
00320 template<typename Range, typename Body>
00321 void parallel_scan( const Range& range, Body& body ) {
00322     internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
00323 }
00324 
00326 
00327 template<typename Range, typename Body>
00328 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
00329     internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
00330 }
00331 
00333 
00334 template<typename Range, typename Body>
00335 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
00336     internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
00337 }
00339 
00340 } // namespace tbb
00341 
00342 #endif /* __TBB_parallel_scan_H */
00343 

Copyright © 2005-2009 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.