00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef __TBB_machine_H
00022 #define __TBB_machine_H
00023
00024 #include "tbb_stddef.h"
00025
00026 #if _WIN32||_WIN64
00027
00028 #ifdef _MANAGED
00029 #pragma managed(push, off)
00030 #endif
00031
00032 #if __MINGW32__
00033 #include "machine/linux_ia32.h"
00034 extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
00035 #define __TBB_Yield() SwitchToThread()
00036 #elif defined(_M_IX86)
00037 #include "machine/windows_ia32.h"
00038 #elif defined(_M_AMD64)
00039 #include "machine/windows_intel64.h"
00040 #else
00041 #error Unsupported platform
00042 #endif
00043
00044 #ifdef _MANAGED
00045 #pragma managed(pop)
00046 #endif
00047
00048 #elif __linux__ || __FreeBSD__
00049
00050 #if __i386__
00051 #include "machine/linux_ia32.h"
00052 #elif __x86_64__
00053 #include "machine/linux_intel64.h"
00054 #elif __ia64__
00055 #include "machine/linux_ia64.h"
00056 #endif
00057
00058 #elif __APPLE__
00059
00060 #if __i386__
00061 #include "machine/linux_ia32.h"
00062 #elif __x86_64__
00063 #include "machine/linux_intel64.h"
00064 #elif __POWERPC__
00065 #include "machine/mac_ppc.h"
00066 #endif
00067
00068 #elif _AIX
00069
00070 #include "machine/ibm_aix51.h"
00071
00072 #elif __sun || __SUNPRO_CC
00073
00074 #define __asm__ asm
00075 #define __volatile__ volatile
00076 #if __i386 || __i386__
00077 #include "machine/linux_ia32.h"
00078 #elif __x86_64__
00079 #include "machine/linux_intel64.h"
00080 #endif
00081
00082 #endif
00083
00084 #if !defined(__TBB_CompareAndSwap4) \
00085 || !defined(__TBB_CompareAndSwap8) \
00086 || !defined(__TBB_Yield) \
00087 || !defined(__TBB_release_consistency_helper)
00088 #error Minimal requirements for tbb_machine.h not satisfied
00089 #endif
00090
#ifndef __TBB_load_with_acquire
// Generic fallback: load a value from a shared location with acquire semantics.
// The __TBB_release_consistency_helper() fence runs after the load so later
// memory operations cannot be reordered before it.  Platform headers may
// pre-define __TBB_load_with_acquire with a cheaper implementation.
// NOTE(review): atomicity of the load itself presumes T is a machine-word-sized
// type — confirm against callers.
template<typename T>
inline T __TBB_load_with_acquire(const volatile T& location) {
    T temp = location;
    __TBB_release_consistency_helper();
    return temp;
}
#endif
00100
#ifndef __TBB_store_with_release
// Generic fallback: store a value to a shared location with release semantics.
// The fence runs before the store so prior writes become visible to other
// threads no later than the store itself.
template<typename T, typename V>
inline void __TBB_store_with_release(volatile T& location, V value) {
    __TBB_release_consistency_helper();
    location = T(value);
}
#endif
00109
#ifndef __TBB_Pause
// Generic fallback for a machine "pause" instruction: just yield the processor.
// The int32_t argument (the repeat count on platforms with a native pause) is
// deliberately ignored here.
inline void __TBB_Pause(int32_t) {
    __TBB_Yield();
}
#endif
00115
00116 namespace tbb {
00117 namespace internal {
00118
00120
00121 class atomic_backoff {
00123
00125 static const int32_t LOOPS_BEFORE_YIELD = 16;
00126 int32_t count;
00127 public:
00128 atomic_backoff() : count(1) {}
00129
00131 void pause() {
00132 if( count<=LOOPS_BEFORE_YIELD ) {
00133 __TBB_Pause(count);
00134
00135 count*=2;
00136 } else {
00137
00138 __TBB_Yield();
00139 }
00140 }
00141
00142
00143 bool bounded_pause() {
00144 if( count<=LOOPS_BEFORE_YIELD ) {
00145 __TBB_Pause(count);
00146
00147 count*=2;
00148 return true;
00149 } else {
00150 return false;
00151 }
00152 }
00153
00154 void reset() {
00155 count = 1;
00156 }
00157 };
00158
00160
00161 template<typename T, typename U>
00162 void spin_wait_while_eq( const volatile T& location, U value ) {
00163 atomic_backoff backoff;
00164 while( location==value ) backoff.pause();
00165 }
00166
00168
00169 template<typename T, typename U>
00170 void spin_wait_until_eq( const volatile T& location, const U value ) {
00171 atomic_backoff backoff;
00172 while( location!=value ) backoff.pause();
00173 }
00174
00175
00176
00177
// Compare-and-swap for a 1- or 2-byte operand (S==1 or S==2), emulated on top
// of the mandatory 4-byte CAS.  The enclosing aligned 32-bit word is located
// and only the bits selected by `mask` are logically compared and replaced;
// the neighboring bytes of the word are preserved.
template<size_t S, typename T>
inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand ) {
    // Round ptr down to the enclosing aligned 4-byte word.
    volatile uint32_t * base = (uint32_t*)( (uintptr_t)ptr & ~(uintptr_t)0x3 );
#if __TBB_BIG_ENDIAN
    // Big-endian: lower addresses hold more significant bytes, so the bit
    // offset counts from the other end of the word.
    const uint8_t bitoffset = uint8_t( 8*( 4-S - (uintptr_t(ptr) & 0x3) ) );
#else
    const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) );
#endif
    // Mask selecting the S operand bytes within the 32-bit word.
    const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset;
    atomic_backoff b;
    uint32_t result;
    for(;;) {
        result = *base; // reload the surrounding bytes on every attempt
        // Both candidate words carry the just-observed neighboring bytes.
        uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset );
        uint32_t new_value = ( result & ~mask ) | ( value << bitoffset );

        result = __TBB_CompareAndSwap4( base, new_value, old_value );
        // Done if the CAS succeeded, or if the operand bytes themselves differ
        // from the comparand (a genuine mismatch).  Retry only when the CAS
        // failed because *neighboring* bytes changed underneath us.
        if( result==old_value
            || ((result^old_value)&mask)!=0 )
            break;
        else
            b.pause();
    }
    // Extract and return the operand bytes that were observed.
    return T((result & mask) >> bitoffset);
}
00203
// Size-dispatched compare-and-swap.  The primary template routes to the
// machine-word primitive __TBB_CompareAndSwapW; the explicit specializations
// below route each fixed operand size to its native primitive, falling back to
// the masked 4-byte emulation when a platform lacks a native 1- or 2-byte CAS.
template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ) {
    return __TBB_CompareAndSwapW((T *)ptr,value,comparand);
}

// 1-byte CAS: native if the platform header provides it, else masked emulation.
template<>
inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
#ifdef __TBB_CompareAndSwap1
    return __TBB_CompareAndSwap1(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,value,comparand);
#endif
}

// 2-byte CAS: native if available, else masked emulation.
template<>
inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
#ifdef __TBB_CompareAndSwap2
    return __TBB_CompareAndSwap2(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,value,comparand);
#endif
}

// 4- and 8-byte CAS are required of every platform header (checked above).
template<>
inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) {
    return __TBB_CompareAndSwap4(ptr,value,comparand);
}

template<>
inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) {
    return __TBB_CompareAndSwap8(ptr,value,comparand);
}
00236
00237 template<size_t S, typename T>
00238 inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
00239 atomic_backoff b;
00240 T result;
00241 for(;;) {
00242 result = *reinterpret_cast<volatile T *>(ptr);
00243
00244 if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
00245 break;
00246 b.pause();
00247 }
00248 return result;
00249 }
00250
00251 template<size_t S, typename T>
00252 inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
00253 atomic_backoff b;
00254 T result;
00255 for(;;) {
00256 result = *reinterpret_cast<volatile T *>(ptr);
00257
00258 if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
00259 break;
00260 b.pause();
00261 }
00262 return result;
00263 }
00264
00265
00266
00267
00268
00269
00270
00271
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict
// Machinery for obtaining, at compile time, a POD type whose alignment is at
// least as strict as that of a given type T.  Used to declare suitably
// aligned raw storage.

// A 16-byte-aligned type presumed to have the strictest alignment the
// machine requires, expressed in each compiler's own alignment syntax.
#if __GNUC__ || __SUNPRO_CC
struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
} __attribute__((aligned(16)));
#elif _MSC_VER
__declspec(align(16)) struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
};
#else
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) or __TBB_machine_type_with_strictest_alignment
#endif

// Maps an alignment value N to a type with (at least) that alignment; any N
// without a specialization falls back to the strictest-alignment type.
template<size_t N> struct type_with_alignment {__TBB_machine_type_with_strictest_alignment member;};
template<> struct type_with_alignment<1> { char member; };
template<> struct type_with_alignment<2> { uint16_t member; };
template<> struct type_with_alignment<4> { uint32_t member; };
template<> struct type_with_alignment<8> { uint64_t member; };

#if _MSC_VER||defined(__GNUC__)&&__GNUC__==3 && __GNUC_MINOR__<=2
// Workaround for compilers that mishandle __alignof applied directly inside a
// template argument: route the computation through an intermediate template.
template<size_t Size, typename T>
struct work_around_alignment_bug {
#if _MSC_VER
    static const size_t alignment = __alignof(T);
#else
    static const size_t alignment = __alignof__(T);
#endif
};
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
#elif __GNUC__ || __SUNPRO_CC
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__alignof__(T)>
#else
// No portable way to compute alignment: conservatively use the strictest type.
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) __TBB_machine_type_with_strictest_alignment
#endif
#endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
00311
00312 }
00313 }
00314
// Bind every primitive the platform header did not supply to the generic
// CAS-based implementations defined in tbb::internal above.
#ifndef __TBB_CompareAndSwap1
#define __TBB_CompareAndSwap1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
#endif

#ifndef __TBB_CompareAndSwap2
#define __TBB_CompareAndSwap2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
#endif

#ifndef __TBB_CompareAndSwapW
#define __TBB_CompareAndSwapW tbb::internal::__TBB_CompareAndSwapGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#ifndef __TBB_FetchAndAdd1
#define __TBB_FetchAndAdd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndAdd2
#define __TBB_FetchAndAdd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndAdd4
#define __TBB_FetchAndAdd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndAdd8
#define __TBB_FetchAndAdd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndAddW
#define __TBB_FetchAndAddW tbb::internal::__TBB_FetchAndAddGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#ifndef __TBB_FetchAndStore1
#define __TBB_FetchAndStore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndStore2
#define __TBB_FetchAndStore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndStore4
#define __TBB_FetchAndStore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndStore8
#define __TBB_FetchAndStore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndStoreW
#define __TBB_FetchAndStoreW tbb::internal::__TBB_FetchAndStoreGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif
00366
#if __TBB_DECL_FENCED_ATOMICS
// Explicitly fenced variants.  For each primitive, default the full-fence form
// to the plain (strongest available) primitive, and default the acquire and
// release forms to the full-fence form, unless the platform header already
// defined a cheaper specialized version.

#ifndef __TBB_CompareAndSwap1__TBB_full_fence
#define __TBB_CompareAndSwap1__TBB_full_fence __TBB_CompareAndSwap1
#endif
#ifndef __TBB_CompareAndSwap1acquire
#define __TBB_CompareAndSwap1acquire __TBB_CompareAndSwap1__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap1release
#define __TBB_CompareAndSwap1release __TBB_CompareAndSwap1__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap2__TBB_full_fence
#define __TBB_CompareAndSwap2__TBB_full_fence __TBB_CompareAndSwap2
#endif
#ifndef __TBB_CompareAndSwap2acquire
#define __TBB_CompareAndSwap2acquire __TBB_CompareAndSwap2__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap2release
#define __TBB_CompareAndSwap2release __TBB_CompareAndSwap2__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap4__TBB_full_fence
#define __TBB_CompareAndSwap4__TBB_full_fence __TBB_CompareAndSwap4
#endif
#ifndef __TBB_CompareAndSwap4acquire
#define __TBB_CompareAndSwap4acquire __TBB_CompareAndSwap4__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap4release
#define __TBB_CompareAndSwap4release __TBB_CompareAndSwap4__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap8__TBB_full_fence
#define __TBB_CompareAndSwap8__TBB_full_fence __TBB_CompareAndSwap8
#endif
#ifndef __TBB_CompareAndSwap8acquire
#define __TBB_CompareAndSwap8acquire __TBB_CompareAndSwap8__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap8release
#define __TBB_CompareAndSwap8release __TBB_CompareAndSwap8__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd1__TBB_full_fence
#define __TBB_FetchAndAdd1__TBB_full_fence __TBB_FetchAndAdd1
#endif
#ifndef __TBB_FetchAndAdd1acquire
#define __TBB_FetchAndAdd1acquire __TBB_FetchAndAdd1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd1release
#define __TBB_FetchAndAdd1release __TBB_FetchAndAdd1__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd2__TBB_full_fence
#define __TBB_FetchAndAdd2__TBB_full_fence __TBB_FetchAndAdd2
#endif
#ifndef __TBB_FetchAndAdd2acquire
#define __TBB_FetchAndAdd2acquire __TBB_FetchAndAdd2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd2release
#define __TBB_FetchAndAdd2release __TBB_FetchAndAdd2__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd4__TBB_full_fence
#define __TBB_FetchAndAdd4__TBB_full_fence __TBB_FetchAndAdd4
#endif
#ifndef __TBB_FetchAndAdd4acquire
#define __TBB_FetchAndAdd4acquire __TBB_FetchAndAdd4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd4release
#define __TBB_FetchAndAdd4release __TBB_FetchAndAdd4__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd8__TBB_full_fence
#define __TBB_FetchAndAdd8__TBB_full_fence __TBB_FetchAndAdd8
#endif
#ifndef __TBB_FetchAndAdd8acquire
#define __TBB_FetchAndAdd8acquire __TBB_FetchAndAdd8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd8release
#define __TBB_FetchAndAdd8release __TBB_FetchAndAdd8__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore1__TBB_full_fence
#define __TBB_FetchAndStore1__TBB_full_fence __TBB_FetchAndStore1
#endif
#ifndef __TBB_FetchAndStore1acquire
#define __TBB_FetchAndStore1acquire __TBB_FetchAndStore1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore1release
#define __TBB_FetchAndStore1release __TBB_FetchAndStore1__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore2__TBB_full_fence
#define __TBB_FetchAndStore2__TBB_full_fence __TBB_FetchAndStore2
#endif
#ifndef __TBB_FetchAndStore2acquire
#define __TBB_FetchAndStore2acquire __TBB_FetchAndStore2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore2release
#define __TBB_FetchAndStore2release __TBB_FetchAndStore2__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore4__TBB_full_fence
#define __TBB_FetchAndStore4__TBB_full_fence __TBB_FetchAndStore4
#endif
#ifndef __TBB_FetchAndStore4acquire
#define __TBB_FetchAndStore4acquire __TBB_FetchAndStore4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore4release
#define __TBB_FetchAndStore4release __TBB_FetchAndStore4__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore8__TBB_full_fence
#define __TBB_FetchAndStore8__TBB_full_fence __TBB_FetchAndStore8
#endif
#ifndef __TBB_FetchAndStore8acquire
#define __TBB_FetchAndStore8acquire __TBB_FetchAndStore8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore8release
#define __TBB_FetchAndStore8release __TBB_FetchAndStore8__TBB_full_fence
#endif

#endif // __TBB_DECL_FENCED_ATOMICS
00490
00491
// Fallbacks for the ordered word-sized add/increment/decrement operations,
// all expressed in terms of the word-sized fetch-and-add.
#ifndef __TBB_FetchAndAddWrelease
#define __TBB_FetchAndAddWrelease __TBB_FetchAndAddW
#endif

#ifndef __TBB_FetchAndIncrementWacquire
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
#endif

#ifndef __TBB_FetchAndDecrementWrelease
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1))
#endif
00503
#if __TBB_WORDSIZE==4
// On 32-bit platforms a 64-bit load or store is not atomic by itself, so both
// are emulated through the mandatory 8-byte compare-and-swap.

#ifndef __TBB_Store8
// Atomically store a 64-bit value via a CAS retry loop.
inline void __TBB_Store8 (volatile void *ptr, int64_t value) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        // The initial read need not be atomic: if it observes a torn value,
        // the CAS below simply fails and we retry with the fresh result.
        int64_t result = *(int64_t *)ptr;
        if( __TBB_CompareAndSwap8(ptr,value,result)==result ) break;
        b.pause();
    }
}
#endif

#ifndef __TBB_Load8
// Atomically load a 64-bit value.  A CAS whose comparand and new value are
// equal leaves *ptr unchanged, but its return value is an atomic snapshot of
// all 8 bytes regardless of whether the comparison succeeded.
inline int64_t __TBB_Load8 (const volatile void *ptr) {
    int64_t result = *(int64_t *)ptr;
    result = __TBB_CompareAndSwap8((volatile void *)ptr,result,result);
    return result;
}
#endif
#endif  /* __TBB_WORDSIZE==4 */
00525
#ifndef __TBB_Log2
//! Floor of the base-2 logarithm of x; returns -1 when x==0.
/** Locates the highest set bit with a binary-search cascade of shifts. */
inline intptr_t __TBB_Log2( uintptr_t x ) {
    if( x==0 )
        return -1;          // log2 is undefined for zero
    intptr_t log = 0;
    uintptr_t hi;
#if __TBB_WORDSIZE>=8
    if( (hi = x>>32) ) { x = hi; log += 32; }
#endif
    if( (hi = x>>16) ) { x = hi; log += 16; }
    if( (hi = x>>8) )  { x = hi; log += 8; }
    if( (hi = x>>4) )  { x = hi; log += 4; }
    if( (hi = x>>2) )  { x = hi; log += 2; }
    // x is now in [1,3]; one final bit decides the answer.
    if( x & 2 )
        ++log;
    return log;
}
#endif
00541
00542 #ifndef __TBB_AtomicOR
00543 inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
00544 tbb::internal::atomic_backoff b;
00545 for(;;) {
00546 uintptr_t tmp = *(volatile uintptr_t *)operand;
00547 uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
00548 if( result==tmp ) break;
00549 b.pause();
00550 }
00551 }
00552 #endif
00553
00554 #ifndef __TBB_AtomicAND
00555 inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
00556 tbb::internal::atomic_backoff b;
00557 for(;;) {
00558 uintptr_t tmp = *(volatile uintptr_t *)operand;
00559 uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
00560 if( result==tmp ) break;
00561 b.pause();
00562 }
00563 }
00564 #endif
00565
00566 #ifndef __TBB_TryLockByte
00567 inline bool __TBB_TryLockByte( unsigned char &flag ) {
00568 return __TBB_CompareAndSwap1(&flag,1,0)==0;
00569 }
00570 #endif
00571
00572 #ifndef __TBB_LockByte
00573 inline uintptr_t __TBB_LockByte( unsigned char& flag ) {
00574 if ( !__TBB_TryLockByte(flag) ) {
00575 tbb::internal::atomic_backoff b;
00576 do {
00577 b.pause();
00578 } while ( !__TBB_TryLockByte(flag) );
00579 }
00580 return 0;
00581 }
00582 #endif
00583
00584 #endif