LLVM OpenMP* Runtime Library
kmp_atomic.cpp
1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15 
16 typedef unsigned char uchar;
17 typedef unsigned short ushort;
18 
561 /*
562  * Global vars
563  */
564 
565 #ifndef KMP_GOMP_COMPAT
566 int __kmp_atomic_mode = 1; // Intel perf
567 #else
568 int __kmp_atomic_mode = 2; // GOMP compatibility
569 #endif /* KMP_GOMP_COMPAT */
570 
571 KMP_ALIGN(128)
572 
573 // Control access to all user coded atomics in Gnu compat mode
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
587 // Control access to all user coded atomics for float complex (8-byte) data type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599 
600 /* 2007-03-02:
601  Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
602  on *_32 and *_32e. This is just a temporary workaround for the problem. It
603  seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
604  in assembler language. */
605 #define KMP_ATOMIC_VOLATILE volatile
606 
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608 
609 static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610  return lhs.q + rhs.q;
611 }
612 static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613  return lhs.q - rhs.q;
614 }
615 static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616  return lhs.q * rhs.q;
617 }
618 static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619  return lhs.q / rhs.q;
620 }
621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622  return lhs.q < rhs.q;
623 }
624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625  return lhs.q > rhs.q;
626 }
627 
628 static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629  return lhs.q + rhs.q;
630 }
631 static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632  return lhs.q - rhs.q;
633 }
634 static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635  return lhs.q * rhs.q;
636 }
637 static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638  return lhs.q / rhs.q;
639 }
640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641  return lhs.q < rhs.q;
642 }
643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644  return lhs.q > rhs.q;
645 }
646 
647 static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
648  kmp_cmplx128_a4_t &rhs) {
649  return lhs.q + rhs.q;
650 }
651 static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
652  kmp_cmplx128_a4_t &rhs) {
653  return lhs.q - rhs.q;
654 }
655 static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
656  kmp_cmplx128_a4_t &rhs) {
657  return lhs.q * rhs.q;
658 }
659 static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
660  kmp_cmplx128_a4_t &rhs) {
661  return lhs.q / rhs.q;
662 }
663 
664 static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
665  kmp_cmplx128_a16_t &rhs) {
666  return lhs.q + rhs.q;
667 }
668 static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
669  kmp_cmplx128_a16_t &rhs) {
670  return lhs.q - rhs.q;
671 }
672 static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
673  kmp_cmplx128_a16_t &rhs) {
674  return lhs.q * rhs.q;
675 }
676 static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
677  kmp_cmplx128_a16_t &rhs) {
678  return lhs.q / rhs.q;
679 }
680 
681 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
682 
683 // ATOMIC implementation routines -----------------------------------------
684 // One routine for each operation and operand type.
685 // All routine declarations look like
686 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
687 
688 #define KMP_CHECK_GTID \
689  if (gtid == KMP_GTID_UNKNOWN) { \
690  gtid = __kmp_entry_gtid(); \
691  } // check and get gtid when needed
692 
693 // Beginning of a definition (provides name, parameters, debug trace)
694 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
695 // fixed)
696 // OP_ID - operation identifier (add, sub, mul, ...)
697 // TYPE - operands' type
698 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
699  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
700  TYPE *lhs, TYPE rhs) { \
701  KMP_DEBUG_ASSERT(__kmp_init_serial); \
702  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
703 
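// Editor's note: an illustrative sketch, not part of the original source.
// For example, ATOMIC_BEGIN(fixed4, add, kmp_int32, void) opens a routine
// roughly like the following; the operation macro that follows supplies the
// body and the closing brace:
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//     /* ...body supplied by ATOMIC_FIXED_ADD, ATOMIC_CMPXCHG, etc... */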
704 // ------------------------------------------------------------------------
705 // Lock variables used for critical sections for various size operands
706 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
707 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
708 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
709 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
710 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
711 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
712 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
713 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
714 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
715 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
716 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
717 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
718 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
719 
720 // ------------------------------------------------------------------------
721 // Operation on *lhs, rhs bound by critical section
722 // OP - operator (it's supposed to contain an assignment)
723 // LCK_ID - lock identifier
724 // Note: gtid is not checked here since it should always be valid
725 // (1, 2-byte operands expect a valid gtid; other sizes check it before this macro)
726 #define OP_CRITICAL(OP, LCK_ID) \
727  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
728  \
729  (*lhs) OP(rhs); \
730  \
731  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
732 
733 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
734  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
735  (*lhs) = (TYPE)((*lhs)OP rhs); \
736  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
737 
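// Editor's note: an illustrative sketch, not part of the original source.
// For example, OP_UPDATE_CRITICAL(kmp_cmplx64, +, 16c) serializes the
// read-modify-write with the type-specific lock:
//
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
//   (*lhs) = (kmp_cmplx64)((*lhs) + rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);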
738 // ------------------------------------------------------------------------
739 // For GNU compatibility, we may need to use a critical section,
740 // even though it is not required by the ISA.
741 //
742 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
743 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
744 // critical section. On Intel(R) 64, all atomic operations are done with fetch
745 // and add or compare and exchange. Therefore, the FLAG parameter to this
746 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
747 // require a critical section, where we predict that they will be implemented
748 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
749 //
750 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
751 // the FLAG parameter should always be 1. If we know that we will be using
752 // a critical section, then we want to make certain that we use the generic
753 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
754 // locks that are specialized based upon the size or type of the data.
755 //
756 // If FLAG is 0, then we are relying on dead code elimination by the build
757 // compiler to get rid of the useless block of code, and save a needless
758 // branch at runtime.
759 
760 #ifdef KMP_GOMP_COMPAT
761 #define OP_GOMP_CRITICAL(OP, FLAG) \
762  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
763  KMP_CHECK_GTID; \
764  OP_CRITICAL(OP, 0); \
765  return; \
766  }
767 
768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \
769  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
770  KMP_CHECK_GTID; \
771  OP_UPDATE_CRITICAL(TYPE, OP, 0); \
772  return; \
773  }
774 #else
775 #define OP_GOMP_CRITICAL(OP, FLAG)
776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
777 #endif /* KMP_GOMP_COMPAT */
778 
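// Editor's note: an illustrative sketch, not part of the original source.
// With KMP_GOMP_COMPAT defined and FLAG == 1, OP_UPDATE_GOMP_CRITICAL(TYPE,
// OP, 1) expands to an early-out that takes the generic lock (LCK_ID 0, i.e.
// __kmp_atomic_lock) whenever the runtime runs in GOMP compatibility mode:
//
//   if ((1) && (__kmp_atomic_mode == 2)) {
//     if (gtid == KMP_GTID_UNKNOWN) {
//       gtid = __kmp_entry_gtid();
//     }
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
//     (*lhs) = (TYPE)((*lhs) OP rhs);
//     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
//     return;
//   }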
779 #if KMP_MIC
780 #define KMP_DO_PAUSE _mm_delay_32(1)
781 #else
782 #define KMP_DO_PAUSE
783 #endif /* KMP_MIC */
784 
785 // ------------------------------------------------------------------------
786 // Operation on *lhs, rhs using "compare_and_store" routine
787 // TYPE - operands' type
788 // BITS - size in bits, used to distinguish low level calls
789 // OP - operator
790 #define OP_CMPXCHG(TYPE, BITS, OP) \
791  { \
792  TYPE old_value, new_value; \
793  old_value = *(TYPE volatile *)lhs; \
794  new_value = (TYPE)(old_value OP rhs); \
795  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
796  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
797  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
798  KMP_DO_PAUSE; \
799  \
800  old_value = *(TYPE volatile *)lhs; \
801  new_value = (TYPE)(old_value OP rhs); \
802  } \
803  }
804 
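// Editor's note: an illustrative sketch, not part of the original source.
// For example, OP_CMPXCHG(kmp_real32, 32, +) reinterprets the 4-byte float
// bits as kmp_int32 for the compare-and-store, and retries whenever another
// thread modified *lhs between the read and the CAS:
//
//   kmp_real32 old_value, new_value;
//   old_value = *(kmp_real32 volatile *)lhs;
//   new_value = (kmp_real32)(old_value + rhs);
//   while (!KMP_COMPARE_AND_STORE_ACQ32(
//       (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//       *VOLATILE_CAST(kmp_int32 *) & new_value)) {
//     KMP_DO_PAUSE;
//     old_value = *(kmp_real32 volatile *)lhs;
//     new_value = (kmp_real32)(old_value + rhs);
//   }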
805 #if USE_CMPXCHG_FIX
806 // 2007-06-25:
807 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
808 // and win_32e are affected (I verified the asm). Compiler ignores the volatile
809 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
810 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
811 // the workaround.
812 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
813  { \
814  struct _sss { \
815  TYPE cmp; \
816  kmp_int##BITS *vvv; \
817  }; \
818  struct _sss old_value, new_value; \
819  old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
820  new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
821  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
822  new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
823  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
824  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
825  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
826  KMP_DO_PAUSE; \
827  \
828  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
829  new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
830  } \
831  }
832 // end of the first part of the workaround for C78287
833 #endif // USE_CMPXCHG_FIX
834 
835 #if KMP_OS_WINDOWS && KMP_ARCH_AARCH64
836 // MSVC for ARM64 cannot build the explicit type casts used above, so
837 // OP_CMPXCHG is redefined here using the OP_CMPXCHG_WORKAROUND formulation
838 #undef OP_CMPXCHG
839 #define OP_CMPXCHG(TYPE, BITS, OP) \
840  { \
841  struct _sss { \
842  TYPE cmp; \
843  kmp_int##BITS *vvv; \
844  }; \
845  struct _sss old_value, new_value; \
846  old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
847  new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
848  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
849  new_value.cmp = old_value.cmp OP rhs; \
850  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
851  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
852  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
853  KMP_DO_PAUSE; \
854  \
855  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
856  new_value.cmp = old_value.cmp OP rhs; \
857  } \
858  }
859 
860 #undef OP_UPDATE_CRITICAL
861 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
862  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
863  (*lhs) = (*lhs)OP rhs; \
864  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
865 
866 #endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64
867 
868 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
869 
870 // ------------------------------------------------------------------------
871 // X86 or X86_64: no alignment problems ====================================
872 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
873  GOMP_FLAG) \
874  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
875  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
876  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
877  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
878  }
879 // -------------------------------------------------------------------------
880 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
881  GOMP_FLAG) \
882  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
883  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
884  OP_CMPXCHG(TYPE, BITS, OP) \
885  }
886 #if USE_CMPXCHG_FIX
887 // -------------------------------------------------------------------------
888 // workaround for C78287 (complex(kind=4) data type)
889 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
890  MASK, GOMP_FLAG) \
891  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
892  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
893  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
894  }
895 // end of the second part of the workaround for C78287
896 #endif // USE_CMPXCHG_FIX
897 
898 #else
899 // -------------------------------------------------------------------------
900 // Code for other architectures that don't handle unaligned accesses.
901 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
902  GOMP_FLAG) \
903  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
904  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
905  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
906  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
907  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
908  } else { \
909  KMP_CHECK_GTID; \
910  OP_UPDATE_CRITICAL(TYPE, OP, \
911  LCK_ID) /* unaligned address - use critical */ \
912  } \
913  }
914 // -------------------------------------------------------------------------
915 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
916  GOMP_FLAG) \
917  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
918  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
919  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
920  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
921  } else { \
922  KMP_CHECK_GTID; \
923  OP_UPDATE_CRITICAL(TYPE, OP, \
924  LCK_ID) /* unaligned address - use critical */ \
925  } \
926  }
927 #if USE_CMPXCHG_FIX
928 // -------------------------------------------------------------------------
929 // workaround for C78287 (complex(kind=4) data type)
930 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
931  MASK, GOMP_FLAG) \
932  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
933  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
934  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
935  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
936  } else { \
937  KMP_CHECK_GTID; \
938  OP_UPDATE_CRITICAL(TYPE, OP, \
939  LCK_ID) /* unaligned address - use critical */ \
940  } \
941  }
942 // end of the second part of the workaround for C78287
943 #endif // USE_CMPXCHG_FIX
944 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
945 
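// Editor's note: an illustrative sketch, not part of the original source.
// On targets without unaligned-access support, MASK selects the alignment
// test; e.g. MASK == 7 checks for 8-byte alignment:
//
//   if (!((kmp_uintptr_t)lhs & 0x7)) {
//     /* aligned: lock-free compare-and-store path */
//   } else {
//     /* unaligned: fall back to the size-specific critical section */
//   }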
946 // Routines for ATOMIC 4-byte operands addition and subtraction
947 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
948  0) // __kmpc_atomic_fixed4_add
949 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
950  0) // __kmpc_atomic_fixed4_sub
951 
952 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
953  KMP_ARCH_X86) // __kmpc_atomic_float4_add
954 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
955  KMP_ARCH_X86) // __kmpc_atomic_float4_sub
956 
957 // Routines for ATOMIC 8-byte operands addition and subtraction
958 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
959  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
960 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
961  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
962 
963 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
964  KMP_ARCH_X86) // __kmpc_atomic_float8_add
965 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
966  KMP_ARCH_X86) // __kmpc_atomic_float8_sub
967 
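// Editor's note: an illustrative sketch, not part of the original source.
// A compiler that targets these entry points (rather than inlining the
// atomic) lowers an OpenMP atomic update to the matching routine; "loc" is a
// placeholder ident_t describing the source location:
//
//   float x, y;
//   #pragma omp atomic
//   x = x + y;          // --> __kmpc_atomic_float4_add(&loc, gtid, &x, y);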
968 // ------------------------------------------------------------------------
969 // Entries definition for integer operands
970 // TYPE_ID - operands type and size (fixed4, float4)
971 // OP_ID - operation identifier (add, sub, mul, ...)
972 // TYPE - operand type
973 // BITS - size in bits, used to distinguish low level calls
974 // OP - operator (used in critical section)
975 // LCK_ID - lock identifier, used to possibly distinguish lock variable
976 // MASK - used for alignment check
977 
978 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
979 // ------------------------------------------------------------------------
980 // Routines for ATOMIC integer operands, other operators
981 // ------------------------------------------------------------------------
982 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
983 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
984  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
985 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
986  0) // __kmpc_atomic_fixed1_andb
987 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
988  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
989 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
990  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
991 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
992  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
993 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
994  0) // __kmpc_atomic_fixed1_orb
995 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
996  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
997 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
998  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
999 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
1000  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
1001 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
1002  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
1003 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
1004  0) // __kmpc_atomic_fixed1_xor
1005 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
1006  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
1007 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
1008  0) // __kmpc_atomic_fixed2_andb
1009 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
1010  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
1011 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
1012  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
1013 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
1014  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
1015 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
1016  0) // __kmpc_atomic_fixed2_orb
1017 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
1018  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
1019 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
1020  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
1021 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
1022  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
1023 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
1024  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
1025 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
1026  0) // __kmpc_atomic_fixed2_xor
1027 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
1028  0) // __kmpc_atomic_fixed4_andb
1029 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
1030  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
1031 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
1032  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
1033 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
1034  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
1035 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
1036  0) // __kmpc_atomic_fixed4_orb
1037 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
1038  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
1039 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
1040  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
1041 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
1042  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
1043 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
1044  0) // __kmpc_atomic_fixed4_xor
1045 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
1046  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
1047 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
1048  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
1049 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1050  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1051 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1052  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1053 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1054  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1055 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1056  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1057 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1058  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1059 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1060  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1061 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1062  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1063 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1064  KMP_ARCH_X86) // __kmpc_atomic_float4_div
1065 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1066  KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1067 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1068  KMP_ARCH_X86) // __kmpc_atomic_float8_div
1069 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1070  KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1071 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
1072 
1073 /* ------------------------------------------------------------------------ */
1074 /* Routines for C/C++ Reduction operators && and || */
1075 
1076 // ------------------------------------------------------------------------
1077 // Need separate macros for &&, || because there is no combined assignment
1078 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1079 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1080  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1081  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1082  OP_CRITICAL(= *lhs OP, LCK_ID) \
1083  }
1084 
1085 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1086 
1087 // ------------------------------------------------------------------------
1088 // X86 or X86_64: no alignment problems ===================================
1089 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1090  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1091  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1092  OP_CMPXCHG(TYPE, BITS, OP) \
1093  }
1094 
1095 #else
1096 // ------------------------------------------------------------------------
1097 // Code for other architectures that don't handle unaligned accesses.
1098 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1099  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1100  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1101  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1102  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1103  } else { \
1104  KMP_CHECK_GTID; \
1105  OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
1106  } \
1107  }
1108 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1109 
1110 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1111  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1112 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1113  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1114 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1115  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1116 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1117  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1118 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1119  0) // __kmpc_atomic_fixed4_andl
1120 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1121  0) // __kmpc_atomic_fixed4_orl
1122 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1123  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1124 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1125  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1126 
1127 /* ------------------------------------------------------------------------- */
1128 /* Routines for Fortran operators that have no direct C counterpart:        */
1129 /* MAX, MIN, .EQV., .NEQV. */
1130 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
1131 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1132 
1133 // -------------------------------------------------------------------------
1134 // MIN and MAX need separate macros
1135 // OP - comparison operator used to check whether any action is needed
1136 #define MIN_MAX_CRITSECT(OP, LCK_ID) \
1137  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1138  \
1139  if (*lhs OP rhs) { /* still need actions? */ \
1140  *lhs = rhs; \
1141  } \
1142  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1143 
1144 // -------------------------------------------------------------------------
1145 #ifdef KMP_GOMP_COMPAT
1146 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
1147  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1148  KMP_CHECK_GTID; \
1149  MIN_MAX_CRITSECT(OP, 0); \
1150  return; \
1151  }
1152 #else
1153 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1154 #endif /* KMP_GOMP_COMPAT */
1155 
1156 // -------------------------------------------------------------------------
1157 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1158  { \
1159  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1160  TYPE old_value; \
1161  temp_val = *lhs; \
1162  old_value = temp_val; \
1163  while (old_value OP rhs && /* still need actions? */ \
1164  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
1165  (kmp_int##BITS *)lhs, \
1166  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1167  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
1168  temp_val = *lhs; \
1169  old_value = temp_val; \
1170  } \
1171  }
1172 
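// Editor's note: an illustrative sketch, not part of the original source.
// The max entries instantiate these macros with OP '<' and the min entries
// with '>', so "*lhs OP rhs" asks "does rhs still beat the stored value?".
// MIN_MAX_CMPXCHG(kmp_int32, 32, <), for instance, keeps retrying only while
// rhs is still greater than the current value:
//
//   while (old_value < rhs &&
//          !KMP_COMPARE_AND_STORE_ACQ32(
//              (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//              *VOLATILE_CAST(kmp_int32 *) & rhs)) {
//     temp_val = *lhs; /* another thread updated *lhs: reread and retest */
//     old_value = temp_val;
//   }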
1173 // -------------------------------------------------------------------------
1174 // 1-byte, 2-byte operands - use critical section
1175 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1176  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1177  if (*lhs OP rhs) { /* need actions? */ \
1178  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1179  MIN_MAX_CRITSECT(OP, LCK_ID) \
1180  } \
1181  }
1182 
1183 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1184 
1185 // -------------------------------------------------------------------------
1186 // X86 or X86_64: no alignment problems ====================================
1187 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1188  GOMP_FLAG) \
1189  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1190  if (*lhs OP rhs) { \
1191  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1192  MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1193  } \
1194  }
1195 
1196 #else
1197 // -------------------------------------------------------------------------
1198 // Code for other architectures that don't handle unaligned accesses.
1199 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1200  GOMP_FLAG) \
1201  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1202  if (*lhs OP rhs) { \
1203  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1204  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1205  MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1206  } else { \
1207  KMP_CHECK_GTID; \
1208  MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
1209  } \
1210  } \
1211  }
1212 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1213 
1214 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1215  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1216 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1217  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1218 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1219  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1220 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1221  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1222 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1223  0) // __kmpc_atomic_fixed4_max
1224 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1225  0) // __kmpc_atomic_fixed4_min
1226 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1227  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1228 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1229  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1230 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1231  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1232 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1233  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1234 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1235  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1236 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1237  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1238 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1239 MIN_MAX_CRITICAL(float10, max, long double, <, 10r,
1240  1) // __kmpc_atomic_float10_max
1241 MIN_MAX_CRITICAL(float10, min, long double, >, 10r,
1242  1) // __kmpc_atomic_float10_min
1243 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1244 #if KMP_HAVE_QUAD
1245 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1246  1) // __kmpc_atomic_float16_max
1247 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1248  1) // __kmpc_atomic_float16_min
1249 #if (KMP_ARCH_X86)
1250 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1251  1) // __kmpc_atomic_float16_max_a16
1252 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1253  1) // __kmpc_atomic_float16_min_a16
1254 #endif // (KMP_ARCH_X86)
1255 #endif // KMP_HAVE_QUAD
1256 // ------------------------------------------------------------------------
1257 // Need separate macros for .EQV. because of the need for the complement (~)
1258 // OP is ignored for critical sections; ^= ~ is used instead
1259 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1260  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1261  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1262  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \
1263  }
1264 
1265 // ------------------------------------------------------------------------
1266 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1267 // ------------------------------------------------------------------------
1268 // X86 or X86_64: no alignment problems ===================================
1269 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1270  GOMP_FLAG) \
1271  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1272  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1273  OP_CMPXCHG(TYPE, BITS, OP) \
1274  }
1275 // ------------------------------------------------------------------------
1276 #else
1277 // ------------------------------------------------------------------------
1278 // Code for other architectures that don't handle unaligned accesses.
1279 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1280  GOMP_FLAG) \
1281  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1282  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \
1283  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1284  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1285  } else { \
1286  KMP_CHECK_GTID; \
1287  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \
1288  } \
1289  }
1290 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1291 
1292 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1293  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1294 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1295  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1296 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1297  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1298 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1299  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1300 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1301  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1302 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1303  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1304 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1305  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1306 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1307  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1308 
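// Editor's note: an illustrative sketch, not part of the original source.
// .NEQV. is a plain xor, while .EQV. is implemented as x ^= ~y (xor with the
// complement); e.g. the body generated for __kmpc_atomic_fixed4_eqv computes
//
//   new_value = (kmp_int32)(old_value ^ ~rhs);
//
// inside the usual compare-and-store retry loop.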
1309 // ------------------------------------------------------------------------
1310 // Routines for Extended types: long double, _Quad, complex flavours (use
1311 // critical section)
1312 // TYPE_ID, OP_ID, TYPE - detailed above
1313 // OP - operator
1314 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1315 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1316  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1317  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1318  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1319  }
1320 
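// Editor's note: an illustrative sketch, not part of the original source.
// ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1) below therefore
// generates roughly (GOMP compatibility early-out omitted):
//
//   void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid,
//                                  long double *lhs, long double rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_float10_add: T#%d\n", gtid));
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//     (*lhs) = (long double)((*lhs) + rhs);
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   }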
1321 /* ------------------------------------------------------------------------- */
1322 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1323 // routines for long double type
1324 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1325  1) // __kmpc_atomic_float10_add
1326 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1327  1) // __kmpc_atomic_float10_sub
1328 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1329  1) // __kmpc_atomic_float10_mul
1330 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1331  1) // __kmpc_atomic_float10_div
1332 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1333 #if KMP_HAVE_QUAD
1334 // routines for _Quad type
1335 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1336  1) // __kmpc_atomic_float16_add
1337 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1338  1) // __kmpc_atomic_float16_sub
1339 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1340  1) // __kmpc_atomic_float16_mul
1341 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1342  1) // __kmpc_atomic_float16_div
1343 #if (KMP_ARCH_X86)
1344 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1345  1) // __kmpc_atomic_float16_add_a16
1346 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1347  1) // __kmpc_atomic_float16_sub_a16
1348 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1349  1) // __kmpc_atomic_float16_mul_a16
1350 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1351  1) // __kmpc_atomic_float16_div_a16
1352 #endif // (KMP_ARCH_X86)
1353 #endif // KMP_HAVE_QUAD
1354 // routines for complex types
1355 
1356 #if USE_CMPXCHG_FIX
1357 // workaround for C78287 (complex(kind=4) data type)
1358 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1359  1) // __kmpc_atomic_cmplx4_add
1360 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1361  1) // __kmpc_atomic_cmplx4_sub
1362 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1363  1) // __kmpc_atomic_cmplx4_mul
1364 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1365  1) // __kmpc_atomic_cmplx4_div
1366 // end of the workaround for C78287
1367 #else
1368 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1369 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1370 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1371 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1372 #endif // USE_CMPXCHG_FIX
1373 
1374 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1375 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1376 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1377 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1378 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1379 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1380  1) // __kmpc_atomic_cmplx10_add
1381 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1382  1) // __kmpc_atomic_cmplx10_sub
1383 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1384  1) // __kmpc_atomic_cmplx10_mul
1385 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1386  1) // __kmpc_atomic_cmplx10_div
1387 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1388 #if KMP_HAVE_QUAD
1389 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1390  1) // __kmpc_atomic_cmplx16_add
1391 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1392  1) // __kmpc_atomic_cmplx16_sub
1393 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1394  1) // __kmpc_atomic_cmplx16_mul
1395 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1396  1) // __kmpc_atomic_cmplx16_div
1397 #if (KMP_ARCH_X86)
1398 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1399  1) // __kmpc_atomic_cmplx16_add_a16
1400 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1401  1) // __kmpc_atomic_cmplx16_sub_a16
1402 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1403  1) // __kmpc_atomic_cmplx16_mul_a16
1404 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1405  1) // __kmpc_atomic_cmplx16_div_a16
1406 #endif // (KMP_ARCH_X86)
1407 #endif // KMP_HAVE_QUAD
1408 
1409 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1410 // Supported only on IA-32 architecture and Intel(R) 64
1411 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1412 
1413 // ------------------------------------------------------------------------
1414 // Operation on *lhs, rhs bound by critical section
1415 // OP - operator (it's supposed to contain an assignment)
1416 // LCK_ID - lock identifier
1417 // Note: gtid is not checked here since it should always be valid
1418 // (1, 2-byte operands expect a valid gtid; other sizes check it before this macro)
1419 #define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1420  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1421  \
1422  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
1423  \
1424  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1425 
1426 #ifdef KMP_GOMP_COMPAT
1427 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \
1428  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1429  KMP_CHECK_GTID; \
1430  OP_CRITICAL_REV(TYPE, OP, 0); \
1431  return; \
1432  }
1433 
1434 #else
1435 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
1436 #endif /* KMP_GOMP_COMPAT */
1437 
1438 // Beginning of a definition (provides name, parameters, debug trace)
1439 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1440 // fixed)
1441 // OP_ID - operation identifier (add, sub, mul, ...)
1442 // TYPE - operands' type
1443 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1444  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
1445  TYPE *lhs, TYPE rhs) { \
1446  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1447  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1448 
1449 // ------------------------------------------------------------------------
1450 // Operation on *lhs, rhs using "compare_and_store" routine
1451 // TYPE - operands' type
1452 // BITS - size in bits, used to distinguish low level calls
1453 // OP - operator
1454 // Note: temp_val introduced in order to force the compiler to read
1455 // *lhs only once (w/o it the compiler reads *lhs twice)
1456 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \
1457  { \
1458  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1459  TYPE old_value, new_value; \
1460  temp_val = *lhs; \
1461  old_value = temp_val; \
1462  new_value = (TYPE)(rhs OP old_value); \
1463  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1464  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1465  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
1466  KMP_DO_PAUSE; \
1467  \
1468  temp_val = *lhs; \
1469  old_value = temp_val; \
1470  new_value = (TYPE)(rhs OP old_value); \
1471  } \
1472  }
1473 
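// Editor's note: an illustrative sketch, not part of the original source.
// The _rev entry points handle x = expr OP x with the operands reversed, so
// the retry loop above computes rhs OP old_value instead of old_value OP rhs;
// "loc" is a placeholder ident_t:
//
//   double x, y;
//   #pragma omp atomic
//   x = y / x;          // --> __kmpc_atomic_float8_div_rev(&loc, gtid, &x, y);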
1474 // -------------------------------------------------------------------------
1475 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
1476  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1477  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1478  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1479  }
1480 
1481 // ------------------------------------------------------------------------
1482 // Entries definition for integer operands
1483 // TYPE_ID - operands type and size (fixed4, float4)
1484 // OP_ID - operation identifier (add, sub, mul, ...)
1485 // TYPE - operand type
1486 // BITS - size in bits, used to distinguish low level calls
1487 // OP - operator (used in critical section)
1488 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1489 
1490 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1491 // ------------------------------------------------------------------------
1492 // Routines for ATOMIC integer operands, other operators
1493 // ------------------------------------------------------------------------
1494 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1495 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1496  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1497 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1498  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1499 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1500  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1501 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1502  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1503 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1504  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1505 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1506  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1507 
1508 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1509  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1510 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1511  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1512 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1513  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1514 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1515  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1516 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1517  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1518 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1519  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1520 
1521 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1522  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1523 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1524  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1525 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1526  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1527 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1528  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1529 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1530  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1531 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1532  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1533 
1534 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1535  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1536 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1537  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1538 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1539  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1540 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1541  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1542 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1543  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1544 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1545  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1546 
1547 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1548  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1549 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1550  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1551 
1552 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1553  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1554 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1555  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1556 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1557 
1558 // ------------------------------------------------------------------------
1559 // Routines for Extended types: long double, _Quad, complex flavours (use
1560 // critical section)
1561 // TYPE_ID, OP_ID, TYPE - detailed above
1562 // OP - operator
1563 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1564 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1565  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1566  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1567  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1568  }
1569 
1570 /* ------------------------------------------------------------------------- */
1571 // routines for long double type
1572 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1573  1) // __kmpc_atomic_float10_sub_rev
1574 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1575  1) // __kmpc_atomic_float10_div_rev
1576 #if KMP_HAVE_QUAD
1577 // routines for _Quad type
1578 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1579  1) // __kmpc_atomic_float16_sub_rev
1580 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1581  1) // __kmpc_atomic_float16_div_rev
1582 #if (KMP_ARCH_X86)
1583 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1584  1) // __kmpc_atomic_float16_sub_a16_rev
1585 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1586  1) // __kmpc_atomic_float16_div_a16_rev
1587 #endif // KMP_ARCH_X86
1588 #endif // KMP_HAVE_QUAD
1589 
1590 // routines for complex types
1591 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1592  1) // __kmpc_atomic_cmplx4_sub_rev
1593 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1594  1) // __kmpc_atomic_cmplx4_div_rev
1595 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1596  1) // __kmpc_atomic_cmplx8_sub_rev
1597 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1598  1) // __kmpc_atomic_cmplx8_div_rev
1599 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1600  1) // __kmpc_atomic_cmplx10_sub_rev
1601 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1602  1) // __kmpc_atomic_cmplx10_div_rev
1603 #if KMP_HAVE_QUAD
1604 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1605  1) // __kmpc_atomic_cmplx16_sub_rev
1606 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1607  1) // __kmpc_atomic_cmplx16_div_rev
1608 #if (KMP_ARCH_X86)
1609 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1610  1) // __kmpc_atomic_cmplx16_sub_a16_rev
1611 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1612  1) // __kmpc_atomic_cmplx16_div_a16_rev
1613 #endif // KMP_ARCH_X86
1614 #endif // KMP_HAVE_QUAD
1615 
1616 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1617 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1618 
1619 /* ------------------------------------------------------------------------ */
1620 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1621 /* Note: in order to reduce the total number of type combinations,          */
1622 /* the compiler is assumed to convert the RHS to the longest floating       */
1623 /* type, that is _Quad, before calling any of these routines.               */
1624 /* The conversion to _Quad is done by the compiler during the calculation,  */
1625 /* and the conversion back to TYPE happens just before the assignment:      */
1626 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                    */
1627 /* A performance penalty is expected because of software emulation.         */
1628 /* ------------------------------------------------------------------------ */
1629 
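// Editor's note: an illustrative sketch, not part of the original source.
// For instance, an int updated with a double expression can be lowered to a
// mixed-type entry point defined below, with the RHS widened by the compiler;
// "loc" is a placeholder ident_t:
//
//   int i; double d;
//   #pragma omp atomic
//   i = i * d;          // --> __kmpc_atomic_fixed4_mul_float8(&loc, gtid, &i,
//                       //                                     (kmp_real64)d);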
1630 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1631  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1632  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1633  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1634  KA_TRACE(100, \
1635  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1636  gtid));
1637 
1638 // -------------------------------------------------------------------------
1639 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1640  GOMP_FLAG) \
1641  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1642  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1643  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1644  }
1645 
1646 // -------------------------------------------------------------------------
1647 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1648 // -------------------------------------------------------------------------
1649 // X86 or X86_64: no alignment problems ====================================
1650 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1651  LCK_ID, MASK, GOMP_FLAG) \
1652  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1653  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1654  OP_CMPXCHG(TYPE, BITS, OP) \
1655  }
1656 // -------------------------------------------------------------------------
1657 #else
1658 // ------------------------------------------------------------------------
1659 // Code for other architectures that don't handle unaligned accesses.
1660 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1661  LCK_ID, MASK, GOMP_FLAG) \
1662  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1663  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1664  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1665  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1666  } else { \
1667  KMP_CHECK_GTID; \
1668  OP_UPDATE_CRITICAL(TYPE, OP, \
1669  LCK_ID) /* unaligned address - use critical */ \
1670  } \
1671  }
1672 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1673 
1674 // -------------------------------------------------------------------------
1675 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1676 // -------------------------------------------------------------------------
1677 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
1678  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
1679  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1680  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1681  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1682  }
1683 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
1684  LCK_ID, GOMP_FLAG) \
1685  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1686  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1687  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1688  }
1689 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1690 
1691 // RHS=float8
1692 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1693  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1694 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1695  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1696 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1697  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1698 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1699  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1700 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1701  0) // __kmpc_atomic_fixed4_mul_float8
1702 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1703  0) // __kmpc_atomic_fixed4_div_float8
1704 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1705  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1706 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1707  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1708 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1709  KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1710 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1711  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1712 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1713  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1714 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1715  KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1716 
1717 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1718 // use them)
1719 #if KMP_HAVE_QUAD
1720 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1721  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1722 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1723  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1724 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1725  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1726 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1727  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1728 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1729  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1730 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1731  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1732 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1733  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1734 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1735  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1736 
1737 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1738  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1739 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1740  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1741 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1742  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1743 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1744  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1745 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1746  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1747 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1748  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1749 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1750  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1751 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1752  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1753 
1754 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1755  0) // __kmpc_atomic_fixed4_add_fp
1756 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1757  0) // __kmpc_atomic_fixed4u_add_fp
1758 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1759  0) // __kmpc_atomic_fixed4_sub_fp
1760 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1761  0) // __kmpc_atomic_fixed4u_sub_fp
1762 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1763  0) // __kmpc_atomic_fixed4_mul_fp
1764 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1765  0) // __kmpc_atomic_fixed4u_mul_fp
1766 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1767  0) // __kmpc_atomic_fixed4_div_fp
1768 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1769  0) // __kmpc_atomic_fixed4u_div_fp
1770 
1771 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1772  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1773 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1774  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1775 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1776  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1777 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1778  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1779 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1780  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1781 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1782  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1783 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1784  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1785 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1786  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1787 
1788 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1789  KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1790 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1791  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1792 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1793  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1794 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1795  KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1796 
1797 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1798  KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1799 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1800  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1801 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1802  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1803 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1804  KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1805 
1806 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1807 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1808  1) // __kmpc_atomic_float10_add_fp
1809 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1810  1) // __kmpc_atomic_float10_sub_fp
1811 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1812  1) // __kmpc_atomic_float10_mul_fp
1813 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1814  1) // __kmpc_atomic_float10_div_fp
1815 
1816 // Reverse operations
1817 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1818  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1819 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1820  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1821 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1822  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1823 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1824  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1825 
1826 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1827  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1828 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1829  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1830 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1831  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1832 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1833  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1834 
1835 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1836  0) // __kmpc_atomic_fixed4_sub_rev_fp
1837 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1838  0) // __kmpc_atomic_fixed4u_sub_rev_fp
1839 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1840  0) // __kmpc_atomic_fixed4_div_rev_fp
1841 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1842  0) // __kmpc_atomic_fixed4u_div_rev_fp
1843 
1844 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1845  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1846 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1847  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1848 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1849  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1850 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1851  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1852 
1853 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1854  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1855 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1856  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1857 
1858 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1859  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1860 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1861  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1862 
1863 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1864  1) // __kmpc_atomic_float10_sub_rev_fp
1865 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1866  1) // __kmpc_atomic_float10_div_rev_fp
1867 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1868 
1869 #endif // KMP_HAVE_QUAD
1870 
1871 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1872 // ------------------------------------------------------------------------
1873 // X86 or X86_64: no alignment problems ====================================
1874 #if USE_CMPXCHG_FIX
1875 // workaround for C78287 (complex(kind=4) data type)
1876 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1877  LCK_ID, MASK, GOMP_FLAG) \
1878  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1879  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1880  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1881  }
1882 // end of the second part of the workaround for C78287
1883 #else
1884 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1885  LCK_ID, MASK, GOMP_FLAG) \
1886  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1887  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1888  OP_CMPXCHG(TYPE, BITS, OP) \
1889  }
1890 #endif // USE_CMPXCHG_FIX
1891 #else
1892 // ------------------------------------------------------------------------
1893 // Code for other architectures that don't handle unaligned accesses.
1894 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1895  LCK_ID, MASK, GOMP_FLAG) \
1896  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1897  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1898  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1899  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1900  } else { \
1901  KMP_CHECK_GTID; \
1902  OP_UPDATE_CRITICAL(TYPE, OP, \
1903  LCK_ID) /* unaligned address - use critical */ \
1904  } \
1905  }
1906 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
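// Note (annotation, not part of the original source): on targets that take the
// fallback branch above, the macro dispatches on the address alignment at run
// time. As a rough sketch, ATOMIC_CMPXCHG_CMPLX(cmplx4, ..., 8c, 7, ...) below
// produces something of this shape (the exact signature comes from
// ATOMIC_BEGIN_MIX, defined earlier in the file and assumed here):
//
//   void __kmpc_atomic_cmplx4_add_cmplx8(ident_t *id_ref, int gtid,
//                                        kmp_cmplx32 *lhs, kmp_cmplx64 rhs) {
//     if (!((kmp_uintptr_t)lhs & 0x7)) {
//       // 8-byte aligned: lock-free compare-and-swap update (OP_CMPXCHG)
//     } else {
//       // unaligned: fall back to the 8c critical section (OP_UPDATE_CRITICAL)
//     }
//   }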
1907 
1908 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1909  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1910 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1911  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1912 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1913  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1914 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1915  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1916 
1917 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1918 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1919 
1920 // ------------------------------------------------------------------------
1921 // Atomic READ routines
1922 
1923 // ------------------------------------------------------------------------
1924 // Beginning of a definition (provides name, parameters, debug trace)
1925 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1926 // fixed)
1927 // OP_ID - operation identifier (add, sub, mul, ...)
1928 // TYPE - operands' type
1929 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1930  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1931  TYPE *loc) { \
1932  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1933  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1934 
1935 // ------------------------------------------------------------------------
1936 // Atomic read of *loc using the "compare_and_store_ret" routine
1937 // TYPE - operand's type
1938 // BITS - size in bits, used to distinguish low level calls
1939 // OP - operator
1940 // Note: temp_val introduced in order to force the compiler to read
1941 // *loc only once (without it the compiler reads *loc twice)
1942 // TODO: check if it is still necessary
1943 // Returns the old value regardless of the result of the "compare & swap" operation
1944 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1945  { \
1946  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1947  union f_i_union { \
1948  TYPE f_val; \
1949  kmp_int##BITS i_val; \
1950  }; \
1951  union f_i_union old_value; \
1952  temp_val = *loc; \
1953  old_value.f_val = temp_val; \
1954  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1955  (kmp_int##BITS *)loc, \
1956  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1957  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1958  new_value = old_value.f_val; \
1959  return new_value; \
1960  }
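// Note (annotation, not part of the original source): the macro above performs
// an atomic read by issuing a compare-and-swap whose expected and desired
// values are both the location's own current contents, so *loc is never
// modified but the value that was really there is returned atomically. A rough
// sketch of what ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +, ...)
// expands to (casts simplified):
//
//   kmp_real64 __kmpc_atomic_float8_rd(ident_t *id_ref, int gtid,
//                                      kmp_real64 *loc) {
//     union { kmp_real64 f_val; kmp_int64 i_val; } old_value;
//     old_value.f_val = *loc;
//     old_value.i_val = KMP_COMPARE_AND_STORE_RET64(
//         (kmp_int64 *)loc, old_value.i_val, old_value.i_val);
//     return old_value.f_val;
//   }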
1961 
1962 // -------------------------------------------------------------------------
1963 // Atomic read of *loc performed inside a critical section
1964 // OP - operator (it's supposed to contain an assignment)
1965 // LCK_ID - lock identifier
1966 // Note: don't check gtid as it should always be valid
1967 // 1, 2-byte - expect valid parameter, other - check before this macro
1968 #define OP_CRITICAL_READ(OP, LCK_ID) \
1969  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1970  \
1971  new_value = (*loc); \
1972  \
1973  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1974 
1975 // -------------------------------------------------------------------------
1976 #ifdef KMP_GOMP_COMPAT
1977 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1978  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1979  KMP_CHECK_GTID; \
1980  OP_CRITICAL_READ(OP, 0); \
1981  return new_value; \
1982  }
1983 #else
1984 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1985 #endif /* KMP_GOMP_COMPAT */
1986 
1987 // -------------------------------------------------------------------------
1988 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1989  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1990  TYPE new_value; \
1991  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1992  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1993  return new_value; \
1994  }
1995 // -------------------------------------------------------------------------
1996 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1997  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1998  TYPE new_value; \
1999  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
2000  OP_CMPXCHG_READ(TYPE, BITS, OP) \
2001  }
2002 // ------------------------------------------------------------------------
2003 // Routines for Extended types: long double, _Quad, complex flavours (use
2004 // critical section)
2005 // TYPE_ID, OP_ID, TYPE - detailed above
2006 // OP - operator
2007 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2008 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2009  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
2010  TYPE new_value; \
2011  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
2012  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
2013  return new_value; \
2014  }
2015 
2016 // ------------------------------------------------------------------------
2017 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
2018 // value doesn't work.
2019 // Let's return the read value through the additional parameter.
2020 #if (KMP_OS_WINDOWS)
2021 
2022 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
2023  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2024  \
2025  (*out) = (*loc); \
2026  \
2027  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2028 // ------------------------------------------------------------------------
2029 #ifdef KMP_GOMP_COMPAT
2030 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
2031  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2032  KMP_CHECK_GTID; \
2033  OP_CRITICAL_READ_WRK(OP, 0); \
2034  }
2035 #else
2036 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
2037 #endif /* KMP_GOMP_COMPAT */
2038 // ------------------------------------------------------------------------
2039 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2040  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
2041  TYPE *loc) { \
2042  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2043  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2044 
2045 // ------------------------------------------------------------------------
2046 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2047  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2048  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
2049  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
2050  }
2051 
2052 #endif // KMP_OS_WINDOWS
2053 
2054 // ------------------------------------------------------------------------
2055 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2056 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2057 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2058  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2059 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2060  KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2061 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2062  KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2063 
2064 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2065 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2066  KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2067 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2068  KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2069 
2070 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2071  1) // __kmpc_atomic_float10_rd
2072 #if KMP_HAVE_QUAD
2073 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2074  1) // __kmpc_atomic_float16_rd
2075 #endif // KMP_HAVE_QUAD
2076 
2077 // Fix for CQ220361 on Windows* OS
2078 #if (KMP_OS_WINDOWS)
2079 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2080  1) // __kmpc_atomic_cmplx4_rd
2081 #else
2082 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2083  1) // __kmpc_atomic_cmplx4_rd
2084 #endif // (KMP_OS_WINDOWS)
2085 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2086  1) // __kmpc_atomic_cmplx8_rd
2087 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2088  1) // __kmpc_atomic_cmplx10_rd
2089 #if KMP_HAVE_QUAD
2090 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2091  1) // __kmpc_atomic_cmplx16_rd
2092 #if (KMP_ARCH_X86)
2093 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2094  1) // __kmpc_atomic_float16_a16_rd
2095 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2096  1) // __kmpc_atomic_cmplx16_a16_rd
2097 #endif // (KMP_ARCH_X86)
2098 #endif // KMP_HAVE_QUAD
2099 
2100 // ------------------------------------------------------------------------
2101 // Atomic WRITE routines
2102 
2103 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2104  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2105  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2106  KMP_XCHG_FIXED##BITS(lhs, rhs); \
2107  }
2108 // ------------------------------------------------------------------------
2109 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2110  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2111  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2112  KMP_XCHG_REAL##BITS(lhs, rhs); \
2113  }
2114 
2115 // ------------------------------------------------------------------------
2116 // Operation on *lhs, rhs using "compare_and_store" routine
2117 // TYPE - operands' type
2118 // BITS - size in bits, used to distinguish low level calls
2119 // OP - operator
2120 // Note: temp_val introduced in order to force the compiler to read
2121 // *lhs only once (w/o it the compiler reads *lhs twice)
2122 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2123  { \
2124  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2125  TYPE old_value, new_value; \
2126  temp_val = *lhs; \
2127  old_value = temp_val; \
2128  new_value = rhs; \
2129  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2130  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2131  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2132  temp_val = *lhs; \
2133  old_value = temp_val; \
2134  new_value = rhs; \
2135  } \
2136  }
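// Note (annotation, not part of the original source): OP_CMPXCHG_WR stores rhs
// through a compare-and-swap retry loop; it is used below only where a plain
// KMP_XCHG_* of the required width is not suitable. The loop logic, with the
// casts simplified, is:
//
//   kmp_real64 old_value = *lhs;                  // snapshot current contents
//   while (!KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)lhs,
//                                       *(kmp_int64 *)&old_value,
//                                       *(kmp_int64 *)&rhs)) {
//     old_value = *lhs;                           // lost the race: re-read, retry
//   }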
2137 
2138 // -------------------------------------------------------------------------
2139 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2140  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2141  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2142  OP_CMPXCHG_WR(TYPE, BITS, OP) \
2143  }
2144 
2145 // ------------------------------------------------------------------------
2146 // Routines for Extended types: long double, _Quad, complex flavours (use
2147 // critical section)
2148 // TYPE_ID, OP_ID, TYPE - detailed above
2149 // OP - operator
2150 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2151 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2152  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2153  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2154  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2155  }
2156 // -------------------------------------------------------------------------
2157 
2158 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2159  KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2160 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2161  KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2162 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2163  KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2164 #if (KMP_ARCH_X86)
2165 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2166  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2167 #else
2168 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2169  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2170 #endif // (KMP_ARCH_X86)
2171 
2172 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2173  KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2174 #if (KMP_ARCH_X86)
2175 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2176  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2177 #else
2178 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2179  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2180 #endif // (KMP_ARCH_X86)
2181 
2182 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2183  1) // __kmpc_atomic_float10_wr
2184 #if KMP_HAVE_QUAD
2185 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2186  1) // __kmpc_atomic_float16_wr
2187 #endif // KMP_HAVE_QUAD
2188 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2189 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2190  1) // __kmpc_atomic_cmplx8_wr
2191 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2192  1) // __kmpc_atomic_cmplx10_wr
2193 #if KMP_HAVE_QUAD
2194 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2195  1) // __kmpc_atomic_cmplx16_wr
2196 #if (KMP_ARCH_X86)
2197 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2198  1) // __kmpc_atomic_float16_a16_wr
2199 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2200  1) // __kmpc_atomic_cmplx16_a16_wr
2201 #endif // (KMP_ARCH_X86)
2202 #endif // KMP_HAVE_QUAD
2203 
2204 // ------------------------------------------------------------------------
2205 // Atomic CAPTURE routines
2206 
2207 // Beginning of a definition (provides name, parameters, debug trace)
2208 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2209 // fixed)
2210 // OP_ID - operation identifier (add, sub, mul, ...)
2211 // TYPE - operands' type
2212 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2213  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2214  TYPE *lhs, TYPE rhs, int flag) { \
2215  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2216  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2217 
2218 // -------------------------------------------------------------------------
2219 // Operation on *lhs, rhs bound by critical section
2220 // OP - operator (it's supposed to contain an assignment)
2221 // LCK_ID - lock identifier
2222 // Note: don't check gtid as it should always be valid
2223 // 1, 2-byte - expect valid parameter, other - check before this macro
2224 #define OP_CRITICAL_CPT(OP, LCK_ID) \
2225  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2226  \
2227  if (flag) { \
2228  (*lhs) OP rhs; \
2229  new_value = (*lhs); \
2230  } else { \
2231  new_value = (*lhs); \
2232  (*lhs) OP rhs; \
2233  } \
2234  \
2235  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2236  return new_value;
2237 
2238 #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
2239  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2240  \
2241  if (flag) { \
2242  (*lhs) = (TYPE)((*lhs)OP rhs); \
2243  new_value = (*lhs); \
2244  } else { \
2245  new_value = (*lhs); \
2246  (*lhs) = (TYPE)((*lhs)OP rhs); \
2247  } \
2248  \
2249  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2250  return new_value;
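// Note (annotation, not part of the original source): in the capture macros
// the "flag" argument selects which value is returned to the caller:
//
//   if (flag) { *lhs = (TYPE)((*lhs) OP rhs); new_value = *lhs; } // v = x op= expr   (capture new)
//   else      { new_value = *lhs; *lhs = (TYPE)((*lhs) OP rhs); } // v = x; x op= expr (capture old)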
2251 
2252 // ------------------------------------------------------------------------
2253 #ifdef KMP_GOMP_COMPAT
2254 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
2255  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2256  KMP_CHECK_GTID; \
2257  OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
2258  }
2259 #else
2260 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2261 #endif /* KMP_GOMP_COMPAT */
2262 
2263 // ------------------------------------------------------------------------
2264 // Operation on *lhs, rhs using "compare_and_store" routine
2265 // TYPE - operands' type
2266 // BITS - size in bits, used to distinguish low level calls
2267 // OP - operator
2268 // Note: temp_val introduced in order to force the compiler to read
2269 // *lhs only once (w/o it the compiler reads *lhs twice)
2270 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2271  { \
2272  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2273  TYPE old_value, new_value; \
2274  temp_val = *lhs; \
2275  old_value = temp_val; \
2276  new_value = (TYPE)(old_value OP rhs); \
2277  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2278  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2279  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2280  temp_val = *lhs; \
2281  old_value = temp_val; \
2282  new_value = (TYPE)(old_value OP rhs); \
2283  } \
2284  if (flag) { \
2285  return new_value; \
2286  } else \
2287  return old_value; \
2288  }
2289 
2290 // -------------------------------------------------------------------------
2291 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2292  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2293  TYPE new_value; \
2294  (void)new_value; \
2295  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2296  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2297  }
2298 
2299 // -------------------------------------------------------------------------
2300 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2301  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2302  TYPE old_value, new_value; \
2303  (void)new_value; \
2304  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2305  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2306  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2307  if (flag) { \
2308  return old_value OP rhs; \
2309  } else \
2310  return old_value; \
2311  }
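// Note (annotation, not part of the original source): for integer add/sub the
// capture is a single fetch-and-add. KMP_TEST_THEN_ADD##BITS returns the value
// that was in *lhs before the addition, so the body above reduces to:
//
//   old_value = KMP_TEST_THEN_ADD32(lhs, +rhs);   // "-rhs" when OP is "-"
//   return flag ? old_value + rhs                 // captured new value
//               : old_value;                      // captured old value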
2312 // -------------------------------------------------------------------------
2313 
2314 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2315  0) // __kmpc_atomic_fixed4_add_cpt
2316 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2317  0) // __kmpc_atomic_fixed4_sub_cpt
2318 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2319  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2320 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2321  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2322 
2323 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2324  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2325 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2326  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2327 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2328  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2329 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2330  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2331 
2332 // ------------------------------------------------------------------------
2333 // Entries definition for integer operands
2334 // TYPE_ID - operands type and size (fixed4, float4)
2335 // OP_ID - operation identifier (add, sub, mul, ...)
2336 // TYPE - operand type
2337 // BITS - size in bits, used to distinguish low level calls
2338 // OP - operator (used in critical section)
2339 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2340 // ------------------------------------------------------------------------
2341 // Routines for ATOMIC integer operands, other operators
2342 // ------------------------------------------------------------------------
2343 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2344 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2345  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2346 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2347  0) // __kmpc_atomic_fixed1_andb_cpt
2348 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2349  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2350 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2351  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2352 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2353  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2354 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2355  0) // __kmpc_atomic_fixed1_orb_cpt
2356 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2357  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2358 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2359  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2360 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2361  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2362 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2363  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2364 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2365  0) // __kmpc_atomic_fixed1_xor_cpt
2366 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2367  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2368 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2369  0) // __kmpc_atomic_fixed2_andb_cpt
2370 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2371  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2372 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2373  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2374 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2375  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2376 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2377  0) // __kmpc_atomic_fixed2_orb_cpt
2378 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2379  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2380 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2381  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2382 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2383  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2384 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2385  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2386 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2387  0) // __kmpc_atomic_fixed2_xor_cpt
2388 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2389  0) // __kmpc_atomic_fixed4_andb_cpt
2390 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2391  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2392 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2393  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2394 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2395  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2396 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2397  0) // __kmpc_atomic_fixed4_orb_cpt
2398 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2399  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2400 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2401  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2402 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2403  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2404 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2405  0) // __kmpc_atomic_fixed4_xor_cpt
2406 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2407  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2408 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2409  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2410 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2411  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2412 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2413  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2414 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2415  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2416 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2417  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2418 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2419  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2420 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2421  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2422 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2423  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2424 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2425  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2426 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2427  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2428 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2429  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2430 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2431  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2432 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2433 
2434 // CAPTURE routines for mixed types RHS=float16
2435 #if KMP_HAVE_QUAD
2436 
2437 // Beginning of a definition (provides name, parameters, debug trace)
2438 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2439 // fixed)
2440 // OP_ID - operation identifier (add, sub, mul, ...)
2441 // TYPE - operands' type
2442 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2443  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2444  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2445  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2446  KA_TRACE(100, \
2447  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2448  gtid));
2449 
2450 // -------------------------------------------------------------------------
2451 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2452  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2453  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2454  TYPE new_value; \
2455  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2456  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2457  }
2458 
2459 // -------------------------------------------------------------------------
2460 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2461  LCK_ID, GOMP_FLAG) \
2462  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2463  TYPE new_value; \
2464  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2465  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2466  }
2467 
2468 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2469  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2470 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2471  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2472 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2473  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2474 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2475  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2476 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2477  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2478 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2479  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2480 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2481  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2482 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2483  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2484 
2485 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2486  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2487 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2488  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2489 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2490  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2491 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2492  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2493 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2494  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2495 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2496  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2497 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2498  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2499 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2500  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2501 
2502 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2503  0) // __kmpc_atomic_fixed4_add_cpt_fp
2504 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2505  0) // __kmpc_atomic_fixed4u_add_cpt_fp
2506 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2507  0) // __kmpc_atomic_fixed4_sub_cpt_fp
2508 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2509  0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2510 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2511  0) // __kmpc_atomic_fixed4_mul_cpt_fp
2512 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2513  0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2514 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2515  0) // __kmpc_atomic_fixed4_div_cpt_fp
2516 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2517  0) // __kmpc_atomic_fixed4u_div_cpt_fp
2518 
2519 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2520  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2521 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2522  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2523 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2524  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2525 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2526  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2527 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2528  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2529 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2530  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2531 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2532  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2533 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2534  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2535 
2536 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2537  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2538 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2539  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2540 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2541  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2542 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2543  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2544 
2545 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2546  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2547 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2548  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2549 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2550  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2551 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2552  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2553 
2554 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2555  1) // __kmpc_atomic_float10_add_cpt_fp
2556 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2557  1) // __kmpc_atomic_float10_sub_cpt_fp
2558 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2559  1) // __kmpc_atomic_float10_mul_cpt_fp
2560 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2561  1) // __kmpc_atomic_float10_div_cpt_fp
2562 
2563 #endif // KMP_HAVE_QUAD
2564 
2565 // ------------------------------------------------------------------------
2566 // Routines for C/C++ Reduction operators && and ||
2567 
2568 // -------------------------------------------------------------------------
2569 // Operation on *lhs, rhs bound by critical section
2570 // OP - operator (it's supposed to contain an assignment)
2571 // LCK_ID - lock identifier
2572 // Note: don't check gtid as it should always be valid
2573 // 1, 2-byte - expect valid parameter, other - check before this macro
2574 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2575  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2576  \
2577  if (flag) { \
2578  new_value OP rhs; \
2579  (*lhs) = new_value; \
2580  } else { \
2581  new_value = (*lhs); \
2582  (*lhs) OP rhs; \
2583  } \
2584  \
2585  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2586 
2587 // ------------------------------------------------------------------------
2588 #ifdef KMP_GOMP_COMPAT
2589 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2590  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2591  KMP_CHECK_GTID; \
2592  OP_CRITICAL_L_CPT(OP, 0); \
2593  return new_value; \
2594  }
2595 #else
2596 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2597 #endif /* KMP_GOMP_COMPAT */
2598 
2599 // ------------------------------------------------------------------------
2600 // Need separate macros for &&, || because there is no combined assignment
2601 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2602  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2603  TYPE new_value; \
2604  (void)new_value; \
2605  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2606  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2607  }
2608 
2609 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2610  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2611 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2612  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2613 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2614  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2615 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2616  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2617 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2618  0) // __kmpc_atomic_fixed4_andl_cpt
2619 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2620  0) // __kmpc_atomic_fixed4_orl_cpt
2621 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2622  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2623 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2624  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2625 
2626 // -------------------------------------------------------------------------
2627 // Routines for Fortran operators that have no C counterpart:
2628 // MAX, MIN, .EQV., .NEQV.
2629 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2630 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2631 
2632 // -------------------------------------------------------------------------
2633 // MIN and MAX need separate macros
2634 // OP - comparison operator used to check whether any update is needed
2635 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2636  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2637  \
2638  if (*lhs OP rhs) { /* still need actions? */ \
2639  old_value = *lhs; \
2640  *lhs = rhs; \
2641  if (flag) \
2642  new_value = rhs; \
2643  else \
2644  new_value = old_value; \
2645  } else { \
2646  new_value = *lhs; \
2647  } \
2648  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2649  return new_value;
2650 
2651 // -------------------------------------------------------------------------
2652 #ifdef KMP_GOMP_COMPAT
2653 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2654  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2655  KMP_CHECK_GTID; \
2656  MIN_MAX_CRITSECT_CPT(OP, 0); \
2657  }
2658 #else
2659 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2660 #endif /* KMP_GOMP_COMPAT */
2661 
2662 // -------------------------------------------------------------------------
2663 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2664  { \
2665  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2666  /*TYPE old_value; */ \
2667  temp_val = *lhs; \
2668  old_value = temp_val; \
2669  while (old_value OP rhs && /* still need actions? */ \
2670  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2671  (kmp_int##BITS *)lhs, \
2672  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2673  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2674  temp_val = *lhs; \
2675  old_value = temp_val; \
2676  } \
2677  if (flag) \
2678  return rhs; \
2679  else \
2680  return old_value; \
2681  }
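// Note (annotation, not part of the original source): for max the macro is
// instantiated with OP = "<", so the loop keeps trying to install rhs only
// while the current value still compares smaller (for min, OP = ">"). With
// casts simplified, the fixed4 max case behaves like:
//
//   kmp_int32 old_value = *lhs;
//   while (old_value < rhs &&                      // rhs would still win
//          !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                       *(kmp_int32 *)&old_value,
//                                       *(kmp_int32 *)&rhs)) {
//     old_value = *lhs;                            // lost the race: retry
//   }
//   return flag ? rhs : old_value;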
2682 
2683 // -------------------------------------------------------------------------
2684 // 1-byte, 2-byte operands - use critical section
2685 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2686  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2687  TYPE new_value, old_value; \
2688  if (*lhs OP rhs) { /* need actions? */ \
2689  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2690  MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2691  } \
2692  return *lhs; \
2693  }
2694 
2695 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2696  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2697  TYPE new_value, old_value; \
2698  (void)new_value; \
2699  if (*lhs OP rhs) { \
2700  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2701  MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2702  } \
2703  return *lhs; \
2704  }
2705 
2706 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2707  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2708 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2709  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2710 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2711  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2712 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2713  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2714 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2715  0) // __kmpc_atomic_fixed4_max_cpt
2716 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2717  0) // __kmpc_atomic_fixed4_min_cpt
2718 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2719  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2720 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2721  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2722 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2723  KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2724 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2725  KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2726 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2727  KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2728 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2729  KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2730 MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r,
2731  1) // __kmpc_atomic_float10_max_cpt
2732 MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r,
2733  1) // __kmpc_atomic_float10_min_cpt
2734 #if KMP_HAVE_QUAD
2735 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2736  1) // __kmpc_atomic_float16_max_cpt
2737 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2738  1) // __kmpc_atomic_float16_min_cpt
2739 #if (KMP_ARCH_X86)
2740 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2741  1) // __kmpc_atomic_float16_max_a16_cpt
2742 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2743  1) // __kmpc_atomic_float16_min_a16_cpt
2744 #endif // (KMP_ARCH_X86)
2745 #endif // KMP_HAVE_QUAD
2746 
2747 // ------------------------------------------------------------------------
2748 #ifdef KMP_GOMP_COMPAT
2749 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2750  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2751  KMP_CHECK_GTID; \
2752  OP_CRITICAL_CPT(OP, 0); \
2753  }
2754 #else
2755 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2756 #endif /* KMP_GOMP_COMPAT */
2757 // ------------------------------------------------------------------------
2758 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2759  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2760  TYPE new_value; \
2761  (void)new_value; \
2762  OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
2763  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2764  }
2765 
2766 // ------------------------------------------------------------------------
2767 
2768 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2769  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2770 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2771  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2772 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2773  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2774 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2775  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2776 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2777  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2778 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2779  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2780 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2781  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2782 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2783  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2784 
2785 // ------------------------------------------------------------------------
2786 // Routines for Extended types: long double, _Quad, complex flavours (use
2787 // critical section)
2788 // TYPE_ID, OP_ID, TYPE - detailed above
2789 // OP - operator
2790 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2791 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2792  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2793  TYPE new_value; \
2794  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2795  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2796  }
2797 
2798 // ------------------------------------------------------------------------
2799 // Workaround for cmplx4. Regular routines with return value don't work
2800 // on Win_32e. Let's return captured values through the additional parameter.
2801 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2802  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2803  \
2804  if (flag) { \
2805  (*lhs) OP rhs; \
2806  (*out) = (*lhs); \
2807  } else { \
2808  (*out) = (*lhs); \
2809  (*lhs) OP rhs; \
2810  } \
2811  \
2812  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2813  return;
2814 // ------------------------------------------------------------------------
2815 
2816 #ifdef KMP_GOMP_COMPAT
2817 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2818  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2819  KMP_CHECK_GTID; \
2820  OP_CRITICAL_CPT_WRK(OP## =, 0); \
2821  }
2822 #else
2823 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2824 #endif /* KMP_GOMP_COMPAT */
2825 // ------------------------------------------------------------------------
2826 
2827 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2828  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2829  TYPE rhs, TYPE *out, int flag) { \
2830  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2831  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2832 // ------------------------------------------------------------------------
2833 
2834 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2835  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2836  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2837  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2838  }
2839 // The end of workaround for cmplx4
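// Note (annotation, not part of the original source): per ATOMIC_BEGIN_WRK
// above, the cmplx4 capture workaround returns the captured value through the
// extra "out" parameter instead of the return value, e.g.:
//
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag);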
2840 
2841 /* ------------------------------------------------------------------------- */
2842 // routines for long double type
2843 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2844  1) // __kmpc_atomic_float10_add_cpt
2845 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2846  1) // __kmpc_atomic_float10_sub_cpt
2847 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2848  1) // __kmpc_atomic_float10_mul_cpt
2849 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2850  1) // __kmpc_atomic_float10_div_cpt
2851 #if KMP_HAVE_QUAD
2852 // routines for _Quad type
2853 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2854  1) // __kmpc_atomic_float16_add_cpt
2855 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2856  1) // __kmpc_atomic_float16_sub_cpt
2857 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2858  1) // __kmpc_atomic_float16_mul_cpt
2859 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2860  1) // __kmpc_atomic_float16_div_cpt
2861 #if (KMP_ARCH_X86)
2862 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2863  1) // __kmpc_atomic_float16_add_a16_cpt
2864 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2865  1) // __kmpc_atomic_float16_sub_a16_cpt
2866 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2867  1) // __kmpc_atomic_float16_mul_a16_cpt
2868 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2869  1) // __kmpc_atomic_float16_div_a16_cpt
2870 #endif // (KMP_ARCH_X86)
2871 #endif // KMP_HAVE_QUAD
2872 
2873 // routines for complex types
2874 
2875 // cmplx4 routines to return void
2876 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2877  1) // __kmpc_atomic_cmplx4_add_cpt
2878 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2879  1) // __kmpc_atomic_cmplx4_sub_cpt
2880 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2881  1) // __kmpc_atomic_cmplx4_mul_cpt
2882 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2883  1) // __kmpc_atomic_cmplx4_div_cpt
2884 
2885 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2886  1) // __kmpc_atomic_cmplx8_add_cpt
2887 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2888  1) // __kmpc_atomic_cmplx8_sub_cpt
2889 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2890  1) // __kmpc_atomic_cmplx8_mul_cpt
2891 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2892  1) // __kmpc_atomic_cmplx8_div_cpt
2893 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2894  1) // __kmpc_atomic_cmplx10_add_cpt
2895 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2896  1) // __kmpc_atomic_cmplx10_sub_cpt
2897 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2898  1) // __kmpc_atomic_cmplx10_mul_cpt
2899 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2900  1) // __kmpc_atomic_cmplx10_div_cpt
2901 #if KMP_HAVE_QUAD
2902 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2903  1) // __kmpc_atomic_cmplx16_add_cpt
2904 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2905  1) // __kmpc_atomic_cmplx16_sub_cpt
2906 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2907  1) // __kmpc_atomic_cmplx16_mul_cpt
2908 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2909  1) // __kmpc_atomic_cmplx16_div_cpt
2910 #if (KMP_ARCH_X86)
2911 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2912  1) // __kmpc_atomic_cmplx16_add_a16_cpt
2913 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2914  1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2915 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2916  1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2917 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2918  1) // __kmpc_atomic_cmplx16_div_a16_cpt
2919 #endif // (KMP_ARCH_X86)
2920 #endif // KMP_HAVE_QUAD
2921 
2922 // OpenMP 4.0: v = x = expr binop x;
2923 // { v = x; x = expr binop x; } or { x = expr binop x; v = x; } for non-commutative operations.
2924 // Supported only on IA-32 architecture and Intel(R) 64
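// Note (annotation, not part of the original source): a user-level construct
// that is expected to reach the *_cpt_rev entries below (capture combined with
// the reversed operand order) looks like:
//
//   double x = 10.0, v;
//   #pragma omp atomic capture
//   { v = x; x = 2.0 - x; }   // x = expr binop x; "-" is non-commutative
//
// Whether the old or the new value of x is captured is encoded by the "flag"
// parameter of the routines that follow.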
2925 
2926 // -------------------------------------------------------------------------
2927 // Operation on *lhs, rhs bound by critical section
2928 // OP - operator (it's supposed to contain an assignment)
2929 // LCK_ID - lock identifier
2930 // Note: don't check gtid as it should always be valid
2931 // 1, 2-byte - expect valid parameter, other - check before this macro
2932 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
2933  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2934  \
2935  if (flag) { \
2936  /*temp_val = (*lhs);*/ \
2937  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2938  new_value = (*lhs); \
2939  } else { \
2940  new_value = (*lhs); \
2941  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2942  } \
2943  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2944  return new_value;
2945 
2946 // ------------------------------------------------------------------------
2947 #ifdef KMP_GOMP_COMPAT
2948 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
2949  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2950  KMP_CHECK_GTID; \
2951  OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
2952  }
2953 #else
2954 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
2955 #endif /* KMP_GOMP_COMPAT */
2956 
2957 // ------------------------------------------------------------------------
2958 // Operation on *lhs, rhs using "compare_and_store" routine
2959 // TYPE - operands' type
2960 // BITS - size in bits, used to distinguish low level calls
2961 // OP - operator
2962 // Note: temp_val introduced in order to force the compiler to read
2963 // *lhs only once (w/o it the compiler reads *lhs twice)
2964 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2965  { \
2966  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2967  TYPE old_value, new_value; \
2968  temp_val = *lhs; \
2969  old_value = temp_val; \
2970  new_value = (TYPE)(rhs OP old_value); \
2971  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2972  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2973  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2974  temp_val = *lhs; \
2975  old_value = temp_val; \
2976  new_value = (TYPE)(rhs OP old_value); \
2977  } \
2978  if (flag) { \
2979  return new_value; \
2980  } else \
2981  return old_value; \
2982  }
2983 
2984 // -------------------------------------------------------------------------
2985 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2986  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2987  TYPE new_value; \
2988  (void)new_value; \
2989  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
2990  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2991  }
2992 
2993 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2994  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2995 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2996  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2997 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2998  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2999 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
3000  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
3001 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
3002  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
3003 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
3004  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
3005 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
3006  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
3007 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
3008  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
3009 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
3010  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
3011 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
3012  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
3013 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
3014  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
3015 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
3016  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
3017 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
3018  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
3019 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
3020  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
3021 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
3022  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
3023 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
3024  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
3025 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
3026  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
3027 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
3028  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
3029 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
3030  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
3031 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
3032  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
3033 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
3034  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
3035 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
3036  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
3037 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
3038  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
3039 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
3040  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
3041 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
3042  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
3043 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
3044  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
3045 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
3046  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
3047 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
3048  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
3049 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
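// For orientation, one expansion of the instantiations above produces an entry
// point of roughly the following shape. The exact prototype comes from
// ATOMIC_BEGIN_CPT earlier in this file, so treat this as an assumption rather
// than a verbatim expansion.
#if 0 // assumed shape of e.g. ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, ...)
kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev(ident_t *id_ref, int gtid,
                                           kmp_int32 *lhs, kmp_int32 rhs,
                                           int flag);
// Body: optional GOMP critical-section path (OP_GOMP_CRITICAL_CPT_REV), then
// the OP_CMPXCHG_CPT_REV retry loop; returns the new value when flag != 0 and
// the old value otherwise.
#endif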
3050 
3051 // ------------------------------------------------------------------------
3052 // Routines for Extended types: long double, _Quad, complex flavours (use
3053 // critical section)
3054 // TYPE_ID, OP_ID, TYPE - detailed above
3055 // OP - operator
3056 // LCK_ID - lock identifier, used to possibly distinguish lock variable
3057 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
3058  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
3059  TYPE new_value; \
3060  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
3061  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3062  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
3063  }
3064 
3065 /* ------------------------------------------------------------------------- */
3066 // routines for long double type
3067 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
3068  1) // __kmpc_atomic_float10_sub_cpt_rev
3069 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
3070  1) // __kmpc_atomic_float10_div_cpt_rev
3071 #if KMP_HAVE_QUAD
3072 // routines for _Quad type
3073 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
3074  1) // __kmpc_atomic_float16_sub_cpt_rev
3075 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3076  1) // __kmpc_atomic_float16_div_cpt_rev
3077 #if (KMP_ARCH_X86)
3078 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3079  1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3080 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3081  1) // __kmpc_atomic_float16_div_a16_cpt_rev
3082 #endif // (KMP_ARCH_X86)
3083 #endif // KMP_HAVE_QUAD
3084 
3085 // routines for complex types
3086 
3087 // ------------------------------------------------------------------------
3088 // Workaround for cmplx4. Regular routines with return value don't work
3089 // on Win_32e. Let's return captured values through the additional parameter.
3090 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3091  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3092  \
3093  if (flag) { \
3094  (*lhs) = (rhs)OP(*lhs); \
3095  (*out) = (*lhs); \
3096  } else { \
3097  (*out) = (*lhs); \
3098  (*lhs) = (rhs)OP(*lhs); \
3099  } \
3100  \
3101  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3102  return;
3103 // ------------------------------------------------------------------------
3104 
3105 #ifdef KMP_GOMP_COMPAT
3106 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3107  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3108  KMP_CHECK_GTID; \
3109  OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3110  }
3111 #else
3112 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3113 #endif /* KMP_GOMP_COMPAT */
3114 // ------------------------------------------------------------------------
3115 
3116 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3117  GOMP_FLAG) \
3118  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3119  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3120  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3121  }
3122 // The end of workaround for cmplx4
3123 
3124 // !!! TODO: check if we need to return void for cmplx4 routines
3125 // cmplx4 routines to return void
3126 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3127  1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3128 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3129  1) // __kmpc_atomic_cmplx4_div_cpt_rev
3130 
3131 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3132  1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3133 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3134  1) // __kmpc_atomic_cmplx8_div_cpt_rev
3135 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3136  1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3137 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3138  1) // __kmpc_atomic_cmplx10_div_cpt_rev
3139 #if KMP_HAVE_QUAD
3140 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3141  1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3142 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3143  1) // __kmpc_atomic_cmplx16_div_cpt_rev
3144 #if (KMP_ARCH_X86)
3145 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3146  1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3147 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3148  1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3149 #endif // (KMP_ARCH_X86)
3150 #endif // KMP_HAVE_QUAD
3151 
3152 // Capture reverse for mixed type: RHS=float16
3153 #if KMP_HAVE_QUAD
3154 
3155 // Beginning of a definition (provides name, parameters, debug trace)
3156 // TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
3157 // fixed)
3158 // OP_ID - operation identifier (add, sub, mul, ...)
3159 // TYPE - operands' type
3160 // -------------------------------------------------------------------------
3161 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3162  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3163  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3164  TYPE new_value; \
3165  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3166  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3167  }
3168 
3169 // -------------------------------------------------------------------------
3170 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3171  LCK_ID, GOMP_FLAG) \
3172  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3173  TYPE new_value; \
3174  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
3175  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
3176  }
3177 
3178 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3179  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3180 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3181  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3182 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3183  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3184 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3185  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3186 
3187 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3188  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3189 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3190  1,
3191  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3192 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3193  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3194 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3195  1,
3196  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3197 
3198 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3199  3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3200 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3201  4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3202 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3203  3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3204 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3205  4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3206 
3207 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3208  7,
3209  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3210 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3211  8i, 7,
3212  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3213 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3214  7,
3215  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3216 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3217  8i, 7,
3218  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3219 
3220 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3221  4r, 3,
3222  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3223 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3224  4r, 3,
3225  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3226 
3227 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3228  8r, 7,
3229  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3230 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3231  8r, 7,
3232  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3233 
3234 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3235  10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3236 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3237  10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3238 
3239 #endif // KMP_HAVE_QUAD
3240 
3241 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3242 
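// A user-level sketch of the swap form these routines serve (placeholder
// names; not part of the original source):
#if 0 // illustrative sketch only
static void example_swp(void) {
  double x = 1.0, v, expr = 2.0;
  #pragma omp atomic capture
  { v = x; x = expr; } // v receives the old x; x is overwritten with expr
  (void)v;
}
#endif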
3243 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3244  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3245  TYPE rhs) { \
3246  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3247  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3248 
3249 #define CRITICAL_SWP(LCK_ID) \
3250  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3251  \
3252  old_value = (*lhs); \
3253  (*lhs) = rhs; \
3254  \
3255  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3256  return old_value;
3257 
3258 // ------------------------------------------------------------------------
3259 #ifdef KMP_GOMP_COMPAT
3260 #define GOMP_CRITICAL_SWP(FLAG) \
3261  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3262  KMP_CHECK_GTID; \
3263  CRITICAL_SWP(0); \
3264  }
3265 #else
3266 #define GOMP_CRITICAL_SWP(FLAG)
3267 #endif /* KMP_GOMP_COMPAT */
3268 
3269 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3270  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3271  TYPE old_value; \
3272  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3273  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
3274  return old_value; \
3275  }
3276 // ------------------------------------------------------------------------
3277 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3278  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3279  TYPE old_value; \
3280  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3281  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
3282  return old_value; \
3283  }
3284 
3285 // ------------------------------------------------------------------------
3286 #define CMPXCHG_SWP(TYPE, BITS) \
3287  { \
3288  TYPE KMP_ATOMIC_VOLATILE temp_val; \
3289  TYPE old_value, new_value; \
3290  temp_val = *lhs; \
3291  old_value = temp_val; \
3292  new_value = rhs; \
3293  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
3294  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
3295  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
3296  temp_val = *lhs; \
3297  old_value = temp_val; \
3298  new_value = rhs; \
3299  } \
3300  return old_value; \
3301  }
3302 
3303 // -------------------------------------------------------------------------
3304 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3305  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3306  TYPE old_value; \
3307  (void)old_value; \
3308  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3309  CMPXCHG_SWP(TYPE, BITS) \
3310  }
3311 
3312 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3313 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3314 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3315 
3316 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3317  KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3318 
3319 #if (KMP_ARCH_X86)
3320 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3321  KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3322 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3323  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3324 #else
3325 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3326 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3327  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3328 #endif // (KMP_ARCH_X86)
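// Why two variants: on 32-bit IA-32 there is no single-instruction 8-byte
// exchange, so 8-byte swaps fall back to the cmpxchg retry loop above; that
// reading of the #if block is the editor's assumption. The sketch below
// contrasts the two patterns with std::atomic, not the runtime's primitives.
#if 0 // illustrative sketch only
#include <atomic>
static double swap_via_exchange(std::atomic<double> &x, double rhs) {
  return x.exchange(rhs); // direct exchange, as the ATOMIC_XCHG_*_SWP path does
}
static double swap_via_cas(std::atomic<double> &x, double rhs) {
  double old_value = x.load(std::memory_order_relaxed);
  // retry until the store succeeds, as CMPXCHG_SWP does
  while (!x.compare_exchange_strong(old_value, rhs)) {
  }
  return old_value;
}
#endif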
3329 
3330 // ------------------------------------------------------------------------
3331 // Routines for Extended types: long double, _Quad, complex flavours (use
3332 // critical section)
3333 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3334  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3335  TYPE old_value; \
3336  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3337  CRITICAL_SWP(LCK_ID) \
3338  }
3339 
3340 // ------------------------------------------------------------------------
3341 // !!! TODO: check if we need to return void for cmplx4 routines
3342 // Workaround for cmplx4. Regular routines with return value don't work
3343 // on Win_32e. Let's return captured values through the additional parameter.
3344 
3345 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3346  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3347  TYPE rhs, TYPE *out) { \
3348  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3349  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3350 
3351 #define CRITICAL_SWP_WRK(LCK_ID) \
3352  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3353  \
3354  tmp = (*lhs); \
3355  (*lhs) = (rhs); \
3356  (*out) = tmp; \
3357  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3358  return;
3359 // ------------------------------------------------------------------------
3360 
3361 #ifdef KMP_GOMP_COMPAT
3362 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
3363  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3364  KMP_CHECK_GTID; \
3365  CRITICAL_SWP_WRK(0); \
3366  }
3367 #else
3368 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3369 #endif /* KMP_GOMP_COMPAT */
3370 // ------------------------------------------------------------------------
3371 
3372 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3373  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3374  TYPE tmp; \
3375  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
3376  CRITICAL_SWP_WRK(LCK_ID) \
3377  }
3378 // The end of workaround for cmplx4
3379 
3380 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3381 #if KMP_HAVE_QUAD
3382 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3383 #endif // KMP_HAVE_QUAD
3384 // cmplx4 routine to return void
3385 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
3386 
3387 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
3388 // __kmpc_atomic_cmplx4_swp
3389 
3390 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3391 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3392 #if KMP_HAVE_QUAD
3393 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3394 #if (KMP_ARCH_X86)
3395 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3396  1) // __kmpc_atomic_float16_a16_swp
3397 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3398  1) // __kmpc_atomic_cmplx16_a16_swp
3399 #endif // (KMP_ARCH_X86)
3400 #endif // KMP_HAVE_QUAD
3401 
3402 // End of OpenMP 4.0 Capture
3403 
3404 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3405 
3406 #undef OP_CRITICAL
3407 
3408 /* ------------------------------------------------------------------------ */
3409 /* Generic atomic routines */
3410 
3411 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3412  void (*f)(void *, void *, void *)) {
3413  KMP_DEBUG_ASSERT(__kmp_init_serial);
3414 
3415  if (
3416 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3417  FALSE /* must use lock */
3418 #else
3419  TRUE
3420 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3421  ) {
3422  kmp_int8 old_value, new_value;
3423 
3424  old_value = *(kmp_int8 *)lhs;
3425  (*f)(&new_value, &old_value, rhs);
3426 
3427  /* TODO: Should this be acquire or release? */
3428  while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3429  *(kmp_int8 *)&new_value)) {
3430  KMP_CPU_PAUSE();
3431 
3432  old_value = *(kmp_int8 *)lhs;
3433  (*f)(&new_value, &old_value, rhs);
3434  }
3435 
3436  return;
3437  } else {
3438  // All 1-byte data is of integer data type.
3439 
3440 #ifdef KMP_GOMP_COMPAT
3441  if (__kmp_atomic_mode == 2) {
3442  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3443  } else
3444 #endif /* KMP_GOMP_COMPAT */
3445  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3446 
3447  (*f)(lhs, lhs, rhs);
3448 
3449 #ifdef KMP_GOMP_COMPAT
3450  if (__kmp_atomic_mode == 2) {
3451  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3452  } else
3453 #endif /* KMP_GOMP_COMPAT */
3454  __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3455  }
3456 }
3457 
3458 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3459  void (*f)(void *, void *, void *)) {
3460  if (
3461 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3462  FALSE /* must use lock */
3463 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3464  TRUE /* no alignment problems */
3465 #else
3466  !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3467 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3468  ) {
3469  kmp_int16 old_value, new_value;
3470 
3471  old_value = *(kmp_int16 *)lhs;
3472  (*f)(&new_value, &old_value, rhs);
3473 
3474  /* TODO: Should this be acquire or release? */
3475  while (!KMP_COMPARE_AND_STORE_ACQ16(
3476  (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3477  KMP_CPU_PAUSE();
3478 
3479  old_value = *(kmp_int16 *)lhs;
3480  (*f)(&new_value, &old_value, rhs);
3481  }
3482 
3483  return;
3484  } else {
3485  // All 2-byte data is of integer data type.
3486 
3487 #ifdef KMP_GOMP_COMPAT
3488  if (__kmp_atomic_mode == 2) {
3489  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3490  } else
3491 #endif /* KMP_GOMP_COMPAT */
3492  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3493 
3494  (*f)(lhs, lhs, rhs);
3495 
3496 #ifdef KMP_GOMP_COMPAT
3497  if (__kmp_atomic_mode == 2) {
3498  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3499  } else
3500 #endif /* KMP_GOMP_COMPAT */
3501  __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3502  }
3503 }
3504 
3505 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3506  void (*f)(void *, void *, void *)) {
3507  KMP_DEBUG_ASSERT(__kmp_init_serial);
3508 
3509  if (
3510 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3511 // Gomp compatibility is broken if this routine is called for floats.
3512 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3513  TRUE /* no alignment problems */
3514 #else
3515  !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3516 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3517  ) {
3518  kmp_int32 old_value, new_value;
3519 
3520  old_value = *(kmp_int32 *)lhs;
3521  (*f)(&new_value, &old_value, rhs);
3522 
3523  /* TODO: Should this be acquire or release? */
3524  while (!KMP_COMPARE_AND_STORE_ACQ32(
3525  (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3526  KMP_CPU_PAUSE();
3527 
3528  old_value = *(kmp_int32 *)lhs;
3529  (*f)(&new_value, &old_value, rhs);
3530  }
3531 
3532  return;
3533  } else {
3534  // Use __kmp_atomic_lock_4i for all 4-byte data,
3535  // even if it isn't of integer data type.
3536 
3537 #ifdef KMP_GOMP_COMPAT
3538  if (__kmp_atomic_mode == 2) {
3539  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3540  } else
3541 #endif /* KMP_GOMP_COMPAT */
3542  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3543 
3544  (*f)(lhs, lhs, rhs);
3545 
3546 #ifdef KMP_GOMP_COMPAT
3547  if (__kmp_atomic_mode == 2) {
3548  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3549  } else
3550 #endif /* KMP_GOMP_COMPAT */
3551  __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3552  }
3553 }
3554 
3555 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3556  void (*f)(void *, void *, void *)) {
3557  KMP_DEBUG_ASSERT(__kmp_init_serial);
3558  if (
3559 
3560 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3561  FALSE /* must use lock */
3562 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3563  TRUE /* no alignment problems */
3564 #else
3565  !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3566 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3567  ) {
3568  kmp_int64 old_value, new_value;
3569 
3570  old_value = *(kmp_int64 *)lhs;
3571  (*f)(&new_value, &old_value, rhs);
3572  /* TODO: Should this be acquire or release? */
3573  while (!KMP_COMPARE_AND_STORE_ACQ64(
3574  (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3575  KMP_CPU_PAUSE();
3576 
3577  old_value = *(kmp_int64 *)lhs;
3578  (*f)(&new_value, &old_value, rhs);
3579  }
3580 
3581  return;
3582  } else {
3583  // Use __kmp_atomic_lock_8i for all 8-byte data,
3584  // even if it isn't of integer data type.
3585 
3586 #ifdef KMP_GOMP_COMPAT
3587  if (__kmp_atomic_mode == 2) {
3588  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3589  } else
3590 #endif /* KMP_GOMP_COMPAT */
3591  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3592 
3593  (*f)(lhs, lhs, rhs);
3594 
3595 #ifdef KMP_GOMP_COMPAT
3596  if (__kmp_atomic_mode == 2) {
3597  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3598  } else
3599 #endif /* KMP_GOMP_COMPAT */
3600  __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3601  }
3602 }
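// Illustrative sketch of a compiler-generated callback for the generic
// routines (hypothetical name). The contract is taken from the loops above:
// f(out, in, rhs) stores (*in OP *rhs) into *out, and out/in may alias, since
// the locked path passes lhs for both.
#if 0 // illustrative only
static void __example_float8_mul(void *out, void *in, void *rhs) {
  *(double *)out = *(double *)in * *(double *)rhs;
}
// A call site would then look roughly like:
//   __kmpc_atomic_8(id_ref, gtid, &x, &operand, __example_float8_mul);
#endif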
3603 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3604 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3605  void (*f)(void *, void *, void *)) {
3606  KMP_DEBUG_ASSERT(__kmp_init_serial);
3607 
3608 #ifdef KMP_GOMP_COMPAT
3609  if (__kmp_atomic_mode == 2) {
3610  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3611  } else
3612 #endif /* KMP_GOMP_COMPAT */
3613  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3614 
3615  (*f)(lhs, lhs, rhs);
3616 
3617 #ifdef KMP_GOMP_COMPAT
3618  if (__kmp_atomic_mode == 2) {
3619  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3620  } else
3621 #endif /* KMP_GOMP_COMPAT */
3622  __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3623 }
3624 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3625 
3626 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3627  void (*f)(void *, void *, void *)) {
3628  KMP_DEBUG_ASSERT(__kmp_init_serial);
3629 
3630 #ifdef KMP_GOMP_COMPAT
3631  if (__kmp_atomic_mode == 2) {
3632  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3633  } else
3634 #endif /* KMP_GOMP_COMPAT */
3635  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3636 
3637  (*f)(lhs, lhs, rhs);
3638 
3639 #ifdef KMP_GOMP_COMPAT
3640  if (__kmp_atomic_mode == 2) {
3641  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3642  } else
3643 #endif /* KMP_GOMP_COMPAT */
3644  __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3645 }
3646 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3647 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3648  void (*f)(void *, void *, void *)) {
3649  KMP_DEBUG_ASSERT(__kmp_init_serial);
3650 
3651 #ifdef KMP_GOMP_COMPAT
3652  if (__kmp_atomic_mode == 2) {
3653  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3654  } else
3655 #endif /* KMP_GOMP_COMPAT */
3656  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3657 
3658  (*f)(lhs, lhs, rhs);
3659 
3660 #ifdef KMP_GOMP_COMPAT
3661  if (__kmp_atomic_mode == 2) {
3662  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3663  } else
3664 #endif /* KMP_GOMP_COMPAT */
3665  __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3666 }
3667 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3668 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3669  void (*f)(void *, void *, void *)) {
3670  KMP_DEBUG_ASSERT(__kmp_init_serial);
3671 
3672 #ifdef KMP_GOMP_COMPAT
3673  if (__kmp_atomic_mode == 2) {
3674  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3675  } else
3676 #endif /* KMP_GOMP_COMPAT */
3677  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3678 
3679  (*f)(lhs, lhs, rhs);
3680 
3681 #ifdef KMP_GOMP_COMPAT
3682  if (__kmp_atomic_mode == 2) {
3683  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3684  } else
3685 #endif /* KMP_GOMP_COMPAT */
3686  __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3687 }
3688 
3689 // AC: same two routines as GOMP_atomic_start/end, but will be called by our
3690 // compiler; duplicated in order to not use third-party names in pure Intel code
3691 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3692 void __kmpc_atomic_start(void) {
3693  int gtid = __kmp_entry_gtid();
3694  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3695  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3696 }
3697 
3698 void __kmpc_atomic_end(void) {
3699  int gtid = __kmp_get_gtid();
3700  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3701  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3702 }
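// A sketch of how a compiler may bracket an atomic update that has no
// specialized entry point, serialized by __kmp_atomic_lock. The routine
// update_shared_state is hypothetical and stands for arbitrary user code.
#if 0 // illustrative sketch only
extern void update_shared_state(void); // hypothetical user routine
static void example_unsupported_atomic(void) {
  __kmpc_atomic_start();
  update_shared_state(); // arbitrary update protected by the global atomic lock
  __kmpc_atomic_end();
}
#endif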
3703 
3704 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3705 
3706 // OpenMP 5.1 compare and swap
3707 
3722 bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3723  return KMP_COMPARE_AND_STORE_ACQ8(x, e, d);
3724 }
3725 bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
3726  short d) {
3727  return KMP_COMPARE_AND_STORE_ACQ16(x, e, d);
3728 }
3729 bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
3730  kmp_int32 d) {
3731  return KMP_COMPARE_AND_STORE_ACQ32(x, e, d);
3732 }
3733 bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
3734  kmp_int64 d) {
3735  return KMP_COMPARE_AND_STORE_ACQ64(x, e, d);
3736 }
3737 
3752 char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3753  return KMP_COMPARE_AND_STORE_RET8(x, e, d);
3754 }
3755 short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
3756  short d) {
3757  return KMP_COMPARE_AND_STORE_RET16(x, e, d);
3758 }
3759 kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
3760  kmp_int32 e, kmp_int32 d) {
3761  return KMP_COMPARE_AND_STORE_RET32(x, e, d);
3762 }
3763 kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
3764  kmp_int64 e, kmp_int64 d) {
3765  return KMP_COMPARE_AND_STORE_RET64(x, e, d);
3766 }
3767 
3784 bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3785  char d, char *pv) {
3786  char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3787  if (old == e)
3788  return true;
3789  KMP_ASSERT(pv != NULL);
3790  *pv = old;
3791  return false;
3792 }
3793 bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3794  short d, short *pv) {
3795  short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3796  if (old == e)
3797  return true;
3798  KMP_ASSERT(pv != NULL);
3799  *pv = old;
3800  return false;
3801 }
3802 bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3803  kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3804  kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3805  if (old == e)
3806  return true;
3807  KMP_ASSERT(pv != NULL);
3808  *pv = old;
3809  return false;
3810 }
3811 bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3812  kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3813  kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3814  if (old == e)
3815  return true;
3816  KMP_ASSERT(pv != NULL);
3817  *pv = old;
3818  return false;
3819 }
3820 
3837 char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3838  char d, char *pv) {
3839  char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3840  KMP_ASSERT(pv != NULL);
3841  *pv = old == e ? d : old;
3842  return old;
3843 }
3844 short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3845  short d, short *pv) {
3846  short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3847  KMP_ASSERT(pv != NULL);
3848  *pv = old == e ? d : old;
3849  return old;
3850 }
3851 kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3852  kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3853  kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3854  KMP_ASSERT(pv != NULL);
3855  *pv = old == e ? d : old;
3856  return old;
3857 }
3858 kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3859  kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3860  kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3861  KMP_ASSERT(pv != NULL);
3862  *pv = old == e ? d : old;
3863  return old;
3864 }
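// User-level forms that the compare and compare-capture entries above serve,
// sketched with placeholder names. OpenMP 5.1 permits several structured-block
// variants; which entry a compiler picks for each form is an assumption here.
#if 0 // illustrative sketch only
static void example_atomic_compare(int *px, int e, int d) {
  int v;
  // plain compare: served by __kmpc_atomic_bool_4_cas / __kmpc_atomic_val_4_cas
  #pragma omp atomic compare
  if (*px == e) { *px = d; }
  // compare with capture: the *_cas_cpt entries additionally hand back the
  // value observed by the comparison through the pv parameter
  #pragma omp atomic compare capture
  { v = *px; if (*px == e) { *px = d; } }
  (void)v;
}
#endif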
3865 
3866 // End OpenMP 5.1 compare + capture
3867 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3868 
3873 // end of file