LLVM OpenMP* Runtime Library
1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15 
16 typedef unsigned char uchar;
17 typedef unsigned short ushort;
18 
561 /*
562  * Global vars
563  */
564 
565 #ifndef KMP_GOMP_COMPAT
566 int __kmp_atomic_mode = 1; // Intel perf
567 #else
568 int __kmp_atomic_mode = 2; // GOMP compatibility
569 #endif /* KMP_GOMP_COMPAT */
570 
571 KMP_ALIGN(128)
572 
573 // Control access to all user coded atomics in Gnu compat mode
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
588 // Control access to all user coded atomics for float complex (kmp_cmplx32) data type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599 
600 /* 2007-03-02:
601  Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
602  on *_32 and *_32e. This is just a temporary workaround for the problem. It
603  seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
604  in assembler language. */
605 #define KMP_ATOMIC_VOLATILE volatile
606 
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608 
609 static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610  return lhs.q + rhs.q;
611 }
612 static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613  return lhs.q - rhs.q;
614 }
615 static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616  return lhs.q * rhs.q;
617 }
618 static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619  return lhs.q / rhs.q;
620 }
621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622  return lhs.q < rhs.q;
623 }
624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625  return lhs.q > rhs.q;
626 }
627 
628 static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629  return lhs.q + rhs.q;
630 }
631 static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632  return lhs.q - rhs.q;
633 }
634 static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635  return lhs.q * rhs.q;
636 }
637 static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638  return lhs.q / rhs.q;
639 }
640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641  return lhs.q < rhs.q;
642 }
643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644  return lhs.q > rhs.q;
645 }
646 
647 static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
648  kmp_cmplx128_a4_t &rhs) {
649  return lhs.q + rhs.q;
650 }
651 static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
652  kmp_cmplx128_a4_t &rhs) {
653  return lhs.q - rhs.q;
654 }
655 static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
656  kmp_cmplx128_a4_t &rhs) {
657  return lhs.q * rhs.q;
658 }
659 static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
660  kmp_cmplx128_a4_t &rhs) {
661  return lhs.q / rhs.q;
662 }
663 
664 static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
665  kmp_cmplx128_a16_t &rhs) {
666  return lhs.q + rhs.q;
667 }
668 static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
669  kmp_cmplx128_a16_t &rhs) {
670  return lhs.q - rhs.q;
671 }
672 static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
673  kmp_cmplx128_a16_t &rhs) {
674  return lhs.q * rhs.q;
675 }
676 static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
677  kmp_cmplx128_a16_t &rhs) {
678  return lhs.q / rhs.q;
679 }
680 
681 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
682 
683 // ATOMIC implementation routines -----------------------------------------
684 // One routine for each operation and operand type.
685 // All routine declarations look like
686 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
687 
688 #define KMP_CHECK_GTID \
689  if (gtid == KMP_GTID_UNKNOWN) { \
690  gtid = __kmp_entry_gtid(); \
691  } // check and get gtid when needed
692 
693 // Beginning of a definition (provides name, parameters, debug trace)
694 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
695 // fixed)
696 // OP_ID - operation identifier (add, sub, mul, ...)
697 // TYPE - operands' type
698 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
699  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
700  TYPE *lhs, TYPE rhs) { \
701  KMP_DEBUG_ASSERT(__kmp_init_serial); \
702  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
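// For illustration (a sketch, not text that appears verbatim below):
// ATOMIC_BEGIN(fixed4, add, kmp_int32, void) opens a definition roughly
// equivalent to
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
// The body and the closing brace are supplied by the macro that invokes
// ATOMIC_BEGIN (e.g. ATOMIC_FIXED_ADD or ATOMIC_CMPXCHG below).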
703 
704 // ------------------------------------------------------------------------
705 // Lock variables used for critical sections for various size operands
706 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
707 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
708 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
709 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
710 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
711 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
712 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
713 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
714 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
715 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
716 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
717 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
718 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
719 
720 // ------------------------------------------------------------------------
721 // Operation on *lhs, rhs bound by critical section
722 // OP - operator (it's supposed to contain an assignment)
723 // LCK_ID - lock identifier
724 // Note: don't check gtid as it should always be valid
725 // 1, 2-byte - expect valid parameter, other - check before this macro
726 #define OP_CRITICAL(OP, LCK_ID) \
727  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
728  \
729  (*lhs) OP(rhs); \
730  \
731  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
732 
733 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
734  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
735  (*lhs) = (TYPE)((*lhs)OP((TYPE)rhs)); \
736  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
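// For illustration (a sketch of the expansion): OP_UPDATE_CRITICAL applied to
// a 4-byte integer update, e.g. OP_UPDATE_CRITICAL(kmp_int32, +, 4i), becomes
// roughly
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
//   (*lhs) = (kmp_int32)((*lhs) + ((kmp_int32)rhs));
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);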
737 
738 // ------------------------------------------------------------------------
739 // For GNU compatibility, we may need to use a critical section,
740 // even though it is not required by the ISA.
741 //
742 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
743 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
744 // critical section. On Intel(R) 64, all atomic operations are done with fetch
745 // and add or compare and exchange. Therefore, the FLAG parameter to this
746 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
747 // require a critical section, where we predict that they will be implemented
748 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
749 //
750 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
751 // the FLAG parameter should always be 1. If we know that we will be using
752 // a critical section, then we want to make certain that we use the generic
753 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
754 // locks that are specialized based upon the size or type of the data.
755 //
756 // If FLAG is 0, then we are relying on dead code elimination by the build
757 // compiler to get rid of the useless block of code, and save a needless
758 // branch at runtime.
759 
760 #ifdef KMP_GOMP_COMPAT
761 #define OP_GOMP_CRITICAL(OP, FLAG) \
762  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
763  KMP_CHECK_GTID; \
764  OP_CRITICAL(OP, 0); \
765  return; \
766  }
767 
768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \
769  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
770  KMP_CHECK_GTID; \
771  OP_UPDATE_CRITICAL(TYPE, OP, 0); \
772  return; \
773  }
774 #else
775 #define OP_GOMP_CRITICAL(OP, FLAG)
776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
777 #endif /* KMP_GOMP_COMPAT */
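// For illustration: with KMP_GOMP_COMPAT defined,
// OP_UPDATE_GOMP_CRITICAL(kmp_int32, +, 0) expands roughly to
//   if ((0) && (__kmp_atomic_mode == 2)) { ... }
// which the compiler removes as dead code, while a non-zero FLAG keeps the
// runtime test of __kmp_atomic_mode and, in GOMP compatibility mode, performs
// the update under the generic __kmp_atomic_lock (lock id 0) and returns.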
778 
779 #if KMP_MIC
780 #define KMP_DO_PAUSE _mm_delay_32(1)
781 #else
782 #define KMP_DO_PAUSE KMP_CPU_PAUSE()
783 #endif /* KMP_MIC */
784 
785 // ------------------------------------------------------------------------
786 // Operation on *lhs, rhs using "compare_and_store" routine
787 // TYPE - operands' type
788 // BITS - size in bits, used to distinguish low level calls
789 // OP - operator
790 #define OP_CMPXCHG(TYPE, BITS, OP) \
791  { \
792  TYPE old_value, new_value; \
793  old_value = *(TYPE volatile *)lhs; \
794  new_value = (TYPE)(old_value OP((TYPE)rhs)); \
795  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
796  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
797  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
798  KMP_DO_PAUSE; \
799  \
800  old_value = *(TYPE volatile *)lhs; \
801  new_value = (TYPE)(old_value OP((TYPE)rhs)); \
802  } \
803  }
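// For illustration (a sketch of the expansion): OP_CMPXCHG(kmp_real32, 32, +)
// becomes roughly
//   {
//     kmp_real32 old_value, new_value;
//     old_value = *(kmp_real32 volatile *)lhs;
//     new_value = (kmp_real32)(old_value + ((kmp_real32)rhs));
//     while (!KMP_COMPARE_AND_STORE_ACQ32(
//         (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//         *VOLATILE_CAST(kmp_int32 *) & new_value)) {
//       KMP_DO_PAUSE;
//       old_value = *(kmp_real32 volatile *)lhs;
//       new_value = (kmp_real32)(old_value + ((kmp_real32)rhs));
//     }
//   }
// i.e. the update is retried until the value at *lhs is unchanged between the
// read and the compare-and-store.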
804 
805 #if USE_CMPXCHG_FIX
806 // 2007-06-25:
807 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
808 // and win_32e are affected (I verified the asm). The compiler ignores the
809 // volatile qualifier of temp_val in the OP_CMPXCHG macro; this is a compiler
810 // bug (related tracker is C76005, targeted to 11.0). I verified the asm of
811 // the workaround.
812 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
813  { \
814  struct _sss { \
815  TYPE cmp; \
816  kmp_int##BITS *vvv; \
817  }; \
818  struct _sss old_value, new_value; \
819  old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
820  new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
821  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
822  new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
823  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
824  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
825  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
826  KMP_DO_PAUSE; \
827  \
828  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
829  new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
830  } \
831  }
832 // end of the first part of the workaround for C78287
833 #endif // USE_CMPXCHG_FIX
834 
835 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
836 
837 // ------------------------------------------------------------------------
838 // X86 or X86_64: no alignment problems ====================================
839 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
840  GOMP_FLAG) \
841  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
842  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
843  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
844  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
845  }
846 // -------------------------------------------------------------------------
847 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
848  GOMP_FLAG) \
849  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
850  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
851  OP_CMPXCHG(TYPE, BITS, OP) \
852  }
853 #if USE_CMPXCHG_FIX
854 // -------------------------------------------------------------------------
855 // workaround for C78287 (complex(kind=4) data type)
856 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
857  MASK, GOMP_FLAG) \
858  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
859  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
860  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
861  }
862 // end of the second part of the workaround for C78287
863 #endif // USE_CMPXCHG_FIX
864 
865 #else
866 // -------------------------------------------------------------------------
867 // Code for other architectures that don't handle unaligned accesses.
868 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
869  GOMP_FLAG) \
870  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
871  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
872  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
873  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
874  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
875  } else { \
876  KMP_CHECK_GTID; \
877  OP_UPDATE_CRITICAL(TYPE, OP, \
878  LCK_ID) /* unaligned address - use critical */ \
879  } \
880  }
881 // -------------------------------------------------------------------------
882 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
883  GOMP_FLAG) \
884  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
885  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
886  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
887  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
888  } else { \
889  KMP_CHECK_GTID; \
890  OP_UPDATE_CRITICAL(TYPE, OP, \
891  LCK_ID) /* unaligned address - use critical */ \
892  } \
893  }
894 #if USE_CMPXCHG_FIX
895 // -------------------------------------------------------------------------
896 // workaround for C78287 (complex(kind=4) data type)
897 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
898  MASK, GOMP_FLAG) \
899  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
900  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
901  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
902  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
903  } else { \
904  KMP_CHECK_GTID; \
905  OP_UPDATE_CRITICAL(TYPE, OP, \
906  LCK_ID) /* unaligned address - use critical */ \
907  } \
908  }
909 // end of the second part of the workaround for C78287
910 #endif // USE_CMPXCHG_FIX
911 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
912 
913 // Routines for ATOMIC 4-byte operands addition and subtraction
914 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
915  0) // __kmpc_atomic_fixed4_add
916 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
917  0) // __kmpc_atomic_fixed4_sub
918 
919 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
920  KMP_ARCH_X86) // __kmpc_atomic_float4_add
921 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
922  KMP_ARCH_X86) // __kmpc_atomic_float4_sub
923 
924 // Routines for ATOMIC 8-byte operands addition and subtraction
925 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
926  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
927 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
928  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
929 
930 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
931  KMP_ARCH_X86) // __kmpc_atomic_float8_add
932 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
933  KMP_ARCH_X86) // __kmpc_atomic_float8_sub
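// Usage sketch (illustrative only; "loc" is an assumed name for the caller's
// ident_t): for a statement such as
//   #pragma omp atomic
//   x += y;                     // kmp_int32 x, y
// a compiler targeting these entry points would typically emit a call like
//   __kmpc_atomic_fixed4_add(&loc, gtid, &x, y);
// passing the source location descriptor and the caller's global thread id.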
934 
935 // ------------------------------------------------------------------------
936 // Entries definition for integer operands
937 // TYPE_ID - operands type and size (fixed4, float4)
938 // OP_ID - operation identifier (add, sub, mul, ...)
939 // TYPE - operand type
940 // BITS - size in bits, used to distinguish low level calls
941 // OP - operator (used in critical section)
942 // LCK_ID - lock identifier, used to possibly distinguish lock variable
943 // MASK - used for alignment check
944 
945 // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
946 // ------------------------------------------------------------------------
947 // Routines for ATOMIC integer operands, other operators
948 // ------------------------------------------------------------------------
949 // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
950 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
951  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
952 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
953  0) // __kmpc_atomic_fixed1_andb
954 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
955  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
956 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
957  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
958 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
959  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
960 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
961  0) // __kmpc_atomic_fixed1_orb
962 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
963  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
964 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
965  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
966 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
967  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
968 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
969  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
970 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
971  0) // __kmpc_atomic_fixed1_xor
972 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
973  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
974 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
975  0) // __kmpc_atomic_fixed2_andb
976 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
977  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
978 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
979  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
980 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
981  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
982 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
983  0) // __kmpc_atomic_fixed2_orb
984 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
985  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
986 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
987  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
988 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
989  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
990 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
991  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
992 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
993  0) // __kmpc_atomic_fixed2_xor
994 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
995  0) // __kmpc_atomic_fixed4_andb
996 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
997  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
998 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
999  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
1000 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
1001  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
1002 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
1003  0) // __kmpc_atomic_fixed4_orb
1004 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
1005  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
1006 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
1007  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
1008 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
1009  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
1010 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
1011  0) // __kmpc_atomic_fixed4_xor
1012 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
1013  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
1014 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
1015  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
1016 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1017  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1018 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1019  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1020 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1021  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1022 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1023  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1024 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1025  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1026 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1027  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1028 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1029  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1030 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1031  KMP_ARCH_X86) // __kmpc_atomic_float4_div
1032 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1033  KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1034 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1035  KMP_ARCH_X86) // __kmpc_atomic_float8_div
1036 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1037  KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1038 // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
1039 
1040 /* ------------------------------------------------------------------------ */
1041 /* Routines for C/C++ Reduction operators && and || */
1042 
1043 // ------------------------------------------------------------------------
1044 // Need separate macros for &&, || because they have no compound assignment form
1045 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1046 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1047  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1048  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1049  OP_CRITICAL(= *lhs OP, LCK_ID) \
1050  }
1051 
1052 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1053 
1054 // ------------------------------------------------------------------------
1055 // X86 or X86_64: no alignment problems ===================================
1056 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1057  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1058  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1059  OP_CMPXCHG(TYPE, BITS, OP) \
1060  }
1061 
1062 #else
1063 // ------------------------------------------------------------------------
1064 // Code for other architectures that don't handle unaligned accesses.
1065 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1066  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1067  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1068  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1069  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1070  } else { \
1071  KMP_CHECK_GTID; \
1072  OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
1073  } \
1074  }
1075 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1076 
1077 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1078  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1079 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1080  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1081 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1082  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1083 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1084  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1085 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1086  0) // __kmpc_atomic_fixed4_andl
1087 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1088  0) // __kmpc_atomic_fixed4_orl
1089 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1090  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1091 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1092  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
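// For illustration: ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0)
// defines __kmpc_atomic_fixed4_andl, which atomically performs
//   *lhs = *lhs && rhs;   // stored result is 0 or 1
// using the cmpxchg loop on x86/x86_64; on other architectures it falls back
// to the 4i critical section when the address is not suitably aligned.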
1093 
1094 /* ------------------------------------------------------------------------- */
1095 /* Routines for Fortran operators that have no C counterparts: */
1096 /* MAX, MIN, .EQV., .NEQV. */
1097 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
1098 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1099 
1100 // -------------------------------------------------------------------------
1101 // MIN and MAX need separate macros
1102 // OP - comparison operator used to check whether any action is still needed
1103 #define MIN_MAX_CRITSECT(OP, LCK_ID) \
1104  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1105  \
1106  if (*lhs OP rhs) { /* still need actions? */ \
1107  *lhs = rhs; \
1108  } \
1109  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1110 
1111 // -------------------------------------------------------------------------
1112 #ifdef KMP_GOMP_COMPAT
1113 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
1114  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1115  KMP_CHECK_GTID; \
1116  MIN_MAX_CRITSECT(OP, 0); \
1117  return; \
1118  }
1119 #else
1120 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1121 #endif /* KMP_GOMP_COMPAT */
1122 
1123 // -------------------------------------------------------------------------
1124 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1125  { \
1126  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1127  TYPE old_value; \
1128  temp_val = *lhs; \
1129  old_value = temp_val; \
1130  while (old_value OP rhs && /* still need actions? */ \
1131  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
1132  (kmp_int##BITS *)lhs, \
1133  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1134  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
1135  KMP_CPU_PAUSE(); \
1136  temp_val = *lhs; \
1137  old_value = temp_val; \
1138  } \
1139  }
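// For illustration: for "max" the OP argument is "<", so
// MIN_MAX_CMPXCHG(kmp_int32, 32, <) keeps trying to install rhs while
//   old_value < rhs
// still holds; the loop exits as soon as either the comparison fails (another
// thread already stored a value >= rhs) or the compare-and-store succeeds.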
1140 
1141 // -------------------------------------------------------------------------
1142 // 1-byte, 2-byte operands - use critical section
1143 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1144  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1145  if (*lhs OP rhs) { /* need actions? */ \
1146  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1147  MIN_MAX_CRITSECT(OP, LCK_ID) \
1148  } \
1149  }
1150 
1151 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1152 
1153 // -------------------------------------------------------------------------
1154 // X86 or X86_64: no alignment problems ====================================
1155 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1156  GOMP_FLAG) \
1157  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1158  if (*lhs OP rhs) { \
1159  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1160  MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1161  } \
1162  }
1163 
1164 #else
1165 // -------------------------------------------------------------------------
1166 // Code for other architectures that don't handle unaligned accesses.
1167 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1168  GOMP_FLAG) \
1169  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1170  if (*lhs OP rhs) { \
1171  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1172  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1173  MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1174  } else { \
1175  KMP_CHECK_GTID; \
1176  MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
1177  } \
1178  } \
1179  }
1180 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1181 
1182 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1183  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1184 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1185  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1186 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1187  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1188 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1189  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1190 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1191  0) // __kmpc_atomic_fixed4_max
1192 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1193  0) // __kmpc_atomic_fixed4_min
1194 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1195  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1196 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1197  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1198 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1199  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1200 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1201  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1202 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1203  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1204 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1205  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1206 #if KMP_HAVE_QUAD
1207 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1208  1) // __kmpc_atomic_float16_max
1209 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1210  1) // __kmpc_atomic_float16_min
1211 #if (KMP_ARCH_X86)
1212 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1213  1) // __kmpc_atomic_float16_max_a16
1214 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1215  1) // __kmpc_atomic_float16_min_a16
1216 #endif // (KMP_ARCH_X86)
1217 #endif // KMP_HAVE_QUAD
1218 // ------------------------------------------------------------------------
1219 // Need separate macros for .EQV. because the complement (~) is needed
1220 // OP is ignored for critical sections; ^= ~ is used instead
1221 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1222  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1223  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1224  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \
1225  }
1226 
1227 // ------------------------------------------------------------------------
1228 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1229 // ------------------------------------------------------------------------
1230 // X86 or X86_64: no alignment problems ===================================
1231 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1232  GOMP_FLAG) \
1233  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1234  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1235  OP_CMPXCHG(TYPE, BITS, OP) \
1236  }
1237 // ------------------------------------------------------------------------
1238 #else
1239 // ------------------------------------------------------------------------
1240 // Code for other architectures that don't handle unaligned accesses.
1241 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1242  GOMP_FLAG) \
1243  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1244  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \
1245  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1246  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1247  } else { \
1248  KMP_CHECK_GTID; \
1249  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \
1250  } \
1251  }
1252 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1253 
1254 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1255  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1256 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1257  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1258 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1259  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1260 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1261  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1262 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1263  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1264 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1265  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1266 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1267  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1268 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1269  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
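// For illustration: .NEQV. maps onto plain xor (the ATOMIC_CMPXCHG entries
// above compute *lhs ^ rhs), while .EQV. needs the complement, so
// ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86)
// effectively performs
//   *lhs = *lhs ^ ~rhs;
// with the "^~" token passed as OP into the same cmpxchg loop.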
1270 
1271 // ------------------------------------------------------------------------
1272 // Routines for Extended types: long double, _Quad, complex flavours (use
1273 // critical section)
1274 // TYPE_ID, OP_ID, TYPE - detailed above
1275 // OP - operator
1276 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1277 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1278  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1279  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1280  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1281  }
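// For illustration (a sketch of the expansion):
// ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1) defines
// __kmpc_atomic_float10_add, whose body, after the optional GOMP fallback, is
// the lock-protected update
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   (*lhs) = (long double)((*lhs) + ((long double)rhs));
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);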
1282 
1283 /* ------------------------------------------------------------------------- */
1284 // routines for long double type
1285 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1286  1) // __kmpc_atomic_float10_add
1287 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1288  1) // __kmpc_atomic_float10_sub
1289 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1290  1) // __kmpc_atomic_float10_mul
1291 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1292  1) // __kmpc_atomic_float10_div
1293 #if KMP_HAVE_QUAD
1294 // routines for _Quad type
1295 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1296  1) // __kmpc_atomic_float16_add
1297 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1298  1) // __kmpc_atomic_float16_sub
1299 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1300  1) // __kmpc_atomic_float16_mul
1301 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1302  1) // __kmpc_atomic_float16_div
1303 #if (KMP_ARCH_X86)
1304 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1305  1) // __kmpc_atomic_float16_add_a16
1306 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1307  1) // __kmpc_atomic_float16_sub_a16
1308 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1309  1) // __kmpc_atomic_float16_mul_a16
1310 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1311  1) // __kmpc_atomic_float16_div_a16
1312 #endif // (KMP_ARCH_X86)
1313 #endif // KMP_HAVE_QUAD
1314 // routines for complex types
1315 
1316 #if USE_CMPXCHG_FIX
1317 // workaround for C78287 (complex(kind=4) data type)
1318 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1319  1) // __kmpc_atomic_cmplx4_add
1320 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1321  1) // __kmpc_atomic_cmplx4_sub
1322 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1323  1) // __kmpc_atomic_cmplx4_mul
1324 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1325  1) // __kmpc_atomic_cmplx4_div
1326 // end of the workaround for C78287
1327 #else
1328 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1329 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1330 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1331 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1332 #endif // USE_CMPXCHG_FIX
1333 
1334 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1335 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1336 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1337 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1338 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1339  1) // __kmpc_atomic_cmplx10_add
1340 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1341  1) // __kmpc_atomic_cmplx10_sub
1342 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1343  1) // __kmpc_atomic_cmplx10_mul
1344 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1345  1) // __kmpc_atomic_cmplx10_div
1346 #if KMP_HAVE_QUAD
1347 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1348  1) // __kmpc_atomic_cmplx16_add
1349 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1350  1) // __kmpc_atomic_cmplx16_sub
1351 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1352  1) // __kmpc_atomic_cmplx16_mul
1353 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1354  1) // __kmpc_atomic_cmplx16_div
1355 #if (KMP_ARCH_X86)
1356 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1357  1) // __kmpc_atomic_cmplx16_add_a16
1358 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1359  1) // __kmpc_atomic_cmplx16_sub_a16
1360 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1361  1) // __kmpc_atomic_cmplx16_mul_a16
1362 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1363  1) // __kmpc_atomic_cmplx16_div_a16
1364 #endif // (KMP_ARCH_X86)
1365 #endif // KMP_HAVE_QUAD
1366 
1367 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1368 // Supported only on IA-32 architecture and Intel(R) 64
1369 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1370 
1371 // ------------------------------------------------------------------------
1372 // Operation on *lhs, rhs bound by critical section
1373 // OP - operator (it's supposed to contain an assignment)
1374 // LCK_ID - lock identifier
1375 // Note: don't check gtid as it should always be valid
1376 // 1, 2-byte - expect valid parameter, other - check before this macro
1377 #define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1378  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1379  \
1380  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
1381  \
1382  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1383 
1384 #ifdef KMP_GOMP_COMPAT
1385 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \
1386  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1387  KMP_CHECK_GTID; \
1388  OP_CRITICAL_REV(TYPE, OP, 0); \
1389  return; \
1390  }
1391 
1392 #else
1393 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
1394 #endif /* KMP_GOMP_COMPAT */
1395 
1396 // Beginning of a definition (provides name, parameters, debug trace)
1397 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1398 // fixed)
1399 // OP_ID - operation identifier (add, sub, mul, ...)
1400 // TYPE - operands' type
1401 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1402  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
1403  TYPE *lhs, TYPE rhs) { \
1404  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1405  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1406 
1407 // ------------------------------------------------------------------------
1408 // Operation on *lhs, rhs using "compare_and_store" routine
1409 // TYPE - operands' type
1410 // BITS - size in bits, used to distinguish low level calls
1411 // OP - operator
1412 // Note: temp_val introduced in order to force the compiler to read
1413 // *lhs only once (w/o it the compiler reads *lhs twice)
1414 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \
1415  { \
1416  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1417  TYPE old_value, new_value; \
1418  temp_val = *lhs; \
1419  old_value = temp_val; \
1420  new_value = (TYPE)(rhs OP old_value); \
1421  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1422  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1423  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
1424  KMP_DO_PAUSE; \
1425  \
1426  temp_val = *lhs; \
1427  old_value = temp_val; \
1428  new_value = (TYPE)(rhs OP old_value); \
1429  } \
1430  }
1431 
1432 // -------------------------------------------------------------------------
1433 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
1434  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1435  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1436  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1437  }
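// For illustration: the _rev entry points compute "rhs OP *lhs" rather than
// "*lhs OP rhs", so __kmpc_atomic_fixed4_sub_rev effectively performs
//   *lhs = rhs - *lhs;
// with new_value recomputed from the freshly re-read old value on every
// iteration of the cmpxchg loop.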
1438 
1439 // ------------------------------------------------------------------------
1440 // Entries definition for integer operands
1441 // TYPE_ID - operands type and size (fixed4, float4)
1442 // OP_ID - operation identifier (add, sub, mul, ...)
1443 // TYPE - operand type
1444 // BITS - size in bits, used to distinguish low level calls
1445 // OP - operator (used in critical section)
1446 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1447 
1448 // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1449 // ------------------------------------------------------------------------
1450 // Routines for ATOMIC integer operands, other operators
1451 // ------------------------------------------------------------------------
1452 // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1453 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1454  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1455 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1456  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1457 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1458  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1459 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1460  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1461 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1462  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1463 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1464  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1465 
1466 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1467  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1468 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1469  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1470 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1471  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1472 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1473  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1474 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1475  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1476 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1477  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1478 
1479 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1480  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1481 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1482  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1483 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1484  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1485 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1486  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1487 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1488  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1489 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1490  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1491 
1492 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1493  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1494 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1495  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1496 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1497  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1498 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1499  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1500 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1501  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1502 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1503  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1504 
1505 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1506  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1507 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1508  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1509 
1510 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1511  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1512 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1513  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1514 // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1515 
1516 // ------------------------------------------------------------------------
1517 // Routines for Extended types: long double, _Quad, complex flavours (use
1518 // critical section)
1519 // TYPE_ID, OP_ID, TYPE - detailed above
1520 // OP - operator
1521 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1522 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1523  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1524  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1525  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1526  }
1527 
1528 /* ------------------------------------------------------------------------- */
1529 // routines for long double type
1530 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1531  1) // __kmpc_atomic_float10_sub_rev
1532 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1533  1) // __kmpc_atomic_float10_div_rev
1534 #if KMP_HAVE_QUAD
1535 // routines for _Quad type
1536 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1537  1) // __kmpc_atomic_float16_sub_rev
1538 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1539  1) // __kmpc_atomic_float16_div_rev
1540 #if (KMP_ARCH_X86)
1541 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1542  1) // __kmpc_atomic_float16_sub_a16_rev
1543 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1544  1) // __kmpc_atomic_float16_div_a16_rev
1545 #endif // KMP_ARCH_X86
1546 #endif // KMP_HAVE_QUAD
1547 
1548 // routines for complex types
1549 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1550  1) // __kmpc_atomic_cmplx4_sub_rev
1551 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1552  1) // __kmpc_atomic_cmplx4_div_rev
1553 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1554  1) // __kmpc_atomic_cmplx8_sub_rev
1555 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1556  1) // __kmpc_atomic_cmplx8_div_rev
1557 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1558  1) // __kmpc_atomic_cmplx10_sub_rev
1559 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1560  1) // __kmpc_atomic_cmplx10_div_rev
1561 #if KMP_HAVE_QUAD
1562 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1563  1) // __kmpc_atomic_cmplx16_sub_rev
1564 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1565  1) // __kmpc_atomic_cmplx16_div_rev
1566 #if (KMP_ARCH_X86)
1567 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1568  1) // __kmpc_atomic_cmplx16_sub_a16_rev
1569 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1570  1) // __kmpc_atomic_cmplx16_div_a16_rev
1571 #endif // KMP_ARCH_X86
1572 #endif // KMP_HAVE_QUAD
1573 
1574 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1575 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1576 
1577 /* ------------------------------------------------------------------------ */
1578 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1579 /* Note: in order to reduce the total number of type combinations, */
1580 /* it is assumed that the compiler converts the RHS to the longest */
1581 /* floating type, that is _Quad, before calling any of these routines. */
1582 /* Conversion to _Quad will be done by the compiler during calculation, */
1583 /* conversion back to TYPE - before the assignment, like: */
1584 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1585 /* A performance penalty is expected because of software emulation. */
1586 /* ------------------------------------------------------------------------ */
1587 
1588 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1589  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1590  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1591  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1592  KA_TRACE(100, \
1593  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1594  gtid));
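// For illustration (a sketch, not verbatim text):
// ATOMIC_BEGIN_MIX(fixed4, kmp_int32, mul, float8, kmp_real64) opens a
// definition roughly equivalent to
//   void __kmpc_atomic_fixed4_mul_float8(ident_t *id_ref, int gtid,
//                                        kmp_int32 *lhs, kmp_real64 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_mul_float8: T#%d\n", gtid));
// i.e. the LHS keeps its own type while the RHS arrives as the wider type.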
1595 
1596 // -------------------------------------------------------------------------
1597 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1598  GOMP_FLAG) \
1599  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1600  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1601  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1602  }
1603 
1604 // -------------------------------------------------------------------------
1605 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1606 // -------------------------------------------------------------------------
1607 // X86 or X86_64: no alignment problems ====================================
1608 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1609  LCK_ID, MASK, GOMP_FLAG) \
1610  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1611  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1612  OP_CMPXCHG(TYPE, BITS, OP) \
1613  }
1614 // -------------------------------------------------------------------------
1615 #else
1616 // ------------------------------------------------------------------------
1617 // Code for other architectures that don't handle unaligned accesses.
1618 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1619  LCK_ID, MASK, GOMP_FLAG) \
1620  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1621  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1622  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1623  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1624  } else { \
1625  KMP_CHECK_GTID; \
1626  OP_UPDATE_CRITICAL(TYPE, OP, \
1627  LCK_ID) /* unaligned address - use critical */ \
1628  } \
1629  }
1630 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1631 
1632 // -------------------------------------------------------------------------
1633 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1634 // -------------------------------------------------------------------------
1635 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
1636  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
1637  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1638  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1639  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1640  }
1641 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
1642  LCK_ID, GOMP_FLAG) \
1643  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1644  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1645  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1646  }
1647 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1648 
1649 // RHS=float8
1650 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1651  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1652 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1653  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1654 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1655  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1656 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1657  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1658 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1659  0) // __kmpc_atomic_fixed4_mul_float8
1660 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1661  0) // __kmpc_atomic_fixed4_div_float8
1662 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1663  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1664 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1665  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1666 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1667  KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1668 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1669  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1670 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1671  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1672 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1673  KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1674 
1675 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1676 // use them)
1677 #if KMP_HAVE_QUAD
1678 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1679  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1680 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1681  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1682 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1683  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1684 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1685  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1686 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1687  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1688 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1689  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1690 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1691  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1692 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1693  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1694 
1695 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1696  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1697 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1698  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1699 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1700  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1701 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1702  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1703 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1704  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1705 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1706  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1707 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1708  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1709 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1710  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1711 
1712 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1713  0) // __kmpc_atomic_fixed4_add_fp
1714 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1715  0) // __kmpc_atomic_fixed4u_add_fp
1716 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1717  0) // __kmpc_atomic_fixed4_sub_fp
1718 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1719  0) // __kmpc_atomic_fixed4u_sub_fp
1720 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1721  0) // __kmpc_atomic_fixed4_mul_fp
1722 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1723  0) // __kmpc_atomic_fixed4u_mul_fp
1724 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1725  0) // __kmpc_atomic_fixed4_div_fp
1726 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1727  0) // __kmpc_atomic_fixed4u_div_fp
1728 
1729 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1730  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1731 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1732  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1733 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1734  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1735 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1736  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1737 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1738  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1739 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1740  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1741 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1742  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1743 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1744  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1745 
1746 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1747  KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1748 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1749  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1750 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1751  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1752 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1753  KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1754 
1755 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1756  KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1757 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1758  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1759 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1760  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1761 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1762  KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1763 
1764 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1765  1) // __kmpc_atomic_float10_add_fp
1766 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1767  1) // __kmpc_atomic_float10_sub_fp
1768 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1769  1) // __kmpc_atomic_float10_mul_fp
1770 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1771  1) // __kmpc_atomic_float10_div_fp
1772 
1773 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1774 // Reverse operations
1775 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1776  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1777 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1778  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1779 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1780  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1781 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1782  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1783 
1784 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1785  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1786 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1787  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1788 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1789  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1790 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1791  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1792 
1793 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1794  0) // __kmpc_atomic_fixed4_sub_rev_fp
1795 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1796  0) // __kmpc_atomic_fixed4u_sub_rev_fp
1797 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1798  0) // __kmpc_atomic_fixed4_div_rev_fp
1799 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1800  0) // __kmpc_atomic_fixed4u_div_rev_fp
1801 
1802 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1803  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1804 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1805  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1806 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1807  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1808 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1809  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1810 
1811 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1812  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1813 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1814  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1815 
1816 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1817  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1818 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1819  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1820 
1821 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1822  1) // __kmpc_atomic_float10_sub_rev_fp
1823 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1824  1) // __kmpc_atomic_float10_div_rev_fp
1825 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1826 
1827 #endif // KMP_HAVE_QUAD
1828 
1829 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1830 // ------------------------------------------------------------------------
1831 // X86 or X86_64: no alignment problems ====================================
1832 #if USE_CMPXCHG_FIX
1833 // workaround for C78287 (complex(kind=4) data type)
1834 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1835  LCK_ID, MASK, GOMP_FLAG) \
1836  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1837  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1838  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1839  }
1840 // end of the second part of the workaround for C78287
1841 #else
1842 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1843  LCK_ID, MASK, GOMP_FLAG) \
1844  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1845  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1846  OP_CMPXCHG(TYPE, BITS, OP) \
1847  }
1848 #endif // USE_CMPXCHG_FIX
1849 #else
1850 // ------------------------------------------------------------------------
1851 // Code for other architectures that don't handle unaligned accesses.
1852 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1853  LCK_ID, MASK, GOMP_FLAG) \
1854  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1855  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1856  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1857  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1858  } else { \
1859  KMP_CHECK_GTID; \
1860  OP_UPDATE_CRITICAL(TYPE, OP, \
1861  LCK_ID) /* unaligned address - use critical */ \
1862  } \
1863  }
1864 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
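// Illustrative sketch (not compiled; helper name is hypothetical): how the
// MASK parameter above gates the lock-free path on architectures that cannot
// perform unaligned cmpxchg. MASK is written in hex without the 0x prefix, so
// e.g. MASK=7 tests the low three address bits, i.e. requires 8-byte
// alignment before OP_CMPXCHG is used; otherwise the critical section is
// taken.
#if 0
static int example_cas_path_allowed(const void *lhs, kmp_uintptr_t mask) {
  // Zero low bits => sufficiently aligned => cmpxchg-based update is safe.
  return ((kmp_uintptr_t)lhs & mask) == 0;
}
// example_cas_path_allowed(p, 0x7) is the check made for the 8-byte cases.
#endif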
1865 
1866 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1867  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1868 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1869  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1870 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1871  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1872 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1873  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1874 
1875 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1876 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1877 
1878 // ------------------------------------------------------------------------
1879 // Atomic READ routines
1880 
1881 // ------------------------------------------------------------------------
1882 // Beginning of a definition (provides name, parameters, debug trace)
1883 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1884 // fixed)
1885 // OP_ID - operation identifier (add, sub, mul, ...)
1886 // TYPE - operands' type
1887 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1888  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1889  TYPE *loc) { \
1890  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1891  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1892 
1893 // ------------------------------------------------------------------------
1894 // Atomic read of *loc using the "compare_and_store_ret" routine
1895 // TYPE - operands' type
1896 // BITS - size in bits, used to distinguish low level calls
1897 // OP - operator
1898 // Note: temp_val introduced in order to force the compiler to read
1899 // *loc only once (w/o it the compiler reads *loc twice)
1900 // TODO: check if it is still necessary
1901 // Return old value regardless of the result of the "compare & swap" operation
1902 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1903  { \
1904  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1905  union f_i_union { \
1906  TYPE f_val; \
1907  kmp_int##BITS i_val; \
1908  }; \
1909  union f_i_union old_value; \
1910  temp_val = *loc; \
1911  old_value.f_val = temp_val; \
1912  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1913  (kmp_int##BITS *)loc, \
1914  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1915  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1916  new_value = old_value.f_val; \
1917  return new_value; \
1918  }
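// Illustrative sketch (not compiled; helper name is hypothetical): the macro
// above reads a floating-point location atomically by issuing a
// compare-and-swap whose "new" value equals the expected value, so memory is
// never modified while the full-width contents are returned. Simplified
// 32-bit version:
#if 0
static kmp_real32 example_atomic_read_float4(kmp_real32 *loc) {
  union {
    kmp_real32 f_val;
    kmp_int32 i_val;
  } old_value;
  old_value.f_val = *loc; // initial guess; correctness does not depend on it
  // CAS with identical compare/store values returns the current contents
  // without ever changing them.
  old_value.i_val = KMP_COMPARE_AND_STORE_RET32(
      (kmp_int32 *)loc, old_value.i_val, old_value.i_val);
  return old_value.f_val;
}
#endif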
1919 
1920 // -------------------------------------------------------------------------
1921 // Read of *loc bound by critical section
1922 // OP - operator (it's supposed to contain an assignment)
1923 // LCK_ID - lock identifier
1924 // Note: don't check gtid as it should always be valid
1925 // 1, 2-byte - expect valid parameter, other - check before this macro
1926 #define OP_CRITICAL_READ(OP, LCK_ID) \
1927  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1928  \
1929  new_value = (*loc); \
1930  \
1931  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1932 
1933 // -------------------------------------------------------------------------
1934 #ifdef KMP_GOMP_COMPAT
1935 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1936  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1937  KMP_CHECK_GTID; \
1938  OP_CRITICAL_READ(OP, 0); \
1939  return new_value; \
1940  }
1941 #else
1942 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1943 #endif /* KMP_GOMP_COMPAT */
1944 
1945 // -------------------------------------------------------------------------
1946 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1947  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1948  TYPE new_value; \
1949  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1950  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1951  return new_value; \
1952  }
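// Illustrative sketch (not compiled; helper name is hypothetical):
// ATOMIC_FIXED_READ reads an integer by atomically adding zero; the
// fetch-and-add primitive returns the previous value, which is exactly the
// value being read, and the location is left unchanged. Roughly, for fixed4:
#if 0
static kmp_int32 example_atomic_read_fixed4(kmp_int32 *loc) {
  return KMP_TEST_THEN_ADD32(loc, 0); // the macro passes "OP 0", i.e. "+ 0"
}
#endif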
1953 // -------------------------------------------------------------------------
1954 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1955  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1956  TYPE new_value; \
1957  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1958  OP_CMPXCHG_READ(TYPE, BITS, OP) \
1959  }
1960 // ------------------------------------------------------------------------
1961 // Routines for Extended types: long double, _Quad, complex flavours (use
1962 // critical section)
1963 // TYPE_ID, OP_ID, TYPE - detailed above
1964 // OP - operator
1965 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1966 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1967  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1968  TYPE new_value; \
1969  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
1970  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
1971  return new_value; \
1972  }
1973 
1974 // ------------------------------------------------------------------------
1975 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
1976 // value doesn't work.
1977 // Let's return the read value through the additional parameter.
1978 #if (KMP_OS_WINDOWS)
1979 
1980 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
1981  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1982  \
1983  (*out) = (*loc); \
1984  \
1985  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1986 // ------------------------------------------------------------------------
1987 #ifdef KMP_GOMP_COMPAT
1988 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
1989  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1990  KMP_CHECK_GTID; \
1991  OP_CRITICAL_READ_WRK(OP, 0); \
1992  }
1993 #else
1994 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1995 #endif /* KMP_GOMP_COMPAT */
1996 // ------------------------------------------------------------------------
1997 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
1998  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1999  TYPE *loc) { \
2000  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2001  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2002 
2003 // ------------------------------------------------------------------------
2004 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2005  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2006  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
2007  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
2008  }
2009 
2010 #endif // KMP_OS_WINDOWS
2011 
2012 // ------------------------------------------------------------------------
2013 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2014 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2015 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2016  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2017 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2018  KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2019 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2020  KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2021 
2022 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2023 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2024  KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2025 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2026  KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2027 
2028 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2029  1) // __kmpc_atomic_float10_rd
2030 #if KMP_HAVE_QUAD
2031 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2032  1) // __kmpc_atomic_float16_rd
2033 #endif // KMP_HAVE_QUAD
2034 
2035 // Fix for CQ220361 on Windows* OS
2036 #if (KMP_OS_WINDOWS)
2037 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2038  1) // __kmpc_atomic_cmplx4_rd
2039 #else
2040 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2041  1) // __kmpc_atomic_cmplx4_rd
2042 #endif // (KMP_OS_WINDOWS)
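// For reference (declarations only, not compiled): the cmplx4 read entry
// point generated by the #if block just above differs across platforms only
// in how the value is returned.
#if 0
// Windows (CQ220361 fix): value handed back through the extra "out" parameter.
void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid,
                             kmp_cmplx32 *loc);
// Other platforms: value returned directly.
kmp_cmplx32 __kmpc_atomic_cmplx4_rd(ident_t *id_ref, int gtid,
                                    kmp_cmplx32 *loc);
#endif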
2043 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2044  1) // __kmpc_atomic_cmplx8_rd
2045 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2046  1) // __kmpc_atomic_cmplx10_rd
2047 #if KMP_HAVE_QUAD
2048 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2049  1) // __kmpc_atomic_cmplx16_rd
2050 #if (KMP_ARCH_X86)
2051 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2052  1) // __kmpc_atomic_float16_a16_rd
2053 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2054  1) // __kmpc_atomic_cmplx16_a16_rd
2055 #endif // (KMP_ARCH_X86)
2056 #endif // KMP_HAVE_QUAD
2057 
2058 // ------------------------------------------------------------------------
2059 // Atomic WRITE routines
2060 
2061 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2062  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2063  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2064  KMP_XCHG_FIXED##BITS(lhs, rhs); \
2065  }
2066 // ------------------------------------------------------------------------
2067 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2068  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2069  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2070  KMP_XCHG_REAL##BITS(lhs, rhs); \
2071  }
2072 
2073 // ------------------------------------------------------------------------
2074 // Operation on *lhs, rhs using "compare_and_store" routine
2075 // TYPE - operands' type
2076 // BITS - size in bits, used to distinguish low level calls
2077 // OP - operator
2078 // Note: temp_val introduced in order to force the compiler to read
2079 // *lhs only once (w/o it the compiler reads *lhs twice)
2080 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2081  { \
2082  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2083  TYPE old_value, new_value; \
2084  temp_val = *lhs; \
2085  old_value = temp_val; \
2086  new_value = rhs; \
2087  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2088  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2089  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2090  KMP_CPU_PAUSE(); \
2091  \
2092  temp_val = *lhs; \
2093  old_value = temp_val; \
2094  new_value = rhs; \
2095  } \
2096  }
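// Illustrative sketch (not compiled; helper name is hypothetical):
// OP_CMPXCHG_WR implements a plain atomic store as a compare-and-swap loop;
// it is selected below for 8-byte types on IA-32, where no single-instruction
// 64-bit exchange is available. Simplified 64-bit version:
#if 0
static void example_atomic_write_fixed8(kmp_int64 *lhs, kmp_int64 rhs) {
  kmp_int64 old_value = *lhs;
  // Retry until our store replaces the value we last observed.
  while (!KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)lhs, old_value, rhs)) {
    KMP_CPU_PAUSE();
    old_value = *lhs;
  }
}
#endif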
2097 
2098 // -------------------------------------------------------------------------
2099 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2100  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2101  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2102  OP_CMPXCHG_WR(TYPE, BITS, OP) \
2103  }
2104 
2105 // ------------------------------------------------------------------------
2106 // Routines for Extended types: long double, _Quad, complex flavours (use
2107 // critical section)
2108 // TYPE_ID, OP_ID, TYPE - detailed above
2109 // OP - operator
2110 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2111 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2112  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2113  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2114  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2115  }
2116 // -------------------------------------------------------------------------
2117 
2118 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2119  KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2120 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2121  KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2122 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2123  KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2124 #if (KMP_ARCH_X86)
2125 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2126  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2127 #else
2128 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2129  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2130 #endif // (KMP_ARCH_X86)
2131 
2132 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2133  KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2134 #if (KMP_ARCH_X86)
2135 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2136  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2137 #else
2138 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2139  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2140 #endif // (KMP_ARCH_X86)
2141 
2142 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2143  1) // __kmpc_atomic_float10_wr
2144 #if KMP_HAVE_QUAD
2145 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2146  1) // __kmpc_atomic_float16_wr
2147 #endif // KMP_HAVE_QUAD
2148 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2149 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2150  1) // __kmpc_atomic_cmplx8_wr
2151 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2152  1) // __kmpc_atomic_cmplx10_wr
2153 #if KMP_HAVE_QUAD
2154 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2155  1) // __kmpc_atomic_cmplx16_wr
2156 #if (KMP_ARCH_X86)
2157 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2158  1) // __kmpc_atomic_float16_a16_wr
2159 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2160  1) // __kmpc_atomic_cmplx16_a16_wr
2161 #endif // (KMP_ARCH_X86)
2162 #endif // KMP_HAVE_QUAD
2163 
2164 // ------------------------------------------------------------------------
2165 // Atomic CAPTURE routines
2166 
2167 // Beginning of a definition (provides name, parameters, debug trace)
2168 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2169 // fixed)
2170 // OP_ID - operation identifier (add, sub, mul, ...)
2171 // TYPE - operands' type
2172 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2173  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2174  TYPE *lhs, TYPE rhs, int flag) { \
2175  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2176  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2177 
2178 // -------------------------------------------------------------------------
2179 // Operation on *lhs, rhs bound by critical section
2180 // OP - operator (it's supposed to contain an assignment)
2181 // LCK_ID - lock identifier
2182 // Note: don't check gtid as it should always be valid
2183 // 1, 2-byte - expect valid parameter, other - check before this macro
2184 #define OP_CRITICAL_CPT(OP, LCK_ID) \
2185  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2186  \
2187  if (flag) { \
2188  (*lhs) OP rhs; \
2189  new_value = (*lhs); \
2190  } else { \
2191  new_value = (*lhs); \
2192  (*lhs) OP rhs; \
2193  } \
2194  \
2195  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2196  return new_value;
2197 
2198 #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
2199  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2200  \
2201  if (flag) { \
2202  (*lhs) = (TYPE)((*lhs)OP rhs); \
2203  new_value = (*lhs); \
2204  } else { \
2205  new_value = (*lhs); \
2206  (*lhs) = (TYPE)((*lhs)OP rhs); \
2207  } \
2208  \
2209  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2210  return new_value;
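// Illustrative sketch (not compiled; helper/variable names are hypothetical):
// the "flag" argument of the capture routines selects which value is handed
// back, matching the two OpenMP capture forms. Minus the locking (the real
// routine runs this under the per-type lock, e.g. __kmp_atomic_lock_10r),
// OP_UPDATE_CRITICAL_CPT for a long double add behaves like:
#if 0
static long double example_add_cpt(long double *lhs, long double rhs,
                                   int flag) {
  long double captured;
  if (flag) {          // v = x += rhs;        -> capture the updated value
    *lhs = *lhs + rhs;
    captured = *lhs;
  } else {             // { v = x; x += rhs; } -> capture the previous value
    captured = *lhs;
    *lhs = *lhs + rhs;
  }
  return captured;
}
#endif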
2211 
2212 // ------------------------------------------------------------------------
2213 #ifdef KMP_GOMP_COMPAT
2214 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
2215  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2216  KMP_CHECK_GTID; \
2217  OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
2218  }
2219 #else
2220 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2221 #endif /* KMP_GOMP_COMPAT */
2222 
2223 // ------------------------------------------------------------------------
2224 // Operation on *lhs, rhs using "compare_and_store" routine
2225 // TYPE - operands' type
2226 // BITS - size in bits, used to distinguish low level calls
2227 // OP - operator
2228 // Note: temp_val introduced in order to force the compiler to read
2229 // *lhs only once (w/o it the compiler reads *lhs twice)
2230 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2231  { \
2232  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2233  TYPE old_value, new_value; \
2234  temp_val = *lhs; \
2235  old_value = temp_val; \
2236  new_value = (TYPE)(old_value OP rhs); \
2237  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2238  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2239  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2240  KMP_CPU_PAUSE(); \
2241  \
2242  temp_val = *lhs; \
2243  old_value = temp_val; \
2244  new_value = (TYPE)(old_value OP rhs); \
2245  } \
2246  if (flag) { \
2247  return new_value; \
2248  } else \
2249  return old_value; \
2250  }
2251 
2252 // -------------------------------------------------------------------------
2253 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2254  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2255  TYPE new_value; \
2256  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2257  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2258  }
2259 
2260 // -------------------------------------------------------------------------
2261 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2262  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2263  TYPE old_value, new_value; \
2264  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2265  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2266  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2267  if (flag) { \
2268  return old_value OP rhs; \
2269  } else \
2270  return old_value; \
2271  }
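// Illustrative sketch (not compiled; helper name is hypothetical): for types
// with a native fetch-and-add, the capture needs no CAS loop; the primitive
// already returns the old value and the new value is recomputed locally.
// Roughly, for fixed4 add:
#if 0
static kmp_int32 example_fixed4_add_cpt(kmp_int32 *lhs, kmp_int32 rhs,
                                        int flag) {
  // Atomically performs *lhs += rhs and returns the value before the update.
  kmp_int32 old_value = KMP_TEST_THEN_ADD32(lhs, rhs);
  return flag ? old_value + rhs : old_value;
}
#endif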
2272 // -------------------------------------------------------------------------
2273 
2274 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2275  0) // __kmpc_atomic_fixed4_add_cpt
2276 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2277  0) // __kmpc_atomic_fixed4_sub_cpt
2278 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2279  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2280 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2281  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2282 
2283 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2284  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2285 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2286  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2287 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2288  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2289 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2290  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2291 
2292 // ------------------------------------------------------------------------
2293 // Entries definition for integer operands
2294 // TYPE_ID - operands type and size (fixed4, float4)
2295 // OP_ID - operation identifier (add, sub, mul, ...)
2296 // TYPE - operand type
2297 // BITS - size in bits, used to distinguish low level calls
2298 // OP - operator (used in critical section)
2299 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2300 // ------------------------------------------------------------------------
2301 // Routines for ATOMIC integer operands, other operators
2302 // ------------------------------------------------------------------------
2303 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2304 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2305  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2306 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2307  0) // __kmpc_atomic_fixed1_andb_cpt
2308 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2309  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2310 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2311  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2312 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2313  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2314 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2315  0) // __kmpc_atomic_fixed1_orb_cpt
2316 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2317  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2318 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2319  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2320 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2321  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2322 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2323  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2324 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2325  0) // __kmpc_atomic_fixed1_xor_cpt
2326 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2327  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2328 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2329  0) // __kmpc_atomic_fixed2_andb_cpt
2330 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2331  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2332 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2333  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2334 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2335  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2336 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2337  0) // __kmpc_atomic_fixed2_orb_cpt
2338 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2339  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2340 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2341  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2342 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2343  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2344 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2345  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2346 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2347  0) // __kmpc_atomic_fixed2_xor_cpt
2348 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2349  0) // __kmpc_atomic_fixed4_andb_cpt
2350 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2351  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2352 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2353  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2354 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2355  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2356 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2357  0) // __kmpc_atomic_fixed4_orb_cpt
2358 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2359  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2360 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2361  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2362 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2363  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2364 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2365  0) // __kmpc_atomic_fixed4_xor_cpt
2366 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2367  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2368 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2369  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2370 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2371  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2372 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2373  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2374 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2375  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2376 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2377  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2378 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2379  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2380 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2381  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2382 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2383  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2384 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2385  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2386 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2387  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2388 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2389  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2390 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2391  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2392 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2393 
2394 // CAPTURE routines for mixed types RHS=float16
2395 #if KMP_HAVE_QUAD
2396 
2397 // Beginning of a definition (provides name, parameters, debug trace)
2398 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2399 // fixed)
2400 // OP_ID - operation identifier (add, sub, mul, ...)
2401 // TYPE - operands' type
2402 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2403  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2404  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2405  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2406  KA_TRACE(100, \
2407  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2408  gtid));
2409 
2410 // -------------------------------------------------------------------------
2411 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2412  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2413  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2414  TYPE new_value; \
2415  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2416  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2417  }
2418 
2419 // -------------------------------------------------------------------------
2420 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2421  LCK_ID, GOMP_FLAG) \
2422  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2423  TYPE new_value; \
2424  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2425  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2426  }
2427 
2428 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2429  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2430 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2431  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2432 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2433  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2434 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2435  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2436 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2437  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2438 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2439  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2440 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2441  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2442 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2443  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2444 
2445 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2446  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2447 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2448  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2449 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2450  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2451 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2452  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2453 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2454  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2455 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2456  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2457 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2458  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2459 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2460  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2461 
2462 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2463  0) // __kmpc_atomic_fixed4_add_cpt_fp
2464 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2465  0) // __kmpc_atomic_fixed4u_add_cpt_fp
2466 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2467  0) // __kmpc_atomic_fixed4_sub_cpt_fp
2468 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2469  0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2470 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2471  0) // __kmpc_atomic_fixed4_mul_cpt_fp
2472 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2473  0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2474 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2475  0) // __kmpc_atomic_fixed4_div_cpt_fp
2476 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2477  0) // __kmpc_atomic_fixed4u_div_cpt_fp
2478 
2479 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2480  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2481 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2482  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2483 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2484  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2485 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2486  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2487 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2488  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2489 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2490  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2491 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2492  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2493 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2494  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2495 
2496 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2497  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2498 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2499  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2500 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2501  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2502 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2503  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2504 
2505 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2506  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2507 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2508  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2509 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2510  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2511 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2512  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2513 
2514 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2515  1) // __kmpc_atomic_float10_add_cpt_fp
2516 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2517  1) // __kmpc_atomic_float10_sub_cpt_fp
2518 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2519  1) // __kmpc_atomic_float10_mul_cpt_fp
2520 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2521  1) // __kmpc_atomic_float10_div_cpt_fp
2522 
2523 #endif // KMP_HAVE_QUAD
2524 
2525 // ------------------------------------------------------------------------
2526 // Routines for C/C++ Reduction operators && and ||
2527 
2528 // -------------------------------------------------------------------------
2529 // Operation on *lhs, rhs bound by critical section
2530 // OP - operator (it's supposed to contain an assignment)
2531 // LCK_ID - lock identifier
2532 // Note: don't check gtid as it should always be valid
2533 // 1, 2-byte - expect valid parameter, other - check before this macro
2534 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2535  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2536  \
2537  if (flag) { \
2538  new_value OP rhs; \
2539  (*lhs) = new_value; \
2540  } else { \
2541  new_value = (*lhs); \
2542  (*lhs) OP rhs; \
2543  } \
2544  \
2545  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2546 
2547 // ------------------------------------------------------------------------
2548 #ifdef KMP_GOMP_COMPAT
2549 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2550  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2551  KMP_CHECK_GTID; \
2552  OP_CRITICAL_L_CPT(OP, 0); \
2553  return new_value; \
2554  }
2555 #else
2556 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2557 #endif /* KMP_GOMP_COMPAT */
2558 
2559 // ------------------------------------------------------------------------
2560 // Need separate macros for && and || because C has no combined-assignment form of these operators
2561 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2562  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2563  TYPE new_value; \
2564  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2565  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2566  }
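// Illustrative sketch (not compiled; helper name is hypothetical): since C
// has no "&&=" or "||=", the whole logical expression is evaluated and the
// 0/1 result is stored back; the lock-free path is what OP_CMPXCHG_CPT
// expands to for fixed4 &&, roughly:
#if 0
static kmp_int32 example_fixed4_andl_cpt(kmp_int32 *lhs, kmp_int32 rhs,
                                         int flag) {
  kmp_int32 old_value, new_value;
  do {
    old_value = *lhs;
    new_value = (kmp_int32)(old_value && rhs); // always 0 or 1
  } while (
      !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value, new_value));
  return flag ? new_value : old_value;
}
#endif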
2567 
2568 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2569  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2570 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2571  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2572 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2573  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2574 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2575  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2576 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2577  0) // __kmpc_atomic_fixed4_andl_cpt
2578 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2579  0) // __kmpc_atomic_fixed4_orl_cpt
2580 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2581  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2582 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2583  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2584 
2585 // -------------------------------------------------------------------------
2586 // Routines for Fortran operators that have no direct C counterpart:
2587 // MAX, MIN, .EQV., .NEQV.
2588 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2589 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2590 
2591 // -------------------------------------------------------------------------
2592 // MIN and MAX need separate macros
2593 // OP - comparison operator used to check whether any action is still needed
2594 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2595  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2596  \
2597  if (*lhs OP rhs) { /* still need actions? */ \
2598  old_value = *lhs; \
2599  *lhs = rhs; \
2600  if (flag) \
2601  new_value = rhs; \
2602  else \
2603  new_value = old_value; \
2604  } else { \
2605  new_value = *lhs; \
2606  } \
2607  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2608  return new_value;
2609 
2610 // -------------------------------------------------------------------------
2611 #ifdef KMP_GOMP_COMPAT
2612 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2613  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2614  KMP_CHECK_GTID; \
2615  MIN_MAX_CRITSECT_CPT(OP, 0); \
2616  }
2617 #else
2618 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2619 #endif /* KMP_GOMP_COMPAT */
2620 
2621 // -------------------------------------------------------------------------
2622 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2623  { \
2624  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2625  /*TYPE old_value; */ \
2626  temp_val = *lhs; \
2627  old_value = temp_val; \
2628  while (old_value OP rhs && /* still need actions? */ \
2629  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2630  (kmp_int##BITS *)lhs, \
2631  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2632  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2633  KMP_CPU_PAUSE(); \
2634  temp_val = *lhs; \
2635  old_value = temp_val; \
2636  } \
2637  if (flag) \
2638  return rhs; \
2639  else \
2640  return old_value; \
2641  }
2642 
2643 // -------------------------------------------------------------------------
2644 // 1-byte, 2-byte operands - use critical section
2645 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2646  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2647  TYPE new_value, old_value; \
2648  if (*lhs OP rhs) { /* need actions? */ \
2649  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2650  MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2651  } \
2652  return *lhs; \
2653  }
2654 
2655 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2656  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2657  TYPE new_value, old_value; \
2658  if (*lhs OP rhs) { \
2659  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2660  MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2661  } \
2662  return *lhs; \
2663  }
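// Illustrative sketch (not compiled; helper name is hypothetical): combined
// behavior of MIN_MAX_COMPXCHG_CPT / MIN_MAX_CMPXCHG_CPT for a 32-bit signed
// max (OP is "<"). The store is attempted only while the candidate still
// beats the current value; if another thread installs a larger value first,
// the loop stops without writing.
#if 0
static kmp_int32 example_fixed4_max_cpt(kmp_int32 *lhs, kmp_int32 rhs,
                                        int flag) {
  if (*lhs < rhs) { // any action needed at all?
    kmp_int32 old_value = *lhs;
    while (old_value < rhs && // rhs still the larger value?
           !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value, rhs)) {
      KMP_CPU_PAUSE();
      old_value = *lhs;
    }
    return flag ? rhs : old_value; // captured value per the flag
  }
  return *lhs; // nothing to do; the current value already wins
}
#endif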
2664 
2665 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2666  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2667 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2668  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2669 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2670  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2671 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2672  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2673 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2674  0) // __kmpc_atomic_fixed4_max_cpt
2675 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2676  0) // __kmpc_atomic_fixed4_min_cpt
2677 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2678  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2679 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2680  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2681 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2682  KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2683 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2684  KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2685 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2686  KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2687 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2688  KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2689 #if KMP_HAVE_QUAD
2690 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2691  1) // __kmpc_atomic_float16_max_cpt
2692 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2693  1) // __kmpc_atomic_float16_min_cpt
2694 #if (KMP_ARCH_X86)
2695 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2696  1) // __kmpc_atomic_float16_max_a16_cpt
2697 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2698  1) // __kmpc_atomic_float16_min_a16_cpt
2699 #endif // (KMP_ARCH_X86)
2700 #endif // KMP_HAVE_QUAD
2701 
2702 // ------------------------------------------------------------------------
2703 #ifdef KMP_GOMP_COMPAT
2704 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2705  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2706  KMP_CHECK_GTID; \
2707  OP_CRITICAL_CPT(OP, 0); \
2708  }
2709 #else
2710 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2711 #endif /* KMP_GOMP_COMPAT */
2712 // ------------------------------------------------------------------------
2713 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2714  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2715  TYPE new_value; \
2716  OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
2717  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2718  }
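// Note on the ^~ trick above: Fortran .NEQV. maps directly onto ^ (the
// neqv_cpt entries below), while .EQV. is implemented as XOR with the
// complemented operand, using the identity ~(a ^ b) == a ^ ~b for integer
// types; OP_CMPXCHG_CPT therefore computes old_value ^ ~rhs, and the GOMP
// fallback performs (*lhs) ^= (TYPE)~rhs.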
2719 
2720 // ------------------------------------------------------------------------
2721 
2722 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2723  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2724 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2725  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2726 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2727  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2728 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2729  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2730 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2731  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2732 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2733  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2734 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2735  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2736 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2737  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2738 
2739 // ------------------------------------------------------------------------
2740 // Routines for Extended types: long double, _Quad, complex flavours (use
2741 // critical section)
2742 // TYPE_ID, OP_ID, TYPE - detailed above
2743 // OP - operator
2744 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2745 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2746  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2747  TYPE new_value; \
2748  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2749  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2750  }
2751 
2752 // ------------------------------------------------------------------------
2753 // Workaround for cmplx4. Regular routines with return value don't work
2754 // on Win_32e. Let's return captured values through the additional parameter.
2755 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2756  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2757  \
2758  if (flag) { \
2759  (*lhs) OP rhs; \
2760  (*out) = (*lhs); \
2761  } else { \
2762  (*out) = (*lhs); \
2763  (*lhs) OP rhs; \
2764  } \
2765  \
2766  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2767  return;
2768 // ------------------------------------------------------------------------
2769 
2770 #ifdef KMP_GOMP_COMPAT
2771 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2772  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2773  KMP_CHECK_GTID; \
2774  OP_CRITICAL_CPT_WRK(OP## =, 0); \
2775  }
2776 #else
2777 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2778 #endif /* KMP_GOMP_COMPAT */
2779 // ------------------------------------------------------------------------
2780 
2781 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2782  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2783  TYPE rhs, TYPE *out, int flag) { \
2784  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2785  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2786 // ------------------------------------------------------------------------
2787 
2788 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2789  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2790  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2791  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2792  }
2793 // The end of workaround for cmplx4
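// For reference (declaration only, not compiled): capture entry points
// produced by ATOMIC_CRITICAL_CPT_WRK (the cmplx4 routines below) return
// void and hand the captured value back through "out", unlike the other
// capture routines, which return it directly.
#if 0
void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
                                  kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag);
#endif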
2794 
2795 /* ------------------------------------------------------------------------- */
2796 // routines for long double type
2797 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2798  1) // __kmpc_atomic_float10_add_cpt
2799 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2800  1) // __kmpc_atomic_float10_sub_cpt
2801 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2802  1) // __kmpc_atomic_float10_mul_cpt
2803 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2804  1) // __kmpc_atomic_float10_div_cpt
2805 #if KMP_HAVE_QUAD
2806 // routines for _Quad type
2807 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2808  1) // __kmpc_atomic_float16_add_cpt
2809 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2810  1) // __kmpc_atomic_float16_sub_cpt
2811 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2812  1) // __kmpc_atomic_float16_mul_cpt
2813 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2814  1) // __kmpc_atomic_float16_div_cpt
2815 #if (KMP_ARCH_X86)
2816 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2817  1) // __kmpc_atomic_float16_add_a16_cpt
2818 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2819  1) // __kmpc_atomic_float16_sub_a16_cpt
2820 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2821  1) // __kmpc_atomic_float16_mul_a16_cpt
2822 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2823  1) // __kmpc_atomic_float16_div_a16_cpt
2824 #endif // (KMP_ARCH_X86)
2825 #endif // KMP_HAVE_QUAD
2826 
2827 // routines for complex types
2828 
2829 // cmplx4 routines to return void
2830 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2831  1) // __kmpc_atomic_cmplx4_add_cpt
2832 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2833  1) // __kmpc_atomic_cmplx4_sub_cpt
2834 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2835  1) // __kmpc_atomic_cmplx4_mul_cpt
2836 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2837  1) // __kmpc_atomic_cmplx4_div_cpt
2838 
2839 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2840  1) // __kmpc_atomic_cmplx8_add_cpt
2841 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2842  1) // __kmpc_atomic_cmplx8_sub_cpt
2843 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2844  1) // __kmpc_atomic_cmplx8_mul_cpt
2845 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2846  1) // __kmpc_atomic_cmplx8_div_cpt
2847 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2848  1) // __kmpc_atomic_cmplx10_add_cpt
2849 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2850  1) // __kmpc_atomic_cmplx10_sub_cpt
2851 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2852  1) // __kmpc_atomic_cmplx10_mul_cpt
2853 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2854  1) // __kmpc_atomic_cmplx10_div_cpt
2855 #if KMP_HAVE_QUAD
2856 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2857  1) // __kmpc_atomic_cmplx16_add_cpt
2858 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2859  1) // __kmpc_atomic_cmplx16_sub_cpt
2860 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2861  1) // __kmpc_atomic_cmplx16_mul_cpt
2862 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2863  1) // __kmpc_atomic_cmplx16_div_cpt
2864 #if (KMP_ARCH_X86)
2865 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2866  1) // __kmpc_atomic_cmplx16_add_a16_cpt
2867 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2868  1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2869 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2870  1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2871 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2872  1) // __kmpc_atomic_cmplx16_div_a16_cpt
2873 #endif // (KMP_ARCH_X86)
2874 #endif // KMP_HAVE_QUAD
2875 
2876 // OpenMP 4.0: v = x = expr binop x; i.e. { v = x; x = expr binop x; } or
2877 // { x = expr binop x; v = x; } for non-commutative operations.
2878 // Supported only on IA-32 architecture and Intel(R) 64
2879 
2880 // -------------------------------------------------------------------------
2881 // Operation on *lhs, rhs bound by critical section
2882 // OP - operator (it's supposed to contain an assignment)
2883 // LCK_ID - lock identifier
2884 // Note: don't check gtid as it should always be valid
2885 // 1, 2-byte - expect valid parameter, other - check before this macro
2886 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
2887  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2888  \
2889  if (flag) { \
2890  /*temp_val = (*lhs);*/ \
2891  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2892  new_value = (*lhs); \
2893  } else { \
2894  new_value = (*lhs); \
2895  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2896  } \
2897  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2898  return new_value;
2899 
2900 // ------------------------------------------------------------------------
2901 #ifdef KMP_GOMP_COMPAT
2902 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
2903  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2904  KMP_CHECK_GTID; \
2905  OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
2906  }
2907 #else
2908 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
2909 #endif /* KMP_GOMP_COMPAT */
2910 
2911 // ------------------------------------------------------------------------
2912 // Operation on *lhs, rhs using "compare_and_store" routine
2913 // TYPE - operands' type
2914 // BITS - size in bits, used to distinguish low level calls
2915 // OP - operator
2916 // Note: temp_val introduced in order to force the compiler to read
2917 // *lhs only once (w/o it the compiler reads *lhs twice)
2918 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2919  { \
2920  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2921  TYPE old_value, new_value; \
2922  temp_val = *lhs; \
2923  old_value = temp_val; \
2924  new_value = (TYPE)(rhs OP old_value); \
2925  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2926  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2927  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2928  KMP_CPU_PAUSE(); \
2929  \
2930  temp_val = *lhs; \
2931  old_value = temp_val; \
2932  new_value = (TYPE)(rhs OP old_value); \
2933  } \
2934  if (flag) { \
2935  return new_value; \
2936  } else \
2937  return old_value; \
2938  }
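// Illustrative sketch (not compiled; helper name is hypothetical): the _rev
// capture routines differ from the forward ones only in operand order: the
// new value is computed as (rhs OP old_value) instead of (old_value OP rhs).
// Roughly, for fixed4 reverse subtraction:
#if 0
static kmp_int32 example_fixed4_sub_cpt_rev(kmp_int32 *lhs, kmp_int32 rhs,
                                            int flag) {
  kmp_int32 old_value, new_value;
  do {
    old_value = *lhs;
    new_value = rhs - old_value; // x = expr - x (operands reversed)
  } while (
      !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value, new_value));
  return flag ? new_value : old_value;
}
#endif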
2939 
2940 // -------------------------------------------------------------------------
2941 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2942  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2943  TYPE new_value; \
2944  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
2945  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2946  }
2947 
2948 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2949  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2950 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2951  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2952 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2953  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2954 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2955  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2956 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2957  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2958 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2959  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2960 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2961  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2962 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2963  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2964 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2965  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2966 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2967  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2968 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
2969  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
2970 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
2971  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
2972 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
2973  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
2974 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
2975  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
2976 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
2977  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
2978 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
2979  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
2980 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
2981  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
2982 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
2983  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
2984 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
2985  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
2986 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
2987  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
2988 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
2989  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
2990 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
2991  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
2992 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
2993  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
2994 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
2995  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
2996 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
2997  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
2998 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
2999  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
3000 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
3001  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
3002 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
3003  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
3004 // TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG
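// These entry points implement the "reverse" capture forms of
// #pragma omp atomic capture, where the shared variable is the right-hand
// operand of the operator. A hedged illustration of user code a compiler
// could lower to __kmpc_atomic_fixed4_sub_cpt_rev (x, v, expr are
// placeholder names):
//
//   int x, v, expr;
//   #pragma omp atomic capture
//   { v = x; x = expr - x; }    // capture old value (flag == 0)
//   // or: { x = expr - x; v = x; }  -- capture new value (flag != 0)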
3005 
3006 // ------------------------------------------------------------------------
3007 // Routines for Extended types: long double, _Quad, complex flavours (use
3008 // critical section)
3009 // TYPE_ID, OP_ID, TYPE - detailed above
3010 // OP - operator
3011 // LCK_ID - lock identifier, used to select which lock variable guards the update
3012 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
3013  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
3014  TYPE new_value; \
3015  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
3016  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3017  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
3018  }
3019 
3020 /* ------------------------------------------------------------------------- */
3021 // routines for long double type
3022 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
3023  1) // __kmpc_atomic_float10_sub_cpt_rev
3024 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
3025  1) // __kmpc_atomic_float10_div_cpt_rev
3026 #if KMP_HAVE_QUAD
3027 // routines for _Quad type
3028 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
3029  1) // __kmpc_atomic_float16_sub_cpt_rev
3030 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3031  1) // __kmpc_atomic_float16_div_cpt_rev
3032 #if (KMP_ARCH_X86)
3033 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3034  1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3035 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3036  1) // __kmpc_atomic_float16_div_a16_cpt_rev
3037 #endif // (KMP_ARCH_X86)
3038 #endif // KMP_HAVE_QUAD
3039 
3040 // routines for complex types
3041 
3042 // ------------------------------------------------------------------------
3043 // Workaround for cmplx4. Regular routines with return value don't work
3044 // on Win_32e. Let's return captured values through the additional parameter.
3045 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3046  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3047  \
3048  if (flag) { \
3049  (*lhs) = (rhs)OP(*lhs); \
3050  (*out) = (*lhs); \
3051  } else { \
3052  (*out) = (*lhs); \
3053  (*lhs) = (rhs)OP(*lhs); \
3054  } \
3055  \
3056  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3057  return;
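// Note: in the workaround above, 'flag' selects which value is captured into
// *out: nonzero stores the updated value (*lhs after *lhs = rhs OP *lhs),
// zero stores the original value of *lhs.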
3058 // ------------------------------------------------------------------------
3059 
3060 #ifdef KMP_GOMP_COMPAT
3061 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3062  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3063  KMP_CHECK_GTID; \
3064  OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3065  }
3066 #else
3067 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3068 #endif /* KMP_GOMP_COMPAT */
3069 // ------------------------------------------------------------------------
3070 
3071 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3072  GOMP_FLAG) \
3073  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3074  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3075  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3076  }
3077 // The end of workaround for cmplx4
3078 
3079 // !!! TODO: check if we need to return void for cmplx4 routines
3080 // cmplx4 routines to return void
3081 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3082  1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3083 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3084  1) // __kmpc_atomic_cmplx4_div_cpt_rev
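// A hedged sketch of the signature the two workaround instantiations above
// generate (derived from ATOMIC_BEGIN_WRK; the captured value is returned
// through *out instead of the return value):
//
//   void __kmpc_atomic_cmplx4_sub_cpt_rev(ident_t *id_ref, int gtid,
//                                         kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                         kmp_cmplx32 *out, int flag);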
3085 
3086 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3087  1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3088 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3089  1) // __kmpc_atomic_cmplx8_div_cpt_rev
3090 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3091  1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3092 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3093  1) // __kmpc_atomic_cmplx10_div_cpt_rev
3094 #if KMP_HAVE_QUAD
3095 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3096  1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3097 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3098  1) // __kmpc_atomic_cmplx16_div_cpt_rev
3099 #if (KMP_ARCH_X86)
3100 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3101  1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3102 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3103  1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3104 #endif // (KMP_ARCH_X86)
3105 #endif // KMP_HAVE_QUAD
3106 
3107 // Capture reverse for mixed type: RHS=float16
3108 #if KMP_HAVE_QUAD
3109 
3110 // Beginning of a definition (provides name, parameters, debug trace)
3111 // TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
3112 // fixed)
3113 // OP_ID - operation identifier (add, sub, mul, ...)
3114 // TYPE - operands' type
3115 // -------------------------------------------------------------------------
3116 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3117  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3118  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3119  TYPE new_value; \
3120  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3121  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3122  }
3123 
3124 // -------------------------------------------------------------------------
3125 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3126  LCK_ID, GOMP_FLAG) \
3127  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3128  TYPE new_value; \
3129  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
3130  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
3131  }
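// For reference, a hedged sketch of the signature shape both MIX macros above
// generate (derived from ATOMIC_BEGIN_CPT_MIX; the LHS keeps its own type
// while the RHS is _Quad):
//
//   char __kmpc_atomic_fixed1_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
//                                            char *lhs, _Quad rhs, int flag);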
3132 
3133 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3134  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3135 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3136  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3137 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3138  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3139 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3140  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3141 
3142 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3143  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3144 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3145  1,
3146  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3147 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3148  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3149 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3150  1,
3151  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3152 
3153 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3154  3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3155 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3156  4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3157 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3158  3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3159 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3160  4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3161 
3162 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3163  7,
3164  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3165 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3166  8i, 7,
3167  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3168 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3169  7,
3170  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3171 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3172  8i, 7,
3173  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3174 
3175 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3176  4r, 3,
3177  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3178 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3179  4r, 3,
3180  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3181 
3182 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3183  8r, 7,
3184  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3185 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3186  8r, 7,
3187  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3188 
3189 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3190  10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3191 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3192  10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3193 
3194 #endif // KMP_HAVE_QUAD
3195 
3196 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
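// A hedged example of the user-level form lowered to the _swp entry points
// below (x, v, expr are placeholder names):
//
//   #pragma omp atomic capture
//   { v = x; x = expr; }   // v receives the old value, x is overwritten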
3197 
3198 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3199  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3200  TYPE rhs) { \
3201  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3202  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3203 
3204 #define CRITICAL_SWP(LCK_ID) \
3205  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3206  \
3207  old_value = (*lhs); \
3208  (*lhs) = rhs; \
3209  \
3210  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3211  return old_value;
3212 
3213 // ------------------------------------------------------------------------
3214 #ifdef KMP_GOMP_COMPAT
3215 #define GOMP_CRITICAL_SWP(FLAG) \
3216  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3217  KMP_CHECK_GTID; \
3218  CRITICAL_SWP(0); \
3219  }
3220 #else
3221 #define GOMP_CRITICAL_SWP(FLAG)
3222 #endif /* KMP_GOMP_COMPAT */
3223 
3224 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3225  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3226  TYPE old_value; \
3227  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3228  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
3229  return old_value; \
3230  }
3231 // ------------------------------------------------------------------------
3232 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3233  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3234  TYPE old_value; \
3235  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3236  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
3237  return old_value; \
3238  }
3239 
3240 // ------------------------------------------------------------------------
3241 #define CMPXCHG_SWP(TYPE, BITS) \
3242  { \
3243  TYPE KMP_ATOMIC_VOLATILE temp_val; \
3244  TYPE old_value, new_value; \
3245  temp_val = *lhs; \
3246  old_value = temp_val; \
3247  new_value = rhs; \
3248  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
3249  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
3250  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
3251  KMP_CPU_PAUSE(); \
3252  \
3253  temp_val = *lhs; \
3254  old_value = temp_val; \
3255  new_value = rhs; \
3256  } \
3257  return old_value; \
3258  }
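// Illustrative sketch of the loop above for a 32-bit float swap: the operand
// is reinterpreted as an integer of the same width so the hardware
// compare-and-store can be used (simplified; the real macro keeps the casts
// and VOLATILE_CAST plumbing):
//
//   kmp_real32 old_value;
//   do {
//     old_value = *lhs;                              // current value
//   } while (!KMP_COMPARE_AND_STORE_ACQ32(
//       (kmp_int32 *)lhs, *(kmp_int32 *)&old_value,  // compare bit patterns
//       *(kmp_int32 *)&rhs));                        // install the new value
//   return old_value;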
3259 
3260 // -------------------------------------------------------------------------
3261 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3262  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3263  TYPE old_value; \
3264  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3265  CMPXCHG_SWP(TYPE, BITS) \
3266  }
3267 
3268 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3269 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3270 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3271 
3272 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3273  KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3274 
3275 #if (KMP_ARCH_X86)
3276 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3277  KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3278 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3279  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3280 #else
3281 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3282 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3283  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3284 #endif // (KMP_ARCH_X86)
3285 
3286 // ------------------------------------------------------------------------
3287 // Routines for Extended types: long double, _Quad, complex flavours (use
3288 // critical section)
3289 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3290  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3291  TYPE old_value; \
3292  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3293  CRITICAL_SWP(LCK_ID) \
3294  }
3295 
3296 // ------------------------------------------------------------------------
3297 // !!! TODO: check if we need to return void for cmplx4 routines
3298 // Workaround for cmplx4. Regular routines with return value don't work
3299 // on Win_32e. Let's return captured values through the additional parameter.
3300 
3301 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3302  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3303  TYPE rhs, TYPE *out) { \
3304  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3305  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3306 
3307 #define CRITICAL_SWP_WRK(LCK_ID) \
3308  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3309  \
3310  tmp = (*lhs); \
3311  (*lhs) = (rhs); \
3312  (*out) = tmp; \
3313  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3314  return;
3315 // ------------------------------------------------------------------------
3316 
3317 #ifdef KMP_GOMP_COMPAT
3318 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
3319  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3320  KMP_CHECK_GTID; \
3321  CRITICAL_SWP_WRK(0); \
3322  }
3323 #else
3324 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3325 #endif /* KMP_GOMP_COMPAT */
3326 // ------------------------------------------------------------------------
3327 
3328 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3329  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3330  TYPE tmp; \
3331  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
3332  CRITICAL_SWP_WRK(LCK_ID) \
3333  }
3334 // The end of workaround for cmplx4
3335 
3336 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3337 #if KMP_HAVE_QUAD
3338 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3339 #endif // KMP_HAVE_QUAD
3340 // cmplx4 routine to return void
3341 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
3342 
3343 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
3344 // __kmpc_atomic_cmplx4_swp
3345 
3346 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3347 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3348 #if KMP_HAVE_QUAD
3349 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3350 #if (KMP_ARCH_X86)
3351 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3352  1) // __kmpc_atomic_float16_a16_swp
3353 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3354  1) // __kmpc_atomic_cmplx16_a16_swp
3355 #endif // (KMP_ARCH_X86)
3356 #endif // KMP_HAVE_QUAD
3357 
3358 // End of OpenMP 4.0 Capture
3359 
3360 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3361 
3362 #undef OP_CRITICAL
3363 
3364 /* ------------------------------------------------------------------------ */
3365 /* Generic atomic routines */
3366 
3367 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3368  void (*f)(void *, void *, void *)) {
3369  KMP_DEBUG_ASSERT(__kmp_init_serial);
3370 
3371  if (
3372 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3373  FALSE /* must use lock */
3374 #else
3375  TRUE
3376 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3377  ) {
3378  kmp_int8 old_value, new_value;
3379 
3380  old_value = *(kmp_int8 *)lhs;
3381  (*f)(&new_value, &old_value, rhs);
3382 
3383  /* TODO: Should this be acquire or release? */
3384  while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3385  *(kmp_int8 *)&new_value)) {
3386  KMP_CPU_PAUSE();
3387 
3388  old_value = *(kmp_int8 *)lhs;
3389  (*f)(&new_value, &old_value, rhs);
3390  }
3391 
3392  return;
3393  } else {
3394 // All 1-byte data is of integer data type.
3395 
3396 #ifdef KMP_GOMP_COMPAT
3397  if (__kmp_atomic_mode == 2) {
3398  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3399  } else
3400 #endif /* KMP_GOMP_COMPAT */
3401  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3402 
3403  (*f)(lhs, lhs, rhs);
3404 
3405 #ifdef KMP_GOMP_COMPAT
3406  if (__kmp_atomic_mode == 2) {
3407  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3408  } else
3409 #endif /* KMP_GOMP_COMPAT */
3410  __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3411  }
3412 }
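// Hedged usage sketch for the generic entry point above: the caller passes a
// callback that computes out = *lhs <op> *rhs for the user's type, matching
// the (*f)(&new_value, &old_value, rhs) and (*f)(lhs, lhs, rhs) call sites.
// The helper name below is hypothetical and only illustrates the
// (out, lhs, rhs) argument order:
//
//   static void example_add_i8(void *out, void *lhs, void *rhs) {
//     *(kmp_int8 *)out = *(kmp_int8 *)lhs + *(kmp_int8 *)rhs;
//   }
//   // ... in compiled code for a 1-byte atomic update (loc, gtid, x, incr
//   // are placeholders):
//   // __kmpc_atomic_1(&loc, gtid, &x, &incr, example_add_i8);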
3413 
3414 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3415  void (*f)(void *, void *, void *)) {
3416  if (
3417 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3418  FALSE /* must use lock */
3419 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3420  TRUE /* no alignment problems */
3421 #else
3422  !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3423 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3424  ) {
3425  kmp_int16 old_value, new_value;
3426 
3427  old_value = *(kmp_int16 *)lhs;
3428  (*f)(&new_value, &old_value, rhs);
3429 
3430  /* TODO: Should this be acquire or release? */
3431  while (!KMP_COMPARE_AND_STORE_ACQ16(
3432  (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3433  KMP_CPU_PAUSE();
3434 
3435  old_value = *(kmp_int16 *)lhs;
3436  (*f)(&new_value, &old_value, rhs);
3437  }
3438 
3439  return;
3440  } else {
3441 // All 2-byte data is of integer data type.
3442 
3443 #ifdef KMP_GOMP_COMPAT
3444  if (__kmp_atomic_mode == 2) {
3445  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3446  } else
3447 #endif /* KMP_GOMP_COMPAT */
3448  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3449 
3450  (*f)(lhs, lhs, rhs);
3451 
3452 #ifdef KMP_GOMP_COMPAT
3453  if (__kmp_atomic_mode == 2) {
3454  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3455  } else
3456 #endif /* KMP_GOMP_COMPAT */
3457  __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3458  }
3459 }
3460 
3461 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3462  void (*f)(void *, void *, void *)) {
3463  KMP_DEBUG_ASSERT(__kmp_init_serial);
3464 
3465  if (
3466 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3467 // Gomp compatibility is broken if this routine is called for floats.
3468 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3469  TRUE /* no alignment problems */
3470 #else
3471  !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3472 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3473  ) {
3474  kmp_int32 old_value, new_value;
3475 
3476  old_value = *(kmp_int32 *)lhs;
3477  (*f)(&new_value, &old_value, rhs);
3478 
3479  /* TODO: Should this be acquire or release? */
3480  while (!KMP_COMPARE_AND_STORE_ACQ32(
3481  (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3482  KMP_CPU_PAUSE();
3483 
3484  old_value = *(kmp_int32 *)lhs;
3485  (*f)(&new_value, &old_value, rhs);
3486  }
3487 
3488  return;
3489  } else {
3490 // Use __kmp_atomic_lock_4i for all 4-byte data,
3491 // even if it isn't of integer data type.
3492 
3493 #ifdef KMP_GOMP_COMPAT
3494  if (__kmp_atomic_mode == 2) {
3495  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3496  } else
3497 #endif /* KMP_GOMP_COMPAT */
3498  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3499 
3500  (*f)(lhs, lhs, rhs);
3501 
3502 #ifdef KMP_GOMP_COMPAT
3503  if (__kmp_atomic_mode == 2) {
3504  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3505  } else
3506 #endif /* KMP_GOMP_COMPAT */
3507  __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3508  }
3509 }
3510 
3511 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3512  void (*f)(void *, void *, void *)) {
3513  KMP_DEBUG_ASSERT(__kmp_init_serial);
3514  if (
3515 
3516 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3517  FALSE /* must use lock */
3518 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3519  TRUE /* no alignment problems */
3520 #else
3521  !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3522 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3523  ) {
3524  kmp_int64 old_value, new_value;
3525 
3526  old_value = *(kmp_int64 *)lhs;
3527  (*f)(&new_value, &old_value, rhs);
3528  /* TODO: Should this be acquire or release? */
3529  while (!KMP_COMPARE_AND_STORE_ACQ64(
3530  (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3531  KMP_CPU_PAUSE();
3532 
3533  old_value = *(kmp_int64 *)lhs;
3534  (*f)(&new_value, &old_value, rhs);
3535  }
3536 
3537  return;
3538  } else {
3539 // Use __kmp_atomic_lock_8i for all 8-byte data,
3540 // even if it isn't of integer data type.
3541 
3542 #ifdef KMP_GOMP_COMPAT
3543  if (__kmp_atomic_mode == 2) {
3544  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3545  } else
3546 #endif /* KMP_GOMP_COMPAT */
3547  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3548 
3549  (*f)(lhs, lhs, rhs);
3550 
3551 #ifdef KMP_GOMP_COMPAT
3552  if (__kmp_atomic_mode == 2) {
3553  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3554  } else
3555 #endif /* KMP_GOMP_COMPAT */
3556  __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3557  }
3558 }
3559 
3560 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3561  void (*f)(void *, void *, void *)) {
3562  KMP_DEBUG_ASSERT(__kmp_init_serial);
3563 
3564 #ifdef KMP_GOMP_COMPAT
3565  if (__kmp_atomic_mode == 2) {
3566  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3567  } else
3568 #endif /* KMP_GOMP_COMPAT */
3569  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3570 
3571  (*f)(lhs, lhs, rhs);
3572 
3573 #ifdef KMP_GOMP_COMPAT
3574  if (__kmp_atomic_mode == 2) {
3575  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3576  } else
3577 #endif /* KMP_GOMP_COMPAT */
3578  __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3579 }
3580 
3581 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3582  void (*f)(void *, void *, void *)) {
3583  KMP_DEBUG_ASSERT(__kmp_init_serial);
3584 
3585 #ifdef KMP_GOMP_COMPAT
3586  if (__kmp_atomic_mode == 2) {
3587  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3588  } else
3589 #endif /* KMP_GOMP_COMPAT */
3590  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3591 
3592  (*f)(lhs, lhs, rhs);
3593 
3594 #ifdef KMP_GOMP_COMPAT
3595  if (__kmp_atomic_mode == 2) {
3596  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3597  } else
3598 #endif /* KMP_GOMP_COMPAT */
3599  __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3600 }
3601 
3602 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3603  void (*f)(void *, void *, void *)) {
3604  KMP_DEBUG_ASSERT(__kmp_init_serial);
3605 
3606 #ifdef KMP_GOMP_COMPAT
3607  if (__kmp_atomic_mode == 2) {
3608  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3609  } else
3610 #endif /* KMP_GOMP_COMPAT */
3611  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3612 
3613  (*f)(lhs, lhs, rhs);
3614 
3615 #ifdef KMP_GOMP_COMPAT
3616  if (__kmp_atomic_mode == 2) {
3617  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3618  } else
3619 #endif /* KMP_GOMP_COMPAT */
3620  __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3621 }
3622 
3623 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3624  void (*f)(void *, void *, void *)) {
3625  KMP_DEBUG_ASSERT(__kmp_init_serial);
3626 
3627 #ifdef KMP_GOMP_COMPAT
3628  if (__kmp_atomic_mode == 2) {
3629  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3630  } else
3631 #endif /* KMP_GOMP_COMPAT */
3632  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3633 
3634  (*f)(lhs, lhs, rhs);
3635 
3636 #ifdef KMP_GOMP_COMPAT
3637  if (__kmp_atomic_mode == 2) {
3638  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3639  } else
3640 #endif /* KMP_GOMP_COMPAT */
3641  __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3642 }
3643 
3644 // AC: same two routines as GOMP_atomic_start/end, but will be called by our
3645 // compiler; duplicated to avoid using third-party names in pure Intel code
3646 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3647 void __kmpc_atomic_start(void) {
3648  int gtid = __kmp_entry_gtid();
3649  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3650  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3651 }
3652 
3653 void __kmpc_atomic_end(void) {
3654  int gtid = __kmp_get_gtid();
3655  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3656  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3657 }
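// Hedged usage sketch: a compiler brackets an atomic update it cannot map to
// one of the specialized routines with the pair above (x and expr are
// placeholder names):
//
//   __kmpc_atomic_start();
//   x = x + expr;   // arbitrary update protected by __kmp_atomic_lock
//   __kmpc_atomic_end();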
3658 
3663 // end of file