[Offload][NFC] Fix typos discovered by codespell (#125119)
https://github.com/codespell-project/codespell % `codespell --ignore-words-list=archtype,hsa,identty,inout,iself,nd,te,ths,vertexes --write-changes`
This commit is contained in:
@@ -27,7 +27,7 @@ uint32_t getNumDevices();
 /// Return the device number in the system for omp_get_device_num.
 uint32_t getDeviceNum();
 
-/// Return the user choosen debug level.
+/// Return the user chosen debug level.
 uint32_t getDebugKind();
 
 /// Return if teams oversubscription is assumed
@@ -63,7 +63,7 @@ LaneMaskTy activemask();
 /// Return a mask describing all threads with a smaller Id in the warp.
 LaneMaskTy lanemaskLT();
 
-/// Return a mask describing all threads with a larget Id in the warp.
+/// Return a mask describing all threads with a larger Id in the warp.
 LaneMaskTy lanemaskGT();
 
 /// Return the thread Id in the warp, in [0, getWarpSize()).
@@ -33,7 +33,7 @@ namespace memory {
 /// Note: See the restrictions on __kmpc_alloc_shared for proper usage.
 void *allocShared(uint64_t Size, const char *Reason);
 
-/// Free \p Ptr, alloated via allocShared, for \p Reason.
+/// Free \p Ptr, allocated via allocShared, for \p Reason.
 ///
 /// Note: See the restrictions on __kmpc_free_shared for proper usage.
 void freeShared(void *Ptr, uint64_t Bytes, const char *Reason);
@@ -44,7 +44,7 @@ void *allocGlobal(uint64_t Size, const char *Reason);
 /// Return a pointer to the dynamic shared memory buffer.
 void *getDynamicBuffer();
 
-/// Free \p Ptr, alloated via allocGlobal, for \p Reason.
+/// Free \p Ptr, allocated via allocGlobal, for \p Reason.
 void freeGlobal(void *Ptr, const char *Reason);
 
 } // namespace memory
@@ -365,7 +365,7 @@ inline state::Value<uint32_t, state::VK_Level> Level;
 /// The `active-level` describes which of the parallel level counted with the
 /// `level-var` is active. There can only be one.
 ///
-/// active-level-var is 1, if ActiveLevelVar is not 0, otherweise it is 0.
+/// active-level-var is 1, if ActiveLevelVar is not 0, otherwise it is 0.
 inline state::Value<uint32_t, state::VK_ActiveLevel> ActiveLevel;
 
 /// TODO
@@ -22,7 +22,7 @@ namespace atomic {
 
 enum OrderingTy {
   relaxed = __ATOMIC_RELAXED,
-  aquire = __ATOMIC_ACQUIRE,
+  acquire = __ATOMIC_ACQUIRE,
   release = __ATOMIC_RELEASE,
   acq_rel = __ATOMIC_ACQ_REL,
   seq_cst = __ATOMIC_SEQ_CST,
@@ -27,7 +27,7 @@ using namespace ompx;
     0;
 [[gnu::weak]] extern const uint32_t __omp_rtl_assume_teams_oversubscription = 0;
 
-// This variable should be visibile to the plugin so we override the default
+// This variable should be visible to the plugin so we override the default
 // hidden visibility.
 [[gnu::used, gnu::retain, gnu::weak,
   gnu::visibility("protected")]] DeviceEnvironmentTy
@@ -33,7 +33,7 @@ double getWTime();
 
 double getWTick() {
   // The number of ticks per second for the AMDGPU clock varies by card and can
-  // only be retrived by querying the driver. We rely on the device environment
+  // only be retrieved by querying the driver. We rely on the device environment
   // to inform us what the proper frequency is.
   return 1.0 / config::getClockFrequency();
 }
@@ -206,7 +206,7 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
   // to the number of slots in the buffer.
   bool IsMaster = (ThreadId == 0);
   while (IsMaster) {
-    Bound = atomic::load(&IterCnt, atomic::aquire);
+    Bound = atomic::load(&IterCnt, atomic::acquire);
     if (TeamId < Bound + num_of_records)
       break;
   }
@@ -259,7 +259,7 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
     unsigned NumRecs = kmpcMin(NumTeams, uint32_t(num_of_records));
     if (ChunkTeamCount == NumTeams - Bound - 1) {
       // Ensure we see the global memory writes by other teams
-      fence::kernel(atomic::aquire);
+      fence::kernel(atomic::acquire);
 
       //
       // Last team processing.
@@ -84,7 +84,7 @@ uint32_t atomicInc(uint32_t *A, uint32_t V, atomic::OrderingTy Ordering,
   default:
     __builtin_unreachable();
   Case(atomic::relaxed);
-  Case(atomic::aquire);
+  Case(atomic::acquire);
   Case(atomic::release);
   Case(atomic::acq_rel);
   Case(atomic::seq_cst);
@@ -107,7 +107,7 @@ void namedBarrier() {
   uint32_t WarpSize = mapping::getWarpSize();
   uint32_t NumWaves = NumThreads / WarpSize;
 
-  fence::team(atomic::aquire);
+  fence::team(atomic::acquire);
 
   // named barrier implementation for amdgcn.
   // Uses two 16 bit unsigned counters. One for the number of waves to have
@@ -172,7 +172,7 @@ void syncThreads(atomic::OrderingTy Ordering) {
   __builtin_amdgcn_s_barrier();
 
   if (Ordering != atomic::relaxed)
-    fenceTeam(Ordering == atomic::acq_rel ? atomic::aquire : atomic::seq_cst);
+    fenceTeam(Ordering == atomic::acq_rel ? atomic::acquire : atomic::seq_cst);
 }
 void syncThreadsAligned(atomic::OrderingTy Ordering) { syncThreads(Ordering); }
@@ -198,7 +198,7 @@ void setCriticalLock(omp_lock_t *Lock) {
            !cas((uint32_t *)Lock, UNSET, SET, atomic::relaxed, atomic::relaxed)) {
       __builtin_amdgcn_s_sleep(32);
     }
-    fenceKernel(atomic::aquire);
+    fenceKernel(atomic::acquire);
   }
 }
 
@@ -79,7 +79,7 @@ template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
     lb = lb + entityId * chunk;
     T inputUb = ub;
     ub = lb + chunk - 1; // Clang uses i <= ub
-    // Say ub' is the begining of the last chunk. Then who ever has a
+    // Say ub' is the beginning of the last chunk. Then who ever has a
     // lower bound plus a multiple of the increment equal to ub' is
     // the last one.
     T beginingLastChunk = inputUb - (inputUb % chunk);
@@ -806,7 +806,7 @@ public:
                NumIters, OneIterationPerThread);
   }
 
-  /// Worksharing `distrbute`-loop.
+  /// Worksharing `distribute`-loop.
   static void Distribute(IdentTy *Loc, void (*LoopBody)(Ty, void *), void *Arg,
                          Ty NumIters, Ty BlockChunk) {
     ASSERT(icv::Level == 0, "Bad distribute");
@@ -853,7 +853,7 @@ public:
     ASSERT(state::ParallelTeamSize == 1, "Bad distribute");
   }
 
-  /// Worksharing `distrbute parallel for`-loop.
+  /// Worksharing `distribute parallel for`-loop.
   static void DistributeFor(IdentTy *Loc, void (*LoopBody)(Ty, void *),
                             void *Arg, Ty NumIters, Ty NumThreads,
                             Ty BlockChunk, Ty ThreadChunk) {
Reference in New Issue
Block a user