[Offload][NFC] Fix typos discovered by codespell (#125119)
https://github.com/codespell-project/codespell % `codespell --ignore-words-list=archtype,hsa,identty,inout,iself,nd,te,ths,vertexes --write-changes`
This commit is contained in:
@@ -27,7 +27,7 @@ uint32_t getNumDevices();
 /// Return the device number in the system for omp_get_device_num.
 uint32_t getDeviceNum();
 
-/// Return the user choosen debug level.
+/// Return the user chosen debug level.
 uint32_t getDebugKind();
 
 /// Return if teams oversubscription is assumed
@@ -63,7 +63,7 @@ LaneMaskTy activemask();
 /// Return a mask describing all threads with a smaller Id in the warp.
 LaneMaskTy lanemaskLT();
 
-/// Return a mask describing all threads with a larget Id in the warp.
+/// Return a mask describing all threads with a larger Id in the warp.
 LaneMaskTy lanemaskGT();
 
 /// Return the thread Id in the warp, in [0, getWarpSize()).
@@ -33,7 +33,7 @@ namespace memory {
 /// Note: See the restrictions on __kmpc_alloc_shared for proper usage.
 void *allocShared(uint64_t Size, const char *Reason);
 
-/// Free \p Ptr, alloated via allocShared, for \p Reason.
+/// Free \p Ptr, allocated via allocShared, for \p Reason.
 ///
 /// Note: See the restrictions on __kmpc_free_shared for proper usage.
 void freeShared(void *Ptr, uint64_t Bytes, const char *Reason);
@@ -44,7 +44,7 @@ void *allocGlobal(uint64_t Size, const char *Reason);
 /// Return a pointer to the dynamic shared memory buffer.
 void *getDynamicBuffer();
 
-/// Free \p Ptr, alloated via allocGlobal, for \p Reason.
+/// Free \p Ptr, allocated via allocGlobal, for \p Reason.
 void freeGlobal(void *Ptr, const char *Reason);
 
 } // namespace memory
@@ -365,7 +365,7 @@ inline state::Value<uint32_t, state::VK_Level> Level;
 /// The `active-level` describes which of the parallel level counted with the
 /// `level-var` is active. There can only be one.
 ///
-/// active-level-var is 1, if ActiveLevelVar is not 0, otherweise it is 0.
+/// active-level-var is 1, if ActiveLevelVar is not 0, otherwise it is 0.
 inline state::Value<uint32_t, state::VK_ActiveLevel> ActiveLevel;
 
 /// TODO
@@ -22,7 +22,7 @@ namespace atomic {
 
 enum OrderingTy {
   relaxed = __ATOMIC_RELAXED,
-  aquire = __ATOMIC_ACQUIRE,
+  acquire = __ATOMIC_ACQUIRE,
   release = __ATOMIC_RELEASE,
   acq_rel = __ATOMIC_ACQ_REL,
   seq_cst = __ATOMIC_SEQ_CST,
@@ -27,7 +27,7 @@ using namespace ompx;
     0;
 [[gnu::weak]] extern const uint32_t __omp_rtl_assume_teams_oversubscription = 0;
 
-// This variable should be visibile to the plugin so we override the default
+// This variable should be visible to the plugin so we override the default
 // hidden visibility.
 [[gnu::used, gnu::retain, gnu::weak,
   gnu::visibility("protected")]] DeviceEnvironmentTy
@@ -33,7 +33,7 @@ double getWTime();
 
 double getWTick() {
   // The number of ticks per second for the AMDGPU clock varies by card and can
-  // only be retrived by querying the driver. We rely on the device environment
+  // only be retrieved by querying the driver. We rely on the device environment
   // to inform us what the proper frequency is.
   return 1.0 / config::getClockFrequency();
 }
@@ -206,7 +206,7 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
   // to the number of slots in the buffer.
   bool IsMaster = (ThreadId == 0);
   while (IsMaster) {
-    Bound = atomic::load(&IterCnt, atomic::aquire);
+    Bound = atomic::load(&IterCnt, atomic::acquire);
     if (TeamId < Bound + num_of_records)
       break;
   }
@@ -259,7 +259,7 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
     unsigned NumRecs = kmpcMin(NumTeams, uint32_t(num_of_records));
     if (ChunkTeamCount == NumTeams - Bound - 1) {
       // Ensure we see the global memory writes by other teams
-      fence::kernel(atomic::aquire);
+      fence::kernel(atomic::acquire);
 
       //
       // Last team processing.
@@ -84,7 +84,7 @@ uint32_t atomicInc(uint32_t *A, uint32_t V, atomic::OrderingTy Ordering,
   default:
     __builtin_unreachable();
   Case(atomic::relaxed);
-  Case(atomic::aquire);
+  Case(atomic::acquire);
   Case(atomic::release);
   Case(atomic::acq_rel);
   Case(atomic::seq_cst);
@@ -107,7 +107,7 @@ void namedBarrier() {
   uint32_t WarpSize = mapping::getWarpSize();
   uint32_t NumWaves = NumThreads / WarpSize;
 
-  fence::team(atomic::aquire);
+  fence::team(atomic::acquire);
 
   // named barrier implementation for amdgcn.
   // Uses two 16 bit unsigned counters. One for the number of waves to have
@@ -172,7 +172,7 @@ void syncThreads(atomic::OrderingTy Ordering) {
   __builtin_amdgcn_s_barrier();
 
   if (Ordering != atomic::relaxed)
-    fenceTeam(Ordering == atomic::acq_rel ? atomic::aquire : atomic::seq_cst);
+    fenceTeam(Ordering == atomic::acq_rel ? atomic::acquire : atomic::seq_cst);
 }
 void syncThreadsAligned(atomic::OrderingTy Ordering) { syncThreads(Ordering); }
@@ -198,7 +198,7 @@ void setCriticalLock(omp_lock_t *Lock) {
            !cas((uint32_t *)Lock, UNSET, SET, atomic::relaxed, atomic::relaxed)) {
       __builtin_amdgcn_s_sleep(32);
     }
-    fenceKernel(atomic::aquire);
+    fenceKernel(atomic::acquire);
   }
 }
 
@@ -79,7 +79,7 @@ template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
     lb = lb + entityId * chunk;
     T inputUb = ub;
     ub = lb + chunk - 1; // Clang uses i <= ub
-    // Say ub' is the begining of the last chunk. Then who ever has a
+    // Say ub' is the beginning of the last chunk. Then who ever has a
     // lower bound plus a multiple of the increment equal to ub' is
     // the last one.
     T beginingLastChunk = inputUb - (inputUb % chunk);
@@ -806,7 +806,7 @@ public:
                NumIters, OneIterationPerThread);
   }
 
-  /// Worksharing `distrbute`-loop.
+  /// Worksharing `distribute`-loop.
   static void Distribute(IdentTy *Loc, void (*LoopBody)(Ty, void *), void *Arg,
                          Ty NumIters, Ty BlockChunk) {
     ASSERT(icv::Level == 0, "Bad distribute");
@@ -853,7 +853,7 @@ public:
     ASSERT(state::ParallelTeamSize == 1, "Bad distribute");
   }
 
-  /// Worksharing `distrbute parallel for`-loop.
+  /// Worksharing `distribute parallel for`-loop.
   static void DistributeFor(IdentTy *Loc, void (*LoopBody)(Ty, void *),
                             void *Arg, Ty NumIters, Ty NumThreads,
                             Ty BlockChunk, Ty ThreadChunk) {
Reference in New Issue
Block a user