Files
clang-p2996/parallel-libs/acxxel/tests/multi_device_test.cpp
Jason Henline bdc410baba [Acxxel] Remove setActiveDeviceForThread
Summary:
After experimenting with CUDA, I realized that we really only need to
set the active context right before creating an object such as a stream
or a device memory allocation. When we go on to use these objects later,
it is fine if the context that created them is no longer active;
operations with those objects will succeed anyway.

Since it turns out that we don't have to check the active context for
every operation, it makes sense to hide this active context from users
(by removing the "ActiveDeviceForThread" setter and getter) and to
change the Acxxel API to explicitly pass in the device ID to create
objects.

This change improves the Acxxel API and greatly simplifies the CUDA and
OpenCL implementations because they no longer require thread_local data.

Reviewers: jlebar, jprice

Subscribers: mgorny, parallel_libs-commits

Differential Revision: https://reviews.llvm.org/D26050

llvm-svn: 285372
2016-10-28 00:54:02 +00:00

88 lines
2.8 KiB
C++

#include "acxxel.h"
#include "config.h"
#include "gtest/gtest.h"
namespace {
using PlatformGetter = acxxel::Expected<acxxel::Platform *> (*)();
class MultiDeviceTest : public ::testing::TestWithParam<PlatformGetter> {};
/// Copies a small host array to device 0 and to device 1, copies each device
/// buffer back into its own host array, and checks that both round trips
/// reproduce the original data.
///
/// The test addresses device indices 0 and 1, so the platform must report at
/// least two devices.
TEST_P(MultiDeviceTest, AsyncCopy) {
  acxxel::Platform *Platform = GetParam()().takeValue();
  int DeviceCount = Platform->getDeviceCount().getValue();
  // Devices 0 and 1 are both used below. Fail fatally here, with a readable
  // message, rather than requesting a device index the platform lacks.
  ASSERT_GE(DeviceCount, 2);

  int Length = 3;

  // Host-side storage: A is the source, B0/B1 receive the data read back from
  // device 0 and device 1 respectively.
  auto A = std::unique_ptr<int[]>(new int[Length]);
  auto B0 = std::unique_ptr<int[]>(new int[Length]);
  auto B1 = std::unique_ptr<int[]>(new int[Length]);
  auto ASpan = acxxel::Span<int>(A.get(), Length);
  auto B0Span = acxxel::Span<int>(B0.get(), Length);
  auto B1Span = acxxel::Span<int>(B1.get(), Length);

  // Source pattern: element I holds the value I.
  for (int I = 0; I < Length; ++I)
    A[I] = I;

  // Register the host buffers with the platform; the registered handles are
  // what the asynchronous copies below operate on.
  auto AsyncA = Platform->registerHostMem(ASpan).takeValue();
  auto AsyncB0 = Platform->registerHostMem(B0Span).takeValue();
  auto AsyncB1 = Platform->registerHostMem(B1Span).takeValue();

  // One stream and one device allocation per device index.
  acxxel::Stream Stream0 = Platform->createStream(0).takeValue();
  acxxel::Stream Stream1 = Platform->createStream(1).takeValue();
  auto Device0 = Platform->mallocD<int>(Length, 0).takeValue();
  auto Device1 = Platform->mallocD<int>(Length, 1).takeValue();

  // Host -> device -> host round trip on each stream, then wait for it.
  EXPECT_FALSE(Stream0.asyncCopyHToD(AsyncA, Device0, Length)
                   .asyncCopyDToH(Device0, AsyncB0, Length)
                   .sync()
                   .isError());
  EXPECT_FALSE(Stream1.asyncCopyHToD(AsyncA, Device1, Length)
                   .asyncCopyDToH(Device1, AsyncB1, Length)
                   .sync()
                   .isError());

  // Both read-back buffers must match the source pattern.
  for (int I = 0; I < Length; ++I) {
    EXPECT_EQ(B0[I], I);
    EXPECT_EQ(B1[I], I);
  }
}
/// Enqueues one event on a stream for device 0 and one on a stream for
/// device 1, waits for both streams, and checks that each event reports done
/// and can be synced without error.
///
/// The test addresses device indices 0 and 1, so the platform must report at
/// least two devices.
TEST_P(MultiDeviceTest, Events) {
  acxxel::Platform *Platform = GetParam()().takeValue();
  int DeviceCount = Platform->getDeviceCount().getValue();
  // Devices 0 and 1 are both used below. Fail fatally here, with a readable
  // message, rather than requesting a device index the platform lacks.
  ASSERT_GE(DeviceCount, 2);

  // One stream and one event per device index.
  acxxel::Stream Stream0 = Platform->createStream(0).takeValue();
  acxxel::Stream Stream1 = Platform->createStream(1).takeValue();
  acxxel::Event Event0 = Platform->createEvent(0).takeValue();
  acxxel::Event Event1 = Platform->createEvent(1).takeValue();

  // Enqueue each event on the stream with the matching device index and wait
  // for that stream.
  EXPECT_FALSE(Stream0.enqueueEvent(Event0).sync().isError());
  EXPECT_FALSE(Stream1.enqueueEvent(Event1).sync().isError());

  // After the stream syncs above, both events are expected to be complete.
  EXPECT_TRUE(Event0.isDone());
  EXPECT_TRUE(Event1.isDone());
  EXPECT_FALSE(Event0.sync().isError());
  EXPECT_FALSE(Event1.sync().isError());
}
// Instantiate the MultiDeviceTest suite with a getter for every backend that
// is enabled at build time. If neither ACXXEL_ENABLE_CUDA nor
// ACXXEL_ENABLE_OPENCL is defined, no instantiation is emitted.
#if defined(ACXXEL_ENABLE_CUDA) && defined(ACXXEL_ENABLE_OPENCL)
INSTANTIATE_TEST_CASE_P(BothPlatformTest, MultiDeviceTest,
                        ::testing::Values(acxxel::getCUDAPlatform,
                                          acxxel::getOpenCLPlatform));
#elif defined(ACXXEL_ENABLE_CUDA)
INSTANTIATE_TEST_CASE_P(BothPlatformTest, MultiDeviceTest,
                        ::testing::Values(acxxel::getCUDAPlatform));
#elif defined(ACXXEL_ENABLE_OPENCL)
INSTANTIATE_TEST_CASE_P(BothPlatformTest, MultiDeviceTest,
                        ::testing::Values(acxxel::getOpenCLPlatform));
#endif
} // namespace