Skip to content

Commit 38ccdf5

Browse files
committed
GPU: Unify the classes GPUInterfaceOutputRegion and GPUOutputControl in one (having the same purpose anyway)
1 parent b0cb089 commit 38ccdf5

File tree

8 files changed

+64
-62
lines changed

8 files changed

+64
-62
lines changed

Detectors/TPC/workflow/src/CATrackerSpec.cxx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,7 @@ DataProcessorSpec getCATrackerSpec(CompletionPolicyData* policyData, ca::Config
528528
outputRegions.compressedClusters.allocator = [&bufferCompressedClustersChar, &pc](size_t size) -> void* {bufferCompressedClustersChar = pc.outputs().make<char>(Output{gDataOriginTPC, "COMPCLUSTERSFLAT", 0}, size).data(); return bufferCompressedClustersChar; };
529529
} else {
530530
bufferCompressedClusters.emplace(pc.outputs().make<std::vector<char>>(Output{gDataOriginTPC, "COMPCLUSTERSFLAT", 0}, processAttributes->outputBufferSize));
531-
outputRegions.compressedClusters.ptr = bufferCompressedClustersChar = bufferCompressedClusters->get().data();
531+
outputRegions.compressedClusters.ptrBase = bufferCompressedClustersChar = bufferCompressedClusters->get().data();
532532
outputRegions.compressedClusters.size = bufferCompressedClusters->get().size();
533533
}
534534
}
@@ -539,7 +539,7 @@ DataProcessorSpec getCATrackerSpec(CompletionPolicyData* policyData, ca::Config
539539
} else {
540540
clusterOutput.emplace(pc.outputs().make<std::vector<char>>({gDataOriginTPC, outputLabel, NSectors, Lifetime::Timeframe, {clusterOutputSectorHeader}}, processAttributes->outputBufferSize));
541541
clusterOutputChar = clusterOutput->get().data();
542-
outputRegions.clustersNative.ptr = clusterOutputChar + sizeof(ClusterCountIndex);
542+
outputRegions.clustersNative.ptrBase = clusterOutputChar + sizeof(ClusterCountIndex);
543543
outputRegions.clustersNative.size = clusterOutput->get().size() - sizeof(ClusterCountIndex);
544544
}
545545
}
@@ -548,7 +548,7 @@ DataProcessorSpec getCATrackerSpec(CompletionPolicyData* policyData, ca::Config
548548
outputRegions.tpcTracks.allocator = [&bufferTPCTracksChar, &pc](size_t size) -> void* {bufferTPCTracksChar = pc.outputs().make<char>(Output{gDataOriginTPC, "TRACKSGPU", 0}, size).data(); return bufferTPCTracksChar; };
549549
} else {
550550
bufferTPCTracks.emplace(pc.outputs().make<std::vector<char>>(Output{gDataOriginTPC, "TRACKSGPU", 0}, processAttributes->outputBufferSize));
551-
outputRegions.tpcTracks.ptr = bufferTPCTracksChar = bufferTPCTracks->get().data();
551+
outputRegions.tpcTracks.ptrBase = bufferTPCTracksChar = bufferTPCTracks->get().data();
552552
outputRegions.tpcTracks.size = bufferTPCTracks->get().size();
553553
}
554554
}
@@ -557,7 +557,7 @@ DataProcessorSpec getCATrackerSpec(CompletionPolicyData* policyData, ca::Config
557557
outputRegions.sharedClusterMap.allocator = [&bufferSharedClusterMapChar, &pc](size_t size) -> void* {bufferSharedClusterMapChar = pc.outputs().make<char>(Output{gDataOriginTPC, "CLSHAREDMAP", 0}, size).data(); return bufferSharedClusterMapChar; };
558558
} else {
559559
bufferSharedClusterMap.emplace(pc.outputs().make<std::vector<char>>(Output{gDataOriginTPC, "CLSHAREDMAP", 0}, processAttributes->outputBufferSize));
560-
outputRegions.sharedClusterMap.ptr = bufferSharedClusterMapChar = bufferSharedClusterMap->get().data();
560+
outputRegions.sharedClusterMap.ptrBase = bufferSharedClusterMapChar = bufferSharedClusterMap->get().data();
561561
outputRegions.sharedClusterMap.size = bufferSharedClusterMap->get().size();
562562
}
563563
}

GPU/GPUTracking/Base/GPUOutputControl.h

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -23,57 +23,57 @@ namespace GPUCA_NAMESPACE
2323
{
2424
namespace gpu
2525
{
26+
27+
// This defines an output region. ptrBase points to a memory buffer, which should have a proper alignment.
28+
// Since DPL does not respect the alignment of data types, we do not impose anything specific but just use void*, but it should be >= 64 bytes ideally.
29+
// The size defines the maximum possible buffer size when GPUReconstruction is called, and returns the number of filled bytes when it returns.
30+
// If the buffer size is exceeded, size is set to 1
31+
// ptrCurrent must equal ptrBase if set (or nullptr), and can be incremented by GPUReconstruction step by step if multiple buffers are used.
32+
// If ptrBase == nullptr, there is no region defined and GPUReconstruction will write its output to an internal buffer.
33+
// If allocator is set, it is called as a callback to provide a ptr to the memory.
34+
2635
struct GPUOutputControl {
27-
enum OutputTypeStruct { AllocateInternal = 0,
28-
UseExternalBuffer = 1 };
2936
GPUOutputControl() = default;
30-
void set(void* ptr, size_t size)
37+
void set(void* p, size_t s)
3138
{
32-
new (this) GPUOutputControl;
33-
OutputType = GPUOutputControl::UseExternalBuffer;
34-
OutputBase = OutputPtr = (char*)ptr;
35-
OutputMaxSize = size;
39+
reset();
40+
ptrBase = ptrCurrent = p;
41+
size = s;
3642
}
37-
void set(const std::function<void*(size_t)>& allocator)
43+
void set(const std::function<void*(size_t)>& a)
3844
{
39-
new (this) GPUOutputControl;
40-
OutputType = GPUOutputControl::UseExternalBuffer;
41-
OutputAllocator = allocator;
45+
reset();
46+
allocator = a;
4247
}
4348
void reset()
4449
{
4550
new (this) GPUOutputControl;
4651
}
52+
bool useExternal() { return size || allocator; }
53+
bool useInternal() { return !useExternal(); }
54+
void checkCurrent()
55+
{
56+
if (ptrBase && ptrCurrent == nullptr) {
57+
ptrCurrent = ptrBase;
58+
}
59+
}
4760

48-
void* OutputBase = nullptr; // Base ptr to memory pool, occupied size is OutputPtr - OutputBase
49-
void* OutputPtr = nullptr; // Pointer to Output Space
50-
size_t OutputMaxSize = 0; // Max Size of Output Data if Pointer to output space is given
51-
std::function<void*(size_t)> OutputAllocator = nullptr; // Allocator callback
52-
OutputTypeStruct OutputType = AllocateInternal; // How to perform the output
53-
char EndOfSpace = 0; // end of space flag
54-
};
55-
56-
// This defines an output region. Ptr points to a memory buffer, which should have a proper alignment.
57-
// Since DPL does not respect the alignment of data types, we do not impose anything specic but just use void*, but it should be >= 64 bytes ideally.
58-
// The size defines the maximum possible buffer size when GPUReconstruction is called, and returns the number of filled bytes when it returns.
59-
// If ptr == nullptr, there is no region defined and GPUReconstruction will write its output to an internal buffer.
60-
// If allocator is set, it is called as a callback to provide a ptr to the memory.
61-
struct GPUInterfaceOutputRegion {
62-
void* ptr = nullptr;
63-
size_t size = 0;
64-
std::function<void*(size_t)> allocator = nullptr;
61+
void* ptrBase = nullptr; // Base ptr to memory pool, occupied size is ptrCurrent - ptrBase
62+
void* ptrCurrent = nullptr; // Pointer to free Output Space
63+
size_t size = 0; // Max Size of Output Data if Pointer to output space is given
64+
std::function<void*(size_t)> allocator = nullptr; // Allocator callback
6565
};
6666

6767
struct GPUTrackingOutputs {
68-
GPUInterfaceOutputRegion compressedClusters;
69-
GPUInterfaceOutputRegion clustersNative;
70-
GPUInterfaceOutputRegion tpcTracks;
71-
GPUInterfaceOutputRegion clusterLabels;
72-
GPUInterfaceOutputRegion sharedClusterMap;
68+
GPUOutputControl compressedClusters;
69+
GPUOutputControl clustersNative;
70+
GPUOutputControl tpcTracks;
71+
GPUOutputControl clusterLabels;
72+
GPUOutputControl sharedClusterMap;
7373

7474
static size_t count() { return sizeof(GPUTrackingOutputs) / sizeof(compressedClusters); }
75-
GPUInterfaceOutputRegion* asArray() { return (GPUInterfaceOutputRegion*)this; }
76-
size_t getIndex(const GPUInterfaceOutputRegion& v) { return &v - (const GPUInterfaceOutputRegion*)this; }
75+
GPUOutputControl* asArray() { return (GPUOutputControl*)this; }
76+
size_t getIndex(const GPUOutputControl& v) { return &v - (const GPUOutputControl*)this; }
7777
};
7878

7979
} // namespace gpu

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ int GPUReconstruction::InitPhaseBeforeDevice()
305305
mChains[i]->RegisterPermanentMemoryAndProcessors();
306306
size_t memPrimary, memPageLocked;
307307
mChains[i]->MemorySize(memPrimary, memPageLocked);
308-
if (!IsGPU() || mOutputControl.OutputType == GPUOutputControl::AllocateInternal) {
308+
if (!IsGPU() || mOutputControl.useInternal()) {
309309
memPageLocked = memPrimary;
310310
}
311311
mDeviceMemorySize += memPrimary;
@@ -315,7 +315,7 @@ int GPUReconstruction::InitPhaseBeforeDevice()
315315
mDeviceMemorySize = mProcessingSettings.forceMemoryPoolSize;
316316
} else if (mProcessingSettings.forceMemoryPoolSize > 2) {
317317
mDeviceMemorySize = mProcessingSettings.forceMemoryPoolSize;
318-
if (!IsGPU() || mOutputControl.OutputType == GPUOutputControl::AllocateInternal) {
318+
if (!IsGPU() || mOutputControl.useInternal()) {
319319
mHostMemorySize = mDeviceMemorySize;
320320
}
321321
}
@@ -517,7 +517,7 @@ size_t GPUReconstruction::AllocateRegisteredMemoryHelper(GPUMemoryResource* res,
517517

518518
void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res, GPUOutputControl* control, GPUReconstruction* recPool)
519519
{
520-
if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && (control == nullptr || control->OutputType == GPUOutputControl::AllocateInternal)) {
520+
if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && (control == nullptr || control->useInternal())) {
521521
if (!(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) {
522522
if (res->mPtrDevice && res->mReuse < 0) {
523523
operator delete(res->mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT);
@@ -550,14 +550,14 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res,
550550
res->mOverrideSize = GPUCA_BUFFER_ALIGNMENT;
551551
}
552552
if ((!IsGPU() || (res->mType & GPUMemoryResource::MEMORY_HOST) || mProcessingSettings.keepDisplayMemory) && !(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) { // keepAllMemory --> keepDisplayMemory
553-
if (control && control->OutputType == GPUOutputControl::UseExternalBuffer) {
554-
if (control->OutputAllocator) {
553+
if (control && control->useExternal()) {
554+
if (control->allocator) {
555555
res->mSize = std::max((size_t)res->SetPointers((void*)1) - 1, res->mOverrideSize);
556-
res->mPtr = control->OutputAllocator(res->mSize);
556+
res->mPtr = control->allocator(res->mSize);
557557
res->mSize = std::max<size_t>((char*)res->SetPointers(res->mPtr) - (char*)res->mPtr, res->mOverrideSize);
558558
} else {
559559
void* dummy = nullptr;
560-
res->mSize = AllocateRegisteredMemoryHelper(res, res->mPtr, control->OutputPtr, control->OutputBase, control->OutputMaxSize, &GPUMemoryResource::SetPointers, dummy);
560+
res->mSize = AllocateRegisteredMemoryHelper(res, res->mPtr, control->ptrCurrent, control->ptrBase, control->size, &GPUMemoryResource::SetPointers, dummy);
561561
}
562562
} else {
563563
res->mSize = AllocateRegisteredMemoryHelper(res, res->mPtr, recPool->mHostMemoryPool, recPool->mHostMemoryBase, recPool->mHostMemorySize, &GPUMemoryResource::SetPointers, recPool->mHostMemoryPoolEnd);

GPU/GPUTracking/Base/GPUReconstruction.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -421,10 +421,11 @@ inline T* GPUReconstruction::AllocateIOMemoryHelper(size_t n, const T*& ptr, std
421421
return nullptr;
422422
}
423423
T* retVal;
424-
if (mInputControl.OutputType == GPUOutputControl::UseExternalBuffer) {
424+
if (mInputControl.useExternal()) {
425425
u.reset(nullptr);
426-
GPUProcessor::computePointerWithAlignment(mInputControl.OutputPtr, retVal, n);
427-
if ((size_t)((char*)mInputControl.OutputPtr - (char*)mInputControl.OutputBase) > mInputControl.OutputMaxSize) {
426+
mInputControl.checkCurrent();
427+
GPUProcessor::computePointerWithAlignment(mInputControl.ptrCurrent, retVal, n);
428+
if ((size_t)((char*)mInputControl.ptrCurrent - (char*)mInputControl.ptrBase) > mInputControl.size) {
428429
throw std::bad_alloc();
429430
}
430431
} else {

GPU/GPUTracking/Global/GPUChainTracking.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1377,10 +1377,10 @@ int GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
13771377
if (propagateMCLabels) {
13781378
// TODO: write to buffer directly
13791379
o2::dataformats::MCTruthContainer<o2::MCCompLabel> mcLabels;
1380-
if (mOutputClusterLabels == nullptr || !mOutputClusterLabels->OutputAllocator) {
1380+
if (mOutputClusterLabels == nullptr || !mOutputClusterLabels->allocator) {
13811381
throw std::runtime_error("Cluster MC Label buffer missing");
13821382
}
1383-
ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer* container = reinterpret_cast<ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer*>(mOutputClusterLabels->OutputAllocator(0));
1383+
ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer* container = reinterpret_cast<ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer*>(mOutputClusterLabels->allocator(0));
13841384

13851385
assert(propagateMCLabels ? mcLinearLabels.header.size() == nClsTotal : true);
13861386
assert(propagateMCLabels ? mcLinearLabels.data.size() >= nClsTotal : true);

GPU/GPUTracking/Interface/GPUO2Interface.cxx

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,8 @@ int GPUTPCO2Interface::RunTracking(GPUTrackingInOutPointers* data, GPUInterfaceO
118118
for (unsigned int i = 0; i < mOutputRegions->count(); i++) {
119119
if (outputs->asArray()[i].allocator) {
120120
mOutputControls[i].set(outputs->asArray()[i].allocator);
121-
} else if (outputs->asArray()[i].ptr) {
122-
mOutputControls[i].set(outputs->asArray()[i].ptr, outputs->asArray()[i].size);
121+
} else if (outputs->asArray()[i].ptrBase) {
122+
mOutputControls[i].set(outputs->asArray()[i].ptrBase, outputs->asArray()[i].size);
123123
} else {
124124
mOutputControls[i].reset();
125125
}
@@ -135,9 +135,9 @@ int GPUTPCO2Interface::RunTracking(GPUTrackingInOutPointers* data, GPUInterfaceO
135135
return retVal;
136136
}
137137
if (mConfig->configInterface.outputToExternalBuffers) {
138-
outputs->compressedClusters.size = mOutputControls[outputs->getIndex(outputs->compressedClusters)].EndOfSpace ? 0 : mChain->mIOPtrs.tpcCompressedClusters->totalDataSize;
139-
outputs->clustersNative.size = mOutputControls[outputs->getIndex(outputs->clustersNative)].EndOfSpace ? 0 : (mChain->mIOPtrs.clustersNative->nClustersTotal * sizeof(*mChain->mIOPtrs.clustersNative->clustersLinear));
140-
outputs->tpcTracks.size = mOutputControls[outputs->getIndex(outputs->tpcTracks)].EndOfSpace ? 0 : (size_t)((char*)mOutputControls[outputs->getIndex(outputs->tpcTracks)].OutputPtr - (char*)mOutputControls[outputs->getIndex(outputs->tpcTracks)].OutputBase);
138+
outputs->compressedClusters.size = mOutputControls[outputs->getIndex(outputs->compressedClusters)].size == 1 ? 0 : mChain->mIOPtrs.tpcCompressedClusters->totalDataSize;
139+
outputs->clustersNative.size = mOutputControls[outputs->getIndex(outputs->clustersNative)].size == 1 ? 0 : (mChain->mIOPtrs.clustersNative->nClustersTotal * sizeof(*mChain->mIOPtrs.clustersNative->clustersLinear));
140+
outputs->tpcTracks.size = mOutputControls[outputs->getIndex(outputs->tpcTracks)].size == 1 ? 0 : (size_t)((char*)mOutputControls[outputs->getIndex(outputs->tpcTracks)].ptrCurrent - (char*)mOutputControls[outputs->getIndex(outputs->tpcTracks)].ptrBase);
141141
}
142142
if (mConfig->configQA.shipToQC) {
143143
outputs->qa.hist1 = &mChain->GetQA()->getHistograms1D();

GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.cxx

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,19 @@ void GPUTPCSliceOutput::Allocate(GPUTPCSliceOutput*& ptrOutput, int nTracks, int
3030
// Allocate All memory needed for slice output
3131
const size_t memsize = EstimateSize(nTracks, nTrackHits);
3232

33-
if (outputControl && outputControl->OutputType != GPUOutputControl::AllocateInternal) {
33+
if (outputControl && outputControl->useExternal()) {
3434
static std::atomic_flag lock = ATOMIC_FLAG_INIT;
3535
while (lock.test_and_set(std::memory_order_acquire)) {
3636
}
37-
if (outputControl->OutputMaxSize - ((char*)outputControl->OutputPtr - (char*)outputControl->OutputBase) < memsize) {
38-
outputControl->EndOfSpace = 1;
37+
outputControl->checkCurrent();
38+
if (outputControl->size - ((char*)outputControl->ptrCurrent - (char*)outputControl->ptrBase) < memsize) {
39+
outputControl->size = 1;
3940
ptrOutput = nullptr;
4041
lock.clear(std::memory_order_release);
4142
return;
4243
}
43-
ptrOutput = reinterpret_cast<GPUTPCSliceOutput*>(outputControl->OutputPtr);
44-
outputControl->OutputPtr = (char*)outputControl->OutputPtr + memsize;
44+
ptrOutput = reinterpret_cast<GPUTPCSliceOutput*>(outputControl->ptrCurrent);
45+
outputControl->ptrCurrent = (char*)outputControl->ptrCurrent + memsize;
4546
lock.clear(std::memory_order_release);
4647
} else {
4748
if (internalMemory) {

GPU/GPUTracking/SliceTracker/GPUTPCTrackerComponent.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -670,7 +670,7 @@ void* GPUTPCTrackerComponent::TrackerDoEvent(void* par)
670670
int ret = 0;
671671
size = 0;
672672

673-
if (fRec->OutputControl().EndOfSpace) {
673+
if (fRec->OutputControl().size == 1) {
674674
HLTWarning("Output buffer size exceeded buffer size %d, tracks are not stored", maxBufferSize);
675675
ret = -ENOSPC;
676676
} else {

0 commit comments

Comments
 (0)