Skip to content

Commit b0cb089

Browse files
committed
GPU: Simplification: Keep an array of outputControl structs instead of maintaining them individually
1 parent 1845d93 commit b0cb089

File tree

4 files changed

+53
-72
lines changed

4 files changed

+53
-72
lines changed

GPU/GPUTracking/Base/GPUOutputControl.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,30 @@ struct GPUOutputControl {
5252
OutputTypeStruct OutputType = AllocateInternal; // How to perform the output
5353
char EndOfSpace = 0; // end of space flag
5454
};
55+
56+
// Describes one output region of GPUReconstruction.
//  - ptr:       start of the caller-provided memory buffer. It should be properly aligned; since DPL does not
//               respect the alignment of data types, nothing specific is imposed and a plain void* is used,
//               but ideally the alignment is >= 64 bytes.
//  - size:      on entry, the maximum usable buffer size; on return, the number of bytes that were filled.
//  - allocator: optional callback that is invoked to provide a pointer to the memory.
// If ptr == nullptr, no region is defined and GPUReconstruction writes its output to an internal buffer.
struct GPUInterfaceOutputRegion {
  void* ptr{nullptr};
  size_t size{0};
  std::function<void*(size_t)> allocator{};
};
66+
67+
struct GPUTrackingOutputs {
68+
GPUInterfaceOutputRegion compressedClusters;
69+
GPUInterfaceOutputRegion clustersNative;
70+
GPUInterfaceOutputRegion tpcTracks;
71+
GPUInterfaceOutputRegion clusterLabels;
72+
GPUInterfaceOutputRegion sharedClusterMap;
73+
74+
static size_t count() { return sizeof(GPUTrackingOutputs) / sizeof(compressedClusters); }
75+
GPUInterfaceOutputRegion* asArray() { return (GPUInterfaceOutputRegion*)this; }
76+
size_t getIndex(const GPUInterfaceOutputRegion& v) { return &v - (const GPUInterfaceOutputRegion*)this; }
77+
};
78+
5579
} // namespace gpu
5680
} // namespace GPUCA_NAMESPACE
5781

GPU/GPUTracking/Interface/GPUO2Interface.cxx

Lines changed: 20 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "GPUO2InterfaceConfiguration.h"
2020
#include "GPUParam.inc"
2121
#include "GPUQA.h"
22+
#include "GPUOutputControl.h"
2223
#include <iostream>
2324
#include <fstream>
2425
#ifdef WITH_OPENMP
@@ -53,22 +54,19 @@ int GPUTPCO2Interface::Initialize(const GPUO2InterfaceConfiguration& config)
5354
}
5455
mRec->SetSettings(&mConfig->configEvent, &mConfig->configReconstruction, &mConfig->configProcessing, &mConfig->configWorkflow);
5556
mChain->SetCalibObjects(mConfig->configCalib);
57+
mOutputRegions.reset(new GPUTrackingOutputs);
58+
mOutputControls.resize(GPUTrackingOutputs::count());
5659
if (mConfig->configInterface.outputToExternalBuffers) {
57-
mOutputCompressedClusters.reset(new GPUOutputControl);
58-
mChain->SetOutputControlCompressedClusters(mOutputCompressedClusters.get());
59-
mOutputClustersNative.reset(new GPUOutputControl);
60-
mChain->SetOutputControlClustersNative(mOutputClustersNative.get());
61-
mOutputTPCTracks.reset(new GPUOutputControl);
62-
mChain->SetOutputControlTPCTracks(mOutputTPCTracks.get());
63-
mOutputSharedClusterMap.reset(new GPUOutputControl);
64-
mChain->SetOutputControlSharedClusterMap(mOutputSharedClusterMap.get());
60+
mChain->SetOutputControlCompressedClusters(&mOutputControls[mOutputRegions->getIndex(mOutputRegions->compressedClusters)]);
61+
mChain->SetOutputControlClustersNative(&mOutputControls[mOutputRegions->getIndex(mOutputRegions->clustersNative)]);
62+
mChain->SetOutputControlTPCTracks(&mOutputControls[mOutputRegions->getIndex(mOutputRegions->tpcTracks)]);
63+
mChain->SetOutputControlSharedClusterMap(&mOutputControls[mOutputRegions->getIndex(mOutputRegions->sharedClusterMap)]);
6564
GPUOutputControl dummy;
6665
dummy.set([](size_t size) -> void* {throw std::runtime_error("invalid output memory request, no common output buffer set"); return nullptr; });
6766
mRec->SetOutputControl(dummy);
6867
}
6968
if (mConfig->configProcessing.runMC) {
70-
mOutputTPCClusterLabels.reset(new GPUOutputControl);
71-
mChain->SetOutputControlClusterLabels(mOutputTPCClusterLabels.get());
69+
mChain->SetOutputControlClusterLabels(&mOutputControls[mOutputRegions->getIndex(mOutputRegions->clusterLabels)]);
7270
}
7371

7472
if (mRec->Init()) {
@@ -117,42 +115,17 @@ int GPUTPCO2Interface::RunTracking(GPUTrackingInOutPointers* data, GPUInterfaceO
117115

118116
mChain->mIOPtrs = *data;
119117
if (mConfig->configInterface.outputToExternalBuffers) {
120-
if (outputs->compressedClusters.allocator) {
121-
mOutputCompressedClusters->set(outputs->compressedClusters.allocator);
122-
} else if (outputs->compressedClusters.ptr) {
123-
mOutputCompressedClusters->set(outputs->compressedClusters.ptr, outputs->compressedClusters.size);
124-
} else {
125-
mOutputCompressedClusters->reset();
126-
}
127-
if (outputs->clustersNative.allocator) {
128-
mOutputClustersNative->set(outputs->clustersNative.allocator);
129-
} else if (outputs->clustersNative.ptr) {
130-
mOutputClustersNative->set(outputs->clustersNative.ptr, outputs->clustersNative.size);
131-
} else {
132-
mOutputClustersNative->reset();
133-
}
134-
if (outputs->tpcTracks.allocator) {
135-
mOutputTPCTracks->set(outputs->tpcTracks.allocator);
136-
} else if (outputs->tpcTracks.ptr) {
137-
mOutputTPCTracks->set(outputs->tpcTracks.ptr, outputs->tpcTracks.size);
138-
} else {
139-
mOutputTPCTracks->reset();
140-
}
141-
if (outputs->sharedClusterMap.allocator) {
142-
mOutputSharedClusterMap->set(outputs->sharedClusterMap.allocator);
143-
} else if (outputs->sharedClusterMap.ptr) {
144-
mOutputSharedClusterMap->set(outputs->sharedClusterMap.ptr, outputs->sharedClusterMap.size);
145-
} else {
146-
mOutputSharedClusterMap->reset();
147-
}
148-
}
149-
if (mConfig->configProcessing.runMC) {
150-
if (outputs->clusterLabels.allocator) {
151-
mOutputTPCClusterLabels->set(outputs->clusterLabels.allocator);
152-
} else {
153-
mOutputTPCClusterLabels->reset();
118+
for (unsigned int i = 0; i < mOutputRegions->count(); i++) {
119+
if (outputs->asArray()[i].allocator) {
120+
mOutputControls[i].set(outputs->asArray()[i].allocator);
121+
} else if (outputs->asArray()[i].ptr) {
122+
mOutputControls[i].set(outputs->asArray()[i].ptr, outputs->asArray()[i].size);
123+
} else {
124+
mOutputControls[i].reset();
125+
}
154126
}
155127
}
128+
156129
int retVal = mRec->RunChains();
157130
if (retVal == 2) {
158131
retVal = 0; // 2 signals end of event display, ignore
@@ -162,9 +135,9 @@ int GPUTPCO2Interface::RunTracking(GPUTrackingInOutPointers* data, GPUInterfaceO
162135
return retVal;
163136
}
164137
if (mConfig->configInterface.outputToExternalBuffers) {
165-
outputs->compressedClusters.size = mOutputCompressedClusters->EndOfSpace ? 0 : mChain->mIOPtrs.tpcCompressedClusters->totalDataSize;
166-
outputs->clustersNative.size = mOutputClustersNative->EndOfSpace ? 0 : (mChain->mIOPtrs.clustersNative->nClustersTotal * sizeof(*mChain->mIOPtrs.clustersNative->clustersLinear));
167-
outputs->tpcTracks.size = mOutputCompressedClusters->EndOfSpace ? 0 : (size_t)((char*)mOutputCompressedClusters->OutputPtr - (char*)mOutputCompressedClusters->OutputBase);
138+
outputs->compressedClusters.size = mOutputControls[outputs->getIndex(outputs->compressedClusters)].EndOfSpace ? 0 : mChain->mIOPtrs.tpcCompressedClusters->totalDataSize;
139+
outputs->clustersNative.size = mOutputControls[outputs->getIndex(outputs->clustersNative)].EndOfSpace ? 0 : (mChain->mIOPtrs.clustersNative->nClustersTotal * sizeof(*mChain->mIOPtrs.clustersNative->clustersLinear));
140+
outputs->tpcTracks.size = mOutputControls[outputs->getIndex(outputs->tpcTracks)].EndOfSpace ? 0 : (size_t)((char*)mOutputControls[outputs->getIndex(outputs->tpcTracks)].OutputPtr - (char*)mOutputControls[outputs->getIndex(outputs->tpcTracks)].OutputBase);
168141
}
169142
if (mConfig->configQA.shipToQC) {
170143
outputs->qa.hist1 = &mChain->GetQA()->getHistograms1D();

GPU/GPUTracking/Interface/GPUO2Interface.h

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#endif
2727

2828
#include <memory>
29+
#include <vector>
2930
#include "GPUCommonDef.h"
3031
#include "GPUDataTypes.h"
3132
namespace o2::tpc
@@ -41,6 +42,7 @@ class GPUChainTracking;
4142
struct GPUO2InterfaceConfiguration;
4243
struct GPUInterfaceOutputs;
4344
struct GPUOutputControl;
45+
struct GPUTrackingOutputs;
4446

4547
class GPUTPCO2Interface
4648
{
@@ -69,14 +71,11 @@ class GPUTPCO2Interface
6971
bool mInitialized = false;
7072
bool mContinuous = false;
7173

72-
std::unique_ptr<GPUReconstruction> mRec;
73-
GPUChainTracking* mChain = nullptr;
74-
std::unique_ptr<GPUO2InterfaceConfiguration> mConfig;
75-
std::unique_ptr<GPUOutputControl> mOutputCompressedClusters;
76-
std::unique_ptr<GPUOutputControl> mOutputClustersNative;
77-
std::unique_ptr<GPUOutputControl> mOutputTPCTracks;
78-
std::unique_ptr<GPUOutputControl> mOutputTPCClusterLabels;
79-
std::unique_ptr<GPUOutputControl> mOutputSharedClusterMap;
74+
std::unique_ptr<GPUReconstruction> mRec; //!
75+
GPUChainTracking* mChain = nullptr; //!
76+
std::unique_ptr<GPUO2InterfaceConfiguration> mConfig; //!
77+
std::unique_ptr<GPUTrackingOutputs> mOutputRegions; //!
78+
std::vector<GPUOutputControl> mOutputControls; //!
8079
};
8180
} // namespace o2::gpu
8281

GPU/GPUTracking/Interface/GPUO2InterfaceConfiguration.h

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "GPUSettings.h"
3333
#include "GPUDataTypes.h"
3434
#include "GPUHostDataTypes.h"
35+
#include "GPUOutputControl.h"
3536
#include "DataFormatsTPC/Constants.h"
3637

3738
class TH1F;
@@ -50,29 +51,13 @@ namespace gpu
5051
class TPCFastTransform;
5152
struct GPUSettingsO2;
5253

53-
// This defines an output region. Ptr points to a memory buffer, which should have a proper alignment.
54-
// Since DPL does not respect the alignment of data types, we do not impose anything specic but just use a char data type, but it should be >= 64 bytes ideally.
55-
// The size defines the maximum possible buffer size when GPUReconstruction is called, and returns the number of filled bytes when it returns.
56-
// If ptr == nullptr, there is no region defined and GPUReconstruction will write its output to an internal buffer.
57-
// If allocator is set, it is called as a callback to provide a ptr to the memory.
58-
struct GPUInterfaceOutputRegion {
59-
void* ptr = nullptr;
60-
size_t size = 0;
61-
std::function<void*(size_t)> allocator = nullptr;
62-
};
63-
6454
struct GPUInterfaceQAOutputs {
6555
const std::vector<TH1F>* hist1;
6656
const std::vector<TH2F>* hist2;
6757
const std::vector<TH1D>* hist3;
6858
};
6959

70-
struct GPUInterfaceOutputs {
71-
GPUInterfaceOutputRegion compressedClusters;
72-
GPUInterfaceOutputRegion clustersNative;
73-
GPUInterfaceOutputRegion tpcTracks;
74-
GPUInterfaceOutputRegion clusterLabels;
75-
GPUInterfaceOutputRegion sharedClusterMap;
60+
struct GPUInterfaceOutputs : public GPUTrackingOutputs {
7661
GPUInterfaceQAOutputs qa;
7762
};
7863

0 commit comments

Comments
 (0)