Skip to content

How to Dynamically Update Input for an MLP Inference Pipeline Using ACL #1255

@knowledgaction

Description

@knowledgaction

I have implemented an MLP (Multi-Layer Perceptron) model using Arm Compute Library. The model receives input data through a pointer named mlp_input. However, the input data is not predetermined and needs to be computed dynamically by other modules in my program.

My current implementation follows this workflow:

Pass input pointer
Call do_setup()
Call do_run()
What I want to achieve is a more efficient pattern:

1. Call do_setup() once
2. Pass a different input pointer each time new data becomes available
3. Call do_run() for each new input
4. Repeat steps 2-3 as needed
This approach would allow me to initialize the inference graph once and reuse it with different input data without rebuilding the graph each time, improving both code efficiency and performance. The specific code is as follows:

#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/Tensor.h>
#include <arm_compute/core/TensorInfo.h>
#include <arm_compute/graph.h>
#include "arm_compute/graph.h"
#include "support/ToolchainSupport.h"
#include "utils/CommonGraphOptions.h"
#include "utils/GraphUtils.h"
#include "utils/Utils.h"
#include <chrono>
#include <cstddef>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>

constexpr bool ENABLE_VERBOSE = true;

// Dumps a tensor's contents to stdout (16 values per row) when verbose
// output is enabled; does nothing otherwise.
void print_tensor(const float *ptr, const arm_compute::TensorInfo &info, const std::string &label)
{
    if (!ENABLE_VERBOSE)
    {
        return;
    }

    const size_t element_count = info.tensor_shape().total_size();
    std::cout << "\n" << label << std::endl;
    std::cout << "Total Size: " << element_count << std::endl;
    std::cout << "Elements:" << std::endl;

    for (size_t idx = 0; idx < element_count; ++idx)
    {
        const bool row_start = (idx % 16 == 0);
        if (row_start)
        {
            if (idx != 0)
            {
                std::cout << std::endl;
            }
            std::cout << " [" << idx << "] ";
        }
        std::cout << ptr[idx] << " ";
    }
    std::cout << std::endl;
}

class DirectAccessor final : public arm_compute::graph::ITensorAccessor
{
private:
float *_external_data;
size_t _data_size;
bool _first_call = true;

public:
DirectAccessor(float *external_data, size_t size)
: _external_data(external_data), _data_size(size) {}

bool access_tensor(arm_compute::ITensor &tensor) override
{
    if (!_first_call) return false;
    _first_call = false;
    
    auto &dst_tensor = dynamic_cast<arm_compute::Tensor&>(tensor);
    size_t tensor_elements = dst_tensor.info()->tensor_shape().total_size();
    ARM_COMPUTE_ERROR_ON(tensor_elements != _data_size);

    size_t dst_offset_bytes = dst_tensor.info()->offset_first_element_in_bytes();
    size_t total_bytes = tensor_elements * sizeof(float);
    float *dst_ptr = reinterpret_cast<float*>(dst_tensor.buffer() + dst_offset_bytes);

    std::memcpy(dst_ptr, _external_data, total_bytes);

    return true;
}

};

/** Output accessor that prints every element of the result tensor to stdout
 *  when verbose mode is enabled. Always returns true so the graph keeps
 *  executing on subsequent runs.
 */
class OutputAccessor final : public arm_compute::graph::ITensorAccessor
{
public:
    bool access_tensor(arm_compute::ITensor &tensor) override
    {
        if (ENABLE_VERBOSE)
        {
            auto  &tensor_ref   = dynamic_cast<arm_compute::Tensor&>(tensor);
            size_t num_elements = tensor_ref.info()->tensor_shape().total_size();
            size_t offset_bytes = tensor_ref.info()->offset_first_element_in_bytes();
            // BUG FIX: the original `reinterpret_cast<float>(...)` is
            // ill-formed (a pointer cannot be reinterpret_cast to float, and
            // indexing a float does not compile). We need a pointer to the
            // element data instead.
            const float *data = reinterpret_cast<const float*>(tensor_ref.buffer() + offset_bytes);
            for (size_t i = 0; i < num_elements; ++i)
            {
                std::cout << data[i] << " ";
            }
            std::cout << std::endl;
        }

        return true;
    }
};

class GraphExample
{
private:
arm_compute::graph::frontend::Stream graph;
static constexpr size_t NUM = 128;
std::string DATA_PATH = "/home/firefly/ABC/ACL_TEST/fine_matcher_weights/";

public:
float *external_input;

GraphExample() : graph(0, "DataPassingGraph"){}

void do_setup()
{
    arm_compute::graph::TensorDescriptor input_desc(arm_compute::TensorShape(128), arm_compute::DataType::F32);
    
    graph << arm_compute::graph::Target::NEON
        << arm_compute::graph::frontend::InputLayer(input_desc, std::make_unique<DirectAccessor>(external_input, 128))
        << arm_compute::graph::frontend::FullyConnectedLayer(512,
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "linear1_weights.npy"),
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "linear1_bias.npy"))
        << arm_compute::graph::frontend::ReshapeLayer(arm_compute::TensorShape(1, 1, 512))
        << arm_compute::graph::frontend::ScaleLayer(
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "bn1_scale.npy"),
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "bn1_offset.npy"))
        << arm_compute::graph::frontend::ActivationLayer(arm_compute::ActivationLayerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::RELU))
        << arm_compute::graph::frontend::FullyConnectedLayer(512,
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "linear2_weights.npy"),
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "linear2_bias.npy"))
        << arm_compute::graph::frontend::ReshapeLayer(arm_compute::TensorShape(1, 1, 512))
        << arm_compute::graph::frontend::ScaleLayer(
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "bn2_scale.npy"),
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "bn2_offset.npy"))
        << arm_compute::graph::frontend::ActivationLayer(arm_compute::ActivationLayerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::RELU))
        << arm_compute::graph::frontend::FullyConnectedLayer(512,
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "linear3_weights.npy"),
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "linear3_bias.npy"))
        << arm_compute::graph::frontend::ReshapeLayer(arm_compute::TensorShape(1, 1, 512))
        << arm_compute::graph::frontend::ScaleLayer(
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "bn3_scale.npy"),
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "bn3_offset.npy"))
        << arm_compute::graph::frontend::ActivationLayer(arm_compute::ActivationLayerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::RELU))
        << arm_compute::graph::frontend::FullyConnectedLayer(512,
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "linear4_weights.npy"),
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "linear4_bias.npy"))
        << arm_compute::graph::frontend::ReshapeLayer(arm_compute::TensorShape(1, 1, 512))
        << arm_compute::graph::frontend::ScaleLayer(
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "bn4_scale.npy"),
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "bn4_offset.npy"))
        << arm_compute::graph::frontend::ActivationLayer(arm_compute::ActivationLayerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::RELU))
        << arm_compute::graph::frontend::FullyConnectedLayer(64,
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "linear5_weights.npy"),
                arm_compute::graph_utils::get_weights_accessor(DATA_PATH, "linear5_bias.npy"))
        << arm_compute::graph::frontend::OutputLayer(std::make_unique<OutputAccessor>());

    arm_compute::graph::GraphConfig config;
    config.num_threads = 1;
    config.use_tuner = false;
    graph.finalize(arm_compute::graph::Target::NEON, config);
}

void do_run(int num_runs = 100)
{
    auto start = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < num_runs; ++i) {
        graph.run();
    }
    auto end = std::chrono::high_resolution_clock::now();

    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    double avg_time_ms = duration.count() / 1000.0 / num_runs;
}

};

/** Entry point: fills a caller-owned input buffer, builds the graph once,
 *  and runs inference 100 times.
 */
int main()
{
    // Caller-owned input; GraphExample keeps a non-owning pointer to it, so
    // it must outlive the graph runs.
    float mlp_input[128];
    for (int i = 0; i < 128; i++)
    {
        mlp_input[i] = 0.64f;
    }
    // Removed the unused `a[128]` array the original initialized and never
    // read.

    GraphExample example;
    example.external_input = mlp_input;

    example.do_setup();  // build + finalize once
    example.do_run(100); // reuse the finalized graph for every run

    return 0;
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions