Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ Checks: >
-readability-use-std-min-max,
WarningsAsErrors: '*'
HeaderFilterRegex: '.*'
ExcludeHeaderFilterRegex: '\.fbs\.h|common/cmdline\.h|/mini_\w*\.h'
ExcludeHeaderFilterRegex: '\.fbs\.h|common/cmdline\.h|mini_[^/]*\.h'
FormatStyle: 'file'
CheckOptions:
- key: modernize-use-default-member-init.UseAssignment
Expand Down
37 changes: 36 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -896,7 +896,8 @@ RUNTIME_CPP_COMPONENTS = \
trace_helper \
tracing \
wasm_cpu_features \
webgpu_dawn \
webgpu_dawn_arm \
webgpu_dawn_x86 \
webgpu_emscripten \
windows_aarch64_cpu_features_arm \
windows_clock \
Expand Down Expand Up @@ -1094,6 +1095,8 @@ RUNTIME_TRIPLE_WIN_GENERIC_64 = "x86_64-unknown-windows-unknown"

RUNTIME_TRIPLE_WEBGPU_32 = "wasm32-unknown-unknown-unknown"
RUNTIME_TRIPLE_WEBGPU_64 = "wasm64-unknown-unknown-unknown"
RUNTIME_TRIPLE_WEBGPU_ARM_64 = "aarch64-unknown-unknown-unknown"
RUNTIME_TRIPLE_WEBGPU_X86_64 = "x86_64-unknown-unknown-unknown"

# `-fno-threadsafe-statics` is very important here (note that it allows us to use a 'modern' C++
# standard but still skip threadsafe guards for static initialization in our runtime code)
Expand Down Expand Up @@ -1147,6 +1150,22 @@ $(BUILD_DIR)/initmod.windows_%_64.ll: $(SRC_DIR)/runtime/windows_%.cpp
@mkdir -p $(@D)
$(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WIN_GENERIC_64) -fshort-wchar -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/windows_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.windows_$*_64.d

# Native (Dawn) WebGPU runtime modules, non-debug builds.
#
# These are explicit rules for the exact initmod.webgpu_dawn_{arm,x86}_{32,64}.ll
# targets. GNU Make prefers an explicit rule over a matching pattern rule, so
# these are used instead of the generic initmod.webgpu_%_{32,64}.ll pattern
# rules that follow.
#
# The 64-bit variants are compiled with the architecture-specific triples
# RUNTIME_TRIPLE_WEBGPU_ARM_64 / RUNTIME_TRIPLE_WEBGPU_X86_64; the 32-bit
# variants are compiled with RUNTIME_TRIPLE_32. Each rule also writes a .d
# dependency file next to its .ll output (-MMD -MP -MF).
$(BUILD_DIR)/initmod.webgpu_dawn_arm_32.ll: $(SRC_DIR)/runtime/webgpu_dawn_arm.cpp
@mkdir -p $(@D)
$(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_dawn_arm.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_dawn_arm_32.d

$(BUILD_DIR)/initmod.webgpu_dawn_arm_64.ll: $(SRC_DIR)/runtime/webgpu_dawn_arm.cpp
@mkdir -p $(@D)
$(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_ARM_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_dawn_arm.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_dawn_arm_64.d

$(BUILD_DIR)/initmod.webgpu_dawn_x86_32.ll: $(SRC_DIR)/runtime/webgpu_dawn_x86.cpp
@mkdir -p $(@D)
$(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_dawn_x86.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_dawn_x86_32.d

$(BUILD_DIR)/initmod.webgpu_dawn_x86_64.ll: $(SRC_DIR)/runtime/webgpu_dawn_x86.cpp
@mkdir -p $(@D)
$(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_X86_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_dawn_x86.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_dawn_x86_64.d

$(BUILD_DIR)/initmod.webgpu_%_32.ll: $(SRC_DIR)/runtime/webgpu_%.cpp
@mkdir -p $(@D)
$(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32.d
Expand All @@ -1155,6 +1174,22 @@ $(BUILD_DIR)/initmod.webgpu_%_64.ll: $(SRC_DIR)/runtime/webgpu_%.cpp
@mkdir -p $(@D)
$(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64.d

# Native (Dawn) WebGPU runtime modules, debug builds.
#
# Same sources and target triples as the non-debug
# initmod.webgpu_dawn_{arm,x86}_{32,64}.ll rules, with `-g -DDEBUG_RUNTIME`
# added and `_debug` appended to the .ll / .d output names. As explicit rules,
# these are chosen by GNU Make over the generic initmod.webgpu_%_*_debug.ll
# pattern rules.
$(BUILD_DIR)/initmod.webgpu_dawn_arm_32_debug.ll: $(SRC_DIR)/runtime/webgpu_dawn_arm.cpp
@mkdir -p $(@D)
$(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_dawn_arm.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_dawn_arm_32_debug.d

$(BUILD_DIR)/initmod.webgpu_dawn_arm_64_debug.ll: $(SRC_DIR)/runtime/webgpu_dawn_arm.cpp
@mkdir -p $(@D)
$(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_ARM_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_dawn_arm.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_dawn_arm_64_debug.d

$(BUILD_DIR)/initmod.webgpu_dawn_x86_32_debug.ll: $(SRC_DIR)/runtime/webgpu_dawn_x86.cpp
@mkdir -p $(@D)
$(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_dawn_x86.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_dawn_x86_32_debug.d

$(BUILD_DIR)/initmod.webgpu_dawn_x86_64_debug.ll: $(SRC_DIR)/runtime/webgpu_dawn_x86.cpp
@mkdir -p $(@D)
$(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_X86_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_dawn_x86.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_dawn_x86_64_debug.d

$(BUILD_DIR)/initmod.webgpu_%_32_debug.ll: $(SRC_DIR)/runtime/webgpu_%.cpp
@mkdir -p $(@D)
$(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32_debug.d
Expand Down
60 changes: 47 additions & 13 deletions doc/WebGPU.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,16 @@ device codegen may be required before it becomes profitable to use.

## Running with WebAssembly via Emscripten: `HL_TARGET=wasm-32-wasmrt-webgpu`

> _Tested with top-of-tree Emscripten as of 2023-02-23, against Chrome v113._
> _Tested with Emscripten 5.0.0 (a7c5deabd7c88ba1c38ebe988112256775f944c6) and Node.js 25.5.0._

Halide can generate WebGPU code that can be integrated with WASM code using
Emscripten.

When invoking `emcc` to link Halide-generated objects, include these flags:
`-s USE_WEBGPU=1 -s ASYNCIFY`.
`--use-port=emdawnwebgpu -s JSPI`.

Tests that use AOT compilation can be run using a native WebGPU implementation
Tests that use AOT compilation can be run using a WebGPU implementation
that has Node.js bindings, such as [Dawn](https://dawn.googlesource.com/dawn/).
You must set an environment variable named `HL_WEBGPU_NODE_BINDINGS` that
has an absolute path to the bindings to run these tests, e.g. `HL_WEBGPU_NODE_BINDINGS=/path/to/dawn.node`.
Expand All @@ -47,17 +48,15 @@ JIT compilation is not supported when using WebGPU with WASM.

## Running natively: `HL_TARGET=host-webgpu`

> _Tested with top-of-tree Dawn as of 2023-11-27 [commit b5d38fc7dc2a20081312c95e379c4a918df8b7d4]._
> _Tested with Dawn release branch chromium/7698 (536c572aba)_

For testing purposes, Halide can also target native WebGPU libraries, such as
[Dawn](https://dawn.googlesource.com/dawn/) or
[wgpu](https://github.com/gfx-rs/wgpu).
This is currently the only path that can run the JIT correctness tests.
See [below](#setting-up-dawn) for instructions on building Dawn.

> Note that as of 2023-11-27, wgpu is not supported due to
> [lacking `override` support for WGSL](https://github.com/gfx-rs/wgpu/issues/1762)
> which we require in order to set GPU block sizes.
> Note that as of 2026-02-17, wgpu is not supported because it lacks timeout support in `WaitAny`.

When targeting WebGPU with a native target, Halide defaults to looking for a
build of Dawn (with several common names and suffixes); you can override this
Expand Down Expand Up @@ -120,9 +119,44 @@ The recommended method for updating `mini_webgpu.h` is to copy the
`gen/include/dawn/webgpu.h` file from the Dawn build directory, then:
- Restore the `// clang-format {off,on}` lines.
- Comment out the `#include <std*>` lines.
- Remove the `void` parameter from the `WGPUProc` declaration.

This guarantees a version of the WebGPU header that is compatible with Dawn.
When the native API eventually stabilizes, it should be possible to obtain a
header from the `webgpu-native` GitHub organization that will be compatible
with Dawn, wgpu, and Emscripten.
- Include the following block to define things that would normally be defined in system headers:
```
// BEGIN Halide-specific changes
//
// For the Halide runtime, we can't include these headers,
// so we define NULL, SIZE_MAX, and integer limit macros here.
// #include <stdint.h>
// #include <stddef.h>
// #include <math.h>

#ifndef NULL
#ifdef __cplusplus
#define NULL nullptr
#else
#define NULL ((void*)0)
#endif
#endif

#ifndef SIZE_MAX
#define SIZE_MAX (~(size_t)0)
#endif

#ifndef UINT32_MAX
#define UINT32_MAX (~(uint32_t)0)
#endif

#ifndef UINT64_MAX
#define UINT64_MAX (~(uint64_t)0)
#endif

// This _should_ be correct on all platforms we support, but needs checking.
#ifndef UINT32_C
#define UINT32_C(x) ((uint32_t)(x))
#endif

// END Halide-specific changes

```

This guarantees a version of the WebGPU header that is compatible with how
Halide builds the runtime.
45 changes: 40 additions & 5 deletions src/LLVM_Runtime_Linker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,6 @@ DECLARE_CPP_INITMOD(timer_profiler)
DECLARE_CPP_INITMOD(to_string)
DECLARE_CPP_INITMOD(trace_helper)
DECLARE_CPP_INITMOD(tracing)
// TODO(https://github.com/halide/Halide/issues/7248)
// DECLARE_CPP_INITMOD(webgpu)
DECLARE_CPP_INITMOD(webgpu_dawn)
DECLARE_CPP_INITMOD(webgpu_emscripten)
DECLARE_CPP_INITMOD(windows_clock)
DECLARE_CPP_INITMOD(windows_cuda)
DECLARE_CPP_INITMOD(windows_get_symbol)
Expand Down Expand Up @@ -284,6 +280,38 @@ DECLARE_NO_INITMOD(vulkan)
DECLARE_NO_INITMOD(windows_vulkan)
#endif // WITH_VULKAN

#ifdef WITH_WEBGPU
// WebGPU runtime modules. With WebGPU enabled, each module is declared via
// DECLARE_CPP_INITMOD only when the backend it is built for (WITH_X86,
// WITH_ARM, WITH_WEBASSEMBLY) is also enabled; otherwise DECLARE_NO_INITMOD is
// used in its place so the get_initmod_webgpu_* names are still declared.
//
// NOTE(review): callers select the debug build through a flag on the non-debug
// name, e.g. get_initmod_webgpu_dawn_x86(c, bits_64, debug). Confirm that the
// separate *_debug declarations below are actually required and resolve to
// modules that exist.
//
// TODO(https://github.com/halide/Halide/issues/7248)
#ifdef WITH_X86
DECLARE_CPP_INITMOD(webgpu_dawn_x86)
DECLARE_CPP_INITMOD(webgpu_dawn_x86_debug)
#else
DECLARE_NO_INITMOD(webgpu_dawn_x86)
DECLARE_NO_INITMOD(webgpu_dawn_x86_debug)
#endif
#ifdef WITH_ARM
DECLARE_CPP_INITMOD(webgpu_dawn_arm)
DECLARE_CPP_INITMOD(webgpu_dawn_arm_debug)
#else
DECLARE_NO_INITMOD(webgpu_dawn_arm)
DECLARE_NO_INITMOD(webgpu_dawn_arm_debug)
#endif
#ifdef WITH_WEBASSEMBLY
DECLARE_CPP_INITMOD(webgpu_emscripten)
DECLARE_CPP_INITMOD(webgpu_emscripten_debug)
#else
DECLARE_NO_INITMOD(webgpu_emscripten)
DECLARE_NO_INITMOD(webgpu_emscripten_debug)
#endif
#else
// WebGPU disabled: every WebGPU module gets the DECLARE_NO_INITMOD form,
// regardless of which CPU backends are enabled.
DECLARE_NO_INITMOD(webgpu_dawn_x86)
DECLARE_NO_INITMOD(webgpu_dawn_arm)
DECLARE_NO_INITMOD(webgpu_emscripten)
DECLARE_NO_INITMOD(webgpu_dawn_x86_debug)
DECLARE_NO_INITMOD(webgpu_dawn_arm_debug)
DECLARE_NO_INITMOD(webgpu_emscripten_debug)
#endif // WITH_WEBGPU

#ifdef WITH_X86
DECLARE_LL_INITMOD(x86_amx)
DECLARE_LL_INITMOD(x86_avx512)
Expand Down Expand Up @@ -1340,7 +1368,14 @@ std::unique_ptr<llvm::Module> get_initial_module_for_target(Target t, llvm::LLVM
if (t.os == Target::WebAssemblyRuntime) {
modules.push_back(get_initmod_webgpu_emscripten(c, bits_64, debug));
} else {
modules.push_back(get_initmod_webgpu_dawn(c, bits_64, debug));
user_assert(bits_64) << "Native WebGPU target only available on 64-bit targets for now.\n";
if (t.arch == Target::X86) {
modules.push_back(get_initmod_webgpu_dawn_x86(c, bits_64, debug));
} else if (t.arch == Target::ARM) {
modules.push_back(get_initmod_webgpu_dawn_arm(c, bits_64, debug));
} else {
user_error << "WebGPU can only be used on X86 or ARM architectures.\n";
}
}
}
}
Expand Down
23 changes: 20 additions & 3 deletions src/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ set(RUNTIME_CPP
wasm_cpu_features
# TODO(https://github.com/halide/Halide/issues/7248)
# webgpu
webgpu_dawn
webgpu_dawn_arm
webgpu_dawn_x86
webgpu_emscripten
windows_aarch64_cpu_features_arm
windows_clock
Expand Down Expand Up @@ -232,13 +233,29 @@ foreach (i IN LISTS RUNTIME_CPP)
set(TARGET "x86_64-unknown-windows-unknown")
endif ()
endif ()
elseif (i MATCHES "webgpu")
elseif (i MATCHES "webgpu_emscripten")
# for WASM, we need to set a wasm target rather than a generic or native target
if (j EQUAL 32)
# wasm32 will fail for some i386 builds, but i386 won't
set(TARGET "wasm32-unknown-unknown-unknown")
else ()
set(TARGET "wasm64-unknown-unknown-unknown")
endif ()
elseif (i MATCHES "webgpu_dawn_x86$")
# Native (Dawn) WebGPU runtime for x86: pick the clang target triple by bitness.
if (j EQUAL 32)
# 32-bit build: explicit i386 triple
set(TARGET "i386-unknown-unknown-unknown")
else ()
# due to struct passing, we need to use the correct arch
set(TARGET "x86_64-unknown-unknown-unknown")
endif ()
elseif (i MATCHES "webgpu_dawn_arm$")
# Native (Dawn) WebGPU runtime for ARM.
# NOTE(review): the i386 triple is chosen only when j is 32 AND "ARM" is in
# Halide_LLVM_COMPONENTS; every other case, including a 32-bit build without
# "ARM" in that list, gets the aarch64 triple. Confirm this is the intended
# condition.
if (j EQUAL 32 AND "ARM" IN_LIST Halide_LLVM_COMPONENTS)
# 32-bit build: explicit i386 triple (not an ARM triple)
set(TARGET "i386-unknown-unknown-unknown")
else ()
# due to struct passing, we need to use the correct arch
set(TARGET "aarch64-unknown-unknown-unknown")
endif ()
else ()
# don't be fooled: these are just generic 32/64-bit targets for our purposes here
if (j EQUAL 32)
Expand Down
Loading
Loading