Merged

Changes from all commits (35 commits)
476df8e  global bool (Phylliida, Oct 24, 2025)
6d85b94  reworked circular to global flag (Phylliida, Oct 24, 2025)
0092711  cleaner implementation of tiling support in sd cpp (Phylliida, Oct 24, 2025)
ee0e82a  cleaned rope (Phylliida, Oct 24, 2025)
cbb261d  working simplified but still need wraps (Phylliida, Oct 24, 2025)
8d7f679  Further clean of rope (Phylliida, Oct 24, 2025)
4f2db1b  resolve flux conflict (Phylliida, Oct 24, 2025)
e6fb4e8  switch to pad op circular only (Dec 10, 2025)
00c92ef  Set ggml to most recent (Dec 12, 2025)
144c278  Revert ggml temp (Dec 12, 2025)
5975e1e  Merge branch 'master' into tiling-support (Phylliida, Dec 12, 2025)
247d67f  Update ggml to most recent (Dec 12, 2025)
686a208  Revert unneded flux change (Dec 12, 2025)
15076b0  move circular flag to the GGMLRunnerContext (Dec 12, 2025)
bf28347  Pass through circular param in all places where conv is called (Dec 12, 2025)
5f2de58  fix of constant and minor cleanup (Dec 12, 2025)
d7d8da1  Added back --circular option (Dec 12, 2025)
822f9a5  Conv2d circular in vae and various models (Dec 12, 2025)
8e829ed  Fix temporal padding for qwen image and other vaes (Dec 12, 2025)
4054e3c  Z Image circular tiling (Dec 13, 2025)
4b87268  x and y axis seamless only (Dec 13, 2025)
935f980  First attempt at chroma seamless x and y (Dec 13, 2025)
820fb6b  refactor into pure x and y, almost there (Dec 13, 2025)
32e1b75  Fix crash on chroma (Dec 13, 2025)
dc6e887  Refactor into cleaner variable choices (Dec 13, 2025)
665190f  Removed redundant set_circular_enabled (Dec 13, 2025)
145e178  Sync ggml (Dec 16, 2025)
29b9d2f  Merge branch 'master' into tiling-support (leejet, Dec 21, 2025)
d1743b8  simplify circular parameter (leejet, Dec 21, 2025)
251f8e5  format code (leejet, Dec 21, 2025)
7b71c7c  no need to perform circular pad on the clip (leejet, Dec 21, 2025)
bd39034  simplify circular_axes setting (leejet, Dec 21, 2025)
d727d4a  unify function naming (leejet, Dec 21, 2025)
c3cf58f  remove unnecessary member variables (leejet, Dec 21, 2025)
a788c84  simplify rope (leejet, Dec 21, 2025)
2 changes: 1 addition & 1 deletion common.hpp

@@ -28,7 +28,7 @@ class DownSampleBlock : public GGMLBlock {
         if (vae_downsample) {
             auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);
 
-            x = ggml_pad(ctx->ggml_ctx, x, 1, 1, 0, 0);
+            x = ggml_ext_pad(ctx->ggml_ctx, x, 1, 1, 0, 0, ctx->circular_x_enabled, ctx->circular_y_enabled);
             x = conv->forward(ctx, x);
         } else {
             auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["op"]);
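A note on what this change does, since it is the core of the PR: circular padding fills the appended cells from the opposite edge of the tensor instead of with zeros, so convolutions see the image as wrapping around, which is what makes the output tile seamlessly. A minimal standalone sketch of the idea (an illustration only, not ggml's implementation):

```cpp
#include <cstdio>

int main() {
    // ggml_pad-style padding appends `pad` cells at the end of a dimension.
    // With zero padding the new cells are 0; with circular padding they wrap
    // around to the start, so the two edges of the image stay continuous.
    const int n = 5, pad = 2;
    int src[n] = {1, 2, 3, 4, 5};
    int zero_padded[n + pad], circ_padded[n + pad];
    for (int i = 0; i < n + pad; ++i) {
        zero_padded[i] = (i < n) ? src[i] : 0;  // 1 2 3 4 5 0 0
        circ_padded[i] = src[i % n];            // 1 2 3 4 5 1 2
    }
    for (int v : circ_padded) printf("%d ", v);
    printf("\n");
    return 0;
}
```

Presumably ggml_ext_pad reduces to the old ggml_pad behavior when both circular flags are false, which is why it can replace it unconditionally here.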
6 changes: 3 additions & 3 deletions denoiser.hpp

@@ -366,18 +366,18 @@ struct KLOptimalScheduler : SigmaScheduler {
 
         for (uint32_t i = 0; i < n; ++i) {
             // t goes from 0.0 to 1.0
-            float t = static_cast<float>(i) / static_cast<float>(n-1);
+            float t = static_cast<float>(i) / static_cast<float>(n - 1);
 
             // Interpolate in the angle domain
             float angle = t * alpha_min + (1.0f - t) * alpha_max;
 
             // Convert back to sigma
             sigmas.push_back(std::tan(angle));
-            }
+        }
 
         // Append the final zero to sigma
         sigmas.push_back(0.0f);
 
         return sigmas;
     }
 };
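The hunk above is formatting only, but it is the heart of the KL-optimal schedule: sigmas are interpolated linearly in the angle domain and mapped back through tan. A self-contained sketch of the same computation (assuming, as in the usual KL-optimal formulation, that alpha_min and alpha_max are atan(sigma_min) and atan(sigma_max); those definitions sit outside this hunk):

```cpp
#include <cmath>
#include <cstdint>
#include <vector>

// Sketch of the schedule shown in the diff above; not copied from the file.
std::vector<float> kl_optimal_sigmas(uint32_t n, float sigma_min, float sigma_max) {
    std::vector<float> sigmas;
    sigmas.reserve(n + 1);
    float alpha_min = std::atan(sigma_min);  // assumed definition
    float alpha_max = std::atan(sigma_max);  // assumed definition
    for (uint32_t i = 0; i < n; ++i) {
        // t runs from 0.0 at the first (highest-sigma) step to 1.0 at the last
        float t = static_cast<float>(i) / static_cast<float>(n - 1);
        // interpolate in the angle domain, then convert back to sigma
        float angle = t * alpha_min + (1.0f - t) * alpha_max;
        sigmas.push_back(std::tan(angle));
    }
    sigmas.push_back(0.0f);  // append the final zero
    return sigmas;
}
```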
29 changes: 27 additions & 2 deletions diffusion_model.hpp

@@ -37,8 +37,9 @@ struct DiffusionModel {
     virtual void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) = 0;
     virtual size_t get_params_buffer_size() = 0;
     virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter){};
-    virtual int64_t get_adm_in_channels() = 0;
-    virtual void set_flash_attn_enabled(bool enabled) = 0;
+    virtual int64_t get_adm_in_channels()                            = 0;
+    virtual void set_flash_attn_enabled(bool enabled)                = 0;
+    virtual void set_circular_axes(bool circular_x, bool circular_y) = 0;
 };
 
 struct UNetModel : public DiffusionModel {
@@ -87,6 +88,10 @@ struct UNetModel : public DiffusionModel {
         unet.set_flash_attention_enabled(enabled);
     }
 
+    void set_circular_axes(bool circular_x, bool circular_y) override {
+        unet.set_circular_axes(circular_x, circular_y);
+    }
+
     bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
@@ -148,6 +153,10 @@ struct MMDiTModel : public DiffusionModel {
         mmdit.set_flash_attention_enabled(enabled);
     }
 
+    void set_circular_axes(bool circular_x, bool circular_y) override {
+        mmdit.set_circular_axes(circular_x, circular_y);
+    }
+
     bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
@@ -210,6 +219,10 @@ struct FluxModel : public DiffusionModel {
         flux.set_flash_attention_enabled(enabled);
     }
 
+    void set_circular_axes(bool circular_x, bool circular_y) override {
+        flux.set_circular_axes(circular_x, circular_y);
+    }
+
     bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
@@ -277,6 +290,10 @@ struct WanModel : public DiffusionModel {
         wan.set_flash_attention_enabled(enabled);
     }
 
+    void set_circular_axes(bool circular_x, bool circular_y) override {
+        wan.set_circular_axes(circular_x, circular_y);
+    }
+
     bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
@@ -343,6 +360,10 @@ struct QwenImageModel : public DiffusionModel {
         qwen_image.set_flash_attention_enabled(enabled);
     }
 
+    void set_circular_axes(bool circular_x, bool circular_y) override {
+        qwen_image.set_circular_axes(circular_x, circular_y);
+    }
+
     bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
@@ -406,6 +427,10 @@ struct ZImageModel : public DiffusionModel {
         z_image.set_flash_attention_enabled(enabled);
     }
 
+    void set_circular_axes(bool circular_x, bool circular_y) override {
+        z_image.set_circular_axes(circular_x, circular_y);
+    }
+
     bool compute(int n_threads,
                  DiffusionParams diffusion_params,
                  struct ggml_tensor** output = nullptr,
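Each wrapper forwards the new hook to its backbone, so callers configure tiling once through the DiffusionModel interface regardless of architecture. A hypothetical call-site sketch (the helper name and wiring are assumptions; the `circular || circular_x` combination mirrors the CLI handling later in this diff):

```cpp
// Hypothetical helper, not part of the PR: map the three user-facing flags
// onto the per-axis virtual hook added above.
void configure_seamless(DiffusionModel* model,
                        bool circular, bool circular_x, bool circular_y) {
    // "--circular" wraps both axes; "--circularx"/"--circulary" wrap one.
    model->set_circular_axes(circular || circular_x, circular || circular_y);
}
```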
21 changes: 21 additions & 0 deletions examples/common/common.hpp

@@ -449,6 +449,10 @@ struct SDContextParams {
     bool diffusion_conv_direct = false;
     bool vae_conv_direct = false;
 
+    bool circular = false;
+    bool circular_x = false;
+    bool circular_y = false;
+
     bool chroma_use_dit_mask = true;
     bool chroma_use_t5_mask = false;
     int chroma_t5_mask_pad = 1;
@@ -605,6 +609,18 @@ struct SDContextParams {
          "--vae-conv-direct",
          "use ggml_conv2d_direct in the vae model",
          true, &vae_conv_direct},
+        {"",
+         "--circular",
+         "enable circular padding for convolutions",
+         true, &circular},
+        {"",
+         "--circularx",
+         "enable circular RoPE wrapping on x-axis (width) only",
+         true, &circular_x},
+        {"",
+         "--circulary",
+         "enable circular RoPE wrapping on y-axis (height) only",
+         true, &circular_y},
         {"",
          "--chroma-disable-dit-mask",
          "disable dit mask for chroma",
@@ -868,6 +884,9 @@ struct SDContextParams {
            << " diffusion_flash_attn: " << (diffusion_flash_attn ? "true" : "false") << ",\n"
            << " diffusion_conv_direct: " << (diffusion_conv_direct ? "true" : "false") << ",\n"
            << " vae_conv_direct: " << (vae_conv_direct ? "true" : "false") << ",\n"
+           << " circular: " << (circular ? "true" : "false") << ",\n"
+           << " circular_x: " << (circular_x ? "true" : "false") << ",\n"
+           << " circular_y: " << (circular_y ? "true" : "false") << ",\n"
            << " chroma_use_dit_mask: " << (chroma_use_dit_mask ? "true" : "false") << ",\n"
            << " chroma_use_t5_mask: " << (chroma_use_t5_mask ? "true" : "false") << ",\n"
            << " chroma_t5_mask_pad: " << chroma_t5_mask_pad << ",\n"
@@ -928,6 +947,8 @@ struct SDContextParams {
         taesd_preview,
         diffusion_conv_direct,
         vae_conv_direct,
+        circular || circular_x,
+        circular || circular_y,
         force_sdxl_vae_conv_scale,
         chroma_use_dit_mask,
         chroma_use_t5_mask,
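Worth noting for users: `--circular` is shorthand for enabling both axes. The struct stores the three flags verbatim and only collapses them into per-axis values at the point where they are handed to the context, as the last hunk shows. Spelled out as a sketch:

```cpp
// Equivalent of the two constructor arguments in the hunk above:
bool circular_x_enabled = circular || circular_x;  // wrap along width
bool circular_y_enabled = circular || circular_y;  // wrap along height
```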
28 changes: 15 additions & 13 deletions flux.hpp

@@ -860,14 +860,14 @@ namespace Flux {
             }
         }
 
-        struct ggml_tensor* pad_to_patch_size(struct ggml_context* ctx,
+        struct ggml_tensor* pad_to_patch_size(GGMLRunnerContext* ctx,
                                               struct ggml_tensor* x) {
             int64_t W = x->ne[0];
             int64_t H = x->ne[1];
 
             int pad_h = (params.patch_size - H % params.patch_size) % params.patch_size;
             int pad_w = (params.patch_size - W % params.patch_size) % params.patch_size;
-            x = ggml_pad(ctx, x, pad_w, pad_h, 0, 0);  // [N, C, H + pad_h, W + pad_w]
+            x = ggml_ext_pad(ctx->ggml_ctx, x, pad_w, pad_h, 0, 0, ctx->circular_x_enabled, ctx->circular_y_enabled);
             return x;
         }
 
@@ -893,11 +893,11 @@ namespace Flux {
             return x;
         }
 
-        struct ggml_tensor* process_img(struct ggml_context* ctx,
+        struct ggml_tensor* process_img(GGMLRunnerContext* ctx,
                                         struct ggml_tensor* x) {
             // img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)
             x = pad_to_patch_size(ctx, x);
-            x = patchify(ctx, x);
+            x = patchify(ctx->ggml_ctx, x);
             return x;
         }
 
@@ -1076,7 +1076,7 @@ namespace Flux {
             int pad_h = (patch_size - H % patch_size) % patch_size;
             int pad_w = (patch_size - W % patch_size) % patch_size;
 
-            auto img = pad_to_patch_size(ctx->ggml_ctx, x);
+            auto img = pad_to_patch_size(ctx, x);
             auto orig_img = img;
 
             if (params.chroma_radiance_params.use_patch_size_32) {
@@ -1150,16 +1150,16 @@ namespace Flux {
             int pad_h = (patch_size - H % patch_size) % patch_size;
             int pad_w = (patch_size - W % patch_size) % patch_size;
 
-            auto img = process_img(ctx->ggml_ctx, x);
+            auto img = process_img(ctx, x);
             uint64_t img_tokens = img->ne[1];
 
             if (params.version == VERSION_FLUX_FILL) {
                 GGML_ASSERT(c_concat != nullptr);
                 ggml_tensor* masked = ggml_view_4d(ctx->ggml_ctx, c_concat, c_concat->ne[0], c_concat->ne[1], C, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], 0);
                 ggml_tensor* mask = ggml_view_4d(ctx->ggml_ctx, c_concat, c_concat->ne[0], c_concat->ne[1], 8 * 8, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], c_concat->nb[2] * C);
 
-                masked = process_img(ctx->ggml_ctx, masked);
-                mask = process_img(ctx->ggml_ctx, mask);
+                masked = process_img(ctx, masked);
+                mask = process_img(ctx, mask);
 
                 img = ggml_concat(ctx->ggml_ctx, img, ggml_concat(ctx->ggml_ctx, masked, mask, 0), 0);
             } else if (params.version == VERSION_FLEX_2) {
@@ -1168,21 +1168,21 @@
                 ggml_tensor* mask = ggml_view_4d(ctx->ggml_ctx, c_concat, c_concat->ne[0], c_concat->ne[1], 1, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], c_concat->nb[2] * C);
                 ggml_tensor* control = ggml_view_4d(ctx->ggml_ctx, c_concat, c_concat->ne[0], c_concat->ne[1], C, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], c_concat->nb[2] * (C + 1));
 
-                masked = process_img(ctx->ggml_ctx, masked);
-                mask = process_img(ctx->ggml_ctx, mask);
-                control = process_img(ctx->ggml_ctx, control);
+                masked = process_img(ctx, masked);
+                mask = process_img(ctx, mask);
+                control = process_img(ctx, control);
 
                 img = ggml_concat(ctx->ggml_ctx, img, ggml_concat(ctx->ggml_ctx, ggml_concat(ctx->ggml_ctx, masked, mask, 0), control, 0), 0);
             } else if (params.version == VERSION_FLUX_CONTROLS) {
                 GGML_ASSERT(c_concat != nullptr);
 
-                auto control = process_img(ctx->ggml_ctx, c_concat);
+                auto control = process_img(ctx, c_concat);
                 img = ggml_concat(ctx->ggml_ctx, img, control, 0);
             }
 
             if (ref_latents.size() > 0) {
                 for (ggml_tensor* ref : ref_latents) {
-                    ref = process_img(ctx->ggml_ctx, ref);
+                    ref = process_img(ctx, ref);
                     img = ggml_concat(ctx->ggml_ctx, img, ref, 1);
                 }
             }
@@ -1472,6 +1472,8 @@ namespace Flux {
                                          increase_ref_index,
                                          flux_params.ref_index_scale,
                                          flux_params.theta,
+                                         circular_y_enabled,
+                                         circular_x_enabled,
                                          flux_params.axes_dim);
             int pos_len = pe_vec.size() / flux_params.axes_dim_sum / 2;
             // LOG_DEBUG("pos_len %d", pos_len);
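Padding alone is not enough for a DiT-style model like Flux: positions enter through RoPE, so the last hunk also threads the per-axis circular flags into the positional-embedding call (note the y-then-x order in the argument list). One common way to make RoPE wrap, sketched here as an assumption rather than a description of this repo's exact scheme, is to quantize each frequency so a full traversal of the axis is a whole number of rotations; then the phase at position `axis_len` equals the phase at position 0 and attention sees the axis as a closed loop:

```cpp
#include <algorithm>
#include <cmath>
#include <vector>

// Conceptual sketch only; the PR's actual rope changes live outside this hunk.
std::vector<float> circular_rope_angles(int pos, int axis_len,
                                        const std::vector<float>& base_freqs) {
    std::vector<float> angles;
    angles.reserve(base_freqs.size());
    const float kTwoPi = 6.2831853071795864769f;
    for (float freq : base_freqs) {
        // Round rotations-per-traversal to an integer so that
        // angle(axis_len) == angle(0) (mod 2*pi).
        float turns = std::max(1.0f, std::round(freq * axis_len / kTwoPi));
        angles.push_back(kTwoPi * turns * static_cast<float>(pos) / axis_len);
    }
    return angles;
}
```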
2 changes: 1 addition & 1 deletion ggml (submodule pointer update)