Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
2354fda
init
hlky Dec 4, 2025
1e2009d
passed transformer
hlky Dec 4, 2025
0c30839
ruff
hlky Dec 4, 2025
52f996e
convert passed
hlky Dec 4, 2025
4b446b3
__init__
hlky Dec 4, 2025
a1ff390
pipeline example
hlky Dec 4, 2025
7ab347d
ruff
hlky Dec 4, 2025
8cab0c9
pipeline load_image
hlky Dec 4, 2025
8688fa6
t_scale
hlky Dec 4, 2025
9051272
x_pad_token
hlky Dec 4, 2025
0d8c3f1
controlnet_block_samples
hlky Dec 4, 2025
f789325
conditioning_scale
hlky Dec 4, 2025
5f8ab7b
self.config
hlky Dec 4, 2025
bc72f9c
sample_mode, default controlnet_conditioning_scale
hlky Dec 4, 2025
13b706a
ruff
hlky Dec 4, 2025
728ba02
Merge branch 'main' into z-image-controlnet
hlky Dec 4, 2025
09849a7
ZImageControlTransformer2DModel
hlky Dec 5, 2025
f63a5a8
ModuleDict
hlky Dec 5, 2025
f9540cb
patchify control_context
hlky Dec 5, 2025
3e472ac
transformer weights
hlky Dec 5, 2025
0e7c643
-enumerate in ZImageTransformer2DModel
hlky Dec 5, 2025
413c7cb
Merge branch 'main' into z-image-controlnet
hlky Dec 10, 2025
a00f104
Option 3
hlky Dec 11, 2025
a961402
from_single_file
hlky Dec 11, 2025
6e1c218
Remove convert script
hlky Dec 11, 2025
8e7743a
Copied from
hlky Dec 11, 2025
c135170
Example
hlky Dec 11, 2025
a737b3c
doc-builder style
hlky Dec 12, 2025
7bc847a
check_dummies
hlky Dec 12, 2025
ffde035
custom_init_isort
hlky Dec 12, 2025
04388f4
init v2
hlky Dec 13, 2025
62ee1c1
handle 2.0 t2i pipeline control_image dimension
hlky Dec 13, 2025
faf5a24
ZImageControlNetInpaintPipeline
hlky Dec 13, 2025
6126f02
t_scale
hlky Dec 13, 2025
c3def6b
config.all_patch_size
hlky Dec 13, 2025
f80ed52
not self.add_control_noise_refiner
hlky Dec 13, 2025
721011e
-self
hlky Dec 13, 2025
efadd91
* (mask_condition < 0.5)
hlky Dec 13, 2025
f4b7fcc
create_z_image_controlnet_config unknown type
hlky Dec 13, 2025
dd9775c
pop control_noise_refiner from 2.0 state_dict
hlky Dec 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/diffusers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@
"WanAnimateTransformer3DModel",
"WanTransformer3DModel",
"WanVACETransformer3DModel",
"ZImageControlNetModel",
"ZImageTransformer2DModel",
"attention_backend",
]
Expand Down Expand Up @@ -666,6 +667,8 @@
"WuerstchenCombinedPipeline",
"WuerstchenDecoderPipeline",
"WuerstchenPriorPipeline",
"ZImageControlNetPipeline",
"ZImageControlNetInpaintPipeline",
"ZImageImg2ImgPipeline",
"ZImagePipeline",
]
Expand Down Expand Up @@ -1012,6 +1015,7 @@
WanAnimateTransformer3DModel,
WanTransformer3DModel,
WanVACETransformer3DModel,
ZImageControlNetModel,
ZImageTransformer2DModel,
attention_backend,
)
Expand Down Expand Up @@ -1369,6 +1373,8 @@
WuerstchenCombinedPipeline,
WuerstchenDecoderPipeline,
WuerstchenPriorPipeline,
ZImageControlNetInpaintPipeline,
ZImageControlNetPipeline,
ZImageImg2ImgPipeline,
ZImagePipeline,
)
Expand Down
16 changes: 15 additions & 1 deletion src/diffusers/loaders/single_file_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,12 @@
convert_stable_cascade_unet_single_file_to_diffusers,
convert_wan_transformer_to_diffusers,
convert_wan_vae_to_diffusers,
convert_z_image_controlnet_checkpoint_to_diffusers,
convert_z_image_transformer_checkpoint_to_diffusers,
create_controlnet_diffusers_config_from_ldm,
create_unet_diffusers_config_from_ldm,
create_vae_diffusers_config_from_ldm,
create_z_image_controlnet_config,
fetch_diffusers_config,
fetch_original_config,
load_single_file_checkpoint,
Expand Down Expand Up @@ -172,11 +174,19 @@
"checkpoint_mapping_fn": convert_z_image_transformer_checkpoint_to_diffusers,
"default_subfolder": "transformer",
},
"ZImageControlNetModel": {
"checkpoint_mapping_fn": convert_z_image_controlnet_checkpoint_to_diffusers,
"config_create_fn": create_z_image_controlnet_config,
},
}


def _should_convert_state_dict_to_diffusers(model_state_dict, checkpoint_state_dict):
return not set(model_state_dict.keys()).issubset(set(checkpoint_state_dict.keys()))
model_state_dict_keys = set(model_state_dict.keys())
checkpoint_state_dict_keys = set(checkpoint_state_dict.keys())
is_subset = model_state_dict_keys.issubset(checkpoint_state_dict_keys)
is_match = model_state_dict_keys == checkpoint_state_dict_keys
return not (is_subset and is_match)


def _get_single_file_loadable_mapping_class(cls):
Expand Down Expand Up @@ -369,6 +379,10 @@ def from_single_file(cls, pretrained_model_link_or_path_or_dict: Optional[str] =
diffusers_model_config = config_mapping_fn(
original_config=original_config, checkpoint=checkpoint, **config_mapping_kwargs
)
elif "config_create_fn" in mapping_functions:
config_create_fn = mapping_functions["config_create_fn"]
config_create_kwargs = _get_mapping_function_kwargs(config_create_fn, **kwargs)
diffusers_model_config = config_create_fn(checkpoint=checkpoint, **config_create_kwargs)
else:
if config is not None:
if isinstance(config, str):
Expand Down
53 changes: 53 additions & 0 deletions src/diffusers/loaders/single_file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
"instruct-pix2pix": "model.diffusion_model.input_blocks.0.0.weight",
"lumina2": ["model.diffusion_model.cap_embedder.0.weight", "cap_embedder.0.weight"],
"z-image-turbo": "cap_embedder.0.weight",
"z-image-turbo-controlnet": "control_all_x_embedder.2-1.weight",
"sana": [
"blocks.0.cross_attn.q_linear.weight",
"blocks.0.cross_attn.q_linear.bias",
Expand Down Expand Up @@ -779,6 +780,9 @@ def infer_diffusers_model_type(checkpoint):
else:
raise ValueError(f"Unexpected x_embedder shape: {x_embedder_shape} when loading Cosmos 2.0 model.")

elif CHECKPOINT_KEY_NAMES["z-image-turbo-controlnet"] in checkpoint:
model_type = "z-image-turbo-controlnet"

else:
model_type = "v1"

Expand Down Expand Up @@ -3885,3 +3889,52 @@ def update_state_dict(state_dict: dict[str, object], old_key: str, new_key: str)
handler_fn_inplace(key, converted_state_dict)

return converted_state_dict


def create_z_image_controlnet_config(checkpoint, **kwargs):
    """Build the model config for a Z-Image Turbo ControlNet checkpoint.

    The variant is chosen from the second dimension of the
    ``control_all_x_embedder.2-1.weight`` entry: 64 selects the first
    ("v1") layout and 132 selects the second ("v2") layout, which adds the
    control-refiner settings.

    Args:
        checkpoint: Mapping of parameter names to objects exposing ``.shape``.
        **kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        dict: A freshly built config dictionary for the detected variant.

    Raises:
        KeyError: If the embedder weight is missing from ``checkpoint``.
        ValueError: If the embedder width matches neither known variant.
    """
    embedder_in_features = checkpoint["control_all_x_embedder.2-1.weight"].shape[1]

    if embedder_in_features == 64:
        variant_settings = {
            "control_in_dim": 16,
            "control_layers_places": [0, 5, 10, 15, 20, 25],
        }
    elif embedder_in_features == 132:
        variant_settings = {
            "control_in_dim": 33,
            # Every second layer index from 0 through 28.
            "control_layers_places": list(range(0, 30, 2)),
            "control_refiner_layers_places": [0, 1],
            "add_control_noise_refiner": True,
        }
    else:
        raise ValueError("Unknown Z-Image Turbo ControlNet type.")

    # Settings that are identical for both known variants.
    shared_settings = {
        "dim": 3840,
        "n_heads": 30,
        "n_kv_heads": 30,
        "n_refiner_layers": 2,
        "norm_eps": 1e-05,
        "qk_norm": True,
        "all_f_patch_size": [1],
        "all_patch_size": [2],
    }
    return {**variant_settings, **shared_settings}


def convert_z_image_controlnet_checkpoint_to_diffusers(checkpoint, **kwargs):
    """Map a Z-Image Turbo ControlNet checkpoint onto the keys to be loaded.

    The variant is read from the second dimension of the
    ``control_all_x_embedder.2-1.weight`` entry.

    * Width 64: the checkpoint is returned as-is (the same object).
    * Width 132: every entry whose key does not start with
      ``"control_noise_refiner."`` is moved into a new dict. The entries are
      popped from ``checkpoint``, so afterwards the input mapping holds only
      the ``control_noise_refiner.*`` entries.

    Args:
        checkpoint: Mutable mapping of parameter names to objects exposing
            ``.shape``.
        **kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        dict: The state dict to load.

    Raises:
        KeyError: If the embedder weight is missing from ``checkpoint``.
        ValueError: If the embedder width matches neither known variant.
    """
    embedder_in_features = checkpoint["control_all_x_embedder.2-1.weight"].shape[1]

    if embedder_in_features == 64:
        return checkpoint
    if embedder_in_features != 132:
        raise ValueError("Unknown Z-Image Turbo ControlNet type.")

    converted_state_dict = {}
    # Iterate over a snapshot of the keys because entries are popped in the loop.
    for name in tuple(checkpoint.keys()):
        if name.startswith("control_noise_refiner."):
            continue
        converted_state_dict[name] = checkpoint.pop(name)
    return converted_state_dict
2 changes: 2 additions & 0 deletions src/diffusers/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
_import_structure["controlnets.controlnet_sparsectrl"] = ["SparseControlNetModel"]
_import_structure["controlnets.controlnet_union"] = ["ControlNetUnionModel"]
_import_structure["controlnets.controlnet_xs"] = ["ControlNetXSAdapter", "UNetControlNetXSModel"]
_import_structure["controlnets.controlnet_z_image"] = ["ZImageControlNetModel"]
_import_structure["controlnets.multicontrolnet"] = ["MultiControlNetModel"]
_import_structure["controlnets.multicontrolnet_union"] = ["MultiControlNetUnionModel"]
_import_structure["embeddings"] = ["ImageProjection"]
Expand Down Expand Up @@ -180,6 +181,7 @@
SD3MultiControlNetModel,
SparseControlNetModel,
UNetControlNetXSModel,
ZImageControlNetModel,
)
from .embeddings import ImageProjection
from .modeling_utils import ModelMixin
Expand Down
1 change: 1 addition & 0 deletions src/diffusers/models/controlnets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
)
from .controlnet_union import ControlNetUnionModel
from .controlnet_xs import ControlNetXSAdapter, ControlNetXSOutput, UNetControlNetXSModel
from .controlnet_z_image import ZImageControlNetModel
from .multicontrolnet import MultiControlNetModel
from .multicontrolnet_union import MultiControlNetUnionModel

Expand Down
Loading