diff --git a/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit_inpaint.py b/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit_inpaint.py
index 75be62cf6db2..77db9c416fbc 100644
--- a/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit_inpaint.py
+++ b/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit_inpaint.py
@@ -873,6 +873,8 @@ def __call__(
             crops_coords = None
             resize_mode = "default"
 
+        original_image = None
+        prompt_image = None
         if image is not None and not (isinstance(image, torch.Tensor) and image.size(1) == self.latent_channels):
             image = self.image_processor.resize(image, calculated_height, calculated_width)
             original_image = image
diff --git a/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit_plus.py b/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit_plus.py
index bc688aeee319..1d1226b04d62 100644
--- a/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit_plus.py
+++ b/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit_plus.py
@@ -675,6 +675,10 @@ def __call__(
         device = self._execution_device
 
         # 3. Preprocess image
+        condition_image_sizes = None
+        condition_images = None
+        vae_image_sizes = None
+        vae_images = None
         if image is not None and not (isinstance(image, torch.Tensor) and image.size(1) == self.latent_channels):
             if not isinstance(image, list):
                 image = [image]
diff --git a/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py b/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py
index 2c9da7545e8a..36e05f445642 100644
--- a/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py
+++ b/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py
@@ -313,6 +313,7 @@ def encode_prompt(
     def check_inputs(
         self,
         prompt,
+        image,
         strength,
         height,
         width,
@@ -324,6 +325,9 @@ def check_inputs(
         callback_on_step_end_tensor_inputs=None,
         max_sequence_length=None,
     ):
+        if image is None:
+            raise ValueError("`image` input cannot be undefined for img2img generation.")
+
         if strength < 0 or strength > 1:
             raise ValueError(f"The value of strength should in [0.0, 1.0] but is {strength}")
 
@@ -654,6 +658,7 @@
         # 1. Check inputs. Raise error if not correct
         self.check_inputs(
             prompt,
+            image,
             strength,
             height,
             width,
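The three QwenImage hunks above address the same failure mode: locals assigned only inside the image-preprocessing branch were left unbound when `image` was omitted, surfacing as an `UnboundLocalError` rather than an actionable message. A minimal sketch of the before/after pattern; the function names here are illustrative, not diffusers code:

def preprocess_before(image=None):
    if image is not None:
        original_image = image
    return original_image  # UnboundLocalError when image is None

def preprocess_after(image=None):
    original_image = None  # default-initialize, as the inpaint/edit-plus hunks do
    if image is not None:
        original_image = image
    return original_image  # returns None instead of crashing

def check_inputs(image):
    if image is None:  # fail fast with a clear message, as the img2img hunk does
        raise ValueError("`image` input cannot be undefined for img2img generation.")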
diff --git a/src/diffusers/pipelines/wan/pipeline_wan.py b/src/diffusers/pipelines/wan/pipeline_wan.py
index dc2bb471101d..2ac8e897d6ab 100644
--- a/src/diffusers/pipelines/wan/pipeline_wan.py
+++ b/src/diffusers/pipelines/wan/pipeline_wan.py
@@ -22,7 +22,7 @@
 from ...callbacks import MultiPipelineCallbacks, PipelineCallback
 from ...loaders import WanLoraLoaderMixin
 from ...models import AutoencoderKLWan, WanTransformer3DModel
-from ...schedulers import FlowMatchEulerDiscreteScheduler
+from ...schedulers import UniPCMultistepScheduler
 from ...utils import is_ftfy_available, is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from ...video_processor import VideoProcessor
@@ -133,7 +133,7 @@ def __init__(
         tokenizer: AutoTokenizer,
         text_encoder: UMT5EncoderModel,
         vae: AutoencoderKLWan,
-        scheduler: FlowMatchEulerDiscreteScheduler,
+        scheduler: UniPCMultistepScheduler,
         transformer: Optional[WanTransformer3DModel] = None,
         transformer_2: Optional[WanTransformer3DModel] = None,
         boundary_ratio: Optional[float] = None,
@@ -496,6 +496,16 @@ def __call__(
         num_frames = num_frames // self.vae_scale_factor_temporal * self.vae_scale_factor_temporal + 1
         num_frames = max(num_frames, 1)
 
+        multiple_of = self.vae_scale_factor_spatial * 2  # 2 for patchification
+        calc_height = height // multiple_of * multiple_of
+        calc_width = width // multiple_of * multiple_of
+        if height != calc_height or width != calc_width:
+            logger.warning(
+                f"`height` and `width` must be multiples of {multiple_of} for proper patchification. "
+                f"Adjusting ({height}, {width}) -> ({calc_height}, {calc_width})."
+            )
+            height, width = calc_height, calc_width
+
         if self.config.boundary_ratio is not None and guidance_scale_2 is None:
             guidance_scale_2 = guidance_scale
 
diff --git a/src/diffusers/pipelines/wan/pipeline_wan_i2v.py b/src/diffusers/pipelines/wan/pipeline_wan_i2v.py
index b7fd0b05980f..e637702c5a46 100644
--- a/src/diffusers/pipelines/wan/pipeline_wan_i2v.py
+++ b/src/diffusers/pipelines/wan/pipeline_wan_i2v.py
@@ -24,7 +24,7 @@
 from ...image_processor import PipelineImageInput
 from ...loaders import WanLoraLoaderMixin
 from ...models import AutoencoderKLWan, WanTransformer3DModel
-from ...schedulers import FlowMatchEulerDiscreteScheduler
+from ...schedulers import UniPCMultistepScheduler
 from ...utils import is_ftfy_available, is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from ...video_processor import VideoProcessor
@@ -169,7 +169,7 @@ def __init__(
         tokenizer: AutoTokenizer,
         text_encoder: UMT5EncoderModel,
         vae: AutoencoderKLWan,
-        scheduler: FlowMatchEulerDiscreteScheduler,
+        scheduler: UniPCMultistepScheduler,
         image_processor: CLIPImageProcessor = None,
         image_encoder: CLIPVisionModel = None,
         transformer: WanTransformer3DModel = None,
@@ -637,6 +637,16 @@ def __call__(
         num_frames = num_frames // self.vae_scale_factor_temporal * self.vae_scale_factor_temporal + 1
         num_frames = max(num_frames, 1)
 
+        multiple_of = self.vae_scale_factor_spatial * 2  # 2 for patchification
+        calc_height = height // multiple_of * multiple_of
+        calc_width = width // multiple_of * multiple_of
+        if height != calc_height or width != calc_width:
+            logger.warning(
+                f"`height` and `width` must be multiples of {multiple_of} for proper patchification. "
+                f"Adjusting ({height}, {width}) -> ({calc_height}, {calc_width})."
+            )
+            height, width = calc_height, calc_width
+
         if self.config.boundary_ratio is not None and guidance_scale_2 is None:
             guidance_scale_2 = guidance_scale
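The identical guard added to `pipeline_wan.py` and `pipeline_wan_i2v.py` exists because the VAE downsamples height and width by `vae_scale_factor_spatial` and the transformer then patchifies latents with a spatial patch size of 2, so both dimensions must be multiples of `vae_scale_factor_spatial * 2`. A standalone sketch of the floor-to-multiple logic; the default of 16 assumes Wan's usual spatial scale factor of 8:

def floor_to_multiple(height: int, width: int, multiple_of: int = 16) -> tuple[int, int]:
    """Floor spatial dims to the nearest multiple, mirroring the pipeline guard."""
    calc_height = height // multiple_of * multiple_of
    calc_width = width // multiple_of * multiple_of
    if (calc_height, calc_width) != (height, width):
        print(f"Adjusting ({height}, {width}) -> ({calc_height}, {calc_width})")
    return calc_height, calc_width

assert floor_to_multiple(480, 832) == (480, 832)  # already aligned, unchanged
assert floor_to_multiple(481, 833) == (480, 832)  # floored, never rounded up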
diff --git a/src/diffusers/pipelines/wan/pipeline_wan_vace.py b/src/diffusers/pipelines/wan/pipeline_wan_vace.py
index 351ae2e70563..6936e54e98f7 100644
--- a/src/diffusers/pipelines/wan/pipeline_wan_vace.py
+++ b/src/diffusers/pipelines/wan/pipeline_wan_vace.py
@@ -24,7 +24,7 @@
 from ...image_processor import PipelineImageInput
 from ...loaders import WanLoraLoaderMixin
 from ...models import AutoencoderKLWan, WanVACETransformer3DModel
-from ...schedulers import FlowMatchEulerDiscreteScheduler
+from ...schedulers import UniPCMultistepScheduler
 from ...utils import is_ftfy_available, is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from ...video_processor import VideoProcessor
@@ -180,7 +180,7 @@ def __init__(
         tokenizer: AutoTokenizer,
         text_encoder: UMT5EncoderModel,
         vae: AutoencoderKLWan,
-        scheduler: FlowMatchEulerDiscreteScheduler,
+        scheduler: UniPCMultistepScheduler,
         transformer: WanVACETransformer3DModel = None,
         transformer_2: WanVACETransformer3DModel = None,
         boundary_ratio: Optional[float] = None,
diff --git a/src/diffusers/pipelines/wan/pipeline_wan_video2video.py b/src/diffusers/pipelines/wan/pipeline_wan_video2video.py
index 5475b6e8b479..6dd2e9220e05 100644
--- a/src/diffusers/pipelines/wan/pipeline_wan_video2video.py
+++ b/src/diffusers/pipelines/wan/pipeline_wan_video2video.py
@@ -24,7 +24,7 @@
 from ...callbacks import MultiPipelineCallbacks, PipelineCallback
 from ...loaders import WanLoraLoaderMixin
 from ...models import AutoencoderKLWan, WanTransformer3DModel
-from ...schedulers import FlowMatchEulerDiscreteScheduler
+from ...schedulers import UniPCMultistepScheduler
 from ...utils import is_ftfy_available, is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from ...video_processor import VideoProcessor
@@ -202,7 +202,7 @@ def __init__(
         text_encoder: UMT5EncoderModel,
         transformer: WanTransformer3DModel,
         vae: AutoencoderKLWan,
-        scheduler: FlowMatchEulerDiscreteScheduler,
+        scheduler: UniPCMultistepScheduler,
     ):
         super().__init__()
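The `FlowMatchEulerDiscreteScheduler` -> `UniPCMultistepScheduler` edits across the four Wan pipelines change only type annotations (and the docstrings generated from them); annotations are not enforced at runtime, so pipelines saved with either scheduler continue to load, and users can still swap schedulers after loading. A usage sketch under that assumption; the checkpoint id and `flow_shift` value below are illustrative, not prescribed by this patch:

import torch
from diffusers import UniPCMultistepScheduler, WanPipeline

pipe = WanPipeline.from_pretrained("Wan-AI/Wan2.1-T2V-1.3B-Diffusers", torch_dtype=torch.bfloat16)
# Rebuild the scheduler from the shipped config; flow_shift tunes the
# flow-matching sigma schedule (typically lower for lower resolutions).
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=3.0)
pipe.to("cuda")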