diff --git a/examples/dreambooth/README_flux2.md b/examples/dreambooth/README_flux2.md
index ad5d61f1f9e2..3839e377c0b3 100644
--- a/examples/dreambooth/README_flux2.md
+++ b/examples/dreambooth/README_flux2.md
@@ -347,16 +347,17 @@ When LoRA was first adapted from language models to diffusion models, it was app
 More recently, SOTA text-to-image diffusion models replaced the Unet with a diffusion Transformer(DiT). With this change, we may also want to explore applying LoRA training onto different types of layers and blocks. To allow more flexibility and control over the targeted modules we added `--lora_layers`- in which you can specify in a comma separated string the exact modules for LoRA training. Here are some examples of target modules you can provide:
-- for attention only layers: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0"`
-- to train the same modules as in the fal trainer: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.net.0.proj,ff.net.2,ff_context.net.0.proj,ff_context.net.2"`
-- to train the same modules as in ostris ai-toolkit / replicate trainer: `--lora_blocks="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.net.0.proj,ff.net.2,ff_context.net.0.proj,ff_context.net.2,norm1_context.linear, norm1.linear,norm.linear,proj_mlp,proj_out"`
+- for attention only layers: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.to_qkv_mlp_proj"`
+- to train the same modules as in the fal trainer: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.to_qkv_mlp_proj,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.linear_in,ff.linear_out,ff_context.linear_in,ff_context.linear_out"`
+- to train the same modules as in ostris ai-toolkit / replicate trainer: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.to_qkv_mlp_proj,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.linear_in,ff.linear_out,ff_context.linear_in,ff_context.linear_out,norm_out.linear,norm_out.proj_out"`
 
 > [!NOTE]
 > `--lora_layers` can also be used to specify which **blocks** to apply LoRA training to. To do so, simply add a block prefix to each layer in the comma separated string:
 > **single DiT blocks**: to target the ith single transformer block, add the prefix `single_transformer_blocks.i`, e.g. - `single_transformer_blocks.i.attn.to_k`
-> **MMDiT blocks**: to target the ith MMDiT block, add the prefix `transformer_blocks.i`, e.g. - `transformer_blocks.i.attn.to_k`
+> **MMDiT blocks**: to target the ith MMDiT block, add the prefix `transformer_blocks.i`, e.g. - `transformer_blocks.i.attn.to_k`
 
 > [!NOTE]
 > keep in mind that while training more layers can improve quality and expressiveness, it also increases the size of the output LoRA weights.
-
+> [!NOTE]
+> In FLUX.2, the q, k and v projections of each single transformer block are fused into a single linear layer named `attn.to_qkv_mlp_proj`. The attention output there is also just `attn.to_out`, not `attn.to_out.0`, because it is a plain `Linear` rather than a `ModuleList` as in the MMDiT `transformer_blocks`.
 
 ## Training Image-to-Image
 
diff --git a/examples/dreambooth/train_dreambooth_lora_flux2.py b/examples/dreambooth/train_dreambooth_lora_flux2.py
index 317ed2c2b2e1..f17372e9effc 100644
--- a/examples/dreambooth/train_dreambooth_lora_flux2.py
+++ b/examples/dreambooth/train_dreambooth_lora_flux2.py
@@ -1256,7 +1256,10 @@ def main(args):
     if args.lora_layers is not None:
         target_modules = [layer.strip() for layer in args.lora_layers.split(",")]
     else:
-        target_modules = ["to_k", "to_q", "to_v", "to_out.0"]
+        # just train transformer_blocks:
+        # target_modules = ["to_k", "to_q", "to_v", "to_out.0"]
+        # train both transformer_blocks and single_transformer_blocks:
+        target_modules = ["to_k", "to_q", "to_v", "to_out.0"] + ["to_qkv_mlp_proj", *[f"single_transformer_blocks.{i}.attn.to_out" for i in range(48)]]
 
     # now we will add new LoRA weights the transformer layers
     transformer_lora_config = LoraConfig(
diff --git a/examples/dreambooth/train_dreambooth_lora_flux2_klein.py b/examples/dreambooth/train_dreambooth_lora_flux2_klein.py
index 278c25900a3a..c010483bf031 100644
--- a/examples/dreambooth/train_dreambooth_lora_flux2_klein.py
+++ b/examples/dreambooth/train_dreambooth_lora_flux2_klein.py
@@ -1249,7 +1249,10 @@ def main(args):
     if args.lora_layers is not None:
         target_modules = [layer.strip() for layer in args.lora_layers.split(",")]
     else:
-        target_modules = ["to_k", "to_q", "to_v", "to_out.0"]
+        # just train transformer_blocks:
+        # target_modules = ["to_k", "to_q", "to_v", "to_out.0"]
+        # train both transformer_blocks and single_transformer_blocks:
+        target_modules = ["to_k", "to_q", "to_v", "to_out.0"] + ["to_qkv_mlp_proj", *[f"single_transformer_blocks.{i}.attn.to_out" for i in range(24)]]
 
     # now we will add new LoRA weights the transformer layers
     transformer_lora_config = LoraConfig(
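For context, here is a minimal sketch (not part of the patch) of how the expanded default above would flow into the `LoraConfig` the scripts build right after this block. The rank/alpha values are placeholders (the scripts take them from `--rank`), and the single-block count of 48 is simply the value hard-coded in the patch (the Klein script uses 24).

```python
from peft import LoraConfig

num_single_blocks = 48  # hard-coded in the patch above; the Klein script uses 24

# Default targets when --lora_layers is not passed: MMDiT attention layers plus the
# fused QKV/MLP projection and the attention output of every single transformer block.
target_modules = ["to_k", "to_q", "to_v", "to_out.0"] + [
    "to_qkv_mlp_proj",
    *[f"single_transformer_blocks.{i}.attn.to_out" for i in range(num_single_blocks)],
]

transformer_lora_config = LoraConfig(
    r=16,  # placeholder; the scripts pass args.rank here
    lora_alpha=16,  # placeholder; the scripts pass args.rank here
    init_lora_weights="gaussian",
    target_modules=target_modules,
)
# transformer.add_adapter(transformer_lora_config)  # as the training scripts do next
```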
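If you want to double-check which attention modules actually exist on a given FLUX.2 transformer before passing names to `--lora_layers`, a small helper along these lines can list them. This is a hypothetical utility, not something in the scripts: it assumes you already have the loaded `transformer` module, and the expected name patterns in the comment come from the README note above, so verify them against your own checkpoint.

```python
import torch


def print_lora_targets(transformer: torch.nn.Module) -> None:
    """Print every Linear sub-module under an attention block, i.e. candidate LoRA targets."""
    for name, module in transformer.named_modules():
        if isinstance(module, torch.nn.Linear) and ".attn." in name:
            print(name)


# Expected patterns (per the README note above; verify on your checkpoint):
#   transformer_blocks.<i>.attn.to_q / to_k / to_v / to_out.0 / add_q_proj / ... / to_add_out
#   single_transformer_blocks.<i>.attn.to_qkv_mlp_proj / to_out
```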