Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions docs/Model Support.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
[Kandinsky 5](#kandinsky-5) | DiT | 2025 | Kandinsky Lab | 6B | No | Modern, Decent Quality |
[Anima](#anima) | DiT | 2026 | Circlestone Labs | 2B | WTF | Modern, very small, decent for anime |
[ERNIE](#ernie) | DiT | 2026 | Baidu | 8B | Minimal | Modern, intelligent, good quality, fast |
[HiDream O1](#hidream-o1) | "Pixel UiT" | 2026 | HiDream | 8B | Minimal | Modern, intelligent, fast, decent quality |

Old or bad options are also tracked, and are listed in [Obscure Model Support](/docs/Obscure%20Model%20Support.md):

Expand Down Expand Up @@ -590,6 +591,29 @@ For upscaling with SD3, the `Refiner Do Tiling` parameter is highly recommended
- Out of range doesn't corrupt immediately but will fail at composition.
- Prefers aspects from square to 16:9, gets funny in 21:9

# HiDream-O1

- HiDream's [HiDream O1](<https://huggingface.co/HiDream-ai/HiDream-O1-Image>) is supported in SwarmUI!
- It is an 8B model, with both a base and an official 'dev' distill designed to run faster
- The "Dev" model (in fat BF16) can be downloaded here [Comfy-Org/HiDream-O1-Image - dev](<https://huggingface.co/Comfy-Org/HiDream-O1-Image/resolve/main/checkpoints/hidream_o1_image_dev_bf16.safetensors>)
- Dev FP8 version can be downloaded here [Comfy-Org/HiDream-O1-Image - dev FP8](<https://huggingface.co/Comfy-Org/HiDream-O1-Image/resolve/main/checkpoints/hidream_o1_image_dev_fp8_scaled.safetensors>)
- Or the base version (in fat BF16) [Comfy-Org/HiDream-O1-Image - base](<https://huggingface.co/Comfy-Org/HiDream-O1-Image/resolve/main/checkpoints/hidream_o1_image_bf16.safetensors>)
- Base FP8 version can be downloaded here [Comfy-Org/HiDream-O1-Image - base FP8](<https://huggingface.co/Comfy-Org/HiDream-O1-Image/resolve/main/checkpoints/hidream_o1_image_fp8_scaled.safetensors>)
- Save in `Stable-Diffusion`
- It has no VAE, but has in-middle dedicated large patch scaling to compensate
- Its text encoding is similarly native-integrated
- **Parameters:**
- **Prompt:** Supports general prompting in any format just fine. Speaks at least English and Chinese. Was designed to use LLM-written prompts.
- **Prompt Images:** You can upload up to 10 images for image-editing input, but it only strongly obeys single-image input.
- **Sampler:** Default is fine.
- **Scheduler:** Default is fine.
- **CFG Scale:** For Dev, use `1`. For Base, use normal CFG ranges (around `5`).
- **Steps:** For Dev, `28` is their recommendation, but even just `4` works fine for simple images. For Base, `50` steps is the official recommendation.
- **Resolution:** Side length `2048` is the model's standard, but a wide range works well.
- Because of the aggressive patch scaling, 2048 on this model looks more like 1024 on most other models. 1024 on this model looks noticeably worse. Going above 2048 will have some color distortion.
- **Dev Lora:**
- A low-step dev lora can be downloaded here [Kijai/hidream-O1-image_comfy](<https://huggingface.co/Kijai/hidream-O1-image_comfy/resolve/main/loras/hidream_o1_dev_lora_rank_64_bf16_pruned_v1.safetensors>). It allows use of the base model with the distilled behavior from the Dev model. 8 steps will generate a coherent image of lower quality; 16 steps seems closer to the original quality. Use CFG Scale `1`.

# Video Models

- Video models are documented in [Video Model Support](/docs/Video%20Model%20Support.md).
Expand Down
37 changes: 36 additions & 1 deletion src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -830,7 +830,7 @@ public string CreateKSampler(JArray model, JArray pos, JArray neg, JArray latent
latent = [srCond, 2];
}
}
else if (IsFlux() || IsWanVideo() || IsWanVideo22() || IsOmniGen() || IsQwenImage() || IsZImage() || IsZetaChroma() || IsErnie())
else if (IsFlux() || IsWanVideo() || IsWanVideo22() || IsOmniGen() || IsQwenImage() || IsZImage() || IsZetaChroma() || IsErnie() || IsHiDreamO1())
{
defscheduler ??= "simple";
}
Expand Down Expand Up @@ -1023,6 +1023,41 @@ void makeRefLatent(JArray image)
}
}
}
else if (IsHiDreamO1())
{
List<JArray> refImages = [];
if (UserInput.TryGet(T2IParamTypes.PromptImages, out List<Image> images) && images.Count > 0)
{
int count = Math.Min(images.Count, 10);
for (int i = 0; i < count; i++)
{
refImages.Add(GetPromptImage(true, false, i));
}
}
else if (MaskShrunkInfo is not null && MaskShrunkInfo.ScaledImage is not null)
{
refImages.Add([MaskShrunkInfo.ScaledImage, 0]);
}
else if (BasicInputImage is not null)
{
refImages.Add(BasicInputImage.Path);
}
if (refImages.Count > 0)
{
JObject refInputs = new()
{
["positive"] = pos,
["negative"] = neg
};
for (int i = 0; i < refImages.Count; i++)
{
refInputs[$"images.image_{i + 1}"] = refImages[i];
}
string refNode = CreateNode("HiDreamO1ReferenceImages", refInputs);
pos = [refNode, 0];
neg = [refNode, 1];
}
}
else if (IsWanVideo()) // TODO: Somehow check if this is actually a phantom model?
{
if (UserInput.TryGet(T2IParamTypes.PromptImages, out List<Image> images) && images.Count > 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ public bool IsKontext()
/// <summary>Checks whether the current model is in the HiDream-i1 compatibility class.</summary>
/// <returns>The result of <see cref="IsModelCompatClass"/> for <see cref="T2IModelClassSorter.CompatHiDreamI1"/>.</returns>
public bool IsHiDream()
{
    return IsModelCompatClass(T2IModelClassSorter.CompatHiDreamI1);
}

/// <summary>Checks whether the current model is in the HiDream-O1 Image compatibility class.</summary>
/// <returns>The result of <see cref="IsModelCompatClass"/> for <see cref="T2IModelClassSorter.CompatHiDreamO1"/>.</returns>
public bool IsHiDreamO1()
{
    return IsModelCompatClass(T2IModelClassSorter.CompatHiDreamO1);
}

/// <summary>Returns true if the current model supports Flux Guidance.</summary>
public bool HasFluxGuidance()
{
Expand Down Expand Up @@ -404,6 +407,15 @@ public WGNodeData EmptyImage(int width, int height, int batchSize, string id = n
["width"] = width
}, id));
}
else if (IsHiDreamO1())
{
return resultImage(CreateNode("EmptyHiDreamO1LatentImage", new JObject()
{
["batch_size"] = batchSize,
["height"] = height,
["width"] = width
}, id));
}
else if (UserInput.Get(ComfyUIBackendExtension.ShiftedLatentAverageInit, false))
{
double offA = 0, offB = 0, offC = 0, offD = 0;
Expand Down Expand Up @@ -1113,6 +1125,26 @@ public void LoadClip3(string type, string modelA, string modelB, string modelC)
LoadingClip = [quadClipLoader, 0];
helpers.DoVaeLoader(UserInput.SourceSession?.User?.Settings?.VAEs?.DefaultFluxVAE, "flux-1", "flux-ae");
}
else if (IsHiDreamO1())
{
string noiseScaleNode = CreateNode("ModelNoiseScale", new JObject()
{
["model"] = LoadingModel,
["noise_scale"] = 7.5 // TODO: Configurable?
});
LoadingModel = [noiseScaleNode, 0];
string seamSmoothingNode = CreateNode("HiDreamO1PatchSeamSmoothing", new JObject()
{ // TODO: Configurable?
["model"] = LoadingModel,
["start_percent"] = 0.8,
["end_percent"] = 1.00,
["pattern"] = "single_shift",
["passes"] = "2",
["blend"] = "average",
["strength"] = 1.00
});
LoadingModel = [seamSmoothingNode, 0];
}
else if (IsOmniGen())
{
helpers.LoadClip("omnigen2", helpers.GetOmniQwenModel());
Expand Down
12 changes: 12 additions & 0 deletions src/Text2Image/T2IModelClassSorter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ public static T2IModelCompatClass
CompatZImage = RegisterCompat(new() { ID = "z-image", ShortCode = "ZImg", LorasTargetTextEnc = false }),
CompatZetaChroma = RegisterCompat(new() { ID = "zeta-chroma", ShortCode = "ZChr", LorasTargetTextEnc = false }),
CompatAnima = RegisterCompat(new() { ID = "anima", ShortCode = "Anima", LorasTargetTextEnc = false }),
CompatHiDreamO1 = RegisterCompat(new() { ID = "hidream-o1", ShortCode = "HiDrO1", LorasTargetTextEnc = false }),
// Audio models
CompatAceStep15 = RegisterCompat(new() { ID = "ace-step-1_5", ShortCode = "Ace15", IsAudioModel = true }),
// Obscure old random ones
Expand Down Expand Up @@ -199,6 +200,8 @@ bool isZImageLora(JObject h) => (hasLoraKey(h, "layers.0.adaLN_modulation.0") &&
bool isWanVace(JObject h) => hasKey(h, "vace_blocks.0.after_proj.bias");
bool isHiDream(JObject h) => h.ContainsKey("caption_projection.0.linear.weight");
bool isHiDreamLora(JObject h) => hasKey(h, "double_stream_blocks.0.block.ff_i.shared_experts.w1.lora_A.weight");
// Matches a HiDream-O1 model header: both the weight and the bias key of "model.t_embedder1.mlp.0" must be present in 'h'.
bool isHiDreamO1(JObject h)
{
    if (!h.ContainsKey("model.t_embedder1.mlp.0.weight"))
    {
        return false;
    }
    return h.ContainsKey("model.t_embedder1.mlp.0.bias");
}
// Matches a HiDream-O1 LoRA header: requires a LoRA key for "final_layer2.linear" and one for the first language-model layer's q_proj.
bool isHiDreamO1Lora(JObject h)
{
    bool hasFinalLayerKey = hasLoraKey(h, "final_layer2.linear");
    // The second lookup only runs when the first matched, same as the original short-circuit.
    return hasFinalLayerKey && hasLoraKey(h, "language_model.layers.0.self_attn.q_proj");
}
bool isChroma(JObject h) => h.ContainsKey("distilled_guidance_layer.in_proj.bias") && h.ContainsKey("double_blocks.0.img_attn.proj.bias");
bool isChromaRadiance(JObject h) => h.ContainsKey("nerf_image_embedder.embedder.0.bias");
bool isOmniGen(JObject h) => h.ContainsKey("time_caption_embed.timestep_embedder.linear_2.weight") && h.ContainsKey("context_refiner.0.attn.norm_k.weight");
Expand Down Expand Up @@ -723,6 +726,14 @@ JToken GetEmbeddingKey(JObject h)
{
return isHiDreamLora(h);
}});
Register(new() { ID = "hidream-o1", CompatClass = CompatHiDreamO1, Name = "HiDream O1 Image", StandardWidth = 2048, StandardHeight = 2048, IsThisModelOfClass = (m, h) =>
{
return isHiDreamO1(h);
}});
Register(new() { ID = "hidream-o1/lora", CompatClass = CompatHiDreamO1, Name = "HiDream O1 LoRA", StandardWidth = 2048, StandardHeight = 2048, IsThisModelOfClass = (m, h) =>
{
return isHiDreamO1Lora(h);
}});
Register(new() { ID = "omnigen-2", CompatClass = CompatOmniGen2, Name = "OmniGen 2", StandardWidth = 1024, StandardHeight = 1024, IsThisModelOfClass = (m, h) =>
{
return isOmniGen(h);
Expand Down Expand Up @@ -855,6 +866,7 @@ JToken GetEmbeddingKey(JObject h)
Remaps["hunyuanvideo1.5_720p_i2v"] = "hunyuan-video-1_5";
Remaps["hunyuanvideo1.5_1080p_sr_distilled"] = "hunyuan-video-1_5-sr";
Remaps["hunyuanvideo1.5_720p_sr_distilled"] = "hunyuan-video-1_5-sr";
Remaps["hidream_o1_image"] = "hidream-o1";
}

/// <summary>Returns the model class that matches this model, or null if none.</summary>
Expand Down
Loading