Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ add_subdirectory(llama)
# Always-built CPU example targets; qwen3_5 is the new hybrid-attention model.
add_subdirectory(minicpm_o)
add_subdirectory(minicpm4)
add_subdirectory(qwen3)
add_subdirectory(qwen3_5)
add_subdirectory(qwen3_service)
add_subdirectory(qwen3_moe)
add_subdirectory(deepseek_ocr)
Expand All @@ -20,6 +21,7 @@ endif()

# QNN (Qualcomm NPU) examples: built only when cross-compiling AOT on x86
# or when the QNN backend itself is enabled.
if(MLLM_QUALCOMM_QNN_AOT_ON_X86_ENABLE OR MLLM_BUILD_QNN_BACKEND)
add_subdirectory(qwen3_qnn_aot)
add_subdirectory(qwen3_5_qnn_aot)
add_subdirectory(qwen2_qnn_aot)
add_subdirectory(llama_qnn_aot)
endif()
3 changes: 3 additions & 0 deletions examples/qwen3_5/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Standalone CPU example runner for Qwen3.5 (examples/qwen3_5/main.cpp).
add_executable(mllm-qwen3-5-runner main.cpp)
# Core runtime + CPU backend only; no QNN dependency for this target.
target_link_libraries(mllm-qwen3-5-runner PRIVATE MllmRT MllmCPUBackend)
target_include_directories(mllm-qwen3-5-runner PRIVATE ${MLLM_INCLUDE_DIR})
76 changes: 76 additions & 0 deletions examples/qwen3_5/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#include <iostream>
#include <string>

#include <fmt/core.h>

#include <mllm/mllm.hpp>
#include <mllm/models/qwen3_5/modeling_qwen3_5.hpp>
#include <mllm/models/qwen3_5/tokenization_qwen3_5.hpp>
#include <mllm/utils/AnyValue.hpp>

using mllm::Argparse;

MLLM_MAIN({
  // Registered so printHelp() lists it; actual detection happens pre-parse below.
  [[maybe_unused]] auto& help = Argparse::add<bool>("-h|--help").help("Show help message");
  auto& model_path = Argparse::add<std::string>("-m|--model_path").help("Model path").required(true);
  auto& model_version = Argparse::add<std::string>("-mv|--model_version").help("Model version").required(true);
  auto& tokenizer_path = Argparse::add<std::string>("-t|--tokenizer_path").help("Tokenizer directory").required(true);
  auto& config_path = Argparse::add<std::string>("-c|--config_path").help("Config path").required(true);

  // Handle -h/--help BEFORE Argparse::parse(): every other argument is
  // required(true), so a bare `-h` invocation would otherwise fail
  // required-argument validation before the help check was ever reached.
  for (int i = 1; i < argc; ++i) {
    const std::string arg{argv[i]};
    if (arg == "-h" || arg == "--help") {
      Argparse::printHelp();
      mllm::shutdownContext();
      return 0;
    }
  }

  Argparse::parse(argc, argv);

#ifdef MLLM_PERFETTO_ENABLE
  mllm::perf::start();
#endif

  // Map the textual model version onto the weight-file format enum.
  // NOTE(review): any value other than "v1"/"v2" silently falls back to
  // kV1 — confirm whether an unknown version should be a hard error.
  mllm::ModelFileVersion file_version = mllm::ModelFileVersion::kV1;
  if (model_version.get() == "v1") {
    file_version = mllm::ModelFileVersion::kV1;
  } else if (model_version.get() == "v2") {
    file_version = mllm::ModelFileVersion::kV2;
  }
Comment on lines +30 to +34
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Help flag check may fail due to required argument validation.

The help flag is checked after Argparse::parse(argc, argv) (line 17), but arguments are marked as required(true). If the user runs with just -h, the parser may fail before reaching the help check.

🛠️ Proposed fix

Consider checking for help before validating required arguments, or ensuring Argparse::parse doesn't error on missing required args when -h is present. A common pattern is:

+  // Check for help before full parse
+  for (int i = 1; i < argc; ++i) {
+    if (std::string(argv[i]) == "-h" || std::string(argv[i]) == "--help") {
+      Argparse::printHelp();
+      mllm::shutdownContext();
+      return 0;
+    }
+  }
+
   Argparse::parse(argc, argv);
-
-  if (help.isSet()) {
-    Argparse::printHelp();
-    mllm::shutdownContext();
-    return 0;
-  }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@examples/qwen3_5/main.cpp` around lines 30 - 34, The help-check currently
runs after Argparse::parse(argc, argv) which will fail if required(true)
arguments are missing; modify the flow so help is detected before required-arg
validation by either (a) checking argv for "-h" or "--help" before calling
Argparse::parse, or (b) using a parser option/alternate parse method that allows
short-circuiting validation when help is present; update the code around
Argparse::parse(argc, argv), help.isSet(), and the subsequent
Argparse::printHelp()/mllm::shutdownContext() so that printHelp() is invoked and
the program exits without triggering required-argument errors.


{
  // Inference scope: model/tokenizer lifetimes end before the perf report below.
  auto cfg = mllm::models::qwen3_5::Qwen3_5Config(config_path.get());
  auto tokenizer = mllm::models::qwen3_5::Qwen3_5Tokenizer(tokenizer_path.get());
  auto model = mllm::models::qwen3_5::Qwen3_5ForCausalLM(cfg);

  // Report the hybrid layer layout (full attention vs. GDN layers).
  fmt::print("Qwen3.5 0.8B: {} layers ({} full attention + {} GDN)\n",
             cfg.num_hidden_layers, cfg.numFullAttentionLayers(), cfg.numGDNLayers());

  auto param = mllm::load(model_path.get(), file_version);
  model.load(param);

  fmt::print("\n{:*^60}\n", " Qwen3.5 Interactive CLI ");
  fmt::print("Enter 'exit' or 'quit' to end the session\n\n");

  // The banner advertises an interactive session, so loop until the user
  // types exit/quit (or stdin reaches EOF) instead of handling a single
  // prompt — previously the words "exit"/"quit" were fed to the model.
  std::string prompt_text;
  while (true) {
    fmt::print("Prompt text (or 'exit/quit'): ");
    if (!std::getline(std::cin, prompt_text)) { break; }  // EOF ends the session
    if (prompt_text == "exit" || prompt_text == "quit") { break; }

    try {
      fmt::print("Processing...\n");
      auto inputs = tokenizer.convertMessage({.prompt = prompt_text});

      fmt::print("\nResponse: ");

      // NOTE(review): detokenize() appears to yield a wide string (hence
      // std::wcout); mixing wide output with fmt's narrow stdout can
      // misbehave once the stream orientation is set — confirm upstream.
      for (auto& step : model.chat(inputs)) { std::wcout << tokenizer.detokenize(step.cur_token_id) << std::flush; }

      fmt::print("\n{}\n", std::string(60, '-'));
    } catch (const std::exception& e) { fmt::print("\nError: {}\n{}\n", e.what(), std::string(60, '-')); }
  }

  model.perfSummary();
}

// Optional Perfetto tracing: stop and persist the trace collected since startup.
#ifdef MLLM_PERFETTO_ENABLE
mllm::perf::stop();
mllm::perf::saveReport("qwen3_5.perf");
#endif

mllm::print("\n");
// Dump allocator/memory statistics gathered by the runtime before exit.
mllm::memoryReport();
})
11 changes: 11 additions & 0 deletions examples/qwen3_5_qnn_aot/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# AOT compile target runs on x86 (cross-compilation for Qualcomm HTP)
if(MLLM_QUALCOMM_QNN_AOT_ON_X86_ENABLE)
add_executable(mllm-qwen3_5-aot-c compile.cpp)
target_link_libraries(mllm-qwen3_5-aot-c PRIVATE MllmRT MllmCPUBackend MllmQNNBackend)
target_include_directories(mllm-qwen3_5-aot-c PRIVATE ${MLLM_INCLUDE_DIR})
endif()

# Hybrid CPU+QNN runtime (runs on device: CPU for GDN, QNN for full attention)
# Built unconditionally, unlike the compiler above, so it is available on-device.
add_executable(mllm-qwen3_5-aot-runner aot_run.cpp)
target_link_libraries(mllm-qwen3_5-aot-runner PRIVATE MllmRT MllmCPUBackend MllmQNNBackend)
target_include_directories(mllm-qwen3_5-aot-runner PRIVATE ${MLLM_INCLUDE_DIR})
Loading