diff --git a/qdp/qdp-core/src/gpu/encodings/amplitude.rs b/qdp/qdp-core/src/gpu/encodings/amplitude.rs index 1be318cfe3..a1928bc352 100644 --- a/qdp/qdp-core/src/gpu/encodings/amplitude.rs +++ b/qdp/qdp-core/src/gpu/encodings/amplitude.rs @@ -245,21 +245,9 @@ impl QuantumEncoder for AmplitudeEncoder { buffer }; - // Validate norms on host to catch zero or NaN samples early - { - crate::profile_scope!("GPU::NormValidation"); - let host_inv_norms = device - .dtoh_sync_copy(&inv_norms_gpu) - .map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?; - - if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) { - return Err(MahoutError::InvalidInput( - "One or more samples have zero or invalid norm".to_string(), - )); - } - } - - // Launch batch kernel + // Launch batch encode kernel — takes GPU norm buffer directly, no D2H needed yet. + // We defer the norm validation D2H copy until AFTER the encode kernel + sync so that + // the norm kernel → encode kernel sequence runs without an intermediate GPU-CPU roundtrip. { crate::profile_scope!("GPU::BatchKernelLaunch"); let state_ptr = batch_state_vector.ptr_f64().ok_or_else(|| { @@ -288,7 +276,7 @@ impl QuantumEncoder for AmplitudeEncoder { } } - // Synchronize + // Synchronize — all GPU work (norm + encode) complete after this point. { crate::profile_scope!("GPU::Synchronize"); device @@ -296,6 +284,22 @@ impl QuantumEncoder for AmplitudeEncoder { .map_err(|e| MahoutError::Cuda(format!("Sync failed: {:?}", e)))?; } + // Validate norms on host AFTER sync: D2H copy no longer blocks the encode kernel. + // This preserves error detection for zero/NaN samples without adding a mid-pipeline + // GPU-CPU roundtrip between the norm and encode kernels. 
+ { + crate::profile_scope!("GPU::NormValidation"); + let host_inv_norms = device + .dtoh_sync_copy(&inv_norms_gpu) + .map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?; + + if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) { + return Err(MahoutError::InvalidInput( + "One or more samples have zero or invalid norm".to_string(), + )); + } + } + Ok(batch_state_vector) } @@ -412,17 +416,8 @@ impl QuantumEncoder for AmplitudeEncoder { } buffer }; - { - crate::profile_scope!("GPU::NormValidation"); - let host_inv_norms = device - .dtoh_sync_copy(&inv_norms_gpu) - .map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?; - if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) { - return Err(MahoutError::InvalidInput( - "One or more samples have zero or invalid norm".to_string(), - )); - } - } + // Launch encode kernel before D2H norm validation: GPU norm buffer is passed directly, + // so the encode kernel can run immediately after the norm kernel without a CPU roundtrip. { crate::profile_scope!("GPU::BatchKernelLaunch"); use cudarc::driver::DevicePtr; @@ -450,10 +445,22 @@ impl QuantumEncoder for AmplitudeEncoder { ))); } } + // Synchronize first; then validate norms on host (D2H after all GPU work is done). 
{ crate::profile_scope!("GPU::Synchronize"); sync_cuda_stream(stream, "CUDA stream synchronize failed")?; } + { + crate::profile_scope!("GPU::NormValidation"); + let host_inv_norms = device + .dtoh_sync_copy(&inv_norms_gpu) + .map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?; + if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) { + return Err(MahoutError::InvalidInput( + "One or more samples have zero or invalid norm".to_string(), + )); + } + } Ok(batch_state_vector) } diff --git a/qdp/qdp-core/src/pipeline_runner.rs b/qdp/qdp-core/src/pipeline_runner.rs index 9a41ee4bc1..d3ce2a7a13 100644 --- a/qdp/qdp-core/src/pipeline_runner.rs +++ b/qdp/qdp-core/src/pipeline_runner.rs @@ -259,6 +259,47 @@ impl PipelineIterator { }) } + /// Create a pipeline iterator from an in-memory array (e.g. from Python numpy). + /// Data is owned by the iterator; the full encode loop runs in Rust (take_batch + encode_batch). + pub fn new_from_array( + engine: QdpEngine, + data: Vec, + num_samples: usize, + sample_size: usize, + config: PipelineConfig, + batch_limit: usize, + ) -> Result { + let vector_len = vector_len(config.num_qubits, &config.encoding_method); + if sample_size != vector_len { + return Err(MahoutError::InvalidInput(format!( + "Array sample_size {} does not match vector_len {} for num_qubits={}, encoding={}", + sample_size, vector_len, config.num_qubits, config.encoding_method + ))); + } + if data.len() != num_samples * sample_size { + return Err(MahoutError::InvalidInput(format!( + "Array length {} is not num_samples ({}) * sample_size ({})", + data.len(), + num_samples, + sample_size + ))); + } + let source = DataSource::InMemory { + data, + cursor: 0, + num_samples, + sample_size, + batches_yielded: 0, + batch_limit, + }; + Ok(Self { + engine, + config, + source, + vector_len, + }) + } + /// Create a pipeline iterator from a Parquet file using streaming read (Phase 2b). 
/// Only `.parquet` is supported; reduces memory for large files by reading in chunks. /// Validates sample_size == vector_len after the first chunk. @@ -428,7 +469,61 @@ impl PipelineIterator { } /// Returns the next batch as a DLPack pointer; `Ok(None)` when exhausted. + /// For InMemory source, passes a slice reference to encode_batch (no per-batch copy). pub fn next_batch(&mut self) -> Result> { + // InMemory: update cursor, then encode from &data[start..end] to avoid to_vec(). + let in_memory_range: Option<(usize, usize, usize, usize)> = match &mut self.source { + DataSource::InMemory { + data, + cursor, + sample_size, + batches_yielded, + batch_limit, + .. + } => { + if *batches_yielded >= *batch_limit { + None + } else { + let remaining = (data.len() - *cursor) / *sample_size; + if remaining == 0 { + None + } else { + let batch_n = remaining.min(self.config.batch_size); + let start = *cursor; + let end = start + batch_n * *sample_size; + *cursor = end; + *batches_yielded += 1; + Some(( + start, + batch_n, + *sample_size, + self.config.num_qubits as usize, + )) + } + } + } + _ => None, + }; + + if let Some((start, batch_n, sample_size, num_qubits)) = in_memory_range { + let slice = match &self.source { + DataSource::InMemory { data, .. } => { + let len = batch_n * sample_size; + &data[start..start + len] + } + _ => unreachable!(), + }; + let ptr = self.engine.encode_batch( + slice, + batch_n, + sample_size, + num_qubits, + &self.config.encoding_method, + )?; + return Ok(Some(ptr)); + } + + // Synthetic / Streaming: take_batch_from_source (may copy) then encode. let Some((batch_data, batch_n, sample_size, num_qubits)) = self.take_batch_from_source()? 
else { return Ok(None); diff --git a/qdp/qdp-python/benchmark/encoding_benchmarks/README.md b/qdp/qdp-python/benchmark/encoding_benchmarks/README.md index 97c70ab369..3cb4dc64d8 100644 --- a/qdp/qdp-python/benchmark/encoding_benchmarks/README.md +++ b/qdp/qdp-python/benchmark/encoding_benchmarks/README.md @@ -75,3 +75,25 @@ To see the full list of options and defaults, append `--help`: uv run python benchmark/encoding_benchmarks/pennylane_baseline/iris_amplitude.py --help uv run python benchmark/encoding_benchmarks/qdp_pipeline/iris_amplitude.py --help ``` + +## Credit Card Fraud amplitude baseline (PennyLane) + +Minimal, reproducible steps (run from `qdp/qdp-python`): + +1. **Download dataset (once)** — Kaggle `creditcard.csv` mirror: + + ```bash + mkdir -p benchmark/encoding_benchmarks/pennylane_baseline/data + curl -L -o benchmark/encoding_benchmarks/pennylane_baseline/data/creditcard.csv \ + https://raw.githubusercontent.com/nsethi31/Kaggle-Data-Credit-Card-Fraud-Detection/master/creditcard.csv + ``` + +2. **Run the PennyLane baseline** — StandardScaler → PCA(16) → L2 norm → 4‑qubit amplitude VQC: + + ```bash + uv run python benchmark/encoding_benchmarks/pennylane_baseline/creditcardfraud_amplitude.py \ + --data-file benchmark/encoding_benchmarks/pennylane_baseline/data/creditcard.csv \ + --max-samples 300000 --iters 200 --batch-size 512 --trials 1 + ``` + +This prints compile time, train time / throughput, and task metrics (AUPRC, F1, precision, recall) on the test set. 
diff --git a/qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/creditcardfraud_amplitude.py b/qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/creditcardfraud_amplitude.py new file mode 100644 index 0000000000..34e9148fd3 --- /dev/null +++ b/qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/creditcardfraud_amplitude.py @@ -0,0 +1,426 @@ +#!/usr/bin/env python3 +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +PennyLane baseline: Credit Card Fraud (binary, highly imbalanced), amplitude encoding. + +Best practices (2025–2026, aligned with ENCODING_BENCHMARK_PLAN.md §2.2): +- Data: StandardScaler + PCA (here 16–30 components) → padding to 2**num_qubits → L2-normalized vector. +- Splits: Stratified train/validation/test; do not use accuracy as primary metric. +- Imbalance: Class-weighted loss (minority class up-weighted); optional oversampling. +- Task metrics: AUPRC (precision–recall AUC), F1-score, precision, recall on test set. +- System metrics: Compile time (first forward), train time, throughput (samples/sec). + +Data source: + CSV with columns V1..V28, Amount, Class (0=legit, 1=fraud). 
Example: Kaggle + "Credit Card Fraud Detection" (https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud). + Pass path via --data-file. If no file, a small synthetic imbalanced dataset is used for smoke test. + +Training always runs on GPU via lightning.gpu for fair comparison with QDP pipeline. +""" + +from __future__ import annotations + +import argparse +import time +from pathlib import Path +from typing import Any + +import numpy as np +import torch + +try: + import pennylane as qml +except ImportError as e: + raise SystemExit( + "PennyLane is required. Install with: uv sync --group benchmark" + ) from e + +try: + from sklearn.decomposition import PCA + from sklearn.metrics import ( + average_precision_score, + f1_score, + precision_score, + recall_score, + ) + from sklearn.model_selection import train_test_split + from sklearn.preprocessing import StandardScaler +except ImportError as e: + raise SystemExit( + "scikit-learn is required. Install with: uv sync --group benchmark" + ) from e + + +NUM_QUBITS = 5 +FEATURE_DIM = 2**NUM_QUBITS # amplitude embedding dimension (32 for 5 qubits) + + +def _layer(layer_weights: torch.Tensor, wires: tuple[int, ...]) -> None: + """Single variational layer: Rot on each wire + ring of CNOTs.""" + for i, w in enumerate(wires): + qml.Rot(layer_weights[i, 0], layer_weights[i, 1], layer_weights[i, 2], wires=w) + for i in range(len(wires)): + qml.CNOT(wires=[wires[i], wires[(i + 1) % len(wires)]]) + + +def load_creditcard_csv(path: str) -> tuple[np.ndarray, np.ndarray]: + """ + Load Credit Card Fraud CSV. Expects columns including V1..V28, Amount, Class. + Returns (X_raw shape (n, 30), y shape (n,) with 0/1). 
+ """ + data = np.genfromtxt(path, delimiter=",", skip_header=1, dtype=np.float64) + if data.ndim == 1: + data = data.reshape(1, -1) + # Last column = Class; rest = features (Time, V1..V28, Amount) + X = data[:, :-1] + y = data[:, -1].astype(np.int32) + # If CSV has header row with "Time", we already skipped it + if X.shape[1] >= 30: + X = X[:, -30:] # last 30 cols: V1..V28, Amount (and drop Time if 31) + elif X.shape[1] < 30: + # Pad with zeros to 30 + pad = np.zeros((X.shape[0], 30 - X.shape[1]), dtype=np.float64) + X = np.hstack([X, pad]) + return X, y + + +def make_synthetic_imbalanced( + seed: int, n_total: int = 2000, fraud_ratio: float = 0.02 +) -> tuple[np.ndarray, np.ndarray]: + """Synthetic 30-D imbalanced binary data for smoke test when no CSV is provided.""" + rng = np.random.default_rng(seed) + n_fraud = max(1, int(n_total * fraud_ratio)) + n_legit = n_total - n_fraud + X_legit = rng.standard_normal((n_legit, 30)).astype(np.float64) * 0.5 + X_fraud = rng.standard_normal((n_fraud, 30)).astype(np.float64) * 0.5 + 1.0 + X = np.vstack([X_legit, X_fraud]) + y = np.array([0] * n_legit + [1] * n_fraud, dtype=np.int32) + perm = rng.permutation(n_total) + return X[perm], y[perm] + + +def preprocess( + X: np.ndarray, + y: np.ndarray, + pca_dim: int, + seed: int, + test_size: float = 0.2, + val_size: float = 0.1, +) -> tuple[ + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + Any, + Any, + np.ndarray, +]: + """ + StandardScaler → PCA (to <= pca_dim) → pad to FEATURE_DIM → L2 normalize. + Stratified train/val/test. Returns X_train, y_train, X_val, y_val, X_test, y_test (all numpy), + scaler, pca (fitted), sample_weights_train (for weighted loss). 
+ """ + scaler = StandardScaler() + X_scaled = scaler.fit_transform(X) + + pca = PCA( + n_components=min(pca_dim, X_scaled.shape[1], X_scaled.shape[0] - 1), + random_state=seed, + ) + X_pca = pca.fit_transform(X_scaled) + # Pad PCA features up to FEATURE_DIM for amplitude embedding (remaining entries are zeros). + if X_pca.shape[1] < FEATURE_DIM: + pad = np.zeros((X_pca.shape[0], FEATURE_DIM - X_pca.shape[1]), dtype=np.float64) + X_pca = np.hstack([X_pca, pad]) + + norm = np.linalg.norm(X_pca, axis=1, keepdims=True) + norm[norm < 1e-12] = 1.0 + X_norm = (X_pca / norm).astype(np.float64) + + rng = np.random.RandomState(seed) + idx = rng.permutation(len(y)) + X_norm, y = X_norm[idx], y[idx] + + # Stratified split: first test, then val from train + X_temp, X_test, y_temp, y_test = train_test_split( + X_norm, y, test_size=test_size, stratify=y, random_state=seed + ) + val_ratio = val_size / (1 - test_size) + X_train, X_val, y_train, y_val = train_test_split( + X_temp, y_temp, test_size=val_ratio, stratify=y_temp, random_state=seed + ) + + # Class weights for weighted MSE: n / (2 * n_class) + n0 = max(1, int(np.sum(y_train == 0))) + n1 = max(1, int(np.sum(y_train == 1))) + w0 = len(y_train) / (2 * n0) + w1 = len(y_train) / (2 * n1) + sample_weights = np.where(y_train == 0, w0, w1).astype(np.float64) + + return ( + X_train, + y_train, + X_val, + y_val, + X_test, + y_test, + scaler, + pca, + sample_weights, + ) + + +def run_training( + X_train: np.ndarray, + y_train: np.ndarray, + X_test: np.ndarray, + y_test: np.ndarray, + sample_weights: np.ndarray, + *, + num_layers: int, + iterations: int, + batch_size: int, + lr: float, + seed: int, +) -> dict[str, Any]: + """Train 5-qubit amplitude VQC on GPU with class-weighted loss; report AUPRC, F1, compile/train time.""" + if not torch.cuda.is_available(): + raise RuntimeError("CUDA GPU is required for training. 
No CUDA device found.") + try: + dev = qml.device("lightning.gpu", wires=NUM_QUBITS) + except Exception as e: + raise RuntimeError( + "lightning.gpu is required for GPU training. Install with: " + "pip install pennylane-lightning[gpu]" + ) from e + + device = torch.device("cuda") + dtype = torch.float64 + wires = tuple(range(NUM_QUBITS)) + + @qml.qnode(dev, interface="torch", diff_method="adjoint") + def circuit(weights: torch.Tensor, features: torch.Tensor) -> torch.Tensor: + qml.AmplitudeEmbedding(features, wires=wires, normalize=True) + for w in weights: + _layer(w, wires) + return qml.expval(qml.PauliZ(0)) + + def model( + weights: torch.Tensor, bias: torch.Tensor, x: torch.Tensor + ) -> torch.Tensor: + return circuit(weights, x) + bias + + def cost( + weights: torch.Tensor, + bias: torch.Tensor, + X_batch: torch.Tensor, + Y_batch: torch.Tensor, + w_batch: torch.Tensor, + ) -> torch.Tensor: + # Y in {0,1} -> target in {-1, 1} + target = Y_batch * 2.0 - 1.0 + pred = model(weights, bias, X_batch) + return (w_batch * (target - pred) ** 2).sum() / (w_batch.sum() + 1e-12) + + n_train = len(y_train) + + torch.manual_seed(seed) + weights = torch.nn.Parameter( + 0.01 * torch.randn(num_layers, NUM_QUBITS, 3, device=device, dtype=dtype) + ) + bias = torch.nn.Parameter(torch.tensor(0.0, device=device, dtype=dtype)) + opt = torch.optim.Adam([weights, bias], lr=lr) + + X_train_t = torch.tensor(X_train, dtype=dtype, device=device) + # Float so autograd does not try to differentiate ints + Y_train_t = torch.tensor( + np.asarray(y_train, dtype=np.float64), dtype=dtype, device=device + ) + W_train_t = torch.tensor(sample_weights, dtype=dtype, device=device) + + X_test_t = torch.tensor(X_test, dtype=dtype, device=device) + + # Compile (first forward + cost) + t0 = time.perf_counter() + _ = circuit(weights, X_train_t[0]) + _ = cost(weights, bias, X_train_t[:1], Y_train_t[:1], W_train_t[:1]) + compile_sec = time.perf_counter() - t0 + + # Train + _batch_n = min(batch_size, n_train) 
+ t0 = time.perf_counter() + for _ in range(iterations): + opt.zero_grad() + idx = torch.randint(0, n_train, (_batch_n,), device=device) + Xb = X_train_t[idx] + Yb = Y_train_t[idx] + Wb = W_train_t[idx] + loss = cost(weights, bias, Xb, Yb, Wb) + loss.backward() + opt.step() + train_sec = time.perf_counter() - t0 + + # Test-set predictions and scores (for AUPRC we need continuous scores) + with torch.no_grad(): + pred_scores = model(weights, bias, X_test_t).cpu().numpy().flatten() + pred_binary = (np.sign(pred_scores) > 0).astype(np.int32) + # Map expval in [-1,1] to positive-class score in [0,1] for AUPRC + scores_positive = (pred_scores + 1.0) / 2.0 + + y_test_np = np.asarray(y_test) + auprc = float(average_precision_score(y_test_np, scores_positive)) + f1 = float(f1_score(y_test_np, pred_binary, zero_division=0)) + prec = float(precision_score(y_test_np, pred_binary, zero_division=0)) + rec = float(recall_score(y_test_np, pred_binary, zero_division=0)) + + return { + "compile_time_sec": compile_sec, + "train_time_sec": train_sec, + "samples_per_sec": (iterations * _batch_n) / train_sec + if train_sec > 0 + else 0.0, + "auprc": auprc, + "f1_score": f1, + "precision": prec, + "recall": rec, + "n_train": n_train, + "n_test": len(y_test), + "iterations": iterations, + } + + +def main() -> None: + parser = argparse.ArgumentParser( + description="PennyLane Credit Card Fraud baseline (amplitude, 5 qubits, AUPRC/F1, GPU training)" + ) + parser.add_argument( + "--data-file", + type=str, + default=None, + help="Path to CSV (e.g. Kaggle creditcard.csv). 
If omitted, use synthetic imbalanced data.", + ) + parser.add_argument( + "--max-samples", + type=int, + default=50_000, + help="Max samples to use from CSV (default: 50000); ignored for synthetic.", + ) + parser.add_argument( + "--pca-dim", + type=int, + default=30, + help="PCA components before padding to 2**num_qubits (default: 30, capped by feature dim).", + ) + parser.add_argument("--seed", type=int, default=42, help="Random seed") + parser.add_argument("--iters", type=int, default=5000, help="Optimizer steps") + parser.add_argument("--batch-size", type=int, default=256, help="Batch size") + parser.add_argument("--layers", type=int, default=2, help="Variational layers") + parser.add_argument("--lr", type=float, default=0.01, help="Learning rate") + parser.add_argument( + "--trials", + type=int, + default=1, + help="Number of runs (same data, different seeds); report median AUPRC/F1.", + ) + args = parser.parse_args() + + if args.data_file: + path = Path(args.data_file) + if not path.is_file(): + raise SystemExit(f"Data file not found: {path}") + X, y = load_creditcard_csv(str(path)) + if len(X) > args.max_samples: + rng = np.random.default_rng(args.seed) + idx = rng.choice(len(X), size=args.max_samples, replace=False) + X, y = X[idx], y[idx] + data_src = f"CSV {path.name} (n={len(X)})" + else: + X, y = make_synthetic_imbalanced(args.seed, n_total=2000, fraud_ratio=0.02) + data_src = f"synthetic imbalanced (n={len(X)}, fraud~2%%)" + + ( + X_train, + y_train, + X_val, + y_val, + X_test, + y_test, + _scaler, + _pca, + sample_weights, + ) = preprocess( + X, + y, + pca_dim=args.pca_dim, + seed=args.seed, + test_size=0.2, + val_size=0.1, + ) + + print("Credit Card Fraud amplitude baseline (PennyLane, GPU)") + print( + f" Data: {data_src} → StandardScaler → PCA({args.pca_dim}) → pad to {FEATURE_DIM} → L2 norm" + ) + print( + f" Train/val/test: {len(X_train)} / {len(X_val)} / {len(X_test)} (stratified)" + ) + print( + f" Iters: {args.iters}, batch: {args.batch_size}, 
layers: {args.layers}, lr: {args.lr}" + ) + + results: list[dict[str, Any]] = [] + for t in range(args.trials): + r = run_training( + X_train, + y_train, + X_test, + y_test, + sample_weights, + num_layers=args.layers, + iterations=args.iters, + batch_size=args.batch_size, + lr=args.lr, + seed=args.seed + t, + ) + results.append(r) + print(f"\n Trial {t + 1}:") + print(f" Compile: {r['compile_time_sec']:.4f} s") + print( + f" Train: {r['train_time_sec']:.4f} s ({r['samples_per_sec']:.1f} samples/s)" + ) + print(f" AUPRC: {r['auprc']:.4f}") + print( + f" F1: {r['f1_score']:.4f} (P: {r['precision']:.4f}, R: {r['recall']:.4f})" + ) + + if args.trials > 1: + auprcs = sorted(r["auprc"] for r in results) + f1s = sorted(r["f1_score"] for r in results) + mid = args.trials // 2 + print( + f"\n Median AUPRC: {auprcs[mid]:.4f} (min: {auprcs[0]:.4f}, max: {auprcs[-1]:.4f})" + ) + print( + f" Median F1: {f1s[mid]:.4f} (min: {f1s[0]:.4f}, max: {f1s[-1]:.4f})" + ) + + +if __name__ == "__main__": + main() diff --git a/qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/iris_amplitude.py b/qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/iris_amplitude.py index 66045f853a..3e684c25dc 100644 --- a/qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/iris_amplitude.py +++ b/qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/iris_amplitude.py @@ -28,6 +28,8 @@ - Total samples: 100 (2-class Iris). Full Iris has 150 (3 classes). Pipeline: state prep (Möttönen angles) → Rot layers + CNOT → expval(PauliZ(0)) + bias; square loss; Adam or Nesterov. + +Training always runs on GPU via lightning.gpu for fair comparison with QDP pipeline. 
""" from __future__ import annotations @@ -38,11 +40,10 @@ from typing import Any import numpy as np +import torch try: import pennylane as qml - from pennylane import numpy as pnp - from pennylane.optimize import AdamOptimizer, NesterovMomentumOptimizer except ImportError as e: raise SystemExit( "PennyLane is required. Install with: uv sync --group benchmark" @@ -90,7 +91,7 @@ def state_preparation(a, wires=(0, 1)) -> None: def layer(layer_weights, wires=(0, 1)) -> None: """Rot on each wire + CNOT (tutorial Iris section).""" for i, w in enumerate(wires): - qml.Rot(*layer_weights[i], wires=w) + qml.Rot(layer_weights[i, 0], layer_weights[i, 1], layer_weights[i, 2], wires=w) qml.CNOT(wires=list(wires)) @@ -131,7 +132,7 @@ def load_iris_binary(seed: int = 42) -> tuple[np.ndarray, np.ndarray]: return features, Y -# --- Training: build circuit, split data, optimize, evaluate --- +# --- Training: build circuit, split data, optimize, evaluate (GPU via lightning.gpu) --- def run_training( features: np.ndarray, Y: np.ndarray, @@ -142,14 +143,23 @@ def run_training( lr: float, seed: int, test_size: float = 0.25, - optimizer: str = "adam", early_stop_target: float | None = 0.9, ) -> dict[str, Any]: - """Train classifier: circuit + bias, square loss, batched. Optional early stop when test acc ≥ target.""" - dev = qml.device("default.qubit", wires=NUM_QUBITS) + """Train classifier on GPU: circuit + bias, square loss, batched. Optional early stop when test acc ≥ target.""" + if not torch.cuda.is_available(): + raise RuntimeError("CUDA GPU is required for training. No CUDA device found.") + try: + dev = qml.device("lightning.gpu", wires=NUM_QUBITS) + except Exception as e: + raise RuntimeError( + "lightning.gpu is required for GPU training. 
Install with: " + "pip install pennylane-lightning[gpu]" + ) from e + device = torch.device("cuda") + dtype = torch.float64 # Circuit: state_prep(angles) → layers of Rot+CNOT → expval(PauliZ(0)) - @qml.qnode(dev, interface="autograd", diff_method="backprop") + @qml.qnode(dev, interface="torch", diff_method="adjoint") def circuit(weights, angles): state_preparation(angles, wires=(0, 1)) for lw in weights: @@ -161,78 +171,75 @@ def model(weights, bias, angles): def cost(weights, bias, X_batch, Y_batch): preds = model(weights, bias, X_batch.T) - return pnp.mean((Y_batch - preds) ** 2) + return torch.mean((Y_batch - preds) ** 2) # Train/val split (seed-driven) n = len(Y) - np.random.seed(seed) - try: - pnp.random.seed(seed) - except Exception: - pass rng = np.random.default_rng(seed) idx = rng.permutation(n) n_train = int(n * (1 - test_size)) - feats_train = pnp.array(features[idx[:n_train]]) - Y_train = pnp.array(Y[idx[:n_train]]) - feats_test = features[idx[n_train:]] - Y_test = Y[idx[n_train:]] + + feats_train_t = torch.tensor(features[idx[:n_train]], dtype=dtype, device=device) + Y_train_t = torch.tensor( + np.asarray(Y[idx[:n_train]], dtype=np.float64), dtype=dtype, device=device + ) + feats_test_t = torch.tensor(features[idx[n_train:]], dtype=dtype, device=device) + Y_test_t = torch.tensor( + np.asarray(Y[idx[n_train:]], dtype=np.float64), dtype=dtype, device=device + ) # Weights and optimizer - weights_init = 0.01 * pnp.random.randn( - num_layers, NUM_QUBITS, 3, requires_grad=True + torch.manual_seed(seed) + weights = torch.nn.Parameter( + 0.01 * torch.randn(num_layers, NUM_QUBITS, 3, device=device, dtype=dtype) ) - bias_init = pnp.array(0.0, requires_grad=True) - if optimizer == "adam": - opt = AdamOptimizer(lr) - else: - opt = NesterovMomentumOptimizer(lr) + bias = torch.nn.Parameter(torch.tensor(0.0, device=device, dtype=dtype)) + opt = torch.optim.Adam([weights, bias], lr=lr) # Compile (first run) t0 = time.perf_counter() - _ = circuit(weights_init, 
feats_train[0]) - _ = cost(weights_init, bias_init, feats_train[:1], Y_train[:1]) + _ = circuit(weights, feats_train_t[0]) + _ = cost(weights, bias, feats_train_t[:1], Y_train_t[:1]) compile_sec = time.perf_counter() - t0 # Optimize (batched steps; optional early stop every 100 steps) t0 = time.perf_counter() - weights, bias = weights_init, bias_init steps_done = 0 for step in range(iterations): + opt.zero_grad() batch_idx = rng.integers(0, n_train, size=(batch_size,)) - fb = feats_train[batch_idx] - yb = Y_train[batch_idx] - out = opt.step(cost, weights, bias, fb, yb) - weights, bias = out[0], out[1] + fb = feats_train_t[batch_idx] + yb = Y_train_t[batch_idx] + loss = cost(weights, bias, fb, yb) + loss.backward() + opt.step() steps_done += 1 if early_stop_target is not None and (step + 1) % 100 == 0: - pred_test_now = np.sign( - np.array(model(weights, bias, pnp.array(feats_test).T)) - ).flatten() - test_acc_now = float( - np.mean(np.abs(pred_test_now - np.array(Y_test)) < 1e-5) - ) + with torch.no_grad(): + pred_test_now = torch.sign( + model(weights, bias, feats_test_t.T) + ).flatten() + test_acc_now = ( + (pred_test_now - Y_test_t).abs().lt(1e-5).float().mean().item() + ) if test_acc_now >= early_stop_target: break train_sec = time.perf_counter() - t0 # Metrics (train/test accuracy) - pred_train = np.sign(np.array(model(weights, bias, feats_train.T))).flatten() - pred_test = np.sign( - np.array(model(weights, bias, pnp.array(feats_test).T)) - ).flatten() - Y_train_np = np.array(Y_train) - Y_test_np = np.array(Y_test) - train_acc = float(np.mean(np.abs(pred_train - Y_train_np) < 1e-5)) - test_acc = float(np.mean(np.abs(pred_test - Y_test_np) < 1e-5)) + with torch.no_grad(): + pred_train = torch.sign(model(weights, bias, feats_train_t.T)).flatten() + pred_test = torch.sign(model(weights, bias, feats_test_t.T)).flatten() + train_acc = (pred_train - Y_train_t).abs().lt(1e-5).float().mean().item() + test_acc = (pred_test - 
Y_test_t).abs().lt(1e-5).float().mean().item() return { "compile_time_sec": compile_sec, "train_time_sec": train_sec, - "train_accuracy": train_acc, - "test_accuracy": test_acc, + "train_accuracy": float(train_acc), + "test_accuracy": float(test_acc), "n_train": n_train, - "n_test": len(Y_test), + "n_test": len(Y) - n_train, "epochs": steps_done, "samples_per_sec": (steps_done * batch_size) / train_sec if train_sec > 0 @@ -242,7 +249,7 @@ def cost(weights, bias, X_batch, Y_batch): def main() -> None: parser = argparse.ArgumentParser( - description="PennyLane Iris amplitude encoding baseline (2-class)" + description="PennyLane Iris amplitude encoding baseline (2-class, GPU training)" ) parser.add_argument( "--iters", @@ -266,13 +273,6 @@ def main() -> None: help="Test fraction (default: 0.1); ignored if --data-file set", ) parser.add_argument("--seed", type=int, default=0, help="Random seed (default: 0)") - parser.add_argument( - "--optimizer", - type=str, - default="adam", - choices=("adam", "nesterov"), - help="Optimizer (default: adam)", - ) parser.add_argument( "--trials", type=int, @@ -303,12 +303,12 @@ def main() -> None: test_size = args.test_size data_src = "sklearn load_iris, classes 0 & 1, 4 features" n = len(Y) - print("Iris amplitude baseline (PennyLane) — 2-class variational classifier") + print("Iris amplitude baseline (PennyLane, GPU) — 2-class variational classifier") print( f" Data: {data_src} → L2 norm → get_angles (n={n}; 2-class Iris = 100 samples)" ) print( - f" Iters: {args.iters}, batch_size: {args.batch_size}, layers: {args.layers}, lr: {args.lr}, optimizer: {args.optimizer}" + f" Iters: {args.iters}, batch_size: {args.batch_size}, layers: {args.layers}, lr: {args.lr}" ) results: list[dict[str, Any]] = [] @@ -322,7 +322,6 @@ def main() -> None: lr=args.lr, seed=args.seed + t, test_size=test_size, - optimizer=args.optimizer, early_stop_target=args.early_stop if args.early_stop > 0 else None, ) results.append(r) diff --git 
a/qdp/qdp-python/benchmark/encoding_benchmarks/qdp_pipeline/creditcardfraud_amplitude.py b/qdp/qdp-python/benchmark/encoding_benchmarks/qdp_pipeline/creditcardfraud_amplitude.py new file mode 100644 index 0000000000..70dc17ecb7 --- /dev/null +++ b/qdp/qdp-python/benchmark/encoding_benchmarks/qdp_pipeline/creditcardfraud_amplitude.py @@ -0,0 +1,632 @@ +#!/usr/bin/env python3 +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +QDP pipeline: Credit Card Fraud (binary, highly imbalanced), amplitude encoding. + +Goal: **same data, model, loss, and metrics as the PennyLane baseline; only the +encoding step is different**. Here we: + +- Preprocess features exactly as in the baseline: + StandardScaler → PCA (to <= pca_dim) → pad to FEATURE_DIM → L2-normalized vector. +- Use QDP (`QuantumDataLoader` with `encoding("amplitude")`) to encode these + FEATURE_DIM vectors into **amplitude state vectors** of length `2**NUM_QUBITS`. +- Feed the encoded state vectors into a PennyLane circuit via `qml.AmplitudeEmbedding`, + then apply the same variational layers, optimizer, and loss as the baseline. + +Best practices (aligned with ENCODING_BENCHMARK_PLAN.md §2.2): + +- Dataset: Kaggle "Credit Card Fraud Detection" (Time, V1..V28, Amount, Class). 
+- Metrics: AUPRC (precision–recall AUC), F1-score, precision, recall. +- Imbalance: class-weighted loss (minority class up-weighted); no accuracy. + +Training always runs on GPU via lightning.gpu. +""" + +from __future__ import annotations + +import argparse +import time +from collections.abc import Iterator +from pathlib import Path +from typing import Any + +import numpy as np +import torch + +try: + import pennylane as qml +except ImportError as e: + raise SystemExit( + "PennyLane is required. Install with: uv sync --group benchmark" + ) from e + +try: + from sklearn.decomposition import PCA + from sklearn.metrics import ( + average_precision_score, + f1_score, + precision_score, + recall_score, + ) + from sklearn.model_selection import train_test_split + from sklearn.preprocessing import StandardScaler +except ImportError as e: + raise SystemExit( + "scikit-learn is required. Install with: uv sync --group benchmark" + ) from e + +try: + from qumat_qdp import QdpEngine, QuantumDataLoader +except ImportError as e: + raise SystemExit( + "qumat_qdp (QDP Python bindings) is required. Build with: uv run maturin develop" + ) from e + + +NUM_QUBITS = 5 +STATE_DIM = 2**NUM_QUBITS # length of encoded state vector +FEATURE_DIM = STATE_DIM # pre-QDP feature dimension (padded to this) + + +def _layer(layer_weights: torch.Tensor, wires: tuple[int, ...]) -> None: + """Single variational layer: Rot on each wire + ring of CNOTs.""" + for i, w in enumerate(wires): + qml.Rot(layer_weights[i, 0], layer_weights[i, 1], layer_weights[i, 2], wires=w) + for i in range(len(wires)): + qml.CNOT(wires=[wires[i], wires[(i + 1) % len(wires)]]) + + +def load_creditcard_csv(path: str) -> tuple[np.ndarray, np.ndarray]: + """ + Load Credit Card Fraud CSV. Expects columns including V1..V28, Amount, Class. + Returns (X_raw shape (n, 30), y shape (n,) with 0/1). 
+ """ + data = np.genfromtxt(path, delimiter=",", skip_header=1, dtype=np.float64) + if data.ndim == 1: + data = data.reshape(1, -1) + # Last column = Class; rest = features (Time, V1..V28, Amount) + X = data[:, :-1] + y = data[:, -1].astype(np.int32) + # If CSV has header row with "Time", we already skipped it + if X.shape[1] >= 30: + X = X[:, -30:] # last 30 cols: V1..V28, Amount (and drop Time if 31) + elif X.shape[1] < 30: + # Pad with zeros to 30 + pad = np.zeros((X.shape[0], 30 - X.shape[1]), dtype=np.float64) + X = np.hstack([X, pad]) + return X, y + + +def make_synthetic_imbalanced( + seed: int, n_total: int = 2000, fraud_ratio: float = 0.02 +) -> tuple[np.ndarray, np.ndarray]: + """Synthetic 30-D imbalanced binary data for smoke test when no CSV is provided.""" + rng = np.random.default_rng(seed) + n_fraud = max(1, int(n_total * fraud_ratio)) + n_legit = n_total - n_fraud + X_legit = rng.standard_normal((n_legit, 30)).astype(np.float64) * 0.5 + X_fraud = rng.standard_normal((n_fraud, 30)).astype(np.float64) * 0.5 + 1.0 + X = np.vstack([X_legit, X_fraud]) + y = np.array([0] * n_legit + [1] * n_fraud, dtype=np.int32) + perm = rng.permutation(n_total) + return X[perm], y[perm] + + +def preprocess( + X: np.ndarray, + y: np.ndarray, + pca_dim: int, + seed: int, + test_size: float = 0.2, + val_size: float = 0.1, +) -> tuple[ + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + Any, + Any, + np.ndarray, +]: + """ + StandardScaler → PCA (to <= pca_dim) → pad to FEATURE_DIM → L2 normalize. + Stratified train/val/test. Returns X_train, y_train, X_val, y_val, X_test, y_test, + plus scaler, pca, and sample_weights for weighted loss. 
+ """ + scaler = StandardScaler() + X_scaled = scaler.fit_transform(X) + + pca = PCA( + n_components=min(pca_dim, X_scaled.shape[1], X_scaled.shape[0] - 1), + random_state=seed, + ) + X_pca = pca.fit_transform(X_scaled) + if X_pca.shape[1] < FEATURE_DIM: + pad = np.zeros((X_pca.shape[0], FEATURE_DIM - X_pca.shape[1]), dtype=np.float64) + X_pca = np.hstack([X_pca, pad]) + + norm = np.linalg.norm(X_pca, axis=1, keepdims=True) + norm[norm < 1e-12] = 1.0 + X_norm = (X_pca / norm).astype(np.float64) + + rng = np.random.RandomState(seed) + idx = rng.permutation(len(y)) + X_norm, y = X_norm[idx], y[idx] + + # Stratified split: first test, then val from train + X_temp, X_test, y_temp, y_test = train_test_split( + X_norm, y, test_size=test_size, stratify=y, random_state=seed + ) + val_ratio = val_size / (1 - test_size) + X_train, X_val, y_train, y_val = train_test_split( + X_temp, y_temp, test_size=val_ratio, stratify=y_temp, random_state=seed + ) + + # Class weights for weighted MSE: n / (2 * n_class) + n0 = max(1, int(np.sum(y_train == 0))) + n1 = max(1, int(np.sum(y_train == 1))) + w0 = len(y_train) / (2 * n0) + w1 = len(y_train) / (2 * n1) + sample_weights = np.where(y_train == 0, w0, w1).astype(np.float64) + + return ( + X_train, + y_train, + X_val, + y_val, + X_test, + y_test, + scaler, + pca, + sample_weights, + ) + + +def encode_via_qdp_engine( + X_norm: np.ndarray, + *, + batch_size: int, + device_id: int = 0, +) -> torch.Tensor: + """ + QDP API: amplitude-encode in memory via QdpEngine.encode() (batched). + No temp file; minimal CPU–GPU transfer by batching. + Returns GPU torch.Tensor shape (n, STATE_DIM). + """ + n, dim = X_norm.shape + if dim != FEATURE_DIM: + raise ValueError( + f"X_norm must have {FEATURE_DIM} features for {NUM_QUBITS} qubits, got {dim}" + ) + # Ensure float64 C-contiguous once before the loop (preprocess() already guarantees this, + # but guard against callers passing non-contiguous or non-float64 arrays). 
+ if not (X_norm.dtype == np.float64 and X_norm.flags["C_CONTIGUOUS"]):
+ X_norm = np.ascontiguousarray(X_norm, dtype=np.float64)
+ engine = QdpEngine(device_id=device_id)
+ batches_list: list[torch.Tensor] = []
+ for start in range(0, n, batch_size):
+ end = min(start + batch_size, n)
+ # Pass slice directly — no per-batch astype() copy needed.
+ qt = engine.encode(X_norm[start:end], NUM_QUBITS, "amplitude")
+ t = torch.from_dlpack(qt)
+ batches_list.append(t)
+ # torch.cat produces exactly n rows and a contiguous tensor.
+ encoded = torch.cat(batches_list, dim=0)
+ # DLPack exports complex128 (CuDoubleComplex) even though imaginary parts are always 0.0
+ # (amplitude encoding of real input → real state vector; CUDA kernel hardcodes imag=0.0).
+ # Taking .real gives a float64 view (zero-copy) matching the baseline's dtype; the view
+ # still shares the underlying complex storage (so memory is not actually reduced), but it
+ # avoids any complex-arithmetic paths in PennyLane.
+ if encoded.is_complex():
+ encoded = encoded.real
+ if encoded.shape[1] != STATE_DIM:
+ raise ValueError(
+ f"Encoded state dimension mismatch: expected {STATE_DIM}, got {encoded.shape[1]}"
+ )
+ return encoded
+
+
+def encoded_batches_from_loader(
+ X_norm: np.ndarray,
+ *,
+ batch_size: int,
+ device_id: int = 0,
+ data_dir: str | None = None,
+ filename: str = "creditcard_train.npy",
+) -> Iterator[tuple[torch.Tensor, int, int]]:
+ """
+ DataLoader API: stream amplitude-encoded batches from QuantumDataLoader (in-memory).
+ Uses source_array() (no temp file). Always returns GPU torch.Tensor batches.
+ Yields (batch, start_idx, end_idx). 
+ """ + n, dim = X_norm.shape + if dim != FEATURE_DIM: + raise ValueError( + f"X_norm must have {FEATURE_DIM} features for {NUM_QUBITS} qubits, got {dim}" + ) + total_batches = (n + batch_size - 1) // batch_size + loader = ( + QuantumDataLoader(device_id=device_id) + .qubits(NUM_QUBITS) + .encoding("amplitude") + .batches(total_batches, size=batch_size) + .source_array(X_norm.astype(np.float64)) + .as_torch(device="cuda") + ) + start = 0 + for batch in loader: + end = min(start + batch.shape[0], n) + actual = batch[: end - start] + if actual.shape[1] != STATE_DIM: + raise ValueError( + f"Encoded state dimension mismatch: expected {STATE_DIM}, got {actual.shape[1]}" + ) + yield actual, start, end + start = end + + +def run_training( + encoded_train: torch.Tensor, + encoded_test: torch.Tensor, + y_train: np.ndarray, + y_test: np.ndarray, + sample_weights: np.ndarray, + *, + num_layers: int, + iterations: int, + batch_size: int, + lr: float, + seed: int, +) -> dict[str, Any]: + """Train 5-qubit amplitude VQC on GPU; dispatch to lightning.gpu.""" + if not torch.cuda.is_available(): + raise RuntimeError("CUDA GPU is required for training. No CUDA device found.") + try: + dev_qml = qml.device("lightning.gpu", wires=NUM_QUBITS) + except Exception as e: + raise RuntimeError( + "lightning.gpu is required for GPU training. 
Install with: " + "pip install pennylane-lightning[gpu]" + ) from e + + # Ensure encoded data is on GPU + if not encoded_train.is_cuda: + encoded_train = encoded_train.cuda() + if not encoded_test.is_cuda: + encoded_test = encoded_test.cuda() + + device = encoded_train.device + dtype = encoded_train.dtype + n_train = len(y_train) + y_test_np = np.asarray(y_test) + + Y_train_t = torch.tensor( + np.asarray(y_train, dtype=np.float64), dtype=dtype, device=device + ) + W_train_t = torch.tensor(sample_weights, dtype=dtype, device=device) + + wires = tuple(range(NUM_QUBITS)) + + @qml.qnode(dev_qml, interface="torch", diff_method="adjoint") + def circuit(weights: torch.Tensor, state_vector: torch.Tensor) -> torch.Tensor: + qml.AmplitudeEmbedding(state_vector, wires=wires, normalize=False) + for w in weights: + _layer(w, wires) + return qml.expval(qml.PauliZ(0)) + + def model( + weights: torch.Tensor, bias: torch.Tensor, state_batch: torch.Tensor + ) -> torch.Tensor: + return circuit(weights, state_batch) + bias + + def cost( + weights: torch.Tensor, + bias: torch.Tensor, + states_batch: torch.Tensor, + Y_batch: torch.Tensor, + w_batch: torch.Tensor, + ) -> torch.Tensor: + target = Y_batch * 2.0 - 1.0 + preds = model(weights, bias, states_batch) + return (w_batch * (target - preds) ** 2).sum() / (w_batch.sum() + 1e-12) + + torch.manual_seed(seed) + weights = torch.nn.Parameter( + 0.01 * torch.randn(num_layers, NUM_QUBITS, 3, device=device, dtype=dtype) + ) + bias = torch.nn.Parameter(torch.tensor(0.0, device=device, dtype=dtype)) + opt = torch.optim.Adam([weights, bias], lr=lr) + + t0 = time.perf_counter() + _ = circuit(weights, encoded_train[:1]) + _ = cost(weights, bias, encoded_train[:1], Y_train_t[:1], W_train_t[:1]) + compile_sec = time.perf_counter() - t0 + + # Use torch.randint so indices stay on GPU — avoids implicit H2D transfer of 256 indices + # per step (NumPy rng.integers → CPU array → implicit copy to index CUDA tensor). 
+ _batch_n = min(batch_size, n_train) + t0 = time.perf_counter() + for _ in range(iterations): + opt.zero_grad() + idx = torch.randint(0, n_train, (_batch_n,), device=device) + sb = encoded_train[idx] + yb = Y_train_t[idx] + wb = W_train_t[idx] + loss = cost(weights, bias, sb, yb, wb) + loss.backward() + opt.step() + train_sec = time.perf_counter() - t0 + + with torch.no_grad(): + pred_scores = model(weights, bias, encoded_test).cpu().numpy().flatten() + pred_binary = (np.sign(pred_scores) > 0).astype(np.int32) + scores_positive = (pred_scores + 1.0) / 2.0 + auprc = float(average_precision_score(y_test_np, scores_positive)) + f1 = float(f1_score(y_test_np, pred_binary, zero_division=0)) + prec = float(precision_score(y_test_np, pred_binary, zero_division=0)) + rec = float(recall_score(y_test_np, pred_binary, zero_division=0)) + + return { + "compile_time_sec": compile_sec, + "train_time_sec": train_sec, + "samples_per_sec": (iterations * _batch_n) / train_sec + if train_sec > 0 + else 0.0, + "auprc": auprc, + "f1_score": f1, + "precision": prec, + "recall": rec, + "n_train": n_train, + "n_test": len(y_test_np), + "iterations": iterations, + } + + +def run_training_from_loader( + X_train: np.ndarray, + encoded_test: torch.Tensor, + y_train: np.ndarray, + y_test: np.ndarray, + sample_weights: np.ndarray, + *, + num_layers: int, + iterations: int, + encode_batch_size: int, + device_id: int = 0, + encode_data_dir: str | None = None, + lr: float, + seed: int, + batch_size: int = 256, +) -> dict[str, Any]: + """Train by streaming encoded batches from QuantumDataLoader on GPU. 
+ Encode once on GPU, then train with lightning.gpu.""" + + encoded_train = encode_via_qdp_engine( + X_train, + batch_size=encode_batch_size, + device_id=device_id, + ) + return run_training( + encoded_train, + encoded_test, + y_train, + y_test, + sample_weights, + num_layers=num_layers, + iterations=iterations, + batch_size=batch_size, + lr=lr, + seed=seed, + ) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="QDP Credit Card Fraud pipeline (amplitude, 5 qubits, AUPRC/F1, GPU training)" + ) + parser.add_argument( + "--data-file", + type=str, + default=None, + help="Path to CSV (e.g. Kaggle creditcard.csv). If omitted, use synthetic imbalanced data.", + ) + parser.add_argument( + "--max-samples", + type=int, + default=50_000, + help="Max samples to use from CSV (default: 50000); ignored for synthetic.", + ) + parser.add_argument( + "--pca-dim", + type=int, + default=30, + help="PCA components before padding to FEATURE_DIM (default: 30, capped by feature dim).", + ) + parser.add_argument("--seed", type=int, default=42, help="Random seed") + parser.add_argument( + "--iters", + type=int, + default=5000, + help="Optimizer steps (default: 5000; use same as baseline for apples-to-apples).", + ) + parser.add_argument( + "--batch-size", type=int, default=256, help="Training batch size" + ) + parser.add_argument("--layers", type=int, default=2, help="Variational layers") + parser.add_argument("--lr", type=float, default=0.01, help="Learning rate") + parser.add_argument( + "--trials", + type=int, + default=1, + help="Number of runs (same data, different seeds); report median AUPRC/F1.", + ) + parser.add_argument( + "--device-id", + type=int, + default=0, + help="QDP device id (default: 0)", + ) + parser.add_argument( + "--encode-batch-size", + type=int, + default=4096, + help="Batch size for QDP encoding (default: 4096).", + ) + parser.add_argument( + "--encode-data-dir", + type=str, + default=None, + help="Directory for temporary .npy files used by 
QDP loader (default: system temp).", + ) + parser.add_argument( + "--use-loader", + action="store_true", + help="Stream encoded batches via QuantumDataLoader.source_array() (DataLoader API).", + ) + args = parser.parse_args() + + if args.data_file: + path = Path(args.data_file) + if not path.is_file(): + raise SystemExit(f"Data file not found: {path}") + X, y = load_creditcard_csv(str(path)) + if len(X) > args.max_samples: + rng = np.random.default_rng(args.seed) + idx = rng.choice(len(X), size=args.max_samples, replace=False) + X, y = X[idx], y[idx] + data_src = f"CSV {path.name} (n={len(X)})" + else: + X, y = make_synthetic_imbalanced(args.seed, n_total=2000, fraud_ratio=0.02) + data_src = f"synthetic imbalanced (n={len(X)}, fraud~2%)" + + ( + X_train, + y_train, + X_val, + y_val, + X_test, + y_test, + _scaler, + _pca, + sample_weights, + ) = preprocess( + X, + y, + pca_dim=args.pca_dim, + seed=args.seed, + test_size=0.2, + val_size=0.1, + ) + + print("QDP Credit Card Fraud amplitude pipeline (GPU)") + print( + f" Data: {data_src} → StandardScaler → PCA({args.pca_dim}) " + f"→ pad to {FEATURE_DIM} → QDP amplitude → L2 norm (implicit)" + ) + print( + f" Train/val/test (features pre-QDP): " + f"{len(X_train)} / {len(X_val)} / {len(X_test)} (stratified)" + ) + print( + f" Iters: {args.iters}, train batch: {args.batch_size}, " + f"encode batch: {args.encode_batch_size}, layers: {args.layers}, lr: {args.lr}" + ) + print(" Encode + Train: GPU (QDP encode + lightning.gpu circuit).") + + # Encode test set via QDP (keep on GPU) + t_enc0 = time.perf_counter() + encoded_test = encode_via_qdp_engine( + X_test, + batch_size=args.encode_batch_size, + device_id=args.device_id, + ) + enc_test_sec = time.perf_counter() - t_enc0 + print(f" Encode test ({len(X_test)} samples): {enc_test_sec:.4f} s") + + results: list[dict[str, Any]] = [] + for t in range(args.trials): + if args.use_loader: + r = run_training_from_loader( + X_train, + encoded_test, + y_train, + y_test, + 
sample_weights, + num_layers=args.layers, + iterations=args.iters, + encode_batch_size=args.encode_batch_size, + device_id=args.device_id, + encode_data_dir=args.encode_data_dir, + lr=args.lr, + seed=args.seed + t, + batch_size=args.batch_size, + ) + r["encode_train_sec"] = 0.0 # encoded lazily inside loader + else: + t_enc1 = time.perf_counter() + encoded_train = encode_via_qdp_engine( + X_train, + batch_size=args.encode_batch_size, + device_id=args.device_id, + ) + enc_train_sec = time.perf_counter() - t_enc1 + r = run_training( + encoded_train, + encoded_test, + y_train, + y_test, + sample_weights, + num_layers=args.layers, + iterations=args.iters, + batch_size=args.batch_size, + lr=args.lr, + seed=args.seed + t, + ) + r["encode_train_sec"] = enc_train_sec + results.append(r) + print(f"\n Trial {t + 1}:") + print( + f" Encode train ({len(X_train)} samples): {r.get('encode_train_sec', 0.0):.4f} s" + ) + print(f" Compile: {r['compile_time_sec']:.4f} s") + print( + f" Train: {r['train_time_sec']:.4f} s " + f"({r['samples_per_sec']:.1f} samples/s)" + ) + print(f" AUPRC: {r['auprc']:.4f}") + print( + f" F1: {r['f1_score']:.4f} " + f"(P: {r['precision']:.4f}, R: {r['recall']:.4f})" + ) + + if args.trials > 1: + auprcs = sorted(r["auprc"] for r in results) + f1s = sorted(r["f1_score"] for r in results) + mid = args.trials // 2 + print( + f"\n Median AUPRC: {auprcs[mid]:.4f} " + f"(min: {auprcs[0]:.4f}, max: {auprcs[-1]:.4f})" + ) + print( + f" Median F1: {f1s[mid]:.4f} (min: {f1s[0]:.4f}, max: {f1s[-1]:.4f})" + ) + + +if __name__ == "__main__": + main() diff --git a/qdp/qdp-python/benchmark/encoding_benchmarks/qdp_pipeline/iris_amplitude.py b/qdp/qdp-python/benchmark/encoding_benchmarks/qdp_pipeline/iris_amplitude.py index 5ab902b74a..7540e980e0 100644 --- a/qdp/qdp-python/benchmark/encoding_benchmarks/qdp_pipeline/iris_amplitude.py +++ b/qdp/qdp-python/benchmark/encoding_benchmarks/qdp_pipeline/iris_amplitude.py @@ -29,6 +29,8 @@ Only difference from baseline: 
encoding. Here we use QDP (QdpEngine.encode + amplitude) → StatePrep(encoded); baseline uses get_angles → state_preparation(angles). Rest: same circuit (Rot + CNOT), loss, optimizer, CLI. + +Training always runs on GPU via lightning.gpu. """ from __future__ import annotations @@ -39,11 +41,10 @@ from typing import Any import numpy as np +import torch try: import pennylane as qml - from pennylane import numpy as pnp - from pennylane.optimize import AdamOptimizer, NesterovMomentumOptimizer except ImportError as e: raise SystemExit( "PennyLane is required. Install with: uv sync --group benchmark" @@ -57,18 +58,17 @@ "scikit-learn is required. Install with: uv sync --group benchmark" ) from e -import torch from qumat_qdp import QdpEngine NUM_QUBITS = 2 STATE_DIM = 2**NUM_QUBITS # 4 -# --- Circuit: variational layer (Rot + CNOT); state prep is StatePrep(encoded) in training --- +# --- Circuit: variational layer (Rot + CNOT); state prep is AmplitudeEmbedding(encoded) in training --- def layer(layer_weights, wires=(0, 1)) -> None: """Rot on each wire + CNOT (tutorial Iris section).""" for i, w in enumerate(wires): - qml.Rot(*layer_weights[i], wires=w) + qml.Rot(layer_weights[i, 0], layer_weights[i, 1], layer_weights[i, 2], wires=w) qml.CNOT(wires=list(wires)) @@ -120,7 +120,7 @@ def encode_via_qdp( """QDP: use QdpEngine.encode on 4-D vectors (amplitude), return encoded (n, 4) on GPU. Uses in-memory encoding via QdpEngine instead of writing/reading .npy files. The returned - tensor stays on the selected CUDA device and can be fed directly to qml.StatePrep. + tensor stays on the selected CUDA device and can be fed directly to qml.AmplitudeEmbedding. """ n, dim = X_norm.shape if dim != STATE_DIM: @@ -134,13 +134,17 @@ def encode_via_qdp( encoding_method="amplitude", ) encoded = torch.from_dlpack(qt) - return encoded[:n] + # DLPack exports complex dtype even though imaginary parts are always 0.0 + # (CUDA kernel hardcodes imag=0.0). 
Taking .real gives a real-valued zero-copy view. + if encoded.is_complex(): + encoded = encoded.real + return encoded[:n].clone() -# --- Training: StatePrep(encoded) + Rot layers, square loss, optional early stop --- +# --- Training: AmplitudeEmbedding(encoded) + Rot layers, square loss, GPU only --- def run_training( - encoded_train: torch.Tensor | np.ndarray, - encoded_test: torch.Tensor | np.ndarray, + encoded_train: torch.Tensor, + encoded_test: torch.Tensor, Y_train: np.ndarray, Y_test: np.ndarray, *, @@ -150,176 +154,28 @@ def run_training( lr: float, seed: int, early_stop_target: float | None = None, - optimizer: str = "nesterov", ) -> dict[str, Any]: - """Train variational classifier: StatePrep(encoded) + Rot layers + bias, square loss, batched. - If encoded_* are on GPU and lightning.gpu is available, training runs on GPU; otherwise on CPU. + """Train variational classifier on GPU: AmplitudeEmbedding(encoded) + Rot layers + bias, square loss, batched. When early_stop_target is set, evaluate test acc every 100 steps and stop when >= target.""" - n_train = len(Y_train) - np.random.seed(seed) - rng = np.random.default_rng(seed) - - # Prefer Lightning GPU when encoded data is on GPU - use_gpu = isinstance(encoded_train, torch.Tensor) and encoded_train.is_cuda - dev_qml = None - if use_gpu: - try: - dev_qml = qml.device("lightning.gpu", wires=NUM_QUBITS) - except Exception: - use_gpu = False - if not use_gpu or dev_qml is None: - dev_qml = qml.device("default.qubit", wires=NUM_QUBITS) - use_gpu = False - if isinstance(encoded_train, torch.Tensor): - encoded_train = encoded_train.cpu().numpy() - if isinstance(encoded_test, torch.Tensor): - encoded_test = encoded_test.cpu().numpy() - - if use_gpu: - return _run_training_gpu( - encoded_train, - encoded_test, - Y_train, - Y_test, - dev_qml=dev_qml, - num_layers=num_layers, - iterations=iterations, - batch_size=batch_size, - lr=lr, - seed=seed, - n_train=n_train, - rng=rng, - early_stop_target=early_stop_target, - ) 
- return _run_training_cpu( - encoded_train, - encoded_test, - Y_train, - Y_test, - dev_qml=dev_qml, - num_layers=num_layers, - iterations=iterations, - batch_size=batch_size, - lr=lr, - seed=seed, - n_train=n_train, - rng=rng, - qml_device="cpu", - early_stop_target=early_stop_target, - optimizer=optimizer, - ) - - -def _run_training_cpu( - encoded_train: np.ndarray, - encoded_test: np.ndarray, - Y_train: np.ndarray, - Y_test: np.ndarray, - *, - dev_qml: Any, # noqa: ANN401 - num_layers: int, - iterations: int, - batch_size: int, - lr: float, - seed: int, - n_train: int, - rng: np.random.Generator, - qml_device: str = "cpu", - early_stop_target: float | None = None, - optimizer: str = "nesterov", -) -> dict[str, Any]: - """CPU path: default.qubit + autograd + Nesterov or Adam. Optional early stop every 100 steps.""" + if not torch.cuda.is_available(): + raise RuntimeError("CUDA GPU is required for training. No CUDA device found.") try: - pnp.random.seed(seed) - except Exception: - pass - feats_train = pnp.array(encoded_train) - feats_test = encoded_test - Y_train_pnp = pnp.array(Y_train) - Y_test_np = np.asarray(Y_test) - - @qml.qnode(dev_qml, interface="autograd", diff_method="backprop") - def circuit(weights, state_vector): - qml.StatePrep(state_vector, wires=(0, 1)) - for lw in weights: - layer(lw, wires=(0, 1)) - return qml.expval(qml.PauliZ(0)) + dev_qml = qml.device("lightning.gpu", wires=NUM_QUBITS) + except Exception as e: + raise RuntimeError( + "lightning.gpu is required for GPU training. 
Install with: " + "pip install pennylane-lightning[gpu]" + ) from e - def model(weights, bias, state_batch): - return circuit(weights, state_batch) + bias - - def cost(weights, bias, X_batch, Y_batch): - preds = model(weights, bias, X_batch) - return pnp.mean((Y_batch - preds) ** 2) - - weights_init = 0.01 * pnp.random.randn( - num_layers, NUM_QUBITS, 3, requires_grad=True - ) - bias_init = pnp.array(0.0, requires_grad=True) - opt = AdamOptimizer(lr) if optimizer == "adam" else NesterovMomentumOptimizer(lr) - - t0 = time.perf_counter() - _ = circuit(weights_init, feats_train[0]) - _ = cost(weights_init, bias_init, feats_train[:1], Y_train_pnp[:1]) - compile_sec = time.perf_counter() - t0 - - t0 = time.perf_counter() - weights, bias = weights_init, bias_init - steps_done = 0 - for step in range(iterations): - batch_idx = rng.integers(0, n_train, size=(batch_size,)) - fb = feats_train[batch_idx] - yb = Y_train_pnp[batch_idx] - out = opt.step(cost, weights, bias, fb, yb) - weights, bias = out[0], out[1] - steps_done += 1 - if early_stop_target is not None and (step + 1) % 100 == 0: - pred_test_now = np.sign( - np.array(model(weights, bias, pnp.array(feats_test))) - ).flatten() - test_acc_now = float(np.mean(np.abs(pred_test_now - Y_test_np) < 1e-5)) - if test_acc_now >= early_stop_target: - break - train_sec = time.perf_counter() - t0 - - pred_train = np.sign(np.array(model(weights, bias, feats_train))).flatten() - pred_test = np.sign(np.array(model(weights, bias, pnp.array(feats_test)))).flatten() - Y_train_np = np.array(Y_train_pnp) - train_acc = float(np.mean(np.abs(pred_train - Y_train_np) < 1e-5)) - test_acc = float(np.mean(np.abs(pred_test - Y_test_np) < 1e-5)) - - return { - "compile_time_sec": compile_sec, - "train_time_sec": train_sec, - "train_accuracy": train_acc, - "test_accuracy": test_acc, - "n_train": n_train, - "n_test": len(Y_test), - "epochs": steps_done, - "samples_per_sec": (steps_done * batch_size) / train_sec - if train_sec > 0 - else 0.0, - 
"qml_device": qml_device, - } + n_train = len(Y_train) + rng = np.random.default_rng(seed) + # Ensure encoded data is on GPU + if not encoded_train.is_cuda: + encoded_train = encoded_train.cuda() + if not encoded_test.is_cuda: + encoded_test = encoded_test.cuda() -def _run_training_gpu( - encoded_train: torch.Tensor, - encoded_test: torch.Tensor, - Y_train: np.ndarray, - Y_test: np.ndarray, - *, - dev_qml: Any, # noqa: ANN401 - num_layers: int, - iterations: int, - batch_size: int, - lr: float, - seed: int, - n_train: int, - rng: np.random.Generator, - early_stop_target: float | None = None, -) -> dict[str, Any]: - """GPU path: lightning.gpu + PyTorch interface, data stays on GPU. Optional early stop every 100 steps.""" device = encoded_train.device dtype = encoded_train.dtype Y_train_t = torch.tensor(Y_train, dtype=dtype, device=device) @@ -327,7 +183,8 @@ def _run_training_gpu( @qml.qnode(dev_qml, interface="torch", diff_method="adjoint") def circuit(weights, state_vector): - qml.StatePrep(state_vector, wires=(0, 1)) + # normalize=False: QDP pre-normalizes to unit norm, skipping PennyLane's re-normalization. 
+ qml.AmplitudeEmbedding(state_vector, wires=(0, 1), normalize=False) for lw in weights: layer(lw, wires=(0, 1)) return qml.expval(qml.PauliZ(0)) @@ -340,10 +197,10 @@ def cost(weights, bias, X_batch, Y_batch): return torch.mean((Y_batch - preds) ** 2) torch.manual_seed(seed) - weights = 0.01 * torch.randn( - num_layers, NUM_QUBITS, 3, device=device, dtype=dtype, requires_grad=True + weights = torch.nn.Parameter( + 0.01 * torch.randn(num_layers, NUM_QUBITS, 3, device=device, dtype=dtype) ) - bias = torch.tensor(0.0, device=device, dtype=dtype, requires_grad=True) + bias = torch.nn.Parameter(torch.tensor(0.0, device=device, dtype=dtype)) opt = torch.optim.SGD([weights, bias], lr=lr, momentum=0.9, nesterov=True) t0 = time.perf_counter() @@ -389,13 +246,12 @@ def cost(weights, bias, X_batch, Y_batch): "samples_per_sec": (steps_done * batch_size) / train_sec if train_sec > 0 else 0.0, - "qml_device": "cuda", } def main() -> None: parser = argparse.ArgumentParser( - description="QDP Iris amplitude encoding pipeline (2-class, same training as baseline)" + description="QDP Iris amplitude encoding pipeline (2-class, GPU training)" ) parser.add_argument( "--iters", @@ -419,13 +275,6 @@ def main() -> None: help="Test fraction (default: 0.1); ignored if --data-file set", ) parser.add_argument("--seed", type=int, default=0, help="Random seed (default: 0)") - parser.add_argument( - "--optimizer", - type=str, - default="adam", - choices=("adam", "nesterov"), - help="Optimizer for CPU (default: adam); GPU uses SGD+Nesterov", - ) parser.add_argument( "--trials", type=int, @@ -471,7 +320,7 @@ def main() -> None: Y_train = Y[train_idx] Y_test = Y[test_idx] - # QDP encoding: 4-D → amplitude-encoded state vectors + # QDP encoding: 4-D → amplitude-encoded state vectors (on GPU) encoded_train = encode_via_qdp( X_train_4d, batch_size=args.batch_size, @@ -487,10 +336,10 @@ def main() -> None: filename="iris_4d_test.npy", ) - print("Iris amplitude (QDP encoding) — 2-class variational 
classifier") + print("Iris amplitude (QDP encoding, GPU) — 2-class variational classifier") print(f" Data: {data_src} → QDP amplitude (n={n}; 2-class Iris = 100 samples)") print( - f" Iters: {args.iters}, batch_size: {args.batch_size}, layers: {args.layers}, lr: {args.lr}, optimizer: {args.optimizer}" + f" Iters: {args.iters}, batch_size: {args.batch_size}, layers: {args.layers}, lr: {args.lr}" ) results: list[dict[str, Any]] = [] @@ -507,11 +356,9 @@ def main() -> None: lr=args.lr, seed=args.seed + t, early_stop_target=early_stop, - optimizer=args.optimizer, ) results.append(r) print(f"\n Trial {t + 1}:") - print(f" QML device: {r.get('qml_device', 'cpu')}") print(f" Compile: {r['compile_time_sec']:.4f} s") print(f" Train: {r['train_time_sec']:.4f} s") print(f" Train acc: {r['train_accuracy']:.4f} (n={r['n_train']})") diff --git a/qdp/qdp-python/pyproject.toml b/qdp/qdp-python/pyproject.toml index 3af565db43..7a4750445d 100644 --- a/qdp/qdp-python/pyproject.toml +++ b/qdp/qdp-python/pyproject.toml @@ -49,10 +49,12 @@ benchmark = [ [tool.uv.sources] qumat = { path = "../..", editable = true } +torch = { index = "pytorch" } +# CUDA 12.6 wheels to match driver (libnvJitLink 12_6); cu122 pulls libs that need 12_8 and fail. [[tool.uv.index]] name = "pytorch" -url = "https://download.pytorch.org/whl/cu122" +url = "https://download.pytorch.org/whl/cu126" explicit = true # Invalidate uv cache when Rust or Cargo changes so extension is rebuilt (run_throughput_pipeline_py etc.). 
diff --git a/qdp/qdp-python/qumat_qdp/loader.py b/qdp/qdp-python/qumat_qdp/loader.py index c6514f0948..2fe4dc65a1 100644 --- a/qdp/qdp-python/qumat_qdp/loader.py +++ b/qdp/qdp-python/qumat_qdp/loader.py @@ -31,11 +31,19 @@ from collections.abc import Iterator from functools import lru_cache -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, cast + +import numpy as np if TYPE_CHECKING: import _qdp +# Optional torch for as_torch(); as_numpy() uses QuantumTensor.to_numpy() (no torch needed). +try: + import torch as _torch +except ImportError: + _torch = None # type: ignore[assignment] + # Seed must fit Rust u64: 0 <= seed <= 2^64 - 1. _U64_MAX = 2**64 - 1 @@ -120,6 +128,40 @@ def __init__( self._synthetic_requested = False # set True only by source_synthetic() self._file_requested = False self._null_handling: str | None = None + self._array: np.ndarray | None = None + self._array_requested = False + # Output format: None = yield raw QuantumTensor (DLPack); ("torch", device) or ("numpy",) + self._output_format: tuple[str, ...] | None = None + + def as_torch(self, device: str = "cuda") -> QuantumDataLoader: + """Yield batches as PyTorch tensors. device='cuda' keeps data on GPU (no copy); 'cpu' moves to CPU. Returns self.""" + if device not in ("cuda", "cpu"): + raise ValueError(f"device must be 'cuda' or 'cpu', got {device!r}") + if _torch is None: + raise RuntimeError( + "PyTorch is required for as_torch(). Install with: pip install torch" + ) + self._output_format = ("torch", device) + return self + + def as_numpy(self) -> QuantumDataLoader: + """Yield batches as NumPy float64 arrays (CPU). Uses QuantumTensor.to_numpy() — no PyTorch required. Returns self.""" + self._output_format = ("numpy",) + return self + + def source_array(self, X: np.ndarray) -> QuantumDataLoader: + """Use in-memory array; no temp file. Encodes via QdpEngine.encode() per batch. 
Returns self.""" + if X is None or not hasattr(X, "shape") or len(X.shape) != 2: + raise ValueError( + "source_array(X) requires a 2D array (n_samples, n_features)." + ) + self._array = np.asarray(X, dtype=np.float64) + if not self._array.flags.c_contiguous: + self._array = np.ascontiguousarray(self._array) + self._array_requested = True + n = self._array.shape[0] + self._total_batches = max(1, (n + self._batch_size - 1) // self._batch_size) + return self def qubits(self, n: int) -> QuantumDataLoader: """Set number of qubits. Returns self for chaining.""" @@ -204,8 +246,55 @@ def null_handling(self, policy: str) -> QuantumDataLoader: self._null_handling = policy return self + def _array_iterator(self) -> Iterator[Any]: + """Yield one QuantumTensor per batch from in-memory array via QdpEngine.encode().""" + qdp = _get_qdp() + QdpEngine = getattr(qdp, "QdpEngine", None) + if QdpEngine is None: + raise RuntimeError("_qdp.QdpEngine not found. Build with maturin develop.") + engine = QdpEngine(device_id=self._device_id) + X = self._array + if X is None: + raise RuntimeError( + "Internal error: _array_iterator called without source_array() data." + ) + assert X is not None # type narrowing for static checkers + n = X.shape[0] + for start in range(0, n, self._batch_size): + end = min(start + self._batch_size, n) + qt = engine.encode(X[start:end], self._num_qubits, self._encoding_method) + yield qt + def _create_iterator(self) -> Iterator[object]: - """Build engine and return the Rust-backed loader iterator (synthetic or file).""" + """Build engine and return the Rust-backed loader iterator (synthetic or file) or array iterator.""" + if self._array_requested: + if self._synthetic_requested or self._file_requested: + raise ValueError( + "Cannot combine source_array() with source_synthetic() or source_file(); use only one source." + ) + if self._array is None: + raise ValueError( + "source_array() was called without an array; set with .source_array(X)." 
+ ) + qdp = _get_qdp() + engine = getattr(qdp, "QdpEngine", None) + if engine is None: + raise RuntimeError( + "_qdp.QdpEngine not found. Build with maturin develop." + ) + engine = engine(device_id=self._device_id) + create_array_loader = getattr(engine, "create_array_loader", None) + if create_array_loader is not None: + return iter( + create_array_loader( + self._array, + batch_size=self._batch_size, + num_qubits=self._num_qubits, + encoding_method=self._encoding_method, + batch_limit=None, + ) + ) + return iter(self._array_iterator()) if self._synthetic_requested and self._file_requested: raise ValueError( "Cannot set both synthetic and file sources; use either .source_synthetic() or .source_file(path), not both." @@ -270,6 +359,26 @@ def _create_iterator(self) -> Iterator[object]: ) ) + def _wrap_iterator(self, raw_iter: Iterator[object]) -> Iterator[Any]: + if self._output_format is None: + yield from raw_iter + return + kind = self._output_format[0] + if kind == "torch": + device = self._output_format[1] + for qt in raw_iter: + t = _torch.from_dlpack(qt) + yield t.cpu() if device == "cpu" else t + elif kind == "numpy": + for qt in raw_iter: + # Rust QuantumTensor has to_numpy(); raw_iter is Iterator[object] + yield cast(Any, qt).to_numpy() + else: + yield from raw_iter + def __iter__(self) -> Iterator[object]: - """Return Rust-backed iterator that yields one QuantumTensor per batch.""" - return self._create_iterator() + """Return iterator yielding one batch per iteration (DLPack, torch, or numpy per as_torch/as_numpy).""" + raw = self._create_iterator() + if self._output_format is None: + return raw + return self._wrap_iterator(raw) diff --git a/qdp/qdp-python/src/engine.rs b/qdp/qdp-python/src/engine.rs index 2c94899c53..b7d6306b78 100644 --- a/qdp/qdp-python/src/engine.rs +++ b/qdp/qdp-python/src/engine.rs @@ -688,6 +688,58 @@ impl QdpEngine { Ok(PyQuantumLoader::new(Some(iter))) } + #[cfg(target_os = "linux")] + /// Create an array-backed pipeline 
iterator (QuantumDataLoader.source_array(X)). + /// PyO3 best practice: one copy (to_vec) to own data for iterator lifetime; then detach() + /// so Rust work (new_from_array) runs without GIL. Iterator's next_batch uses &[f64] (no per-batch to_vec). + #[pyo3(signature = (data, batch_size, num_qubits, encoding_method, batch_limit=None))] + fn create_array_loader( + &self, + py: Python<'_>, + data: &Bound<'_, PyAny>, + batch_size: usize, + num_qubits: u32, + encoding_method: &str, + batch_limit: Option, + ) -> PyResult { + let array_2d = data.extract::>().map_err(|_| { + PyRuntimeError::new_err( + "create_array_loader requires a 2D NumPy array (float64, C-contiguous).", + ) + })?; + let shape = array_2d.shape(); + let num_samples = shape[0]; + let sample_size = shape[1]; + let data_slice = array_2d + .as_slice() + .map_err(|_| PyRuntimeError::new_err("NumPy array must be C-contiguous."))?; + let data_vec = data_slice.to_vec(); + let batch_limit = batch_limit.unwrap_or(usize::MAX); + let config = config_from_args( + &self.engine, + batch_size, + num_qubits, + encoding_method, + 0, + None, + qdp_core::reader::NullHandling::FillZero, + ); + let engine = self.engine.clone(); + let iter = py + .detach(|| { + qdp_core::PipelineIterator::new_from_array( + engine, + data_vec, + num_samples, + sample_size, + config, + batch_limit, + ) + }) + .map_err(|e| PyRuntimeError::new_err(format!("create_array_loader failed: {}", e)))?; + Ok(PyQuantumLoader::new(Some(iter))) + } + #[cfg(target_os = "linux")] /// Create a streaming Parquet pipeline iterator (for QuantumDataLoader.source_file(path, streaming=True)). #[allow(clippy::too_many_arguments)] diff --git a/qdp/qdp-python/src/tensor.rs b/qdp/qdp-python/src/tensor.rs index ab341d1ac5..067e4f1857 100644 --- a/qdp/qdp-python/src/tensor.rs +++ b/qdp/qdp-python/src/tensor.rs @@ -14,10 +14,18 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use numpy::{PyArray2, ndarray::Array2}; use pyo3::exceptions::PyRuntimeError; use pyo3::ffi; use pyo3::prelude::*; use qdp_core::dlpack::DLManagedTensor; +use std::ffi::c_void; + +// CUDA Runtime API — already linked transitively by qdp-core. +unsafe extern "C" { + fn cudaMemcpy(dst: *mut c_void, src: *const c_void, count: usize, kind: i32) -> i32; +} +const CUDA_MEMCPY_DEVICE_TO_HOST: i32 = 2; /// Quantum tensor wrapper implementing DLPack protocol /// @@ -98,6 +106,100 @@ impl QuantumTensor { } } + /// Copy encoded quantum state from GPU to a NumPy array (CPU, float64). + /// + /// Performs a synchronous cudaMemcpy D2H without requiring PyTorch. + /// Complex128 output (imaginary parts are always 0.0 per the CUDA kernel) + /// is reduced to float64 by discarding the zero imaginary components. + /// + /// Returns: + /// numpy.ndarray of shape (batch_size, state_len), dtype float64. + /// + /// Raises: + /// RuntimeError: If the tensor has already been consumed, the pointer is + /// invalid, the dtype is unsupported, or the CUDA copy fails. + #[allow(clippy::wrong_self_convention)] // mut required: sets self.consumed and calls DLPack deleter + fn to_numpy<'py>(&mut self, py: Python<'py>) -> PyResult>> { + if self.consumed { + return Err(PyRuntimeError::new_err( + "DLPack tensor already consumed (can only be used once)", + )); + } + if self.ptr.is_null() { + return Err(PyRuntimeError::new_err("Invalid DLPack tensor pointer")); + } + + let (rows, cols, host_data) = unsafe { + let dl_tensor = &(*self.ptr).dl_tensor; + + // Shape — require 1-D or 2-D. 
+ let ndim = dl_tensor.ndim as usize; + if ndim == 0 || ndim > 2 || dl_tensor.shape.is_null() { + return Err(PyRuntimeError::new_err( + "to_numpy() requires a 1-D or 2-D tensor", + )); + } + let shape = std::slice::from_raw_parts(dl_tensor.shape, ndim); + let (rows, cols) = if ndim == 1 { + (1usize, shape[0] as usize) + } else { + (shape[0] as usize, shape[1] as usize) + }; + + // Dtype: complex128 (DL_COMPLEX=5, bits=128) or float64 (DL_FLOAT=2, bits=64). + let dtype = &dl_tensor.dtype; + let (is_complex, elem_bytes) = match (dtype.code, dtype.bits) { + (5, 128) => (true, 16usize), + (2, 64) => (false, 8usize), + _ => { + return Err(PyRuntimeError::new_err(format!( + "to_numpy() unsupported dtype: code={}, bits={}", + dtype.code, dtype.bits + ))); + } + }; + + let n_elems = rows * cols; + // For complex128 each element is two consecutive f64 values. + let host_f64_count = if is_complex { n_elems * 2 } else { n_elems }; + let mut host_buf = vec![0.0f64; host_f64_count]; + + let data_ptr = (dl_tensor.data as *const u8).add(dl_tensor.byte_offset as usize); + + let ret = cudaMemcpy( + host_buf.as_mut_ptr() as *mut c_void, + data_ptr as *const c_void, + n_elems * elem_bytes, + CUDA_MEMCPY_DEVICE_TO_HOST, + ); + if ret != 0 { + return Err(PyRuntimeError::new_err(format!( + "cudaMemcpy D2H failed with error code {}", + ret + ))); + } + + // Consumed: GPU memory is ours to free now. + self.consumed = true; + if let Some(deleter) = (*self.ptr).deleter { + deleter(self.ptr); + } + + // complex128 → float64: discard imaginary parts (always 0.0). 
+ let host_data: Vec = if is_complex { + host_buf.into_iter().step_by(2).collect() + } else { + host_buf + }; + + (rows, cols, host_data) + }; + + let arr = Array2::from_shape_vec((rows, cols), host_data) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + Ok(PyArray2::from_owned_array(py, arr)) + } + /// Returns DLPack device information /// /// Returns: @@ -122,8 +224,8 @@ impl QuantumTensor { impl Drop for QuantumTensor { fn drop(&mut self) { - // Only free if not consumed by __dlpack__ - // If consumed, PyTorch/consumer will call the deleter + // Only free if not consumed; __dlpack__ leaves freeing to PyTorch, + // to_numpy() calls the deleter itself after the D2H copy. if !self.consumed && !self.ptr.is_null() { unsafe { // Defensive check: qdp-core always provides a deleter diff --git a/qdp/qdp-python/tests/test_quantum_data_loader.py b/qdp/qdp-python/tests/test_quantum_data_loader.py index 8c93c45c56..43e24008a3 100644 --- a/qdp/qdp-python/tests/test_quantum_data_loader.py +++ b/qdp/qdp-python/tests/test_quantum_data_loader.py @@ -16,6 +16,9 @@ """tests for Quantum Data Loader.""" +from unittest.mock import patch + +import numpy as np import pytest try: @@ -28,6 +31,15 @@ def _loader_available(): return QuantumDataLoader is not None +def _cuda_available(): + try: + import torch + + return torch.cuda.is_available() + except ImportError: + return False + + @pytest.mark.skipif(not _loader_available(), reason="QuantumDataLoader not available") def test_mutual_exclusion_both_sources_raises() -> None: """Calling both .source_synthetic() and .source_file() then __iter__ raises ValueError.""" @@ -238,3 +250,134 @@ def test_source_file_s3_streaming_non_parquet_raises(path): ) msg = str(exc_info.value).lower() assert "parquet" in msg or "streaming" in msg + + +# --- as_torch() / as_numpy() output format tests --- + + +@pytest.mark.skipif(not _loader_available(), reason="QuantumDataLoader not available") +def 
test_as_torch_raises_at_config_time_when_torch_missing(): + """as_torch() raises RuntimeError immediately (config time) when torch is not installed.""" + with patch("qumat_qdp.loader._torch", None): + loader = QuantumDataLoader(device_id=0).qubits(4).batches(2, size=4) + with pytest.raises(RuntimeError) as exc_info: + loader.as_torch() + msg = str(exc_info.value) + assert "PyTorch" in msg or "torch" in msg.lower() + assert "pip install" in msg + + +@pytest.mark.skipif(not _loader_available(), reason="QuantumDataLoader not available") +def test_as_numpy_succeeds_at_config_time_without_torch(): + """as_numpy() does not raise at config time even when torch is not installed.""" + with patch("qumat_qdp.loader._torch", None): + loader = ( + QuantumDataLoader(device_id=0) + .qubits(4) + .batches(2, size=4) + .source_synthetic() + .as_numpy() + ) + assert loader._output_format == ("numpy",) + + +@pytest.mark.skipif(not _loader_available(), reason="QuantumDataLoader not available") +@pytest.mark.skipif(not _cuda_available(), reason="CUDA GPU required") +def test_as_numpy_yields_float64_arrays(): + """as_numpy() yields numpy float64 arrays with correct shape; no torch required.""" + num_qubits = 4 + batch_size = 8 + state_len = 2**num_qubits # 16 + + batches = [] + with patch("qumat_qdp.loader._torch", None): + loader = ( + QuantumDataLoader(device_id=0) + .qubits(num_qubits) + .batches(3, size=batch_size) + .source_synthetic() + .as_numpy() + ) + for batch in loader: + batches.append(batch) + + assert len(batches) == 3 + for batch in batches: + assert isinstance(batch, np.ndarray), f"expected ndarray, got {type(batch)}" + assert batch.dtype == np.float64, f"expected float64, got {batch.dtype}" + assert batch.ndim == 2 + assert batch.shape == (batch_size, state_len), f"unexpected shape {batch.shape}" + + +@pytest.mark.skipif(not _loader_available(), reason="QuantumDataLoader not available") +@pytest.mark.skipif(not _cuda_available(), reason="CUDA GPU required") +def 
test_as_numpy_amplitudes_are_unit_norm(): + """Each row from as_numpy() should be a unit-norm state vector (amplitude encoding).""" + num_qubits = 4 + batch_size = 16 + + loader = ( + QuantumDataLoader(device_id=0) + .qubits(num_qubits) + .batches(2, size=batch_size) + .source_synthetic() + .as_numpy() + ) + for batch in loader: + arr = np.asarray(batch, dtype=np.float64) + norms = np.linalg.norm(arr, axis=1) + np.testing.assert_allclose(norms, 1.0, atol=1e-5) + + +@pytest.mark.skipif(not _loader_available(), reason="QuantumDataLoader not available") +@pytest.mark.skipif(not _cuda_available(), reason="CUDA GPU required") +def test_as_torch_yields_cuda_tensors(): + """as_torch(device='cuda') yields torch tensors on CUDA.""" + try: + import torch + except ImportError: + pytest.skip("torch not installed") + + num_qubits = 4 + batch_size = 8 + state_len = 2**num_qubits + + loader = ( + QuantumDataLoader(device_id=0) + .qubits(num_qubits) + .batches(2, size=batch_size) + .source_synthetic() + .as_torch(device="cuda") + ) + for batch in loader: + assert isinstance(batch, torch.Tensor) + assert batch.is_cuda + assert batch.shape == (batch_size, state_len) + + +@pytest.mark.skipif(not _loader_available(), reason="QuantumDataLoader not available") +@pytest.mark.skipif(not _cuda_available(), reason="CUDA GPU required") +def test_as_numpy_from_source_array(): + """as_numpy() works with source_array(), yielding correct shapes and dtype.""" + num_qubits = 3 + state_len = 2**num_qubits # 8 + n_samples = 12 + batch_size = 4 + + rng = np.random.default_rng(42) + X = rng.standard_normal((n_samples, state_len)) + + loader = ( + QuantumDataLoader(device_id=0) + .qubits(num_qubits) + .batches(1, size=batch_size) + .encoding("amplitude") + .source_array(X) + .as_numpy() + ) + batches = list(loader) + assert len(batches) == n_samples // batch_size + for batch in batches: + assert isinstance(batch, np.ndarray) + assert batch.dtype == np.float64 + assert batch.shape[1] == state_len diff 
--git a/qdp/qdp-python/uv.lock b/qdp/qdp-python/uv.lock index dbba6c7b39..547461d8e8 100644 --- a/qdp/qdp-python/uv.lock +++ b/qdp/qdp-python/uv.lock @@ -827,34 +827,42 @@ wheels = [ [[package]] name = "nvidia-cublas-cu12" -version = "12.8.4.1" +version = "12.6.4.1" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, + { url = "https://files.pythonhosted.org/packages/af/eb/ff4b8c503fa1f1796679dce648854d58751982426e4e4b37d6fce49d259c/nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08ed2686e9875d01b58e3cb379c6896df8e76c75e0d4a7f7dace3d7b6d9ef8eb", size = 393138322, upload-time = "2024-11-20T17:40:25.65Z" }, + { url = "https://files.pythonhosted.org/packages/97/0d/f1f0cadbf69d5b9ef2e4f744c9466cb0a850741d08350736dfdb4aa89569/nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:235f728d6e2a409eddf1df58d5b0921cf80cfa9e72b9f2775ccb7b4a87984668", size = 390794615, upload-time = "2024-11-20T17:39:52.715Z" }, ] [[package]] name = "nvidia-cuda-cupti-cu12" -version = "12.8.90" +version = "12.6.80" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, + { url = 
"https://files.pythonhosted.org/packages/e6/8b/2f6230cb715646c3a9425636e513227ce5c93c4d65823a734f4bb86d43c3/nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:166ee35a3ff1587f2490364f90eeeb8da06cd867bd5b701bf7f9a02b78bc63fc", size = 8236764, upload-time = "2024-11-20T17:35:41.03Z" }, + { url = "https://files.pythonhosted.org/packages/25/0f/acb326ac8fd26e13c799e0b4f3b2751543e1834f04d62e729485872198d4/nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.whl", hash = "sha256:358b4a1d35370353d52e12f0a7d1769fc01ff74a191689d3870b2123156184c4", size = 8236756, upload-time = "2024-10-01T16:57:45.507Z" }, + { url = "https://files.pythonhosted.org/packages/49/60/7b6497946d74bcf1de852a21824d63baad12cd417db4195fc1bfe59db953/nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6768bad6cab4f19e8292125e5f1ac8aa7d1718704012a0e3272a6f61c4bce132", size = 8917980, upload-time = "2024-11-20T17:36:04.019Z" }, + { url = "https://files.pythonhosted.org/packages/a5/24/120ee57b218d9952c379d1e026c4479c9ece9997a4fb46303611ee48f038/nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a3eff6cdfcc6a4c35db968a06fcadb061cbc7d6dde548609a941ff8701b98b73", size = 8917972, upload-time = "2024-10-01T16:58:06.036Z" }, ] [[package]] name = "nvidia-cuda-nvrtc-cu12" -version = "12.8.93" +version = "12.6.77" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, + { url = 
"https://files.pythonhosted.org/packages/f4/2f/72df534873235983cc0a5371c3661bebef7c4682760c275590b972c7b0f9/nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5847f1d6e5b757f1d2b3991a01082a44aad6f10ab3c5c0213fa3e25bddc25a13", size = 23162955, upload-time = "2024-10-01T16:59:50.922Z" }, + { url = "https://files.pythonhosted.org/packages/75/2e/46030320b5a80661e88039f59060d1790298b4718944a65a7f2aeda3d9e9/nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:35b0cc6ee3a9636d5409133e79273ce1f3fd087abb0532d2d2e8fff1fe9efc53", size = 23650380, upload-time = "2024-10-01T17:00:14.643Z" }, ] [[package]] name = "nvidia-cuda-runtime-cu12" -version = "12.8.90" +version = "12.6.77" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, + { url = "https://files.pythonhosted.org/packages/8f/ea/590b2ac00d772a8abd1c387a92b46486d2679ca6622fd25c18ff76265663/nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6116fad3e049e04791c0256a9778c16237837c08b27ed8c8401e2e45de8d60cd", size = 908052, upload-time = "2024-11-20T17:35:19.905Z" }, + { url = "https://files.pythonhosted.org/packages/b7/3d/159023799677126e20c8fd580cca09eeb28d5c5a624adc7f793b9aa8bbfa/nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d461264ecb429c84c8879a7153499ddc7b19b5f8d84c204307491989a365588e", size = 908040, upload-time = "2024-10-01T16:57:22.221Z" }, + { url = 
"https://files.pythonhosted.org/packages/e1/23/e717c5ac26d26cf39a27fbc076240fad2e3b817e5889d671b67f4f9f49c5/nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ba3b56a4f896141e25e19ab287cd71e52a6a0f4b29d0d31609f60e3b4d5219b7", size = 897690, upload-time = "2024-11-20T17:35:30.697Z" }, + { url = "https://files.pythonhosted.org/packages/f0/62/65c05e161eeddbafeca24dc461f47de550d9fa8a7e04eb213e32b55cfd99/nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a84d15d5e1da416dd4774cb42edf5e954a3e60cc945698dc1d5be02321c44dc8", size = 897678, upload-time = "2024-10-01T16:57:33.821Z" }, ] [[package]] @@ -865,39 +873,47 @@ dependencies = [ { name = "nvidia-cublas-cu12" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" }, { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, ] [[package]] name = "nvidia-cufft-cu12" -version = "11.3.3.83" +version = "11.3.0.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "nvidia-nvjitlink-cu12" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/37/c50d2b2f2c07e146776389e3080f4faf70bcc4fa6e19d65bb54ca174ebc3/nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d16079550df460376455cba121db6564089176d9bac9e4f360493ca4741b22a6", size = 200164144, upload-time = "2024-11-20T17:40:58.288Z" }, + { url = "https://files.pythonhosted.org/packages/ce/f5/188566814b7339e893f8d210d3a5332352b1409815908dad6a363dcceac1/nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8510990de9f96c803a051822618d42bf6cb8f069ff3f48d93a8486efdacb48fb", size = 200164135, upload-time = "2024-10-01T17:03:24.212Z" }, + { url = "https://files.pythonhosted.org/packages/8f/16/73727675941ab8e6ffd86ca3a4b7b47065edcca7a997920b831f8147c99d/nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ccba62eb9cef5559abd5e0d54ceed2d9934030f51163df018532142a8ec533e5", size = 200221632, upload-time = "2024-11-20T17:41:32.357Z" }, + { url = "https://files.pythonhosted.org/packages/60/de/99ec247a07ea40c969d904fc14f3a356b3e2a704121675b75c366b694ee1/nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.whl", hash = "sha256:768160ac89f6f7b459bee747e8d175dbf53619cfe74b2a5636264163138013ca", size = 200221622, upload-time = "2024-10-01T17:03:58.79Z" }, ] [[package]] name = "nvidia-cufile-cu12" -version = "1.13.1.3" +version = "1.11.1.6" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/66/cc9876340ac68ae71b15c743ddb13f8b30d5244af344ec8322b449e35426/nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc23469d1c7e52ce6c1d55253273d32c565dd22068647f3aa59b3c6b005bf159", size = 1142103, upload-time = "2024-11-20T17:42:11.83Z" }, + { url = "https://files.pythonhosted.org/packages/17/bf/cc834147263b929229ce4aadd62869f0b195e98569d4c28b23edc72b85d9/nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:8f57a0051dcf2543f6dc2b98a98cb2719c37d3cee1baba8965d57f3bbc90d4db", size = 1066155, upload-time = "2024-11-20T17:41:49.376Z" }, ] [[package]] name = "nvidia-curand-cu12" -version = "10.3.9.90" +version = "10.3.7.77" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, + { url = "https://files.pythonhosted.org/packages/42/ac/36543605358a355632f1a6faa3e2d5dfb91eab1e4bc7d552040e0383c335/nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:6e82df077060ea28e37f48a3ec442a8f47690c7499bff392a5938614b56c98d8", size = 56289881, upload-time = "2024-10-01T17:04:18.981Z" }, + { url = "https://files.pythonhosted.org/packages/73/1b/44a01c4e70933637c93e6e1a8063d1e998b50213a6b65ac5a9169c47e98e/nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a42cd1344297f70b9e39a1e4f467a4e1c10f1da54ff7a85c12197f6c652c8bdf", size = 56279010, upload-time = "2024-11-20T17:42:50.958Z" }, + { url = "https://files.pythonhosted.org/packages/4a/aa/2c7ff0b5ee02eaef890c0ce7d4f74bc30901871c5e45dee1ae6d0083cd80/nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.whl", hash 
= "sha256:99f1a32f1ac2bd134897fc7a203f779303261268a65762a623bf30cc9fe79117", size = 56279000, upload-time = "2024-10-01T17:04:45.274Z" }, + { url = "https://files.pythonhosted.org/packages/a6/02/5362a9396f23f7de1dd8a64369e87c85ffff8216fc8194ace0fa45ba27a5/nvidia_curand_cu12-10.3.7.77-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:7b2ed8e95595c3591d984ea3603dd66fe6ce6812b886d59049988a712ed06b6e", size = 56289882, upload-time = "2024-11-20T17:42:25.222Z" }, ] [[package]] name = "nvidia-cusolver-cu12" -version = "11.7.3.90" +version = "11.7.1.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "nvidia-cublas-cu12" }, @@ -905,18 +921,24 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, + { url = "https://files.pythonhosted.org/packages/93/17/dbe1aa865e4fdc7b6d4d0dd308fdd5aaab60f939abfc0ea1954eac4fb113/nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0ce237ef60acde1efc457335a2ddadfd7610b892d94efee7b776c64bb1cac9e0", size = 157833628, upload-time = "2024-10-01T17:05:05.591Z" }, + { url = "https://files.pythonhosted.org/packages/f0/6e/c2cf12c9ff8b872e92b4a5740701e51ff17689c4d726fca91875b07f655d/nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9e49843a7707e42022babb9bcfa33c29857a93b88020c4e4434656a655b698c", size = 158229790, upload-time = "2024-11-20T17:43:43.211Z" }, + { url = "https://files.pythonhosted.org/packages/9f/81/baba53585da791d043c10084cf9553e074548408e04ae884cfe9193bd484/nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.whl", hash = 
"sha256:6cf28f17f64107a0c4d7802be5ff5537b2130bfc112f25d5a30df227058ca0e6", size = 158229780, upload-time = "2024-10-01T17:05:39.875Z" }, + { url = "https://files.pythonhosted.org/packages/7c/5f/07d0ba3b7f19be5a5ec32a8679fc9384cfd9fc6c869825e93be9f28d6690/nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dbbe4fc38ec1289c7e5230e16248365e375c3673c9c8bac5796e2e20db07f56e", size = 157833630, upload-time = "2024-11-20T17:43:16.77Z" }, ] [[package]] name = "nvidia-cusparse-cu12" -version = "12.5.8.93" +version = "12.5.4.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "nvidia-nvjitlink-cu12" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, + { url = "https://files.pythonhosted.org/packages/eb/eb/6681efd0aa7df96b4f8067b3ce7246833dd36830bb4cec8896182773db7d/nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d25b62fb18751758fe3c93a4a08eff08effedfe4edf1c6bb5afd0890fe88f887", size = 216451147, upload-time = "2024-11-20T17:44:18.055Z" }, + { url = "https://files.pythonhosted.org/packages/d3/56/3af21e43014eb40134dea004e8d0f1ef19d9596a39e4d497d5a7de01669f/nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7aa32fa5470cf754f72d1116c7cbc300b4e638d3ae5304cfa4a638a5b87161b1", size = 216451135, upload-time = "2024-10-01T17:06:03.826Z" }, + { url = "https://files.pythonhosted.org/packages/06/1e/b8b7c2f4099a37b96af5c9bb158632ea9e5d9d27d7391d7eb8fc45236674/nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7556d9eca156e18184b94947ade0fba5bb47d69cec46bf8660fd2c71a4b48b73", size = 
216561367, upload-time = "2024-11-20T17:44:54.824Z" }, + { url = "https://files.pythonhosted.org/packages/43/ac/64c4316ba163e8217a99680c7605f779accffc6a4bcd0c778c12948d3707/nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:23749a6571191a215cb74d1cdbff4a86e7b19f1200c071b3fcf844a5bea23a2f", size = 216561357, upload-time = "2024-10-01T17:06:29.861Z" }, ] [[package]] @@ -924,31 +946,46 @@ name = "nvidia-cusparselt-cu12" version = "0.7.1" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" }, { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, ] [[package]] name = "nvidia-nccl-cu12" -version = "2.27.3" +version = "2.27.5" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/5b/4e4fff7bad39adf89f735f2bc87248c81db71205b62bcc0d5ca5b606b3c3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039", size = 322364134, upload-time = "2025-06-03T21:58:04.013Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1c/857979db0ef194ca5e21478a0612bcdbbe59458d7694361882279947b349/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a", size = 322400625, upload-time = 
"2025-06-26T04:11:04.496Z" }, + { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, ] [[package]] name = "nvidia-nvjitlink-cu12" -version = "12.8.93" +version = "12.6.85" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, + { url = "https://files.pythonhosted.org/packages/9d/d7/c5383e47c7e9bf1c99d5bd2a8c935af2b6d705ad831a7ec5c97db4d82f4f/nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:eedc36df9e88b682efe4309aa16b5b4e78c2407eac59e8c10a6a47535164369a", size = 19744971, upload-time = "2024-11-20T17:46:53.366Z" }, + { url = "https://files.pythonhosted.org/packages/31/db/dc71113d441f208cdfe7ae10d4983884e13f464a6252450693365e166dcf/nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cf4eaa7d4b6b543ffd69d6abfb11efdeb2db48270d94dfd3a452c24150829e41", size = 19270338, upload-time = "2024-11-20T17:46:29.758Z" }, +] + +[[package]] +name = "nvidia-nvshmem-cu12" +version = "3.3.20" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/9d/3dd98852568fb845ec1f7902c90a22b240fe1cbabda411ccedf2fd737b7b/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b0b960da3842212758e4fa4696b94f129090b30e5122fea3c5345916545cff0", size = 
124484616, upload-time = "2025-08-04T20:24:59.172Z" }, + { url = "https://files.pythonhosted.org/packages/3b/6c/99acb2f9eb85c29fc6f3a7ac4dccfd992e22666dd08a642b303311326a97/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5", size = 124657145, upload-time = "2025-08-04T20:25:19.995Z" }, ] [[package]] name = "nvidia-nvtx-cu12" -version = "12.8.90" +version = "12.6.77" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, + { url = "https://files.pythonhosted.org/packages/b9/93/80f8a520375af9d7ee44571a6544653a176e53c2b8ccce85b97b83c2491b/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f44f8d86bb7d5629988d61c8d3ae61dddb2015dee142740536bc7481b022fe4b", size = 90549, upload-time = "2024-11-20T17:38:17.387Z" }, + { url = "https://files.pythonhosted.org/packages/2b/53/36e2fd6c7068997169b49ffc8c12d5af5e5ff209df6e1a2c4d373b3a638f/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:adcaabb9d436c9761fca2b13959a2d237c5f9fd406c8e4b723c695409ff88059", size = 90539, upload-time = "2024-10-01T17:00:27.179Z" }, + { url = "https://files.pythonhosted.org/packages/56/9a/fff8376f8e3d084cd1530e1ef7b879bb7d6d265620c95c1b322725c694f4/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b90bed3df379fa79afbd21be8e04a0314336b8ae16768b58f2d34cb1d04cd7d2", size = 89276, upload-time = "2024-11-20T17:38:27.621Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/4e/0d0c945463719429b7bd21dece907ad0bde437a2ff12b9b12fee94722ab0/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1", size = 89265, upload-time = "2024-10-01T17:00:38.172Z" }, ] [[package]] @@ -1096,7 +1133,7 @@ dependencies = [ { name = "networkx", version = "3.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "numpy", marker = "python_full_version >= '3.11'" }, { name = "packaging", marker = "python_full_version >= '3.11'" }, - { name = "pennylane-lightning", version = "0.43.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pennylane-lightning", version = "0.44.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "requests", marker = "python_full_version >= '3.11'" }, { name = "rustworkx", marker = "python_full_version >= '3.11'" }, { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, @@ -1140,7 +1177,7 @@ wheels = [ [[package]] name = "pennylane-lightning" -version = "0.43.0" +version = "0.44.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.12'", @@ -1150,19 +1187,17 @@ dependencies = [ { name = "pennylane", version = "0.43.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "scipy-openblas32", marker = "python_full_version >= '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/50/77/e7b484fda69da63fe02c4f56374dbc1e00aaf5492f8799c1b8ecb92c0e1f/pennylane_lightning-0.43.0.tar.gz", hash = "sha256:ee6f34d4733be0e1d1ba1a12b3a9d3672c9fa455786dbc062176bfe028d6c69d", size = 785957, upload-time = "2025-10-15T13:20:39.482Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/ba/41/ce4d7728b0faf7c77c4e18e2bca77b6ba52c3cc43f5a321ea6596c963e9e/pennylane_lightning-0.44.0.tar.gz", hash = "sha256:4d7383ab8b53af17d14f5b9985afa867a0cec10d224bd068259d824eba812e7a", size = 791167, upload-time = "2026-01-13T15:36:40.241Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e3/33/8f2c98b82fd560a97ce724e027d5f806babe26769b7e21d01ec064457083/pennylane_lightning-0.43.0-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:2071e116c03c82a29a036ec0f529e29cabad248f3595c36a40452fcec1f13353", size = 1725043, upload-time = "2025-10-15T13:18:51.595Z" }, - { url = "https://files.pythonhosted.org/packages/ba/b5/321819f3702b90334dd34484655c09b152f891c3c4b5e374d22df81a3655/pennylane_lightning-0.43.0-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:cfa8422b7827b4be6240f6b52b298c91811bd50ab7b9702d6ea02282c4d559af", size = 2172500, upload-time = "2025-10-15T13:18:54.839Z" }, - { url = "https://files.pythonhosted.org/packages/89/52/408f138ebd0a0eb0014f23509be02c2ad4f490ed69d802efeb078dc21272/pennylane_lightning-0.43.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8d24c919c7508aaa8e54b51d5626890c730204f78c52556bb559273b85d792dc", size = 2017459, upload-time = "2025-10-15T13:18:57.621Z" }, - { url = "https://files.pythonhosted.org/packages/30/4c/43344cf028a228cc5162e734aa2a77d1e609dc3ca9b6bfd8fe541028a313/pennylane_lightning-0.43.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:32eb8fa0332b54969bb4693e4bd15e96273bc15e0e81af9b29b8a516a407453d", size = 2464101, upload-time = "2025-10-15T13:19:00.575Z" }, - { url = "https://files.pythonhosted.org/packages/e8/ea/3a4d6b6552a9ab0368a14c8e88f85635e5aa49d0bafa9dbb08701ff0e6b1/pennylane_lightning-0.43.0-cp311-cp311-win_amd64.whl", hash = "sha256:165dba4748398b5852b91be312f690ee5567b860f054d7fbb6270da6b68f7e84", size = 5383887, upload-time = "2025-10-15T13:19:05.769Z" }, - { url = 
"https://files.pythonhosted.org/packages/c5/a9/9598c87859109bd74358a8f2623b586791028337c0f2ebe257567e38ba03/pennylane_lightning-0.43.0-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:3c4017557c84ed4334b05e2e33ef40407474195247f8e97434c097c8cef1f5e3", size = 1724477, upload-time = "2025-10-15T13:19:09.443Z" }, - { url = "https://files.pythonhosted.org/packages/2e/63/f60ebce7ec4dea995be8a26645f841379d57b45db25c37584d1bbe9745e0/pennylane_lightning-0.43.0-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:e4e27d0f892ba587e0fe274a9a349fbbdd5727ed898223a65c9d049a6f7609c1", size = 2173061, upload-time = "2025-10-15T13:19:12.306Z" }, - { url = "https://files.pythonhosted.org/packages/bc/4d/0ec98912a480d51d4433007b38a7682a8c975b03463c8cc7e91ee99241ca/pennylane_lightning-0.43.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:44820f846805d0919f3a85cdfc8938913af3c99418729d01cf1a6c3de7d862ba", size = 2016744, upload-time = "2025-10-15T13:19:15.018Z" }, - { url = "https://files.pythonhosted.org/packages/dd/e9/83d5460175bbe2701587d288c026eeabbdc4a23168fcee5a572be45115c8/pennylane_lightning-0.43.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6fc236ff206866d7ef5deed733a3bef719b0bda0476be577983a7bda8c516c68", size = 2463530, upload-time = "2025-10-15T13:19:17.763Z" }, - { url = "https://files.pythonhosted.org/packages/a3/a2/46cbbc0788890cae778ce5454151d2e0a3a5dcbe1e12941c7351d05e0106/pennylane_lightning-0.43.0-cp312-cp312-win_amd64.whl", hash = "sha256:35bab12effe2ce3c652fef86ce2c32c5140c5e9c895d172fe99b77dceabe35cf", size = 5381017, upload-time = "2025-10-15T13:19:22.633Z" }, - { url = "https://files.pythonhosted.org/packages/a1/5f/b57d29a6794975b8dbed4afd5755a0b8d5f979e3b52d1bc986a28fa7fc82/pennylane_lightning-0.43.0-py3-none-any.whl", hash = "sha256:f8ac2d58d48133728bbb801cbf6f8f58808b878b44c32143a01ef703658a6d14", size = 1034810, upload-time = "2025-10-15T13:19:40.301Z" }, + { url = 
"https://files.pythonhosted.org/packages/02/70/41e014c3fa7c94839da771acd6d293e597c5ee493ef91834bcfe7bf8743a/pennylane_lightning-0.44.0-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:9a492cb23d631b83f1493e1eb3ff0437e9e29c41921b0ed41d4cff7f016b98b8", size = 1725460, upload-time = "2026-01-13T15:35:18.008Z" }, + { url = "https://files.pythonhosted.org/packages/c2/53/728b93e80ef6a968d715c11c0de3ee2953cc934182a4d8de454aa6d5eb3e/pennylane_lightning-0.44.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:048df8e23a62bde4046162c1229ecfdd8cb7f17b8a16cb5a7c6f68280aff024f", size = 2020368, upload-time = "2026-01-13T15:35:19.913Z" }, + { url = "https://files.pythonhosted.org/packages/25/71/703d4df1fd010fab517337ff12403ee4d040b48d45663c61145e80a36f06/pennylane_lightning-0.44.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4a1cb8827a05e58596f632fb02e014392d92c38a2e471817cd0cb826cb995305", size = 2470668, upload-time = "2026-01-13T15:35:21.257Z" }, + { url = "https://files.pythonhosted.org/packages/27/23/31695ff221cb7ff4574c9567ce24d431962c42d4692c48c037a048cdb56d/pennylane_lightning-0.44.0-cp311-cp311-win_amd64.whl", hash = "sha256:6809ea3a0982c478b1434aaf0e78ca19bdafbddf27ea9ed04378cde5494fe1a7", size = 5411322, upload-time = "2026-01-13T15:35:23.015Z" }, + { url = "https://files.pythonhosted.org/packages/d4/76/c2339362329f468981b4ac24d59982ab06e9a3c4561928d0a4c1bd0d4720/pennylane_lightning-0.44.0-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:89d84f6b24675f011695d7be4a6dbe7821224f5f96747413367135b5c53ae414", size = 1724936, upload-time = "2026-01-13T15:35:25.034Z" }, + { url = "https://files.pythonhosted.org/packages/35/22/dfb5af72c9bf9f85bdf114be4204369894a2b9d9d205ed180df422ff93a0/pennylane_lightning-0.44.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aae687962962f2d2a8620740c7b0eb2de16aba40f080bec64519652e3a25fba3", size = 2019499, upload-time = "2026-01-13T15:35:26.824Z" 
}, + { url = "https://files.pythonhosted.org/packages/cd/98/30c3164b620f89dfec71c05359a1025e15d695a42dbdbc6350f664fc6b58/pennylane_lightning-0.44.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e2030fcebea3cfc7e8d6fc4a5aa821704ffcc15ed4ba76bf653facb7d8ebe39", size = 2469486, upload-time = "2026-01-13T15:35:28.663Z" }, + { url = "https://files.pythonhosted.org/packages/3c/8a/418d1a9f8e292d322a66eac83c8e4b4f48e01f24e629b9b496689412cc0f/pennylane_lightning-0.44.0-cp312-cp312-win_amd64.whl", hash = "sha256:26e7d79a816da3a659ceba554999d1781cc1829699f544ff733cc3dbe2c6f83c", size = 5409508, upload-time = "2026-01-13T15:35:30.558Z" }, + { url = "https://files.pythonhosted.org/packages/cf/d1/b681ae8546b264a4c9d999b8e57a3291cbf38edf39194d5416fbae19f8af/pennylane_lightning-0.44.0-py3-none-any.whl", hash = "sha256:a5a257f89c623565df68f987437de380495299b1271b935e1269717198668e71", size = 1037558, upload-time = "2026-01-13T15:35:46.04Z" }, ] [[package]] @@ -1440,12 +1475,12 @@ benchmark = [ { name = "qiskit-aer", specifier = ">=0.17.2" }, { name = "scikit-learn", specifier = ">=1.3" }, { name = "tensorflow", specifier = ">=2.20" }, - { name = "torch", specifier = ">=2.2,<=2.9.0" }, + { name = "torch", specifier = ">=2.2,<=2.9.0", index = "https://download.pytorch.org/whl/cu126" }, { name = "tqdm" }, ] dev = [ { name = "pytest" }, - { name = "torch", specifier = ">=2.2,<=2.9.0" }, + { name = "torch", specifier = ">=2.2,<=2.9.0", index = "https://download.pytorch.org/whl/cu126" }, ] [[package]] @@ -1823,46 +1858,44 @@ wheels = [ [[package]] name = "torch" -version = "2.8.0" -source = { registry = "https://pypi.org/simple" } +version = "2.9.0+cu126" +source = { registry = "https://download.pytorch.org/whl/cu126" } dependencies = [ { name = "filelock" }, { name = "fsspec" }, { name = "jinja2" }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "networkx", 
version = "3.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cudnn-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cufft-cu12", marker = 
"sys_platform == 'linux'" }, + { name = "nvidia-cufile-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-curand-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusolver-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvtx-cu12", marker = "sys_platform == 'linux'" }, { name = "setuptools", marker = "python_full_version >= '3.12'" }, { name = "sympy" }, - { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "triton", marker = "sys_platform == 'linux'" }, { name = "typing-extensions" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/63/28/110f7274254f1b8476c561dada127173f994afa2b1ffc044efb773c15650/torch-2.8.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:0be92c08b44009d4131d1ff7a8060d10bafdb7ddcb7359ef8d8c5169007ea905", size = 102052793, upload-time = "2025-08-06T14:53:15.852Z" }, - { url = "https://files.pythonhosted.org/packages/70/1c/58da560016f81c339ae14ab16c98153d51c941544ae568da3cb5b1ceb572/torch-2.8.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:89aa9ee820bb39d4d72b794345cccef106b574508dd17dbec457949678c76011", size = 888025420, upload-time = "2025-08-06T14:54:18.014Z" }, - { url = "https://files.pythonhosted.org/packages/70/87/f69752d0dd4ba8218c390f0438130c166fa264a33b7025adb5014b92192c/torch-2.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:e8e5bf982e87e2b59d932769938b698858c64cc53753894be25629bdf5cf2f46", size = 241363614, upload-time = "2025-08-06T14:53:31.496Z" }, - { url = 
"https://files.pythonhosted.org/packages/ef/d6/e6d4c57e61c2b2175d3aafbfb779926a2cfd7c32eeda7c543925dceec923/torch-2.8.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:a3f16a58a9a800f589b26d47ee15aca3acf065546137fc2af039876135f4c760", size = 73611154, upload-time = "2025-08-06T14:53:10.919Z" }, - { url = "https://files.pythonhosted.org/packages/8f/c4/3e7a3887eba14e815e614db70b3b529112d1513d9dae6f4d43e373360b7f/torch-2.8.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:220a06fd7af8b653c35d359dfe1aaf32f65aa85befa342629f716acb134b9710", size = 102073391, upload-time = "2025-08-06T14:53:20.937Z" }, - { url = "https://files.pythonhosted.org/packages/5a/63/4fdc45a0304536e75a5e1b1bbfb1b56dd0e2743c48ee83ca729f7ce44162/torch-2.8.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c12fa219f51a933d5f80eeb3a7a5d0cbe9168c0a14bbb4055f1979431660879b", size = 888063640, upload-time = "2025-08-06T14:55:05.325Z" }, - { url = "https://files.pythonhosted.org/packages/84/57/2f64161769610cf6b1c5ed782bd8a780e18a3c9d48931319f2887fa9d0b1/torch-2.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:8c7ef765e27551b2fbfc0f41bcf270e1292d9bf79f8e0724848b1682be6e80aa", size = 241366752, upload-time = "2025-08-06T14:53:38.692Z" }, - { url = "https://files.pythonhosted.org/packages/a4/5e/05a5c46085d9b97e928f3f037081d3d2b87fb4b4195030fc099aaec5effc/torch-2.8.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:5ae0524688fb6707c57a530c2325e13bb0090b745ba7b4a2cd6a3ce262572916", size = 73621174, upload-time = "2025-08-06T14:53:25.44Z" }, - { url = "https://files.pythonhosted.org/packages/49/0c/2fd4df0d83a495bb5e54dca4474c4ec5f9c62db185421563deeb5dabf609/torch-2.8.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e2fab4153768d433f8ed9279c8133a114a034a61e77a3a104dcdf54388838705", size = 101906089, upload-time = "2025-08-06T14:53:52.631Z" }, - { url = 
"https://files.pythonhosted.org/packages/99/a8/6acf48d48838fb8fe480597d98a0668c2beb02ee4755cc136de92a0a956f/torch-2.8.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b2aca0939fb7e4d842561febbd4ffda67a8e958ff725c1c27e244e85e982173c", size = 887913624, upload-time = "2025-08-06T14:56:44.33Z" }, - { url = "https://files.pythonhosted.org/packages/af/8a/5c87f08e3abd825c7dfecef5a0f1d9aa5df5dd0e3fd1fa2f490a8e512402/torch-2.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:2f4ac52f0130275d7517b03a33d2493bab3693c83dcfadf4f81688ea82147d2e", size = 241326087, upload-time = "2025-08-06T14:53:46.503Z" }, - { url = "https://files.pythonhosted.org/packages/be/66/5c9a321b325aaecb92d4d1855421e3a055abd77903b7dab6575ca07796db/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:619c2869db3ada2c0105487ba21b5008defcc472d23f8b80ed91ac4a380283b0", size = 73630478, upload-time = "2025-08-06T14:53:57.144Z" }, + { url = "https://download.pytorch.org/whl/cu126/torch-2.9.0%2Bcu126-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:31ef6cf39c85a368b09b4fcb92e520ea6dae0121faba28107d8eab6f78f67d51" }, + { url = "https://download.pytorch.org/whl/cu126/torch-2.9.0%2Bcu126-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:653962a66d992e3ba850154356e9ecd83c9beb07663065a3a01d083c8c49b6a5" }, + { url = "https://download.pytorch.org/whl/cu126/torch-2.9.0%2Bcu126-cp310-cp310-win_amd64.whl", hash = "sha256:e8fa700af633d4dcfacc39e8e4d75827d13023243292d9a7fe1e5e5215a6e633" }, + { url = "https://download.pytorch.org/whl/cu126/torch-2.9.0%2Bcu126-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:0ac8362cd4c8c85af5c865fb63a4580656f5f1aae39e77469a84dfb3d6c979d0" }, + { url = "https://download.pytorch.org/whl/cu126/torch-2.9.0%2Bcu126-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:bd3329d3da1786cfd993eca23f0d1213f939145c5aa2ddadd1b0f6dbc37be17d" }, + { url = "https://download.pytorch.org/whl/cu126/torch-2.9.0%2Bcu126-cp311-cp311-win_amd64.whl", hash = 
"sha256:94fc90845de9324943c2f4f5ebffca35df32135e562cd040c3b5cc17259bbc8a" }, + { url = "https://download.pytorch.org/whl/cu126/torch-2.9.0%2Bcu126-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:f80b9bdd81a8d4d48bea4fbab027b728e399bf733a7330f521924ae25aa48958" }, + { url = "https://download.pytorch.org/whl/cu126/torch-2.9.0%2Bcu126-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ea68e3146cd7d770c662f0120f18b8b4a6d96be4314e7196047b282887828cfb" }, + { url = "https://download.pytorch.org/whl/cu126/torch-2.9.0%2Bcu126-cp312-cp312-win_amd64.whl", hash = "sha256:321de9e00dfb066fac4e182c62b6f0a10eb7943924daecb261a7490f98ce3641" }, ] [[package]] @@ -1879,15 +1912,15 @@ wheels = [ [[package]] name = "triton" -version = "3.4.0" +version = "3.5.0" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "setuptools" }, -] wheels = [ - { url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload-time = "2025-07-30T19:58:21.715Z" }, - { url = "https://files.pythonhosted.org/packages/7d/39/43325b3b651d50187e591eefa22e236b2981afcebaefd4f2fc0ea99df191/triton-3.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b70f5e6a41e52e48cfc087436c8a28c17ff98db369447bcaff3b887a3ab4467", size = 155531138, upload-time = "2025-07-30T19:58:29.908Z" }, - { url = "https://files.pythonhosted.org/packages/d0/66/b1eb52839f563623d185f0927eb3530ee4d5ffe9d377cdaf5346b306689e/triton-3.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:31c1d84a5c0ec2c0f8e8a072d7fd150cab84a9c239eaddc6706c081bfae4eb04", size = 155560068, upload-time = "2025-07-30T19:58:37.081Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/22/507b6f58a35e05e84381630b2dc2a3cee1a7a2a7eaf4cba857c638a18a24/triton-3.5.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6f90de6a6566bb619b4c0adc9855729e1b1b5e26533fca1bf6206e96b6d277a3", size = 159827599, upload-time = "2025-10-15T19:15:43.87Z" }, + { url = "https://files.pythonhosted.org/packages/0b/eb/09e31d107a5d00eb281aa7e6635ca463e9bca86515944e399480eadb71f8/triton-3.5.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5d3b3d480debf24eaa739623c9a42446b0b77f95593d30eb1f64cd2278cc1f0", size = 170333110, upload-time = "2025-10-13T16:37:49.588Z" }, + { url = "https://files.pythonhosted.org/packages/79/f9/b6f60f978397c616fd8dacca2305759fe4f80d397b20ef72534803244bd5/triton-3.5.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8457b22148defefdcb7fa8144b05ce211b9faefad650a1ce85b23df488d5549c", size = 159926731, upload-time = "2025-10-15T19:15:49.682Z" }, + { url = "https://files.pythonhosted.org/packages/3d/78/949a04391c21956c816523678f0e5fa308eb5b1e7622d88c4e4ef5fceca0/triton-3.5.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f34bfa21c5b3a203c0f0eab28dcc1e49bd1f67d22724e77fb6665a659200a4ec", size = 170433488, upload-time = "2025-10-13T16:37:57.132Z" }, + { url = "https://files.pythonhosted.org/packages/87/9b/30988039e1e84df7554fba24e6a734d2d0e847af33cabdf9b532b3c51456/triton-3.5.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7da21fccceafc163e3a5e857abe34351ef76345af06cabf9637a914742671f0b", size = 159946647, upload-time = "2025-10-15T19:15:56.325Z" }, + { url = "https://files.pythonhosted.org/packages/f5/3a/e991574f3102147b642e49637e0281e9bb7c4ba254edb2bab78247c85e01/triton-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9e71db82261c4ffa3921cd050cd5faa18322d2d405c30eb56084afaff3b0833", size = 170476535, upload-time = 
"2025-10-13T16:38:05.18Z" }, ] [[package]] diff --git a/testing/qumat/test_parameter_binding.py b/testing/qumat/test_parameter_binding.py index ae8fdb9e6b..7a7976cde4 100644 --- a/testing/qumat/test_parameter_binding.py +++ b/testing/qumat/test_parameter_binding.py @@ -224,7 +224,9 @@ def test_partially_bound_parameters_error(self, backend_name): @pytest.mark.parametrize("backend_name", TESTING_BACKENDS) def test_execute_circuit_does_not_mutate_backend_config(self, backend_name): """Test that execute_circuit does not mutate the user's backend_config across all backends.""" - backend_config = get_backend_config(backend_name).copy() + cfg = get_backend_config(backend_name) + assert cfg is not None + backend_config = cfg.copy() original_config = backend_config.copy() qumat = QuMat(backend_config) @@ -240,7 +242,9 @@ def test_execute_circuit_does_not_mutate_backend_config(self, backend_name): @pytest.mark.parametrize("backend_name", TESTING_BACKENDS) def test_get_final_state_vector_does_not_mutate_backend_config(self, backend_name): """Test that get_final_state_vector does not mutate the user's backend_config across all backends.""" - backend_config = get_backend_config(backend_name).copy() + cfg = get_backend_config(backend_name) + assert cfg is not None + backend_config = cfg.copy() original_config = backend_config.copy() qumat = QuMat(backend_config) diff --git a/uv.lock b/uv.lock index 6781486846..bd88f82f75 100644 --- a/uv.lock +++ b/uv.lock @@ -2076,12 +2076,8 @@ dev = [ [[package]] name = "qumat-qdp" source = { editable = "qdp/qdp-python" } -dependencies = [ - { name = "qumat" }, -] [package.metadata] -requires-dist = [{ name = "qumat", editable = "." 
}] [package.metadata.requires-dev] benchmark = [ @@ -2094,12 +2090,12 @@ benchmark = [ { name = "qiskit-aer", specifier = ">=0.17.2" }, { name = "scikit-learn", specifier = ">=1.3" }, { name = "tensorflow", specifier = ">=2.20" }, - { name = "torch", specifier = ">=2.2,<=2.9.0" }, + { name = "torch", specifier = ">=2.2,<=2.9.0", index = "https://download.pytorch.org/whl/cu126" }, { name = "tqdm" }, ] dev = [ { name = "pytest" }, - { name = "torch", specifier = ">=2.2,<=2.9.0" }, + { name = "torch", specifier = ">=2.2,<=2.9.0", index = "https://download.pytorch.org/whl/cu126" }, ] [[package]]