4 ヶ月前 · 30d648c2e3
--- a/README.md
+++ b/README.md
@@ -157,6 +157,7 @@ python scripts/convert_to_gguf.py ... --arch bs
 
				 ## 💻 C++ API
			
 
				 
			
 
				 ```cpp
			
 
				+#include <atomic>
			
 
				 #include <bs_roformer/inference.h>
			
 
				 #include <bs_roformer/audio.h>
			
 
				 
			
@@ -170,10 +171,14 @@ Inference engine("model.gguf");
 
				 int chunk_size = engine.GetDefaultChunkSize();   // e.g., 352800
			
 
				 int num_overlap = engine.GetDefaultNumOverlap(); // e.g., 2
			
 
				 
			
 
				-// 4. Run inference (with progress callback)
			
 
				+// 4. Run inference (with progress + cancel callback)
			
 
				+std::atomic<bool> should_cancel{false};
			
 
				 auto stems = engine.Process(input.data, chunk_size, num_overlap,
			
 
				     [](float progress) {
			
 
				         std::cout << "Progress: " << int(progress * 100) << "%" << std::endl;
			
 
				+    },
			
 
				+    [&should_cancel]() {
			
 
				+        return should_cancel.load();
			
 
				     });
			
 
				 
			
 
				 // 5. Save result
			
@@ -181,6 +186,8 @@ AudioBuffer output{stems[0], 2, 44100, stems[0].size()};
 
				 AudioFile::Save("vocals.wav", output);
			
 
				 ```
			
 
				 
			
 
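				+If `cancel_callback` returns `true`, `Process()` throws `std::runtime_error("Inference cancelled")`. The callback is polled on the thread that called `Process()`, before and after each chunk's inference, so cancellation takes effect at chunk boundaries rather than interrupting a chunk that is already running.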
			
 
				+
			
 
				 ---
			
 
				 
			
 
				 ## 🏗️ Project Architecture
			
--- a/README.zh.md
+++ b/README.zh.md
@@ -155,6 +155,7 @@ python scripts/convert_to_gguf.py ... --arch bs
 
				 ## 💻 C++ API
			
 
				 
			
 
				 ```cpp
			
 
				+#include <atomic>
			
 
				 #include <bs_roformer/inference.h>
			
 
				 #include <bs_roformer/audio.h>
			
 
				 
			
@@ -168,10 +169,14 @@ Inference engine("model.gguf");
 
				 int chunk_size = engine.GetDefaultChunkSize();   // 如 352800
			
 
				 int num_overlap = engine.GetDefaultNumOverlap(); // 如 2
			
 
				 
			
 
				-// 4. 执行推理（带进度回调）
			
 
				+// 4. 执行推理（带进度回调 + 取消回调）
			
 
				+std::atomic<bool> should_cancel{false};
			
 
				 auto stems = engine.Process(input.data, chunk_size, num_overlap,
			
 
				     [](float progress) {
			
 
				         std::cout << "Progress: " << int(progress * 100) << "%" << std::endl;
			
 
				+    },
			
 
				+    [&should_cancel]() {
			
 
				+        return should_cancel.load();
			
 
				     });
			
 
				 
			
 
				 // 5. 保存结果
			
@@ -179,6 +184,8 @@ AudioBuffer output{stems[0], 2, 44100, stems[0].size()};
 
				 AudioFile::Save("vocals.wav", output);
			
 
				 ```
			
 
				 
			
 
				+当 `cancel_callback` 返回 `true` 时，`Process()` 会抛出 `std::runtime_error("Inference cancelled")`。该回调在调用 `Process()` 的线程上、于每个分块推理之前和之后被轮询，因此取消在分块边界生效，不会中断正在执行的分块推理。
			
 
				+
			
 
				 ---
			
 
				 
			
 
				 ## 🏗️ 项目架构
			
--- a/include/bs_roformer/inference.h
+++ b/include/bs_roformer/inference.h
@@ -12,6 +12,8 @@ namespace ggml { struct context; struct cgraph; }
 
				 
			
 
				 class Inference {
			
 
				 public:
			
 
				+    using CancelCallback = std::function<bool()>;
			
 
				+
			
 
				     Inference(const std::string& model_path);
			
 
				     ~Inference();
			
 
				 
			
@@ -22,7 +24,8 @@ public:
 
				     std::vector<std::vector<float>> Process(const std::vector<float>& input_audio, 
			
 
				                                int chunk_size = 352800, 
			
 
				                                int num_overlap = 2,
			
 
				-                               std::function<void(float)> progress_callback = nullptr);
			
 
				+                               std::function<void(float)> progress_callback = nullptr,
			
 
				+                               CancelCallback cancel_callback = nullptr);
			
 
				 
			
 
				     // Low-level chunk processing (public for testing)
			
 
				     std::vector<std::vector<float>> ProcessChunk(const std::vector<float>& chunk_audio);
			
@@ -39,14 +42,16 @@ public:
 
				                                                 int chunk_size, 
			
 
				                                                 int num_overlap,
			
 
				                                                 ModelCallback model_func,
			
 
				-                                                std::function<void(float)> progress_callback = nullptr); // Added callback
			
 
				+                                                std::function<void(float)> progress_callback = nullptr,
			
 
				+                                                CancelCallback cancel_callback = nullptr);
			
 
				 
			
 
				 private:
			
 
				     // Pipelined Overlap-Add
			
 
				     std::vector<std::vector<float>> ProcessOverlapAddPipelined(const std::vector<float>& input_audio, 
			
 
				                                                   int chunk_size, 
			
 
				                                                   int num_overlap,
			
 
				-                                                  std::function<void(float)> progress_callback);
			
 
				+                                                  std::function<void(float)> progress_callback,
			
 
				+                                                  CancelCallback cancel_callback);
			
 
				 
			
 
				 private:
			
 
				     std::unique_ptr<BSRoformer> model_;
			
--- a/src/inference.cpp
+++ b/src/inference.cpp
@@ -15,8 +15,11 @@
 
				 #include <thread>
			
 
				 #include <mutex>
			
 
				 #include <condition_variable>
			
 
				+#include <atomic>
			
 
				+#include <exception>
			
 
				 
			
 
				 using Complex = std::complex<float>;
			
 
				+static constexpr const char* kInferenceCancelledMessage = "Inference cancelled";
			
 
				 
			
 
				 // Helper forward decl
			
 
				 std::vector<float> GetWindow(int size, int fade_size);
			
@@ -289,11 +292,13 @@ void Inference::PostProcessAndISTFT(const std::vector<float>& mask_output,
 
				     }
			
 
				 }
			
 
				 
			
 
				-#include <future>
			
 
				-
			
 
				-std::vector<std::vector<float>> Inference::Process(const std::vector<float>& input_audio, int chunk_size, int num_overlap, std::function<void(float)> progress_callback) {
			
 
				+std::vector<std::vector<float>> Inference::Process(const std::vector<float>& input_audio,
			
 
				+                                                   int chunk_size,
			
 
				+                                                   int num_overlap,
			
 
				+                                                   std::function<void(float)> progress_callback,
			
 
				+                                                   CancelCallback cancel_callback) { // Entry point: delegates to the pipelined overlap-add path; neither callback is invoked here.
			
 
				     if (input_audio.empty()) return {}; // Empty input: return no stems before any callback can run.
			
 
				-    return ProcessOverlapAddPipelined(input_audio, chunk_size, num_overlap, progress_callback);
			
 
				+    return ProcessOverlapAddPipelined(input_audio, chunk_size, num_overlap, progress_callback, cancel_callback); // Both callbacks are forwarded unchanged.
			
 
				 }
			
 
				 
			
 
				 // =================================================================================================
			
@@ -450,7 +455,8 @@ private:
 
				 std::vector<std::vector<float>> Inference::ProcessOverlapAddPipelined(const std::vector<float>& input_audio, 
			
 
				                                                          int chunk_size, 
			
 
				                                                          int num_overlap,
			
 
				-                                                         std::function<void(float)> progress_callback) {
			
 
				+                                                         std::function<void(float)> progress_callback,
			
 
				+                                                         CancelCallback cancel_callback) {
			
 
				     if (input_audio.empty()) return {};
			
 
				     if (input_audio.size() % 2 != 0) {
			
 
				         throw std::runtime_error("Error: Input audio must be interleaved stereo (even number of samples).");
			
@@ -505,6 +511,7 @@ std::vector<std::vector<float>> Inference::ProcessOverlapAddPipelined(const std:
 
				     std::vector<float> counter(n_padded_samples * channels, 0.0f);
			
 
				     std::vector<float> window_base = GetWindow(chunk_size, fade_size);
			
 
				     std::mutex result_mutex; // Protects 'result' and 'counter'
			
 
				+    std::atomic<bool> cancel_requested{false};
			
 
				     
			
 
				     // lambda to extract chunk 'i'
			
 
				     auto extract_chunk = [&](int i) -> std::vector<float> {
			
@@ -590,6 +597,20 @@ std::vector<std::vector<float>> Inference::ProcessOverlapAddPipelined(const std:
 
				     // 3 items buffer is enough to keep GPU busy
			
 
				     ThreadSafeQueue<std::shared_ptr<ChunkState>> input_queue(3);
			
 
				     ThreadSafeQueue<std::shared_ptr<ChunkState>> output_queue(3);
			
 
				+    std::mutex exception_mutex;
			
 
				+    std::exception_ptr pipeline_exception = nullptr;
			
 
				+
			
 
				+    auto set_pipeline_exception = [&](std::exception_ptr eptr) {
			
 
				+        {
			
 
				+            std::lock_guard<std::mutex> lock(exception_mutex);
			
 
				+            if (!pipeline_exception) {
			
 
				+                pipeline_exception = eptr;
			
 
				+            }
			
 
				+        }
			
 
				+        cancel_requested.store(true, std::memory_order_release);
			
 
				+        input_queue.Shutdown();
			
 
				+        output_queue.Shutdown();
			
 
				+    };
			
 
				     
			
 
				     // Structure to hold chunk metadata together
			
 
				     struct ChunkTask {
			
@@ -599,51 +620,109 @@ std::vector<std::vector<float>> Inference::ProcessOverlapAddPipelined(const std:
 
				     
			
 
				     // 1. Preprocessor Thread
			
 
				     auto preproccessor = std::thread([&]() {
			
 
				-        int current_offset = 0;
			
 
				-        while (current_offset < n_padded_samples) {
			
 
				-            std::vector<float> chunk = extract_chunk(current_offset);
			
 
				-            
			
 
				-            auto state = PreProcessChunk(chunk, current_offset); 
			
 
				-            
			
 
				-            input_queue.Push(state);
			
 
				-            current_offset += step;
			
 
				+        try {
			
 
				+            int current_offset = 0;
			
 
				+            while (current_offset < n_padded_samples && !cancel_requested.load(std::memory_order_acquire)) {
			
 
				+                std::vector<float> chunk = extract_chunk(current_offset);
			
 
				+                
			
 
				+                auto state = PreProcessChunk(chunk, current_offset); 
			
 
				+                
			
 
				+                input_queue.Push(state);
			
 
				+                if (cancel_requested.load(std::memory_order_acquire)) {
			
 
				+                    break;
			
 
				+                }
			
 
				+                current_offset += step;
			
 
				+            }
			
 
				+        } catch (...) {
			
 
				+            set_pipeline_exception(std::current_exception());
			
 
				         }
			
 
				         input_queue.Shutdown();
			
 
				     });
			
 
				     
			
 
				     // 3. Postprocessor Thread
			
 
				     auto postprocessor = std::thread([&]() {
			
 
				-        std::shared_ptr<ChunkState> state;
			
 
				-        while (output_queue.Pop(state)) {
			
 
				-            // This does ISTFT (CPU intensive)
			
 
				-            PostProcessChunk(state);
			
 
				-            
			
 
				-            // Accumulate (Memory bandwidth intensive + Mutex)
			
 
				-            accumulate_result(state, state->id); // state->id holds offset
			
 
				-            
			
 
				-            if (progress_callback) {
			
 
				-                float progress = (float)std::min(state->id + step, n_padded_samples) / n_padded_samples;
			
 
				-                progress_callback(progress);
			
 
				+        try {
			
 
				+            std::shared_ptr<ChunkState> state;
			
 
				+            while (!cancel_requested.load(std::memory_order_acquire) && output_queue.Pop(state)) {
			
 
				+                // This does ISTFT (CPU intensive)
			
 
				+                PostProcessChunk(state);
			
 
				+                if (cancel_requested.load(std::memory_order_acquire)) {
			
 
				+                    break;
			
 
				+                }
			
 
				+                
			
 
				+                // Accumulate (Memory bandwidth intensive + Mutex)
			
 
				+                accumulate_result(state, state->id); // state->id holds offset
			
 
				+                
			
 
				+                if (!cancel_requested.load(std::memory_order_acquire) && progress_callback) {
			
 
				+                    float progress = (float)std::min(state->id + step, n_padded_samples) / n_padded_samples;
			
 
				+                    progress_callback(progress);
			
 
				+                }
			
 
				             }
			
 
				+        } catch (...) {
			
 
				+            set_pipeline_exception(std::current_exception());
			
 
				         }
			
 
				     });
			
 
				     
			
 
				+    auto poll_cancel_requested = [&]() -> bool {
			
 
				+        if (cancel_requested.load(std::memory_order_acquire)) {
			
 
				+            return true;
			
 
				+        }
			
 
				+        if (cancel_callback && cancel_callback()) {
			
 
				+            cancel_requested.store(true, std::memory_order_release);
			
 
				+            return true;
			
 
				+        }
			
 
				+        return false;
			
 
				+    };
			
 
				+
			
 
				     // 2. Main Thread (Inference Loop)
			
 
				+    bool cancelled = false;
			
 
				     std::shared_ptr<ChunkState> state;
			
 
				-    while (true) {
			
 
				-        bool ok = input_queue.Pop(state);
			
 
				-        if (!ok) break; // Input queue shutdown and empty
			
 
				-        
			
 
				-        // This does GGML Inference (GPU intensive, Blocking)
			
 
				-        RunInference(state);
			
 
				-        
			
 
				-        output_queue.Push(state);
			
 
				+    try {
			
 
				+        while (true) {
			
 
				+            if (poll_cancel_requested()) {
			
 
				+                cancelled = true;
			
 
				+                break;
			
 
				+            }
			
 
				+
			
 
				+            bool ok = input_queue.Pop(state);
			
 
				+            if (!ok) break; // Input queue shutdown and empty
			
 
				+
			
 
				+            if (poll_cancel_requested()) {
			
 
				+                cancelled = true;
			
 
				+                break;
			
 
				+            }
			
 
				+            
			
 
				+            // This does GGML Inference (GPU intensive, Blocking)
			
 
				+            RunInference(state);
			
 
				+
			
 
				+            if (poll_cancel_requested()) {
			
 
				+                cancelled = true;
			
 
				+                break;
			
 
				+            }
			
 
				+            
			
 
				+            output_queue.Push(state);
			
 
				+        }
			
 
				+    } catch (...) {
			
 
				+        set_pipeline_exception(std::current_exception());
			
 
				     }
			
 
				     
			
 
				+    if (cancelled) {
			
 
				+        cancel_requested.store(true, std::memory_order_release);
			
 
				+        input_queue.Shutdown();
			
 
				+    }
			
 
				+
			
 
				     // Wait for threads
			
 
				     output_queue.Shutdown();
			
 
				     if (preproccessor.joinable()) preproccessor.join();
			
 
				     if (postprocessor.joinable()) postprocessor.join();
			
 
				+
			
 
				+    if (pipeline_exception) {
			
 
				+        std::rethrow_exception(pipeline_exception);
			
 
				+    }
			
 
				+
			
 
				+    if (cancel_requested.load(std::memory_order_acquire)) {
			
 
				+        throw std::runtime_error(kInferenceCancelledMessage);
			
 
				+    }
			
 
				     
			
 
				     // Normalize and Crop
			
 
				     // result is [stems][samples]
			
@@ -676,7 +755,8 @@ std::vector<std::vector<float>> Inference::ProcessOverlapAdd(const std::vector<f
 
				                                                 int chunk_size, 
			
 
				                                                 int num_overlap,
			
 
				                                                 ModelCallback model_func,
			
 
				-                                                std::function<void(float)> progress_callback) {
			
 
				+                                                std::function<void(float)> progress_callback,
			
 
				+                                                CancelCallback cancel_callback) {
			
 
				     if (input_audio.empty()) return {};
			
 
				     if (input_audio.size() % 2 != 0) {
			
 
				         throw std::runtime_error("Error: Input audio must be interleaved stereo (even number of samples).");
			
@@ -736,6 +816,10 @@ std::vector<std::vector<float>> Inference::ProcessOverlapAdd(const std::vector<f
 
				     int total_length = n_padded_samples;
			
 
				     
			
 
				     while (i < total_length) {
			
 
				+        if (cancel_callback && cancel_callback()) {
			
 
				+            throw std::runtime_error(kInferenceCancelledMessage);
			
 
				+        }
			
 
				+
			
 
				         int remaining = total_length - i;
			
 
				         int part_len = std::min(C, remaining); // Logic matches Python slice [i:i+C]
			
 
				         
			
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -45,3 +45,4 @@ bsr_add_test(test_component_mask)
 
				 bsr_add_test(test_inference)
			
 
				 bsr_add_test(test_chunking_logic)
			
 
				 bsr_add_test(test_stft_consistency)
			
 
				+bsr_add_test(test_cancel_callback)
			
--- a/tests/test_cancel_callback.cpp
+++ b/tests/test_cancel_callback.cpp
@@ -0,0 +1,115 @@
 
				+#include <algorithm>
			
 
				+#include <cmath>
			
 
				+#include <exception>
			
 
				+#include <iostream>
			
 
				+#include <string>
			
 
				+#include <vector>
			
 
				+
			
 
				+#include "bs_roformer/inference.h"
			
 
				+
			
 
				+static bool IsCancelledError(const std::exception& e) { // True only when e.what() is exactly the cancellation message.
			
 
				+    return std::string(e.what()) == "Inference cancelled"; // Must stay in sync with kInferenceCancelledMessage in src/inference.cpp.
			
 
				+}
			
 
				+
			
 
				+int main() { // Checks cancel-callback handling of Inference::ProcessOverlapAdd; returns 0 on success, 1 on any failed check.
			
 
				+    std::cout << "Test: Cancel Callback Behavior" << std::endl;
			
 
				+
			
 
				+    const int channels = 2;
			
 
				+    const int samples = 96; // Frames per channel; the input vector holds samples * channels floats.
			
 
				+    const int chunk_size = 32;
			
 
				+    const int num_overlap = 2;
			
 
				+
			
 
				+    std::vector<float> input(samples * channels);
			
 
				+    for (int i = 0; i < samples; ++i) { // Fill interleaved test signal: sine at index 0, cosine at index 1 of each frame.
			
 
				+        input[i * channels + 0] = std::sin(0.1f * static_cast<float>(i));
			
 
				+        input[i * channels + 1] = std::cos(0.1f * static_cast<float>(i));
			
 
				+    }
			
 
				+
			
 
				+    auto identity = [](const std::vector<float>& chunk) { // Stand-in model: returns the chunk unchanged as a single stem.
			
 
				+        return std::vector<std::vector<float>>{chunk};
			
 
				+    };
			
 
				+
			
 
				+    // Case 1: immediate cancellation
			
 
				+    bool immediate_cancelled = false;
			
 
				+    try {
			
 
				+        (void)Inference::ProcessOverlapAdd(
			
 
				+            input,
			
 
				+            chunk_size,
			
 
				+            num_overlap,
			
 
				+            identity,
			
 
				+            nullptr,
			
 
				+            []() { return true; }); // Requests cancellation on the very first poll.
			
 
				+    } catch (const std::exception& e) {
			
 
				+        if (!IsCancelledError(e)) { // Any other exception is a test failure.
			
 
				+            std::cerr << "Unexpected exception for immediate cancel: " << e.what() << std::endl;
			
 
				+            return 1;
			
 
				+        }
			
 
				+        immediate_cancelled = true;
			
 
				+    }
			
 
				+
			
 
				+    if (!immediate_cancelled) {
			
 
				+        std::cerr << "Immediate cancellation did not throw" << std::endl;
			
 
				+        return 1;
			
 
				+    }
			
 
				+
			
 
				+    // Case 2: delayed cancellation
			
 
				+    int cancel_calls = 0;
			
 
				+    bool delayed_cancelled = false;
			
 
				+    try {
			
 
				+        (void)Inference::ProcessOverlapAdd(
			
 
				+            input,
			
 
				+            chunk_size,
			
 
				+            num_overlap,
			
 
				+            identity,
			
 
				+            nullptr,
			
 
				+            [&cancel_calls]() {
			
 
				+                ++cancel_calls;
			
 
				+                return cancel_calls >= 3; // First two polls continue; the third requests cancellation.
			
 
				+            });
			
 
				+    } catch (const std::exception& e) {
			
 
				+        if (!IsCancelledError(e)) {
			
 
				+            std::cerr << "Unexpected exception for delayed cancel: " << e.what() << std::endl;
			
 
				+            return 1;
			
 
				+        }
			
 
				+        delayed_cancelled = true;
			
 
				+    }
			
 
				+
			
 
				+    if (!delayed_cancelled) {
			
 
				+        std::cerr << "Delayed cancellation did not throw" << std::endl;
			
 
				+        return 1;
			
 
				+    }
			
 
				+
			
 
				+    // Case 3: cancel callback always false should match baseline output.
			
 
				+    auto no_cancel = []() { return false; };
			
 
				+    auto baseline = Inference::ProcessOverlapAdd(input, chunk_size, num_overlap, identity); // Baseline: progress and cancel callbacks left at their defaults.
			
 
				+    auto with_no_cancel = Inference::ProcessOverlapAdd(
			
 
				+        input,
			
 
				+        chunk_size,
			
 
				+        num_overlap,
			
 
				+        identity,
			
 
				+        nullptr,
			
 
				+        no_cancel);
			
 
				+
			
 
				+    if (baseline.size() != with_no_cancel.size() || baseline.empty()) { // Also rejects an empty baseline so the [0] accesses below are valid.
			
 
				+        std::cerr << "Output stem count mismatch in no-cancel path" << std::endl;
			
 
				+        return 1;
			
 
				+    }
			
 
				+
			
 
				+    if (baseline[0].size() != with_no_cancel[0].size()) {
			
 
				+        std::cerr << "Output sample count mismatch in no-cancel path" << std::endl;
			
 
				+        return 1;
			
 
				+    }
			
 
				+
			
 
				+    float max_diff = 0.0f;
			
 
				+    for (size_t i = 0; i < baseline[0].size(); ++i) { // Only the first stem is compared.
			
 
				+        max_diff = std::max(max_diff, std::abs(baseline[0][i] - with_no_cancel[0][i]));
			
 
				+    }
			
 
				+
			
 
				+    if (max_diff > 1e-6f) { // Tolerance on the largest per-sample absolute difference.
			
 
				+        std::cerr << "No-cancel output mismatch, max diff = " << max_diff << std::endl;
			
 
				+        return 1;
			
 
				+    }
			
 
				+
			
 
				+    std::cout << "PASSED" << std::endl;
			
 
				+    return 0;
			
 
				+}
			
--- a/tests/test_inference.cpp
+++ b/tests/test_inference.cpp
@@ -3,6 +3,7 @@
 
				 #include <cmath>
			
 
				 #include <string>
			
 
				 #include <cstdlib>
			
 
				+#include <algorithm>
			
 
				 #include "bs_roformer/inference.h"
			
 
				 #include "../src/utils.h"
			
 
				 
			
@@ -55,6 +56,21 @@ int main(int argc, char* argv[]) {
 
				         // This matches the generation of output_audio.npy
			
 
				         std::vector<std::vector<float>> output_stems = engine.ProcessChunk(input_audio);
			
 
				         std::vector<float> output_audio = output_stems[0];
			
 
				+
			
 
				+        // Smoke test new cancel callback path in Process()
			
 
				+        size_t smoke_samples = std::min<size_t>(input_audio.size(), static_cast<size_t>(16384 * 2));
			
 
				+        if (smoke_samples % 2 != 0) {
			
 
				+            smoke_samples -= 1;
			
 
				+        }
			
 
				+        if (smoke_samples >= 2) {
			
 
				+            std::vector<float> smoke_input(input_audio.begin(), input_audio.begin() + smoke_samples);
			
 
				+            auto cancel_false = []() { return false; };
			
 
				+            auto smoke_stems = engine.Process(smoke_input, 16384, 2, nullptr, cancel_false);
			
 
				+            if (smoke_stems.empty() || smoke_stems[0].empty()) {
			
 
				+                std::cerr << "Process() smoke test returned empty output" << std::endl;
			
 
				+                return 1;
			
 
				+            }
			
 
				+        }
			
 
				         
			
 
				         std::cout << "  Input size: " << input_audio.size() << std::endl;
			
 
				         std::cout << "  Output size: " << output_audio.size() << std::endl;