5 місяців тому · fcf1eecf4c
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -373,14 +373,18 @@ jobs:
 
				           retention-days: 30
			
 
				 
			
 
				   # ===========================================================================
			
 
				-  # CUDA Build: Linux (Verification Only - Not for Release)
			
 
				+  # CUDA Build: Linux (Compile Verification Only)
			
 
				   # ===========================================================================
			
 
				-  # Note: We do not provide precompiled CUDA binaries for Linux.
			
 
				-  # This build exists solely to verify the code compiles with CUDA on Linux.
			
 
				-  # Linux users should build from source for optimal performance.
			
 
				   build-cuda-linux:
			
 
				-    name: build-cuda-linux-verify
			
 
				-    runs-on: ubuntu-latest
			
 
				+    name: build-cuda-linux-${{ matrix.cuda_version }}
			
 
				+    runs-on: ${{ matrix.os }}
			
 
				+    strategy:
			
 
				+      fail-fast: false
			
 
				+      matrix:
			
 
				+        include:
			
 
				+          - { cuda_version: "11.8.0", os: ubuntu-22.04 }
			
 
				+          - { cuda_version: "12.9.1", os: ubuntu-latest }
			
 
				+          - { cuda_version: "13.1.0", os: ubuntu-latest }
			
 
				     
			
 
				     steps:  
			
 
				       - name: Checkout
			
@@ -392,7 +396,7 @@ jobs:
 
				       - name: Install CUDA Toolkit
			
 
				         uses: Jimver/cuda-toolkit@master
			
 
				         with:
			
 
				-          cuda: "12.9.1"
			
 
				+          cuda: ${{ matrix.cuda_version }}
			
 
				           method: network
			
 
				           sub-packages: '["nvcc", "cudart", "thrust"]'
			
 
				           non-cuda-sub-packages: '["libcublas", "libcublas-dev"]'
			
@@ -404,150 +408,135 @@ jobs:
 
				       - name: Configure
			
 
				         run: |
			
 
				           ls -ld ggml
			
 
				+          # Minimal architectures for compile verification
			
 
				+          # Just verify compilation works, not for distribution
			
 
				+          CUDA_ARCHS="75;86"
			
 
				+          echo "Verifying build for CUDA architectures: $CUDA_ARCHS"
			
 
				           cmake -B build -G Ninja \
			
 
				             -DCMAKE_BUILD_TYPE=Release \
			
 
				             -DGGML_DIR=ggml \
			
 
				             -DGGML_CUDA=ON \
			
 
				             -DGGML_CUDA_FORCE_MMQ=ON \
			
 
				-            -DCMAKE_CUDA_RUNTIME_LIBRARY=Static \
			
 
				-            -DCMAKE_CUDA_ARCHITECTURES="61;75;80;86;89;120" \
			
 
				+            -DCMAKE_CUDA_ARCHITECTURES="$CUDA_ARCHS" \
			
 
				             -DMBR_BUILD_TESTS=OFF \
			
 
				             -DMBR_BUILD_CLI=ON
			
 
				             
			
 
				       - name: Build
			
 
				         run: cmake --build build --config Release -j $(nproc)
			
 
				         
			
 
				-      - name: Upload Build Artifacts (Debug Only)
			
 
				+      - name: Upload Artifacts
			
 
				         uses: actions/upload-artifact@v4
			
 
				         with:
			
 
				-          name: build-linux-cuda-verify
			
 
				+          name: build-linux-cuda-${{ matrix.cuda_version }}
			
 
				           path: |
			
 
				             build/bin/
			
 
				             build/lib*/
			
 
				             build/*.so
			
 
				           retention-days: 7
			
 
				+      
			
 
				+      # ----- Prepare Release Artifact -----
			
 
				+      - name: Prepare Release Artifact
			
 
				+        run: |
			
 
				+          # Stage the CLI and its shared libraries in one directory for upload.
			
 
				+          mkdir -p release/mel-band-roformer
			
 
				+
			
 
				+          # Find and copy the CLI executable (first match under build/).
			
 
				+          CLI_PATH=$(find build -name "mel_band_roformer-cli" -type f | head -n 1)
			
 
				+          if [[ -n "$CLI_PATH" ]]; then
			
 
				+            cp "$CLI_PATH" release/mel-band-roformer/
			
 
				+            chmod +x release/mel-band-roformer/mel_band_roformer-cli
			
 
				+          else
			
 
				+            # Surface the problem as an annotation instead of silently uploading an artifact without the CLI.
			
 
				+            echo "::warning::mel_band_roformer-cli not found under build/; the release artifact will not contain the CLI"
			
 
				+          fi
			
 
				+
			
 
				+          # Copy shared libraries (best effort: individual copy failures are ignored).
			
 
				+          # IFS= and -r keep paths containing whitespace or backslashes intact.
			
 
				+          find build -name "*.so*" | while IFS= read -r lib; do
			
 
				+            cp "$lib" release/mel-band-roformer/ 2>/dev/null || true
			
 
				+          done
			
 
				+
			
 
				+          # List contents
			
 
				+          echo "Release artifact contents:"
			
 
				+          ls -lh release/mel-band-roformer/
			
 
				+      
			
 
				+      - name: Upload Release Artifact
			
 
				+        uses: actions/upload-artifact@v4
			
 
				+        with:
			
 
				+          name: MelBandRoformer-linux-cuda-${{ matrix.cuda_version }}
			
 
				+          path: release/mel-band-roformer/
			
 
				+          retention-days: 30
			
 
				 
			
 
				-  # CUDA Build: Windows (Single-Architecture Distribution)
			
 
				   # ===========================================================================
			
 
				-  # Strategy: Each architecture built separately for minimal file size
			
 
				-  # - CUDA 11.8: GTX 10 / RTX 20 / RTX 30 / RTX 40 (Driver >= 520)
			
 
				-  # - CUDA 12.9: RTX 50 only (Driver >= 575)
			
 
				+  # CUDA Build: Windows (Compile Only - No GPU for testing)
			
 
				+  # ===========================================================================
			
 
				   build-cuda-windows:
			
 
				-    name: build-cuda-windows-${{ matrix.arch_name }}
			
 
				+    name: build-cuda-windows-${{ matrix.cuda_version }}
			
 
				     runs-on: windows-2022
			
 
				     strategy:
			
 
				       fail-fast: false
			
 
				       matrix:
			
 
				-        include:
			
 
				-          # CUDA 11.8 builds (Driver >= 520)
			
 
				-          - { cuda_version: "11.8.0", arch: "61", arch_name: "gtx10-pascal" }
			
 
				-          - { cuda_version: "11.8.0", arch: "75", arch_name: "rtx20-turing" }
			
 
				-          - { cuda_version: "11.8.0", arch: "80", arch_name: "rtx30-desktop" }
			
 
				-          - { cuda_version: "11.8.0", arch: "86", arch_name: "rtx30-mobile" }
			
 
				-          - { cuda_version: "11.8.0", arch: "89", arch_name: "rtx40-ada" }
			
 
				-          
			
 
				-          # CUDA 12.9 build (Driver >= 575)
			
 
				-          - { cuda_version: "12.9.1", arch: "120", arch_name: "rtx50-blackwell" }
			
 
				+        cuda_version: ["11.8.0", "12.9.1", "13.1.0"]
			
 
				+    
			
 
				+    env:
			
 
				+      CUDA_VERSION: ${{ matrix.cuda_version }}
			
 
				     
			
 
				     steps:
			
 
				       - name: Checkout
			
 
				         uses: actions/checkout@v4
			
 
				       
			
 
				       - name: Setup MSVC
			
 
				+        if: runner.os == 'Windows'
			
 
				         uses: ilammy/msvc-dev-cmd@v1
			
 
				 
			
 
				       - name: Clone GGML
			
 
				         run: git clone --depth 1 https://github.com/ggerganov/ggml.git ggml
			
 
				       
			
 
				       - name: Install CUDA Toolkit
			
 
				+        if: ${{ matrix.cuda_version != '13.1.0' }}
			
 
				         uses: Jimver/cuda-toolkit@master
			
 
				         with:
			
 
				           cuda: ${{ matrix.cuda_version }}
			
 
				           method: network
			
 
				           sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
			
 
				+        
			
 
				+      - name: Install CUDA Toolkit(13.1.0)
			
 
				+        if: ${{ matrix.cuda_version == '13.1.0' }}
			
 
				+        uses: Jimver/cuda-toolkit@master
			
 
				+        with:
			
 
				+          cuda: ${{ matrix.cuda_version }}
			
 
				+          method: network
			
 
				+          sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "nvrtc", "nvrtc_dev", "crt", "nvvm", "visual_studio_integration"]'
			
 
				 
			
 
				       - name: Install Ninja
			
 
				         run: choco install ninja -y
			
 
				         
			
 
				       - name: Configure and Build
			
 
				-        shell: cmd
			
 
				-        run: |
			
 
				-          REM CUDA 11.8 requires compatibility flags for newer MSVC
			
 
				-          if "${{ matrix.cuda_version }}" == "11.8.0" (
			
 
				-            set CUDAFLAGS=-allow-unsupported-compiler -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH -D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR
			
 
				-          ) else (
			
 
				-            set CUDAFLAGS=
			
 
				-          )
			
 
				-          
			
 
				-          cmake -B build -G "Ninja Multi-Config" ^
			
 
				-            -DGGML_DIR=ggml ^
			
 
				-            -DGGML_CUDA=ON ^
			
 
				-            -DGGML_CUDA_FORCE_MMQ=ON ^
			
 
				-            -DCMAKE_CUDA_RUNTIME_LIBRARY=Static ^
			
 
				-            -DCMAKE_CUDA_ARCHITECTURES="${{ matrix.arch }}" ^
			
 
				-            -DMBR_BUILD_TESTS=OFF ^
			
 
				-            -DMBR_BUILD_CLI=ON
			
 
				-          cmake --build build --config Release -j %NUMBER_OF_PROCESSORS%
			
 
				-        
			
 
				-      - name: Verify Binary Dependencies
			
 
				         shell: pwsh
			
 
				         run: |
			
 
				-          Write-Host "=== Verifying Binary Dependencies ===" -ForegroundColor Cyan
			
 
				-          
			
 
				-          # Find all DLLs and EXEs
			
 
				-          $binaries = Get-ChildItem -Path build -Include *.dll,*.exe -Recurse -File
			
 
				-          
			
 
				-          if ($binaries.Count -eq 0) {
			
 
				-            Write-Host "WARNING: No binaries found!" -ForegroundColor Yellow
			
 
				-            exit 0
			
 
				-          }
			
 
				-          
			
 
				-          $hasProblems = $false
			
 
				-          $forbiddenDeps = @("cudart64", "cudart32", "cublas64", "cublas32", "cublasLt64")
			
 
				-          
			
 
				-          foreach ($binary in $binaries) {
			
 
				-            Write-Host "`n--- $($binary.Name) ---" -ForegroundColor Green
			
 
				-            
			
 
				-            # Use dumpbin to get dependencies
			
 
				-            $deps = & dumpbin /dependents $binary.FullName 2>&1
			
 
				-            
			
 
				-            # Extract DLL names
			
 
				-            $dllDeps = $deps | Select-String -Pattern "^\s+(\S+\.dll)" | ForEach-Object { $_.Matches.Groups[1].Value }
			
 
				-            
			
 
				-            if ($dllDeps) {
			
 
				-              Write-Host "Dependencies:"
			
 
				-              foreach ($dep in $dllDeps) {
			
 
				-                # Check for forbidden dependencies
			
 
				-                $isForbidden = $false
			
 
				-                foreach ($forbidden in $forbiddenDeps) {
			
 
				-                  if ($dep -like "$forbidden*") {
			
 
				-                    Write-Host "  [FAIL] $dep" -ForegroundColor Red
			
 
				-                    $isForbidden = $true
			
 
				-                    $hasProblems = $true
			
 
				-                  }
			
 
				-                }
			
 
				-                if (-not $isForbidden) {
			
 
				-                  Write-Host "  [OK] $dep" -ForegroundColor Gray
			
 
				-                }
			
 
				-              }
			
 
				-            } else {
			
 
				-              Write-Host "  No DLL dependencies found (static build)" -ForegroundColor Gray
			
 
				-            }
			
 
				-          }
			
 
				-          
			
 
				-          Write-Host "`n=== Summary ===" -ForegroundColor Cyan
			
 
				-          if ($hasProblems) {
			
 
				-            Write-Host "FAILED: Found forbidden CUDA runtime dependencies!" -ForegroundColor Red
			
 
				-            Write-Host "The build should use static CUDA runtime linking." -ForegroundColor Red
			
 
				-            exit 1
			
 
				+          # Consumer GPU architectures:
			
 
				+          # 61=Pascal (GTX 10), 75=Turing (RTX 20/GTX 16), 86=Ampere (RTX 30), 89=Ada (RTX 40), 120=Blackwell (RTX 50)
			
 
				+          # The usable set depends on the toolkit major version, so pick it per matrix entry.
			
 
				+          $cudaVersion = "${{ matrix.cuda_version }}"
			
 
				+          if ($cudaVersion -match "^11\.") {
			
 
				+            # CUDA 11.x doesn't support arch 120
			
 
				+            $cudaArchs = "61;75;86;89"
			
 
				+            # Flags that let the older nvcc accept the newer MSVC/STL on the runner
			
 
				+            $env:CUDAFLAGS = "-allow-unsupported-compiler -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH -D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR"
			
 
				+          } elseif ($cudaVersion -match "^12\.") {
			
 
				+            # CUDA 12.x: Pascal through RTX 50 (Blackwell; arch 120 requires 12.8 or newer)
			
 
				+            $cudaArchs = "61;75;86;89;120"
			
 
				+            $env:CUDAFLAGS = ""
			
 
				           } else {
			
 
				-            Write-Host "PASSED: No forbidden CUDA dependencies found." -ForegroundColor Green
			
 
				+            # CUDA 13+: nvcc no longer targets pre-Turing GPUs, so Pascal (61) must be dropped
			
 
				+            # or compilation fails with an unsupported-architecture error.
			
 
				+            $cudaArchs = "75;86;89;120"
			
 
				+            $env:CUDAFLAGS = ""
			
 
				           }
			
 
				-      
			
 
				-      - name: Upload Build Artifacts
			
 
				+          Write-Host "Building for CUDA architectures: $cudaArchs"
			
 
				+          
			
 
				+          cmake -B build -G "Ninja Multi-Config" `
			
 
				+            -DGGML_DIR=ggml `
			
 
				+            -DGGML_CUDA=ON `
			
 
				+            -DGGML_CUDA_FORCE_MMQ=ON `
			
 
				+            "-DCMAKE_CUDA_ARCHITECTURES=$cudaArchs" `
			
 
				+            -DMBR_BUILD_TESTS=OFF `
			
 
				+            -DMBR_BUILD_CLI=ON
			
 
				+          cmake --build build --config Release -j $env:NUMBER_OF_PROCESSORS
			
 
				+        
			
 
				+      - name: Upload Artifacts
			
 
				         uses: actions/upload-artifact@v4
			
 
				         with:
			
 
				-          name: build-windows-cuda-${{ matrix.arch_name }}
			
 
				+          name: build-windows-cuda-${{ matrix.cuda_version }}
			
 
				           path: |
			
 
				             build/bin/
			
 
				             build/Release/
			
@@ -579,6 +568,6 @@ jobs:
 
				       - name: Upload Release Artifact
			
 
				         uses: actions/upload-artifact@v4
			
 
				         with:
			
 
				-          name: MelBandRoformer-windows-cuda-${{ matrix.arch_name }}
			
 
				+          name: MelBandRoformer-windows-cuda-${{ matrix.cuda_version }}
			
 
				           path: release\mel-band-roformer\
			
 
				-          retention-days: 30
			
 
				+          retention-days: 30
			
--- a/README.md
+++ b/README.md
@@ -26,23 +26,6 @@ This project is a pure C++ inference engine for the Mel-Band-Roformer audio sour
 
				 - **Pre-built Binaries**: Download executables for your platform from the [Releases](../../releases) page
			
 
				 - **GGUF Models**: Download pre-converted model files from [MelBandRoformer-GGUF](https://huggingface.co/chenmozhijin/MelBandRoformer-GGUF)
			
 
				 
			
 
				-#### Choosing CUDA Version (Windows)
			
 
				-
			
 
				-We provide optimized CUDA builds for different GPU architectures. Choose the version that matches your GPU:
			
 
				-
			
 
				-| GPU Series | Download | Driver |
			
 
				-|------------|----------|--------|
			
 
				-| GeForce RTX 50 series | `cuda-rtx50-blackwell` | ≥ 575 |
			
 
				-| GeForce RTX 40 series | `cuda-rtx40-ada` | ≥ 520 |
			
 
				-| GeForce RTX 30 series (Desktop) | `cuda-rtx30-desktop` | ≥ 520 |
			
 
				-| GeForce RTX 30 series (Laptop) | `cuda-rtx30-mobile` | ≥ 520 |
			
 
				-| GeForce RTX 20 series | `cuda-rtx20-turing` | ≥ 520 |
			
 
				-| GeForce GTX 10 series | `cuda-gtx10-pascal` | ≥ 520 |
			
 
				-
			
 
				-> **Tip**: Use `nvidia-smi` command to check your GPU model.
			
 
				-
			
 
				-> **Note**: Linux CUDA binaries are not provided. Linux users should build from source for optimal performance.
			
 
				-
			
 
				 ### Command Line Usage
			
 
				 
			
 
				 ```bash
			
--- a/README.zh.md
+++ b/README.zh.md
@@ -26,23 +26,6 @@ Mel-Band-Roformer 音频源分离模型的高性能 C++ 推理实现。
 
				 - **预构建程序**：在 [Releases](../../releases) 页面下载对应平台的可执行文件
			
 
				 - **GGUF 模型**：在 [MelBandRoformer-GGUF](https://huggingface.co/chenmozhijin/MelBandRoformer-GGUF) 下载预转换的模型文件
			
 
				 
			
 
				-#### CUDA 版本选择 (Windows)
			
 
				-
			
 
				-根据您的显卡型号选择对应版本：
			
 
				-
			
 
				-| 显卡系列 | 下载版本 | 驱动要求 |
			
 
				-|---------|---------|---------|
			
 
				-| GeForce RTX 50 系列 | `cuda-rtx50-blackwell` | ≥ 575 |
			
 
				-| GeForce RTX 40 系列 | `cuda-rtx40-ada` | ≥ 520 |
			
 
				-| GeForce RTX 30 系列 (台式机) | `cuda-rtx30-desktop` | ≥ 520 |
			
 
				-| GeForce RTX 30 系列 (笔记本) | `cuda-rtx30-mobile` | ≥ 520 |
			
 
				-| GeForce RTX 20 系列 | `cuda-rtx20-turing` | ≥ 520 |
			
 
				-| GeForce GTX 10 系列 | `cuda-gtx10-pascal` | ≥ 520 |
			
 
				-
			
 
				-> **提示**：使用 `nvidia-smi` 命令或 NVIDIA 控制面板查看显卡型号和驱动版本。
			
 
				-
			
 
				-> **注意**：Linux 不提供预编译 CUDA 版本，请[从源码构建](#编译命令)。
			
 
				-
			
 
				 ### 命令行使用
			
 
				 
			
 
				 ```bash