Переглянути джерело

build: update CUDA build workflow with multi-version matrix

沉默の金 5 місяців тому
батько
коміт
fcf1eecf4c
3 змінених файлів з 89 додано та 134 видалено
  1. 89 100
      .github/workflows/build.yml
  2. 0 17
      README.md
  3. 0 17
      README.zh.md

+ 89 - 100
.github/workflows/build.yml

@@ -373,14 +373,18 @@ jobs:
           retention-days: 30
 
   # ===========================================================================
-  # CUDA Build: Linux (Verification Only - Not for Release)
+  # CUDA Build: Linux (Compile Verification Only)
   # ===========================================================================
-  # Note: We do not provide precompiled CUDA binaries for Linux.
-  # This build exists solely to verify the code compiles with CUDA on Linux.
-  # Linux users should build from source for optimal performance.
   build-cuda-linux:
-    name: build-cuda-linux-verify
-    runs-on: ubuntu-latest
+    name: build-cuda-linux-${{ matrix.cuda_version }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - { cuda_version: "11.8.0", os: ubuntu-22.04 }
+          - { cuda_version: "12.9.1", os: ubuntu-latest }
+          - { cuda_version: "13.1.0", os: ubuntu-latest }
     
     steps:  
       - name: Checkout
@@ -392,7 +396,7 @@ jobs:
       - name: Install CUDA Toolkit
         uses: Jimver/cuda-toolkit@master
         with:
-          cuda: "12.9.1"
+          cuda: ${{ matrix.cuda_version }}
           method: network
           sub-packages: '["nvcc", "cudart", "thrust"]'
           non-cuda-sub-packages: '["libcublas", "libcublas-dev"]'
@@ -404,150 +408,135 @@ jobs:
       - name: Configure
         run: |
           ls -ld ggml
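+          # Minimal architecture set (75=Turing, 86=Ampere) to keep this verification build fast.
+          # NOTE(review): this job also uploads a MelBandRoformer-linux-cuda-* artifact; confirm 75/86-only coverage is intended.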
+          CUDA_ARCHS="75;86"
+          echo "Verifying build for CUDA architectures: $CUDA_ARCHS"
           cmake -B build -G Ninja \
             -DCMAKE_BUILD_TYPE=Release \
             -DGGML_DIR=ggml \
             -DGGML_CUDA=ON \
             -DGGML_CUDA_FORCE_MMQ=ON \
-            -DCMAKE_CUDA_RUNTIME_LIBRARY=Static \
-            -DCMAKE_CUDA_ARCHITECTURES="61;75;80;86;89;120" \
+            -DCMAKE_CUDA_ARCHITECTURES="$CUDA_ARCHS" \
             -DMBR_BUILD_TESTS=OFF \
             -DMBR_BUILD_CLI=ON
             
       - name: Build
         run: cmake --build build --config Release -j $(nproc)
         
-      - name: Upload Build Artifacts (Debug Only)
+      - name: Upload Artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-linux-cuda-verify
+          name: build-linux-cuda-${{ matrix.cuda_version }}
           path: |
             build/bin/
             build/lib*/
             build/*.so
           retention-days: 7
+      
+      # ----- Prepare Release Artifact -----
+      - name: Prepare Release Artifact
+        run: |
+          # Create release directory
+          mkdir -p release/mel-band-roformer
+          
+          # Find and copy CLI executable
+          CLI_PATH=$(find build -name "mel_band_roformer-cli" -type f | head -n 1)
+          if [[ -n "$CLI_PATH" ]]; then
+            cp "$CLI_PATH" release/mel-band-roformer/
+            chmod +x release/mel-band-roformer/mel_band_roformer-cli
+          fi
+          
+          # Copy shared libraries
+          find build -name "*.so*" | while read lib; do
+            cp "$lib" release/mel-band-roformer/ 2>/dev/null || true
+          done
+          
+          # List contents
+          echo "Release artifact contents:"
+          ls -lh release/mel-band-roformer/
+      
+      - name: Upload Release Artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: MelBandRoformer-linux-cuda-${{ matrix.cuda_version }}
+          path: release/mel-band-roformer/
+          retention-days: 30
 
-  # CUDA Build: Windows (Single-Architecture Distribution)
   # ===========================================================================
-  # Strategy: Each architecture built separately for minimal file size
-  # - CUDA 11.8: GTX 10 / RTX 20 / RTX 30 / RTX 40 (Driver >= 520)
-  # - CUDA 12.9: RTX 50 only (Driver >= 575)
+  # CUDA Build: Windows (Compile Only - No GPU for testing)
+  # ===========================================================================
   build-cuda-windows:
-    name: build-cuda-windows-${{ matrix.arch_name }}
+    name: build-cuda-windows-${{ matrix.cuda_version }}
     runs-on: windows-2022
     strategy:
       fail-fast: false
       matrix:
-        include:
-          # CUDA 11.8 builds (Driver >= 520)
-          - { cuda_version: "11.8.0", arch: "61", arch_name: "gtx10-pascal" }
-          - { cuda_version: "11.8.0", arch: "75", arch_name: "rtx20-turing" }
-          - { cuda_version: "11.8.0", arch: "80", arch_name: "rtx30-desktop" }
-          - { cuda_version: "11.8.0", arch: "86", arch_name: "rtx30-mobile" }
-          - { cuda_version: "11.8.0", arch: "89", arch_name: "rtx40-ada" }
-          
-          # CUDA 12.9 build (Driver >= 575)
-          - { cuda_version: "12.9.1", arch: "120", arch_name: "rtx50-blackwell" }
+        cuda_version: ["11.8.0", "12.9.1", "13.1.0"]
+    
+    env:
+      CUDA_VERSION: ${{ matrix.cuda_version }}
     
     steps:
       - name: Checkout
         uses: actions/checkout@v4
       
       - name: Setup MSVC
+        if: runner.os == 'Windows'
         uses: ilammy/msvc-dev-cmd@v1
 
       - name: Clone GGML
         run: git clone --depth 1 https://github.com/ggerganov/ggml.git ggml
       
       - name: Install CUDA Toolkit
+        if: ${{ matrix.cuda_version != '13.1.0' }}
         uses: Jimver/cuda-toolkit@master
         with:
           cuda: ${{ matrix.cuda_version }}
           method: network
           sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
+        
+      - name: Install CUDA Toolkit(13.1.0)
+        if: ${{ matrix.cuda_version == '13.1.0' }}
+        uses: Jimver/cuda-toolkit@master
+        with:
+          cuda: ${{ matrix.cuda_version }}
+          method: network
+          sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "nvrtc", "nvrtc_dev", "crt", "nvvm", "visual_studio_integration"]'
 
       - name: Install Ninja
         run: choco install ninja -y
         
       - name: Configure and Build
-        shell: cmd
-        run: |
-          REM CUDA 11.8 requires compatibility flags for newer MSVC
-          if "${{ matrix.cuda_version }}" == "11.8.0" (
-            set CUDAFLAGS=-allow-unsupported-compiler -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH -D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR
-          ) else (
-            set CUDAFLAGS=
-          )
-          
-          cmake -B build -G "Ninja Multi-Config" ^
-            -DGGML_DIR=ggml ^
-            -DGGML_CUDA=ON ^
-            -DGGML_CUDA_FORCE_MMQ=ON ^
-            -DCMAKE_CUDA_RUNTIME_LIBRARY=Static ^
-            -DCMAKE_CUDA_ARCHITECTURES="${{ matrix.arch }}" ^
-            -DMBR_BUILD_TESTS=OFF ^
-            -DMBR_BUILD_CLI=ON
-          cmake --build build --config Release -j %NUMBER_OF_PROCESSORS%
-        
-      - name: Verify Binary Dependencies
         shell: pwsh
         run: |
-          Write-Host "=== Verifying Binary Dependencies ===" -ForegroundColor Cyan
-          
-          # Find all DLLs and EXEs
-          $binaries = Get-ChildItem -Path build -Include *.dll,*.exe -Recurse -File
-          
-          if ($binaries.Count -eq 0) {
-            Write-Host "WARNING: No binaries found!" -ForegroundColor Yellow
-            exit 0
-          }
-          
-          $hasProblems = $false
-          $forbiddenDeps = @("cudart64", "cudart32", "cublas64", "cublas32", "cublasLt64")
-          
-          foreach ($binary in $binaries) {
-            Write-Host "`n--- $($binary.Name) ---" -ForegroundColor Green
-            
-            # Use dumpbin to get dependencies
-            $deps = & dumpbin /dependents $binary.FullName 2>&1
-            
-            # Extract DLL names
-            $dllDeps = $deps | Select-String -Pattern "^\s+(\S+\.dll)" | ForEach-Object { $_.Matches.Groups[1].Value }
-            
-            if ($dllDeps) {
-              Write-Host "Dependencies:"
-              foreach ($dep in $dllDeps) {
-                # Check for forbidden dependencies
-                $isForbidden = $false
-                foreach ($forbidden in $forbiddenDeps) {
-                  if ($dep -like "$forbidden*") {
-                    Write-Host "  [FAIL] $dep" -ForegroundColor Red
-                    $isForbidden = $true
-                    $hasProblems = $true
-                  }
-                }
-                if (-not $isForbidden) {
-                  Write-Host "  [OK] $dep" -ForegroundColor Gray
-                }
-              }
-            } else {
-              Write-Host "  No DLL dependencies found (static build)" -ForegroundColor Gray
-            }
-          }
-          
-          Write-Host "`n=== Summary ===" -ForegroundColor Cyan
-          if ($hasProblems) {
-            Write-Host "FAILED: Found forbidden CUDA runtime dependencies!" -ForegroundColor Red
-            Write-Host "The build should use static CUDA runtime linking." -ForegroundColor Red
-            exit 1
+          # Consumer GPU architectures:
+          # 61=Pascal (GTX 10), 75=Turing (RTX 20/GTX 16), 86=Ampere (RTX 30), 89=Ada (RTX 40), 120=Blackwell (RTX 50)
+          $cudaVersion = "${{ matrix.cuda_version }}"
+          if ($cudaVersion -match "^11\.") {
+            # CUDA 11.x doesn't support arch 120
+            $cudaArchs = "61;75;86;89"
+            $env:CUDAFLAGS = "-allow-unsupported-compiler -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH -D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR"
           } else {
-            Write-Host "PASSED: No forbidden CUDA dependencies found." -ForegroundColor Green
+            # CUDA 12.x adds RTX 50 (Blackwell, 120); CUDA 13+ also drops Pascal (61), which nvcc 13 no longer compiles offline
+            $cudaArchs = if ($cudaVersion -match "^12\.") { "61;75;86;89;120" } else { "75;86;89;120" }
+            $env:CUDAFLAGS = ""
           }
-      
-      - name: Upload Build Artifacts
+          Write-Host "Building for CUDA architectures: $cudaArchs"
+          
+          cmake -B build -G "Ninja Multi-Config" `
+            -DGGML_DIR=ggml `
+            -DGGML_CUDA=ON `
+            -DGGML_CUDA_FORCE_MMQ=ON `
+            "-DCMAKE_CUDA_ARCHITECTURES=$cudaArchs" `
+            -DMBR_BUILD_TESTS=OFF `
+            -DMBR_BUILD_CLI=ON
+          cmake --build build --config Release -j $env:NUMBER_OF_PROCESSORS
+        
+      - name: Upload Artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-windows-cuda-${{ matrix.arch_name }}
+          name: build-windows-cuda-${{ matrix.cuda_version }}
           path: |
             build/bin/
             build/Release/
@@ -579,6 +568,6 @@ jobs:
       - name: Upload Release Artifact
         uses: actions/upload-artifact@v4
         with:
-          name: MelBandRoformer-windows-cuda-${{ matrix.arch_name }}
+          name: MelBandRoformer-windows-cuda-${{ matrix.cuda_version }}
           path: release\mel-band-roformer\
-          retention-days: 30
+          retention-days: 30

+ 0 - 17
README.md

@@ -26,23 +26,6 @@ This project is a pure C++ inference engine for the Mel-Band-Roformer audio sour
 - **Pre-built Binaries**: Download executables for your platform from the [Releases](../../releases) page
 - **GGUF Models**: Download pre-converted model files from [MelBandRoformer-GGUF](https://huggingface.co/chenmozhijin/MelBandRoformer-GGUF)
 
-#### Choosing CUDA Version (Windows)
-
-We provide optimized CUDA builds for different GPU architectures. Choose the version that matches your GPU:
-
-| GPU Series | Download | Driver |
-|------------|----------|--------|
-| GeForce RTX 50 series | `cuda-rtx50-blackwell` | ≥ 575 |
-| GeForce RTX 40 series | `cuda-rtx40-ada` | ≥ 520 |
-| GeForce RTX 30 series (Desktop) | `cuda-rtx30-desktop` | ≥ 520 |
-| GeForce RTX 30 series (Laptop) | `cuda-rtx30-mobile` | ≥ 520 |
-| GeForce RTX 20 series | `cuda-rtx20-turing` | ≥ 520 |
-| GeForce GTX 10 series | `cuda-gtx10-pascal` | ≥ 520 |
-
-> **Tip**: Use `nvidia-smi` command to check your GPU model.
-
-> **Note**: Linux CUDA binaries are not provided. Linux users should build from source for optimal performance.
-
 ### Command Line Usage
 
 ```bash

+ 0 - 17
README.zh.md

@@ -26,23 +26,6 @@ Mel-Band-Roformer 音频源分离模型的高性能 C++ 推理实现。
 - **预构建程序**:在 [Releases](../../releases) 页面下载对应平台的可执行文件
 - **GGUF 模型**:在 [MelBandRoformer-GGUF](https://huggingface.co/chenmozhijin/MelBandRoformer-GGUF) 下载预转换的模型文件
 
-#### CUDA 版本选择 (Windows)
-
-根据您的显卡型号选择对应版本:
-
-| 显卡系列 | 下载版本 | 驱动要求 |
-|---------|---------|---------|
-| GeForce RTX 50 系列 | `cuda-rtx50-blackwell` | ≥ 575 |
-| GeForce RTX 40 系列 | `cuda-rtx40-ada` | ≥ 520 |
-| GeForce RTX 30 系列 (台式机) | `cuda-rtx30-desktop` | ≥ 520 |
-| GeForce RTX 30 系列 (笔记本) | `cuda-rtx30-mobile` | ≥ 520 |
-| GeForce RTX 20 系列 | `cuda-rtx20-turing` | ≥ 520 |
-| GeForce GTX 10 系列 | `cuda-gtx10-pascal` | ≥ 520 |
-
-> **提示**:使用 `nvidia-smi` 命令或 NVIDIA 控制面板查看显卡型号和驱动版本。
-
-> **注意**:Linux 不提供预编译 CUDA 版本,请[从源码构建](#编译命令)。
-
 ### 命令行使用
 
 ```bash