// lainlives / Roformer.cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include <ggml.h>
#include <ggml-alloc.h>
#include <ggml-backend.h>

#include "../src/model.h"
#include "../src/utils.h"

/**
 * test_component_layers.cpp
 * 
 * Verifies Transformer layers against golden tensors from export_debug.py
 * Copied from tests_old/test_component_layers.cpp with env var support
 */

/// Returns the model file path for this test.
///
/// The value of the BSR_MODEL_PATH environment variable is used when the
/// variable is set; otherwise the path falls back to "bs_roformer.gguf".
std::string GetModelPath() {
    if (const char* override_path = std::getenv("BSR_MODEL_PATH"); override_path != nullptr) {
        return std::string(override_path);
    }
    return std::string("bs_roformer.gguf");
}

/// Returns the directory that holds the golden test tensors.
///
/// The value of the BSR_TEST_DATA_DIR environment variable is used when the
/// variable is set; otherwise the current directory (".") is returned.
std::string GetTestDataDir() {
    if (const char* override_dir = std::getenv("BSR_TEST_DATA_DIR"); override_dir != nullptr) {
        return std::string(override_dir);
    }
    return std::string(".");
}

int main(int argc, char* argv[]) {
    std::cout << "========================================" << std::endl;
    std::cout << "Test: Transformer Layers Verification" << std::endl;
    std::cout << "========================================" << std::endl;
    
    std::string model_path = GetModelPath();
    std::string debug_dir = GetTestDataDir();
    
    if (argc > 1) model_path = argv[1];
    if (argc > 2) debug_dir = argv[2];
    
    try {
        // 1. Load Model
        std::cout << "\n[1/6] Loading model..." << std::endl;
        BSRoformer model;
        model.Initialize(model_path);
        
        // 2. Load golden tensors
        std::cout << "\n[2/6] Loading golden tensors..." << std::endl;
        
        // Load after_band_split (input to Transformers)
        auto [input_data, input_shape] = utils::load_activation(debug_dir, "after_band_split");
        if (!input_data) {
            std::cerr << "Failed to load after_band_split.npy" << std::endl;
            return 1;
        }
        std::cout << "  Input (after_band_split) shape: [";
        for (size_t i = 0; i < input_shape.size(); ++i) {
            std::cout << input_shape[i];
            if (i < input_shape.size() - 1) std::cout << ", ";
        }
        std::cout << "]" << std::endl;
        
        // Load before_mask_est (expected output after all 6 layers)
        auto [expected_data, expected_shape] = utils::load_activation(debug_dir, "before_mask_est");
        if (!expected_data) {
            std::cerr << "Failed to load before_mask_est.npy" << std::endl;
            utils::free_npy_data(input_data);
            return 1;
        }
        std::cout << "  Expected (before_mask_est) shape: [";
        for (size_t i = 0; i < expected_shape.size(); ++i) {
            std::cout << expected_shape[i];
            if (i < expected_shape.size() - 1) std::cout << ", ";
        }
        std::cout << "]" << std::endl;
        
        // Extract dimensions from shapes
        // PyTorch: [batch, time, bands, dim]
        int batch = static_cast<int>(input_shape[0]);
        int n_frames = static_cast<int>(input_shape[1]);
        int n_bands = static_cast<int>(input_shape[2]);
        int dim = static_cast<int>(input_shape[3]);
        
        std::cout << "  batch=" << batch << ", n_frames=" << n_frames 
                  << ", n_bands=" << n_bands << ", dim=" << dim << std::endl;
        
        // 3. Build computation graph
        std::cout << "\n[3/6] Building computation graph..." << std::endl;
        
        size_t mem_size = 1024 * 1024 * 1024;  // 1GB for Transformers
        struct ggml_init_params ctx_params = {
            /*.mem_size   = */ mem_size,
            /*.mem_buffer = */ nullptr,
            /*.no_alloc   = */ true,
        };
        ggml_context* ctx = ggml_init(ctx_params);
        
        // Expanded position tensors for CUDA RoPE compatibility:
        // pos_time_exp: size [T * F * B], repeating [0..T-1] for each F*B batch
        // pos_freq_exp: size [F * T * B], repeating [0..F-1] for each T*B batch
        int time_exp_size = n_frames * n_bands * batch;  // T * F * B
        int freq_exp_size = n_bands * n_frames * batch;  // F * T * B
        
        std::vector<int32_t> pos_time_exp_data(time_exp_size);
        for (int i = 0; i < time_exp_size; ++i) {
            pos_time_exp_data[i] = i % n_frames;  // Repeat [0..T-1]
        }
        
        std::vector<int32_t> pos_freq_exp_data(freq_exp_size);
        for (int i = 0; i < freq_exp_size; ++i) {
            pos_freq_exp_data[i] = i % n_bands;  // Repeat [0..F-1]
        }
        
        ggml_cgraph* gf = ggml_new_graph_custom(ctx, 32768, false);
        
        // Create input tensor: [dim, bands, time, batch] (GGML order)
        ggml_tensor* input = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 
                                                 dim, n_bands, n_frames, batch);
        ggml_set_input(input);
        
        // Create expanded position tensors for RoPE
        ggml_tensor* pos_time_exp = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, time_exp_size);
        ggml_set_input(pos_time_exp);
        
        ggml_tensor* pos_freq_exp = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, freq_exp_size);
        ggml_set_input(pos_freq_exp);
        
        // Build Transformers graph
        ggml_tensor* x = model.BuildTransformersGraph(ctx, input, gf, pos_time_exp, pos_freq_exp, n_frames, batch);
        if (!x) {
            std::cerr << "FAILED: BuildTransformersGraph returned nullptr" << std::endl;
            utils::free_npy_data(input_data);
            utils::free_npy_data(expected_data);
            ggml_free(ctx);
            return 1;
        }
        
        // Mark output
        ggml_tensor* output = ggml_dup(ctx, x);
        ggml_set_output(output);
        ggml_build_forward_expand(gf, output);
        
        std::cout << "  Graph built with " << ggml_graph_n_nodes(gf) << " nodes" << std::endl;
        
        // 4. Allocate and execute
        std::cout << "\n[4/6] Allocating graph..." << std::endl;
        
        ggml_gallocr_t allocr = ggml_gallocr_new(
            ggml_backend_get_default_buffer_type(model.GetBackend())
        );
        
        if (!ggml_gallocr_alloc_graph(allocr, gf)) {
            std::cerr << "FAILED: Failed to allocate graph" << std::endl;
            utils::free_npy_data(input_data);
            utils::free_npy_data(expected_data);
            ggml_gallocr_free(allocr);
            ggml_free(ctx);
            return 1;
        }
        
        std::cout << "\n[5/6] Executing graph..." << std::endl;
        
        // Copy input data
        ggml_backend_tensor_set(input, input_data, 0, ggml_nbytes(input));
        
        // Copy expanded position tensors
        ggml_backend_tensor_set(pos_time_exp, pos_time_exp_data.data(), 0, ggml_nbytes(pos_time_exp));
        ggml_backend_tensor_set(pos_freq_exp, pos_freq_exp_data.data(), 0, ggml_nbytes(pos_freq_exp));
        
        // Compute
        ggml_backend_graph_compute(model.GetBackend(), gf);
        
        // 5. Compare results
        std::cout << "\n[6/6] Comparing results..." << std::endl;
        
        // Copy output from GPU to CPU for comparison
        std::vector<float> output_data(ggml_nelements(output));
        ggml_backend_tensor_get(output, output_data.data(), 0, ggml_nbytes(output));
        
        // Compare element counts
        size_t expected_nelements = utils::shape_nelements(expected_shape);
        std::cout << "  Output elements: " << output_data.size() << std::endl;
        std::cout << "  Expected elements: " << expected_nelements << std::endl;
        
        // Compute comparison statistics directly
        float max_abs = 0.0f;
        float sum_abs = 0.0f;
        for (size_t i = 0; i < output_data.size() && i < expected_nelements; ++i) {
            float diff = std::abs(expected_data[i] - output_data[i]);
            max_abs = std::max(max_abs, diff);
            sum_abs += diff;
        }
        float mean_abs = sum_abs / output_data.size();
        
        std::cout << "\n[Comparison] Transformers Output" << std::endl;
        std::cout << "  Max abs diff:  " << max_abs << std::endl;
        std::cout << "  Mean abs diff: " << mean_abs << std::endl;
        
        bool match = max_abs <= 3e-2f || mean_abs <= 3e-3f;
        
        // Cleanup
        utils::free_npy_data(input_data);
        utils::free_npy_data(expected_data);
        ggml_gallocr_free(allocr);
        ggml_free(ctx);
        
        if (match) {
            std::cout << "\nPASSED: Transformers match PyTorch output" << std::endl;
            return 0;
        } else {
            std::cout << "\nFAILED: Transformers do not match PyTorch output" << std::endl;
            return 1;
        }
        
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }
}