generate_test_audio.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. """
  2. Generate synthetic test audio for CI testing.
  3. Creates deterministic test signals without requiring any external audio files,
  4. ensuring reproducibility and avoiding copyright concerns.
  5. Usage:
  6. python generate_test_audio.py --output test_audio.wav
  7. """
  8. import argparse
  9. import numpy as np
  10. try:
  11. import soundfile as sf
  12. except ImportError:
  13. sf = None
  def generate_test_audio(
      output_path: str,
      duration: float = 5.0,
      sample_rate: int = 44100,
      seed: int = 0,
  ) -> None:
      """
      Generate deterministic test audio (sine wave synthesis) and save it as WAV.

      Creates a mixture of "vocal-like" and "accompaniment-like" sine waves
      plus a small amount of seeded Gaussian noise, and writes it as a
      two-channel 16-bit PCM WAV file.

      Args:
          output_path: Path to save the output WAV file
          duration: Duration in seconds (must be positive and finite)
          sample_rate: Sample rate in Hz (must be positive)
          seed: Seed for the noise generator; the same seed always yields
              the same samples

      Raises:
          ValueError: If duration or sample_rate is not positive/finite, or
              the combination yields zero samples.
          ImportError: If the soundfile package is not installed.
      """
      # Validate first so bad arguments fail with a clear message instead of
      # an opaque numpy error deep inside the synthesis.
      if not np.isfinite(duration) or duration <= 0:
          raise ValueError(
              f"duration must be a positive finite number, got {duration!r}"
          )
      if sample_rate <= 0:
          raise ValueError(f"sample_rate must be positive, got {sample_rate!r}")
      if int(sample_rate * duration) < 1:
          raise ValueError(
              "duration is too short to produce any samples at "
              f"{sample_rate} Hz, got {duration!r}"
          )

      if sf is None:
          raise ImportError(
              "soundfile is required for audio generation. "
              "Install with: pip install soundfile"
          )

      stereo = _synthesize_test_signal(duration, sample_rate, seed)

      # Save as WAV
      sf.write(output_path, stereo, sample_rate, subtype="PCM_16")

      print(f"Generated: {output_path}")
      print(f" Duration: {duration}s")
      print(f" Sample rate: {sample_rate} Hz")
      print(" Channels: 2 (stereo)")
      print(" Format: PCM_16")


  def _synthesize_test_signal(duration, sample_rate, seed=0):
      """Return the normalized stereo test mix as a (num_samples, 2) float32 array."""
      num_samples = int(sample_rate * duration)

      # Sample instants spaced exactly 1/sample_rate apart. An endpoint-
      # inclusive linspace would stretch the spacing to duration/(N-1) and
      # detune every tone. The phase is computed in float64 and only the
      # final mix is reduced to float32.
      t = np.arange(num_samples, dtype=np.float64) / sample_rate

      # Simulate vocals: multiple sine waves (fundamental + harmonics)
      # Using A3 (220 Hz) as base frequency
      vocals = (
          0.50 * np.sin(2 * np.pi * 220 * t)  # A3 fundamental
          + 0.30 * np.sin(2 * np.pi * 440 * t)  # A4 harmonic
          + 0.15 * np.sin(2 * np.pi * 880 * t)  # A5 harmonic
          + 0.05 * np.sin(2 * np.pi * 1760 * t)  # A6 harmonic
      )

      # Slight 5 Hz amplitude wobble on the vocals (more realistic)
      vibrato = 0.02 * np.sin(2 * np.pi * 5 * t)
      vocals = vocals * (1 + vibrato)

      # Simulate accompaniment: different frequency content
      accompaniment = (
          0.40 * np.sin(2 * np.pi * 110 * t)  # A2 bass
          + 0.30 * np.sin(2 * np.pi * 330 * t)  # E4
          + 0.20 * np.sin(2 * np.pi * 660 * t)  # E5
          + 0.10 * np.sin(2 * np.pi * 82.41 * t)  # E2 sub-bass
      )

      # Gaussian noise prevents zero-signal bands (crucial for RMSNorm
      # stability). A seeded generator keeps the output reproducible.
      rng = np.random.default_rng(seed)
      noise = rng.normal(0.0, 0.001, num_samples)

      # 100 ms fade-in/fade-out, clamped so the two ramps never overlap or
      # exceed the signal length on very short clips.
      envelope = np.ones(num_samples)
      fade_samples = min(int(0.1 * sample_rate), num_samples // 2)
      if fade_samples > 0:
          envelope[:fade_samples] = np.linspace(0.0, 1.0, fade_samples)
          envelope[-fade_samples:] = np.linspace(1.0, 0.0, fade_samples)

      # Mix vocals, accompaniment, and noise
      mix = (vocals + accompaniment) * envelope + noise

      # Normalize to prevent clipping, leaving some headroom
      max_val = np.max(np.abs(mix))
      if max_val > 0:
          mix = mix / max_val * 0.9

      # Create stereo (identical channels for simplicity)
      mix = mix.astype(np.float32)
      return np.stack([mix, mix], axis=-1)
  def main(argv=None):
      """
      Command-line entry point: parse arguments and generate the test audio.

      Args:
          argv: Optional list of argument strings to parse instead of
              sys.argv[1:] (the argparse default when None).

      Invalid --duration or --sample-rate values are reported through
      parser.error(), which prints a usage message and exits with status 2.
      """
      # Built from explicit string pieces so the help text does not depend
      # on how this source file is indented.
      epilog = (
          "\n"
          "This script generates deterministic test audio using sine wave synthesis.\n"
          "The output is suitable for testing audio processing pipelines without\n"
          "requiring real audio files.\n"
          "Example:\n"
          "python generate_test_audio.py --output test.wav\n"
          "python generate_test_audio.py --output test.wav --duration 10 --sample-rate 48000\n"
      )
      parser = argparse.ArgumentParser(
          description="Generate synthetic test audio for CI testing",
          formatter_class=argparse.RawDescriptionHelpFormatter,
          epilog=epilog,
      )
      parser.add_argument("--output", "-o", required=True, help="Output WAV file path")
      parser.add_argument(
          "--duration",
          "-d",
          type=float,
          default=5.0,
          help="Duration in seconds (default: 5.0)",
      )
      parser.add_argument(
          "--sample-rate",
          "-sr",
          type=int,
          default=44100,
          help="Sample rate in Hz (default: 44100)",
      )
      args = parser.parse_args(argv)

      # Reject values that would otherwise surface as numpy tracebacks. The
      # chained comparison is also False for NaN, so NaN is rejected too.
      if not (0 < args.duration < float("inf")):
          parser.error("--duration must be a positive finite number of seconds")
      if args.sample_rate <= 0:
          parser.error("--sample-rate must be a positive integer")

      generate_test_audio(
          output_path=args.output,
          duration=args.duration,
          sample_rate=args.sample_rate,
      )
  if __name__ == "__main__":
      # Run the CLI only when executed as a script, not when imported.
      main()