diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..6324d40 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.14 diff --git a/CMakeLists.txt b/CMakeLists.txt index f1c2230..e21606b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ pybind11_add_module(python-samplerate src/samplerate.cpp) target_include_directories(python-samplerate PRIVATE ./external/libsamplerate/include) if(MSVC) - target_compile_options(python-samplerate PRIVATE /EHsc /MP /bigobj) + target_compile_options(python-samplerate PRIVATE /EHsc /MP /bigobj /O2) set(CMAKE_EXE_LINKER_FLAGS /MANIFEST:NO) endif() @@ -29,19 +29,29 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR target_compile_options(python-samplerate PRIVATE -std=c++14 -O3 -Wall -Wextra -fPIC) endif() -### stick the package and libsamplerate version into the module -target_compile_definitions(python-samplerate - PUBLIC LIBSAMPLERATE_VERSION="${LIBSAMPLERATE_VERSION}" - PRIVATE $<$:VERSION_INFO="${PACKAGE_VERSION_INFO}"> -) - -### Final target setup +### Final target setup - must be before compile_definitions so LTO generator expression works set_target_properties( python-samplerate PROPERTIES PREFIX "" OUTPUT_NAME "samplerate" LINKER_LANGUAGE C + INTERPROCEDURAL_OPTIMIZATION TRUE ) +### stick the package and libsamplerate version into the module +target_compile_definitions(python-samplerate + PUBLIC LIBSAMPLERATE_VERSION="${LIBSAMPLERATE_VERSION}" + PRIVATE $<$:VERSION_INFO="${PACKAGE_VERSION_INFO}"> + # Build information for debugging + PRIVATE BUILD_TYPE="$" + PRIVATE COMPILER_ID="${CMAKE_CXX_COMPILER_ID}" + PRIVATE COMPILER_VERSION="${CMAKE_CXX_COMPILER_VERSION}" + PRIVATE CMAKE_VERSION="${CMAKE_VERSION}" + PRIVATE TARGET_ARCH="${CMAKE_SYSTEM_PROCESSOR}" + PRIVATE TARGET_OS="${CMAKE_SYSTEM_NAME}" + PUBLIC PYBIND11_VERSION_INFO="${PYBIND11_VERSION_INFO}" + PRIVATE LTO_ENABLED=$> +) + target_link_libraries(python-samplerate PUBLIC samplerate) diff --git a/README.md b/README.md index 
9b7dce4..da4d6e0 100644 --- a/README.md +++ b/README.md @@ -54,9 +54,47 @@ assert np.allclose(output_data_simple, output_data_full) # See `samplerate.CallbackResampler` for the Callback API, or # `examples/play_modulation.py` for an example. + +# Callback API Example +def producer(): + # Generate data in chunks + for i in range(10): + yield np.random.uniform(-1, 1, 1024).astype(np.float32) + yield None # Signal end of stream + +data_iter = producer() +callback = lambda: next(data_iter) + +resampler = samplerate.CallbackResampler(callback, ratio, converter) +output_chunks = [] +while True: + # Read chunks of resampled data + chunk = resampler.read(512) + if chunk.shape[0] == 0: + break + output_chunks.append(chunk) ``` -See `samplerate.resample`, `samplerate.Resampler`, and `samplerate.CallbackResampler` in the API documentation for details. +## Performance Tips + +To get the maximum performance from `samplerate`: + +1. **Use `np.float32`**: The underlying `libsamplerate` library operates on 32-bit floats. Passing `np.float64` (default numpy float) or integer arrays triggers an implicit copy and cast, which can be expensive. + ```python + # Fast (no copy) + data = np.zeros(1000, dtype=np.float32) + samplerate.resample(data, 1.5) + + # Slower (implicit copy + cast) + data = np.zeros(1000, dtype=np.float64) + samplerate.resample(data, 1.5) + ``` +2. **Use C-Contiguous Arrays**: Ensure your input arrays are C-contiguous (row-major). Non-contiguous arrays (e.g., column slices) will also trigger a copy. +3. **Adjust GIL Threshold**: If you are processing many small chunks in a multi-threaded application, the default "auto" GIL release threshold (1000 frames) might be too high or too low. You can tune it: + ```python + # Release GIL even for small chunks (e.g. 
>= 100 frames) + samplerate.set_gil_release_threshold(100) + ``` ## Multi-threading and GIL Control @@ -67,6 +105,7 @@ import samplerate # Default: "auto" mode - releases GIL only for large data (>= 1000 frames) # Balances single-threaded performance with multi-threading capability +# The threshold is configurable: samplerate.set_gil_release_threshold(2000) output = samplerate.resample(input_data, ratio) # Force GIL release - best for multi-threaded applications diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 239b595..a4254b7 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -1,4 +1,6 @@ include(FetchContent) +# Set pybind11 Python finding policy to use new FindPython +set(PYBIND11_FINDPYTHON ON CACHE BOOL "Use FindPython instead of FindPythonInterp") # pybind11 FetchContent_Declare( @@ -6,9 +8,13 @@ FetchContent_Declare( GIT_REPOSITORY https://github.com/pybind/pybind11 GIT_TAG f5fbe867d2d26e4a0a9177a51f6e568868ad3dc8 # 3.0.1 ) +## Change this if you update pybind11 version +set(PYBIND11_VERSION_INFO "3.0.1" CACHE STRING "pybind11 version") # <-- update pybind11 version here + FetchContent_MakeAvailable(pybind11) + # libsamplerate set(BUILD_TESTING OFF CACHE BOOL "Disable libsamplerate test build") set(CMAKE_POSITION_INDEPENDENT_CODE ON) diff --git a/pyproject.toml b/pyproject.toml index b424d40..3db5df2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,3 +51,11 @@ build-frontend = "build[uv]" build = ["cp39-*", "cp310-*", "cp311-*", "cp312-*", "cp313-*","cp314-*"] # Skip 32-bit builds and musllinux wheels skip = ["*-win32", "*-manylinux_i686", "*-musllinux*"] + +[tool.uv] +cache-keys = [ + { file = "pyproject.toml" }, + { file = "setup.py" }, + { file = "CMakeLists.txt" }, + { dir = "external" }, + { dir = "src" }] \ No newline at end of file diff --git a/src/samplerate.cpp b/src/samplerate.cpp index c15eaa7..f0eba25 100644 --- a/src/samplerate.cpp +++ b/src/samplerate.cpp @@ -40,6 +40,35 @@ #define VERSION_INFO 
"nightly" #endif +// Build information defaults (set by CMake) +#ifndef BUILD_TYPE +#define BUILD_TYPE "unknown" +#endif +#ifndef COMPILER_ID +#define COMPILER_ID "unknown" +#endif +#ifndef COMPILER_VERSION +#define COMPILER_VERSION "unknown" +#endif +#ifndef CMAKE_VERSION +#define CMAKE_VERSION "unknown" +#endif +#ifndef TARGET_ARCH +#define TARGET_ARCH "unknown" +#endif +#ifndef TARGET_OS +#define TARGET_OS "unknown" +#endif +#ifndef PYBIND11_VERSION_INFO +#define PYBIND11_VERSION_INFO "unknown" +#endif +#ifndef LIBSAMPLERATE_VERSION +#define LIBSAMPLERATE_VERSION "unknown" +#endif +#ifndef LTO_ENABLED +#define LTO_ENABLED 0 +#endif + // This value was empirically and somewhat arbitrarily chosen; increase it for further safety. #define END_OF_INPUT_EXTRA_OUTPUT_FRAMES 10000 @@ -51,7 +80,7 @@ // with multi-threaded performance (allowing parallelism for large data). // Empirically chosen based on benchmarks showing that at 1000 frames, the GIL // overhead is < 1% of total execution time for even the fastest converter types. -#define GIL_RELEASE_THRESHOLD_FRAMES 1000 +long gil_release_threshold_frames = 1000; namespace py = pybind11; using namespace pybind11::literals; @@ -72,13 +101,13 @@ namespace samplerate { bool should_release_gil(const py::object &release_gil, long num_frames) { if (release_gil.is_none()) { // "auto" mode: release GIL only for large data sizes - return num_frames >= GIL_RELEASE_THRESHOLD_FRAMES; + return num_frames >= gil_release_threshold_frames; } else if (py::isinstance(release_gil)) { return release_gil.cast(); } else if (py::isinstance(release_gil)) { std::string s = release_gil.cast(); if (s == "auto") { - return num_frames >= GIL_RELEASE_THRESHOLD_FRAMES; + return num_frames >= gil_release_threshold_frames; } throw std::domain_error("Invalid release_gil value. 
Use True, False, None, or 'auto'."); } @@ -177,7 +206,7 @@ class Resampler { ~Resampler() { src_delete(_state); } // src_delete handles nullptr case py::array_t process( - py::array_t input, + const py::array_t &input, double sr_ratio, bool end_of_input, const py::object &release_gil = py::none()) { // accessors for the arrays @@ -213,7 +242,7 @@ class Resampler { SRC_DATA src_data = { static_cast(inbuf.ptr), // data_in static_cast(outbuf.ptr), // data_out - inbuf.shape[0], // input_frames + static_cast(inbuf.shape[0]), // input_frames long(new_size), // output_frames 0, // input_frames_used, filled by libsamplerate 0, // output_frames_gen, filled by libsamplerate @@ -505,7 +534,7 @@ py::array_t resample( SRC_DATA src_data = { static_cast(inbuf.ptr), // data_in static_cast(outbuf.ptr), // data_out - inbuf.shape[0], // input_frames + static_cast(inbuf.shape[0]), // input_frames long(new_size), // output_frames 0, // input_frames_used, filled by libsamplerate 0, // output_frames_gen, filled by libsamplerate @@ -559,6 +588,78 @@ PYBIND11_MODULE(samplerate, m) { m.attr("__version__") = VERSION_INFO; m.attr("__libsamplerate_version__") = LIBSAMPLERATE_VERSION; + m.def("set_gil_release_threshold", [](long threshold) { + gil_release_threshold_frames = threshold; + }, "Set the minimum number of frames required to release the GIL in 'auto' mode."); + + m.def("get_gil_release_threshold", []() { + return gil_release_threshold_frames; + }, "Get the minimum number of frames required to release the GIL in 'auto' mode."); + + m.def("get_build_info", []() { + py::dict info; + info["version"] = VERSION_INFO; + info["libsamplerate_version"] = LIBSAMPLERATE_VERSION; + info["build_type"] = BUILD_TYPE; + info["compiler_id"] = COMPILER_ID; + info["compiler_version"] = COMPILER_VERSION; + info["cmake_version"] = CMAKE_VERSION; + info["target_arch"] = TARGET_ARCH; + info["target_os"] = TARGET_OS; + info["pybind11_version"] = PYBIND11_VERSION_INFO; + // C++ standard - MSVC uses _MSVC_LANG 
instead of __cplusplus +#ifdef _MSVC_LANG + #define CPP_STD_VALUE _MSVC_LANG +#else + #define CPP_STD_VALUE __cplusplus +#endif +#if CPP_STD_VALUE >= 202002L + info["cpp_standard"] = "C++20"; +#elif CPP_STD_VALUE >= 201703L + info["cpp_standard"] = "C++17"; +#elif CPP_STD_VALUE >= 201402L + info["cpp_standard"] = "C++14"; +#elif CPP_STD_VALUE >= 201103L + info["cpp_standard"] = "C++11"; +#else + info["cpp_standard"] = "pre-C++11"; +#endif +#undef CPP_STD_VALUE + // LTO status (passed from CMake) +#if LTO_ENABLED + info["lto_enabled"] = true; +#else + info["lto_enabled"] = false; +#endif + // Pointer size (32 vs 64 bit) + info["pointer_size_bits"] = sizeof(void*) * 8; + // Float size sanity check + info["float_size_bytes"] = sizeof(float); + info["gil_release_threshold"] = gil_release_threshold_frames; + return info; + }, R"doc( +Get detailed build information for debugging purposes. + +Returns +------- +dict + Dictionary containing: + - version: Package version + - libsamplerate_version: libsamplerate library version + - build_type: Build configuration (Release, Debug, etc.) + - compiler_id: Compiler used (MSVC, GNU, Clang, etc.) + - compiler_version: Compiler version string + - cmake_version: CMake version used for build + - target_arch: Target architecture (x86_64, arm64, etc.) 
+ - target_os: Target operating system + - pybind11_version: pybind11 version + - cpp_standard: C++ standard used + - lto_enabled: Whether Link Time Optimization was enabled + - pointer_size_bits: Pointer size (32 or 64) + - float_size_bytes: Size of float type (should be 4) + - gil_release_threshold: Current GIL release threshold +)doc"); + auto m_exceptions = m.def_submodule( "exceptions", "Sub-module containing sampling exceptions"); auto m_converters = m.def_submodule( diff --git a/src/samplerate.pyi b/src/samplerate.pyi new file mode 100644 index 0000000..f926391 --- /dev/null +++ b/src/samplerate.pyi @@ -0,0 +1,81 @@ +from typing import Optional, Union, Callable, Iterator, Tuple, overload, TypedDict +import numpy as np +import numpy.typing as npt + +class BuildInfo(TypedDict): + version: str + libsamplerate_version: str + build_type: str + compiler_id: str + compiler_version: str + cmake_version: str + target_arch: str + target_os: str + pybind11_version: str + cpp_standard: str + lto_enabled: bool + pointer_size_bits: int + float_size_bytes: int + gil_release_threshold: int + +class ConverterType: + sinc_best: int + sinc_medium: int + sinc_fastest: int + zero_order_hold: int + linear: int + +class ResamplingError(RuntimeError): ... + +def set_gil_release_threshold(threshold: int) -> None: ... +def get_gil_release_threshold() -> int: ... +def get_build_info() -> BuildInfo: ... + +def resample( + input_data: npt.NDArray[np.float32], + ratio: float, + converter_type: Union[ConverterType, str, int] = "sinc_best", + verbose: bool = False, + release_gil: Optional[Union[bool, str]] = None, +) -> npt.NDArray[np.float32]: ... + +class Resampler: + converter_type: int + channels: int + def __init__( + self, + converter_type: Union[ConverterType, str, int] = "sinc_best", + channels: int = 1, + ) -> None: ... 
+ def process( + self, + input_data: npt.NDArray[np.float32], + ratio: float, + end_of_input: bool = False, + release_gil: Optional[Union[bool, str]] = None, + ) -> npt.NDArray[np.float32]: ... + def reset(self) -> None: ... + def set_ratio(self, new_ratio: float) -> None: ... + def clone(self) -> "Resampler": ... + +class CallbackResampler: + ratio: float + converter_type: int + channels: int + def __init__( + self, + callback: Callable[[], Optional[npt.NDArray[np.float32]]], + ratio: float, + converter_type: Union[ConverterType, str, int] = "sinc_best", + channels: int = 1, + ) -> None: ... + def read( + self, + num_frames: int, + release_gil: Optional[Union[bool, str]] = None, + ) -> npt.NDArray[np.float32]: ... + def reset(self) -> None: ... + def set_starting_ratio(self, new_ratio: float) -> None: ... + def clone(self) -> "CallbackResampler": ... + def __enter__(self) -> "CallbackResampler": ... + def __exit__(self, exc_type, exc, exc_tb) -> None: ... diff --git a/tests/test_datatype_performance.py b/tests/test_datatype_performance.py new file mode 100644 index 0000000..de14797 --- /dev/null +++ b/tests/test_datatype_performance.py @@ -0,0 +1,45 @@ +import time +import numpy as np +import samplerate + +def benchmark_resample(input_data, ratio=1.5, converter='sinc_fastest'): + start_time = time.perf_counter() + samplerate.resample(input_data, ratio, converter) + end_time = time.perf_counter() + return end_time - start_time + +def test_datatype_performance(): + # Generate 15 seconds of audio at 44.1kHz + fs = 44100 + duration = 15.0 + t = np.arange(fs * duration) / fs + + # Create float64 (default) and float32 arrays + data_float64 = np.sin(2 * np.pi * 440 * t) + data_float32 = data_float64.astype(np.float32) + + # Warmup + benchmark_resample(data_float32) + + # Benchmark float32 (native) + times_f32 = [] + for _ in range(10): + times_f32.append(benchmark_resample(data_float32)) + avg_f32 = np.mean(times_f32) + + # Benchmark float64 (requires conversion) +
times_f64 = [] + for _ in range(10): + times_f64.append(benchmark_resample(data_float64)) + avg_f64 = np.mean(times_f64) + + print(f"\nPerformance Comparison ({duration:g}s audio, sinc_fastest):") + print(f"float32 (native): {avg_f32*1000:.3f} ms") + print(f"float64 (copy): {avg_f64*1000:.3f} ms") + print(f"Overhead: {(avg_f64 - avg_f32)*1000:.3f} ms ({(avg_f64/avg_f32 - 1)*100:.1f}%)") + + # We expect float32 to be faster, but we won't fail the test if it isn't + # (machine noise can affect small benchmarks), just report it. + +if __name__ == "__main__": + test_datatype_performance() \ No newline at end of file