diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml
new file mode 100644
index 0000000..b7053c0
--- /dev/null
+++ b/.github/workflows/draft-pdf.yml
@@ -0,0 +1,24 @@
+name: Draft PDF
+on: [push]
+
+jobs:
+  paper:
+    runs-on: ubuntu-latest
+    name: Paper Draft
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Build draft PDF
+        uses: openjournals/openjournals-draft-action@master
+        with:
+          journal: joss
+          # This should be the path to the paper within your repo.
+          paper-path: paper/paper.md
+      - name: Upload
+        uses: actions/upload-artifact@v4
+        with:
+          name: paper
+          # This is the output path where Pandoc will write the compiled
+          # PDF. Note, this should be the same directory as the input
+          # paper.md
+          path: paper/paper.pdf
diff --git a/LICENSE b/LICENSE
index 03a5c24..5178239 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2016--2024 pytch development team.
+Copyright (c) 2016--2025 pytch development team.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index 335a62f..0093343 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,13 @@ Pytch - A Real-Time Pitch Analysis Tool For Polyphonic Music
 
 ![screenshot](pictures/screenshot.png)
 
+## Demo & Wiki
+
+If you want to see `pytch` in action, watch our [demo video](https://youtu.be/LPt83Wqf2e4).
+
+Please have a look at our [wiki](https://github.com/pytchtracking/pytch/wiki) for an explanation of the GUI.
+
+
 ## Download and Installation
 
 Clone the project
@@ -30,9 +37,9 @@ pytch
 ```
 hit return and sing!
 
-## Contribution
+## Contributing
 
-Every contribution is welcome. To ensure consistent style we use [black](https://github.com/psf/black).
+Every contribution is welcome. Please feel free to open an issue or a pull request. To ensure consistent style we use [black](https://github.com/psf/black).
 You can add automated style checks at commit time using [pre-commit](https://pre-commit.com/)
 
 ```bash
diff --git a/paper/compile.sh b/paper/compile.sh
new file mode 100644
index 0000000..7263c37
--- /dev/null
+++ b/paper/compile.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+cd ..
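+# Build the JOSS paper draft locally with the openjournals/inara Docker image.
+# Assumes Docker is installed and that the script is invoked from the paper/
+# directory: it changes to the repository root so that paper/paper.md resolves.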
+docker run --rm -it -v "$PWD:/data" -u "$(id -u):$(id -g)" openjournals/inara -o pdf paper/paper.md -p diff --git a/paper/paper.bib b/paper/paper.bib new file mode 100644 index 0000000..e619fc3 --- /dev/null +++ b/paper/paper.bib @@ -0,0 +1,181 @@ +@article{MeierCM24_RealTimePLP_TISMIR, +author = {Peter Meier and Ching-Yu Chiu and Meinard M{\"u}ller}, +title = {{A} Real-Time Beat Tracking System with Zero Latency and Enhanced Controllability}, +journal = {Transactions of the International Society for Music Information Retrieval ({TISMIR})}, +year = {2024}, +volume = {7}, +number = {1}, +pages = {213--227}, +doi = {10.5334/tismir.189}, +url-demo = {https://audiolabs-erlangen.de/resources/MIR/2024-TISMIR-RealTimePLP} +} + +@inproceedings{StefaniT22_RealTimeMIR_DAFX, + title = {On the challenges of embedded real-time music information retrieval}, + author = {Domenico Stefani and Luca Turchet}, + booktitle = {Proceedings of the International Conference on Digital Audio Effects (DAFx)}, + volume = {3}, + pages = {177--184}, + year = {2022} +} + +@phdthesis{Cuesta22_Multipitch_PhD, + author = {Helena Cuesta}, + year = {2022}, + title = {Data-driven Pitch Content Description of Choral Singing Recordings}, + school = {Universitat Pompeu Fabra, Barcelona, Spain}, +} + +@article{BerglinPD22_VisualFeedback_JPM, +author = {Jacob Berglin and Peter Q Pfordresher and Steven Demorest}, +title = {The effect of visual and auditory feedback on adult poor-pitch remediation}, +journal = {Psychology of Music}, +volume = {50}, +number = {4}, +pages = {1077-1090}, +year = {2022}, +doi = {10.1177/03057356211026730}, +URL = {https://doi.org/10.1177/03057356211026730}, +} + +@inproceedings{RosenzweigSM22_libf0_ISMIR-LBD, +author = {Sebastian Rosenzweig and Simon Schw{\"a}r and Meinard M{\"u}ller}, +title = {libf0: A Python Library for Fundamental Frequency Estimation}, +booktitle = {Late Breaking Demos of the International Society for Music Information Retrieval Conference ({ISMIR})}, +address = {Bengaluru, India}, +year = {2022}, +url-pdf = {https://archives.ismir.net/ismir2022/latebreaking/000003.pdf}, +url-code = {https://github.com/groupmm/libf0/} +} + +@book{Mueller21_FMP_SPRINGER, + author = {Meinard M\"{u}ller}, + title = {Fundamentals of Music Processing -- Using Python and Jupyter Notebooks}, + type = {Monograph}, + year = {2021}, + isbn = {978-3-030-69807-2}, + publisher = {Springer Verlag}, + edition = {2nd}, + pages = {1--495}, + doi = {10.1007/978-3-030-69808-9}, + url-details = {http://www.music-processing.de} +} + +@article{RosenzweigCWSGM20_DCS_TISMIR, + author = {Sebastian Rosenzweig and Helena Cuesta and Christof Wei{\ss} and Frank Scherbaum and Emilia G{\'o}mez and Meinard M{\"u}ller}, + title = {{D}agstuhl {ChoirSet}: {A} Multitrack Dataset for {MIR} Research on Choral Singing}, + journal = {Transactions of the International Society for Music Information Retrieval ({TISMIR})}, + volume = {3}, + number = {1}, + year = {2020}, + pages = {98--110}, + publisher = {Ubiquity Press}, + doi = {10.5334/tismir.48}, + url-pdf = {2020_RosenzweigCWSGM_DagstuhlChoirSet_TISMIR_ePrint.pdf}, + url-demo = {https://www.audiolabs-erlangen.de/resources/MIR/2020-DagstuhlChoirSet} +} + +@inproceedings{ScherbaumMRM19_MultimediaRecordings_FMA, + author = {Frank Scherbaum and Nana Mzhavanadze and Sebastian Rosenzweig and Meinard M{\"u}ller}, + title = {Multi-media recordings of traditional {G}eorgian vocal music for computational analysis}, + booktitle = {Proceedings of the International Workshop on Folk Music Analysis 
({FMA})}, + address = {Birmingham, UK}, + year = {2019}, + pages = {1--6} +} + +@inproceedings{RosenzweigSM19_StableF0_ISMIR, + author = {Sebastian Rosenzweig and Frank Scherbaum and Meinard M{\"u}ller}, + title = {Detecting Stable Regions in Frequency Trajectories for Tonal Analysis of Traditional {G}eorgian Vocal Music}, + booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})}, + pages = {352--359}, + address = {Delft, The Netherlands}, + year = {2019}, + doi = {10.5281/zenodo.3527816} +} + +@inproceedings{KriegerowskiS_Pytch_2017, + author = {Marius Kriegerowski and Frank Scherbaum}, + title = {Pytch - simultane mehrkanalige Audioanalyse von Gesangstimmen}, + booktitle = {Late-breaking Demos of the Workshop: Musik trifft Informatik at 47. Jahrestagung der Gesellschaft für Informatik}, + year = {2017}, + address = {Chemnitz, Germany}, +} + +@article{Scherbaum16_LarynxMicrophones_IWFMA, + author = {Frank Scherbaum}, + title = {On the Benefit of Larynx-Microphone Field Recordings for the Documentation and Analysis of Polyphonic Vocal Music}, + journal = {Proceedings of the International Workshop Folk Music Analysis}, + pages = {80--87}, + address = {Dublin,Ireland}, + year = {2016} +} + +@article{SixCL13_Tarsos_JNMR, + author = {Joren Six and Olmo Cornelis and Marc Leman}, + title = {Tarsos, a Modular Platform for Precise Pitch Analysis of {W}estern and Non-{W}estern Music}, + journal = {Journal of New Music Research}, + volume = {42}, + number = {2}, + pages = {113-129}, + year = {2013}, + doi = {10.1080/09298215.2013.797999} +} + +@inproceedings{CannamLS10_SonicVisualizer_ICMC, + author = {Chris Cannam and Christian Landone and Mark B. Sandler}, + title = {Sonic {V}isualiser: An Open Source Application for Viewing, Analysing, and Annotating Music Audio Files}, + booktitle = {Proceedings of the International Conference on Multimedia}, + address = {Florence, Italy}, + pages = {1467--1468}, + year = {2010}, +} + +@article{CamachoH08_SawtoothWaveform_JASA, + author = {Arturo Camacho and John G. 
Harris}, + title = {A sawtooth waveform inspired pitch estimator for speech and music}, + publisher = {ASA}, + year = {2008}, + journal = {The Journal of the Acoustical Society of America}, + volume = {124}, + number = {3}, + pages = {1638--1652}, +} + +@article{Goto04_RealTimeF0_SC, + title = {A real-time music-scene-description system: Predominant-F0 estimation for detecting melody and bass lines in real-world audio signals}, + author = {Masataka Goto}, + journal = {Speech Communication}, + volume = {43}, + number = {4}, + pages = {311--329}, + year = {2004}, + publisher = {Elsevier} +} + +@article{CheveigneK02_YIN_JASA, + author = {Alain de Cheveign{\'e} and Hideki Kawahara}, + title = {{YIN}, a fundamental frequency estimator for speech and music.}, + journal = {Journal of the Acoustical Society of America (JASA)}, + year = {2002}, + volume = {111}, + pages = {1917--1930}, + number = {4}, +} + +@article{Boersma01_Praat_GI, + author = {Paul Boersma}, + journal = {Glot International}, + number = {9/10}, + pages = {341--345}, + title = {{Praat}, a system for doing phonetics by computer}, + volume = {5}, + year = {2001} +} + +@book{HagermanS80_Barbershop_CITESEER, + title = {Fundamental frequency adjustment in barbershop singing}, + author = {B Hagerman and Johan Sundberg}, + year = {1980}, + publisher = {Citeseer} +} diff --git a/paper/paper.md b/paper/paper.md new file mode 100644 index 0000000..e6aa802 --- /dev/null +++ b/paper/paper.md @@ -0,0 +1,94 @@ +--- +title: '`pytch` v2: A Real-Time Monitoring Tool For Polyphonic Singing Performances' +tags: + - Python + - Audio Processing + - Music Information Retrieval + - Singing Voice Analysis + - GUI +authors: + - name: Sebastian Rosenzweig + orcid: 0000-0003-4964-9217 + equal-contrib: true + corresponding: true + affiliation: 1 + - name: Marius Kriegerowski + orcid: + equal-contrib: true + corresponding: false + affiliation: 2 + - name: Frank Scherbaum + orcid: 0000-0002-5050-7331 + corresponding: false + affiliation: 3 + + +affiliations: + - name: Independent Researcher, Barcelona, Spain + index: 1 + - name: Independent Researcher, Berlin, Germany + index: 2 + - name: University of Potsdam, Potsdam, Germany + index: 3 +date: 30 May 2025 +bibliography: paper.bib +--- + +# Summary +Polyphonic singing is one of the most widespread forms of music-making. During a performance, singers must constantly adjust their pitch to stay in tune with one another — a complex skill that requires extensive practice. Research has shown that pitch monitoring tools can assist singers in fine-tuning their intonation during a performance [@BerglinPD22_VisualFeedback_JPM]. Specifically, real-time visualizations of the fundamental frequency (F0), which represents the pitch of the singing voice, help singers assess their pitch relative to a fixed reference or other voices. +To support the monitoring of polyphonic singing performances, we developed `pytch`, an interactive Python tool with a graphical user interface (GUI) designed to record, process, and visualize multiple voices in real time. The GUI displays vocal spectra and estimated F0 trajectories for all singers, as well as the harmonic intervals between them. Additionally, users can adjust visual and algorithmic parameters interactively to accommodate different input devices, microphone signals, singing styles, and use cases. 
Written in Python, `pytch` utilizes the `libf0` library [@RosenzweigSM22_libf0_ISMIR-LBD] for real-time F0 estimation and `pyqtgraph`[^1] for efficient visualizations of the analysis results.
+Our tool builds upon a late-breaking demo [@KriegerowskiS_Pytch_2017], which we refer to as version 1. Since then, the tool has been significantly extended with a new real-time graphics engine, a modular audio processing backend that facilitates the integration of additional algorithms, and improved support for a wider range of platforms and recording hardware; we refer to this extended tool as version 2. Over its seven years of development, `pytch` has been tested and refined through use in several rehearsals, workshops, and field studies — including Sardinian quartet singing (see demo video[^2]) and traditional Georgian singing (see demo video[^3]).
+
+[^1]: 
+[^2]: 
+[^3]: 
+
+
+# Statement of Need
+Software that assesses the pitch of a singing voice in real time is best known from karaoke singing applications, such as Let's Sing[^4], Rock Band[^5], or Cantamus[^6]. These tools typically compare the singer’s pitch to a score reference to judge whether notes are ‘correct’ or ‘incorrect’. However, such applications face several limitations when applied to polyphonic or group singing contexts. Most notably, many karaoke systems can only process one or two singing voices at a time, which is problematic for monitoring group performances. Additionally, software that relies on a score as a reference poses challenges for a cappella performances, where singers may drift together in pitch over time while maintaining relative harmony, or in orally transmitted traditions that may lack a formal score altogether. Finally, existing open-source research software for singing voice processing, such as Praat [@Boersma01_Praat_GI], Sonic Visualiser [@CannamLS10_SonicVisualizer_ICMC], and Tarsos [@SixCL13_Tarsos_JNMR], lacks real-time feedback, preventing an effective feedback loop between singers and the tool.
+
+To address these challenges, we developed `pytch`. Our tool is currently the only software that enables singers and conductors to monitor and train harmonic interval singing in real time — a skill that is essential in many vocal traditions. This includes not only polyphonic genres such as traditional Georgian vocal music [@ScherbaumMRM19_MultimediaRecordings_FMA] or Barbershop singing [@HagermanS80_Barbershop_CITESEER], where precise tuning between voices is stylistically central, but also the practice of non-tempered tuning systems found in various oral traditions. In more detail, the vocal spectra can help singers fine-tune the expression of formant frequencies, while melodic and harmonic issues become visible through F0 trajectories and harmonic intervals. Unlike many existing tools, `pytch` does not require a musical score, making it well-suited for rehearsals, ethnomusicological research, and pedagogical contexts focused on intonation and harmonic listening.
+
+In addition to its practical applications, `pytch` provides a flexible platform for music information retrieval (MIR) research on real-time audio processing. Working with real-time data introduces challenges such as a limited audio context for analysis and strict timing constraints to ensure low-latency processing. Researchers can use `pytch` to develop, test, and compare algorithms for F0 estimation and other MIR tasks [@StefaniT22_RealTimeMIR_DAFX;@Goto04_RealTimeF0_SC;@MeierCM24_RealTimePLP_TISMIR].
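+
+As an illustration, the following minimal sketch (not part of `pytch` itself) mimics such a chunk-wise, real-time-style evaluation of an F0 estimator with `libf0`; the sampling rate, chunk size, analysis parameters, and test signal are arbitrary placeholder choices:
+
+```python
+import time
+import numpy as np
+import libf0  # F0 estimation library also used by pytch
+
+fs = 44100                        # sampling rate in Hz (placeholder)
+chunk_len = 8192                  # samples handed to the estimator per iteration
+audio = np.random.randn(10 * fs)  # stand-in for a streamed microphone signal
+
+latencies = []
+for start in range(0, len(audio) - chunk_len, chunk_len):
+    chunk = audio[start:start + chunk_len]
+    t0 = time.perf_counter()
+    f0, _, _ = libf0.yin(chunk, Fs=fs, N=2048, H=256)  # frame-wise F0 in Hz
+    latencies.append(time.perf_counter() - t0)
+
+print(f"median processing time per chunk: {1e3 * np.median(latencies):.1f} ms")
+```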
+ +[^4]: +[^5]: +[^6]: + + + +# Multitrack Singing Recordings + +To fully leverage the capabilities of `pytch`, it is essential to record each singer with an individual microphone. While there is no hard limit on the number of input channels, we recommend recording up to four individual singers to ensure visibility of the charts and responsiveness of the GUI. Stereo recordings—-such as those captured by a room microphone placed in front of the ensemble--are not suitable for the analysis with `pytch`, because contributions of individual voices are difficult to identify from polyphonic mixtures [@Cuesta22_Multipitch_PhD]. Suitable multitrack recordings can be obtained using handheld dynamic microphones or headset microphones. However, these setups are prone to cross-talk, especially when singers are positioned close together. + +One way to reduce cross-talk is to increase the physical distance between singers or to record them in isolation. However, this is not always feasible, as singers need to hear one another to maintain accurate tuning. An effective workaround is the use of contact microphones, such as throat microphones, which capture vocal fold vibrations directly from the skin of the throat. This method offers a significant advantage: the recorded signals are largely immune to interference from other singers, resulting in much cleaner, more isolated recordings. Throat microphones have successfully been used to record vocal ensembles in several past studies [@Scherbaum16_LarynxMicrophones_IWFMA]. + +In addition to live monitoring, `pytch` can also be used to analyze pre-recorded multitrack singing performances. By playing back individual vocal tracks in a digital audio workstation (DAW) and using virtual audio routing tools such as Loopback[^7] (macOS) or BlackHole[^8], these tracks can be streamed into `pytch` as if they were live microphone inputs. This setup, which was also used in the demo video[^3], allows users to benefit from `pytch`’s real-time visualization and analysis features during evaluation of rehearsals, performances, or field recordings. + +[^7]: +[^8]: + + +# Audio Processing +The real-time audio processing pipeline implemented in the file `audio.py` is the heart of `pytch` and consists of two main stages: recording and analysis. The recording stage captures multichannel audio waveforms from the soundcard or an external audio interface using the `sounddevice` library. The library is based on PortAudio and supports a wide range of operating systems, audio devices, and sampling rates. The recorded audio is received in chunks via a recording callback and fed into a ring buffer shared with the analysis process. When the buffer is sufficiently filled with audio chunks, the analysis process reads the recorded audio to compute several audio features. + +For each channel, the analysis stage computes the audio level in dBFS, a time--frequency representation of the audio signal via the Short-Time Fourier Transform (see [@Mueller21_FMP_SPRINGER] for fundamentals of music processing), and an estimate of the F0 along with a confidence value, using the `libf0` library [@RosenzweigSM22_libf0_ISMIR-LBD]. The library includes several implementations of well-known F0 estimation algorithms. We make use of YIN [@CheveigneK02_YIN_JASA], which is a time-domain algorithm that computes the F0 based on a tweaked auto-correlation function. 
It is computationally efficient and well-suited for low-latency applications, but it tends to suffer from estimation errors, particularly confusions with higher harmonics (e.g., octave errors). The obtained F0 estimates, which are natively computed in Hz, are converted to cents as $1200 \log_2(F_0 / F_\mathrm{ref})$, where $F_\mathrm{ref}$ denotes a user-specified reference frequency. Depending on the audio quality and vocal characteristics, F0 estimates may exhibit artifacts such as discontinuities or pitch slides, which can make the resulting trajectories difficult to interpret [@RosenzweigSM19_StableF0_ISMIR]. Previous research has shown that using throat microphones can improve the isolation of individual voices in group singing contexts, resulting in cleaner signals and more accurate F0 estimates [@Scherbaum16_LarynxMicrophones_IWFMA]. To further enhance interpretability, `pytch` includes several optional post-processing steps: a confidence threshold to discard estimates with a low confidence score, a median filter to smooth the trajectories, and a gradient filter to suppress abrupt pitch slides. Every raw audio feature is stored separately in a dedicated ring buffer; the GUI periodically reads the latest data from these buffers, applies the post-processing described above, and, as a final step, computes the harmonic intervals between the F0 trajectories for visualization.
+
+
+# Graphical User Interface (GUI)
+In this section, we provide a step-by-step explanation of the `pytch` GUI implemented in the file `gui.py`. Right after program start, a startup menu opens in which the user is asked to specify the soundcard, input channels, sampling rate, and window size for processing (see \autoref{fig:menu}). Furthermore, the user can choose to store the recorded audio and the F0 trajectories on disk.
+
+![`pytch` startup menu.\label{fig:menu}](../pictures/menu.png){ width=50% }
+
+These configuration choices are required to initialize the audio processing module and the main GUI, which is loaded when the user clicks "ok". A screenshot of the main GUI, which opens after successful initialization, is shown in \autoref{fig:GUI}.
+
+![`pytch` GUI monitoring three singing voices.\label{fig:GUI}](../pictures/screenshot.png){ width=100% }
+
+The main GUI is organized into three horizontal sections. On the left, a control panel provides a start/stop button and allows users to adjust both the visual layout and algorithmic parameters. The central section displays "channel views"--one for each input channel--color-coded for clarity. Each view includes a microphone level meter, a real-time spectrum display with a vertical line marking the current F0 estimate, and a scrolling spectrogram with a 5-second time context. Channels are listed from top to bottom in the order they were selected during setup. Optionally, the bottommost view can display a product signal from all channels.
+
+The right section, referred to as the "trajectory view," provides time-based visualizations of either the F0 trajectories ("pitches" tab) or the harmonic intervals between voices ("differential" tab) with a 10-second time context. Using the controls in the left-side menu, the user can select the F0 estimation algorithm and improve the real-time visualization by adjusting the confidence threshold, the median filter length for smoothing, and the tolerance of the gradient filter. F0 and interval trajectories can be displayed with respect to a fixed reference frequency or a dynamic one derived from a selected channel, the lowest, or the highest detected voice.
Axis limits for this section can also be manually set. + +# Acknowledgements +We would like to thank Lukas Dietz for his help with the implementation, Peter Meier and Sebastian Strahl for the collaboration on real-time implementations, and all the singers who contributed to testing `pytch` during its development. + +# References diff --git a/pictures/menu.png b/pictures/menu.png new file mode 100644 index 0000000..f67c37d Binary files /dev/null and b/pictures/menu.png differ diff --git a/pyproject.toml b/pyproject.toml index 8fe6ada..27c528b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "pytch" -version = "0.3.0" +version = "2.2.0" description = "A Real-Time Pitch Analysis Tool For Polyphonic Music" authors = [ {name = "Pytch Contributors"} @@ -26,5 +26,8 @@ include-package-data = false [tool.setuptools.packages.find] include = ["pytch*"] +[tool.poetry.extras] +test = ["pytest"] + [project.scripts] pytch = "pytch.cli:main" diff --git a/pytch/__init__.py b/pytch/__init__.py index fe1fb6b..ca0e42e 100644 --- a/pytch/__init__.py +++ b/pytch/__init__.py @@ -1,13 +1,3 @@ import logging -try: # Python 2.7+ - from logging import NullHandler -except ImportError: - - class NullHandler(logging.Handler): - def emit(self, record): - pass - - logging.basicConfig(level=logging.INFO) -logging.getLogger(__name__).addHandler(NullHandler()) diff --git a/pytch/audio.py b/pytch/audio.py index 5d2760c..4363c7e 100644 --- a/pytch/audio.py +++ b/pytch/audio.py @@ -13,16 +13,18 @@ from datetime import datetime import csv -_audio_lock = threading.Lock() # lock for raw audio buffer -_feat_lock = threading.Lock() # lock for feature buffers -_gui_lock = threading.Lock() # lock for communication with GUI logger = logging.getLogger("pytch.audio") eps = np.finfo(float).eps def get_input_devices(): - """Returns a list of devices.""" + """Returns a list of input devices. + + Returns: + List of available input devices. + + """ input_devices = [] for device_id, device in enumerate(sounddevice.query_devices()): if device["max_input_channels"] > 0: @@ -31,7 +33,15 @@ def get_input_devices(): def get_fs_options(device_idx): - """Returns a dictionary of supported sampling rates for all devices.""" + """Returns a dictionary of supported sampling rates for all devices. + + Args: + device_idx: Device index. + + Returns: + List of supported sampling rates. + + """ candidates = [8000.0, 11025.0, 16000.0, 22050.0, 32000.0, 37800.0, 44100.0, 48000.0] supported_fs = [] for c in candidates: @@ -42,7 +52,16 @@ def get_fs_options(device_idx): def check_fs(device_index, fs): - """Validates chosen sampling rate.""" + """Validates chosen sampling rate. + + Args: + device_index: Device index. + fs: Sampling rate. + + Returns: + True if sampling rate is supported, else False. + + """ valid = True try: sounddevice.check_input_settings( @@ -61,14 +80,32 @@ def check_fs(device_index, fs): @njit -def f2cent(f, standard_frequency=440.0): - """Convert from Hz to Cents""" - return 1200.0 * np.log2(np.abs(f) / standard_frequency + eps) +def f2cent(f, f_ref=440.0): + """Convert frequency from Hz to Cents. + + Args: + f: Frequency. + f_ref: Reference frequency. + + Returns: + Frequency in Cents. + + """ + return 1200.0 * np.log2(np.abs(f) / f_ref + eps) @njit def gradient_filter(y, max_gradient): - """Get index where the abs gradient of x, y is < max_gradient.""" + """Gradient filter. + + Args: + y: Signal. + max_gradient: Upper boundary for absolute gradient. 
+ + Returns: + Indices where the absolute gradient of y is < max_gradient. + + """ return np.where(np.abs(np.diff(f2cent(y))) < max_gradient)[0] @@ -76,62 +113,96 @@ class RingBuffer: """Generic ring buffer for n-dimensional data""" def __init__(self, size, dtype): - """Initialize buffer, size should be of format (n_frames, ..., n_channels)""" + """Initialize buffer. + + Args: + size: buffer size (n_frames, ..., n_channels) + dtype: buffer dtype + """ self.size = size self.buffer = np.zeros(size, dtype=dtype) self.write_head = 0 self.read_head = 0 + self.lock = threading.Lock() def write(self, data): - """Writes data to buffer""" + """Writes data to buffer. + + Args: + data: Data of shape (n_frames, ..., n_channels). + + """ if data.shape[0] > self.size[0]: logger.warning("Buffer overflow!") - write_idcs = np.mod(self.write_head + np.arange(data.shape[0]), self.size[0]) - self.buffer[write_idcs, ...] = data - self.write_head = np.mod( - write_idcs[-1] + 1, self.size[0] - ) # set write head to the next bin to write to + with self.lock: + write_idcs = np.mod( + self.write_head + np.arange(data.shape[0]), self.size[0] + ) + self.buffer[write_idcs, ...] = data + self.write_head = np.mod( + write_idcs[-1] + 1, self.size[0] + ) # set write head to the next bin to write to def read_latest(self, n_frames): - """Reads n_frames from buffer, starting from latest data""" + """Read latest n_frames frames from buffer, starting from write head. + + Args: + n_frames: Number of frames to read. + + Returns: + Read data. + + """ if self.size[0] < n_frames: - Exception("Cannot read more data than buffer length!") + Exception("cannot read more data than buffer length!") - read_idcs = np.mod( - self.size[0] + self.write_head - np.arange(n_frames) - 1, self.size[0] - )[::-1] - return self.buffer[read_idcs, ...] + with self.lock: + read_idcs = np.mod( + self.size[0] + self.write_head - np.arange(n_frames) - 1, self.size[0] + )[::-1] + return self.buffer[read_idcs, ...] def read_next(self, n_frames, hop_frames=None): - """Reads n_frames from buffer, starting from latest read""" + """Read n_frames frames from buffer, starting from read head. + + Args: + n_frames: Number of frames to read. + hop_frames: Read head increment. + + Returns: + Read data. - if ( - np.mod(self.size[0] + self.write_head - self.read_head, self.size[0]) - < n_frames - ): - return np.array([]) + """ + with self.lock: + if ( + np.mod(self.size[0] + self.write_head - self.read_head, self.size[0]) + < n_frames + ): + # return empty array if not enough data available + return np.array([]) - read_idcs = np.mod( - self.size[0] + self.read_head + np.arange(n_frames), self.size[0] - )[::-1] + read_idcs = np.mod( + self.size[0] + self.read_head + np.arange(n_frames), self.size[0] + )[::-1] - if hop_frames is None: - hop_frames = n_frames + if hop_frames is None: + hop_frames = n_frames - self.read_head = np.mod( - self.read_head + hop_frames, self.size[0] - ) # advance read head + self.read_head = np.mod( + self.read_head + hop_frames, self.size[0] + ) # advance read head - return self.buffer[read_idcs, ...] + return self.buffer[read_idcs, ...] 
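+
+    # Illustrative usage (hypothetical values), mirroring how AudioProcessor shares
+    # buffers between the recording callback, the analysis worker, and the GUI:
+    #   buf = RingBuffer(size=(int(buf_len_sec * fs), len(channels)), dtype=np.float32)
+    #   buf.write(chunk)                          # producer: recording callback
+    #   frames = buf.read_next(fft_len, hop_len)  # consumer: analysis worker
+    #   latest = buf.read_latest(n_frames)        # non-destructive read for the GUI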
def flush(self): + """Flush buffer.""" self.buffer = np.zeros_like(self.buffer) self.write_head = 0 self.read_head = 0 class AudioProcessor: - """Class for recording and processing of multichannel audio""" + """Class for recording and processing of multichannel audio.""" def __init__( self, @@ -141,20 +212,29 @@ def __init__( channels=None, device_no=None, f0_algorithm="YIN", - gui=None, out_path="", ): + """Initialize audio processing. + + Args: + fs: Sampling rate. + buf_len_sec: Buffer length in seconds. + fft_len: FFT length in bins. + channels: List of channels to record. + device_no: Index of device to record from. + f0_algorithm: F0 algorithm to use. + out_path: Output directory for F0 trajectories. + """ self.fs = fs self.buf_len_sec = buf_len_sec self.fft_len = fft_len - self.hop_len = 2 ** int(np.log2(fs / 25)) + self.hop_len = self.fft_len // 2 self.fft_freqs = np.fft.rfftfreq(self.fft_len, 1 / self.fs) self.fft_win = np.hanning(self.fft_len).reshape(-1, 1) self.channels = [0] if channels is None else channels self.device_no = device_no self.f0_algorithm = f0_algorithm self.out_path = out_path - self.gui = gui self.f0_lvl_threshold = -70 # minimum level in dB to compute f0 estimates self.frame_rate = self.fs / self.hop_len self.stream = None @@ -209,35 +289,9 @@ def __init__( + [f"Confidence Channel {ch}" for ch in channels] ) - # initialise output buffers that are read by GUI - if gui is not None: - self.new_gui_data_available = False - self.proc_lvl = gui.lvl_cvals[0] - self.proc_spec = np.zeros( - (self.raw_fft_buf.buffer.shape[1], len(self.channels) + 1) - ) - self.proc_stft = np.zeros( - ( - int(np.round(gui.disp_t_stft * self.frame_rate)), - len(self.fft_freqs), - len(self.channels) + 1, - ) - ) - self.proc_inst_f0 = np.full((1, len(self.channels) + 1), np.nan) - self.proc_f0 = np.zeros( - (int(np.round(gui.disp_t_f0 * self.frame_rate)), len(self.channels)) - ) - self.proc_diff = np.zeros( - ( - self.proc_f0.shape[0], - (len(self.channels) * (len(self.channels) - 1)) // 2, - ) - ) - def start_stream(self): """Start recording and processing""" - if self.is_running: - self.stop_stream() + self.stop_stream() # initialize audio stream self.stream = sounddevice.InputStream( @@ -266,8 +320,13 @@ def stop_stream(self): """Stop recording and processing""" if self.is_running: self.is_running = False - self.worker.join() self.stream.stop() + self.worker.join() + self.audio_buf.flush() + self.raw_lvl_buf.flush() + self.raw_fft_buf.flush() + self.raw_f0_buf.flush() + self.raw_conf_buf.flush() def close_stream(self): """Close stream, processing thread and files""" @@ -279,31 +338,24 @@ def close_stream(self): self.traj_out_file.close() def worker_thread(self): - """The thread that does all the audio processing""" + """The thread that does the audio processing""" while self.is_running: - with _audio_lock: - audio = self.audio_buf.read_next( - self.fft_len, self.hop_len - ) # get audio + audio = self.audio_buf.read_next(self.fft_len, self.hop_len) # get audio if audio.size == 0: sleep(0.001) continue + start_t = time() lvl = self.compute_level(audio) # compute level fft = self.compute_fft(audio) # compute fft f0, conf = self.compute_f0(audio, lvl) # compute f0 & confidence + logger.debug(f"Processing took {time()-start_t:.4f}s.") - with _feat_lock: - self.raw_lvl_buf.write(lvl) - self.raw_fft_buf.write(fft) - self.raw_f0_buf.write(f0) - self.raw_conf_buf.write(conf) - - # GUI pre-processing for faster updates - if self.gui is not None: - self.gui_preprocessing() - 
self.new_gui_data_available = True + self.raw_lvl_buf.write(lvl) + self.raw_fft_buf.write(fft) + self.raw_f0_buf.write(f0) + self.raw_conf_buf.write(conf) # write trajectories to disk if configured if self.out_path != "": @@ -311,30 +363,39 @@ def worker_thread(self): writer.writerow(np.concatenate((f0[0, :], conf[0, :]))) def recording_callback(self, data, frames, time, status): - """Receives and stores frames from soundcard, data is of shape (frames, channels)""" + """Receives frames from soundcard and stores them in buffer, data is of shape (frames, channels)""" audio_conv = ( data[:, self.channels].astype(np.float32, order="C") / 32768.0 ) # convert int16 to float32 - with _audio_lock: - self.audio_buf.write(audio_conv) + self.audio_buf.write(audio_conv) if self.out_path != "": self.audio_out_file.write(audio_conv) @staticmethod def compute_level(audio): - """Peak level in dB""" + """Computes peak level in dB""" return 10 * np.log10(np.max(np.abs(audio + eps), axis=0)).reshape(1, -1) def compute_fft(self, audio): - """FFT""" + """Computes the Fast Fourier Transform (FFT)""" return np.abs(np.fft.rfft(audio * self.fft_win, self.fft_len, axis=0))[ np.newaxis, :, : ] def compute_f0(self, audio, lvl): - """Fundamental frequency estimation""" + """Fundamental frequency (F0) estimation. + + Args: + audio: audio signal + lvl: audio levels + + Returns: + f0: F0 estimate. + conf: Confidence. + + """ f0 = np.zeros((1, audio.shape[1])) conf = np.zeros((1, audio.shape[1])) @@ -342,10 +403,12 @@ def compute_f0(self, audio, lvl): if lvl[0, c] < self.f0_lvl_threshold: continue + audio_tmp = np.concatenate( + (audio[:, c][::-1], audio[:, c], audio[:, c][::-1]) + ) if self.f0_algorithm == "YIN": - # TODO: replace with real-time version, add real-time SWIPE, relax min/max limits f0_tmp, _, conf_tmp = libf0.yin( - np.concatenate((audio[:, c][::-1], audio[:, c], audio[:, c][::-1])), + audio_tmp, Fs=self.fs, N=self.fft_len, H=self.fft_len, @@ -356,110 +419,39 @@ def compute_f0(self, audio, lvl): ) f0[:, c] = np.mean(f0_tmp) # take the center frame conf[:, c] = 1 - np.mean(conf_tmp) - else: f0[:, c] = np.zeros(f0.shape[0]) conf[:, c] = np.zeros(f0.shape[0]) return f0, conf - def gui_preprocessing(self): - """Prepares computed features for display in GUI which speeds up everything""" - # get raw data - lvl, spec, stft, f0, conf = self.read_latest_frames( - self.gui.disp_t_lvl, - self.gui.disp_t_spec, - self.gui.disp_t_stft, - self.gui.disp_t_f0, - self.gui.disp_t_conf, - ) - - # compute max level and clip - proc_lvl = np.clip( - np.max(lvl, axis=0), - a_min=self.gui.lvl_cvals[0], - a_max=self.gui.lvl_cvals[-1], - ) - - # preprocess spectrum - n_spec_frames = spec.shape[0] - spec = np.mean(spec, axis=0) - proc_spec = np.zeros((spec.shape[0], spec.shape[1] + 1)) - proc_spec[:, :-1] = spec - proc_spec[:, -1] = np.prod(spec, axis=1) - if self.gui.cur_spec_scale_type == "log": - proc_spec = np.log(1 + 1 * proc_spec) - max_values = np.abs(proc_spec).max(axis=0) - proc_spec /= np.where(max_values != 0, max_values, 1) - - # preprocess stft - proc_stft = np.zeros((stft.shape[0], stft.shape[1], stft.shape[2] + 1)) - proc_stft[:, :, :-1] = stft - proc_stft[:, :, -1] = np.prod(stft, axis=2) - if self.gui.cur_spec_scale_type == "log": - proc_stft = np.log(1 + 1 * proc_stft) - max_values = np.max(np.abs(proc_stft), axis=(0, 1), keepdims=True) - proc_stft /= np.where(max_values != 0, max_values, 1) - - # preprocess f0 - median_len = self.gui.cur_smoothing_len - if median_len > 0: - idcs = np.argwhere(f0 > 0) - f0[idcs] = 
median_filter(f0[idcs], size=median_len, axes=(0,)) - conf[idcs] = median_filter(conf[idcs], size=median_len, axes=(0,)) - - inst_f0 = np.mean(f0[-n_spec_frames:, :], axis=0) - inst_conf = np.mean(conf[-n_spec_frames:, :], axis=0) - inst_f0[inst_conf < self.gui.cur_conf_threshold] = np.nan - - # compute reference frequency - cur_ref_freq_mode = self.gui.cur_ref_freq_mode - ref_freq = self.gui.cur_ref_freq - if cur_ref_freq_mode == "fixed": - cur_ref_freq = ref_freq - elif cur_ref_freq_mode == "highest": - cur_ref_freq = np.max(np.mean(f0, axis=0)) - elif cur_ref_freq_mode == "lowest": - cur_ref_freq = np.min(np.mean(f0, axis=0)) - else: - cur_ref_freq = f0[-1, int(cur_ref_freq_mode[-2:]) - 1] - - # threshold trajectories and compute intervals - nan_val = 99999 - proc_f0, proc_diff = self.f0_diff_computations( - f0, - conf, - self.gui.cur_conf_threshold, - self.gui.cur_derivative_tol, - cur_ref_freq, - nan_val, - ) - proc_f0[proc_f0 == nan_val] = np.nan - proc_diff[proc_diff == nan_val] = np.nan - - with _gui_lock: - self.proc_lvl = proc_lvl - self.proc_spec[:] = proc_spec - self.proc_stft[:] = proc_stft - self.proc_f0[:] = proc_f0 - self.proc_inst_f0[:, :-1] = inst_f0 - self.proc_diff[:] = proc_diff - @staticmethod @njit - def f0_diff_computations( - f0, conf, cur_conf_threshold, cur_derivative_tol, cur_ref_freq, nan_val - ): - """Computes pair-wise differences between F0-trajectories, speed-up using jit-compilation""" + def f0_diff_computations(f0, conf, conf_threshold, gradient_tol, ref_freq, nan_val): + """Computes pair-wise differences between F0-trajectories, speed-up using jit-compilation. + + Args: + f0: Fundamental frequencies of all voices. + conf: Confidences of all voices. + conf_threshold: Confidence threshold. + gradient_tol: Tolerance for gradient filter. + ref_freq: Reference frequency. + nan_val: Value that is used in replace for NaN. + + Returns: + proc_f0: Thresholded and smoothed F0 trajectories in Cents. + proc_diff: Harmonic differences between voices in Cents. + + """ proc_f0 = np.ones_like(f0) * nan_val for i in range(f0.shape[1]): # filter f0 using confidence threshold and gradient filter - index = np.where((conf[:, i] >= cur_conf_threshold) & (f0[:, i] > 0))[0] - index_grad = gradient_filter(f0[:, i], cur_derivative_tol) + index = np.where((conf[:, i] >= conf_threshold) & (f0[:, i] > 0))[0] + index_grad = gradient_filter(f0[:, i], gradient_tol) index = np.intersect1d(index, index_grad) - proc_f0[index, i] = f2cent(f0[index, i], cur_ref_freq) + proc_f0[index, i] = f2cent(f0[index, i], ref_freq) proc_diff = ( np.ones((f0.shape[0], (f0.shape[1] * (f0.shape[1] - 1)) // 2)) * nan_val @@ -481,29 +473,136 @@ def f0_diff_computations( return proc_f0, proc_diff - def read_latest_frames(self, t_lvl, t_spec, t_stft, t_f0, t_conf): - """Reads latest t seconds from buffers""" + def get_gui_data( + self, + disp_t_lvl, + disp_t_spec, + disp_t_stft, + disp_t_f0, + disp_t_conf, + lvl_cvals, + spec_scale_type, + smoothing_len, + conf_threshold, + ref_freq_mode, + ref_freq, + gradient_tol, + ): + """Reads and prepares data for GUI. + + Args: + disp_t_lvl: Time for level computation. + disp_t_spec: Time for spectrum computation. + disp_t_stft: Time for spectrogram computation. + disp_t_f0: Time for F0 computation. + disp_t_conf: Time for confidence computation. + lvl_cvals: GUI level limits. + spec_scale_type: Spectral scale type. + smoothing_len: Smoothing filter length in frames. + conf_threshold: Confidence threshold. + ref_freq_mode: Reference frequency mode. 
+ ref_freq: Reference frequency. + gradient_tol: Gradient filter tolerance. + + Returns: + lvl: Levels for all channels. + spec: Spectra for all channels & product. + inst_f0: Instantaneous F0 for all channels & product. + stft: Spectrograms for all channels & product. + f0: F0 estimates for all channels. + diff: Differential F0s (harmonic intervals) for all channels. + + """ + start_t = time() + + # read latest data from buffer + # why not read_next()? -> we prioritize low latency over completeness of the visualized data. + lvl = self.raw_lvl_buf.read_latest(int(np.round(disp_t_lvl * self.frame_rate))) + spec_raw = self.raw_fft_buf.read_latest( + int(np.round(disp_t_stft * self.frame_rate)) + ) + f0 = self.raw_f0_buf.read_latest(int(np.round(disp_t_f0 * self.frame_rate))) + conf = self.raw_conf_buf.read_latest( + int(np.round(disp_t_conf * self.frame_rate)) + ) - with _feat_lock: - lvl = self.raw_lvl_buf.read_latest(int(np.round(t_lvl * self.frame_rate))) - spec = self.raw_fft_buf.read_latest(int(np.round(t_spec * self.frame_rate))) - stft = self.raw_fft_buf.read_latest(int(np.round(t_stft * self.frame_rate))) - f0 = self.raw_f0_buf.read_latest(int(np.round(t_f0 * self.frame_rate))) - conf = self.raw_conf_buf.read_latest( - int(np.round(t_conf * self.frame_rate)) + # compute max level and clip + if len(lvl) > 0: + lvl = np.clip( + np.max(lvl, axis=0), + a_min=lvl_cvals[0], + a_max=lvl_cvals[-1], ) - return lvl, spec, stft, f0, conf - - def get_latest_gui_data(self): - """Reads pre-processed data for GUI""" - with _gui_lock: - self.new_gui_data_available = False - return ( - self.proc_lvl, - self.proc_spec, - self.proc_inst_f0, - self.proc_stft, - self.proc_f0, - self.proc_diff, + # preprocess spectrum + if len(spec_raw) > 0: + n_spec_frames = int(np.round(spec_raw.shape[0] * disp_t_spec / disp_t_stft)) + spec = np.mean(spec_raw[-n_spec_frames:, :, :], axis=0) + spec = np.concatenate((spec, np.prod(spec, axis=1).reshape(-1, 1)), axis=-1) + if spec_scale_type == "log": + spec = np.log(1 + 1 * spec) + max_values = np.abs(spec).max(axis=0) + spec /= np.where(max_values != 0, max_values, 1) + else: + spec = np.array([]) + + # preprocess stft + if len(spec_raw) > 0: + stft = np.zeros( + (spec_raw.shape[0], spec_raw.shape[1], spec_raw.shape[2] + 1) ) + stft[:, :, :-1] = spec_raw + stft[:, :, -1] = np.prod(spec_raw, axis=2) + if spec_scale_type == "log": + stft = np.log(1 + 1 * stft) + max_values = np.max(np.abs(stft), axis=(0, 1), keepdims=True) + stft /= np.where(max_values != 0, max_values, 1) + else: + stft = np.array([]) + + # preprocess f0 + if len(f0) > 0: + median_len = smoothing_len + if median_len > 0: + idcs = np.argwhere(f0 > 0) + f0[idcs] = median_filter(f0[idcs], size=median_len, axes=(0,)) + conf[idcs] = median_filter(conf[idcs], size=median_len, axes=(0,)) + + n_spec_frames = int(np.round(spec_raw.shape[0] * disp_t_spec / disp_t_stft)) + inst_f0 = np.mean(f0[-n_spec_frames:, :], axis=0) + inst_f0 = np.concatenate((inst_f0, [0])) + inst_conf = np.mean(conf[-n_spec_frames:, :], axis=0) + inst_conf = np.concatenate((inst_conf, [0])) + inst_f0[inst_conf < conf_threshold] = np.nan + + # compute reference frequency + ref_freq_mode = ref_freq_mode + ref_freq = ref_freq + if ref_freq_mode == "fixed": + ref_freq = ref_freq + elif ref_freq_mode == "highest": + ref_freq = np.max(np.mean(f0, axis=0)) + elif ref_freq_mode == "lowest": + ref_freq = np.min(np.mean(f0, axis=0)) + else: + ref_freq = f0[-1, int(ref_freq_mode[-2:]) - 1] + + # threshold trajectories and compute intervals + nan_val = 
99999 + f0, diff = self.f0_diff_computations( + f0, + conf, + conf_threshold, + gradient_tol, + ref_freq, + nan_val, + ) + f0[f0 == nan_val] = np.nan + diff[diff == nan_val] = np.nan + else: + inst_f0 = np.array([]) + diff = np.array([]) + + logger.debug(f"GUI pre-processing took {time()-start_t:.4f}s.") + + return lvl, spec, inst_f0, stft, f0, diff diff --git a/pytch/cli.py b/pytch/cli.py index 17b76c1..f26b3b7 100644 --- a/pytch/cli.py +++ b/pytch/cli.py @@ -9,6 +9,7 @@ def main(): + """Parses commandline arguments and starts pytch.""" parser = argparse.ArgumentParser("pytch") parser.add_argument( "--debug", @@ -26,8 +27,6 @@ def main(): else: logger.setLevel(logging.INFO) - logger.debug("starting app...") - start_gui() diff --git a/pytch/gui.py b/pytch/gui.py index 02143b6..9660bc9 100644 --- a/pytch/gui.py +++ b/pytch/gui.py @@ -3,11 +3,10 @@ """GUI Functions""" import logging -import threading import sys -import time import numpy as np import importlib.metadata +from time import time from .gui_utils import FloatQLineEdit, QHLine, disable_interactivity, colors from .audio import AudioProcessor, get_input_devices, get_fs_options @@ -19,11 +18,10 @@ import pyqtgraph as pg logger = logging.getLogger("pytch.gui") -_refresh_lock = threading.Lock() # lock for GUI updates def start_gui(): - """Starts the GUI, first show input menu, then open the main GUI""" + """Starts the GUI, first show input menu, then open the main GUI.""" app = qw.QApplication(sys.argv) input_dialog = InputMenu() if input_dialog.exec() == qw.QDialog.DialogCode.Accepted: @@ -41,10 +39,10 @@ def start_gui(): class InputMenu(qw.QDialog): - """Pop up menu at program start that offers user to customise input settings""" + """Pop up menu at program start that offers user to customise input settings.""" - def __init__(self, *args, **kwargs): - qw.QDialog.__init__(self, *args, **kwargs) + def __init__(self): + qw.QDialog.__init__(self) self.setModal(True) layout = qw.QGridLayout() @@ -99,13 +97,13 @@ def __init__(self, *args, **kwargs): self.update_channel_info(0) def update_channel_info(self, menu_index): - """Updates available channels in input menu""" + """Updates available channels in input menu.""" sounddevice_index, device = self.devices[menu_index] nmax_channels = device["max_input_channels"] sampling_rate_options = get_fs_options(sounddevice_index) self.channel_selector = ChannelSelector( - n_channels=nmax_channels, channels_enabled=[0], menu_buttons=self.buttons + n_channels=nmax_channels, menu_buttons=self.buttons ) self.channel_options.setWidget(self.channel_selector) @@ -115,25 +113,39 @@ def update_channel_info(self, menu_index): @staticmethod def get_nfft_box(): - """Return a qw.QSlider for modifying FFT width""" + """Menu for choosing the FFT length. + + Returns: + FFT qw.QSlider. 
+ + """ b = qw.QComboBox() b.addItems([str(f * 256) for f in [1, 2, 4, 8, 16]]) b.setCurrentIndex(2) return b def open_dir_dialog(self): - """Opens an os dialogue for selecting a directory""" + """Opens an os dialogue for selecting a directory.""" dir_name = qw.QFileDialog.getExistingDirectory(self, "Select a Directory") if dir_name: self.out_path = str(dir_name) self.dir_name_edit.setText(self.out_path) def on_ok_clicked(self): - """Close the menu when the user clicks ok and signal that main GUI can be opened""" + """Close the menu when the user clicks ok and signal that main GUI can be opened.""" self.accept() def get_input_settings(self): - """Returns user-configured input settings""" + """Collects and returns user-configured input settings. + + Returns: + sounddevice_idx: Index of the chosen sounddevice. + channels: List of selected channels. + fs: Chosen sampling rate. + fft_size: Chosen FFt size. + out_path: Chosen output path. + + """ sounddevice_idx = self.devices[self.input_options.currentIndex()][0] channels = self.channel_selector.get_selected_channels() fs = int(self.fs_options.currentText()) @@ -142,9 +154,15 @@ def get_input_settings(self): class ChannelSelector(qw.QWidget): - """Widget for the channel buttons on the right side of the input menu""" + """Widget for the channel buttons on the right side of the input menu.""" + + def __init__(self, n_channels, menu_buttons): + """Initialization function. - def __init__(self, n_channels, channels_enabled, menu_buttons): + Args: + n_channels: Number of channels to choose from. + menu_buttons: Buttons of the main menu. + """ super().__init__() self.setLayout(qw.QVBoxLayout()) @@ -161,7 +179,12 @@ def __init__(self, n_channels, channels_enabled, menu_buttons): self.layout().addWidget(button) def get_selected_channels(self): - """Returns selected channels by the user in order""" + """Returns user-selected channels in order of selection. + + Returns: + Selected channels. + + """ return self.press_order def track_button_press(self, index): @@ -180,6 +203,15 @@ class MainWindow(qw.QMainWindow): """Main window that includes the main widget for the menu and all visualizations.""" def __init__(self, sounddevice_idx, channels, fs, fft_size, out_path): + """Initialization. + + Args: + sounddevice_idx: Index of the chosen sound device. + channels: List of chosen channels. + fs: Chosen sampling rate. + fft_size: Chosen FFT size. + out_path: Chosen output path. + """ super().__init__() # default settings for the entire GUI. 
@@ -190,15 +222,15 @@ def __init__(self, sounddevice_idx, channels, fs, fft_size, out_path): self.fft_size = fft_size self.out_path = out_path self.f0_algorithms = ["YIN"] - self.buf_len_sec = 30.0 + self.buf_len_sec = 30.0 # sec self.spec_scale_types = ["log", "linear"] self.ref_freq_modes = ["fixed", "highest", "lowest"] - self.disp_t_lvl = 1 - self.disp_t_spec = 1 - self.disp_t_stft = 5 - self.disp_t_f0 = 10 - self.disp_t_conf = 10 - self.lvl_cvals = [-80, -12, 0] + self.disp_t_lvl = 1 # sec + self.disp_t_spec = 1 # sec + self.disp_t_stft = 5 # sec + self.disp_t_f0 = 10 # sec + self.disp_t_conf = 10 # sec + self.lvl_cvals = [-80, -12, 0] # dBFS self.lvl_colors = ["green", "yellow", "red"] self.ch_colors = colors self.cur_disp_freq_lims = [ @@ -211,11 +243,11 @@ def __init__(self, sounddevice_idx, channels, fs, fft_size, out_path): ] # limits in cents for pitch trajectory view self.cur_spec_scale_type = self.spec_scale_types[0] self.cur_ref_freq_mode = self.ref_freq_modes[0] - self.cur_ref_freq = 220 + self.cur_ref_freq = 220 # Hz self.cur_conf_threshold = 0.5 - self.cur_derivative_tol = 600 - self.cur_smoothing_len = 3 - self.last_refresh = time.time() + self.cur_gradient_tol = 600 # Cents + self.cur_smoothing_len = 3 # bins + self.gui_refresh_ms = int(np.round(1000 / 60)) # 60 fps # status variables self.is_running = False @@ -240,7 +272,6 @@ def __init__(self, sounddevice_idx, channels, fs, fft_size, out_path): channels=self.channels, device_no=self.sounddevice_idx, f0_algorithm=self.f0_algorithms[0], - gui=self, out_path=out_path, ) @@ -271,44 +302,51 @@ def __init__(self, sounddevice_idx, channels, fs, fft_size, out_path): layout.addWidget(splitter) # refresh timer - self.refresh_timer = GUIRefreshTimer() - self.refresh_timer.refresh_signal.connect(self.refresh_gui) - self.refresh_timer.start() + self.refresh_timer = qc.QTimer() + self.refresh_timer.timeout.connect(self.refresh_gui) + self.refresh_timer.start(self.gui_refresh_ms) self.play_pause() # start recording and plotting def play_pause(self): - """Starts or stops the GUI""" + """Starts or stops the GUI.""" if self.is_running: self.audio_processor.stop_stream() - self.refresh_timer.stop_emitting() + self.refresh_timer.stop() self.is_running = False self.menu.play_pause_button.setText("Play") else: self.audio_processor.start_stream() - self.refresh_timer.start_emitting() + self.refresh_timer.start(self.gui_refresh_ms) self.is_running = True self.menu.play_pause_button.setText("Pause") @qc.pyqtSlot() def refresh_gui(self): - """GUI refresh function, needs to be as fast as possible""" - with _refresh_lock: # only update when last update has finished - if self.audio_processor.new_gui_data_available: - # get preprocessed audio data from audio processor - lvl, spec, inst_f0, stft, f0, diff = ( - self.audio_processor.get_latest_gui_data() - ) - - # update widgets - self.channel_views.on_draw(lvl, spec, inst_f0, stft) - self.trajectory_views.on_draw(f0, diff) + """GUI refresh function, needs to be as fast as possible.""" + + # get preprocessed audio data from audio processor + lvl, spec, inst_f0, stft, f0, diff = self.audio_processor.get_gui_data( + disp_t_lvl=self.disp_t_lvl, + disp_t_spec=self.disp_t_spec, + disp_t_stft=self.disp_t_stft, + disp_t_f0=self.disp_t_f0, + disp_t_conf=self.disp_t_conf, + lvl_cvals=self.lvl_cvals, + spec_scale_type=self.cur_spec_scale_type, + smoothing_len=self.cur_smoothing_len, + conf_threshold=self.cur_conf_threshold, + ref_freq_mode=self.cur_ref_freq_mode, + ref_freq=self.cur_ref_freq, + 
gradient_tol=self.cur_gradient_tol, + ) - # logger.info(f"Last refresh finished {time.time() - self.last_refresh}s ago") - self.last_refresh = time.time() + # update widgets + self.channel_views.on_draw(lvl, spec, inst_f0, stft) + self.trajectory_views.on_draw(f0, diff) def menu_toggle_button(self): - """The button for toggeling the menu""" + """The button for toggeling the menu.""" top_bar = qw.QHBoxLayout() top_bar.setContentsMargins(0, 0, 0, 0) top_bar.setSpacing(0) @@ -323,7 +361,7 @@ def menu_toggle_button(self): return top_bar def toggle_menu(self): - """Make menu appear or disappear""" + """Make menu appear or disappear.""" if self.menu_visible: self.menu.hide() self.toggle_button.setText("☰ Show Menu") @@ -333,45 +371,24 @@ def toggle_menu(self): self.menu_visible = not self.menu_visible def closeEvent(self, a0): - """Clean up when GUI is closed""" - self.refresh_timer.terminate() + """Clean up when GUI is closed.""" + self.refresh_timer.stop() self.audio_processor.stop_stream() self.audio_processor.close_stream() sys.exit() -class GUIRefreshTimer(qc.QThread): - """Timer for GUI refreshes""" - - refresh_signal = qc.pyqtSignal() - - def __init__(self): - super().__init__() - self.emit_signal = True - - def run(self): - while 1: - time.sleep(1 / 24) # ideally update with 24 fps - if self.emit_signal: - with ( - _refresh_lock - ): # make sure last refresh is done before sending next one - self.refresh_signal.emit() - - def stop_emitting(self): - """when user presses pause""" - self.emit_signal = False - - def start_emitting(self): - """when user presses play""" - self.emit_signal = True +class ProcessingMenu(qw.QFrame): + """The processing menu on the left side of the main window.""" + def __init__(self, main_window: MainWindow): + """Initialization. -class ProcessingMenu(qw.QFrame): - """The processing menu on the left side of the main window""" + Args: + main_window: qw.QMainWindow instance. 
- def __init__(self, main_window: MainWindow, *args, **kwargs): - qw.QFrame.__init__(self, *args, **kwargs) + """ + qw.QFrame.__init__(self) self.main_window = main_window @@ -426,17 +443,13 @@ def __init__(self, main_window: MainWindow, *args, **kwargs): layout.addWidget(self.box_show_products, 4, 1, 1, 1) layout.addWidget(qw.QLabel("Minimum Frequency"), 5, 0) - self.freq_min = FloatQLineEdit( - parent=self, default=main_window.cur_disp_freq_lims[0] - ) + self.freq_min = FloatQLineEdit(default=main_window.cur_disp_freq_lims[0]) layout.addWidget(self.freq_min, 5, 1, 1, 1) self.freq_min.accepted_value.connect(self.on_min_freq_changed) layout.addWidget(qw.QLabel("Hz"), 5, 2) layout.addWidget(qw.QLabel("Maximum Frequency"), 6, 0) - self.freq_max = FloatQLineEdit( - parent=self, default=main_window.cur_disp_freq_lims[1] - ) + self.freq_max = FloatQLineEdit(default=main_window.cur_disp_freq_lims[1]) layout.addWidget(self.freq_max, 6, 1, 1, 1) self.freq_max.accepted_value.connect(self.on_max_freq_changed) layout.addWidget(qw.QLabel("Hz"), 6, 2) @@ -492,14 +505,14 @@ def __init__(self, main_window: MainWindow, *args, **kwargs): layout.addWidget(self.smoothing_label, 13, 2) layout.addWidget(qw.QLabel("Pitchslide Tolerance [Cents]"), 14, 0) - self.derivative_tol_slider = qw.QSlider() - self.derivative_tol_slider.setRange(0, 1200) - self.derivative_tol_slider.setValue(main_window.cur_derivative_tol) - self.derivative_tol_slider.setOrientation(qc.Qt.Orientation.Horizontal) - self.derivative_tol_label = qw.QLabel(f"{self.derivative_tol_slider.value()}") - self.derivative_tol_slider.valueChanged.connect(self.on_derivative_tol_changed) - layout.addWidget(self.derivative_tol_label, 14, 2) - layout.addWidget(self.derivative_tol_slider, 14, 1, 1, 1) + self.gradient_tol_slider = qw.QSlider() + self.gradient_tol_slider.setRange(0, 1200) + self.gradient_tol_slider.setValue(main_window.cur_gradient_tol) + self.gradient_tol_slider.setOrientation(qc.Qt.Orientation.Horizontal) + self.gradient_tol_label = qw.QLabel(f"{self.gradient_tol_slider.value()}") + self.gradient_tol_slider.valueChanged.connect(self.on_gradient_tol_changed) + layout.addWidget(self.gradient_tol_label, 14, 2) + layout.addWidget(self.gradient_tol_slider, 14, 1, 1, 1) layout.addWidget(qw.QLabel("Reference Mode"), 15, 0) self.ref_freq_mode_menu = qw.QComboBox() @@ -514,23 +527,19 @@ def __init__(self, main_window: MainWindow, *args, **kwargs): layout.addWidget(self.ref_freq_mode_menu, 15, 1, 1, 1) layout.addWidget(qw.QLabel("Reference Frequency"), 16, 0) - self.freq_box = FloatQLineEdit(parent=self, default=main_window.cur_ref_freq) + self.freq_box = FloatQLineEdit(default=main_window.cur_ref_freq) self.freq_box.accepted_value.connect(self.on_reference_frequency_changed) layout.addWidget(self.freq_box, 16, 1, 1, 1) layout.addWidget(qw.QLabel("Hz"), 16, 2) layout.addWidget(qw.QLabel("Minimum Pitch"), 17, 0) - self.pitch_min = FloatQLineEdit( - parent=self, default=main_window.cur_disp_pitch_lims[0] - ) + self.pitch_min = FloatQLineEdit(default=main_window.cur_disp_pitch_lims[0]) self.pitch_min.accepted_value.connect(self.on_pitch_min_changed) layout.addWidget(self.pitch_min, 17, 1, 1, 1) layout.addWidget(qw.QLabel("Cents"), 17, 2) layout.addWidget(qw.QLabel("Maximum Pitch"), 18, 0) - self.pitch_max = FloatQLineEdit( - parent=self, default=main_window.cur_disp_pitch_lims[1] - ) + self.pitch_max = FloatQLineEdit(default=main_window.cur_disp_pitch_lims[1]) self.pitch_max.accepted_value.connect(self.on_pitch_max_changed) 
layout.addWidget(self.pitch_max, 18, 1, 1, 1) layout.addWidget(qw.QLabel("Cents"), 18, 2) @@ -539,33 +548,40 @@ def __init__(self, main_window: MainWindow, *args, **kwargs): main_layout.addWidget(settings, 3, 0, 1, 2) def on_min_freq_changed(self, f): + """Update function for minimum frequency on user interaction.""" self.main_window.cur_disp_freq_lims[0] = int(f) self.main_window.channel_views.on_disp_freq_lims_changed( self.main_window.cur_disp_freq_lims ) def on_max_freq_changed(self, f): + """Update function for maximum frequency on user interaction.""" self.main_window.cur_disp_freq_lims[1] = int(f) self.main_window.channel_views.on_disp_freq_lims_changed( self.main_window.cur_disp_freq_lims ) def on_algorithm_select(self, algorithm): + """Update function for F0 algorithm on user interaction.""" self.main_window.audio_processor.f0_algorithm = algorithm def on_conf_threshold_changed(self, val): + """Update function for confidence threshold on user interaction.""" self.noise_thresh_label.setText(str(val / 10.0)) self.main_window.cur_conf_threshold = val / 10.0 def on_conf_smoothing_changed(self, val): + """Update function for smoothing filter length on user interaction.""" self.smoothing_label.setText(str(val)) self.main_window.cur_smoothing_len = val - def on_derivative_tol_changed(self, val): - self.derivative_tol_label.setText(str(val)) - self.main_window.cur_derivative_tol = val + def on_gradient_tol_changed(self, val): + """Update function for gradient filter tolerance on user interaction.""" + self.gradient_tol_label.setText(str(val)) + self.main_window.cur_gradient_tol = val def on_reference_frequency_mode_changed(self, text): + """Update function for reference frequency mode on user interaction.""" if (text == "Highest") or (text == "Lowest") or ("Channel" in text): self.freq_box.setReadOnly(True) else: @@ -576,33 +592,43 @@ def on_reference_frequency_mode_changed(self, text): self.main_window.cur_ref_freq_mode = text def on_reference_frequency_changed(self, val): + """Update function for reference frequency on user interaction.""" self.main_window.cur_ref_freq = val def on_pitch_min_changed(self, val): + """Update function for minimum pitch limit on user interaction.""" self.main_window.cur_disp_pitch_lims[0] = int(val) self.main_window.trajectory_views.on_disp_pitch_lims_changed( self.main_window.cur_disp_pitch_lims ) def on_pitch_max_changed(self, val): + """Update function for maximum pitch limit on user interaction.""" self.main_window.cur_disp_pitch_lims[1] = int(val) self.main_window.trajectory_views.on_disp_pitch_lims_changed( self.main_window.cur_disp_pitch_lims ) def on_spectrum_type_select(self, arg): + """Update function for spectrum type on user interaction.""" self.main_window.cur_spec_scale_type = arg def sizeHint(self): + """Size hint.""" return qc.QSize(100, 200) class ChannelViews(qw.QWidget): - """The central widget of the GUI that contains all channel views""" + """The central widget of the GUI that contains all channel views.""" refresh_signal = qc.pyqtSignal(np.ndarray, np.ndarray, np.ndarray, np.ndarray) def __init__(self, main_window: MainWindow): + """Initialization. + + Args: + main_window: qw.QMainWindow instance. 
+ """ qw.QWidget.__init__(self) self.layout = qw.QVBoxLayout() self.layout.setSpacing(0) @@ -613,8 +639,8 @@ def __init__(self, main_window: MainWindow): self.views.append( ChannelView( main_window=main_window, - ch_id=ch_id, - orig_ch=orig_ch + 1, + soundcard_ch_id=ch_id, + disp_channel_id=orig_ch + 1, is_product=False, has_xlabel=False, ) @@ -638,45 +664,91 @@ def __init__(self, main_window: MainWindow): self.show_spectrogram_widgets(True) def show_level_widgets(self, show): + """Change visibility of level widgets. + + Args: + show: True for visible, False for invisible. + """ for view in self.views: view.show_level_widget(show) def show_spectrum_widgets(self, show): + """Change visibility of spectrum widgets. + + Args: + show: True for visible, False for invisible. + """ for view in self.views: view.show_spectrum_widget(show) def show_spectrogram_widgets(self, show): + """Change visibility of spectrogram widgets. + + Args: + show: True for visible, False for invisible. + """ for view in self.views: view.show_spectrogram_widget(show) def show_product_widgets(self, show): + """Change visibility of product widgets. + + Args: + show: True for visible, False for invisible. + """ self.views[-1].setVisible(show) self.h_line.setVisible(show) def on_disp_freq_lims_changed(self, disp_freq_lims): + """Changes frequency limits. + + Args: + disp_freq_lims: New frequency limits. + """ for view in self.views: view.on_disp_freq_lims_changed(disp_freq_lims) @qc.pyqtSlot() def on_draw(self, lvl, spec, inst_f0, stft): + """Trigger channel views refresh. + + Args: + lvl: New level. + spec: New spectrum. + inst_f0: New instantaneous frequency. + stft: New spectrogram. + + """ self.refresh_signal.emit(lvl, spec, inst_f0, stft) def sizeHint(self): + """Size hint.""" return qc.QSize(400, 200) def __iter__(self): + """Helper to enable iteration through channel views.""" yield from self.views class ChannelLabel(qw.QWidget): - """Widget that contains the vertical channel label""" + """Widget that contains the vertical channel label.""" def __init__(self, text): + """Initialization. + + Args: + text: The channel name. + """ super().__init__() self.text = text def paintEvent(self, event): - """Paints the label and updates it when necessary, e.g. when available space changes""" + """Paints the label and updates it when necessary, e.g. when available space changes. + + Args: + event: Trigger event. + + """ painter = qg.QPainter(self) painter.setFont(qg.QFont("Arial", 13, qg.QFont.Weight.Bold)) painter.setPen(qg.QColor("black")) @@ -692,7 +764,7 @@ def paintEvent(self, event): class ChannelView(qw.QWidget): """Widget that contains a channel label, level, spectrum and spectrogram, - a.k.a. one row of the central GUI widget + a.k.a. one row of the central GUI widget. """ level_refresh_signal = qc.pyqtSignal(float) @@ -702,25 +774,38 @@ class ChannelView(qw.QWidget): def __init__( self, main_window: MainWindow, - ch_id=None, - orig_ch=None, + soundcard_ch_id=None, + disp_channel_id=None, is_product=False, has_xlabel=True, - *args, - **kwargs, ): - qw.QWidget.__init__(self, *args, **kwargs) + """Initialization. + + Args: + main_window: Main pytch window. + soundcard_ch_id: Soundcard channel ID or None. + disp_channel_id: Display channel ID or None. + is_product: Bool that indicates whether channel view is for the product channel. + has_xlabel: Bool that indicates whether channel view should have x labels on plots. 
+ """ + qw.QWidget.__init__(self) self.layout = qw.QHBoxLayout() self.layout.setSpacing(0) # keep GUI tight, remove frames around widgets self.layout.setContentsMargins(0, 0, 0, 0) self.main_window = main_window - self.color = "black" if ch_id is None else main_window.ch_colors[ch_id] + self.color = ( + "black" + if soundcard_ch_id is None + else main_window.ch_colors[soundcard_ch_id] + ) self.is_product = is_product - self.ch_id = ch_id + self.ch_id = soundcard_ch_id # channel label - label = ChannelLabel("Product" if ch_id is None else f"Channel {orig_ch}") + label = ChannelLabel( + "Product" if soundcard_ch_id is None else f"Channel {disp_channel_id}" + ) self.level_widget = LevelWidget(self.main_window, has_xlabel=has_xlabel) self.spectrogram_widget = SpectrogramWidget( @@ -747,34 +832,57 @@ def __init__( @qc.pyqtSlot(object, object, object, object) def on_draw(self, lvl, spec, inst_f0, stft): - """Refreshes all widgets as fast as possible""" - # prepare data - if self.is_product: - lvl_update = lvl[-1] - stft_update = stft[:, :, -1] - spec_update = spec[:, -1] - inst_f0_update = inst_f0[:, -1] - else: - lvl_update = lvl[self.ch_id] - stft_update = stft[:, :, self.ch_id] - spec_update = spec[:, self.ch_id] - inst_f0_update = inst_f0[:, self.ch_id] + """Refreshes all widgets with new data. - # update widgets - self.level_refresh_signal.emit(lvl_update) - self.spectrum_refresh_signal.emit(spec_update, inst_f0_update) - self.spectrogram_refresh_signal.emit(stft_update) + Args: + lvl: New level data. + spec: New spectrum data. + inst_f0: New instantaneous frequency data. + stft: New spectrogram data. + + """ + idx = -1 if self.is_product else self.ch_id + + if len(lvl) > 0 and not self.is_product: + self.level_refresh_signal.emit(lvl[idx]) + + if len(spec) > 0 and len(inst_f0) > 0: + self.spectrum_refresh_signal.emit(spec[:, idx], inst_f0[idx]) + + if len(stft) > 0: + self.spectrogram_refresh_signal.emit(stft[:, :, idx]) def show_spectrum_widget(self, show): + """Change visibility of spectrum widget. + + Args: + show: True for visible, False for invisible. + """ self.spectrum_widget.setVisible(show) def show_spectrogram_widget(self, show): + """Change visibility of spectrogram widget. + + Args: + show: True for visible, False for invisible. + """ self.spectrogram_widget.setVisible(show) def show_level_widget(self, show): + """Change visibility of level widget. + + Args: + show: True for visible, False for invisible. + """ self.level_widget.setVisible(show) def on_disp_freq_lims_changed(self, disp_freq_lims): + """Change frequency axis limits of spectrum and spectrogram. + + Args: + disp_freq_lims: New frequency limits. + + """ self.spectrum_widget.on_disp_freq_lims_changed(disp_freq_lims) self.spectrogram_widget.on_disp_freq_lims_changed(disp_freq_lims) @@ -783,6 +891,12 @@ class LevelWidget(pg.GraphicsLayoutWidget): """The level meter with color-coded dB levels""" def __init__(self, main_window: MainWindow, has_xlabel=True): + """Initialization. + + Args: + main_window: Main pytch window. + has_xlabel: Bool that indicates whether plot has x label. + """ super(LevelWidget, self).__init__() self.main_window = main_window @@ -825,7 +939,13 @@ def __init__(self, main_window: MainWindow, has_xlabel=True): @qc.pyqtSlot(float) def on_draw(self, lvl): - """Updates the image with new data.""" + """Updates the image with new data. + + Args: + lvl: New audio level. 
+ + """ + start_t = time() lvl_conv = self.lvl_converter(lvl) plot_array = np.linspace( 0, lvl_conv, int(lvl_conv * np.abs(self.main_window.lvl_cvals[0])) @@ -836,12 +956,21 @@ def on_draw(self, lvl): self.img.setImage(plot_array) self.img.setLevels([0, 1]) + logger.debug(f"Lvl update took {time()-start_t:.4f}s.") class SpectrumWidget(pg.GraphicsLayoutWidget): - """Spectrum plot with current fundamental frequency as dashed line""" + """Spectrum plot with current fundamental frequency as dashed line.""" def __init__(self, main_window: MainWindow, color, has_xlabel=True): + """Initialization. + + Args: + main_window: Main pytch window. + color: Color to use for this widget. + has_xlabel: Bool that indicates whether plot has x label. + + """ super(SpectrumWidget, self).__init__() self.main_window = main_window @@ -894,15 +1023,26 @@ def on_disp_freq_lims_changed(self, disp_freq_lims): @qc.pyqtSlot(object, float) def on_draw(self, data_plot, inst_f0=None): """Updates the spectrum and the fundamental frequency line.""" - self._line.setData(self.f_axis, data_plot) # Update the spectrum line + start_t = time() + self._line.setData(x=self.f_axis, y=data_plot) # Update the spectrum line + if inst_f0 is not None: self._inst_f0_line.setPos(inst_f0) # Update the fundamental frequency line + logger.debug(f"Spectrum update took {time() - start_t:.4f}s.") class SpectrogramWidget(pg.GraphicsLayoutWidget): """Spectrogram widget""" def __init__(self, main_window: MainWindow, color, has_xlabel=True): + """Initialization. + + Args: + main_window: Main pytch window. + color: Color to use for this widget. + has_xlabel: Bool that indicates whether plot has x label. + + """ super().__init__() self.main_window = main_window @@ -960,7 +1100,13 @@ def on_disp_freq_lims_changed(self, disp_freq_lims): @qc.pyqtSlot(object) def on_draw(self, data_plot): - """Updates the spectrogram with new data.""" + """Updates the spectrogram data. + + Args: + data_plot: New spectrogram. + + """ + start_t = time() self.img.setImage(data_plot.T, autoLevels=False) self.img.setRect( qc.QRectF( @@ -970,13 +1116,19 @@ def on_draw(self, data_plot): self.default_spec.shape[0], ) ) + logger.debug(f"Spectrogram update took {time() - start_t:.4f}s.") class TrajectoryViews(qw.QTabWidget): - """Right-hand widget that contains the visualization of the F0-trajectories and the differential""" + """Right-hand widget that contains the visualization of the F0-trajectories and the differential.""" + + def __init__(self, main_window: MainWindow): + """Initialization. - def __init__(self, main_window: MainWindow, *args, **kwargs): - qw.QTabWidget.__init__(self, *args, **kwargs) + Args: + main_window: Main pytch window. + """ + qw.QTabWidget.__init__(self) self.main_window = main_window @@ -1013,17 +1165,46 @@ def __init__(self, main_window: MainWindow, *args, **kwargs): @qc.pyqtSlot(object, object) def on_draw(self, f0, diff): - self.pitch_view.on_draw(f0) - if len(self.main_window.channels) > 1: + """Update pitch and pitch differences view. Only update selected. + + Args: + f0: New F0 trajectories. + diff: New pitch differences. + + """ + start_t = time() + if len(f0) > 0 and self.currentIndex() == 0: + self.pitch_view.on_draw(f0) + + if ( + len(self.main_window.channels) > 1 + and len(diff) > 0 + and self.currentIndex() == 1 + ): self.pitch_diff_view.on_draw(diff) + logger.debug(f"Trajectory view update took {time() - start_t:.4f}s.") + def on_disp_pitch_lims_changed(self, disp_pitch_lims): + """Update pitch limits on user interaction. 
+ + Args: + disp_pitch_lims: New pitch limits. + + """ self.change_pitch_lims(self.pitch_view, disp_pitch_lims) if len(self.main_window.channels) > 1: self.change_pitch_lims(self.pitch_diff_view, disp_pitch_lims) @staticmethod def change_pitch_lims(view, disp_pitch_lims): + """Update pitch limits of given view. + + Args: + view: Pitch or Differences view. + disp_pitch_lims: New pitch limits. + + """ # Set the x-axis range view.plot_item.setXRange(0, len(view.t_axis)) @@ -1035,6 +1216,12 @@ def change_pitch_lims(view, disp_pitch_lims): view.plot_item.getAxis("left").setTicks([[(y, str(y)) for y in y_ticks]]) def show_trajectory_views(self, show): + """Change visibility of trajectory views. + + Args: + show: Bool, True indicates visible, False invisible. + + """ self.setVisible(show) def sizeHint(self): @@ -1043,12 +1230,18 @@ def sizeHint(self): class PitchWidget(pg.GraphicsLayoutWidget): - """Visualization of the F0-trajectories of each channel""" + """Visualization of the F0-trajectories of each channel.""" low_pitch_changed = qc.pyqtSignal(np.ndarray) - def __init__(self, main_window: MainWindow, *args, **kwargs): - super(PitchWidget, self).__init__(*args, **kwargs) + def __init__(self, main_window: MainWindow): + """Initialization. + + Args: + main_window: Main pytch window. + + """ + super(PitchWidget, self).__init__() self.main_window = main_window self.channel_views = main_window.channel_views.views[:-1] @@ -1096,16 +1289,28 @@ def __init__(self, main_window: MainWindow, *args, **kwargs): @qc.pyqtSlot(object) def on_draw(self, f0): - """Updates the F0 trajectories for each channel.""" - for i in range(f0.shape[1]): - self._lines[i].setData(self.t_axis, f0[:, i]) # Update the line data + """Updates the F0 trajectories for each channel. + + Args: + f0: New F0 trajectories. + + """ + if len(f0) > 0: + for i in range(f0.shape[1]): + self._lines[i].setData(self.t_axis, f0[:, i]) # Update the line data class DifferentialPitchWidget(pg.GraphicsLayoutWidget): - """Visualization of the pair-wise F0-differences""" + """Visualization of the pair-wise F0 differences.""" + + def __init__(self, main_window: MainWindow): + """Initialization. + + Args: + main_window: Main pytch window. - def __init__(self, main_window: MainWindow, *args, **kwargs): - super(DifferentialPitchWidget, self).__init__(*args, **kwargs) + """ + super(DifferentialPitchWidget, self).__init__() self.main_window = main_window self.channel_views = main_window.channel_views.views[:-1] self.ci.layout.setContentsMargins(0, 0, 0, 0) @@ -1171,7 +1376,17 @@ def __init__(self, main_window: MainWindow, *args, **kwargs): @qc.pyqtSlot(object) def on_draw(self, diff): - """Updates the pitch differences for each channel pair.""" - for i in range(diff.shape[1]): - self._lines[i][0].setData(self.t_axis, diff[:, i]) # Update the solid line - self._lines[i][1].setData(self.t_axis, diff[:, i]) # Update the dashed line + """Updates the pitch differences for each channel pair. + + Args: + diff: New pitch differences. 
+
+        """
+        if len(diff) > 0:
+            for i in range(diff.shape[1]):
+                self._lines[i][0].setData(
+                    self.t_axis, diff[:, i]
+                )  # Update the solid line
+                self._lines[i][1].setData(
+                    self.t_axis, diff[:, i]
+                )  # Update the dashed line
diff --git a/pytch/gui_utils.py b/pytch/gui_utils.py
index 87d9db0..5871037 100644
--- a/pytch/gui_utils.py
+++ b/pytch/gui_utils.py
@@ -25,8 +25,14 @@ class FloatQLineEdit(qw.QLineEdit):
 
     accepted_value = qc.pyqtSignal(float)
 
-    def __init__(self, default=None, *args, **kwargs):
-        qw.QLineEdit.__init__(self, *args, **kwargs)
+    def __init__(self, default=None):
+        """Initialization.
+
+        Args:
+            default: Default value.
+
+        """
+        qw.QLineEdit.__init__(self)
         self.setValidator(qg.QDoubleValidator())
         self.setFocusPolicy(qc.Qt.FocusPolicy.ClickFocus | qc.Qt.FocusPolicy.TabFocus)
         self.returnPressed.connect(self.do_check)
@@ -56,21 +62,13 @@ def __init__(self):
         )
 
 
-class QVLine(qw.QFrame):
-    """A vertical separation line"""
-
-    def __init__(self):
-        super().__init__()
-        self.setMinimumHeight(1)
-        self.setFixedWidth(20)
-        self.setFrameShape(qw.QFrame.Shape.VLine)
-        self.setFrameShadow(qw.QFrame.Shadow.Sunken)
-        self.setSizePolicy(
-            qw.QSizePolicy.Policy.Preferred, qw.QSizePolicy.Policy.Minimum
-        )
+def disable_interactivity(plot_item):
+    """Disables interactive elements, like zooming or context menus, for a given plot.
 
+    Args:
+        plot_item: pyqtgraph PlotItem.
 
-def disable_interactivity(plot_item):
+    """
     plot_item.setMouseEnabled(x=False, y=False)  # Disable mouse panning & zooming
     plot_item.hideButtons()  # Disable corner auto-scale button
     plot_item.setMenuEnabled(False)  # Disable right-click context menu