Skip to content

Commit

Permalink
feat(code): code refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
voidful committed Feb 13, 2024
1 parent 803a83c commit a2d44b1
Show file tree
Hide file tree
Showing 85 changed files with 708 additions and 96 deletions.
22 changes: 11 additions & 11 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
*.wav
/test/**/
!/test/sample1_16k.wav
!/test/sample2_22k.wav
!/test/sample3_48k.wav
!/test/sample4_16k.wav
!/test/sample5_16k.wav
!/test/sample6_48k.wav
!/test/sample7_16k.wav
!/test/sample8_16k.wav
!/test/sample9_48k.wav
!/test/sample10_16k.wav
/AudCodec/test/**/
!/AudCodec/test/sample1_16k.wav
!/AudCodec/test/sample2_22k.wav
!/AudCodec/test/sample3_48k.wav
!/AudCodec/test/sample4_16k.wav
!/AudCodec/test/sample5_16k.wav
!/AudCodec/test/sample6_48k.wav
!/AudCodec/test/sample7_16k.wav
!/AudCodec/test/sample8_16k.wav
!/AudCodec/test/sample9_48k.wav
!/AudCodec/test/sample10_16k.wav

__pycache__/
File renamed without changes.
Empty file added AudCodec/base_codec/__init__.py
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from torch.nn.utils import spectral_norm
from torch.nn.utils import weight_norm
from librosa.util import normalize
from base_codec.general import save_audio, ExtractedUnit
from AudCodec.base_codec.general import save_audio, ExtractedUnit

LRELU_SLOPE = 0.1

Expand Down
2 changes: 1 addition & 1 deletion base_codec/audiodec.py → AudCodec/base_codec/audiodec.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import nlp2
import torch
from base_codec.general import save_audio, ExtractedUnit
from AudCodec.base_codec.general import save_audio, ExtractedUnit


class BaseCodec:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from base_codec.general import save_audio, ExtractedUnit
from AudCodec.base_codec.general import save_audio, ExtractedUnit
import torch
from audiotools import AudioSignal

Expand Down
2 changes: 1 addition & 1 deletion base_codec/encodec.py → AudCodec/base_codec/encodec.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import torch
from base_codec.general import save_audio, ExtractedUnit
from AudCodec.base_codec.general import save_audio, ExtractedUnit


class BaseCodec:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import torch
from transformers import AutoModel, AutoProcessor
from base_codec.general import save_audio, ExtractedUnit
from AudCodec.base_codec.general import save_audio, ExtractedUnit


class BaseCodec:
Expand Down
2 changes: 1 addition & 1 deletion base_codec/funcodec.py → AudCodec/base_codec/funcodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import torch
import os

from base_codec.general import save_audio, ExtractedUnit
from AudCodec.base_codec.general import save_audio, ExtractedUnit
from audiotools import AudioSignal


Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy

from base_codec.general import save_audio, ExtractedUnit
from AudCodec.base_codec.general import save_audio, ExtractedUnit
import torchaudio
import torch
import nlp2
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import nlp2
from base_codec.academicodec import BaseCodec
from AudCodec.base_codec.academicodec import BaseCodec


class Codec(BaseCodec):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import nlp2
from base_codec.academicodec import BaseCodec
from AudCodec.base_codec.academicodec import BaseCodec


class Codec(BaseCodec):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import nlp2
from base_codec.academicodec import BaseCodec
from AudCodec.base_codec.academicodec import BaseCodec


class Codec(BaseCodec):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from base_codec.audiodec import BaseCodec
from AudCodec.base_codec.audiodec import BaseCodec
import nlp2


Expand Down
2 changes: 1 addition & 1 deletion codec/dac_16k.py → AudCodec/codec/dac_16k.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from base_codec.descript_audio_codec import BaseCodec
from AudCodec.base_codec.descript_audio_codec import BaseCodec


class Codec(BaseCodec):
Expand Down
2 changes: 1 addition & 1 deletion codec/dac_24k.py → AudCodec/codec/dac_24k.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from base_codec.descript_audio_codec import BaseCodec
from AudCodec.base_codec.descript_audio_codec import BaseCodec


class Codec(BaseCodec):
Expand Down
2 changes: 1 addition & 1 deletion codec/dac_44k.py → AudCodec/codec/dac_44k.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from base_codec.descript_audio_codec import BaseCodec
from AudCodec.base_codec.descript_audio_codec import BaseCodec


class Codec(BaseCodec):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from base_codec.encodec import BaseCodec
from AudCodec.base_codec.encodec import BaseCodec

class Codec(BaseCodec):
def config(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from base_codec.encodec import BaseCodec
from AudCodec.base_codec.encodec import BaseCodec

class Codec(BaseCodec):
def config(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from base_codec.encodec import BaseCodec
from AudCodec.base_codec.encodec import BaseCodec

class Codec(BaseCodec):
def config(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from base_codec.encodec import BaseCodec
from AudCodec.base_codec.encodec import BaseCodec

class Codec(BaseCodec):
def config(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from base_codec.encodec import BaseCodec
from AudCodec.base_codec.encodec import BaseCodec

class Codec(BaseCodec):
def config(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import nlp2

from base_codec.funcodec import BaseCodec
from AudCodec.base_codec.funcodec import BaseCodec


class Codec(BaseCodec):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import nlp2

from base_codec.funcodec import BaseCodec
from AudCodec.base_codec.funcodec import BaseCodec


class Codec(BaseCodec):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import nlp2

from base_codec.funcodec import BaseCodec
from AudCodec.base_codec.funcodec import BaseCodec


class Codec(BaseCodec):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import nlp2

from base_codec.funcodec import BaseCodec
from AudCodec.base_codec.funcodec import BaseCodec


class Codec(BaseCodec):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import nlp2

from base_codec.funcodec import BaseCodec
from AudCodec.base_codec.funcodec import BaseCodec


class Codec(BaseCodec):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import nlp2

from base_codec.funcodec import BaseCodec
from AudCodec.base_codec.funcodec import BaseCodec


class Codec(BaseCodec):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from base_codec.speech_tokenizer import BaseCodec
from AudCodec.base_codec.speech_tokenizer import BaseCodec
import nlp2

class Codec(BaseCodec):
Expand Down
1 change: 0 additions & 1 deletion dataset/__init__.py → AudCodec/dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import dataset.general
def load_dataset(dataset_name):
    """Load a dataset through the submodule of this package named ``dataset_name``.

    The submodule is resolved relative to this package's own import name
    (``__name__``) rather than the hard-coded top-level name ``dataset``, so
    the lookup works whether the package is imported as ``dataset`` or as
    ``AudCodec.dataset``.

    Args:
        dataset_name: Name of the submodule to import; it must define a
            ``load_data()`` callable.

    Returns:
        Whatever the submodule's ``load_data()`` returns.

    Raises:
        ModuleNotFoundError: If no submodule called ``dataset_name`` can be
            imported from this package.
    """
    # Function-scope import keeps the block self-contained.
    import importlib

    module = importlib.import_module(f"{__name__}.{dataset_name}")
    return module.load_data()
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file added AudCodec/img/Overview.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Empty file added AudCodec/test/__init__.py
Empty file.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import torch
import torchaudio

from codec import list_codec, load_codec
from AudCodec.codec import list_codec, load_codec

if __name__ == '__main__':
for sample_file in ['sample1_16k.wav', 'sample2_22k.wav', 'sample3_48k.wav', 'sample4_16k.wav',
Expand Down
2 changes: 1 addition & 1 deletion test/code_synth.py → AudCodec/test/code_synth.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import torchaudio

from codec import list_codec, load_codec
from AudCodec.codec import list_codec, load_codec

if __name__ == '__main__':
for sample_file in ['sample1_16k.wav', 'sample2_22k.wav', 'sample3_48k.wav', 'sample4_16k.wav',
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import torch

from benchmarking import compute_metrics
from codec import load_codec
from AudCodec.codec import load_codec
import torchaudio
import numpy as np

Expand Down
26 changes: 26 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Contributing to Codec-SUPERB

We welcome contributions to Codec-SUPERB in several areas: models, datasets, and metrics. Here's how you can contribute:

## Contributing Models

1. Fork the Codec-SUPERB repository.
2. Add your model to the `models` directory. Please ensure your model adheres to the interface defined in `models/README.md`.
3. Add tests for your model in the `AudCodec/test` directory.
4. Submit a pull request with your changes. Please include a detailed description of your model and how it improves Codec-SUPERB.

## Contributing Datasets

1. Fork the Codec-SUPERB repository.
2. Add your dataset to the `datasets` directory. Please ensure your dataset adheres to the format defined in `datasets/README.md`.
3. Add tests for your dataset in the `AudCodec/test` directory.
4. Submit a pull request with your changes. Please include a detailed description of your dataset and how it improves Codec-SUPERB.

## Contributing Metrics

1. Fork the Codec-SUPERB repository.
2. Add your metric to the `metrics` directory. Please ensure your metric adheres to the interface defined in `metrics/README.md`.
3. Add tests for your metric in the `AudCodec/test` directory.
4. Submit a pull request with your changes. Please include a detailed description of your metric and how it improves Codec-SUPERB.

We look forward to your contributions!
86 changes: 64 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,36 +1,78 @@
# Audio Codec Benchmark
# Codec-SUPERB: Audio Codec Speech Processing Universal Performance Benchmark

## Codec Collection:
![Overview](AudCodec/img/Overview.png)

- https://github.com/ZhangXInFD/SpeechTokenizer
- https://github.com/descriptinc/descript-audio-codec
- https://github.com/facebookresearch/encodec
- https://github.com/yangdongchao/AcademiCodec
- https://github.com/facebookresearch/AudioDec
- https://github.com/alibaba-damo-academy/FunCodec
Codec-SUPERB is a comprehensive benchmark designed to evaluate audio codec models across a variety of speech tasks. Our
goal is to facilitate community collaboration and accelerate advancements in the field of speech processing by
preserving and enhancing speech information quality.


## Table of Contents

- [Introduction](#introduction)
- [Key Features](#key-features)
- [Installation](#installation)
- [Usage](#usage)
- [Benchmarking](#benchmarking)
- [Contribution](#contribution)
- [License](#license)

## Introduction

Codec-SUPERB sets a new benchmark in evaluating audio codec models, providing a rigorous and transparent framework for
assessing performance across a range of speech processing tasks. Our goal is to foster innovation and set new standards
in audio quality and processing efficiency.

## Key Features

## Criteria
### Out-of-the-Box Codec Interface
Codec-SUPERB offers an intuitive, out-of-the-box codec interface that allows for easy integration and testing of various
codec models, facilitating quick iterations and experiments.

### Waveform (Lower is better)
### Multi-Perspective Leaderboard
Codec-SUPERB's unique blend of multi-perspective evaluation and an online leaderboard drives innovation in audio codec research by providing a comprehensive assessment and fostering competitive transparency among developers.

L1Loss in waveform
### Standardized Environment
We ensure a standardized testing environment to guarantee fair and consistent comparison across all models. This
uniformity brings reliability to benchmark results, making them universally interpretable.

### Mel Distance (Lower is better)
### Unified Datasets
We provide a collection of unified datasets, curated to test a wide range of speech processing scenarios. This ensures
that models are evaluated under diverse conditions, reflecting real-world applications.

The Mel Distance is the distance between the log mel spectrograms of the reconstructed and ground truth waveforms.
## Installation

### STFT Distance (Lower is better)
```bash
git clone https://github.com/voidful/Codec-SUPERB.git
cd Codec-SUPERB
pip install -r requirements.txt
```

This metric calculates the distance between the log magnitude spectrograms of the reconstructed and ground truth
waveforms, using window lengths of [2048, 512], and is better at capturing fidelity in higher frequencies compared to
the Mel Distance.
## Usage

### PESQ (Higher is better)
Detailed instructions on how to use Codec-SUPERB, including preparing your codec model and executing benchmark tests,
can be found in the `docs` directory.

PESQ is an intrusive perceptual quality metric for the automated assessment of speech quality. We adopt ITU-T P.862.2 (wideband).
## Benchmarking

### STOI (Higher is better)
Codec-SUPERB supports a comprehensive suite of speech tasks, from speech recognition to audio quality assessment, each
designed to rigorously evaluate the capabilities of audio codec models.

STOI is an intrusive perceptual quality metric that assesses audio quality based on the intelligibility of the
reconstructed speech.
## Contribution

Contributions are highly encouraged, whether it's through adding new codec models, expanding the dataset collection, or
enhancing the benchmarking framework. Please see `CONTRIBUTING.md` for more details.

## License

This project is licensed under the MIT License - see the `LICENSE` file for details.


## Reference Audio Codec Repositories

- https://github.com/ZhangXInFD/SpeechTokenizer
- https://github.com/descriptinc/descript-audio-codec
- https://github.com/facebookresearch/encodec
- https://github.com/yangdongchao/AcademiCodec
- https://github.com/facebookresearch/AudioDec
- https://github.com/alibaba-damo-academy/FunCodec
Empty file added application/AEC.md
Empty file.
Empty file added application/ASR.md
Empty file.
Empty file added application/ASV.md
Empty file.
Empty file added application/ER.md
Empty file.
2 changes: 1 addition & 1 deletion benchmarking.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from datasets import load_dataset, load_from_disk
from collections import defaultdict
from audiotools import AudioSignal
from base_codec.general import pad_arrays_to_match
from AudCodec.base_codec.general import pad_arrays_to_match
from metrics import get_metrics
import psutil
from tqdm.contrib.concurrent import process_map
Expand Down
2 changes: 1 addition & 1 deletion dataset_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import itertools
import numpy as np
from datasets import load_dataset
from codec import list_codec
from AudCodec.codec import list_codec


def load_datasets(dataset_name, splits):
Expand Down
Loading

0 comments on commit a2d44b1

Please sign in to comment.