Skip to content

Commit

Permalink
feat: add generation function for fast[a|q] format
Browse files Browse the repository at this point in the history
  • Loading branch information
natir committed Feb 2, 2024
1 parent 97d1959 commit 1c1f5c4
Show file tree
Hide file tree
Showing 11 changed files with 468 additions and 78 deletions.
2 changes: 1 addition & 1 deletion .copier-answers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ forge_namespace: natir
forge_repo_name: biotest
msrv: '1.75'
proc_macro: true
project_description: Many function to generate test data for bioinformatics data
project_description: Generate random test data for bioinformatics
project_name: biotest
10 changes: 6 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "biotest"
version = "0.1.0"
authors = ["Pierre Marijon <[email protected]>"]
edition = "2021"
description = "Many function to generate test data for bioinformatics data"
description = "Generate random test data for bioinformatics"
rust-version = "1.75"

homepage = "https://github.com/natir/biotest"
Expand All @@ -15,12 +15,11 @@ license-file = "LICENSE"


[dependencies]

rand = { version = "0.8" }

# Error management
thiserror = { version = "1" }


# Logging and error management
log = { version = "0.4" }

Expand All @@ -30,12 +29,15 @@ biotest_derive = { path = "biotest_derive", optional = true }

[dev-dependencies]
criterion = { version = "0.5" }
tempfile = { version = "3" }

# CLI management
clap = { version = "4", features = ["derive"] }

# Logging management
stderrlog = { version = "0.5" }


[profile.release]
lto = 'thin'
opt-level = 3
Expand All @@ -46,4 +48,4 @@ incremental = false

[profile.profiling]
inherits = "release"
debug = true
debug = true
16 changes: 5 additions & 11 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,15 @@

[![License](https://img.shields.io/badge/license-MIT-green)](https://github.com/natir/biotest/blob/master/LICENSE)

Generate random test data for bioinformatics

Many function to generate test data for bioinformatics data

## Installation

### From source
## Usage

```bash
git clone https:///natir/biotest.git
cd biotest
cargo install --path .
In your Cargo.toml add
```toml
biotest = { git = "https://github.com/natir/biotest.git" }
```

## Usage

## Minimum supported Rust version

Currently the minimum supported Rust version is 1.75.
2 changes: 1 addition & 1 deletion biotest_derive/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! Many function to generate test data for bioinformatics data procedural macro crate
//! Generate random test data for bioinformatics
#![warn(missing_docs)]

Expand Down
50 changes: 0 additions & 50 deletions examples/biotest.rs

This file was deleted.

70 changes: 70 additions & 0 deletions src/constants.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
//! Declaration of some constant values
/* std use */

/* crates use */

/* project use */

/// Build, at compile time, an array of `N` consecutive byte values starting at `B`.
///
/// Element `i` holds `(B + i) as u8`; the cast truncates, so values above 255 wrap around.
const fn gen_array<const N: usize, const B: usize>() -> [u8; N] {
    let mut values = [0u8; N];

    // `for` loops are not available in a `const fn`, so the index is advanced by hand.
    let mut offset = 0;
    loop {
        if offset >= N {
            break;
        }

        values[offset] = (B + offset) as u8;
        offset += 1;
    }

    values
}

/// Fixed random seed: 32 bytes, every one set to 42
pub const SEED: [u8; 32] = [42; 32];

/// Nucleotides in both cases: `ACTG` followed by `actg`
pub const NUCLEOTIDES: [u8; 8] = *b"ACTGactg";

/// Lower case nucleotides: `actg`
pub const NUCLEOTIDES_LOWER: [u8; 4] = *b"actg";

/// Upper case nucleotides: `ACTG`
pub const NUCLEOTIDES_UPPER: [u8; 4] = *b"ACTG";

/// All possible phred 33 values: the 40 consecutive bytes starting at 33, `!` to `H`
pub const PHRED33: [u8; 40] = gen_array::<40, 33>();

/// All possible phred 64 values: the 40 consecutive bytes starting at 64, `@` to `g`
pub const PHRED64: [u8; 40] = gen_array::<40, 64>();

/// Alphabets: the 58 consecutive bytes from `A` to `z`, which also covers the six
/// symbols sitting between `Z` and `a` (`[`, `\`, `]`, `^`, `_` and the backtick)
pub const ALPHABETS: [u8; 58] = gen_array::<58, 65>();

#[cfg(test)]
mod tests {
    /* project use */
    use super::*;

    /// 40 values starting at 33 span `!` to `H`
    #[test]
    fn phred33() {
        let expected: [u8; 40] = *b"!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGH";

        assert_eq!(gen_array::<40, 33>(), expected);
    }

    /// 40 values starting at 64 span `@` to `g`
    #[test]
    fn phred64() {
        let expected: [u8; 40] = *b"@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefg";

        assert_eq!(gen_array::<40, 64>(), expected);
    }

    /// 58 values starting at 65 span `A` to `z`, punctuation in between included
    #[test]
    fn alphapets() {
        let expected: [u8; 58] = *b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz";

        assert_eq!(gen_array::<58, 65>(), expected);
    }
}
4 changes: 3 additions & 1 deletion src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ use thiserror;
/// Enum to manage error
#[derive(std::fmt::Debug, thiserror::Error)]
pub enum Error {
}
/// Input/output error coming from the standard library, forwarded transparently
#[error(transparent)]
StdIo(#[from] std::io::Error),

Check warning on line 10 in src/error.rs

View workflow job for this annotation

GitHub Actions / check

missing documentation for a variant

Check warning on line 10 in src/error.rs

View workflow job for this annotation

GitHub Actions / Github Pages

missing documentation for a variant

Check warning on line 10 in src/error.rs

View workflow job for this annotation

GitHub Actions / test (stable)

missing documentation for a variant

Check failure on line 10 in src/error.rs

View workflow job for this annotation

GitHub Actions / lints

missing documentation for a variant

Check warning on line 10 in src/error.rs

View workflow job for this annotation

GitHub Actions / minimum_rust_version

missing documentation for a variant

Check warning on line 10 in src/error.rs

View workflow job for this annotation

GitHub Actions / test (beta)

missing documentation for a variant

Check warning on line 10 in src/error.rs

View workflow job for this annotation

GitHub Actions / test (macos)

missing documentation for a variant

Check warning on line 10 in src/error.rs

View workflow job for this annotation

GitHub Actions / test (windows)

missing documentation for a variant

Check warning on line 10 in src/error.rs

View workflow job for this annotation

GitHub Actions / test (windows)

missing documentation for a variant
}

/// Alias of [`core::result::Result`] whose error type is fixed to [`Error`]
pub type Result<T> = core::result::Result<T, Error>;
11 changes: 11 additions & 0 deletions src/format.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
//! Format data generation
/* std use */

/* crates use */

/* module declaration */
pub mod fasta;
pub mod fastq;

/* project use */
149 changes: 149 additions & 0 deletions src/format/fasta.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
//! Fasta generation
/* std use */
use std::io::Write;

Check warning on line 4 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / check

unused import: `std::io::Write`

Check warning on line 4 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / test (stable)

unused import: `std::io::Write`

Check failure on line 4 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / lints

unused import: `std::io::Write`

Check warning on line 4 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / minimum_rust_version

unused import: `std::io::Write`

Check warning on line 4 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / test (beta)

unused import: `std::io::Write`

Check warning on line 4 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / test (macos)

unused import: `std::io::Write`

Check warning on line 4 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / test (windows)

unused import: `std::io::Write`

Check warning on line 4 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / test (windows)

unused import: `std::io::Write`

/* crates use */

/* project use */
use crate::constants;

Check warning on line 9 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / check

unused import: `crate::constants`

Check warning on line 9 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / test (stable)

unused import: `crate::constants`

Check failure on line 9 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / lints

unused import: `crate::constants`

Check warning on line 9 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / minimum_rust_version

unused import: `crate::constants`

Check warning on line 9 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / test (beta)

unused import: `crate::constants`

Check warning on line 9 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / test (macos)

unused import: `crate::constants`

Check warning on line 9 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / test (windows)

unused import: `crate::constants`

Check warning on line 9 in src/format/fasta.rs

View workflow job for this annotation

GitHub Actions / test (windows)

unused import: `crate::constants`
use crate::error;

fn description<W>(
output: &mut W,
rng: &mut rand::rngs::StdRng,
id: usize,
comment: usize,
) -> error::Result<()>
where
W: std::io::Write,
{
output.write_all(&[b'>'])?;
crate::text(output, rng, id)?;
output.write_all(&[b' '])?;
crate::text(output, rng, comment)?;

Ok(())
}

/// Write record
pub fn record<W>(
output: &mut W,
rng: &mut rand::rngs::StdRng,
id: usize,
comment: usize,
seq_len: usize,
) -> error::Result<()>
where
W: std::io::Write,
{
description(output, rng, id, comment)?;
output.write_all(&[b'\n'])?;
crate::sequence(output, rng, seq_len)?;

Ok(())
}

/// Write multiple record
pub fn records<W>(
output: &mut W,
rng: &mut rand::rngs::StdRng,
id: usize,
comment: usize,
seq_len: usize,
num_record: usize,
) -> error::Result<()>
where
W: std::io::Write,
{
for _ in 0..num_record {
record(output, rng, id, comment, seq_len)?;
output.write_all(&[b'\n'])?;
}

Ok(())
}

/// Create a fasta file
pub fn create<P>(
path: P,
rng: &mut rand::rngs::StdRng,
id: usize,
comment: usize,
seq_len: usize,
num_record: usize,
) -> error::Result<()>
where
P: std::convert::AsRef<std::path::Path>,
{
let mut output = std::fs::File::create(&path)?;

records(&mut output, rng, id, comment, seq_len, num_record)?;

Ok(())
}

#[cfg(test)]
mod tests {
    /* project use */
    use super::*;

    /// Expected output for 5 records with id 5, comment 10 and sequence length 50,
    /// generated from the `crate::rand()` generator
    const TRUTH: &[u8] = b">oNi_P dzwC[tBTlD
tCGCgtGTTAGTTAagccAcggtAatGcTtgtaCgcAGgAtaTcgAAtTa
>rQ_[V S^RtSvzMeT
ttGCtCatGtctgCTGGTACtgTgcaaaagggGAGacAtgCtGCAAtTac
>HYNm[ QBCgL`Scxx
GGtatTCaTCctcTGgAActTgCGAcaAgaAAtaTCCcAgagggaCcttC
>gNXcb hRd]QWyFOg
gAACcTtCttAacGtTtAtGTgACAGCCaCGctGagattTGtgCttaAGg
>ppugI LwOFhYRxBZ
CTGTCCACgTTTGagtGaGCatAGGACAAaacTaTTagagGtatAGCcTa
";

    #[test]
    fn record_() -> error::Result<()> {
        let mut rng = crate::rand();
        let mut buffer = Vec::new();

        record(&mut buffer, &mut rng, 5, 10, 50)?;

        // One record without trailing newline is the first 68 bytes of TRUTH
        assert_eq!(buffer, &TRUTH[..68]);

        Ok(())
    }

    #[test]
    fn records_() -> error::Result<()> {
        let mut rng = crate::rand();
        let mut buffer = Vec::new();

        records(&mut buffer, &mut rng, 5, 10, 50, 5)?;

        assert_eq!(buffer, TRUTH);

        Ok(())
    }

    #[test]
    fn create_() -> error::Result<()> {
        let mut rng = crate::rand();

        // The directory is removed when `temp_dir` goes out of scope
        let temp_dir = tempfile::tempdir()?;
        let temp_file = temp_dir.path().join("tmp.fasta");

        create(&temp_file, &mut rng, 5, 10, 50, 5)?;

        let data = std::fs::read(&temp_file)?;

        assert_eq!(data, TRUTH);

        Ok(())
    }
}
Loading

0 comments on commit 1c1f5c4

Please sign in to comment.