Skip to content

Commit

Permalink
feat: Add Gap attributes for gff
Browse files Browse the repository at this point in the history
  • Loading branch information
natir committed Jun 17, 2024
1 parent fbe3a12 commit a5f91b8
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 57 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

### Added

- Add a Cigar format
- Add support for the Cigar format
- Add support for the Gff format

### Changed

Expand Down
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@ license-file = "LICENSE"


[features]
cigar = []
fasta = []
fastq = []
gff = []
quality = []
sequence = []
vcf = []
gff = []
cigar = []


[dependencies]
rand = { version = "0.8" }
Expand Down
14 changes: 7 additions & 7 deletions src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,26 @@
/* crates use */

/* module declaration */
#[cfg(feature = "cigar")]
pub mod cigar;

#[cfg(feature = "fasta")]
pub mod fasta;

#[cfg(feature = "fastq")]
pub mod fastq;

#[cfg(feature = "vcf")]
pub mod vcf;
#[cfg(feature = "gff")]
pub mod gff;

#[cfg(feature = "sequence")]
pub mod sequence;

#[cfg(feature = "quality")]
pub mod quality;

#[cfg(feature = "gff")]
pub mod gff;

#[cfg(feature = "cigar")]
pub mod cigar;
#[cfg(feature = "vcf")]
pub mod vcf;

/* project use */
use crate::error;
Expand Down
4 changes: 0 additions & 4 deletions src/format/cigar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,11 @@ impl format::Format for Cigar {
) -> error::Result<()> {
let mut len = 0;
while len < self.length {
dbg!(self.length);
dbg!(len);
dbg!(1..(self.length - len) as usize);
let size = if self.length - len > 1 {
rng.gen_range::<usize, core::ops::Range<usize>>(1..(self.length - len) as usize)
} else {
1
};
dbg!(size);

let letter = if self.alphabet_weights.is_empty() {
self.alphabet.generate(rng, 1)
Expand Down
106 changes: 65 additions & 41 deletions src/format/gff.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! GFF2 format
//! GFF3 format
/* std use */

Expand All @@ -15,112 +15,132 @@ use crate::values::Generate as _;
use crate::values::Get as _;

/// Struct to generate gff record
#[derive(derive_builder::Builder)]
#[builder(pattern = "owned")]
#[derive(typed_builder::TypedBuilder)]
pub struct Gff {
/// Chromosome
#[builder(default = "values::Chromosomes::Default")]
#[builder(default = values::Chromosomes::Default)]
contigs: values::Chromosomes,

/// Feature
#[builder(default = "values::GffFeature::All")]
#[builder(default = values::GffFeature::All)]
features: values::GffFeature,

/// Position
#[builder(default = "values::Integer::Position")]
#[builder(default = values::Integer::Position)]
position: values::Integer,

/// Feature length
#[builder(default = "values::Integer::UserDefine(1..100_000)")]
#[builder(default = values::Integer::UserDefine(1..100_000))]
length: values::Integer,

/// Score
#[builder(default = "values::Float::Default")]
#[builder(default = values::Float::Default)]
score: values::Float,

/// Strand
#[builder(default = "values::Strand::All")]
#[builder(default = values::Strand::All)]
strand: values::Strand,

/// Phase
#[builder(default = "values::GffPhase::All")]
#[builder(default = values::GffPhase::All)]
phase: values::GffPhase,

/// Id
#[builder(default = "values::Alphabet::A2z")]
#[builder(default = values::Alphabet::A2z)]
id: values::Alphabet,

/// Length of id
#[builder(default = "10")]
#[builder(default = 10)]
id_len: usize,

/// Id prefix
#[builder(default = "b\"\".to_vec()")]
#[builder(default = b"".to_vec())]
id_prefix: Vec<u8>,

/// Id suffix
#[builder(default = "b\"\".to_vec()")]
#[builder(default = b"".to_vec())]
id_suffix: Vec<u8>,

/// Name
#[builder(default = "values::Alphabet::Lower")]
#[builder(default = values::Alphabet::Lower)]
name: values::Alphabet,

/// Length of name
#[builder(default = "10")]
#[builder(default = 10)]
name_len: usize,

/// Name prefix
#[builder(default = "b\"\".to_vec()")]
#[builder(default = b"".to_vec())]
name_prefix: Vec<u8>,

/// Name suffix
#[builder(default = "b\"\".to_vec()")]
#[builder(default = b"".to_vec())]
name_suffix: Vec<u8>,

/// Alias
#[builder(default = "values::Alphabet::A2z")]
#[builder(default = values::Alphabet::A2z)]
alias: values::Alphabet,

/// Length of alias
#[builder(default = "10")]
#[builder(default = 10)]
alias_len: usize,

/// Alias prefix
#[builder(default = "b\"\".to_vec()")]
#[builder(default = b"".to_vec())]
alias_prefix: Vec<u8>,

/// Alias suffix
#[builder(default = "b\"\".to_vec()")]
#[builder(default = b"".to_vec())]
alias_suffix: Vec<u8>,

/// Parent
#[builder(default = "values::Alphabet::A2z")]
#[builder(default = values::Alphabet::A2z)]
parent: values::Alphabet,

/// Length of parent
#[builder(default = "10")]
#[builder(default = 10)]
parent_len: usize,

/// Parent prefix
#[builder(default = "b\"\".to_vec()")]
#[builder(default = b"".to_vec())]
parent_prefix: Vec<u8>,

/// Parent suffix
#[builder(default = "b\"\".to_vec()")]
#[builder(default = b"".to_vec())]
parent_suffix: Vec<u8>,
}

impl Gff {
/// Create a GffBuilder
pub fn builder() -> GffBuilder {
GffBuilder::default()
/// Build the value written after `Gap=` for a feature spanning `length` positions.
///
/// `length` is cut into random chunks of at least one position whose sizes add up
/// exactly to `length`. Each chunk is emitted as the bytes returned by
/// `values::Cigar::Gff.generate(rng, 1)` immediately followed by the chunk size in
/// decimal, and chunks are separated by a single space (for example `M48 D11 R26`).
/// A `length` of zero yields an empty value.
///
/// # Errors
///
/// Propagates the error returned by `values::Cigar::Gff.generate` when drawing an
/// operation fails.
fn produce_gap_value(rng: &mut rand::rngs::StdRng, length: u64) -> error::Result<Vec<u8>> {
    // Every chunk size is drawn before any operation letter: the order of the
    // draws on `rng` fixes the output for a given seed, so the two passes must
    // not be merged.
    let mut lengths: Vec<usize> = Vec::new();
    let mut len = 0;
    while len < length {
        let remaining = length - len;
        // `1..remaining` would be an empty range when a single position is left,
        // so that case is resolved without consuming randomness.
        let size = if remaining > 1 {
            rng.gen_range::<usize, core::ops::Range<usize>>(1..remaining as usize)
        } else {
            1
        };

        lengths.push(size);
        len += size as u64;
    }

    let mut output = Vec::new();
    for size in lengths {
        // Separator goes in front of every chunk but the first, so nothing has
        // to be trimmed afterwards.
        if !output.is_empty() {
            output.push(b' ');
        }
        output.extend(values::Cigar::Gff.generate(rng, 1)?);
        // Append the digits straight from the formatted string; no temporary Vec.
        output.extend_from_slice(size.to_string().as_bytes());
    }

    Ok(output)
}
}

impl core::default::Default for Gff {
fn default() -> Self {
GffBuilder::default().build().unwrap() // it's default no error
Gff::builder().build()
}
}

Expand Down Expand Up @@ -219,6 +239,11 @@ impl format::Format for Gff {
output.write_all(&self.parent_prefix)?;
output.write_all(&self.parent.generate(rng, self.parent_len)?)?;
output.write_all(&self.parent_suffix)?;
output.write_all(b";")?;

// gap
output.write_all(b"Gap=")?;
output.write_all(&Gff::produce_gap_value(rng, (end - start) as u64)?)?;

Ok(())
}
Expand All @@ -233,14 +258,14 @@ mod tests {
use super::format::Format as _;
use super::*;

const TRUTH: &[u8] = b"YAR028W\tbiotest\texon\t6057\t6155\t9.429573\t.\t0\tID=[tBTlDDl[M;Name=emxuzgaghm;Alias=s^[teLMir[;Parent=gMDhw\\voCG
YAR028W\tbiotest\trepeat\t4903\t4948\t7.9373302\t+\t0\tID=gQouVGn`Jw;Name=qjbbjlzxpz;Alias=Any[_POshs;Parent=qbSjAdbZcR
YAR028W\tbiotest\trepeat\t5211\t5297\t3.2389307\t+\t0\tID=tSvzMeTjon;Name=ljdusfsrcu;Alias=tTH\\QXXOiA;Parent=LJLnuPtf`S
YAR028W\tbiotest\trepeat\t5617\t5673\t3.21298\t+\t2\tID=vYGCzkT\\Wk;Name=nwbrlpbpvm;Alias=tIGEbcnVWJ;Parent=VaDBnQSHYN
X\tbiotest\ttranscript\t5944\t6040\t9.520424\t+\t2\tID=`NnOG[K`QK;Name=rrilpylxga;Alias=MyDqpgZliS;Parent=mUzRvGGXBg
const TRUTH: &[u8] = b"YAR028W\tbiotest\texon\t6057\t6155\t9.429573\t.\t0\tID=[tBTlDDl[M;Name=emxuzgaghm;Alias=s^[teLMir[;Parent=gMDhw\\voCG;Gap=M48 D11 R26 F10 M1 D1 D1
93\tbiotest\texon\t8323\t8381\t3.2013047\t-\t.\tID=dbZcRFrrQ_;Name=jwinicfqqi;Alias=jonYVInjLI;Parent=i`oWogntTH;Gap=I28 R12 F5 I1 R3 D8 I1
X\tbiotest\texon\t9176\t9219\t1.0694146\t.\t.\tID=zkT\\Wk_sGD;Name=rlpbpvmdcp;Alias=nVWJVaDBnQ;Parent=SHYNm[QBCg;Gap=F24 R15 F2 I1 D1
ENA|LT795502|LT795502.1\tbiotest\tgene\t2073\t2169\t0.5253875\t-\t2\tID=gZliSmUzRv;Name=ccdkarvolo;Alias=Bw_ZxxkAFA;Parent=[o`OIdJgjZ;Gap=R31 F13 I47 D4 F1
ENA|LT795502|LT795502.1\tbiotest\ttranscript\t3919\t3944\t9.702128\t.\t0\tID=jBlBKigqzn;Name=gultrkslsv;Alias=\\RlwOmAiZP;Parent=wyAsKBssXJ;Gap=R6 R4 D2 D10 F2 I1
";

const DEFAULT: &[u8] = b"YAR028W\tbiotest\texon\t1133862760\t1133889429\t21.144531\t.\t0\tID=[tBTlDDl[M;Name=emxuzgaghm;Alias=s^[teLMir[;Parent=gMDhw\\voCG";
const DEFAULT: &[u8] = b"YAR028W\tbiotest\texon\t1133862760\t1133889429\t21.144531\t.\t0\tID=[tBTlDDl[M;Name=emxuzgaghm;Alias=s^[teLMir[;Parent=gMDhw\\voCG;Gap=D21168 D1146 R2911 I1127 D50 I96 R103 R1 M46 I7 F1 F1 F7 R1 D2 F1 F1";

#[test]
fn default() -> error::Result<()> {
Expand All @@ -265,12 +290,11 @@ X\tbiotest\ttranscript\t5944\t6040\t9.520424\t+\t2\tID=`NnOG[K`QK;Name=rrilpylxg
.position(values::Integer::UserDefine(0..10_000))
.length(values::Integer::UserDefine(2..100))
.score(values::Float::UserDefine(0.0..10.0))
.build()?;
.build();

generator.record(&mut output, &mut rng)?;

println!("{:?} {}", String::from_utf8(output.to_vec()), output.len());
assert_eq!(output, TRUTH.to_vec()[..108]);
assert_eq!(output, TRUTH.to_vec()[..137]);

Ok(())
}
Expand All @@ -284,7 +308,7 @@ X\tbiotest\ttranscript\t5944\t6040\t9.520424\t+\t2\tID=`NnOG[K`QK;Name=rrilpylxg
.position(values::Integer::UserDefine(0..10_000))
.length(values::Integer::UserDefine(2..100))
.score(values::Float::UserDefine(0.0..10.0))
.build()?;
.build();

generator.records(&mut output, &mut rng, 5)?;

Expand All @@ -306,7 +330,7 @@ X\tbiotest\ttranscript\t5944\t6040\t9.520424\t+\t2\tID=`NnOG[K`QK;Name=rrilpylxg
.position(values::Integer::UserDefine(0..10_000))
.length(values::Integer::UserDefine(2..100))
.score(values::Float::UserDefine(0.0..10.0))
.build()?;
.build();

generator.create(&temp_file, &mut rng, 5)?;

Expand Down
7 changes: 5 additions & 2 deletions src/values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,8 @@ impl core::convert::AsRef<[&'static [u8]]> for Strand {
}
}
}

#[derive(Default)]
/// Possible cigar alphabet
pub enum Cigar {
#[default]
Expand All @@ -380,6 +382,8 @@ impl core::convert::AsRef<[u8]> for Cigar {
}
}

impl Generate for Cigar {}

#[derive(Debug, Clone, Default)]
/// Possible value for frame
pub enum GffFeature {
Expand Down Expand Up @@ -428,8 +432,6 @@ impl core::convert::AsRef<[&'static [u8]]> for GffPhase {
}
}

impl Generate for Cigar {}

#[cfg(test)]
mod tests {
/* project use */
Expand Down Expand Up @@ -659,6 +661,7 @@ mod tests {
);
}

#[test]
fn cigar() -> error::Result<()> {
assert_eq!(Cigar::Sam.as_ref(), constants::CIGAR_SAM);
assert_eq!(Cigar::Gff.as_ref(), constants::CIGAR_GFF);
Expand Down

0 comments on commit a5f91b8

Please sign in to comment.