From a19332a5d74420ad05f4b3d5a4126848c4356807 Mon Sep 17 00:00:00 2001 From: nanamicat Date: Sat, 27 Jul 2024 02:07:45 +0800 Subject: [PATCH 1/3] sort & missing --- Cargo.lock | 17 ++++++++++++++ Cargo.toml | 2 ++ src/bin/page_sort.rs | 53 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+) create mode 100644 src/bin/page_sort.rs diff --git a/Cargo.lock b/Cargo.lock index e6652d6..81fe296 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -69,6 +69,12 @@ dependencies = [ "wyz", ] +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "cfg-if" version = "1.0.0" @@ -214,10 +220,12 @@ version = "0.1.0" dependencies = [ "anyhow", "bitvec", + "byteorder", "circular-buffer", "clap", "crc", "indicatif", + "memmap2", "num_enum", "pretty-hex", "simple_endian", @@ -264,6 +272,15 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memmap2" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" +dependencies = [ + "libc", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" diff --git a/Cargo.toml b/Cargo.toml index 803b56c..e9aa8e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,10 +9,12 @@ default-run="page_explorer" [dependencies] anyhow = "1.0.86" bitvec = "1.0.1" +byteorder = "1.5.0" circular-buffer = "0.1.7" clap = { version = "4.5.9", features = ["derive"] } crc = "3.2.1" indicatif = "0.17.8" +memmap2 = "0.9.4" num_enum = "0.7.2" pretty-hex = "0.4.1" simple_endian = "0.3.2" diff --git a/src/bin/page_sort.rs b/src/bin/page_sort.rs new file mode 100644 index 0000000..978002a --- /dev/null +++ b/src/bin/page_sort.rs @@ -0,0 +1,53 @@ +use std::fs::File; +use std::path::PathBuf; + +use byteorder::{BigEndian, ByteOrder}; +use clap::Parser; +use memmap2::MmapMut; + +#[derive(Parser, Debug)] +struct Arguments { + file: PathBuf, +} + +const PAGE_SIZE: usize = 16 * 1024; // 16K块大小 + +struct Page { + data: [u8; PAGE_SIZE], +} + +impl Page { + fn offset(&self) -> u32 { + BigEndian::read_u32(&self.data[4..8]) + } +} + +fn main() -> std::io::Result<()> { + let args = Arguments::parse(); + + + let file = File::options().read(true).write(true).open(args.file)?; + + let mmap = unsafe { MmapMut::map_mut(&file)? }; + + // 不知道怎么更科学的转类型,GPT 写的, + let pages: &mut [Page] = unsafe { + std::slice::from_raw_parts_mut(mmap.as_ptr() as *mut Page, mmap.len() / PAGE_SIZE) + }; + + pages.sort_by_key(|f| f.offset()); + + let mut missing = 0; + let mut expected = pages[0].offset(); + for page in pages { + missing += page.offset() - expected; + expected = page.offset() + 1; + } + + println!("missing: {}", missing); + + // 确保所有更改被刷到磁盘 + mmap.flush()?; + + Ok(()) +} From d01b28ba31d645c87e8eafdc7590626559d52fe7 Mon Sep 17 00:00:00 2001 From: nanamicat Date: Sat, 27 Jul 2024 15:36:23 +0800 Subject: [PATCH 2/3] remove byteorder dependence --- Cargo.lock | 7 ----- Cargo.toml | 1 - src/bin/page_sort.rs | 53 ------------------------------------- src/bin/tablespace_sort.rs | 54 ++++++++++++++++++++++++++++++++++++-- 4 files changed, 52 insertions(+), 63 deletions(-) delete mode 100644 src/bin/page_sort.rs diff --git a/Cargo.lock b/Cargo.lock index 3870388..d552dbe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -69,12 +69,6 @@ dependencies = [ "wyz", ] -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - [[package]] name = "cfg-if" version = "1.0.0" @@ -220,7 +214,6 @@ version = "0.1.0" dependencies = [ "anyhow", "bitvec", - "byteorder", "circular-buffer", "clap", "crc", diff --git a/Cargo.toml b/Cargo.toml index f6f100d..c2abd6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,6 @@ default-run="page_explorer" [dependencies] anyhow = "1.0.86" bitvec = "1.0.1" -byteorder = "1.5.0" circular-buffer = "0.1.7" clap = { version = "4.5.9", features = ["derive"] } crc = "3.2.1" diff --git a/src/bin/page_sort.rs b/src/bin/page_sort.rs deleted file mode 100644 index 978002a..0000000 --- a/src/bin/page_sort.rs +++ /dev/null @@ -1,53 +0,0 @@ -use std::fs::File; -use std::path::PathBuf; - -use byteorder::{BigEndian, ByteOrder}; -use clap::Parser; -use memmap2::MmapMut; - -#[derive(Parser, Debug)] -struct Arguments { - file: PathBuf, -} - -const PAGE_SIZE: usize = 16 * 1024; // 16K块大小 - -struct Page { - data: [u8; PAGE_SIZE], -} - -impl Page { - fn offset(&self) -> u32 { - BigEndian::read_u32(&self.data[4..8]) - } -} - -fn main() -> std::io::Result<()> { - let args = Arguments::parse(); - - - let file = File::options().read(true).write(true).open(args.file)?; - - let mmap = unsafe { MmapMut::map_mut(&file)? }; - - // 不知道怎么更科学的转类型,GPT 写的, - let pages: &mut [Page] = unsafe { - std::slice::from_raw_parts_mut(mmap.as_ptr() as *mut Page, mmap.len() / PAGE_SIZE) - }; - - pages.sort_by_key(|f| f.offset()); - - let mut missing = 0; - let mut expected = pages[0].offset(); - for page in pages { - missing += page.offset() - expected; - expected = page.offset() + 1; - } - - println!("missing: {}", missing); - - // 确保所有更改被刷到磁盘 - mmap.flush()?; - - Ok(()) -} diff --git a/src/bin/tablespace_sort.rs b/src/bin/tablespace_sort.rs index 98ecdb0..edb42fb 100644 --- a/src/bin/tablespace_sort.rs +++ b/src/bin/tablespace_sort.rs @@ -1,3 +1,53 @@ -fn main() { - todo!() +use std::fs::File; +use std::path::PathBuf; + +use clap::Parser; +use memmap2::MmapMut; + +#[derive(Parser, Debug)] +struct Arguments { + file: PathBuf, +} + +const PAGE_SIZE: usize = 16 * 1024; // 16K块大小 + +struct Page { + data: [u8; PAGE_SIZE], +} + +impl Page { + fn offset(&self) -> u32 { + let num: [u8; 4] = self.data[4..8].try_into().expect("slice with incorrect length");; + return u32::from_be_bytes(num); + } +} + +fn main() -> std::io::Result<()> { + let args = Arguments::parse(); + + + let file = File::options().read(true).write(true).open(args.file)?; + + let mmap = unsafe { MmapMut::map_mut(&file)? }; + + // 不知道怎么更科学的转类型,GPT 写的, + let pages: &mut [Page] = unsafe { + std::slice::from_raw_parts_mut(mmap.as_ptr() as *mut Page, mmap.len() / PAGE_SIZE) + }; + + pages.sort_by_key(|f| f.offset()); + + let mut missing = 0; + let mut expected = pages[0].offset(); + for page in pages { + missing += page.offset() - expected; + expected = page.offset() + 1; + } + + println!("max: {}, missing: {}", expected - 1, missing); + + // 确保所有更改被刷到磁盘 + mmap.flush()?; + + Ok(()) } From 695c4ab205c04776bdac140e867db14de4a8ae49 Mon Sep 17 00:00:00 2001 From: nanamicat Date: Sat, 27 Jul 2024 16:56:11 +0800 Subject: [PATCH 3/3] unwrap --- src/bin/tablespace_sort.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/tablespace_sort.rs b/src/bin/tablespace_sort.rs index edb42fb..cacbc26 100644 --- a/src/bin/tablespace_sort.rs +++ b/src/bin/tablespace_sort.rs @@ -17,7 +17,7 @@ struct Page { impl Page { fn offset(&self) -> u32 { - let num: [u8; 4] = self.data[4..8].try_into().expect("slice with incorrect length");; + let num: [u8; 4] = self.data[4..8].try_into().unwrap(); return u32::from_be_bytes(num); } }