Skip to content

Commit

Permalink
SQL Table Parsing + more unit test
Browse files Browse the repository at this point in the history
  • Loading branch information
Codetector1374 committed Jul 26, 2024
1 parent d9b0023 commit f7b9b90
Show file tree
Hide file tree
Showing 6 changed files with 255 additions and 57 deletions.
43 changes: 1 addition & 42 deletions src/bin/page_explorer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,52 +80,11 @@ fn explore_page(file_offset: usize, page: Page) {
return;
}

let def = TableDefinition {
name: String::from("pre_ucenter_members"),
primary_keys: vec![
// name, type, nullable, signed, pk
Field::new("uid", FieldType::MediumInt(true), false),
],
non_key_fields: vec![
// name, type, nullable, signed, pk
Field::new(
"username",
FieldType::VariableChars(15),
false,
),
Field::new(
"password",
FieldType::VariableChars(255),
false,
),
Field::new(
"secmobicc",
FieldType::VariableChars(3),
false,
),
Field::new(
"secmobile",
FieldType::VariableChars(12),
false,
),
Field::new("email", FieldType::VariableChars(255), false),
Field::new("myid", FieldType::VariableChars(30), false),
Field::new("myidkey", FieldType::VariableChars(16), false),
Field::new("regip", FieldType::VariableChars(45), false),
Field::new("regdate", FieldType::Int(false), false),
Field::new("lastloginip", FieldType::Int(true), false),
Field::new("lastlogintime", FieldType::Int(false), false),
Field::new("salt", FieldType::VariableChars(20), false),
Field::new("secques", FieldType::VariableChars(8), false),
],
};
let table_def = Arc::new(def);

trace!("{:x?}", page);

if page.header.page_type == PageType::Index {
let index_page = IndexPage::try_from_page(page).expect("Failed to construct index");
explore_index(index_page, Some(&table_def));
// explore_index(index_page, Some(&table_def));
}
}

Expand Down
142 changes: 142 additions & 0 deletions src/innodb/charset.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
use anyhow::{anyhow, Error, Result};

/// Tag identifying a character set that can be attached to a table or column.
///
/// Variants carry no payload. Use [`InnoDBCharset::with_name`] to obtain one
/// from its textual name and [`InnoDBCharset::max_len`] to query its
/// per-charset length figure.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum InnoDBCharset {
    // Kept in alphabetical order; one variant per supported charset name.
    Armscii8,
    Ascii,
    Big5,
    Binary,
    Cp1250,
    Cp1251,
    Cp1256,
    Cp1257,
    Cp850,
    Cp852,
    Cp866,
    Cp932,
    Dec8,
    Eucjpms,
    Euckr,
    Gb18030,
    Gb2312,
    Gbk,
    Geostd8,
    Greek,
    Hebrew,
    Hp8,
    Keybcs2,
    Koi8r,
    Koi8u,
    Latin1,
    Latin2,
    Latin5,
    Latin7,
    Macce,
    Macroman,
    Sjis,
    Swe7,
    Tis620,
    Ucs2,
    Ujis,
    Utf16,
    Utf16le,
    Utf32,
    Utf8mb3,
    Utf8mb4,
}

impl InnoDBCharset {
pub fn with_name(name: &str) -> Result<Self> {
match name {
"armscii8" => Ok(Self::Armscii8),
"ascii" => Ok(Self::Ascii),
"big5" => Ok(Self::Big5),
"binary" => Ok(Self::Binary),
"cp1250" => Ok(Self::Cp1250),
"cp1251" => Ok(Self::Cp1251),
"cp1256" => Ok(Self::Cp1256),
"cp1257" => Ok(Self::Cp1257),
"cp850" => Ok(Self::Cp850),
"cp852" => Ok(Self::Cp852),
"cp866" => Ok(Self::Cp866),
"cp932" => Ok(Self::Cp932),
"dec8" => Ok(Self::Dec8),
"eucjpms" => Ok(Self::Eucjpms),
"euckr" => Ok(Self::Euckr),
"gb18030" => Ok(Self::Gb18030),
"gb2312" => Ok(Self::Gb2312),
"gbk" => Ok(Self::Gbk),
"geostd8" => Ok(Self::Geostd8),
"greek" => Ok(Self::Greek),
"hebrew" => Ok(Self::Hebrew),
"hp8" => Ok(Self::Hp8),
"keybcs2" => Ok(Self::Keybcs2),
"koi8r" => Ok(Self::Koi8r),
"koi8u" => Ok(Self::Koi8u),
"latin1" => Ok(Self::Latin1),
"latin2" => Ok(Self::Latin2),
"latin5" => Ok(Self::Latin5),
"latin7" => Ok(Self::Latin7),
"macce" => Ok(Self::Macce),
"macroman" => Ok(Self::Macroman),
"sjis" => Ok(Self::Sjis),
"swe7" => Ok(Self::Swe7),
"tis620" => Ok(Self::Tis620),
"ucs2" => Ok(Self::Ucs2),
"ujis" => Ok(Self::Ujis),
"utf16" => Ok(Self::Utf16),
"utf16le" => Ok(Self::Utf16le),
"utf32" => Ok(Self::Utf32),
"utf8mb3" => Ok(Self::Utf8mb3),
"utf8mb4" => Ok(Self::Utf8mb4),
_ => Err(Error::msg(format!("Unknown charset: {}", name))),
}
}

pub fn max_len(&self) -> u8 {
match self {
InnoDBCharset::Armscii8 => 1,
InnoDBCharset::Ascii => 1,
InnoDBCharset::Big5 => 2,
InnoDBCharset::Binary => 1,
InnoDBCharset::Cp1250 => 1,
InnoDBCharset::Cp1251 => 1,
InnoDBCharset::Cp1256 => 1,
InnoDBCharset::Cp1257 => 1,
InnoDBCharset::Cp850 => 1,
InnoDBCharset::Cp852 => 1,
InnoDBCharset::Cp866 => 1,
InnoDBCharset::Cp932 => 2,
InnoDBCharset::Dec8 => 1,
InnoDBCharset::Eucjpms => 3,
InnoDBCharset::Euckr => 2,
InnoDBCharset::Gb18030 => 4,
InnoDBCharset::Gb2312 => 2,
InnoDBCharset::Gbk => 2,
InnoDBCharset::Geostd8 => 1,
InnoDBCharset::Greek => 1,
InnoDBCharset::Hebrew => 1,
InnoDBCharset::Hp8 => 1,
InnoDBCharset::Keybcs2 => 1,
InnoDBCharset::Koi8r => 1,
InnoDBCharset::Koi8u => 1,
InnoDBCharset::Latin1 => 1,
InnoDBCharset::Latin2 => 1,
InnoDBCharset::Latin5 => 1,
InnoDBCharset::Latin7 => 1,
InnoDBCharset::Macce => 1,
InnoDBCharset::Macroman => 1,
InnoDBCharset::Sjis => 2,
InnoDBCharset::Swe7 => 1,
InnoDBCharset::Tis620 => 1,
InnoDBCharset::Ucs2 => 2,
InnoDBCharset::Ujis => 3,
InnoDBCharset::Utf16 => 4,
InnoDBCharset::Utf16le => 4,
InnoDBCharset::Utf32 => 4,
InnoDBCharset::Utf8mb3 => 3,
InnoDBCharset::Utf8mb4 => 4,
}
}

}
3 changes: 2 additions & 1 deletion src/innodb/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pub mod page;
pub mod table;
pub mod charset;

use std::{
error::Error,
Expand All @@ -21,4 +22,4 @@ impl Display for InnoDBError {
}
}

impl Error for InnoDBError {}
impl Error for InnoDBError {}
2 changes: 1 addition & 1 deletion src/innodb/table/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ pub enum FieldValue {
String(String),
}

#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Field {
pub name: String,
pub field_type: FieldType,
Expand Down
104 changes: 91 additions & 13 deletions src/innodb/table/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,27 @@ use sqlparser::{
};
use tracing::debug;

#[derive(Debug, Default)]
use crate::innodb::charset::InnoDBCharset;

/// Column layout of one table, split into primary-key and non-key columns.
///
/// `TableDefinition::try_from_sql_statement` builds a value of this type from
/// a `CREATE TABLE` statement.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct TableDefinition {
/// Table name (the single-part identifier from the `CREATE TABLE` statement).
pub name: String,
/// Columns that belong to the primary key.
pub primary_keys: Vec<Field>,
/// All remaining columns, i.e. those not part of the primary key.
pub non_key_fields: Vec<Field>,
}

/// Extracts the explicit numeric length from an optional SQL character length.
///
/// Returns `Some(length)` for an integer length and `None` both when no
/// length was given and when the length is `MAX`.
fn character_length_to_u64(l: Option<CharacterLength>) -> Option<u64> {
    match l? {
        CharacterLength::IntegerLength { length, .. } => Some(length),
        CharacterLength::Max => None,
    }
}

impl TableDefinition {
pub fn try_from_sql_statement(sql: &str) -> Result<TableDefinition> {
let mut parser = Parser::new(&MySqlDialect {}).try_with_sql(sql)?;
let stmt = parser.parse_statement()?;
if let Statement::CreateTable(parsed_table) = stmt {
let mut table_def = TableDefinition::default();

let table_charset = match parsed_table.default_charset {
Some(charset_str) => InnoDBCharset::with_name(&charset_str).unwrap(),
None => InnoDBCharset::Ascii,
};

assert_eq!(parsed_table.name.0.len(), 1, "Table name is only 1 part");
table_def.name = parsed_table.name.0.first().unwrap().value.clone();

Expand All @@ -62,12 +59,55 @@ impl TableDefinition {

// Actual Columns
for column in parsed_table.columns.iter() {
let charset = column
.options
.iter()
.map(|opt| &opt.option)
.filter_map(|opt| match opt {
ColumnOption::CharacterSet(name) => {
InnoDBCharset::with_name(&name.0.first().unwrap().value).ok()
}
_ => None,
})
.last()
.unwrap_or(table_charset);
let f_type: FieldType = match column.data_type {
DataType::Char(len_opt) => {
FieldType::Char(character_length_to_u64(len_opt).unwrap_or(255) as u8)
let final_len = match len_opt {
Some(l) => match l {
CharacterLength::IntegerLength { length, unit: _ } => length,
CharacterLength::Max => u8::MAX as u64,
},
None => u8::MAX as u64,
};
assert!(final_len <= u8::MAX as u64);
if charset.max_len() == 1 {
FieldType::Char(final_len as u8)
} else {
FieldType::VariableChars(final_len as u16)
}
}
DataType::Varchar(len_opt) => {
let final_len = match len_opt {
Some(l) => match l {
CharacterLength::IntegerLength { length, unit: _ } => length,
CharacterLength::Max => u16::MAX as u64,
},
None => u16::MAX as u64,
};
assert!(final_len <= u16::MAX as u64);
FieldType::VariableChars(final_len as u16)
}
DataType::UnsignedTinyInt(_) => FieldType::TinyInt(false),
DataType::UnsignedSmallInt(_) => FieldType::SmallInt(false),
DataType::UnsignedMediumInt(_) => FieldType::MediumInt(false),
DataType::UnsignedInt(_) => FieldType::Int(false),
DataType::UnsignedBigInt(_) => FieldType::BigInt(false),
DataType::TinyInt(_) => FieldType::TinyInt(true),
DataType::SmallInt(_) => FieldType::SmallInt(true),
DataType::MediumInt(_) => FieldType::MediumInt(true),
DataType::Int(_) => FieldType::Int(true),
DataType::BigInt(_) => FieldType::BigInt(true),
_ => unimplemented!("mapping of {:?}", column.data_type),
};

Expand Down Expand Up @@ -125,9 +165,11 @@ impl TableDefinition {

#[cfg(test)]
mod test {
use std::{fs::read_to_string, path::PathBuf};

use crate::innodb::table::field::FieldType;

use super::TableDefinition;
use super::{field::Field, TableDefinition};

#[test]
fn parse_sql_to_table_def_1() {
Expand All @@ -152,4 +194,40 @@ mod test {
assert_eq!(field1.field_type, FieldType::Int(false));
assert_eq!(field1.nullable, false);
}

/// Parses the `test_data/pre_ucenter_members.sql` fixture and checks that the
/// resulting `TableDefinition` equals a hand-written reference definition.
#[test]
fn prase_sql_complex_table() {
// The fixture is located relative to the crate root so the test does not
// depend on the current working directory.
let sql = read_to_string(
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("test_data")
.join("pre_ucenter_members.sql"),
)
.unwrap();
let reference = TableDefinition {
name: String::from("pre_ucenter_members"),
primary_keys: vec![
// Field::new arguments: name, type, nullable.
// The bool inside an integer FieldType is `true` for signed columns,
// so `mediumint unsigned` is expected as MediumInt(false).
Field::new("uid", FieldType::MediumInt(false), false),
],
non_key_fields: vec![
// Field::new arguments: name, type, nullable.
// Columns declared `char(n)` in the fixture use utf8mb4, whose max_len
// is not 1, so the parser is expected to report them as VariableChars
// rather than Char.
Field::new("username", FieldType::VariableChars(15), false),
Field::new("password", FieldType::VariableChars(255), false),
Field::new("secmobicc", FieldType::VariableChars(3), false),
Field::new("secmobile", FieldType::VariableChars(12), false),
Field::new("email", FieldType::VariableChars(255), false),
Field::new("myid", FieldType::VariableChars(30), false),
Field::new("myidkey", FieldType::VariableChars(16), false),
Field::new("regip", FieldType::VariableChars(45), false),
Field::new("regdate", FieldType::Int(false), false),
// `lastloginip` is a plain (signed) `int` in the fixture.
Field::new("lastloginip", FieldType::Int(true), false),
Field::new("lastlogintime", FieldType::Int(false), false),
Field::new("salt", FieldType::VariableChars(20), false),
Field::new("secques", FieldType::VariableChars(8), false),
],
};

let parsed = TableDefinition::try_from_sql_statement(&sql).expect("Failed to parse SQL");
assert_eq!(parsed, reference);
}
}
18 changes: 18 additions & 0 deletions test_data/pre_ucenter_members.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Test fixture: read by the unit tests in src/innodb/table/mod.rs and compared
-- against a hand-written TableDefinition. Keep the column list in sync with
-- that reference when editing this file.
CREATE TABLE `pre_ucenter_members` (
`uid` mediumint unsigned NOT NULL AUTO_INCREMENT,
`username` char(15) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT '',
`password` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT '',
`secmobicc` varchar(3) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT '',
`secmobile` varchar(12) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT '',
`email` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT '',
`myid` char(30) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT '',
`myidkey` char(16) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT '',
`regip` varchar(45) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT '',
`regdate` int unsigned NOT NULL DEFAULT '0',
`lastloginip` int NOT NULL DEFAULT '0',
`lastlogintime` int unsigned NOT NULL DEFAULT '0',
`salt` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT '',
`secques` char(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT '',
PRIMARY KEY (`uid`),
UNIQUE KEY `username` (`username`)
) ENGINE=InnoDB AUTO_INCREMENT=477152 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;

0 comments on commit f7b9b90

Please sign in to comment.