Skip to content

Commit

Permalink
24.06.2024: Add extra row information
Browse files Browse the repository at this point in the history
  • Loading branch information
CHRISCARLON committed Jun 24, 2024
1 parent 74536b9 commit 65af3c5
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 63 deletions.
61 changes: 30 additions & 31 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,35 +21,34 @@ exqs --url "URL HERE"
## Example Output:

```zsh
exqs --url "https://data.london.gov.uk/download/mps-monthly-crime-dahboard-data/7f45d2fe-bf69-4395-b814-cadd5ec48489/M1045_MonthlyCrimeDashboard_TNOCrimeData_202406.xlsx"
Sheet Name: MPS_MonthlyCrimeDashboard_TNOCr
Total number of columns: 12
Total number of rows: 223821
┌────────────────────────────┬───────────┐
│ Column Headers ┆ Data Type │
╞════════════════════════════╪═══════════╡
│ Column 1: Month_Year ┆ String │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 2: Area Type ┆ String │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 3: Borough_SNT ┆ String │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 4: Area name ┆ String │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 5: Area code ┆ String │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 6: Offence Group ┆ String │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 7: Offence Subgroup ┆ String │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 8: Measure ┆ String │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 9: Financial Year ┆ String │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 10: FY_FYIndex ┆ String │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 11: Count ┆ String │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 12: Refresh Date ┆ String │
└────────────────────────────┴───────────┘
Sheet Name: 2023
Total number of columns: 7
Total number of rows: 76
┌───────────────────────────────────────────────┬───────────┐
│ Column Headers ┆ Data Type │
╞═══════════════════════════════════════════════╪═══════════╡
│ Column 1: GENDER ┆ String │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 2: POSTCODE ┆ String │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 3: DOD ┆ Unknown │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 4: AGE ┆ Float │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 5: FUNERAL COST ┆ Float │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 6: COSTS RECOVERED ┆ Float │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
│ Column 7: DATE REFERRED TO TREASURY SOLICITOR ┆ String │
└───────────────────────────────────────────────┴───────────┘
Data Row 1: MALE LS25 41619 72 1590 1590 41900
Data Row 2: MALE LS22 41625 69 1350.54 1350.54 NOT REFERRED
Data Row 3: MALE LS8 41628 77 1702 0 NOT REFERRED
Data Row 4: MALE LS11 41653 63 2270 1673.4 NOT REFERRED
Data Row 5: MALE LS6 41654 54 1307 0 NOT REFERRED
Data Row 6: FEMALE LS12 41655 91 1474 1474 41767
Data Row 7: MALE LS14 41658 89 1630 1630 NOT REFERRED
Data Row 8: MALE LS12 41673 66 1378.5 1378.5 NOT REFERRED
Data Row 9: MALE LS3 41684 56 1660.64 855.49 NOT REFERRED
Data Row 10: FEMALE LS9 41689 46 1266 215.24 NOT REFERRED
```
62 changes: 47 additions & 15 deletions src/local_file_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,14 @@ use comfy_table::{Table, Cell, Color, Attribute};
use std::path::Path;

pub fn display_basic_info<P: AsRef<Path>>(path: P) -> Result<(), Box<dyn std::error::Error>> {

// Read in data from file path
let mut workbook: Xlsx<_> = open_workbook(path)?;

// Read in sheet names to list
let sheet_names = workbook.sheet_names().to_vec();

// Loop through sheet names
for sheet_name in sheet_names {
if let Some(Ok(range)) = workbook.worksheet_range_at(0) {
let mut table = Table::new();
Expand All @@ -20,13 +24,24 @@ pub fn display_basic_info<P: AsRef<Path>>(path: P) -> Result<(), Box<dyn std::er
.add_attribute(Attribute::Bold)
.fg(Color::DarkRed)
]);
// Find the header row
if let Some((header_index, header_row)) = range.rows()
.enumerate()
.find(|(_, row)| row.iter().filter(|cell| !cell.is_empty()).count() > 1)
{
let column_count = header_row.len();

let row_count = range.rows().count();
let column_count = if let Some(header_row) = range.rows().next() {
let count = header_row.len();
// Find the first non-empty row after the header then take 10 rows
let data_rows: Vec<_> = range.rows()
.skip(header_index + 1)
.take(10)
.collect();

for (index, cell) in header_row.iter().enumerate() {
let data_type = if cell.is_empty() {
for (index, header) in header_row.iter().enumerate() {
let data_types: Vec<_> = data_rows.iter()
.filter_map(|row| row.get(index))
.map(|cell| {
if cell.is_empty() {
"Empty"
} else if cell.is_int() {
"Integer"
Expand All @@ -40,26 +55,43 @@ pub fn display_basic_info<P: AsRef<Path>>(path: P) -> Result<(), Box<dyn std::er
"Error"
} else {
"Unknown"
};
}
})
.collect();

table.add_row(vec![
Cell::new(format!("Column {}: {}", index + 1, cell.to_string())),
Cell::new(data_type)
]);
}
count
let most_common_type = if data_types.is_empty() {
"No Data"
} else {
0
data_types.iter()
.filter(|&t| *t != "Empty")
.max_by_key(|&t| data_types.iter().filter(|&r| r == t).count())
.unwrap_or(&"Empty")
};

table.add_row(vec![
Cell::new(format!("Column {}: {}", index + 1, header.to_string())),
Cell::new(most_common_type)
]);
}
let row_count = range.rows().count() - (header_index + 1);
println!("Sheet Name: {}", sheet_name);
println!("Total number of columns: {}", column_count);
println!("Total number of rows: {}", row_count);
println!("{table}");
// Print each row from data_rows
for (row_index, row) in data_rows.iter().enumerate() {
print!("Data Row {}: ", row_index + 1);
for cell in row.iter() {
print!("{} ", cell);
}
println!();
}
} else {
println!("Cannot read sheet: {}", sheet_name);
println!("Could not find header row in the sheet");
}
} else {
println!("Cannot read sheet: {}", sheet_name);
}

}
Ok(())
}
66 changes: 49 additions & 17 deletions src/remote_file_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,18 @@ use reqwest::blocking::get;
use std::io::Cursor;

pub fn display_remote_basic_info(url: &str) -> Result<(), Box<dyn std::error::Error>> {
// Download the file into memory

// Return content as a bytes object to keep in memory
let response = get(url)?;
let content = response.bytes()?;

// Create Xlsx object from memory
// Create workbooks
let mut workbook: Xlsx<_> = Xlsx::new(Cursor::new(content))?;

// Add sheet names to list
let sheet_names = workbook.sheet_names().to_vec();

// Loop through sheet names
for sheet_name in sheet_names {
if let Some(Ok(range)) = workbook.worksheet_range_at(0) {
let mut table = Table::new();
Expand All @@ -26,12 +29,24 @@ pub fn display_remote_basic_info(url: &str) -> Result<(), Box<dyn std::error::Er
.fg(Color::DarkRed)
]);

let row_count = range.rows().count();
let column_count = if let Some(header_row) = range.rows().next() {
let count = header_row.len();
// Find the header row
if let Some((header_index, header_row)) = range.rows()
.enumerate()
.find(|(_, row)| row.iter().filter(|cell| !cell.is_empty()).count() > 1)
{
let column_count = header_row.len();

for (index, cell) in header_row.iter().enumerate() {
let data_type = if cell.is_empty() {
// Find the first non-empty row after the header then take 10 rows
let data_rows: Vec<_> = range.rows()
.skip(header_index + 1)
.take(10)
.collect();

for (index, header) in header_row.iter().enumerate() {
let data_types: Vec<_> = data_rows.iter()
.filter_map(|row| row.get(index))
.map(|cell| {
if cell.is_empty() {
"Empty"
} else if cell.is_int() {
"Integer"
Expand All @@ -45,26 +60,43 @@ pub fn display_remote_basic_info(url: &str) -> Result<(), Box<dyn std::error::Er
"Error"
} else {
"Unknown"
};
}
})
.collect();

table.add_row(vec![
Cell::new(format!("Column {}: {}", index + 1, cell.to_string())),
Cell::new(data_type)
]);
}
count
let most_common_type = if data_types.is_empty() {
"No Data"
} else {
0
data_types.iter()
.filter(|&t| *t != "Empty")
.max_by_key(|&t| data_types.iter().filter(|&r| r == t).count())
.unwrap_or(&"Empty")
};

table.add_row(vec![
Cell::new(format!("Column {}: {}", index + 1, header.to_string())),
Cell::new(most_common_type)
]);
}
let row_count = range.rows().count() - (header_index + 1);
println!("Sheet Name: {}", sheet_name);
println!("Total number of columns: {}", column_count);
println!("Total number of rows: {}", row_count);
println!("{table}");
// Print each row from data_rows
for (row_index, row) in data_rows.iter().enumerate() {
print!("Data Row {}: ", row_index + 1);
for cell in row.iter() {
print!("{} ", cell);
}
println!();
}
} else {
println!("Cannot read sheet: {}", sheet_name);
println!("Could not find header row in the sheet");
}
} else {
println!("Cannot read sheet: {}", sheet_name);
}

}
Ok(())
}

0 comments on commit 65af3c5

Please sign in to comment.