Skip to content

Commit

Permalink
Merge pull request #1 from prateekmedia/add-psm-rust
Browse files (browse the repository at this point in the history)
feat: add psm for rust parser
  • Loading branch information
Neo2SHYAlien authored Aug 23, 2024
2 parents f60841f + 2121165 commit 1ac6cc7
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 2 deletions.
21 changes: 20 additions & 1 deletion src/rust/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,26 @@ pub struct Args {
/// Tesseract v3 : default mode is 0,
/// Tesseract v4 : default mode is 1.
#[arg(long, verbatim_doc_comment, value_name="mode", help_heading=OUTPUT_AFFECTING_OUTPUT_FILES)]
pub oem: Option<i32>,
pub oem: Option<u8>,
/// Select the PSM mode for Tesseract.
/// Available Page segmentation modes:
/// 0 Orientation and script detection (OSD) only.
/// 1 Automatic page segmentation with OSD.
/// 2 Automatic page segmentation, but no OSD, or OCR.
/// 3 Fully automatic page segmentation, but no OSD. (Default)
/// 4 Assume a single column of text of variable sizes.
/// 5 Assume a single uniform block of vertically aligned text.
/// 6 Assume a single uniform block of text.
/// 7 Treat the image as a single text line.
/// 8 Treat the image as a single word.
/// 9 Treat the image as a single word in a circle.
/// 10 Treat the image as a single character.
/// 11 Sparse text. Find as much text as possible in no particular order.
/// 12 Sparse text with OSD.
/// 13 Raw line. Treat the image as a single text line,
/// bypassing hacks that are Tesseract-specific.
#[arg(long, verbatim_doc_comment, value_name="mode", help_heading=OUTPUT_AFFECTING_OUTPUT_FILES)]
pub psm: Option<u8>,
/// For MKV subtitles, select which language's caption
/// stream will be processed. e.g. 'eng' for English.
/// Language codes can be either the 3 letters bibliographic
Expand Down
2 changes: 2 additions & 0 deletions src/rust/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,7 @@ pub struct CcxOptions {
pub dvblang: Option<String>,
pub ocrlang: Option<String>,
pub ocr_oem: i32,
pub psm: i32,
pub ocr_quantmode: i32,
pub mkvlang: Option<String>,
pub analyze_video_stream: bool,
Expand Down Expand Up @@ -950,6 +951,7 @@ impl Default for CcxOptions {
ocrlang: None,
ocr_oem: -1,
ocr_quantmode: 1,
psm: 3,
mkvlang: None,
analyze_video_stream: false,
hardsubx_ocr_mode: 0,
Expand Down
10 changes: 9 additions & 1 deletion src/rust/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,15 @@ impl CcxOptions {
println!("Invalid oem value");
std::process::exit(ExitCode::MalformedParameter as i32);
}
self.ocr_oem = *oem;
self.ocr_oem = *oem as _;
}

if let Some(ref psm) = args.psm {
if !(0..=13).contains(psm) {
println!("--psm must be between 0 and 13");
std::process::exit(ExitCode::MalformedParameter as i32);
}
self.psm = *psm as _;
}

if let Some(ref lang) = args.mkvlang {
Expand Down

0 comments on commit 1ac6cc7

Please sign in to comment.