Skip to content

Commit

Permalink
Add flag for Page Segmentation Modes control
Browse files Browse the repository at this point in the history
I added a `--psm` flag for controlling the PSM (Page Segmentation Mode) in Tesseract. The default mode (3) gives me quite bad results. When I use 6, 11, or 12 for Bulgarian, I get much better OCR results. I haven't tested other languages yet, but I expect improvements there as well if another mode is used.
  • Loading branch information
Neo2SHYAlien committed Mar 5, 2024
1 parent f08febf commit ba09cb4
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/CHANGES.TXT
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
- Cleanup: Remove the (unmaintained) Nuklear GUI code
- Cleanup: Reduce the amount of Windows build options in the project file
- Fix: infinite loop in MP4 file type detector.
- New: Add tesseract page segmentation modes control with `--psm` flag

0.94 (2021-12-14)
-----------------
Expand Down
1 change: 1 addition & 0 deletions src/lib_ccx/ccx_common_option.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ void init_options(struct ccx_s_options *options)
options->dvblang = NULL; // By default, autodetect DVB language
options->ocrlang = NULL; // By default, autodetect .traineddata file
options->ocr_oem = -1; // By default, OEM mode depends on the tesseract version
options->psm = 3; // Default PSM mode (3 is the default tesseract as well)
options->ocr_quantmode = 1; // CCExtractor's internal
options->mkvlang = NULL; // By default, all the languages are extracted
options->ignore_pts_jumps = 1;
Expand Down
1 change: 1 addition & 0 deletions src/lib_ccx/ccx_common_option.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ struct ccx_s_options // Options from user parameters
char *dvblang; // The name of the language stream for DVB
const char *ocrlang; // The name of the .traineddata file to be loaded with tesseract
int ocr_oem; // The Tesseract OEM mode, could be 0 (default), 1 or 2
int psm; // The Tesseract PSM mode, could be between 0 and 13. 3 is tesseract default
int ocr_quantmode; // How to quantize the bitmap before passing to to tesseract (0=no quantization at all, 1=CCExtractor's internal)
char *mkvlang; // The name of the language stream for MKV
int analyze_video_stream; // If 1, the video stream will be processed even if we're using a different one for subtitles.
Expand Down
3 changes: 3 additions & 0 deletions src/lib_ccx/ocr.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,9 @@ void *init_ocr(int lang_index)
&pars_values, 1, false);
}

// set PSM mode
TessBaseAPISetPageSegMode(ctx->api, ccx_options.psm);

free(pars_vec);
free(pars_values);

Expand Down
38 changes: 38 additions & 0 deletions src/lib_ccx/params.c
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,23 @@ void print_usage(void)
mprint(" Default value depends on the tesseract version linked :\n");
mprint(" Tesseract v3 : default mode is 0,\n");
mprint(" Tesseract v4 : default mode is 1.\n");
mprint(" --psm: Select the PSM mode for Tesseract.\n");
mprint(" Available Page segmentation modes:\n");
mprint(" 0 Orientation and script detection (OSD) only.\n");
mprint(" 1 Automatic page segmentation with OSD.\n");
mprint(" 2 Automatic page segmentation, but no OSD, or OCR.\n");
mprint(" 3 Fully automatic page segmentation, but no OSD. (Default)\n");
mprint(" 4 Assume a single column of text of variable sizes.\n");
mprint(" 5 Assume a single uniform block of vertically aligned text.\n");
mprint(" 6 Assume a single uniform block of text.\n");
mprint(" 7 Treat the image as a single text line.\n");
mprint(" 8 Treat the image as a single word.\n");
mprint(" 9 Treat the image as a single word in a circle.\n");
mprint(" 10 Treat the image as a single character.\n");
mprint(" 11 Sparse text. Find as much text as possible in no particular order.\n");
mprint(" 12 Sparse text with OSD.\n");
mprint(" 13 Raw line. Treat the image as a single text line,\n");
mprint(" bypassing hacks that are Tesseract-specific.\n");
mprint(" --mkvlang: For MKV subtitles, select which language's caption\n");
mprint(" stream will be processed. e.g. 'eng' for English.\n");
mprint(" Language codes can be either the 3 letters bibliographic\n");
Expand Down Expand Up @@ -1687,6 +1704,27 @@ int parse_parameters(struct ccx_s_options *opt, int argc, char *argv[])
fatal(EXIT_MALFORMED_PARAMETER, "--oem has no argument.\n");
}
}
// --psm <mode>: select the Tesseract page segmentation mode.
// The value must be a plain decimal integer in the range 0..13.
if (strcmp(argv[i], "--psm") == 0)
{
	if (i < argc - 1)
	{
		i++;

		// Parse argv[i] in place. The previous implementation copied it with
		// sprintf() into a buffer of sizeof(char *) bytes (overflowing on longer
		// arguments), never freed that buffer, and used atoi(), which silently
		// turns non-numeric input into 0 -- itself a valid PSM value.
		char *end = NULL;
		long psm_value = strtol(argv[i], &end, 10);

		// Reject empty/non-numeric input, trailing garbage, and out-of-range
		// values (strtol saturates to LONG_MIN/LONG_MAX on overflow, which the
		// range check below also rejects).
		if (end == argv[i] || *end != '\0' || psm_value < 0 || psm_value > 13)
		{
			fatal(EXIT_MALFORMED_PARAMETER, "--psm must be between 0 and 13\n");
		}
		opt->psm = (int)psm_value;

		continue;
	}
	else
	{
		fatal(EXIT_MALFORMED_PARAMETER, "--psm has no argument.\n");
	}
}
if (strcmp(argv[i], "--mkvlang") == 0)
{
if (i < argc - 1)
Expand Down
2 changes: 2 additions & 0 deletions src/lib_ccx/params_dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ void params_dump(struct lib_ccx_ctx *ctx)
mprint("Reduced color palette]\n");
break;
}

mprint("[Tesseract PSM: %d]\n", ccx_options.psm);
}

#define Y_N(cond) ((cond) ? "Yes" : "No")
Expand Down

0 comments on commit ba09cb4

Please sign in to comment.