Skip to content

Commit

Permalink
intl: Handle translation output codesets with suffixes [BZ #26383]
Browse files Browse the repository at this point in the history
Commit 91927b7 (Rewrite iconv option parsing [BZ #19519]) did not
handle cases where the output codeset for translations (via the `gettext'
family of functions) might have a caller-specified encoding suffix such as
TRANSLIT or IGNORE.  This led to a regression where translations did not
work when the codeset had a suffix.

This commit fixes the above issue by parsing any suffixes passed to
__dcigettext and adds two new test cases to intl/tst-codeset.c to
verify correct behaviour.  The iconv-internal function __gconv_create_spec
and the formerly static iconv-internal function gconv_destroy_spec (now
exported as __gconv_destroy_spec) are visible internally within glibc and
used in intl/dcigettext.c.
  • Loading branch information
submachine committed Sep 25, 2020
1 parent 06e95b9 commit 7d4ec75
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 57 deletions.
4 changes: 3 additions & 1 deletion iconv/Versions
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ libc {
GLIBC_PRIVATE {
# functions shared with iconv program
__gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db;
__gconv_open; __gconv_create_spec;

# functions used elsewhere in glibc
__gconv_open; __gconv_create_spec; __gconv_destroy_spec;

# function used by the gconv modules
__gconv_transliterate;
Expand Down
10 changes: 10 additions & 0 deletions iconv/gconv_charset.c
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,13 @@ __gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
return ret;
}
libc_hidden_def (__gconv_create_spec)


/* Release the heap-allocated charset names stored in *CONV_SPEC.  The
   structure itself is owned by the caller and is not freed.  Both
   pointers are reset to NULL afterwards so that a repeated call on the
   same specification is harmless instead of a double free.  */
void
__gconv_destroy_spec (struct gconv_spec *conv_spec)
{
  free (conv_spec->fromcode);
  conv_spec->fromcode = NULL;
  free (conv_spec->tocode);
  conv_spec->tocode = NULL;
}
libc_hidden_def (__gconv_destroy_spec)
27 changes: 0 additions & 27 deletions iconv/gconv_charset.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,33 +48,6 @@
#define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE"


/* Populate *CONV_SPEC from the source charset name FROMCODE and the
   destination charset name TOCODE, producing a conversion specification
   that may later be passed to __gconv_open.

   Either name may carry options in the form of suffixes that alter the
   conversion, e.g. "ISO-10646/UTF-8/TRANSLIT".  Suffix options in FROMCODE
   are ignored and truncated; suffix options in TOCODE are processed and
   truncated.  The supported options "TRANSLIT" and "IGNORE", when found in
   TOCODE, set the corresponding flag in *CONV_SPEC to true.  Unrecognized
   suffix options are silently discarded.

   Returns CONV_SPEC on success and NULL on failure.  */
struct gconv_spec *
__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
const char *tocode);
libc_hidden_proto (__gconv_create_spec)


/* Release the heap memory that __gconv_create_spec attached to
   *CONV_SPEC, i.e. the fromcode and tocode strings.  The structure
   itself is left to the caller.  */
static void __attribute__ ((unused))
gconv_destroy_spec (struct gconv_spec *conv_spec)
{
  free (conv_spec->fromcode);
  free (conv_spec->tocode);
}


/* This function copies in-order, characters from the source 's' that are
either alpha-numeric or one in one of these: "_-.,:/" - into the destination
'wp' while dropping all other characters. In the process, it converts all
Expand Down
21 changes: 21 additions & 0 deletions iconv/gconv_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,27 @@ extern int __gconv_open (struct gconv_spec *conv_spec,
__gconv_t *handle, int flags);
libc_hidden_proto (__gconv_open)

/* Populate *CONV_SPEC from the source charset name FROMCODE and the
   destination charset name TOCODE, producing a conversion specification
   that may later be passed to __gconv_open.

   Either name may carry options in the form of suffixes that alter the
   conversion, e.g. "ISO-10646/UTF-8/TRANSLIT".  Suffix options in FROMCODE
   are ignored and truncated; suffix options in TOCODE are processed and
   truncated.  The supported options "TRANSLIT" and "IGNORE", when found in
   TOCODE, set the corresponding flag in *CONV_SPEC to true.  Unrecognized
   suffix options are silently discarded.

   Returns CONV_SPEC on success and NULL on failure.  Heap memory
   allocated here is released with __gconv_destroy_spec.  */
extern struct gconv_spec *
__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
const char *tocode);
libc_hidden_proto (__gconv_create_spec)

/* Free all heap memory allocated by __gconv_create_spec for *CONV_SPEC,
   namely its fromcode and tocode strings.  CONV_SPEC itself is not
   freed; it remains owned by the caller.  */
extern void
__gconv_destroy_spec (struct gconv_spec *conv_spec);
libc_hidden_proto (__gconv_destroy_spec)

/* Free resources associated with transformation descriptor CD. */
extern int __gconv_close (__gconv_t cd)
attribute_hidden;
Expand Down
2 changes: 1 addition & 1 deletion iconv/iconv_open.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ iconv_open (const char *tocode, const char *fromcode)

int res = __gconv_open (&conv_spec, &cd, 0);

gconv_destroy_spec (&conv_spec);
__gconv_destroy_spec (&conv_spec);

if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK)
{
Expand Down
2 changes: 1 addition & 1 deletion iconv/iconv_prog.c
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ main (int argc, char *argv[])
/* Let's see whether we have these coded character sets. */
res = __gconv_open (&conv_spec, &cd, 0);

gconv_destroy_spec (&conv_spec);
__gconv_destroy_spec (&conv_spec);

if (res != __GCONV_OK)
{
Expand Down
17 changes: 10 additions & 7 deletions intl/dcigettext.c
Original file line number Diff line number Diff line change
Expand Up @@ -1120,15 +1120,18 @@ _nl_find_msg (struct loaded_l10nfile *domain_file,

# ifdef _LIBC

struct gconv_spec conv_spec
= { .fromcode = norm_add_slashes (charset, ""),
.tocode = norm_add_slashes (outcharset, ""),
/* We always want to use transliteration. */
.translit = true,
.ignore = false
};
struct gconv_spec conv_spec;

__gconv_create_spec (&conv_spec, charset, outcharset);

/* We always want to use transliteration. */
conv_spec.translit = true;

int r = __gconv_open (&conv_spec, &convd->conv,
GCONV_AVOID_NOCONV);

__gconv_destroy_spec (&conv_spec);

if (__builtin_expect (r != __GCONV_OK, 0))
{
/* If the output encoding is the same there is
Expand Down
34 changes: 14 additions & 20 deletions intl/tst-codeset.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,39 +22,33 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <support/check.h>

/* Check that gettext returns the "codeset" domain's translation of
   "cheese" converted to the requested output codeset, including
   codesets that carry a conversion suffix.  Mismatches are reported
   through TEST_COMPARE_STRING (from <support/check.h>); the function
   itself always returns 0.  */
static int
do_test (void)
{
  /* Clear LANGUAGE and OUTPUT_CHARSET before selecting the locale
     (presumably so the caller's environment cannot influence the
     result).  */
  unsetenv ("LANGUAGE");
  unsetenv ("OUTPUT_CHARSET");
  setlocale (LC_ALL, "de_DE.ISO-8859-1");
  textdomain ("codeset");
  bindtextdomain ("codeset", OBJPFX "domaindir");

  /* Here we expect output in ISO-8859-1.  */
  TEST_COMPARE_STRING (gettext ("cheese"), "K\344se");

  /* Here we expect output in UTF-8.  */
  bind_textdomain_codeset ("codeset", "UTF-8");
  TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se");

  /* `a with umlaut' is transliterated to `ae'.  */
  bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT");
  TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");

  /* Transliteration also works by default even if not set.  */
  bind_textdomain_codeset ("codeset", "ASCII");
  TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");

  return 0;
}

#define TEST_FUNCTION do_test ()
#include "../test-skeleton.c"
#include <support/test-driver.c>

0 comments on commit 7d4ec75

Please sign in to comment.