From 33ec18b1fc908746afa055ca629b4c63cd85093f Mon Sep 17 00:00:00 2001 From: cregouby Date: Tue, 17 Sep 2024 16:14:27 +0200 Subject: [PATCH] Add FR translation for R-messages (#19) * add FR message translation * wrap cli_abort for translation * add test * add NEWS * fix typos --- DESCRIPTION | 3 +- NEWS.md | 2 ++ R/encoding.R | 2 +- R/tokenizer.R | 10 +++---- inst/po/fr/LC_MESSAGES/R-tok.mo | Bin 0 -> 1095 bytes po/R-fr.po | 32 +++++++++++++++++++++ po/R-tok.pot | 31 ++++++++++++++++++++ tests/testthat/test-message-translations.R | 15 ++++++++++ 8 files changed, 88 insertions(+), 7 deletions(-) create mode 100644 inst/po/fr/LC_MESSAGES/R-tok.mo create mode 100644 po/R-fr.po create mode 100644 po/R-tok.pot create mode 100644 tests/testthat/test-message-translations.R diff --git a/DESCRIPTION b/DESCRIPTION index bc93dbb..062821b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,6 +3,7 @@ Title: Fast Text Tokenization Version: 0.1.4.9000 Authors@R: c( person("Daniel", "Falbel", , "daniel@posit.co", c("aut", "cre")), + person("Christophe", "Regouby", , "christophe.regouby@free.fr", c("ctb")), person(family = "Posit", role = c("cph")) ) Description: @@ -14,7 +15,7 @@ License: MIT + file LICENSE SystemRequirements: Rust tool chain w/ cargo, libclang/llvm-config Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Depends: R (>= 4.2.0) Imports: diff --git a/NEWS.md b/NEWS.md index 0f5100c..7488676 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # tok (development version) +- add message translation in FR (#19, @cregouby) + # tok 0.1.4 - Updated libR-sys to fix mac oldrel notes. 
(#18) diff --git a/R/encoding.R b/R/encoding.R index 48e9269..c808af7 100644 --- a/R/encoding.R +++ b/R/encoding.R @@ -26,7 +26,7 @@ encoding <- R6::R6Class( if (inherits(encoding, "REncoding")) { self$.encoding <- encoding } else { - cli::cli_abort("Expected class {.cls REncoding} but got {.cls {class(encoding)}}.") + cli::cli_abort(gettext("Expected class {.cls REncoding} but got {.cls {class(encoding)}}.")) } } ), diff --git a/R/tokenizer.R b/R/tokenizer.R index c1b4675..46d5624 100644 --- a/R/tokenizer.R +++ b/R/tokenizer.R @@ -83,7 +83,7 @@ tokenizer <- R6::R6Class( #' the tokenizer. #' @param path Path to tokenizer.json file from_file = function(path) { - cli::cli_abort("This is a static method. Not available for tokenizers instances.") + cli::cli_abort(gettext("This is a static method. Not available for tokenizers instances.")) }, #' @description @@ -94,7 +94,7 @@ tokenizer <- R6::R6Class( #' @param auth_token An optional auth token used to access private repositories #' on the Hugging Face Hub from_pretrained = function(identifier, revision = "main", auth_token = NULL) { - cli::cli_abort("This is a static method. Not available for tokenizers instances.") + cli::cli_abort(gettext("This is a static method. Not available for tokenizers instances.")) }, #' @description @@ -104,7 +104,7 @@ tokenizer <- R6::R6Class( #' @param files character vector of file paths. 
train = function(files, trainer) { if (!inherits(trainer, "tok_trainer")) - cli::cli_abort("{.arg trainer} must inherit from {.cls tok_trainer}.") + cli::cli_abort(gettext("{.arg trainer} must inherit from {.cls tok_trainer}.")) self$.tokenizer$train_from_files(trainer$.trainer, normalizePath(files)) }, @@ -227,7 +227,7 @@ tokenizer <- R6::R6Class( #' @field padding Gets padding configuration padding = function(x) { if (!missing(x)) { - cli::cli_abort("Can't be set this way, use {.fn enable_padding}.") + cli::cli_abort(gettext("Can't be set this way, use {.fn enable_padding}.")) } self$.tokenizer$get_padding() @@ -235,7 +235,7 @@ tokenizer <- R6::R6Class( #' @field truncation Gets truncation configuration truncation = function(x) { if (!missing(x)) { - cli::cli_abort("Can't be set this way, use {.fn enable_truncation}.") + cli::cli_abort(gettext("Can't be set this way, use {.fn enable_truncation}.")) } self$.tokenizer$get_truncation() diff --git a/inst/po/fr/LC_MESSAGES/R-tok.mo b/inst/po/fr/LC_MESSAGES/R-tok.mo new file mode 100644 index 0000000000000000000000000000000000000000..e4ea419f2afe1da1cfd5f20214311d7abc71b75b GIT binary patch literal 1095 zcmb_b%We}f6g5y-1jGWiEbcBKI>WTdq6tC*m4G6(C8|ON8!Bh!CNWIx!S;kc=tr>N z9~##11(Em&7JLHVz;Tn(Ms&dfYmUy0eed!0^_icymOcxNYrq{~1Go-+1j0B55?}(< zfFHmU;N~SE9)mmJI`|X#7x+7P2mI}_5U;^Et_ZOS?t|Zezk-9#?Ij@)`e$67zhPP+ zrz)e^_U!c@J7)v`KZ>7>~k7!gnkL4j-<*9GYXuda+cTa<2 z(Xr+&3Xj@Zv;#dHrWxcZiC5x!+(?q7(s>bW^H_zo=&AJFq}61#9wm(^snbfMS!>)+ zRuQ)$osYU!YL`oIY?Cmf2c;Zv)MYt3_uWK2mCg2M>uk@JIH{}~?O8)(zrZG*_xyt* z$7vo?I+PZ-_vmH!S+sT*7xwD2jZWL#q_v*%l~>VoMhj*|bPQ)`Kn%mS;5|!z! 
z431qiR!r?wQ7&h`V`5zTEaoDQwR3V+IY2+MYN>J7=_Ggzzecd!`;ud7Fy|oK9dswjp*_*rEY{15E2wfdBvi literal 0 HcmV?d00001 diff --git a/po/R-fr.po b/po/R-fr.po new file mode 100644 index 0000000..a3937d0 --- /dev/null +++ b/po/R-fr.po @@ -0,0 +1,32 @@ +msgid "" +msgstr "" +"Project-Id-Version: tok 0.1.4.9000\n" +"POT-Creation-Date: 2024-09-04 19:29+0200\n" +"PO-Revision-Date: 2024-09-04 19:39+0200\n" +"Last-Translator: \n" +"Language-Team: \n" +"Language: fr\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"X-Generator: Poedit 3.4.3\n" + +#: encoding.R:29 +msgid "Expected class {.cls REncoding} but got {.cls {class(encoding)}}." +msgstr "Classe attendue {.cls REncoding}. Ici la classe est {.cls {class(encoding)}}." + +#: tokenizer.R:86 tokenizer.R:97 +msgid "This is a static method. Not available for tokenizers instances." +msgstr "Il s'agit d'une méthode statique. Elle n'est pas disponible pour les instances de tokenizers." + +#: tokenizer.R:107 +msgid "{.arg trainer} must inherit from {.cls tok_trainer}." +msgstr "{.arg trainer} doit hériter de {.cls tok_trainer}." + +#: tokenizer.R:230 +msgid "Can't be set this way, use {.fn enable_padding}." +msgstr "Ne peut pas être défini de cette manière, vous devez utiliser {.fn enable_padding}." + +#: tokenizer.R:238 +msgid "Can't be set this way, use {.fn enable_truncation}." +msgstr "Ne peut pas être défini de cette manière, vous devez utiliser {.fn enable_truncation}." 
diff --git a/po/R-tok.pot b/po/R-tok.pot new file mode 100644 index 0000000..709725e --- /dev/null +++ b/po/R-tok.pot @@ -0,0 +1,31 @@ +msgid "" +msgstr "" +"Project-Id-Version: tok 0.1.4.9000\n" +"POT-Creation-Date: 2024-09-04 19:29+0200\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#: encoding.R:29 +msgid "Expected class {.cls REncoding} but got {.cls {class(encoding)}}." +msgstr "" + +#: tokenizer.R:86 tokenizer.R:97 +msgid "This is a static method. Not available for tokenizers instances." +msgstr "" + +#: tokenizer.R:107 +msgid "{.arg trainer} must inherit from {.cls tok_trainer}." +msgstr "" + +#: tokenizer.R:230 +msgid "Can't be set this way, use {.fn enable_padding}." +msgstr "" + +#: tokenizer.R:238 +msgid "Can't be set this way, use {.fn enable_truncation}." +msgstr "" diff --git a/tests/testthat/test-message-translations.R b/tests/testthat/test-message-translations.R new file mode 100644 index 0000000..8631cb7 --- /dev/null +++ b/tests/testthat/test-message-translations.R @@ -0,0 +1,15 @@ +test_that("R-level cli_abort messages are correctly translated in FR", { + withr::with_envvar(c(HUGGINGFACE_HUB_CACHE = tempdir()), { + try({ + tok <- tokenizer$from_pretrained("gpt2") + temp_json <- tempfile(fileext = ".json") + withr::with_language(lang = "fr", + expect_error( + tok$train(temp_json, temp_json), + regexp = "doit hériter de", + fixed = TRUE + )) + }) + }) + +}) \ No newline at end of file