From 1e7ce54726dd541c1e98fbcf030fa07d1df86a77 Mon Sep 17 00:00:00 2001 From: David Zero Date: Mon, 11 Sep 2023 20:44:40 -0400 Subject: [PATCH] deps: Remove ICU data dep Builds ICU data into a static library as part of the build instead of fetching a binary data release archive, and links the library into the URL lib. --- .bazelrc | 1 - WORKSPACE | 16 ++-- third_party/icu.BUILD | 170 ++++++++++++++++++++++++++++++++++++++++-- url/BUILD | 14 +++- url/url.cpp | 35 --------- 5 files changed, 179 insertions(+), 57 deletions(-) diff --git a/.bazelrc b/.bazelrc index 42574243f..0ea5f65ac 100644 --- a/.bazelrc +++ b/.bazelrc @@ -2,7 +2,6 @@ # ========================================================= build --enable_platform_specific_config -build --test_env=HASTUR_ICU_DATA=external/icu-data/ coverage --combined_report=lcov test --test_output=errors test --test_summary=terse diff --git a/WORKSPACE b/WORKSPACE index bf4bc5105..75757b2cf 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -126,18 +126,12 @@ http_archive( patch_cmds = [ "rm source/common/BUILD.bazel", "rm source/stubdata/BUILD.bazel", + "rm source/tools/toolutil/BUILD.bazel", + "rm source/i18n/BUILD.bazel", ], - sha256 = "818a80712ed3caacd9b652305e01afc7fa167e6f2e94996da44b90c2ab604ce1", - strip_prefix = "icu", - url = "https://github.com/unicode-org/icu/releases/download/release-73-2/icu4c-73_2-src.tgz", -) - -# https://github.com/unicode-org/icu -http_archive( - name = "icu-data", # Unicode-DFS-2016 - build_file_content = """exports_files(["icudt73l.dat"])""", - sha256 = "2657bd18c23b930ddf63f466192832cc083256515e07b5a5e7d79c5c1db058a1", - url = "https://github.com/unicode-org/icu/releases/download/release-73-2/icu4c-73_2-data-bin-l.zip", + sha256 = "4b6c4a79b0648d228d505601e58780a59e9ad4eaad54be75cc637bd635aa46d6", + strip_prefix = "icu-release-73-2/icu4c", + url = "https://github.com/unicode-org/icu/archive/refs/tags/release-73-2.zip", ) # https://github.com/ocornut/imgui diff --git a/third_party/icu.BUILD b/third_party/icu.BUILD index 1942147c1..c615e1b49 100644 --- a/third_party/icu.BUILD +++ b/third_party/icu.BUILD @@ -1,4 +1,9 @@ -load("@rules_cc//cc:defs.bzl", "cc_library") +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") + +config_setting( + name = "is_ubsan_build", + values = {"features": "ubsan"}, +) cc_library( name = "common", @@ -9,6 +14,7 @@ cc_library( "source/stubdata/*.cpp", ]), hdrs = glob([ + "source/common/*.h", "source/common/unicode/*.h", ]), copts = select({ @@ -32,20 +38,170 @@ cc_library( ], "//conditions:default": [], }), - defines = [ + linkopts = select({ + "@platforms//os:windows": [ + "-DEFAULTLIB:advapi32", + ], + "//conditions:default": ["-ldl"], + }), + linkstatic = True, + local_defines = [ "U_STATIC_IMPLEMENTATION", "U_COMMON_IMPLEMENTATION", "U_CHARSET_IS_UTF8=1", "U_HIDE_OBSOLETE_UTF_OLD_H=1", - "UCONFIG_NO_CONVERSION=1", ], - linkopts = select({ + strip_include_prefix = "source/common/", + visibility = ["//visibility:public"], +) + +cc_library( + name = "toolutil", + srcs = glob(["source/tools/toolutil/*.cpp"]), + hdrs = glob(["source/tools/toolutil/*.h"]), + copts = select({ "@platforms//os:windows": [ - "-DEFAULTLIB:advapi32", + "/GR", + ], + "//conditions:default": [ + "-frtti", ], - "//conditions:default": ["-ldl"], }), linkstatic = True, - strip_include_prefix = "source/common/", + local_defines = ["U_TOOLUTIL_IMPLEMENTATION"] + select({ + "@platforms//os:windows": [], + "//conditions:default": [ + "U_ELF", + ], + }), + strip_include_prefix = "source/tools/toolutil", + visibility = ["//visibility:private"], + deps = [ + ":common", + ":i18n", + ], +) + +cc_library( + name = "i18n", + srcs = glob(["source/i18n/*.cpp"]), + hdrs = glob([ + "source/i18n/*.h", + "source/i18n/unicode/*.h", + ]), + copts = select({ + "@platforms//os:windows": [ + "/GR", + "-utf-8", + ], + "//conditions:default": [ + "-frtti", + ], + }), + linkstatic = True, + local_defines = [ + "U_I18N_IMPLEMENTATION", + ], + strip_include_prefix = "source/i18n", + visibility = ["//visibility:private"], + deps = [":common"], +) + +cc_binary( + name = "gensprep", + srcs = glob(["source/tools/gensprep/*.c"]) + ["source/tools/gensprep/gensprep.h"], + visibility = ["//visibility:private"], + deps = [ + ":common", + ":i18n", + ":toolutil", + ], +) + +SPREP_DATA = glob(["source/data/sprep/*.txt"]) + +SPREP_DATA_COMPILED = [s.replace("txt", "spp").rpartition("/")[2] for s in SPREP_DATA] + +filegroup( + name = "normalizations", + srcs = ["source/data/unidata/NormalizationCorrections.txt"], +) + +[genrule( + name = "run_sprep_" + input.replace(".txt", "").rpartition("/")[2], + srcs = [input], + outs = [input.replace("txt", "spp").rpartition("/")[2]], + cmd = "./$(location gensprep) -d $(RULEDIR) -b " + input.replace(".txt", "").rpartition("/")[2] + " -m external/icu/source/data/unidata/ -u 3.2.0 $<", + tools = [ + ":gensprep", + ":normalizations", + ], + visibility = ["//visibility:public"], +) for input in SPREP_DATA] + +genrule( + name = "create_pkgdata_lst", + srcs = SPREP_DATA_COMPILED, + outs = ["pkgdata.lst"], + cmd = "echo -e \"" + "\\n".join(SPREP_DATA_COMPILED) + "\" > $(RULEDIR)/pkgdata.lst && echo uts46.nrm >> $(RULEDIR)/pkgdata.lst", +) + +genrule( + name = "move uts46.nrm", + srcs = ["source/data/in/uts46.nrm"], + outs = ["uts46.nrm"], + cmd = "cp $< $(RULEDIR)", +) + +cc_binary( + name = "icupkg", + srcs = ["source/tools/icupkg/icupkg.cpp"], + visibility = ["//visibility:private"], + deps = [ + ":common", + ":i18n", + ":toolutil", + ], +) + +cc_binary( + name = "pkgdata", + srcs = [ + "source/tools/pkgdata/pkgdata.cpp", + "source/tools/pkgdata/pkgtypes.c", + "source/tools/pkgdata/pkgtypes.h", + ], + copts = select({ + ":is_ubsan_build": [ + "-Wl,--whole-archive", + "-L/usr/lib/clang/15/lib/linux/", + "-lclang_rt.ubsan_standalone_cxx-x86_64", + "-Wl,--no-whole-archive", + ], + "@platforms//os:windows": [ + "-utf-8", + ], + "//conditions:default": [], + }), + visibility = ["//visibility:private"], + deps = [ + ":common", + ":i18n", + ":toolutil", + ], +) + +genrule( + name = "run_pkgdata", + srcs = [ + "pkgdata.lst", + "uts46.nrm", + ] + SPREP_DATA_COMPILED, + outs = ["libicudt73l.a"], + cmd = "srcs=($(SRCS)); PATH=$$PATH:$(location icupkg) $(location pkgdata) -e icudt73 -s $(RULEDIR) -d $(RULEDIR) -p icudt73l -m static $${srcs[0]}", + tools = [ + ":icupkg", + ":pkgdata", + ], visibility = ["//visibility:public"], ) diff --git a/url/BUILD b/url/BUILD index df4e278e9..b55757ef6 100644 --- a/url/BUILD +++ b/url/BUILD @@ -1,4 +1,4 @@ -load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") +load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library", "cc_test") load("@rules_fuzzing//fuzzing:cc_defs.bzl", "cc_fuzz_test") load("//bzl:copts.bzl", "HASTUR_COPTS", "HASTUR_FUZZ_PLATFORMS") @@ -13,14 +13,22 @@ cc_library( deps = ["@icu//:common"], ) +cc_import( + name = "icudata", + static_library = "@icu//:libicudt73l.a", + alwayslink = True, +) + cc_library( name = "url", - srcs = ["url.cpp"], + srcs = [ + "url.cpp", + ], hdrs = ["url.h"], copts = HASTUR_COPTS, - data = ["@icu-data//:icudt73l.dat"], visibility = ["//visibility:public"], deps = [ + ":icudata", ":rtti_hack", "//util:base_parser", "//util:string", diff --git a/url/url.cpp b/url/url.cpp index 9d0ea9e4c..599b6ed12 100644 --- a/url/url.cpp +++ b/url/url.cpp @@ -118,39 +118,6 @@ struct PercentEncodeSet { static constexpr bool component(char c) { return userinfo(c) || (c >= '$' && c <= '&') || c == '+' || c == ','; } }; -void icu_init() { - static std::atomic called_once = false; - - if (called_once.exchange(true)) { - return; - } - - // NOLINTNEXTLINE(concurrency-mt-unsafe): This is going away soon. - char *data = std::getenv("HASTUR_ICU_DATA"); - - if (data != nullptr) { - std::filesystem::path env_path{data}; - - if (std::filesystem::is_directory(env_path)) { - u_setDataDirectory(env_path.string().c_str()); - } - } else { - // Use current working directory as a last resort. - // TODO(zero-one): Look at engine config for paths. - u_setDataDirectory(std::filesystem::current_path().string().c_str()); - } - - UErrorCode err = U_ZERO_ERROR; - - std::uint32_t opts = - UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ | UIDNA_USE_STD3_RULES; - - [[maybe_unused]] auto *uts = icu::IDNA::createUTS46Instance(opts, err); - - assert(!U_FAILURE(err)); - - delete uts; -} } // namespace void icu_cleanup() { @@ -1189,8 +1156,6 @@ void UrlParser::state_fragment() { // https://url.spec.whatwg.org/#concept-domain-to-ascii std::optional UrlParser::domain_to_ascii(std::string_view domain, bool be_strict) const { - icu_init(); - std::string ascii_domain; icu::StringByteSink tmp{&ascii_domain};