Skip to content

Commit

Permalink
deps: Remove ICU data dep
Browse files Browse the repository at this point in the history
Builds ICU data into a static library as part of the build instead of
fetching a binary data release archive, and links the library into the
URL lib.
  • Loading branch information
Zer0-One committed Nov 23, 2023
1 parent 046c275 commit 67bac76
Show file tree
Hide file tree
Showing 5 changed files with 198 additions and 55 deletions.
6 changes: 5 additions & 1 deletion .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
# =========================================================

build --enable_platform_specific_config
build --test_env=HASTUR_ICU_DATA=external/icu-data/
build --subcommands
build --verbose_failures
coverage --combined_report=lcov
test --test_output=errors
test --test_summary=terse
Expand Down Expand Up @@ -32,11 +33,14 @@ build:linux --cxxopt='-fno-rtti'
build:linux --copt='-gdwarf-4'

build:windows --enable_runfiles
build:windows --action_env=LOCALAPPDATA # Quirk for running vswhere, remove when icu is no longer needed
build:windows --action_env=ProgramData # Quirk for running vswhere, remove when icu is no longer needed
build:windows --cxxopt='/std:c++latest'
build:windows --cxxopt='/GR-' # Disable rtti.
build:windows --copt='/permissive-' # Conform to the standard.
build:windows --copt='/Zc:__cplusplus' # Report the real supported C++ version, not just C++98.
build:windows --copt='-utf-8' # Use UTF-8 as the source and execution character sets.
build:windows --host_copt='-utf-8' # Use UTF-8 as the source and execution character sets.

# Special build options
# =========================================================
Expand Down
16 changes: 5 additions & 11 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -127,18 +127,12 @@ http_archive(
patch_cmds = [
"rm source/common/BUILD.bazel",
"rm source/stubdata/BUILD.bazel",
"rm source/tools/toolutil/BUILD.bazel",
"rm source/i18n/BUILD.bazel",
],
sha256 = "818a80712ed3caacd9b652305e01afc7fa167e6f2e94996da44b90c2ab604ce1",
strip_prefix = "icu",
url = "https://github.com/unicode-org/icu/releases/download/release-73-2/icu4c-73_2-src.tgz",
)

# https://github.com/unicode-org/icu
http_archive(
name = "icu-data", # Unicode-DFS-2016
build_file_content = """exports_files(["icudt73l.dat"])""",
sha256 = "2657bd18c23b930ddf63f466192832cc083256515e07b5a5e7d79c5c1db058a1",
url = "https://github.com/unicode-org/icu/releases/download/release-73-2/icu4c-73_2-data-bin-l.zip",
sha256 = "4b6c4a79b0648d228d505601e58780a59e9ad4eaad54be75cc637bd635aa46d6",
strip_prefix = "icu-release-73-2/icu4c",
url = "https://github.com/unicode-org/icu/archive/refs/tags/release-73-2.zip",
)

# https://github.com/ocornut/imgui
Expand Down
179 changes: 174 additions & 5 deletions third_party/icu.BUILD
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
load("@rules_cc//cc:defs.bzl", "cc_library")
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")

# Matches builds whose "features" setting is "ubsan"; used in select() to
# switch on UBSan-specific options.
config_setting(
    name = "is_ubsan_build",
    values = {
        "features": "ubsan",
    },
)

cc_library(
name = "common",
Expand All @@ -9,6 +14,7 @@ cc_library(
"source/stubdata/*.cpp",
]),
hdrs = glob([
"source/common/*.h",
"source/common/unicode/*.h",
]),
copts = select({
Expand All @@ -33,10 +39,6 @@ cc_library(
}),
defines = [
"U_STATIC_IMPLEMENTATION",
"U_COMMON_IMPLEMENTATION",
"U_CHARSET_IS_UTF8=1",
"U_HIDE_OBSOLETE_UTF_OLD_H=1",
"UCONFIG_NO_CONVERSION=1",
],
linkopts = select({
"@platforms//os:windows": [
Expand All @@ -45,6 +47,173 @@ cc_library(
"//conditions:default": ["-ldl"],
}),
linkstatic = True,
local_defines = [
"U_COMMON_IMPLEMENTATION",
"U_CHARSET_IS_UTF8=1",
"U_HIDE_OBSOLETE_UTF_OLD_H=1",
],
strip_include_prefix = "source/common/",
visibility = ["//visibility:public"],
)

# ICU's tool utility library (source/tools/toolutil), used by the ICU tool
# binaries declared in this package. Private to this package.
cc_library(
    name = "toolutil",
    srcs = glob(["source/tools/toolutil/*.cpp"]),
    hdrs = glob(["source/tools/toolutil/*.h"]),
    # Compiled with RTTI enabled (/GR on Windows, -frtti elsewhere).
    copts = select({
        "@platforms//os:windows": ["/GR"],
        "//conditions:default": ["-frtti"],
    }),
    linkstatic = True,
    # U_TOOLUTIL_IMPLEMENTATION is always defined; U_ELF is added on every
    # platform except Windows.
    local_defines = select({
        "@platforms//os:windows": ["U_TOOLUTIL_IMPLEMENTATION"],
        "//conditions:default": [
            "U_TOOLUTIL_IMPLEMENTATION",
            "U_ELF",
        ],
    }),
    strip_include_prefix = "source/tools/toolutil",
    visibility = ["//visibility:private"],
    deps = [
        ":common",
        ":i18n",
    ],
)

# ICU's i18n library (source/i18n), built on top of :common. Private to this
# package.
cc_library(
    name = "i18n",
    srcs = glob(["source/i18n/*.cpp"]),
    hdrs = glob([
        "source/i18n/*.h",
        "source/i18n/unicode/*.h",
    ]),
    # Compiled with RTTI enabled (/GR on Windows, -frtti elsewhere).
    copts = select({
        "@platforms//os:windows": ["/GR"],
        "//conditions:default": ["-frtti"],
    }),
    linkstatic = True,
    local_defines = ["U_I18N_IMPLEMENTATION"],
    strip_include_prefix = "source/i18n",
    visibility = ["//visibility:private"],
    deps = [":common"],
)

# The gensprep tool binary; it is run by the run_sprep_* genrules in this
# package to produce the .spp files. Private to this package.
cc_binary(
    name = "gensprep",
    srcs = glob(["source/tools/gensprep/*.c"]) + [
        "source/tools/gensprep/gensprep.h",
    ],
    visibility = ["//visibility:private"],
    deps = [
        ":common",
        ":i18n",
        ":toolutil",
    ],
)

# Input .txt files under source/data/sprep; each one is fed to gensprep.
SPREP_DATA = glob(["source/data/sprep/*.txt"])

# Bare file names (directory stripped) of the compiled outputs, one per entry
# in SPREP_DATA, with the ".txt" extension swapped for ".spp".
# Only the trailing extension is rewritten: a plain replace("txt", "spp")
# would also rewrite any "txt" occurring elsewhere in the path or file name.
# The glob above guarantees every entry ends in ".txt".
SPREP_DATA_COMPILED = [
    src.rpartition("/")[2][:-len(".txt")] + ".spp"
    for src in SPREP_DATA
]

# Wraps NormalizationCorrections.txt so the sprep genrules can list it as a
# tool input.
filegroup(
    name = "normalizations",
    srcs = [
        "source/data/unidata/NormalizationCorrections.txt",
    ],
)

# One genrule per sprep input: run_sprep_<stem> runs gensprep on <stem>.txt and
# declares <stem>.spp as its output, written to this package's output
# directory ($(RULEDIR)).
#
# `stem` is the input's file name without directory or ".txt" extension. It is
# bound once through the trailing single-element `for` clause so the rule name,
# the output name and the -b argument cannot drift apart.
#
# The -m directory is derived from the location of :normalizations instead of
# a hard-coded "external/icu/..." path, so the rule keeps working if the
# repository is mapped under a different name.
[genrule(
    name = "run_sprep_" + stem,
    srcs = [src],
    outs = [stem + ".spp"],
    cmd = "./$(location gensprep) -d $(RULEDIR) -b " + stem + " -m $$(dirname $(location :normalizations))/ -u 3.2.0 $<",
    tools = [
        ":gensprep",
        ":normalizations",
    ],
    visibility = ["//visibility:public"],
) for src in SPREP_DATA for stem in [src.rpartition("/")[2][:-len(".txt")]]]

# Writes pkgdata.lst, the file list later handed to pkgdata: one line per
# entry of SPREP_DATA_COMPILED, followed by a final "uts46.nrm" line.
genrule(
name = "create_pkgdata_lst",
srcs = SPREP_DATA_COMPILED,
outs = ["pkgdata.lst"],
# The names are joined with a literal "\n" sequence that `echo -e` expands
# into newlines; uts46.nrm is then appended with a second echo.
cmd = "echo -e \"" + "\\n".join(SPREP_DATA_COMPILED) + "\" > $(RULEDIR)/pkgdata.lst && echo uts46.nrm >> $(RULEDIR)/pkgdata.lst",
)

# Copies source/data/in/uts46.nrm into this package's output directory so it
# sits next to the generated .spp files and pkgdata.lst.
# NOTE(review): the target name contains a space; confirm the Bazel versions
# in use accept that. Other rules refer to the output file "uts46.nrm", not to
# this rule name.
genrule(
name = "move uts46.nrm",
srcs = ["source/data/in/uts46.nrm"],
outs = ["uts46.nrm"],
cmd = "cp $< $(RULEDIR)",
)

# The icupkg tool binary; listed as a tool of the run_pkgdata* genrules.
# Private to this package.
cc_binary(
    name = "icupkg",
    srcs = [
        "source/tools/icupkg/icupkg.cpp",
    ],
    visibility = ["//visibility:private"],
    deps = [
        ":common",
        ":i18n",
        ":toolutil",
    ],
)

# The pkgdata tool binary; the run_pkgdata* genrules invoke it to package the
# files listed in pkgdata.lst. Private to this package.
cc_binary(
name = "pkgdata",
srcs = [
"source/tools/pkgdata/pkgdata.cpp",
"source/tools/pkgdata/pkgtypes.c",
"source/tools/pkgdata/pkgtypes.h",
],
# Extra flags applied only when :is_ubsan_build matches.
# NOTE(review): these look like linker options (-Wl, -L, -l) but are passed
# via copts; confirm whether linkopts was intended.
# NOTE(review): the library directory and runtime name are hard-coded to
# clang 15 on x86_64 Linux; verify on other toolchains.
copts = select({
":is_ubsan_build": [
"-Wl,--whole-archive",
"-L/usr/lib/clang/15/lib/linux/",
"-lclang_rt.ubsan_standalone_cxx-x86_64",
"-Wl,--no-whole-archive",
],
"//conditions:default": [],
}),
visibility = ["//visibility:private"],
deps = [
":common",
":i18n",
":toolutil",
],
)

# Runs pkgdata in static mode (-m static) over the files named in pkgdata.lst
# and declares libicudt73l.a as the result. url/BUILD selects this output on
# non-Windows platforms.
genrule(
name = "run_pkgdata",
# pkgdata.lst must stay first: the command passes $${srcs[0]} to pkgdata as
# the list file. The remaining entries are the files that list names.
srcs = [
"pkgdata.lst",
"uts46.nrm",
] + SPREP_DATA_COMPILED,
outs = ["libicudt73l.a"],
# Both the source (-s) and destination (-d) directories are $(RULEDIR).
# NOTE(review): $(location icupkg) is appended to PATH as-is, i.e. the path of
# the binary rather than its directory; confirm this has the intended effect.
cmd = r"""srcs=($(SRCS)); export PATH=$$PATH:$(location icupkg); $(location pkgdata) -e icudt73 -s $(RULEDIR) -d $(RULEDIR) -p icudt73l -m static $${srcs[0]}""",
tools = [
":icupkg",
":pkgdata",
],
visibility = ["//visibility:public"],
)

# Windows counterpart of run_pkgdata: same pkgdata invocation, but declares
# sicudt73l.lib as the output. url/BUILD selects this output on Windows.
genrule(
name = "run_pkgdata_windows",
# pkgdata.lst must stay first: the command passes $${srcs[0]} to pkgdata as
# the list file.
srcs = [
"pkgdata.lst",
"uts46.nrm",
] + SPREP_DATA_COMPILED,
outs = ["sicudt73l.lib"],
# Besides $(location icupkg), PATH gets the directory of a lib.exe found via
# vswhere: the first match whose path contains "x64" and not "llvm". awk
# blanks the last backslash-separated field (the file name), then the ':' is
# removed, backslashes (octal 134) become '/', and a leading '/' is added.
# The vswhere.exe location is hard-coded to its default install path.
cmd = r"""srcs=($(SRCS)); export PATH=$$PATH:$(location icupkg):"/$$('C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe' -latest -find '**\lib.exe' | grep x64 | grep -v llvm | head -n1 | awk -F '\' 'BEGIN{OFS=FS} {$$NF=""; print}' | tr -d ':' | tr '\134' '/')"; $(location pkgdata) -e icudt73 -s $(RULEDIR) -d $(RULEDIR) -p icudt73l -m static $${srcs[0]}""",
tools = [
":icupkg",
":pkgdata",
],
visibility = ["//visibility:public"],
)
17 changes: 14 additions & 3 deletions url/BUILD
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library", "cc_test")
load("@rules_fuzzing//fuzzing:cc_defs.bzl", "cc_fuzz_test")
load("//bzl:copts.bzl", "HASTUR_COPTS", "HASTUR_FUZZ_PLATFORMS")

Expand All @@ -13,14 +13,25 @@ cc_library(
deps = ["@icu//:common"],
)

# Imports the ICU data static library produced in the @icu repository:
# sicudt73l.lib on Windows, libicudt73l.a everywhere else.
cc_import(
    name = "icudata",
    # alwayslink is set so the archive is linked in even though no code
    # references its symbols directly by name at the Bazel level.
    alwayslink = True,
    static_library = select({
        "@platforms//os:windows": "@icu//:sicudt73l.lib",
        "//conditions:default": "@icu//:libicudt73l.a",
    }),
)

cc_library(
name = "url",
srcs = ["url.cpp"],
srcs = [
"url.cpp",
],
hdrs = ["url.h"],
copts = HASTUR_COPTS,
data = ["@icu-data//:icudt73l.dat"],
visibility = ["//visibility:public"],
deps = [
":icudata",
":rtti_hack",
"//util:base_parser",
"//util:string",
Expand Down
35 changes: 0 additions & 35 deletions url/url.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,39 +118,6 @@ struct PercentEncodeSet {
static constexpr bool component(char c) { return userinfo(c) || (c >= '$' && c <= '&') || c == '+' || c == ','; }
};

void icu_init() {
static std::atomic<bool> called_once = false;

if (called_once.exchange(true)) {
return;
}

// NOLINTNEXTLINE(concurrency-mt-unsafe): This is going away soon.
char *data = std::getenv("HASTUR_ICU_DATA");

if (data != nullptr) {
std::filesystem::path env_path{data};

if (std::filesystem::is_directory(env_path)) {
u_setDataDirectory(env_path.string().c_str());
}
} else {
// Use current working directory as a last resort.
// TODO(zero-one): Look at engine config for paths.
u_setDataDirectory(std::filesystem::current_path().string().c_str());
}

UErrorCode err = U_ZERO_ERROR;

std::uint32_t opts =
UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ | UIDNA_USE_STD3_RULES;

[[maybe_unused]] auto *uts = icu::IDNA::createUTS46Instance(opts, err);

assert(!U_FAILURE(err));

delete uts;
}
} // namespace

void icu_cleanup() {
Expand Down Expand Up @@ -1189,8 +1156,6 @@ void UrlParser::state_fragment() {

// https://url.spec.whatwg.org/#concept-domain-to-ascii
std::optional<std::string> UrlParser::domain_to_ascii(std::string_view domain, bool be_strict) const {
icu_init();

std::string ascii_domain;
icu::StringByteSink<std::string> tmp{&ascii_domain};

Expand Down

0 comments on commit 67bac76

Please sign in to comment.