From 265d0d573a1789984fec5257a3976be9791aaf12 Mon Sep 17 00:00:00 2001 From: brontolosone <177225737+brontolosone@users.noreply.github.com> Date: Mon, 14 Oct 2024 13:25:36 +0000 Subject: [PATCH 01/11] choice randomization: better approximation of JR behaviour, fixes #49 --- packages/xpath/src/lib/collections/sort.ts | 40 +++++++++++++++++--- packages/xpath/test/xforms/randomize.test.ts | 8 +++- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/packages/xpath/src/lib/collections/sort.ts b/packages/xpath/src/lib/collections/sort.ts index c0bce26a0..31c023f99 100644 --- a/packages/xpath/src/lib/collections/sort.ts +++ b/packages/xpath/src/lib/collections/sort.ts @@ -15,6 +15,7 @@ class UnseededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator } class SeededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator { + // Park-Miller PRNG protected seed: number; constructor(seed: Int) { @@ -38,17 +39,46 @@ class SeededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator { } } -const isInt = (value: number): value is Int => value % 1 === 0; - export const seededRandomize = (values: readonly T[], seed?: number): T[] => { let generator: PseudoRandomNumberGenerator; if (seed == null) { generator = new UnseededPseudoRandomNumberGenerator(); - } else if (!isInt(seed)) { - throw 'todo not an int'; } else { - generator = new SeededPseudoRandomNumberGenerator(seed); + let finalSeed: number; + // Per issue #49 this is (to an extent) "bug-or-feature-compatible" with JavaRosa's implementation. + // org.javarosa.core.model.ItemsetBinding.resolveRandomSeed takes the .longValue() of + // the double produced by randomSeedPathExpr.eval(). + // That results in a 0L when the double is NaN, which happens (for instance) when there + // is a string that does not look like a number (which is a problem in itself, as any non-numeric + // looking string will then result in the same seed of 0 — see https://github.com/getodk/javarosa/issues/800). + // We'll emulate Java's Double -> Long conversion here (for NaN and some other double values) + // so that we produce the same randomization as JR. + if (Number.isNaN(seed)) { + finalSeed = 0; + } else if (seed === Infinity) { + // In Java's .longValue(), this converts to 2**63 -1. + // But that's larger than the JS Number.MAX_SAFE_INTEGER, and thus we cannot guarantee the same + // outcomes as OpenRosa. + // However. When Park-Miller is initialized, it takes the modulus of the seed and 2**31 -1 as + // the first step. This means that for Park-Miller we can use 2**31 (which is smaller than Number.MAX_SAFE_INTEGER) + // as a surrogate equivalent seed for Infinity, since + // ((2**63 -1) % (2**31 -1)) = ((2**31) % (2**31 -1)) + // (because of JS Number imprecision (the problem to start with) don't use JS to convince of the above equality, + // or rewrite to use BigInt). + finalSeed = 2 ** 31; + } else if (seed === -Infinity) { + // Analogous with the above conversion for Infinity + finalSeed = -(2 ** 31 + 1); + } else if (!Number.isInteger(seed)) { + // We're not out of the woods yet — see issue: https://github.com/getodk/web-forms/issues/240. + // But one thing we know is that JR converts the double to a long, and thus drops the fractional part. + // We'll do the same here. + finalSeed = Math.floor(seed); + } else { + finalSeed = seed; + } + generator = new SeededPseudoRandomNumberGenerator(finalSeed); } const { length } = values; diff --git a/packages/xpath/test/xforms/randomize.test.ts b/packages/xpath/test/xforms/randomize.test.ts index 56cb9c95a..47c21b886 100644 --- a/packages/xpath/test/xforms/randomize.test.ts +++ b/packages/xpath/test/xforms/randomize.test.ts @@ -74,6 +74,11 @@ describe('randomize()', () => { { seed: 1, expected: 'BFEACD' }, { seed: 11111111, expected: 'ACDBFE' }, { seed: 'int(1)', expected: 'BFEACD' }, + { seed: 1.1, expected: 'BFEACD' }, + { seed: 0, expected: 'CBEAFD' }, + { seed: NaN, expected: 'CBEAFD' }, + { seed: Infinity, expected: 'CBEAFD' }, + { seed: -Infinity, expected: 'CBEAFD' }, { seed: 'floor(1.1)', expected: 'BFEACD' }, { seed: '//xhtml:div[@id="testFunctionNodeset2"]/xhtml:p', expected: 'BFEACD' }, ].forEach(({ seed, expected }) => { @@ -90,10 +95,9 @@ describe('randomize()', () => { [ { expression: 'randomize()' }, - { expression: `randomize(${SELECTOR}, 'a')` }, { expression: `randomize(${SELECTOR}, 1, 2)` }, ].forEach(({ expression }) => { - it.fails(`${expression} with invalid args, throws an error`, () => { + it.fails(`${expression} with invalid argument count, throws an error`, () => { testContext.evaluate(expression); }); }); From 159f06cb46fb68264732769a21fb92f303bb5b07 Mon Sep 17 00:00:00 2001 From: brontolosone <177225737+brontolosone@users.noreply.github.com> Date: Tue, 15 Oct 2024 09:38:41 +0000 Subject: [PATCH 02/11] thanks for the diff noise, prettier --- packages/xpath/test/xforms/randomize.test.ts | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/packages/xpath/test/xforms/randomize.test.ts b/packages/xpath/test/xforms/randomize.test.ts index 47c21b886..c43ea30bf 100644 --- a/packages/xpath/test/xforms/randomize.test.ts +++ b/packages/xpath/test/xforms/randomize.test.ts @@ -93,14 +93,13 @@ describe('randomize()', () => { }); }); - [ - { expression: 'randomize()' }, - { expression: `randomize(${SELECTOR}, 1, 2)` }, - ].forEach(({ expression }) => { - it.fails(`${expression} with invalid argument count, throws an error`, () => { - testContext.evaluate(expression); - }); - }); + [{ expression: 'randomize()' }, { expression: `randomize(${SELECTOR}, 1, 2)` }].forEach( + ({ expression }) => { + it.fails(`${expression} with invalid argument count, throws an error`, () => { + testContext.evaluate(expression); + }); + } + ); it('randomizes nodes', () => { testContext = createXFormsTestContext(` From bf0008c3683f1d52c9e65e6c562533099ea2fd59 Mon Sep 17 00:00:00 2001 From: brontolosone <177225737+brontolosone@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:31:56 +0000 Subject: [PATCH 03/11] link to org.javarosa.core.model.ItemsetBinding.resolveRandomSeed() sourcecode --- packages/xpath/src/lib/collections/sort.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/xpath/src/lib/collections/sort.ts b/packages/xpath/src/lib/collections/sort.ts index 31c023f99..61a256274 100644 --- a/packages/xpath/src/lib/collections/sort.ts +++ b/packages/xpath/src/lib/collections/sort.ts @@ -48,7 +48,7 @@ export const seededRandomize = (values: readonly T[], seed?: number): T[] => let finalSeed: number; // Per issue #49 this is (to an extent) "bug-or-feature-compatible" with JavaRosa's implementation. // org.javarosa.core.model.ItemsetBinding.resolveRandomSeed takes the .longValue() of - // the double produced by randomSeedPathExpr.eval(). + // the double produced by randomSeedPathExpr.eval() — see https://github.com/getodk/javarosa/blob/6ce13527c/src/main/java/org/javarosa/core/model/ItemsetBinding.java#L311:L317 . // That results in a 0L when the double is NaN, which happens (for instance) when there // is a string that does not look like a number (which is a problem in itself, as any non-numeric // looking string will then result in the same seed of 0 — see https://github.com/getodk/javarosa/issues/800). From 2bda8afc2d1eab64968ef47ce3badcd9c23bc0c5 Mon Sep 17 00:00:00 2001 From: brontolosone <177225737+brontolosone@users.noreply.github.com> Date: Thu, 12 Dec 2024 18:23:16 +0000 Subject: [PATCH 04/11] simplify: make ParkMiller PRNG accept a BigInt seed, fix -Infinity --- packages/xpath/src/lib/collections/sort.ts | 53 +++++++++----------- packages/xpath/test/xforms/randomize.test.ts | 2 +- 2 files changed, 24 insertions(+), 31 deletions(-) diff --git a/packages/xpath/src/lib/collections/sort.ts b/packages/xpath/src/lib/collections/sort.ts index 61a256274..6e324b3c3 100644 --- a/packages/xpath/src/lib/collections/sort.ts +++ b/packages/xpath/src/lib/collections/sort.ts @@ -18,9 +18,15 @@ class SeededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator { // Park-Miller PRNG protected seed: number; - constructor(seed: Int) { - let initialSeed = seed % SEED_MODULO_OPERAND; - + constructor(seed: Int | bigint) { + let initialSeed: number; + if (typeof(seed) === ("bigint")) { + // the result of the modulo operation is always smaller than Number.MAX_SAFE_INTEGER, + // thus it's safe to convert to a Number. + initialSeed = Number(BigInt(seed) % BigInt(SEED_MODULO_OPERAND)); + } else { + initialSeed = Number(seed) % Number(SEED_MODULO_OPERAND); + } if (initialSeed <= 0) { initialSeed += MAX_INT_32 - 1; } @@ -45,39 +51,26 @@ export const seededRandomize = (values: readonly T[], seed?: number): T[] => if (seed == null) { generator = new UnseededPseudoRandomNumberGenerator(); } else { - let finalSeed: number; - // Per issue #49 this is (to an extent) "bug-or-feature-compatible" with JavaRosa's implementation. - // org.javarosa.core.model.ItemsetBinding.resolveRandomSeed takes the .longValue() of + let finalSeed: number | bigint; + // Per issue #49 (https://github.com/getodk/web-forms/issues/49) this is intended to be "bug-or-feature-compatible" + // with JavaRosa's implementation; org.javarosa.core.model.ItemsetBinding.resolveRandomSeed takes the .longValue() of // the double produced by randomSeedPathExpr.eval() — see https://github.com/getodk/javarosa/blob/6ce13527c/src/main/java/org/javarosa/core/model/ItemsetBinding.java#L311:L317 . // That results in a 0L when the double is NaN, which happens (for instance) when there // is a string that does not look like a number (which is a problem in itself, as any non-numeric // looking string will then result in the same seed of 0 — see https://github.com/getodk/javarosa/issues/800). // We'll emulate Java's Double -> Long conversion here (for NaN and some other double values) // so that we produce the same randomization as JR. - if (Number.isNaN(seed)) { - finalSeed = 0; - } else if (seed === Infinity) { - // In Java's .longValue(), this converts to 2**63 -1. - // But that's larger than the JS Number.MAX_SAFE_INTEGER, and thus we cannot guarantee the same - // outcomes as OpenRosa. - // However. When Park-Miller is initialized, it takes the modulus of the seed and 2**31 -1 as - // the first step. This means that for Park-Miller we can use 2**31 (which is smaller than Number.MAX_SAFE_INTEGER) - // as a surrogate equivalent seed for Infinity, since - // ((2**63 -1) % (2**31 -1)) = ((2**31) % (2**31 -1)) - // (because of JS Number imprecision (the problem to start with) don't use JS to convince of the above equality, - // or rewrite to use BigInt). - finalSeed = 2 ** 31; - } else if (seed === -Infinity) { - // Analogous with the above conversion for Infinity - finalSeed = -(2 ** 31 + 1); - } else if (!Number.isInteger(seed)) { - // We're not out of the woods yet — see issue: https://github.com/getodk/web-forms/issues/240. - // But one thing we know is that JR converts the double to a long, and thus drops the fractional part. - // We'll do the same here. - finalSeed = Math.floor(seed); - } else { - finalSeed = seed; - } + + // In Java, a NaN double's .longValue is 0 + if (Number.isNaN(seed)) finalSeed = 0; + // In Java, an Infinity double's .longValue() is 2**63 -1, which is larger than Number.MAX_SAFE_INTEGER, thus we'll need a BigInt. + else if (seed === Infinity) finalSeed = 2n ** 63n -1n; + // Analogous with the above conversion, but for -Infinity + else if (seed === -Infinity) finalSeed = -(2n ** 63n); + // A Java double's .longValue drops the fractional. + else if (typeof(seed) === "number" && !Number.isInteger(seed)) finalSeed = Math.trunc(seed); + // TODO: There's still more peculiarities to address: https://github.com/getodk/web-forms/issues/240 + else finalSeed = seed; generator = new SeededPseudoRandomNumberGenerator(finalSeed); } diff --git a/packages/xpath/test/xforms/randomize.test.ts b/packages/xpath/test/xforms/randomize.test.ts index c43ea30bf..0bbc4a4ed 100644 --- a/packages/xpath/test/xforms/randomize.test.ts +++ b/packages/xpath/test/xforms/randomize.test.ts @@ -78,7 +78,7 @@ describe('randomize()', () => { { seed: 0, expected: 'CBEAFD' }, { seed: NaN, expected: 'CBEAFD' }, { seed: Infinity, expected: 'CBEAFD' }, - { seed: -Infinity, expected: 'CBEAFD' }, + { seed: -Infinity, expected: 'CFBEAD' }, { seed: 'floor(1.1)', expected: 'BFEACD' }, { seed: '//xhtml:div[@id="testFunctionNodeset2"]/xhtml:p', expected: 'BFEACD' }, ].forEach(({ seed, expected }) => { From c83fef297264d881f1642002804a65dc969ab2ee Mon Sep 17 00:00:00 2001 From: brontolosone <177225737+brontolosone@users.noreply.github.com> Date: Mon, 16 Dec 2024 12:11:45 +0000 Subject: [PATCH 05/11] derive a numeric seed from non-numeric-looking seed strings via digest --- .../xpath/src/functions/xforms/node-set.ts | 39 +++++++++++++++++-- packages/xpath/src/lib/collections/sort.ts | 11 +++--- packages/xpath/test/xforms/randomize.test.ts | 10 ++++- 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/packages/xpath/src/functions/xforms/node-set.ts b/packages/xpath/src/functions/xforms/node-set.ts index c51a16366..bc30d379d 100644 --- a/packages/xpath/src/functions/xforms/node-set.ts +++ b/packages/xpath/src/functions/xforms/node-set.ts @@ -1,3 +1,5 @@ +import sha256 from 'crypto-js/sha256'; + import type { XPathNode } from '../../adapter/interface/XPathNode.ts'; import type { XPathDOMProvider } from '../../adapter/xpathDOMProvider.ts'; import { LocationPathEvaluation } from '../../evaluations/LocationPathEvaluation.ts'; @@ -384,8 +386,39 @@ export const randomize = new NodeSetFunction( const nodeResults = Array.from(results.values()); const nodes = nodeResults.map(({ value }) => value); - const seed = seedExpression?.evaluate(context).toNumber(); - - return seededRandomize(nodes, seed); + if (seedExpression === undefined) return seededRandomize(nodes); + const seed = seedExpression.evaluate(context); + const asNumber = seed.toNumber(); // TODO: There are some peculiarities to address: https://github.com/getodk/web-forms/issues/240 + let finalSeed: number | bigint | undefined; + if (Number.isNaN(asNumber)) { + // Specific behaviors for when a seed value is not interpretable as numeric. + // We still want to derive a seed in those cases, see https://github.com/getodk/javarosa/issues/800 + const seedString = seed.toString(); + if (seedString === '') { + finalSeed = 0; // special case: JR behaviour + } else { + // any other string, we'll convert to a number via a digest function + finalSeed = toBigIntHash(seedString); + } + } else { + finalSeed = asNumber; + } + return seededRandomize(nodes, finalSeed); } ); + +function toBigIntHash(text: string): bigint { + // hash text with sha256, and interpret the first 64 bits of output + // (the first and second int32s ("words") of CryptoJS digest output) + // as a BigInt. Thus the entropy of the hash is reduced to 64 bits, which + // for some applications is sufficient. + // The underlying representations are big-endian regardless of the endianness + // of the machine this runs on, as is the equivalent JavaRosa implementation + // at https://github.com/getodk/javarosa/blob/ab0e8f4da6ad8180ac7ede5bc939f3f261c16edf/src/main/java/org/javarosa/xpath/expr/XPathFuncExpr.java#L718-L726 + const buffer = new ArrayBuffer(8); + const dataview = new DataView(buffer); + sha256(text) + .words.slice(0, 2) + .forEach((val, ix) => dataview.setInt32(ix * 4, val)); + return dataview.getBigInt64(0); +} diff --git a/packages/xpath/src/lib/collections/sort.ts b/packages/xpath/src/lib/collections/sort.ts index 6e324b3c3..a6f6971f8 100644 --- a/packages/xpath/src/lib/collections/sort.ts +++ b/packages/xpath/src/lib/collections/sort.ts @@ -20,7 +20,7 @@ class SeededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator { constructor(seed: Int | bigint) { let initialSeed: number; - if (typeof(seed) === ("bigint")) { + if (typeof seed === 'bigint') { // the result of the modulo operation is always smaller than Number.MAX_SAFE_INTEGER, // thus it's safe to convert to a Number. initialSeed = Number(BigInt(seed) % BigInt(SEED_MODULO_OPERAND)); @@ -45,7 +45,7 @@ class SeededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator { } } -export const seededRandomize = (values: readonly T[], seed?: number): T[] => { +export const seededRandomize = (values: readonly T[], seed?: number | bigint): T[] => { let generator: PseudoRandomNumberGenerator; if (seed == null) { @@ -64,12 +64,11 @@ export const seededRandomize = (values: readonly T[], seed?: number): T[] => // In Java, a NaN double's .longValue is 0 if (Number.isNaN(seed)) finalSeed = 0; // In Java, an Infinity double's .longValue() is 2**63 -1, which is larger than Number.MAX_SAFE_INTEGER, thus we'll need a BigInt. - else if (seed === Infinity) finalSeed = 2n ** 63n -1n; + else if (seed === Infinity) finalSeed = 2n ** 63n - 1n; // Analogous with the above conversion, but for -Infinity else if (seed === -Infinity) finalSeed = -(2n ** 63n); - // A Java double's .longValue drops the fractional. - else if (typeof(seed) === "number" && !Number.isInteger(seed)) finalSeed = Math.trunc(seed); - // TODO: There's still more peculiarities to address: https://github.com/getodk/web-forms/issues/240 + // A Java double's .longValue drops the fractional part. + else if (typeof seed === 'number' && !Number.isInteger(seed)) finalSeed = Math.trunc(seed); else finalSeed = seed; generator = new SeededPseudoRandomNumberGenerator(finalSeed); } diff --git a/packages/xpath/test/xforms/randomize.test.ts b/packages/xpath/test/xforms/randomize.test.ts index 0bbc4a4ed..06fba8b8b 100644 --- a/packages/xpath/test/xforms/randomize.test.ts +++ b/packages/xpath/test/xforms/randomize.test.ts @@ -18,6 +18,9 @@ describe('randomize()', () => { }); const SELECTOR = '//xhtml:div[@id="FunctionRandomize"]/xhtml:div'; + const MIRROR = 'mirror'; + const MIRROR_HASH_VALUE = 5989458117437254; // in Python: "from struct import unpack; from hashlib import sha256; unpack('>Q', sha256(b'mirror').digest()[:8])[0]" + const MIRROR_HASH_SORT_ORDER = 'ACBEDF'; describe('shuffles nodesets', () => { beforeEach(() => { @@ -44,7 +47,10 @@ describe('randomize()', () => {

3

4

- +
+

${MIRROR}

+
+ `, { namespaceResolver } ); @@ -81,6 +87,8 @@ describe('randomize()', () => { { seed: -Infinity, expected: 'CFBEAD' }, { seed: 'floor(1.1)', expected: 'BFEACD' }, { seed: '//xhtml:div[@id="testFunctionNodeset2"]/xhtml:p', expected: 'BFEACD' }, + { seed: MIRROR_HASH_VALUE, expected: MIRROR_HASH_SORT_ORDER }, + { seed: '//xhtml:div[@id="testFunctionNodeset3"]/xhtml:p', expected: MIRROR_HASH_SORT_ORDER }, ].forEach(({ seed, expected }) => { it(`with a seed: ${seed}`, () => { const expression = `randomize(${SELECTOR}, ${seed})`; From 954f9e9497cc470b0a4528c95b0c0f657bfb9ed7 Mon Sep 17 00:00:00 2001 From: brontolosone <177225737+brontolosone@users.noreply.github.com> Date: Tue, 17 Dec 2024 10:29:39 +0000 Subject: [PATCH 06/11] Create strange-brooms-rush.md --- .changeset/strange-brooms-rush.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changeset/strange-brooms-rush.md diff --git a/.changeset/strange-brooms-rush.md b/.changeset/strange-brooms-rush.md new file mode 100644 index 000000000..71bba8563 --- /dev/null +++ b/.changeset/strange-brooms-rush.md @@ -0,0 +1,7 @@ +--- +"@getodk/xpath": patch +--- + +Choice list order randomization seed handling: better correspondence with JavaRosa behaviour, +including the addition of derivation of seeds from non-numeric inputs. +Previously, entering a non-integer in a form field seed input would result in an exception being thrown. From 761d7da02a61390b9f9d41c84173fd79c03eb52a Mon Sep 17 00:00:00 2001 From: brontolosone <177225737+brontolosone@users.noreply.github.com> Date: Wed, 18 Dec 2024 08:59:15 +0000 Subject: [PATCH 07/11] the whitespace is telling --- packages/xpath/src/functions/xforms/node-set.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/xpath/src/functions/xforms/node-set.ts b/packages/xpath/src/functions/xforms/node-set.ts index bc30d379d..afabf5a3e 100644 --- a/packages/xpath/src/functions/xforms/node-set.ts +++ b/packages/xpath/src/functions/xforms/node-set.ts @@ -386,7 +386,9 @@ export const randomize = new NodeSetFunction( const nodeResults = Array.from(results.values()); const nodes = nodeResults.map(({ value }) => value); + if (seedExpression === undefined) return seededRandomize(nodes); + const seed = seedExpression.evaluate(context); const asNumber = seed.toNumber(); // TODO: There are some peculiarities to address: https://github.com/getodk/web-forms/issues/240 let finalSeed: number | bigint | undefined; From 9b7ffc95cd4586e0c48d61680b61bfd46d813308 Mon Sep 17 00:00:00 2001 From: brontolosone <177225737+brontolosone@users.noreply.github.com> Date: Wed, 18 Dec 2024 09:25:07 +0000 Subject: [PATCH 08/11] cleanup Co-authored-by: eyelidlessness --- packages/xpath/src/lib/collections/sort.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/xpath/src/lib/collections/sort.ts b/packages/xpath/src/lib/collections/sort.ts index a6f6971f8..1f188d1fc 100644 --- a/packages/xpath/src/lib/collections/sort.ts +++ b/packages/xpath/src/lib/collections/sort.ts @@ -25,7 +25,7 @@ class SeededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator { // thus it's safe to convert to a Number. initialSeed = Number(BigInt(seed) % BigInt(SEED_MODULO_OPERAND)); } else { - initialSeed = Number(seed) % Number(SEED_MODULO_OPERAND); + initialSeed = seed % SEED_MODULO_OPERAND; } if (initialSeed <= 0) { initialSeed += MAX_INT_32 - 1; From ea5c499ebe20df91d024e311da924267176bb2f2 Mon Sep 17 00:00:00 2001 From: brontolosone <177225737+brontolosone@users.noreply.github.com> Date: Wed, 18 Dec 2024 09:32:43 +0000 Subject: [PATCH 09/11] const-ize toBigIntHash --- packages/xpath/src/functions/xforms/node-set.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/xpath/src/functions/xforms/node-set.ts b/packages/xpath/src/functions/xforms/node-set.ts index afabf5a3e..2e21c009f 100644 --- a/packages/xpath/src/functions/xforms/node-set.ts +++ b/packages/xpath/src/functions/xforms/node-set.ts @@ -409,7 +409,7 @@ export const randomize = new NodeSetFunction( } ); -function toBigIntHash(text: string): bigint { +const toBigIntHash = (text: string): bigint => { // hash text with sha256, and interpret the first 64 bits of output // (the first and second int32s ("words") of CryptoJS digest output) // as a BigInt. Thus the entropy of the hash is reduced to 64 bits, which From cb6a02143e07d4f91a0b41136526828f7c46ffda Mon Sep 17 00:00:00 2001 From: brontolosone <177225737+brontolosone@users.noreply.github.com> Date: Wed, 18 Dec 2024 10:57:05 +0000 Subject: [PATCH 10/11] address various PR comments --- .../xpath/src/functions/xforms/node-set.ts | 13 +++--- packages/xpath/src/lib/collections/sort.ts | 41 +++++++++++-------- packages/xpath/test/xforms/randomize.test.ts | 2 +- 3 files changed, 31 insertions(+), 25 deletions(-) diff --git a/packages/xpath/src/functions/xforms/node-set.ts b/packages/xpath/src/functions/xforms/node-set.ts index 2e21c009f..22eca33be 100644 --- a/packages/xpath/src/functions/xforms/node-set.ts +++ b/packages/xpath/src/functions/xforms/node-set.ts @@ -1,4 +1,4 @@ -import sha256 from 'crypto-js/sha256'; +import { SHA256 } from 'crypto-js'; import type { XPathNode } from '../../adapter/interface/XPathNode.ts'; import type { XPathDOMProvider } from '../../adapter/xpathDOMProvider.ts'; @@ -410,17 +410,18 @@ export const randomize = new NodeSetFunction( ); const toBigIntHash = (text: string): bigint => { - // hash text with sha256, and interpret the first 64 bits of output + // Hash text with sha256, and interpret the first 64 bits of output // (the first and second int32s ("words") of CryptoJS digest output) - // as a BigInt. Thus the entropy of the hash is reduced to 64 bits, which + // as an int64 (in JS represented in a BigInt). + // Thus the entropy of the hash is reduced to 64 bits, which // for some applications is sufficient. // The underlying representations are big-endian regardless of the endianness // of the machine this runs on, as is the equivalent JavaRosa implementation // at https://github.com/getodk/javarosa/blob/ab0e8f4da6ad8180ac7ede5bc939f3f261c16edf/src/main/java/org/javarosa/xpath/expr/XPathFuncExpr.java#L718-L726 const buffer = new ArrayBuffer(8); const dataview = new DataView(buffer); - sha256(text) + SHA256(text) .words.slice(0, 2) - .forEach((val, ix) => dataview.setInt32(ix * 4, val)); + .forEach((val, ix) => dataview.setInt32(ix * Int32Array.BYTES_PER_ELEMENT, val)); return dataview.getBigInt64(0); -} +}; diff --git a/packages/xpath/src/lib/collections/sort.ts b/packages/xpath/src/lib/collections/sort.ts index 1f188d1fc..534d75c33 100644 --- a/packages/xpath/src/lib/collections/sort.ts +++ b/packages/xpath/src/lib/collections/sort.ts @@ -14,8 +14,7 @@ class UnseededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator } } -class SeededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator { - // Park-Miller PRNG +class ParkMillerPRNG implements PseudoRandomNumberGenerator { protected seed: number; constructor(seed: Int | bigint) { @@ -45,32 +44,38 @@ class SeededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator { } } -export const seededRandomize = (values: readonly T[], seed?: number | bigint): T[] => { - let generator: PseudoRandomNumberGenerator; +class JavaRosaPRNG extends ParkMillerPRNG { + // Per issue #49 (https://github.com/getodk/web-forms/issues/49) this is intended to be "bug-or-feature-compatible" + // with JavaRosa's implementation; org.javarosa.core.model.ItemsetBinding.resolveRandomSeed takes the .longValue() of + // the double produced by randomSeedPathExpr.eval() — see https://github.com/getodk/javarosa/blob/6ce13527c/src/main/java/org/javarosa/core/model/ItemsetBinding.java#L311:L317 . + // That results in a 0L when the double is NaN, which happens (for instance) when there + // is a string that does not look like a number (which is a problem in itself, as any non-numeric + // looking string will then result in the same seed of 0 — see https://github.com/getodk/javarosa/issues/800). + // We'll emulate Java's Double -> Long conversion here (for NaN and some other double values) + // so that we produce the same randomization as JR. - if (seed == null) { - generator = new UnseededPseudoRandomNumberGenerator(); - } else { + constructor(seed: Int | bigint) { let finalSeed: number | bigint; - // Per issue #49 (https://github.com/getodk/web-forms/issues/49) this is intended to be "bug-or-feature-compatible" - // with JavaRosa's implementation; org.javarosa.core.model.ItemsetBinding.resolveRandomSeed takes the .longValue() of - // the double produced by randomSeedPathExpr.eval() — see https://github.com/getodk/javarosa/blob/6ce13527c/src/main/java/org/javarosa/core/model/ItemsetBinding.java#L311:L317 . - // That results in a 0L when the double is NaN, which happens (for instance) when there - // is a string that does not look like a number (which is a problem in itself, as any non-numeric - // looking string will then result in the same seed of 0 — see https://github.com/getodk/javarosa/issues/800). - // We'll emulate Java's Double -> Long conversion here (for NaN and some other double values) - // so that we produce the same randomization as JR. - // In Java, a NaN double's .longValue is 0 if (Number.isNaN(seed)) finalSeed = 0; // In Java, an Infinity double's .longValue() is 2**63 -1, which is larger than Number.MAX_SAFE_INTEGER, thus we'll need a BigInt. else if (seed === Infinity) finalSeed = 2n ** 63n - 1n; // Analogous with the above conversion, but for -Infinity else if (seed === -Infinity) finalSeed = -(2n ** 63n); - // A Java double's .longValue drops the fractional part. + // A Java Double's .longValue drops the fractional part. else if (typeof seed === 'number' && !Number.isInteger(seed)) finalSeed = Math.trunc(seed); else finalSeed = seed; - generator = new SeededPseudoRandomNumberGenerator(finalSeed); + super(finalSeed); + } +} + +export const seededRandomize = (values: readonly T[], seed?: number | bigint): T[] => { + let generator: PseudoRandomNumberGenerator; + + if (seed == null) { + generator = new UnseededPseudoRandomNumberGenerator(); + } else { + generator = new JavaRosaPRNG(seed); } const { length } = values; diff --git a/packages/xpath/test/xforms/randomize.test.ts b/packages/xpath/test/xforms/randomize.test.ts index 06fba8b8b..fa26833bf 100644 --- a/packages/xpath/test/xforms/randomize.test.ts +++ b/packages/xpath/test/xforms/randomize.test.ts @@ -19,7 +19,7 @@ describe('randomize()', () => { const SELECTOR = '//xhtml:div[@id="FunctionRandomize"]/xhtml:div'; const MIRROR = 'mirror'; - const MIRROR_HASH_VALUE = 5989458117437254; // in Python: "from struct import unpack; from hashlib import sha256; unpack('>Q', sha256(b'mirror').digest()[:8])[0]" + const MIRROR_HASH_VALUE = 5989458117437254; const MIRROR_HASH_SORT_ORDER = 'ACBEDF'; describe('shuffles nodesets', () => { From 6628199ba9f46d81a04916aa9d07dcc9b17bd780 Mon Sep 17 00:00:00 2001 From: brontolosone <177225737+brontolosone@users.noreply.github.com> Date: Wed, 8 Jan 2025 16:14:49 +0000 Subject: [PATCH 11/11] jsdocify --- .../xpath/src/functions/xforms/node-set.ts | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/packages/xpath/src/functions/xforms/node-set.ts b/packages/xpath/src/functions/xforms/node-set.ts index 22eca33be..19f649e15 100644 --- a/packages/xpath/src/functions/xforms/node-set.ts +++ b/packages/xpath/src/functions/xforms/node-set.ts @@ -410,14 +410,16 @@ export const randomize = new NodeSetFunction( ); const toBigIntHash = (text: string): bigint => { - // Hash text with sha256, and interpret the first 64 bits of output - // (the first and second int32s ("words") of CryptoJS digest output) - // as an int64 (in JS represented in a BigInt). - // Thus the entropy of the hash is reduced to 64 bits, which - // for some applications is sufficient. - // The underlying representations are big-endian regardless of the endianness - // of the machine this runs on, as is the equivalent JavaRosa implementation - // at https://github.com/getodk/javarosa/blob/ab0e8f4da6ad8180ac7ede5bc939f3f261c16edf/src/main/java/org/javarosa/xpath/expr/XPathFuncExpr.java#L718-L726 + /** + Hash text with sha256, and interpret the first 64 bits of output + (the first and second int32s ("words") of CryptoJS digest output) + as an int64 (in JS represented in a BigInt). + Thus the entropy of the hash is reduced to 64 bits, which + for some applications is sufficient. + The underlying representations are big-endian regardless of the endianness + of the machine this runs on, as is the equivalent JavaRosa implementation. + ({@link https://github.com/getodk/javarosa/blob/ab0e8f4da6ad8180ac7ede5bc939f3f261c16edf/src/main/java/org/javarosa/xpath/expr/XPathFuncExpr.java#L718-L726 | see here}). + */ const buffer = new ArrayBuffer(8); const dataview = new DataView(buffer); SHA256(text)