Skip to content

Commit

Permalink
feat: dq_label_nutrient_levels (#9410)
Browse files Browse the repository at this point in the history
* dq_label_nutrient_levels

* make lint

* apply comment + add tests
  • Loading branch information
benbenben2 authored Dec 15, 2023
1 parent ee8dd5b commit 7c2fb51
Show file tree
Hide file tree
Showing 7 changed files with 1,824 additions and 160 deletions.
558 changes: 554 additions & 4 deletions lib/ProductOpener/DataQualityFood.pm

Large diffs are not rendered by default.

76 changes: 62 additions & 14 deletions lib/ProductOpener/Units.pm
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ BEGIN {
&normalize_serving_size
&normalize_quantity
&extract_standard_unit
); # symbols to export on request
%EXPORT_TAGS = (all => [@EXPORT_OK]);
Expand Down Expand Up @@ -222,21 +223,24 @@ sub mmoll_to_unit ($value, $unit) {
return g_to_unit($value, $unit);
}

=head2 normalize_quantity($quantity)
=head2 parse_quantity_unit($quantity)
Returns the size in g or ml for the whole product. Eg.:
normalize_quantity(1 barquette de 40g) returns 40
normalize_quantity(20 tranches 500g) returns 500
normalize_quantity(6x90g) returns 540
normalize_quantity(2kg) returns 2000
Returns the quantity ($q), the multiplicator ($m, optional) and the unit ($u)
that may be found in the quantity field entered by contributors
Returns undef if no quantity was detected.
parse_quantity_unit(1 barquette de 40g) returns (40, 1, g)
parse_quantity_unit(20 tranches 500g) returns (500, undef, g)
parse_quantity_unit(6x90g) returns (90, 6, g)
parse_quantity_unit(2kg) returns (2, undef, kg)
Returns (undef, undef, undef) if no quantity was detected.
=cut

sub normalize_quantity ($quantity) {
sub parse_quantity_unit ($quantity, $standard_unit_bool = undef) {

my $q = undef;
my $m = undef;
my $u = undef;

# 12 pots x125 g
Expand All @@ -248,20 +252,64 @@ sub normalize_quantity ($quantity) {
=~ /(?<number>\d+)(\s(\p{Letter}| )+)?(\s)?( de | of |x|\*)(\s)?(?<quantity>$number_regexp)(\s)?(?<unit>$units_regexp)\b/i
)
{
my $m = $+{number};
$m = $+{number};
$q = lc($+{quantity});
$u = $+{unit};
$q = convert_string_to_number($q);
$q = unit_to_g($q * $m, $u);
}
elsif ($quantity =~ /(?<quantity>$number_regexp)(\s)?(?<unit>$units_regexp)\s*\b/i) {
$q = lc($+{quantity});
$u = $+{unit};
$q = convert_string_to_number($q);
$q = unit_to_g($q, $u);
}

return $q;
return ($q, $m, $u);
}

=head2 normalize_quantity($quantity_field)

Returns the size in g or ml for the whole product. Eg.:

    normalize_quantity(1 barquette de 40g) returns 40
    normalize_quantity(20 tranches 500g) returns 500
    normalize_quantity(6x90g) returns 540
    normalize_quantity(2kg) returns 2000

Returns undef if no quantity was detected (including when the quantity
field itself is undefined).

=head3 Arguments

=head4 $quantity_field

The free-text quantity field of the product, as handed to
parse_quantity_unit().

=head3 Return value

A single scalar: the result of unit_to_g() applied to the parsed quantity
(multiplied by the multiplier when one was found), or undef.

=cut

sub normalize_quantity ($quantity_field) {

	# An undefined field cannot contain a quantity: do not run the parser on it.
	my ($quantity, $multiplier, $unit)
		= defined $quantity_field ? parse_quantity_unit($quantity_field) : ();

	# Nothing was recognised: return undef right away instead of pushing
	# undefined values through the conversion helpers.
	# ($quantity is returned rather than a bare "return" so that callers in
	# list context still receive exactly one element, as before.)
	return $quantity if not defined $quantity;

	# The parser hands back the quantity as matched text; turn it into a
	# number before doing arithmetic on it.
	$quantity = convert_string_to_number($quantity);

	# "6 x 90 g" style quantities: the whole product is multiplier * quantity.
	if (defined $multiplier) {
		$quantity = $quantity * $multiplier;
	}

	# Keep the result in a scalar so the return value never depends on the
	# calling context.
	my $normalized_quantity = unit_to_g($quantity, $unit);

	return $normalized_quantity;
}

=head2 extract_standard_unit($quantity_field)

Returns the standard_unit corresponding to the unit extracted from the
quantity field. Eg.:

    extract_standard_unit(1 barquette de 40g) returns g
    extract_standard_unit(2kg) returns g
    extract_standard_unit(33cl) returns ml

Returns undef if no unit was detected, if the detected unit is not a known
unit name, or if the unit has no standard_unit.

=head3 Arguments

=head4 $quantity_field

The free-text quantity field of the product, as handed to
parse_quantity_unit().

=head3 Return value

A single scalar: the standard_unit entry of the matching unit in %units,
or undef.

=cut

sub extract_standard_unit ($quantity_field) {

	# Always returned as one scalar, so list-context callers get exactly one
	# element whether or not a unit was found.
	my $standard_unit = undef;

	# An undefined field cannot contain a unit: do not run the parser on it.
	my (undef, undef, $unit)
		= defined $quantity_field ? parse_quantity_unit($quantity_field) : ();

	if (defined $unit) {
		# search in the map of all synonyms in all languages (%units_names)
		my $unit_id = $units_names{lc($unit)};    # $unit_id can be undefined

		# Only look inside %units for an entry that really exists: reading
		# $units{$unit_id}{standard_unit} for a missing or undefined id would
		# autovivify an empty entry in the shared %units hash.
		if ((defined $unit_id) and (exists $units{$unit_id})) {
			$standard_unit = $units{$unit_id}{standard_unit};    # standard_unit can be undefined
		}
	}

	return $standard_unit;
}

=head2 normalize_serving_size($serving)
Expand Down
1 change: 1 addition & 0 deletions stop_words.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ Mousquetaires
msgctxt
multiline
Multiline
multiplicator
NaN
naturel
nd
Expand Down
229 changes: 229 additions & 0 deletions taxonomies/data_quality.txt

Large diffs are not rendered by default.

Loading

0 comments on commit 7c2fb51

Please sign in to comment.