diff --git a/config.json b/config.json index 2c98efc4..80564110 100644 --- a/config.json +++ b/config.json @@ -762,6 +762,14 @@ "practices": [], "prerequisites": [], "difficulty": 1 + }, + { + "slug": "micro-blog", + "name": "Micro Blog", + "uuid": "f03e4e99-97ff-4ac7-8975-bbafa38c9088", + "practices": [], + "prerequisites": [], + "difficulty": 1 } ] }, diff --git a/exercises/practice/micro-blog/.docs/instructions.md b/exercises/practice/micro-blog/.docs/instructions.md new file mode 100644 index 00000000..d6c6cf65 --- /dev/null +++ b/exercises/practice/micro-blog/.docs/instructions.md @@ -0,0 +1,37 @@ +# Instructions + +You have identified a gap in the social media market for very very short posts. +Now that Twitter allows 280 character posts, people wanting quick social media updates aren't being served. +You decide to create your own social media network. + +To make your product noteworthy, you make it extreme and only allow posts of 5 or less characters. +Any posts of more than 5 characters should be truncated to 5. + +To allow your users to express themselves fully, you allow Emoji and other Unicode. + +The task is to truncate input strings to 5 characters. + +## Text Encodings + +Text stored digitally has to be converted to a series of bytes. +There are 3 ways to map characters to bytes in common use. + +- **ASCII** can encode English language characters. + All characters are precisely 1 byte long. +- **UTF-8** is a Unicode text encoding. + Characters take between 1 and 4 bytes. +- **UTF-16** is a Unicode text encoding. + Characters are either 2 or 4 bytes long. + +UTF-8 and UTF-16 are both Unicode encodings which means they're capable of representing a massive range of characters including: + +- Text in most of the world's languages and scripts +- Historic text +- Emoji + +UTF-8 and UTF-16 are both variable length encodings, which means that different characters take up different amounts of space. + +Consider the letter 'a' and the emoji '😛'. 
+In UTF-16 the letter takes 2 bytes but the emoji takes 4 bytes. + +The trick to this exercise is to use APIs designed around Unicode characters (codepoints) instead of Unicode codeunits. diff --git a/exercises/practice/micro-blog/.meta/config.json b/exercises/practice/micro-blog/.meta/config.json new file mode 100644 index 00000000..4fdb1593 --- /dev/null +++ b/exercises/practice/micro-blog/.meta/config.json @@ -0,0 +1,17 @@ +{ + "authors": [ + "m-dango" + ], + "files": { + "solution": [ + "lib/MicroBlog.pm" + ], + "test": [ + "t/micro-blog.t" + ], + "example": [ + ".meta/solutions/lib/MicroBlog.pm" + ] + }, + "blurb": "Given an input string, truncate it to 5 characters." +} diff --git a/exercises/practice/micro-blog/.meta/solutions/lib/MicroBlog.pm b/exercises/practice/micro-blog/.meta/solutions/lib/MicroBlog.pm new file mode 100644 index 00000000..5f209472 --- /dev/null +++ b/exercises/practice/micro-blog/.meta/solutions/lib/MicroBlog.pm @@ -0,0 +1,16 @@ +package MicroBlog; + +use strict; +use warnings; +use experimental qw<signatures>; + +use Exporter qw<import>; +our @EXPORT_OK = qw<truncate_post>; + +use Encode; + +sub truncate_post ($utf8_bytes) { + return substr( Encode::decode( 'UTF-8', $utf8_bytes ), 0, 5 ); +} + +1; diff --git a/exercises/practice/micro-blog/.meta/solutions/t/micro-blog.t b/exercises/practice/micro-blog/.meta/solutions/t/micro-blog.t new file mode 120000 index 00000000..a654d821 --- /dev/null +++ b/exercises/practice/micro-blog/.meta/solutions/t/micro-blog.t @@ -0,0 +1 @@ +../../../t/micro-blog.t \ No newline at end of file diff --git a/exercises/practice/micro-blog/.meta/template-data.yaml b/exercises/practice/micro-blog/.meta/template-data.yaml new file mode 100644 index 00000000..6ec055da --- /dev/null +++ b/exercises/practice/micro-blog/.meta/template-data.yaml @@ -0,0 +1,28 @@ +subs: truncate_post + +properties: + truncate: + test: |- + use Data::Dmp; + use Encode; + + sprintf(<<'END', dmp(Encode::encode('UTF-8', $case->{input}{phrase})), $case->{expected}, 
$case->{description}); + is( + truncate_post(%s), + '%s', + '%s', + ); + END + + +example: |- + use Encode; + + sub truncate_post ($utf8_bytes) { + return substr(Encode::decode('UTF-8', $utf8_bytes), 0, 5); + } + +stub: |- + sub truncate_post ($utf8_bytes) { + return undef; + } diff --git a/exercises/practice/micro-blog/.meta/tests.toml b/exercises/practice/micro-blog/.meta/tests.toml new file mode 100644 index 00000000..f23ff0bc --- /dev/null +++ b/exercises/practice/micro-blog/.meta/tests.toml @@ -0,0 +1,46 @@ +# This is an auto-generated file. +# +# Regenerating this file via `configlet sync` will: +# - Recreate every `description` key/value pair +# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications +# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion) +# - Preserve any other key/value pair +# +# As user-added comments (using the # character) will be removed when this file +# is regenerated, comments can be added via a `comment` key. 
+ +[b927b57f-7c98-42fd-8f33-fae091dc1efc] +description = "English language short" + +[a3fcdc5b-0ed4-4f49-80f5-b1a293eac2a0] +description = "English language long" + +[01910864-8e15-4007-9c7c-ac956c686e60] +description = "German language short (broth)" + +[f263e488-aefb-478f-a671-b6ba99722543] +description = "German language long (bear carpet → beards)" + +[0916e8f1-41d7-4402-a110-b08aa000342c] +description = "Bulgarian language short (good)" + +[bed6b89c-03df-4154-98e6-a61a74f61b7d] +description = "Greek language short (health)" + +[485a6a70-2edb-424d-b999-5529dbc8e002] +description = "Maths short" + +[8b4b7b51-8f48-4fbe-964e-6e4e6438be28] +description = "Maths long" + +[71f4a192-0566-4402-a512-fe12878be523] +description = "English and emoji short" + +[6f0f71f3-9806-4759-a844-fa182f7bc203] +description = "Emoji short" + +[ce71fb92-5214-46d0-a7f8-d5ba56b4cc6e] +description = "Emoji long" + +[5dee98d2-d56e-468a-a1f2-121c3f7c5a0b] +description = "Royal Flush?" diff --git a/exercises/practice/micro-blog/lib/MicroBlog.pm b/exercises/practice/micro-blog/lib/MicroBlog.pm new file mode 100644 index 00000000..e6127931 --- /dev/null +++ b/exercises/practice/micro-blog/lib/MicroBlog.pm @@ -0,0 +1,12 @@ +package MicroBlog; + +use v5.40; + +use Exporter qw<import>; +our @EXPORT_OK = qw<truncate_post>; + +sub truncate_post ($utf8_bytes) { + return undef; +} + +1; diff --git a/exercises/practice/micro-blog/t/micro-blog.t b/exercises/practice/micro-blog/t/micro-blog.t new file mode 100755 index 00000000..1d92a50e --- /dev/null +++ b/exercises/practice/micro-blog/t/micro-blog.t @@ -0,0 +1,81 @@ +#!/usr/bin/env perl +use Test2::V0; + +use FindBin qw<$Bin>; +use lib "$Bin/../lib", "$Bin/../local/lib/perl5"; + +use MicroBlog qw<truncate_post>; + +is( # begin: b927b57f-7c98-42fd-8f33-fae091dc1efc + truncate_post("Hi"), + 'Hi', + 'English language short', +); # end: b927b57f-7c98-42fd-8f33-fae091dc1efc + +is( # begin: a3fcdc5b-0ed4-4f49-80f5-b1a293eac2a0 + truncate_post("Hello there"), + 'Hello', + 'English language 
long', +); # end: a3fcdc5b-0ed4-4f49-80f5-b1a293eac2a0 + +is( # begin: 01910864-8e15-4007-9c7c-ac956c686e60 + truncate_post("br\xC3\xBChe"), + 'brühe', + 'German language short (broth)', +); # end: 01910864-8e15-4007-9c7c-ac956c686e60 + +is( # begin: f263e488-aefb-478f-a671-b6ba99722543 + truncate_post("B\xC3\xA4rteppich"), + 'Bärte', + 'German language long (bear carpet → beards)', +); # end: f263e488-aefb-478f-a671-b6ba99722543 + +is( # begin: 0916e8f1-41d7-4402-a110-b08aa000342c + truncate_post("\xD0\x94\xD0\xBE\xD0\xB1\xD1\x8A\xD1\x80"), + 'Добър', + 'Bulgarian language short (good)', +); # end: 0916e8f1-41d7-4402-a110-b08aa000342c + +is( # begin: bed6b89c-03df-4154-98e6-a61a74f61b7d + truncate_post("\xCF\x85\xCE\xB3\xCE\xB5\xCE\xB9\xCE\xAC"), + 'υγειά', + 'Greek language short (health)', +); # end: bed6b89c-03df-4154-98e6-a61a74f61b7d + +is( # begin: 485a6a70-2edb-424d-b999-5529dbc8e002 + truncate_post("a=\xCF\x80r\xC2\xB2"), + 'a=πr²', + 'Maths short', +); # end: 485a6a70-2edb-424d-b999-5529dbc8e002 + +is( # begin: 8b4b7b51-8f48-4fbe-964e-6e4e6438be28 + truncate_post("\xE2\x88\x85\xE2\x8A\x8A\xE2\x84\x95\xE2\x8A\x8A\xE2\x84\xA4\xE2\x8A\x8A\xE2\x84\x9A\xE2\x8A\x8A\xE2\x84\x9D\xE2\x8A\x8A\xE2\x84\x82"), + '∅⊊ℕ⊊ℤ', + 'Maths long', +); # end: 8b4b7b51-8f48-4fbe-964e-6e4e6438be28 + +is( # begin: 71f4a192-0566-4402-a512-fe12878be523 + truncate_post("Fly \xF0\x9F\x9B\xAB"), + 'Fly 🛫', + 'English and emoji short', +); # end: 71f4a192-0566-4402-a512-fe12878be523 + +is( # begin: 6f0f71f3-9806-4759-a844-fa182f7bc203 + truncate_post("\xF0\x9F\x92\x87"), + '💇', + 'Emoji short', +); # end: 6f0f71f3-9806-4759-a844-fa182f7bc203 + +is( # begin: ce71fb92-5214-46d0-a7f8-d5ba56b4cc6e + truncate_post("\xE2\x9D\x84\xF0\x9F\x8C\xA1\xF0\x9F\xA4\xA7\xF0\x9F\xA4\x92\xF0\x9F\x8F\xA5\xF0\x9F\x95\xB0\xF0\x9F\x98\x80"), + '❄🌡🤧🤒🏥', + 'Emoji long', +); # end: ce71fb92-5214-46d0-a7f8-d5ba56b4cc6e + +is( # begin: 5dee98d2-d56e-468a-a1f2-121c3f7c5a0b + 
truncate_post("\xF0\x9F\x83\x8E\xF0\x9F\x82\xB8\xF0\x9F\x83\x85\xF0\x9F\x83\x8B\xF0\x9F\x83\x8D\xF0\x9F\x83\x81\xF0\x9F\x83\x8A"), + '🃎🂸🃅🃋🃍', + 'Royal Flush?', +); # end: 5dee98d2-d56e-468a-a1f2-121c3f7c5a0b + +done_testing;