diff --git a/configure.php b/configure.php
index 078c66e53..52c372bac 100755
--- a/configure.php
+++ b/configure.php
@@ -734,7 +734,6 @@ function getFileModificationHistory(): array {
globbetyglob("{$ac['basedir']}/scripts", 'make_scripts_executable');
-
{ # file-entities.php
$cmd = array();
@@ -756,7 +755,6 @@ function getFileModificationHistory(): array {
}
}
-
checking("for if we should generate a simplified file");
if ($ac["GENERATE"] != "no") {
if (!file_exists($ac["GENERATE"])) {
diff --git a/entities/global.ent-dist b/entities/global.ent-dist
new file mode 100644
index 000000000..1d5c90fe8
--- /dev/null
+++ b/entities/global.ent-dist
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/entities/manual.ent-dist b/entities/manual.ent-dist
new file mode 100644
index 000000000..62ca58550
--- /dev/null
+++ b/entities/manual.ent-dist
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/entities/remove.ent-dist b/entities/remove.ent-dist
new file mode 100644
index 000000000..6bf8988ad
--- /dev/null
+++ b/entities/remove.ent-dist
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/manual.xml.in b/manual.xml.in
index 39213558c..98fa230a6 100644
--- a/manual.xml.in
+++ b/manual.xml.in
@@ -11,6 +11,11 @@
%language-snippets;
@TRANSLATION_ONLY_INCL_END@
+
+
+%manual-entities;
+
+
@@ -57,7 +62,6 @@
&install.cloud.index;
&install.fpm.index;
&install.pecl;
- &install.composer;
&install.ini;
diff --git a/scripts/dtdent-conv.php b/scripts/dtdent-conv.php
new file mode 100644
index 000000000..777a2cb2d
--- /dev/null
+++ b/scripts/dtdent-conv.php
@@ -0,0 +1,84 @@
+ |
++----------------------------------------------------------------------+
+| Description: Convert DTD Entities files into XML Entities files. |
++----------------------------------------------------------------------+
+
+See `entities.php` for detailed rationale.
+
+Use this for converting bundled entities files that use DTD <!ENTITY>
+declarations into the XML version used by `entities.php`.
+
+After converting, add the generated entities to a global.ent or
+manual.ent file, and delete the previous one.
+
+After all old style .ent files are split or converted, this script can
+be removed. */
+
+ini_set( 'display_errors' , 1 );
+ini_set( 'display_startup_errors' , 1 );
+error_reporting( E_ALL );
+
+if ( count( $argv ) < 2 )
+ die(" Syntax: php $argv[0] infile\n" );
+
+$infile = $argv[1];
+
+$content = file_get_contents( $infile );
+
+$pos1 = 0;
+while ( true )
+{
+ $pos1 = strpos( $content , " DOMNodeList (ampunstand intended)
+
+ $name = trim( $name );
+ $text = str_replace( "&" , "&" , $text );
+
+ $frag = "\n";
+ $frag .= " $text\n";
+ $frag .= '';
+
+ $dom = new DOMDocument( '1.0' , 'utf8' );
+ $dom->recover = true;
+ $dom->resolveExternals = false;
+ libxml_use_internal_errors( true );
+
+ $dom->loadXML( $frag , LIBXML_NSCLEAN );
+ $dom->normalizeDocument();
+
+ libxml_clear_errors();
+
+ $text = $dom->saveXML( $dom->getElementsByTagName( "entity" )[0] );
+ $text = str_replace( "&" , "&" , $text );
+
+ echo "\n$text\n";
+}
diff --git a/scripts/dtdent-split.php b/scripts/dtdent-split.php
new file mode 100644
index 000000000..d23863b31
--- /dev/null
+++ b/scripts/dtdent-split.php
@@ -0,0 +1,123 @@
+ |
++----------------------------------------------------------------------+
+| Description: Split old DTD .ent file into individual XML files. |
++----------------------------------------------------------------------+
+
+See `entities.php` for detailed rationale.
+
+Use this for splitting `language-snippets.ent` and possibly other DTD
+entities files into individual .xml files.
+
+After splitting, add generated files under doc-lang/entities/ , and
+the original file, in one go.
+
+After all DTD .ent files are split or converted, this script can
+be removed. */
+
+ini_set( 'display_errors' , 1 );
+ini_set( 'display_startup_errors' , 1 );
+error_reporting( E_ALL );
+
+if ( count( $argv ) < 3 )
+ die(" Syntax: php $argv[0] infile outdir [hash user]\n" );
+
+$infile = $argv[1];
+$outdir = $argv[2];
+$hash = $argv[3] ?? "";
+$user = $argv[4] ?? "_";
+
+$content = file_get_contents( $infile );
+$entities = [];
+
+// Parse
+
+$pos1 = 0;
+while ( true )
+{
+ $pos1 = strpos( $content , " $text )
+{
+ $file = "$outdir/$name.xml";
+ if ( file_exists( $file ) )
+ echo( "Entity name colision, OVERWROTE: $file\n" );
+}
+
+// Write
+
+foreach( $entities as $name => $text )
+{
+ $file = "$outdir/$name.xml";
+
+ if ( $hash == "" )
+ $header = '';
+ else
+ $header .= "\n";
+
+ file_put_contents( $file , $header . $text );
+}
+
+// Test
+
+$dom = new DOMDocument();
+$dom->recover = true;
+$dom->resolveExternals = false;
+libxml_use_internal_errors( true );
+
+foreach( $entities as $name => $text )
+{
+ $file = "$outdir/$name.xml";
+
+ $text = file_get_contents( $file );
+ $text = "$text";
+
+ $dom->loadXML( $text );
+ $err = libxml_get_errors();
+ libxml_clear_errors();
+
+ foreach( $err as $e )
+ {
+ $msg = trim( $e->message );
+ if ( str_starts_with( $msg , "Entity '" ) && str_ends_with( $msg , "' not defined" ) )
+ continue;
+ die( "Failed to load $file\n" );
+ }
+}
+
+$total = count( $entities );
+print "Generated $total files.\n";
diff --git a/scripts/entities.php b/scripts/entities.php
new file mode 100644
index 000000000..2e927c8cd
--- /dev/null
+++ b/scripts/entities.php
@@ -0,0 +1,412 @@
+ |
++----------------------------------------------------------------------+
+| Description: Collect individual entities into an .entities.ent file. |
++----------------------------------------------------------------------+
+
+# Mental model, or things that I would have liked to know 20 years prior
+
+DTD Entity processing has more in common with DOMDocumentFragment than
+DOMElement. In other words, simple text and multi rooted XML files
+are valid contents, whereas they are not valid XML documents.
+
+Also, namespaces do not automatically "cross" between a parent
+document and its entities, even if they are included in the same
+file, as local textual entities. Entities are, for all intents and
+purposes, separate documents, with separate namespaces, and are
+*expected* to have different default namespaces.
+
+So each one of the, possibly multiple, "root" XML elements inside a
+fragment needs to be annotated with a default namespace, even if the
+"root" element occurs surrounded by text. For example:
+
+- "text<a>text</a>text" needs one namespace, or it is invalid, and;
+- "<a></a><a></a>" needs TWO namespaces, or it is also invalid.
+
+# Output
+
+This script collects grouped and individual XML Entity files
+(detailed below), at some expected relative paths, and generates a
+doc-base/temp/entities.ent file with their respective DTD Entities.
+
+The output file has no duplications, so collection order is important
+to keep the necessary operational semantics. Here, later-loaded entities
+take priority over (override) previously defined ones. Note that this is
+the reverse of the DTD convention, where duplicated entity names are
+ignored. The priority order used here is important to allow detecting
+cases where global entities are being overwritten, or if expected
+translatable entities are missing translations.
+
+# Individual XML Entities, or `.xml` files at `entities/`
+
+As explained above, the individual entity contents are not really
+valid XML *documents*, they are only at most valid XML *fragments*.
+More technically, these XML files are really well-balanced texts, per
+https://www.w3.org/TR/xml-fragment/#defn-well-balanced .
+
+Yet, individual entities are stored in entities/ as .xml files, for
+two reasons: first, text editors in general can highlight XML syntax in
+well-balanced texts; and second, this allows normal revision tracking
+per file, without requiring weird changes on `revcheck.php`. Note that
+it is *invalid* to place an XML declaration in these fragment files, at
+least in files that are invalid XML documents (multi-node rooted ones).
+
+# Grouped entities files, file tracked
+
+For very small textual entities, down to simple text words or single
+tag elements that may never change, individual entity tracking is
+an overkill. This script also loads grouped XML Entities files, at
+some expected locations, with specific semantics.
+
+These grouped files are really normal XML files, correctly annotated
+with XML namespaces used on manuals, so any individual exported entity
+has correct and clean XML namespace annotations. These grouped entity
+files are tracked normally by revcheck, but are not directly included
+in manual.xml.in, as they only participate in general entity loading,
+described above.
+
+- global.ent - expected unreplaced
+- manual.ent - expected replaced (translated)
+- remove.ent - expected unused
+- lang/entities/* - expected replaced (translated)
+
+*/
+
+const PARTIAL_IMPL = true; // For while XML Entities are not fully implanted in all languages
+
+// Command-line driver: parses arguments, loads every grouped and individual
+// entity source in priority order (later loads override earlier ones),
+// writes the combined output file and prints a one-line summary.
+
+ini_set( 'display_errors' , 1 );
+ini_set( 'display_startup_errors' , 1 );
+error_reporting( E_ALL );
+
+if ( count( $argv ) < 2 || in_array( '--help' , $argv ) || in_array( '-h' , $argv ) )
+{
+    fwrite( STDERR , "\nUsage: {$argv[0]} [--debug] langCode [langCode]\n\n" );
+    return;
+}
+
+// Start from an empty output file. The call yields no value, so nothing
+// is captured from it.
+Entities::rotateOutputFile(); // idempotent
+
+// Every argument other than --debug is taken as a language code, in order.
+// $debug stays a global because loadDir() reads it through `global $debug`.
+$langs = [];
+$debug = false;
+
+foreach( array_slice( $argv , 1 ) as $arg )
+{
+    if ( $arg == "--debug" )
+        $debug = true;
+    else
+        $langs[] = $arg;
+}
+$normal = ! $debug;
+
+if ( $normal )
+    print "Creating .entities.ent...";
+else
+    print "Creating .entities.ent in debug mode.\n";
+
+loadEnt( __DIR__ . "/../global.ent" , global: true , warnMissing: true );
+foreach( $langs as $lang )
+{
+    loadEnt( __DIR__ . "/../../$lang/global.ent" , global: true );
+    loadEnt( __DIR__ . "/../../$lang/manual.ent" , translate: true , warnMissing: true );
+    loadEnt( __DIR__ . "/../../$lang/remove.ent" , remove: true );
+    loadDir( $langs , $lang );
+    // Duplicate detection only applies up to the end of the first language.
+    Entities::$debugUnique = false;
+}
+
+Entities::writeOutputFile();
+Entities::checkReplaces( $debug );
+
+// Summary line; the optional counters are shown only when non-zero.
+echo " done: " , Entities::$countTotalGenerated , " entities";
+if ( Entities::$countUnstranslated > 0 )
+    echo ", " , Entities::$countUnstranslated , " untranslated";
+if ( Entities::$countReplacedGlobal > 0 )
+    echo ", " , Entities::$countReplacedGlobal , " global replaced";
+if ( Entities::$countReplacedRemove > 0 )
+    echo ", " , Entities::$countReplacedRemove , " remove replaced";
+if ( Entities::$countDuplicated > 0 )
+    echo ", " , Entities::$countDuplicated , " duplicated (first language)";
+echo ".\n";
+
+exit;
+
+/**
+ * Plain mutable record for one collected entity: the file it was read
+ * from, its entity name, and its replacement text.
+ */
+class EntityData
+{
+    public string $path;
+    public string $name;
+    public string $text;
+
+    public function __construct( string $path , string $name , string $text )
+    {
+        $this->path = $path;
+        $this->name = $name;
+        $this->text = $text;
+    }
+}
+
+/**
+ * Static registry of every entity collected during one run.
+ *
+ * Entities are stored by name, so a later put() for the same name
+ * replaces the earlier one. Per-name counters and the "expected" sets
+ * (global / replace / remove) are kept alongside so checkReplaces() can
+ * report names that were overridden more or fewer times than expected.
+ */
+class Entities
+{
+    private static string $filename = __DIR__ . "/../temp/entities.ent"; // idempotent
+
+    private static array $entities = []; // Latest EntityData per name (later puts override earlier ones)
+    private static array $global   = []; // Entities expected not replaced
+    private static array $replace  = []; // Entities expected replaced / translated
+    private static array $remove   = []; // Entities expected not replaced and not used
+    private static array $unique   = []; // For detecting duplicated global+en entities
+    private static array $count    = []; // Name => number of times put() saw that name
+    private static array $slow     = []; // External entities, slow, uncontrolled file overwrites
+
+    public static bool $debugUnique = true; // Start on unique mode, disable on second language
+
+    public static int $countUnstranslated  = 0;
+    public static int $countReplacedGlobal = 0;
+    public static int $countReplacedRemove = 0;
+    public static int $countTotalGenerated = 0;
+    public static int $countDuplicated     = 0;
+
+    /**
+     * Registers (or overrides) an entity.
+     *
+     * @param string $path    File the entity text was read from.
+     * @param string $name    Entity name; also the registry key.
+     * @param string $text    Replacement text of the entity.
+     * @param bool   $global  Mark the name as expected to never be replaced.
+     * @param bool   $replace Mark the name as expected to be replaced once.
+     * @param bool   $remove  Mark the name as expected unused and not replaced.
+     */
+    public static function put( string $path , string $name , string $text , bool $global = false , bool $replace = false , bool $remove = false ): void
+    {
+        $entity = new EntityData( $path , $name , $text );
+        self::$entities[ $name ] = $entity;
+
+        if ( $global )
+            self::$global[ $name ] = $name;
+
+        if ( $replace )
+            self::$replace[ $name ] = $name;
+
+        if ( $remove )
+            self::$remove[ $name ] = $name;
+
+        self::$count[ $name ] = ( self::$count[ $name ] ?? 0 ) + 1;
+
+        // While unique mode is on, any repeated name is reported as a duplicate.
+        if ( self::$debugUnique )
+        {
+            if ( isset( self::$unique[ $name ] ) )
+            {
+                self::$countDuplicated++;
+                if ( self::$countDuplicated == 1 )
+                    fwrite( STDERR , "\n\n" );
+                fwrite( STDERR , " Duplicated entity: $name\n" );
+            }
+            self::$unique[ $name ] = $entity;
+        }
+    }
+
+    /**
+     * Records that an entity was emitted as an external file reference and
+     * warns when the same path is recorded twice.
+     *
+     * The seen-paths set must be the static property: with a local variable
+     * the set would start empty on every call and the warning could never
+     * be emitted.
+     */
+    public static function slow( string $path ): void
+    {
+        if ( isset( self::$slow[ $path ] ) )
+            fwrite( STDERR , "Unexpected file overwrite: $path\n" );
+        self::$slow[ $path ] = $path;
+    }
+
+    /**
+     * Replaces any previous output file with an empty one and stores its
+     * resolved absolute path for later use.
+     */
+    public static function rotateOutputFile(): void
+    {
+        // touch() cannot create the file when its directory is missing.
+        $dir = dirname( self::$filename );
+        if ( ! is_dir( $dir ) )
+            mkdir( $dir , 0777 , true );
+
+        if ( file_exists( self::$filename ) )
+            unlink( self::$filename );
+        touch( self::$filename );
+
+        // Keep the configured path if resolution fails, instead of letting
+        // a `false` result be coerced into an empty file name.
+        $real = realpath( self::$filename ); // only full paths on XML
+        if ( $real !== false )
+            self::$filename = $real;
+    }
+
+    /** Writes every registered entity to the output file. */
+    public static function writeOutputFile(): void
+    {
+        saveEntitiesFile( self::$filename , self::$entities );
+    }
+
+    /**
+     * Recomputes the summary counters from the registry.
+     *
+     * A name's replacement count is the number of put() calls for it minus
+     * one. Global and remove names are counted when that is not zero;
+     * replace names are counted when it is not exactly one.
+     *
+     * @param bool $debug When true, prints one line per offending name.
+     */
+    public static function checkReplaces( bool $debug ): void
+    {
+        self::$countTotalGenerated = count( self::$entities );
+        self::$countUnstranslated  = 0;
+        self::$countReplacedGlobal = 0;
+        self::$countReplacedRemove = 0;
+
+        foreach( array_keys( self::$entities ) as $name )
+        {
+            $replaced = self::$count[ $name ] - 1;
+
+            // The expectation sets are keyed by name, so a key lookup
+            // replaces a linear scan for every entity.
+            $expectedGlobal   = isset( self::$global[ $name ] );
+            $expectedReplaced = isset( self::$replace[ $name ] );
+            $expectedRemoved  = isset( self::$remove[ $name ] );
+
+            if ( $expectedGlobal && $replaced != 0 )
+            {
+                self::$countReplacedGlobal++;
+                if ( $debug )
+                    print "Expected global, replaced $replaced times: $name\n";
+            }
+
+            if ( $expectedReplaced && $replaced != 1 )
+            {
+                self::$countUnstranslated++;
+                if ( $debug )
+                    print "Expected translated, replaced $replaced times: $name\n";
+            }
+
+            if ( $expectedRemoved && $replaced != 0 )
+            {
+                self::$countReplacedRemove++;
+                if ( $debug )
+                    print "Expected removed, replaced $replaced times: $name\n";
+            }
+        }
+    }
+}
+
+/**
+ * Loads a grouped entities file and registers each entity it declares.
+ *
+ * Every element matched by the XPath `/*` + `/*` (a child of the document
+ * element) is one entity: its `name` attribute is the entity name and its
+ * child nodes, serialized back to text, are the entity contents.
+ *
+ * @param string $path        Path of the grouped file; may not exist.
+ * @param bool   $global      Entities are expected to never be replaced.
+ * @param bool   $translate   Entities are expected to be replaced (translated).
+ * @param bool   $remove      Entities are expected unused and not replaced.
+ * @param bool   $warnMissing Report a missing file on STDERR (only when
+ *                            PARTIAL_IMPL is off).
+ */
+function loadEnt( string $path , bool $global = false , bool $translate = false , bool $remove = false , bool $warnMissing = false )
+{
+    $realpath = realpath( $path );
+    if ( $realpath === false )
+    {
+        // A missing file is never loadable: optionally warn, then always
+        // stop, so `false` is never passed on as a path.
+        if ( ! PARTIAL_IMPL && $warnMissing )
+            fwrite( STDERR , "\n Missing entity file: $path\n" );
+        return;
+    }
+    $path = $realpath;
+
+    $text = file_get_contents( $path );
+    if ( $text === false )
+        die( "Failed to read $path\n" );
+
+    // Escape every ampersand so references to entities that are not
+    // declared in this file survive parsing as plain text. The reverse
+    // replacement is applied after serialization.
+    $text = str_replace( "&" , "&amp;" , $text );
+
+    $dom = new DOMDocument( '1.0' , 'utf8' );
+    if ( ! $dom->loadXML( $text ) )
+        die( "XML load failed for $path\n" );
+
+    $xpath = new DOMXPath( $dom );
+    $list = $xpath->query( "/*/*" );
+
+    foreach( $list as $ent )
+    {
+        // weird, namespace correcting, DOMNodeList -> DOMDocumentFragment transform
+        $other = new DOMDocument( '1.0' , 'utf8' );
+
+        foreach( $ent->childNodes as $node )
+            $other->appendChild( $other->importNode( $node , true ) );
+
+        $name = $ent->getAttribute( "name" );
+        $text = $other->saveXML();
+
+        $text = rtrim( $text , "\n" );
+        $text = str_replace( "&amp;" , "&" , $text ); // undo the escaping done before parsing
+        $lines = explode( "\n" , $text );
+        array_shift( $lines ); // remove XML declaration
+        $text = implode( "\n" , $lines );
+
+        Entities::put( $path , $name , $text , $global , $translate , $remove );
+    }
+}
+
+/**
+ * Loads every individual entity file found directly inside the
+ * `entities` directory of one language checkout.
+ *
+ * Entries whose name starts with a dot, and entries that are not regular
+ * files, are skipped. Each remaining file is read, stripped of trailing
+ * newlines and handed to loadXml().
+ *
+ * @param array  $langs All language codes of this run, in load order.
+ * @param string $lang  The language whose directory is loaded now.
+ */
+function loadDir( array $langs , string $lang )
+{
+    global $debug;
+
+    // Keep the unresolved path: realpath() yields false for a missing
+    // directory, which would leave the messages below without a path.
+    $wanted = __DIR__ . "/../../$lang/entities";
+    $dir = realpath( $wanted );
+    if ( $dir === false || ! is_dir( $dir ) )
+    {
+        if ( ! PARTIAL_IMPL )
+            exit( "Error: not a directory: $wanted\n" );
+        if ( $debug )
+            print "Not a directory: $wanted\n";
+        return;
+    }
+
+    $files = scandir( $dir );
+    if ( $files === false )
+        exit( "Error: cannot list directory: $dir\n" );
+
+    // Entities of any language after the first one in $langs are expected
+    // to replace an already loaded entity.
+    $position = array_search( $lang , $langs , true );
+    $expectedReplaced = $position !== false && $position > 0;
+
+    foreach( $files as $file )
+    {
+        if ( str_starts_with( $file , '.' ) )
+            continue;
+
+        $path = realpath( "$dir/$file" );
+        if ( $path === false || ! is_file( $path ) )
+            continue;
+
+        $text = file_get_contents( $path );
+        if ( $text === false )
+        {
+            fwrite( STDERR , "\n Failed to read entity file: $path\n" );
+            continue;
+        }
+        $text = rtrim( $text , "\n" );
+
+        loadXml( $path , $text , $expectedReplaced );
+    }
+}
+
+/**
+ * Checks one individual entity file and registers it.
+ *
+ * The entity name is the file name without its extension. Empty contents
+ * are registered as they are. Any other contents are parsed as an XML
+ * fragment and are registered only when the parser reports nothing
+ * besides references to entities that are not defined.
+ *
+ * @param string $path             Path of the entity file.
+ * @param string $text             Contents of the file.
+ * @param bool   $expectedReplaced Whether this entity is expected to
+ *                                 replace one that was loaded earlier.
+ */
+function loadXml( string $path , string $text , bool $expectedReplaced )
+{
+    $name = pathinfo( $path , PATHINFO_FILENAME );
+
+    if ( trim( $text ) == "" )
+    {
+        if ( ! PARTIAL_IMPL )
+            fwrite( STDERR , "\n Empty entity (should it be in remove.ent?): '$path' \n" );
+        Entities::put( $path , $name , $text );
+        return;
+    }
+
+    // Entity files hold fragments (plain text, or several sibling
+    // elements), which are not XML documents by themselves. A synthetic
+    // root element makes them parseable for this check.
+    $frag = "<frag>$text</frag>";
+
+    $dom = new DOMDocument( '1.0' , 'utf8' );
+    $dom->recover = true;
+    $dom->resolveExternals = false;
+    libxml_use_internal_errors( true );
+    libxml_clear_errors(); // do not blame this file for errors left by an earlier parse
+
+    $dom->loadXML( $frag );
+
+    $err = libxml_get_errors();
+    libxml_clear_errors();
+
+    foreach( $err as $item )
+    {
+        $msg = trim( $item->message );
+
+        // References to other entities cannot be resolved here; those
+        // messages are expected and ignored.
+        if ( str_starts_with( $msg , "Entity '" ) && str_ends_with( $msg , "' not defined" ) )
+            continue;
+
+        fwrite( STDERR , "\n XML load failed on entity file." );
+        fwrite( STDERR , "\n Path: $path" );
+        fwrite( STDERR , "\n Error: $msg\n" );
+        return;
+    }
+
+    Entities::put( $path , $name , $text , replace: $expectedReplaced );
+}
+
+/**
+ * Writes one DTD entity declaration per collected entity.
+ *
+ * Text containing at most one kind of quote character is written inline,
+ * delimited by the other kind. Text containing both kinds is referenced
+ * as an external (SYSTEM) entity pointing at a file instead.
+ *
+ * @param string $filename Output file; truncated and rewritten.
+ * @param array  $entities Entity name => object exposing `text` and `path`.
+ */
+function saveEntitiesFile( string $filename , array $entities )
+{
+    // Spill files for entities without a source path go next to the output.
+    $tmpDir = dirname( $filename );
+
+    $file = fopen( $filename , "w" );
+    if ( $file === false )
+        die( "Failed to open $filename for writing\n" );
+
+    fputs( $file , "\n\n\n" );
+
+    foreach( $entities as $name => $entity )
+    {
+        $text = $entity->text;
+
+        // If the text contains mixed quoting, keeping it
+        // as an external file to avoid (re)quotation hell.
+
+        if ( ! str_contains( $text , '"' ) )
+            $quote = '"';
+        elseif ( ! str_contains( $text , "'" ) )
+            $quote = "'";
+        else
+            $quote = "";
+
+        if ( $quote == "" )
+        {
+            if ( $entity->path == "" )
+            {
+                // No source file to point at: store the text in a file
+                // named after the entity.
+                $entity->path = "$tmpDir/$name.tmp";
+                file_put_contents( $entity->path , $text );
+            }
+            fputs( $file , "<!ENTITY $name SYSTEM '{$entity->path}'>\n\n" );
+            Entities::slow( $entity->path );
+        }
+        else
+            fputs( $file , "<!ENTITY $name {$quote}{$text}{$quote}>\n\n" );
+    }
+
+    fclose( $file );
+}