From dd2b7e32a1e29d8ea96c17deea94db42c77f2a06 Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Mon, 16 Dec 2024 12:50:09 +1100 Subject: [PATCH] Deprecate Document#updateMetaCharsetElement As the setting had no effect; calls to charset(charset) always enabled it. Users can still call OutputSettings.charset if desired to avoid setting the meta element. --- CHANGES.md | 3 + src/main/java/org/jsoup/nodes/Document.java | 168 +++++++----------- .../java/org/jsoup/nodes/DocumentTest.java | 13 -- 3 files changed, 64 insertions(+), 120 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 2ff4d495b8..600f01d2ba 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -10,6 +10,9 @@ * Removed previously deprecated class: `org.jsoup.UncheckedIOException` (replace with `java.io.UncheckedIOException`); method `Element Element#forEach(Consumer)` to `void Element#forEach(Consumer())`. [2246](https://github.com/jhy/jsoup/pull/2246) +* Deprecated the methods `Document#updateMetaCharsetElement(boolean)` and `Document#updateMetaCharsetElement()`, as the + setting had no effect. When `Document#charset(Charset)` is called, the document's meta charset or XML encoding + instruction is always set. ### Improvements diff --git a/src/main/java/org/jsoup/nodes/Document.java b/src/main/java/org/jsoup/nodes/Document.java index 79091902f2..cd39eda032 100644 --- a/src/main/java/org/jsoup/nodes/Document.java +++ b/src/main/java/org/jsoup/nodes/Document.java @@ -28,7 +28,6 @@ public class Document extends Element { private Parser parser; // the parser used to parse this document private QuirksMode quirksMode = QuirksMode.noQuirks; private final String location; - private boolean updateMetaCharset = false; /** Create a new, empty Document, in the specified namespace. @@ -241,76 +240,56 @@ public Element text(String text) { public String nodeName() { return "#document"; } - + /** - * Sets the charset used in this document. 
This method is equivalent - * to {@link OutputSettings#charset(java.nio.charset.Charset) - * OutputSettings.charset(Charset)} but in addition it updates the - * charset / encoding element within the document. - * - *

This enables - * {@link #updateMetaCharsetElement(boolean) meta charset update}.

- * - *

If there's no element with charset / encoding information yet it will - * be created. Obsolete charset / encoding definitions are removed!

- * - *

Elements used:

- * - * - * - * @param charset Charset - * - * @see #updateMetaCharsetElement(boolean) - * @see OutputSettings#charset(java.nio.charset.Charset) + Set the output character set of this Document. This method is equivalent to + {@link OutputSettings#charset(java.nio.charset.Charset) OutputSettings.charset(Charset)}, but additionally adds or + updates the charset / encoding element within the Document. + +

 If no element with charset / encoding information exists yet, one will + be created. Obsolete charset / encoding definitions are removed.

+ +

Elements used:

+ + + + @param charset Charset + @return this Document, for chaining + @see OutputSettings#charset(java.nio.charset.Charset) */ - public void charset(Charset charset) { - updateMetaCharsetElement(true); + public Document charset(Charset charset) { outputSettings.charset(charset); ensureMetaCharsetElement(); + return this; } - + /** - * Returns the charset used in this document. This method is equivalent - * to {@link OutputSettings#charset()}. - * - * @return Current Charset - * - * @see OutputSettings#charset() + Get the output character set of this Document. This method is equivalent to {@link OutputSettings#charset()}. + + @return the current Charset + @see OutputSettings#charset() */ public Charset charset() { return outputSettings.charset(); } - + /** - * Sets whether the element with charset information in this document is - * updated on changes through {@link #charset(java.nio.charset.Charset) - * Document.charset(Charset)} or not. - * - *

If set to false (default) there are no elements - * modified.

- * - * @param update If true the element updated on charset - * changes, false if not - * - * @see #charset(java.nio.charset.Charset) + @deprecated this setting has no effect; the meta charset element is always updated when + {@link Document#charset(Charset)} is called. This method will be removed in jsoup 1.20.1. */ - public void updateMetaCharsetElement(boolean update) { - this.updateMetaCharset = update; - } - + @Deprecated(forRemoval = true, since = "1.19.1") + public void updateMetaCharsetElement(boolean noop) {} + /** - * Returns whether the element with charset information in this document is - * updated on changes through {@link #charset(java.nio.charset.Charset) - * Document.charset(Charset)} or not. - * - * @return Returns true if the element is updated on charset - * changes, false if not + @deprecated this setting has no effect; the meta charset element is always updated when + {@link Document#charset(Charset)} is called. This method will be removed in jsoup 1.20.1. */ + @Deprecated(forRemoval = true, since = "1.19.1") public boolean updateMetaCharsetElement() { - return updateMetaCharset; + return true; } @Override @@ -329,61 +308,36 @@ public Document shallowClone() { return clone; } - /** - * Ensures a meta charset (html) or xml declaration (xml) with the current - * encoding used. This only applies with - * {@link #updateMetaCharsetElement(boolean) updateMetaCharset} set to - * true, otherwise this method does nothing. - * - * - * - *

Elements used:

- * - * - */ + private void ensureMetaCharsetElement() { - if (updateMetaCharset) { - OutputSettings.Syntax syntax = outputSettings().syntax(); - - if (syntax == OutputSettings.Syntax.html) { - Element metaCharset = selectFirst("meta[charset]"); - if (metaCharset != null) { - metaCharset.attr("charset", charset().displayName()); - } else { - head().appendElement("meta").attr("charset", charset().displayName()); - } - select("meta[name=charset]").remove(); // Remove obsolete elements - } else if (syntax == OutputSettings.Syntax.xml) { - Node node = ensureChildNodes().get(0); - if (node instanceof XmlDeclaration) { - XmlDeclaration decl = (XmlDeclaration) node; - if (decl.name().equals("xml")) { - decl.attr("encoding", charset().displayName()); - if (decl.hasAttr("version")) - decl.attr("version", "1.0"); - } else { - decl = new XmlDeclaration("xml", false); - decl.attr("version", "1.0"); - decl.attr("encoding", charset().displayName()); - prependChild(decl); - } - } else { - XmlDeclaration decl = new XmlDeclaration("xml", false); - decl.attr("version", "1.0"); - decl.attr("encoding", charset().displayName()); - prependChild(decl); - } + OutputSettings.Syntax syntax = outputSettings().syntax(); + + if (syntax == OutputSettings.Syntax.html) { + Element metaCharset = selectFirst("meta[charset]"); + if (metaCharset != null) { + metaCharset.attr("charset", charset().displayName()); + } else { + head().appendElement("meta").attr("charset", charset().displayName()); } + select("meta[name=charset]").remove(); // Remove obsolete elements + } else if (syntax == OutputSettings.Syntax.xml) { + XmlDeclaration decl = ensureXmlDecl(); + decl.attr("version", "1.0"); + decl.attr("encoding", charset().displayName()); } } - + + private XmlDeclaration ensureXmlDecl() { + Node node = ensureChildNodes().get(0); + if (node instanceof XmlDeclaration) { + XmlDeclaration decl = (XmlDeclaration) node; + if (decl.name().equals("xml")) return decl; + } + XmlDeclaration decl = new 
XmlDeclaration("xml", false); + prependChild(decl); + return decl; + } + /** * A Document's output settings control the form of the text() and html() methods. diff --git a/src/test/java/org/jsoup/nodes/DocumentTest.java b/src/test/java/org/jsoup/nodes/DocumentTest.java index 6a242cfb07..c7e28f47d0 100644 --- a/src/test/java/org/jsoup/nodes/DocumentTest.java +++ b/src/test/java/org/jsoup/nodes/DocumentTest.java @@ -227,7 +227,6 @@ public class DocumentTest { @Test public void testMetaCharsetUpdateUtf8() { final Document doc = createHtmlDocument("changeThis"); - doc.updateMetaCharsetElement(true); doc.charset(Charset.forName(charsetUtf8)); final String htmlCharsetUTF8 = "\n" + @@ -247,7 +246,6 @@ public void testMetaCharsetUpdateUtf8() { @Test public void testMetaCharsetUpdateIso8859() { final Document doc = createHtmlDocument("changeThis"); - doc.updateMetaCharsetElement(true); doc.charset(Charset.forName(charsetIso8859)); final String htmlCharsetISO = "\n" + @@ -267,7 +265,6 @@ public void testMetaCharsetUpdateIso8859() { @Test public void testMetaCharsetUpdateNoCharset() { final Document docNoCharset = Document.createShell(""); - docNoCharset.updateMetaCharsetElement(true); docNoCharset.charset(Charset.forName(charsetUtf8)); assertEquals(charsetUtf8, docNoCharset.select("meta[charset]").first().attr("charset")); @@ -328,7 +325,6 @@ public void testMetaCharsetUpdateEnabledAfterCharsetChange() { @Test public void testMetaCharsetUpdateCleanup() { final Document doc = createHtmlDocument("dontTouch"); - doc.updateMetaCharsetElement(true); doc.charset(Charset.forName(charsetUtf8)); final String htmlCharsetUTF8 = "\n" + @@ -344,7 +340,6 @@ public void testMetaCharsetUpdateCleanup() { @Test public void testMetaCharsetUpdateXmlUtf8() { final Document doc = createXmlDocument("1.0", "changeThis", true); - doc.updateMetaCharsetElement(true); doc.charset(Charset.forName(charsetUtf8)); final String xmlCharsetUTF8 = "\n" + @@ -362,7 +357,6 @@ public void 
testMetaCharsetUpdateXmlUtf8() { @Test public void testMetaCharsetUpdateXmlIso8859() { final Document doc = createXmlDocument("1.0", "changeThis", true); - doc.updateMetaCharsetElement(true); doc.charset(Charset.forName(charsetIso8859)); final String xmlCharsetISO = "\n" + @@ -380,7 +374,6 @@ public void testMetaCharsetUpdateXmlIso8859() { @Test public void testMetaCharsetUpdateXmlNoCharset() { final Document doc = createXmlDocument("1.0", "none", false); - doc.updateMetaCharsetElement(true); doc.charset(Charset.forName(charsetUtf8)); final String xmlCharsetUTF8 = "\n" + @@ -418,12 +411,6 @@ public void testMetaCharsetUpdateXmlDisabledNoChanges() { assertEquals("dontTouch", selectedNode.attr("version")); } - @Test - public void testMetaCharsetUpdatedDisabledPerDefault() { - final Document doc = createHtmlDocument("none"); - assertFalse(doc.updateMetaCharsetElement()); - } - private Document createHtmlDocument(String charset) { final Document doc = Document.createShell(""); doc.head().appendElement("meta").attr("charset", charset);