From 7d233825adf9abfd98b6d653b70bc8fd38ee1924 Mon Sep 17 00:00:00 2001 From: Gabriel Belingueres <belingueres@gmail.com> Date: Sun, 31 Jan 2021 20:29:11 -0300 Subject: [PATCH] Fix MXParser fails to parse xml declaration properly (#138) - Fix bugs. - Added tests. - Improved error messages. --- .../plexus/util/xml/pull/MXParser.java | 25 ++++++++---- ...onformanceTestSuite_Production32_Test.java | 18 +++++---- .../plexus/util/xml/pull/MXParserTest.java | 40 +++++++++++++++++++ 3 files changed, 68 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java index 4ce9bf0c..bc1c3608 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java +++ b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java @@ -3296,6 +3296,8 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd ) } xmlDeclVersion = newString( buf, versionStart, versionEnd - versionStart ); + String lastParsedAttr = "version"; + // [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) char ch = more(); char prevCh = ch; @@ -3310,8 +3312,8 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd ) { if ( !isS( prevCh ) ) { - throw new XmlPullParserException( "expected a space after version and not " + printable( ch ), this, - null ); + throw new XmlPullParserException( "expected a space after " + lastParsedAttr + " and not " + + printable( ch ), this, null ); } ch = more(); ch = requireInput( ch, NCODING ); @@ -3363,13 +3365,23 @@ else if ("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF throw new XmlPullParserException( "UTF-16 BOM plus xml decl of " + inputEncoding + " is incompatible", this, null ); } + + lastParsedAttr = "encoding"; + + ch = more(); + prevCh = ch; + ch = skipS( ch ); } - ch = more(); - ch = skipS( ch ); // [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) if ( ch == 's' ) { + if ( !isS( prevCh ) ) + { + throw new XmlPullParserException( "expected a space after " + lastParsedAttr + " and not " + + printable( ch ), this, null ); + } + ch = more(); ch = requireInput( ch, TANDALONE ); ch = skipS( ch ); @@ -3382,11 +3394,10 @@ else if ("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF ch = skipS( ch ); if ( ch != '\'' && ch != '"' ) { - throw new XmlPullParserException( "expected apostrophe (') or quotation mark (\") after encoding and not " + throw new XmlPullParserException( "expected apostrophe (') or quotation mark (\") after standalone and not " + printable( ch ), this, null ); } char quotChar = ch; - int standaloneStart = pos; ch = more(); if ( ch == 'y' ) { @@ -3411,9 +3422,9 @@ else if ( ch == 'n' ) + printable( ch ), this, null ); } ch = more(); + ch = skipS( ch ); } - ch = skipS( ch ); if ( ch != '?' ) { throw new XmlPullParserException( "expected ?> as last part of <?xml not " + printable( ch ), this, null ); diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/IBMXML10Tests_Test_IBMXMLConformanceTestSuite_not_wftests_Test_IBMXMLConformanceTestSuite_Production32_Test.java b/src/test/java/org/codehaus/plexus/util/xml/pull/IBMXML10Tests_Test_IBMXMLConformanceTestSuite_not_wftests_Test_IBMXMLConformanceTestSuite_Production32_Test.java index d3e5f31a..446afef2 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/pull/IBMXML10Tests_Test_IBMXMLConformanceTestSuite_not_wftests_Test_IBMXMLConformanceTestSuite_Production32_Test.java +++ b/src/test/java/org/codehaus/plexus/util/xml/pull/IBMXML10Tests_Test_IBMXMLConformanceTestSuite_not_wftests_Test_IBMXMLConformanceTestSuite_Production32_Test.java @@ -53,7 +53,7 @@ public void testibm_not_wf_P32_ibm32n01xml() } catch ( XmlPullParserException e ) { - assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) ); + assertTrue( e.getMessage().contains( "expected a space after version and not s" ) ); } } @@ -79,7 +79,7 @@ public void testibm_not_wf_P32_ibm32n02xml() } catch ( XmlPullParserException e ) { - assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) ); + assertTrue( e.getMessage().contains( "expected equals sign (=) after standalone and not \"" ) ); } } @@ -131,7 +131,7 @@ public void testibm_not_wf_P32_ibm32n04xml() } catch ( XmlPullParserException e ) { - assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) ); + assertTrue( e.getMessage().contains( "expected 'yes' or 'no' after standalone and not Y" ) ); } } @@ -158,7 +158,7 @@ public void testibm_not_wf_P32_ibm32n05xml() } catch ( XmlPullParserException e ) { - assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) ); + assertTrue( e.getMessage().contains( "expected 'yes' or 'no' after standalone and not Y" ) ); } } @@ -184,7 +184,7 @@ public void testibm_not_wf_P32_ibm32n06xml() } catch ( XmlPullParserException e ) { - assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) ); + assertTrue( e.getMessage().contains( "expected 'yes' or 'no' after standalone and not N" ) ); } } @@ -210,7 +210,7 @@ public void testibm_not_wf_P32_ibm32n07xml() } catch ( XmlPullParserException e ) { - assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) ); + assertTrue( e.getMessage().contains( "expected 'yes' or 'no' after standalone and not N" ) ); } } @@ -236,7 +236,7 @@ public void testibm_not_wf_P32_ibm32n08xml() } catch ( XmlPullParserException e ) { - assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) ); + assertTrue( e.getMessage().contains( "expected equals sign (=) after standalone and not \"" ) ); } } @@ -248,8 +248,10 @@ public void testibm_not_wf_P32_ibm32n08xml() * Version: * * @throws IOException if there is an I/O error + * + * NOTE: This test is SKIPPED as MXParser does not support parsing inside DOCTYPEDECL. */ - @Test + // @Test public void testibm_not_wf_P32_ibm32n09xml() throws IOException { diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java index 0cb9c061..82bdeac9 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java +++ b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java @@ -661,4 +661,44 @@ public void testMalformedXMLRootElement5() } } + @Test + public void testXMLDeclVersionOnly() + throws Exception + { + String input = "<?xml version='1.0'?><hello/>"; + + MXParser parser = new MXParser(); + parser.setInput( new StringReader( input ) ); + + try + { + assertEquals( XmlPullParser.PROCESSING_INSTRUCTION, parser.nextToken() ); + assertEquals( XmlPullParser.START_TAG, parser.nextToken() ); + assertEquals( XmlPullParser.END_TAG, parser.nextToken() ); + } + catch ( Exception e ) + { + fail( "Should not throw Exception" ); + } + } + + @Test + public void testXMLDeclVersionEncodingStandaloneNoSpace() + throws Exception + { + String input = "<?xml version='1.0' encoding='ASCII'standalone='yes'?><hello/>"; + + MXParser parser = new MXParser(); + parser.setInput( new StringReader( input ) ); + + try + { + parser.nextToken(); + } + catch ( XmlPullParserException e ) + { + assertTrue( e.getMessage().contains( "expected a space after encoding and not s" )); + } + } + }