Skip to content

Commit 6af3ed8

Browse files
authored
Self-Test: Add html diffing (elastic#635)
Compare the html built by AsciiDoc and Asciidoctor as part of the self-test. Like all good tests, it doesn't pass at first. I've temporarily ignored all of the failures and have added them to the main asciidoctor issue or filed them as their own issue.
1 parent b07ab5a commit 6af3ed8

File tree

4 files changed

+109
-5
lines changed

4 files changed

+109
-5
lines changed

Dockerfile

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ LABEL MAINTAINERS="Nik Everett <[email protected]>"
1414
# * openssh-client (used by git)
1515
# * openssh-server (used to forward ssh auth for git when running with --all on macOS)
1616
# * perl-base
17+
# * python (is python2)
1718
# * xsltproc
1819
# * To install rubygems for asciidoctor
1920
# * build-essential
@@ -23,7 +24,8 @@ LABEL MAINTAINERS="Nik Everett <[email protected]>"
2324
# * ruby
2425
# * ruby-dev
2526
# * Used to check the docs build in CI
26-
# * pycodestyle
27+
# * python3
28+
# * python3-pip
2729
RUN install_packages \
2830
bash \
2931
build-essential \
@@ -38,8 +40,9 @@ RUN install_packages \
3840
openssh-client \
3941
openssh-server \
4042
perl-base \
41-
pycodestyle \
4243
python \
44+
python3 \
45+
python3-pip \
4346
ruby \
4447
ruby-dev \
4548
unzip \
@@ -66,3 +69,13 @@ RUN gem install --no-document \
6669
rubocop:0.64.0 \
6770
rspec:3.8.0 \
6871
thread_safe:0.3.6
72+
73+
# Wheel inventory:
74+
# * Used to test the docs build
75+
# * beautifulsoup4
76+
# * lxml
77+
# * pycodestyle
78+
RUN pip3 install \
79+
beautifulsoup4==4.7.1 \
80+
lxml==4.3.1 \
81+
pycodestyle==2.5.0

Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ MAKEFLAGS += --silent
88
check: unit_test integration_test
99

1010
.PHONY: unit_test
11-
unit_test: build_docs_check asciidoctor_check
11+
unit_test: style asciidoctor_check
1212

13-
.PHONY: build_docs_check
14-
build_docs_check:
13+
.PHONY: style
14+
style: build_docs
1515
pycodestyle build_docs
1616

1717
.PHONY: asciidoctor_check

integtest/Makefile

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,14 @@ MAKEFLAGS += --silent
33

44
.PHONY: check
55
check: \
6+
style \
67
readme_expected_files readme_same_files \
78
includes_expected_files includes_same_files
89

10+
.PHONY: style
11+
style: html_diff
12+
pycodestyle html_diff
13+
914
define STANDARD_EXPECTED_FILES=
1015
[ -s $^/index.html ]
1116
[ -s $^/docs.js ]
@@ -39,6 +44,11 @@ readme_expected_files: /tmp/readme_asciidoc
3944
| grep -v snippets/blocks \
4045
) \
4146
<(cd /tmp/$*_asciidoctor && find * -type f | sort)
47+
# The grep -v filter below excludes files with known asciidoctor issues
48+
for file in $$(cd /tmp/$*_asciidoc && find * -type f -name '*.html' \
49+
| grep -v 'blocks\|changes\|experimental\|multi-part'); do \
50+
./html_diff /tmp/$*_asciidoc/$$file /tmp/$*_asciidoctor/$$file; \
51+
done
4252

4353
define BD=
4454
/docs_build/build_docs.pl --in_standard_docker --out $@

integtest/html_diff

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/usr/bin/env python3
2+
3+
# Script to compare two html files, ignoring differences that we consider
4+
# to be unimportant. The output is a unified diff of formatted html meant
5+
# to be readable and precise at identifying differences.
6+
#
7+
# This script is designed to be run in the container managed by the
8+
# Dockerfile at the root of this repository.
9+
10+
11+
from bs4 import BeautifulSoup, NavigableString
12+
import difflib
13+
import re
14+
15+
16+
def normalize_html(html):
    """Normalize html to remove expected differences between AsciiDoc's
    output and Asciidoctor's output.

    The html is parsed with the lxml parser, runs of whitespace in text
    nodes are collapsed, the document is re-serialized with indentation and
    then a few textual workarounds are applied to the serialized form.

    :param html: the html document as a string
    :return: the normalized, prettified html as a string
    """
    soup = BeautifulSoup(html, 'lxml')
    # Replace many whitespace characters with a single space in some elements
    # kind of like a browser does.
    for e in soup.select(':not(script,pre,code,style)'):
        # Walk a snapshot of the children because replace_with edits the
        # element's child list while we are looping over it.
        for part in list(e.contents):
            if not isinstance(part, NavigableString):
                continue
            crunched = re.sub(r'\s+', ' ', part)
            if crunched != part:
                # Rebuild the node with its own class. Comments (and the
                # other NavigableString subclasses) would otherwise be
                # replaced by plain text and show up as page content.
                part.replace_with(type(part)(crunched))
    # Format the html with indentation so we can *see* things
    html = soup.prettify()
    # Remove the zero width space that asciidoctor adds after each horizontal
    # ellipsis. They don't hurt anything but asciidoc doesn't make them
    html = html.replace('\u2026\u200b', '\u2026')
    # Temporary workaround for known issues
    html = html.replace('class="edit_me" href="/./', 'class="edit_me" href="')
    html = re.sub(
        r'(?m)^\s+<div class="console_widget" data-snippet="[^"]+">'
        r'\s+</div>\n', '', html)
    html = html.replace('\\&lt;1&gt;', '&lt;1&gt;')
    return html
41+
42+
43+
def html_diff(lhs_name, lhs, rhs_name, rhs):
    """Build a unified diff of two html blobs after normalizing both.

    Each blob is passed through normalize_html so that the expected
    differences between AsciiDoc and Asciidoctor are ignored, then the
    results are compared line by line.

    :param lhs_name: label used for the "from" side of the diff header
    :param lhs: html text of the left hand side
    :param rhs_name: label used for the "to" side of the diff header
    :param rhs: html text of the right hand side
    :return: a generator of diff report lines without line terminators;
             if it yields nothing then there is no diff
    """
    before, after = (
        normalize_html(blob).splitlines() for blob in (lhs, rhs))
    return difflib.unified_diff(
        before, after, fromfile=lhs_name, tofile=rhs_name, lineterm='')
56+
57+
58+
def html_file_diff(lhs, rhs):
    """Build a unified diff of two html files after normalizing both.

    Both files are read as utf-8 and handed to html_diff, with the file
    paths doubling as the names shown in the diff header.

    :param lhs: path to the left hand side html file
    :param rhs: path to the right hand side html file
    :return: a generator of diff report lines; if it yields nothing then
             there is no diff
    """
    contents = []
    # Read the left file completely before touching the right one.
    for path in (lhs, rhs):
        with open(path, encoding='utf-8') as handle:
            contents.append(handle.read())
    lhs_text, rhs_text = contents
    return html_diff(lhs, lhs_text, rhs, rhs_text)
68+
69+
70+
if __name__ == '__main__':
    # Command line entry point: html_diff <lhs.html> <rhs.html>
    # Writes the diff (if any) to stderr and exits 1 when the files differ
    # or the arguments are wrong, 0 when the files match.
    import sys
    if len(sys.argv) != 3:
        # Usage errors go to stderr, the same stream as the diff report.
        print("Expected exactly 2 arguments but got %s" % sys.argv[1:],
              file=sys.stderr)
        # Use sys.exit: the bare exit() is injected by the site module for
        # interactive use and isn't guaranteed to exist in every run mode.
        sys.exit(1)
    had_diff = False
    for line in html_file_diff(sys.argv[1], sys.argv[2]):
        had_diff = True
        # print doesn't like to print utf-8 in all cases but buffer.write is ok
        sys.stderr.buffer.write((line + "\n").encode('utf-8'))
    sys.exit(1 if had_diff else 0)

0 commit comments

Comments
 (0)