Skip to content

Commit 9bf4ff1

Browse files
committed
Merge branch 'jsm28-selective-escaping' into jsm28-list-indentation
2 parents c13bdd5 + a369e07 commit 9bf4ff1

File tree

2 files changed

+41
-6
lines changed

2 files changed

+41
-6
lines changed

markdownify/__init__.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -236,8 +236,20 @@ def escape(self, text):
236236
if not text:
237237
return ''
238238
if self.options['escape_misc']:
239-
text = re.sub(r'([\\&<`[>~#=+|-])', r'\\\1', text)
240-
text = re.sub(r'([0-9])([.)])', r'\1\\\2', text)
239+
text = re.sub(r'([\\&<`[>~=+|])', r'\\\1', text)
240+
# A sequence of one or more consecutive '-', preceded and
241+
# followed by whitespace or start/end of fragment, might
242+
# be confused with an underline of a header, or with a
243+
# list marker.
244+
text = re.sub(r'(\s|^)(-+(?:\s|$))', r'\1\\\2', text)
245+
# A sequence of up to six consecutive '#', preceded and
246+
# followed by whitespace or start/end of fragment, might
247+
# be confused with an ATX heading.
248+
text = re.sub(r'(\s|^)(#{1,6}(?:\s|$))', r'\1\\\2', text)
249+
# '.' or ')' preceded by up to nine digits might be
250+
# confused with a list item.
251+
text = re.sub(r'((?:\s|^)[0-9]{1,9})([.)](?:\s|$))', r'\1\\\2',
252+
text)
241253
if self.options['escape_asterisks']:
242254
text = text.replace('*', r'\*')
243255
if self.options['escape_underscores']:

tests/test_escaping.py

Lines changed: 27 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -28,20 +28,43 @@ def test_single_escaping_entities():
2828
assert md('&amp;amp;') == r'\&amp;'
2929

3030

31-
def text_misc():
31+
def test_misc():
3232
assert md('\\*') == r'\\\*'
33-
assert md('<foo>') == r'\<foo\>'
33+
assert md('&lt;foo>') == r'\<foo\>'
3434
assert md('# foo') == r'\# foo'
35+
assert md('#5') == r'#5'
36+
assert md('5#') == '5#'
37+
assert md('####### foo') == r'####### foo'
3538
assert md('> foo') == r'\> foo'
3639
assert md('~~foo~~') == r'\~\~foo\~\~'
3740
assert md('foo\n===\n') == 'foo\n\\=\\=\\=\n'
38-
assert md('---\n') == '\\-\\-\\-\n'
41+
assert md('---\n') == '\\---\n'
42+
assert md('- test') == r'\- test'
43+
assert md('x - y') == r'x \- y'
44+
assert md('test-case') == 'test-case'
45+
assert md('x-') == 'x-'
46+
assert md('-y') == '-y'
3947
assert md('+ x\n+ y\n') == '\\+ x\n\\+ y\n'
4048
assert md('`x`') == r'\`x\`'
4149
assert md('[text](link)') == r'\[text](link)'
4250
assert md('1. x') == r'1\. x'
51+
# assert md('1<span>.</span> x') == r'1\. x'
52+
assert md('<span>1.</span> x') == r'1\. x'
53+
assert md(' 1. x') == r' 1\. x'
54+
assert md('123456789. x') == r'123456789\. x'
55+
assert md('1234567890. x') == r'1234567890. x'
56+
assert md('A1. x') == r'A1. x'
57+
assert md('1.2') == r'1.2'
4358
assert md('not a number. x') == r'not a number. x'
4459
assert md('1) x') == r'1\) x'
60+
# assert md('1<span>)</span> x') == r'1\) x'
61+
assert md('<span>1)</span> x') == r'1\) x'
62+
assert md(' 1) x') == r' 1\) x'
63+
assert md('123456789) x') == r'123456789\) x'
64+
assert md('1234567890) x') == r'1234567890) x'
65+
assert md('(1) x') == r'(1) x'
66+
assert md('A1) x') == r'A1) x'
67+
assert md('1)x') == r'1)x'
4568
assert md('not a number) x') == r'not a number) x'
4669
assert md('|not table|') == r'\|not table\|'
47-
assert md(r'\ <foo> &amp;amp; | ` `', escape_misc=False) == r'\ <foo> &amp; | ` `'
70+
assert md(r'\ &lt;foo> &amp;amp; | ` `', escape_misc=False) == r'\ <foo> &amp; | ` `'

0 commit comments

Comments
 (0)