File tree Expand file tree Collapse file tree 3 files changed +22
-8
lines changed Expand file tree Collapse file tree 3 files changed +22
-8
lines changed Original file line number Diff line number Diff line change @@ -157,12 +157,16 @@ strip_document
157
157
within the document are unaffected.
158
158
Defaults to ``STRIP``.
159
159
160
- beautiful_soup_parser
161
- Specify the Beautiful Soup parser to be used for interpreting HTML markup. Parsers such
162
- as `html5lib `, `lxml ` or even a custom parser as long as it is installed on the execution
163
- environment. Defaults to ``html.parser ``.
164
-
165
- .. _BeautifulSoup : https://www.crummy.com/software/BeautifulSoup/
160
+ bs4_options
161
+ Specify additional configuration options for the ``BeautifulSoup`` object
162
+ used to interpret the HTML markup. String and list values (such as ``lxml``)
163
+ are treated as ``features`` parameter arguments to control parser
164
+ selection. Dictionary values (such as ``{"from_encoding": "iso-8859-8"}``)
165
+ are treated as full kwargs to be used for the BeautifulSoup constructor,
166
+ allowing specification of any parameter. For parameter details, see the
167
+ BeautifulSoup_ documentation.
168
+
169
+ .. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/bs4/doc/
166
170
167
171
Options may be specified as kwargs to the ``markdownify `` function, or as a
168
172
nested ``Options `` class in ``MarkdownConverter `` subclasses.
Original file line number Diff line number Diff line change @@ -154,7 +154,7 @@ def _next_block_content_sibling(el):
154
154
class MarkdownConverter (object ):
155
155
class DefaultOptions :
156
156
autolinks = True
157
- beautiful_soup_parser = 'html.parser'
157
+ bs4_options = 'html.parser'
158
158
bullets = '*+-' # An iterable of bullet types.
159
159
code_language = ''
160
160
code_language_callback = None
@@ -188,11 +188,15 @@ def __init__(self, **options):
188
188
raise ValueError ('You may specify either tags to strip or tags to'
189
189
' convert, but not both.' )
190
190
191
+ # If a string or list is passed to bs4_options, assume it is a 'features' specification
192
+ if not isinstance (self .options ['bs4_options' ], dict ):
193
+ self .options ['bs4_options' ] = {'features' : self .options ['bs4_options' ]}
194
+
191
195
# Initialize the conversion function cache
192
196
self .convert_fn_cache = {}
193
197
194
198
def convert (self , html ):
195
- soup = BeautifulSoup (html , self .options ['beautiful_soup_parser ' ])
199
+ soup = BeautifulSoup (html , ** self .options ['bs4_options ' ])
196
200
return self .convert_soup (soup )
197
201
198
202
def convert_soup (self , soup ):
Original file line number Diff line number Diff line change @@ -32,3 +32,9 @@ def test_strip_document():
32
32
assert markdownify ("<p>Hello</p>" , strip_document = RSTRIP ) == "\n \n Hello"
33
33
assert markdownify ("<p>Hello</p>" , strip_document = STRIP ) == "Hello"
34
34
assert markdownify ("<p>Hello</p>" , strip_document = None ) == "\n \n Hello\n \n "
35
+
36
+
37
+ def bs4_options ():
38
+ assert markdownify ("<p>Hello</p>" , bs4_options = "html.parser" ) == "Hello"
39
+ assert markdownify ("<p>Hello</p>" , bs4_options = ["html.parser" ]) == "Hello"
40
+ assert markdownify ("<p>Hello</p>" , bs4_options = {"features" : "html.parser" }) == "Hello"
You can’t perform that action at this time.
0 commit comments