File tree Expand file tree Collapse file tree 2 files changed +64
-6
lines changed
material/plugins/blog/readtime
src/plugins/blog/readtime Expand file tree Collapse file tree 2 files changed +64
-6
lines changed Original file line number Diff line number Diff line change 2020
2121from html .parser import HTMLParser
2222
23+ # TODO: Refactor the `void` set into a common module and import it from there
24+ # and not from the search plugin.
25+ from material .plugins .search .plugin import void
26+
2327# -----------------------------------------------------------------------------
2428# Classes
2529# -----------------------------------------------------------------------------
@@ -31,15 +35,40 @@ class ReadtimeParser(HTMLParser):
def __init__(self):
    """Initialize the parser and the state used while walking the HTML."""
    super().__init__(convert_charrefs=True)

    # Tags whose text content is not collected
    self.skip = {"object", "script", "style", "svg"}

    # Stack of currently open tags, innermost last
    self.context = []

    # Number of images seen and text fragments gathered so far
    self.images = 0
    self.text = []
3752
38- # Collect images
53+ # Called at the start of every HTML tag
def handle_starttag(self, tag, attrs):
    """Count images and push the opened tag onto the context stack.

    Tags contained in `void` (imported from the search plugin, presumably
    the HTML void elements - confirm there) are not pushed.
    """
    is_image = tag == "img"
    if is_image:
        self.images += 1

    # Tags listed in `void` are never tracked as open context
    if tag in void:
        return

    self.context.append(tag)
63+
64+ # Called for the text contents of each tag
def handle_data(self, data):
    """Record a text fragment unless any open tag is a skipped tag."""
    # Any overlap between the open tags and the skip set suppresses the text
    inside_skipped = not self.skip.isdisjoint(self.context)
    if inside_skipped:
        return

    self.text.append(data)
69+
70+ # Called at the end of every HTML tag
def handle_endtag(self, tag):
    """Close `tag` by unwinding the context stack to its nearest open match.

    Popping only when the innermost open tag matches leaves the stack
    stuck as soon as one element is left unclosed (e.g. a `<path>` inside
    an `<svg>`): the enclosing tag is then never removed, and if it is a
    skipped tag, all following text is dropped. Instead, the nearest
    matching open tag and everything opened after it are removed. End
    tags without any matching open tag are ignored.
    """
    open_tags = self.context

    # Search from the innermost open tag outwards
    for index in range(len(open_tags) - 1, -1, -1):
        if open_tags[index] == tag:
            # Implicitly close everything opened inside the matched tag
            del open_tags[index:]
            break
Original file line number Diff line number Diff line change 2020
2121from html .parser import HTMLParser
2222
23+ # TODO: Refactor the `void` set into a common module and import it from there
24+ # and not from the search plugin.
25+ from material .plugins .search .plugin import void
26+
2327# -----------------------------------------------------------------------------
2428# Classes
2529# -----------------------------------------------------------------------------
@@ -31,15 +35,40 @@ class ReadtimeParser(HTMLParser):
def __init__(self):
    """Initialize the parser and the state used while walking the HTML."""
    super().__init__(convert_charrefs=True)

    # Tags whose text content is not collected
    self.skip = {"object", "script", "style", "svg"}

    # Stack of currently open tags, innermost last
    self.context = []

    # Number of images seen and text fragments gathered so far
    self.images = 0
    self.text = []
3752
38- # Collect images
53+ # Called at the start of every HTML tag
def handle_starttag(self, tag, attrs):
    """Count images and push the opened tag onto the context stack.

    Tags contained in `void` (imported from the search plugin, presumably
    the HTML void elements - confirm there) are not pushed.
    """
    is_image = tag == "img"
    if is_image:
        self.images += 1

    # Tags listed in `void` are never tracked as open context
    if tag in void:
        return

    self.context.append(tag)
63+
64+ # Called for the text contents of each tag
def handle_data(self, data):
    """Record a text fragment unless any open tag is a skipped tag."""
    # Any overlap between the open tags and the skip set suppresses the text
    inside_skipped = not self.skip.isdisjoint(self.context)
    if inside_skipped:
        return

    self.text.append(data)
69+
70+ # Called at the end of every HTML tag
def handle_endtag(self, tag):
    """Close `tag` by unwinding the context stack to its nearest open match.

    Popping only when the innermost open tag matches leaves the stack
    stuck as soon as one element is left unclosed (e.g. a `<path>` inside
    an `<svg>`): the enclosing tag is then never removed, and if it is a
    skipped tag, all following text is dropped. Instead, the nearest
    matching open tag and everything opened after it are removed. End
    tags without any matching open tag are ignored.
    """
    open_tags = self.context

    # Search from the innermost open tag outwards
    for index in range(len(open_tags) - 1, -1, -1):
        if open_tags[index] == tag:
            # Implicitly close everything opened inside the matched tag
            del open_tags[index:]
            break
You can’t perform that action at this time.
0 commit comments