Skip to content

Commit a9a4115

Browse files
authored
Add vm target to pegen script to generate the vm parser (python#130)
1 parent 10f7be1 commit a9a4115

File tree

4 files changed

+97
-8
lines changed

4 files changed

+97
-8
lines changed

Tools/peg_generator/Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ build: peg_extension/parse.c
2525
peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py
2626
$(PYTHON) -m pegen -q c $(GRAMMAR) $(TOKENS) -o peg_extension/parse.c --compile-extension
2727

28+
# Run pegen's "vm" target on $(GRAMMAR) and $(TOKENS), writing the result to
# ../../Parser/pegen/vmparse.h.
generate_vm: $(GRAMMAR) $(TOKENS) pegen/*.py ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h
	$(PYTHON) -m pegen -q vm $(GRAMMAR) $(TOKENS) -o ../../Parser/pegen/vmparse.h
30+
2831
clean:
2932
-rm -f peg_extension/*.o peg_extension/*.so peg_extension/parse.c
3033
-rm -f data/xxl.py

Tools/peg_generator/pegen/__main__.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,31 @@
1616
from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
1717

1818

19+
def generate_vm_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the VM parser build for the files named on the command line.

    Reads ``grammar_filename``, ``tokens_filename``, ``output`` and
    ``verbose`` from *args* and returns the ``(grammar, parser, tokenizer,
    generator)`` tuple produced by ``build_vm_parser_and_generator``.

    If the build raises and ``args.verbose`` is falsy, the exception is
    printed without a traceback and the process exits with status 1;
    otherwise the exception propagates unchanged.
    """
    from pegen.build import build_vm_parser_and_generator

    level = args.verbose
    # Level 3+ turns on tokenizer tracing; level 2 or 4+ turns on parser tracing.
    trace_tokenizer = level >= 3
    trace_parser = level == 2 or level >= 4
    try:
        built_grammar, built_parser, built_tokenizer, generator = build_vm_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            trace_tokenizer,
            trace_parser,
        )
        return built_grammar, built_parser, built_tokenizer, generator
    except Exception as err:
        if not args.verbose:
            # Quiet mode: report the error only, then bail out.
            traceback.print_exception(err.__class__, err, None)
            sys.stderr.write("For full traceback, use -v\n")
            sys.exit(1)
        raise  # Show traceback
42+
43+
1944
def generate_c_code(
2045
args: argparse.Namespace,
2146
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
@@ -116,6 +141,18 @@ def generate_python_code(
116141
"--skip-actions", action="store_true", help="Suppress code emission for rule actions",
117142
)
118143

144+
# "vm" sub-command: dispatches to generate_vm_code and writes the generated
# parser to vmparse.h unless -o/--output says otherwise.
# The help text says "parser", not "parser generator": pegen itself is the
# generator, and this target's output is the parser.
vm_parser = subparsers.add_parser("vm", help="Generate the new VM parser")
vm_parser.set_defaults(func=generate_vm_code)
vm_parser.add_argument("grammar_filename", help="Grammar description")
vm_parser.add_argument("tokens_filename", help="Tokens description")
vm_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="vmparse.h",
    help="Where to write the generated parser",
)
155+
119156

120157
def main() -> None:
121158
from pegen.testutil import print_memstats

Tools/peg_generator/pegen/build.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from typing import Optional, Tuple, List, IO, Set, Dict
99

1010
from pegen.c_generator import CParserGenerator
11+
from pegen.vm_generator import VMParserGenerator
1112
from pegen.grammar import Grammar
1213
from pegen.grammar_parser import GeneratedParser as GrammarParser
1314
from pegen.parser import Parser
@@ -181,6 +182,19 @@ def build_python_generator(
181182
return gen
182183

183184

185+
def build_vm_generator(
    grammar: Grammar, grammar_file: str, tokens_file: str, output_file: str,
) -> ParserGenerator:
    """Create a ``VMParserGenerator`` for *grammar* and run it.

    Token definitions are read from *tokens_file* first.  *output_file* is
    then opened for writing and handed to the generator, which is asked to
    generate from *grammar_file*.  The generator is returned once the output
    file has been closed.
    """
    with open(tokens_file, "r") as token_stream:
        token_definitions = generate_token_definitions(token_stream)
    all_tokens, exact_tok, non_exact_tok = token_definitions

    with open(output_file, "w") as out:
        vm_gen: ParserGenerator = VMParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, out
        )
        vm_gen.generate(grammar_file)
    return vm_gen
196+
197+
184198
def build_c_parser_and_generator(
185199
grammar_file: str,
186200
tokens_file: str,
@@ -246,3 +260,26 @@ def build_python_parser_and_generator(
246260
grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
247261
gen = build_python_generator(grammar, grammar_file, output_file, skip_actions=skip_actions,)
248262
return grammar, parser, tokenizer, gen
263+
264+
265+
def build_vm_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Build the grammar parser, then generate the VM parser for a grammar.

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Forwarded to ``build_parser``.
          Defaults to False.
        verbose_parser (bool, optional): Forwarded to ``build_parser``.
          Defaults to False.

    Returns:
        The ``(grammar, parser, tokenizer)`` triple from ``build_parser``
        followed by the generator returned by ``build_vm_generator``.
    """
    parsed_grammar, grammar_parser, grammar_tokenizer = build_parser(
        grammar_file, verbose_tokenizer, verbose_parser
    )
    vm_gen = build_vm_generator(parsed_grammar, grammar_file, tokens_file, output_file)
    return parsed_grammar, grammar_parser, grammar_tokenizer, vm_gen

Tools/peg_generator/pegen/vm_generator.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,9 @@
77
import tokenize
88
from collections import defaultdict
99
from itertools import accumulate
10-
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
10+
from typing import Any, Dict, Iterator, List, Optional, Tuple, Set, IO, Text, Union
1111

1212
from pegen import grammar
13-
from pegen.build import build_parser
1413
from pegen.grammar import (
1514
Alt,
1615
Cut,
@@ -77,9 +76,14 @@ def __init__(self, name: str, startrulename: str):
7776

7877
class VMCallMakerVisitor(GrammarVisitor):
7978
def __init__(
80-
self, parser_generator: ParserGenerator,
79+
self,
80+
parser_generator: ParserGenerator,
81+
exact_tokens: Dict[str, int],
82+
non_exact_tokens: Set[str],
8183
):
8284
self.gen = parser_generator
85+
self.exact_tokens = exact_tokens
86+
self.non_exact_tokens = non_exact_tokens
8387
self.cache: Dict[Any, Any] = {}
8488
self.keyword_cache: Dict[str, int] = {}
8589
self.soft_keyword_cache: List[str] = []
@@ -101,8 +105,8 @@ def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
101105
return self.keyword_helper(val)
102106
else:
103107
return self.soft_keyword_helper(val)
104-
tok_num: int = token.EXACT_TOKEN_TYPES[val] # type: ignore [attr-defined]
105-
return "OP_TOKEN", token.tok_name[tok_num]
108+
tok_num: int = self.exact_tokens[val]
109+
return "OP_TOKEN", self.gen.tokens[tok_num]
106110

107111
def visit_Repeat0(self, node: Repeat0) -> str:
108112
if node in self.cache:
@@ -149,12 +153,19 @@ def can_we_inline(node: Rhs) -> int:
149153

150154
class VMParserGenerator(ParserGenerator, GrammarVisitor):
151155
def __init__(
152-
self, grammar: grammar.Grammar,
156+
self,
157+
grammar: grammar.Grammar,
158+
tokens: Dict[str, int],
159+
exact_tokens: Dict[str, int],
160+
non_exact_tokens: Set[str],
161+
file: Optional[IO[Text]],
153162
):
154-
super().__init__(grammar, token.tok_name, sys.stdout)
163+
super().__init__(grammar, tokens, file)
155164

156165
self.opcode_buffer: Optional[List[Opcode]] = None
157-
self.callmakervisitor: VMCallMakerVisitor = VMCallMakerVisitor(self)
166+
self.callmakervisitor: VMCallMakerVisitor = VMCallMakerVisitor(
167+
self, exact_tokens, non_exact_tokens,
168+
)
158169

159170
@contextlib.contextmanager
160171
def set_opcode_buffer(self, buffer: List[Opcode]) -> Iterator[None]:
@@ -517,6 +528,7 @@ def visit_Gather(self, node: Gather) -> None:
517528

518529

519530
def main() -> None:
531+
from pegen.build import build_parser
520532
filename = "../../Grammar/python.gram"
521533
if sys.argv[1:]:
522534
filename = sys.argv[1]

0 commit comments

Comments
 (0)