Lexer.tokenize
Token
# FILE: lex.py
import re
import enum
class Token:
def __init__(self, type, value: str, lineno: int, pos: int):
self.type, self.value, self.lineno, self.pos = type, value, lineno, pos
def __str__(self):
v = f'({self.value!r})' if self.value else ''
return f'{self.type.name}{v} at {self.lineno}:{self.pos}'
__repr__ = __str__
class Lexer:
def __init__(self, token_types: enum.Enum, tokens_regexes: dict):
self.token_types = token_types
regex = '|'.join(map('(?P<{}>{})'.format, *zip(*((tok.name, regex) for tok, regex in tokens_regexes.items()))))
self.regex = re.compile(regex)
def tokenize(self, string, skip=['space']):
# TODO: detect invalid input
lineno, pos = 0, 0
skip = set(map(self.token_types.__getitem__, skip))
for matchobj in self.regex.finditer(string):
type_name = matchobj.lastgroup
value = matchobj.groupdict()[type_name]
Type = self.token_types[type_name]
if Type == self.token_types.newline: # possibly buggy, but not catastrophic
self.lineno += 1
self.pos = 0
continue
pos = matchobj.end()
if Type not in skip:
yield Token(Type, value, lineno, pos)
yield Token(self.token_types.EOF, '', lineno, pos)
lex.Lexer.tokenize
Opt_list -> Option Opt_list_
Opt_list_ -> comma Option Opt_list_ | empty
Option -> Choice | Mult
Choice -> Compound More_choices Exchange
Compound -> item Add_item
Add_item -> plus item Add_item | empty
More_choices -> slash Compound More_choices | empty
Exchange -> minus num | empty
Mult -> num star Compound
EOF
vital statistics
parse_<something>
Parser.parse
# FILE: parse.py
import lex
class Parser:
def __init__(self, lexer):
self.string, self.tokens = None, None
self.lexer = lexer
self.t = self.lexer.token_types
self.__lookahead = None
@property
def lookahead(self):
if not self.__lookahead:
try:
self.__lookahead = next(self.tokens)
except StopIteration:
self.__lookahead = lex.Token(self.t.EOF, '', 0, -1)
return self.__lookahead
def next(self):
if self.__lookahead and self.__lookahead.type == self.t.EOF:
return self.__lookahead
self.__lookahead = None
return self.lookahead
def match(self, token_type):
if self.lookahead.type == token_type:
return self.next()
raise SyntaxError(f'Expected {token_type}, got {self.lookahead.type}', ('<string>', self.lookahead.lineno, self.lookahead.pos, self.string))
# THE PARSING STARTS HERE
def parse(self, string):
# setup
self.string = string
self.tokens = self.lexer.tokenize(string)
self.__lookahead = None
self.next()
# do parsing
ret = [''] + self.parse_opt_list()
return ' '.join(ret)
def parse_opt_list(self) -> list:
ret = self.parse_option(1)
ret.extend(self.parse_opt_list_(1))
return ret
def parse_opt_list_(self, curr_opt_number) -> list:
if self.lookahead.type in {self.t.EOF}:
return []
self.match(self.t.comma)
ret = self.parse_option(curr_opt_number + 1)
ret.extend(self.parse_opt_list_(curr_opt_number + 1))
return ret
def parse_option(self, opt_number) -> list:
ret = [f'{opt_number}.']
if self.lookahead.type == self.t.item:
ret.extend(self.parse_choice())
elif self.lookahead.type == self.t.num:
ret.extend(self.parse_mult())
else:
raise SyntaxError(f'Expected {token_type}, got {self.lookahead.type}', ('<string>', self.lookahead.lineno, self.lookahead.pos, self.string))
ret[-1] += '\n'
return ret
def parse_choice(self) -> list:
c = self.parse_compound()
m = self.parse_more_choices()
e = self.parse_exchange()
if not m:
if not e:
ret = f'You may take {" ".join(c)}'
else:
ret = f'for every {e} models you may take item {" ".join(c)}'
elif m:
c.extend(m)
if not e:
ret = f'each model may take one of: {", ".join(c)}'
else:
ret = f'for every {e} models you may exchange the following items with each other: {", ".join(c)}'
else:
ret = 'Semantic error!'
return [ret]
def parse_compound(self) -> list:
ret = [self.lookahead.value]
self.match(self.t.item)
_ret = self.parse_add_item()
return [' '.join(ret + _ret)]
def parse_add_item(self) -> list:
if self.lookahead.type in {self.t.comma, self.t.minus, self.t.slash, self.t.EOF}:
return []
ret = ['with']
self.match(self.t.plus)
ret.append(self.lookahead.value)
self.match(self.t.item)
return ret + self.parse_add_item()
def parse_more_choices(self) -> list:
if self.lookahead.type in {self.t.comma, self.t.minus, self.t.EOF}:
return []
self.match(self.t.slash)
ret = self.parse_compound()
return ret + self.parse_more_choices()
def parse_exchange(self) -> str:
if self.lookahead.type in {self.t.comma, self.t.EOF}:
return ''
self.match(self.t.minus)
ret = self.lookahead.value
self.match(self.t.num)
return ret
def parse_mult(self) -> list:
ret = [f'each model may take {self.lookahead.value} of:']
self.match(self.t.num)
self.match(self.t.star)
return ret + self.parse_compound()
# FILE: evaluate.py
import enum
from lex import Lexer
from parse import Parser
# these are all the types of tokens present in our grammar
token_types = enum.Enum('Types', 'item num plus minus star slash comma space newline empty EOF')
t = token_types
# these are the regexes that the lexer uses to recognise the tokens
terminals_regexes = {
t.item: r'[a-zA-Z_]\w*',
t.num: '0|[1-9][0-9]*',
t.plus: r'\+',
t.minus: '-',
t.star: r'\*',
t.slash: '/',
t.comma: ',',
t.space: r'[ \t]',
t.newline: r'\n'
}
lexer = Lexer(token_types, terminals_regexes)
parser = Parser(lexer)
string = 'itemA, itemB/itemC-3, 2*itemD, itemE/itemF/itemG, itemH/itemI+itemJ'
print(f'STRING FROM THE QUESTION: {string!r}\nRESULT:')
print(parser.parse(string), '\n\n')
string = input('Enter a command: ')
while string and string.lower() not in {'q', 'quit', 'e', 'exit'}:
try:
print(parser.parse(string))
except SyntaxError as e:
print(f' Syntax error: {e}\n {e.text}\n' + ' ' * (4 + e.offset - 1) + '^\n')
string = input('Enter a command: ')
# python3 evaluate.py
STRING FROM THE QUESTION: 'itemA, itemB/itemC-3, 2*itemD, itemE/itemF/itemG, itemH/itemI+itemJ'
RESULT:
1. You may take itemA
2. for every 3 models you may exchange the following items with each other: itemB, itemC
3. each model may take 2 of: itemD
4. each model may take one of: itemE, itemF, itemG
5. each model may take one of: itemH, itemI with itemJ
Enter a command: itemA/b/c/stuff
1. each model may take one of: itemA, b, c, stuff
Enter a command: 4 * anything
1. each model may take 4 of: anything
Enter a command: 5 * anything + more
1. each model may take 5 of: anything with more
Enter a command: a + b + c+ d
1. You may take a with b with c with d
Enter a command: a+b/c
1. each model may take one of: a with b, c
Enter a command: itemA/itemB-2
1. for every 2 models you may exchange the following items with each other: itemA, itemB
Enter a command: itemA+itemB/itemC - 5
1. for every 5 models you may exchange the following items with each other: itemA with itemB, itemC
Enter a command: q