# CANLibrary/pycrc/crc_lexer.py
# -*- coding: Latin-1 -*-
# pycrc -- parametrisable CRC calculation utility and C source code generator
#
# Copyright (c) 2006-2012 Thomas Pircher <tehpeh@gmx.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
"""
Lexical analyzer for pycrc. This module is used internally by pycrc for the
macro processing and code generation.

A basic example of how the lexer is used:

    from crc_lexer import Lexer

    input_str = "the input string to parse"
    lex = Lexer()
    lex.set_str(input_str)
    while True:
        tok = lex.peek()
        if tok == lex.tok_EOF:
            break
        else:
            print("%4d: %s\n" % (tok, lex.text))
        lex.advance()
"""
import re


# Class Lexer
###############################################################################
class Lexer(object):
"""
A lexical analyser base class.
"""
tok_unknown = 0
tok_EOF = 1
tok_gibberish = 10
tok_identifier = 11
tok_block_open = 12
tok_block_close = 13
tok_num = 20
tok_str = 21
tok_par_open = 22
tok_par_close = 23
tok_op = 24
tok_and = 25
tok_or = 26
state_gibberish = 0
state_expr = 1
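
    # tok_unknown and tok_EOF can occur in either scanner state.  The tokens
    # numbered 10..13 are produced while scanning plain template text
    # (state_gibberish); the tokens numbered 20..26 are produced only while
    # scanning $if() expressions (state_expr).  tok_identifier (11) is the one
    # exception: it is returned in both states.  set_state() switches between
    # the two modes.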

    # Class constructor
    ###########################################################################
    def __init__(self, input_str = ""):
        """
        The class constructor.
        """
        self.re_id = re.compile("^\\$[a-zA-Z][a-zA-Z0-9_-]*")
        self.re_num = re.compile("^(0[xX][0-9a-fA-F]+|[0-9]+)")
        self.re_op = re.compile("<=|<|==|!=|>=|>")
        self.re_str = re.compile("\"?([a-zA-Z0-9_-]+)\"?")
        self.set_str(input_str)
        self.state = self.state_gibberish

    # function set_str
    ###########################################################################
    def set_str(self, input_str):
        """
        Set the parse input string.
        """
        self.input_str = input_str
        self.text = ""
        self.next_token = None

    # function peek
    ###########################################################################
    def peek(self):
        """
        Return the next token, without taking it away from the input_str.
        """
        if self.next_token is None:
            self.next_token = self._parse_next()
        return self.next_token

    # function advance
    ###########################################################################
    def advance(self, skip_nl = False):
        """
        Discard the current symbol from the input stream and advance to the
        following characters. If skip_nl is True, also skip a following
        newline character.
        """
        self.next_token = None
        if skip_nl and len(self.input_str) > 1 and self.input_str[0] == "\n":
            self.input_str = self.input_str[1:]

    # function delete_spaces
    ###########################################################################
    def delete_spaces(self, skip_unconditional = True):
        """
        Delete leading spaces in the input string.
        If skip_unconditional is False, the spaces are only skipped when they
        are followed by $if(), $elif() or $else().
        """
        new_input = self.input_str.lstrip(" \t")

        # check for an identifier
        m = self.re_id.match(new_input)
        if m is not None:
            text = m.group(0)[1:]
            # if the identifier is a reserved keyword, skip the spaces.
            if text == "if" or text == "elif" or text == "else":
                skip_unconditional = True

        if skip_unconditional:
            self.next_token = None
            self.input_str = new_input
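
        # Example (illustrative, not from the original sources): with
        # input_str == "   $if ($crc_width == 8)" a call with
        # skip_unconditional = False removes the leading blanks, because they
        # are followed by the reserved word "if"; with input_str == "   foo"
        # the blanks are left untouched.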

    # function prepend
    ###########################################################################
    def prepend(self, in_str):
        """
        Prepend the parameter to the input string.
        """
        self.input_str = in_str + self.input_str

    # function set_state
    ###########################################################################
    def set_state(self, new_state):
        """
        Set the new state for the lexer.
        This changes the behaviour of the lexical scanner from normal
        operation to expression scanning (within $if () expressions) and back.
        """
        self.state = new_state
        self.next_token = None

    # function _parse_next
    ###########################################################################
    def _parse_next(self):
        """
        Parse the next token, update the state variables and take the consumed
        text from the input stream.
        """
        if len(self.input_str) == 0:
            return self.tok_EOF
        if self.state == self.state_gibberish:
            return self._parse_gibberish()
        if self.state == self.state_expr:
            return self._parse_expr()
        return self.tok_unknown

    # function _parse_gibberish
    ###########################################################################
    def _parse_gibberish(self):
        """
        Parse the next token, update the state variables and take the consumed
        text from the input stream.
        """
        # check for an identifier
        m = self.re_id.match(self.input_str)
        if m is not None:
            self.text = m.group(0)[1:]
            self.input_str = self.input_str[m.end():]
            return self.tok_identifier

        if len(self.input_str) > 1:
            # check for "{:"
            if self.input_str[0:2] == "{:":
                self.text = self.input_str[0:2]
                self.input_str = self.input_str[2:]
                return self.tok_block_open
            # check for ":}"
            if self.input_str[0:2] == ":}":
                self.text = self.input_str[0:2]
                self.input_str = self.input_str[2:]
                return self.tok_block_close
            # check for "$$"
            if self.input_str[0:2] == "$$":
                self.text = self.input_str[0:1]
                self.input_str = self.input_str[2:]
                return self.tok_gibberish

        # check for malformed "$"
        if self.input_str[0] == "$":
            self.text = self.input_str[0:1]
            # self.input_str = self.input_str[1:]
            return self.tok_unknown

        # the character is gibberish.
        # find the position of the next special character.
        pos = self.input_str.find("$")
        tmp = self.input_str.find("{:")
        if pos < 0 or (tmp >= 0 and tmp < pos):
            pos = tmp
        tmp = self.input_str.find(":}")
        if pos < 0 or (tmp >= 0 and tmp < pos):
            pos = tmp
        if pos < 0 or len(self.input_str) == 1:
            # neither id nor block start nor block end found:
            # the whole text is just gibberish.
            self.text = self.input_str
            self.input_str = ""
        else:
            self.text = self.input_str[:pos]
            self.input_str = self.input_str[pos:]
        return self.tok_gibberish
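
        # Example (illustrative, not from the original sources): scanning
        # "prefix $$money {: body :}" in this state yields
        # tok_gibberish("prefix "), tok_gibberish("$"), tok_gibberish("money "),
        # tok_block_open("{:"), tok_gibberish(" body "), tok_block_close(":}");
        # "$$" is the escape for a literal "$" in the template text.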

    # function _parse_expr
    ###########################################################################
    def _parse_expr(self):
        """
        Parse the next token, update the state variables and take the consumed
        text from the input stream.
        """
        # skip whitespaces
        pos = 0
        while pos < len(self.input_str) and self.input_str[pos] == ' ':
            pos = pos + 1
        if pos > 0:
            self.input_str = self.input_str[pos:]
        if len(self.input_str) == 0:
            return self.tok_EOF

        m = self.re_id.match(self.input_str)
        if m is not None:
            self.text = m.group(0)[1:]
            self.input_str = self.input_str[m.end():]
            return self.tok_identifier

        m = self.re_num.match(self.input_str)
        if m is not None:
            self.text = m.group(0)
            self.input_str = self.input_str[m.end():]
            return self.tok_num

        m = self.re_op.match(self.input_str)
        if m is not None:
            self.text = m.string[:m.end()]
            self.input_str = self.input_str[m.end():]
            return self.tok_op

        if self.input_str[:4] == "and ":
            self.text = "and"
            self.input_str = self.input_str[len(self.text) + 1:]
            return self.tok_and

        if self.input_str[:3] == "or ":
            self.text = "or"
            self.input_str = self.input_str[len(self.text) + 1:]
            return self.tok_or

        m = self.re_str.match(self.input_str)
        if m is not None:
            self.text = m.group(1)
            self.input_str = self.input_str[m.end():]
            return self.tok_str

        if self.input_str[0] == "(":
            self.text = self.input_str[0]
            self.input_str = self.input_str[len(self.text):]
            return self.tok_par_open

        if self.input_str[0] == ")":
            self.text = self.input_str[0]
            self.input_str = self.input_str[len(self.text):]
            return self.tok_par_close

        return self.tok_unknown
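

# Usage example
###############################################################################
# Minimal usage sketch (not part of the original pycrc sources): it runs the
# lexer first in the normal gibberish state over a small template fragment and
# then in expression state over the contents of a hypothetical $if()
# condition.  The template text and the $-identifiers below are made up for
# illustration only.
if __name__ == "__main__":
    lex = Lexer()

    # Plain template text interleaved with $identifiers.
    lex.set_str("the width is $crc_width bits")
    while lex.peek() != lex.tok_EOF:
        print("%4d: %s" % (lex.peek(), lex.text))
        lex.advance()

    # Expression scanning, as used inside $if (...) conditions.
    lex.set_str("$crc_width <= 8 and $crc_reflect_in == True")
    lex.set_state(lex.state_expr)
    while lex.peek() != lex.tok_EOF:
        print("%4d: %s" % (lex.peek(), lex.text))
        lex.advance()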