# -*- coding: Latin-1 -*-
|
|
|
|
# pycrc -- parametrisable CRC calculation utility and C source code generator
|
|
#
|
|
# Copyright (c) 2006-2012 Thomas Pircher <tehpeh@gmx.net>
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
# of this software and associated documentation files (the "Software"), to
|
|
# deal in the Software without restriction, including without limitation the
|
|
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
# sell copies of the Software, and to permit persons to whom the Software is
|
|
# furnished to do so, subject to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in
|
|
# all copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
# IN THE SOFTWARE.
|
|
|
|
|
|
"""
|
|
Lexical analyzer for pycrc. This module is used internally by pycrc for the
|
|
macro processing and code generation.
|
|
|
|
A basic example of how the lexer is used:
|
|
|
|
from crc_lexer import Lexer
|
|
|
|
input_str = "the input string to parse"
|
|
lex = Lexer()
|
|
lex.set_str(input_str)
|
|
while True:
|
|
tok = lex.peek()
|
|
if tok == lex.tok_EOF:
|
|
break
|
|
else:
|
|
print("%4d: %s\n" % (tok, lex.text))
|
|
lex.advance()
|
|
"""
|
|
|
|
import re
|
|
|
|
|
|
# Class Lexer
|
|
###############################################################################
|
|
class Lexer(object):
    """
    A lexical analyser base class.

    The lexer tokenises a template string.  In the default state
    (state_gibberish) the input is split into plain text ("gibberish"),
    $identifiers, the block delimiters "{:" and ":}" and the escaped
    dollar sign "$$".  After set_state(state_expr) the scanner instead
    returns the tokens of an $if()/$elif() expression: identifiers,
    numbers, strings, parentheses, comparison operators and the
    keywords "and"/"or".
    """

    # Token codes returned by peek().
    tok_unknown = 0
    tok_EOF = 1
    tok_gibberish = 10
    tok_identifier = 11
    tok_block_open = 12
    tok_block_close = 13
    tok_num = 20
    tok_str = 21
    tok_par_open = 22
    tok_par_close = 23
    tok_op = 24
    tok_and = 25
    tok_or = 26

    # Scanner states; see set_state().
    state_gibberish = 0
    state_expr = 1


    # Class constructor
    ###############################################################################
    def __init__(self, input_str = ""):
        """
        The class constructor.

        input_str -- the string to tokenise; it may also be set later
                     with set_str().
        """
        # Raw strings keep the regex escapes readable.
        self.re_id = re.compile(r"^\$[a-zA-Z][a-zA-Z0-9_-]*")
        self.re_num = re.compile(r"^(0[xX][0-9a-fA-F]+|[0-9]+)")
        self.re_op = re.compile(r"<=|<|==|!=|>=|>")
        self.re_str = re.compile(r"\"?([a-zA-Z0-9_-]+)\"?")
        self.set_str(input_str)
        self.state = self.state_gibberish


    # function set_str
    ###############################################################################
    def set_str(self, input_str):
        """
        Set the parse input string and reset the lexer.
        """
        self.input_str = input_str
        # text of the most recently scanned token.
        self.text = ""
        # cached lookahead token; None means "not yet parsed".
        self.next_token = None


    # function peek
    ###############################################################################
    def peek(self):
        """
        Return the next token, without taking it away from the input_str.

        The text of the token is available in self.text.
        """
        if self.next_token is None:
            self.next_token = self._parse_next()
        return self.next_token


    # function advance
    ###############################################################################
    def advance(self, skip_nl = False):
        """
        Discard the current symbol from the input stream and advance to the
        following characters. If skip_nl is True, then skip also a following
        newline character.
        """
        self.next_token = None
        # NOTE(review): a lone trailing "\n" (len == 1) is intentionally not
        # skipped here, matching the original "> 1" test -- confirm with caller.
        if skip_nl and len(self.input_str) > 1 and self.input_str[0] == "\n":
            self.input_str = self.input_str[1:]


    # function delete_spaces
    ###############################################################################
    def delete_spaces(self, skip_unconditional = True):
        """
        Delete spaces in the input string.

        If skip_unconditional is False, then skip the spaces only if followed
        by $if() $else() or $elif().
        """
        new_input = self.input_str.lstrip(" \t")

        # check for an identifier
        m = self.re_id.match(new_input)
        if m is not None:
            text = m.group(0)[1:]
            # if the identifier is a reserved keyword, skip the spaces.
            if text == "if" or text == "elif" or text == "else":
                skip_unconditional = True
        if skip_unconditional:
            # invalidate the lookahead, as the input has changed.
            self.next_token = None
            self.input_str = new_input


    # function prepend
    ###############################################################################
    def prepend(self, in_str):
        """
        Prepend the parameter to the input string.
        """
        self.input_str = in_str + self.input_str


    # function set_state
    ###############################################################################
    def set_state(self, new_state):
        """
        Set the new state for the lexer.

        This changes the behaviour of the lexical scanner from normal operation
        to expression scanning (within $if () expressions) and back.
        """
        self.state = new_state
        # invalidate the lookahead: it was parsed in the old state.
        self.next_token = None


    # function _parse_next
    ###############################################################################
    def _parse_next(self):
        """
        Parse the next token, update the state variables and take the consumed
        text from the input stream.
        """
        if len(self.input_str) == 0:
            return self.tok_EOF

        # dispatch on the scanner state.
        if self.state == self.state_gibberish:
            return self._parse_gibberish()
        if self.state == self.state_expr:
            return self._parse_expr()
        return self.tok_unknown


    # function _parse_gibberish
    ###############################################################################
    def _parse_gibberish(self):
        """
        Parse the next token, update the state variables and take the consumed
        text from the input stream.
        """
        # check for an identifier
        m = self.re_id.match(self.input_str)
        if m is not None:
            # self.text holds the identifier without the leading "$".
            self.text = m.group(0)[1:]
            self.input_str = self.input_str[m.end():]
            return self.tok_identifier

        if len(self.input_str) > 1:
            # check for "{:"
            if self.input_str[0:2] == "{:":
                self.text = self.input_str[0:2]
                self.input_str = self.input_str[2:]
                return self.tok_block_open
            # check for ":}"
            if self.input_str[0:2] == ":}":
                self.text = self.input_str[0:2]
                self.input_str = self.input_str[2:]
                return self.tok_block_close
            # check for "$$": an escaped dollar sign, returned as a single "$".
            if self.input_str[0:2] == "$$":
                self.text = self.input_str[0:1]
                self.input_str = self.input_str[2:]
                return self.tok_gibberish
            # check for malformed "$"
            if self.input_str[0] == "$":
                self.text = self.input_str[0:1]
                # the bad character is deliberately NOT consumed, so the
                # caller can see where scanning stopped.
                return self.tok_unknown

        # the character is gibberish.
        # find the position of the next special character.
        pos = self.input_str.find("$")
        tmp = self.input_str.find("{:")
        if pos < 0 or (tmp >= 0 and tmp < pos):
            pos = tmp
        tmp = self.input_str.find(":}")
        if pos < 0 or (tmp >= 0 and tmp < pos):
            pos = tmp

        if pos < 0 or len(self.input_str) == 1:
            # neither id nor block start nor block end found:
            # the whole text is just gibberish.
            self.text = self.input_str
            self.input_str = ""
        else:
            self.text = self.input_str[:pos]
            self.input_str = self.input_str[pos:]
        return self.tok_gibberish


    # function _parse_expr
    ###############################################################################
    def _parse_expr(self):
        """
        Parse the next token, update the state variables and take the consumed
        text from the input stream.
        """
        # skip whitespaces
        pos = 0
        while pos < len(self.input_str) and self.input_str[pos] == ' ':
            pos = pos + 1
        if pos > 0:
            self.input_str = self.input_str[pos:]

        if len(self.input_str) == 0:
            return self.tok_EOF

        # identifier, e.g. "$crc_width"; self.text drops the "$".
        m = self.re_id.match(self.input_str)
        if m is not None:
            self.text = m.group(0)[1:]
            self.input_str = self.input_str[m.end():]
            return self.tok_identifier

        # decimal or hexadecimal number.
        m = self.re_num.match(self.input_str)
        if m is not None:
            self.text = m.group(0)
            self.input_str = self.input_str[m.end():]
            return self.tok_num

        # comparison operator: <= < == != >= >
        m = self.re_op.match(self.input_str)
        if m is not None:
            # .match anchors at position 0, so group(0) is the matched prefix.
            self.text = m.group(0)
            self.input_str = self.input_str[m.end():]
            return self.tok_op

        # NOTE(review): "and"/"or" are only recognised when followed by a
        # space, as in the original; otherwise they scan as tok_str.
        if self.input_str[:4] == "and ":
            self.text = "and"
            self.input_str = self.input_str[len(self.text) + 1:]
            return self.tok_and

        if self.input_str[:3] == "or ":
            self.text = "or"
            self.input_str = self.input_str[len(self.text) + 1:]
            return self.tok_or

        # a bare word, optionally surrounded by double quotes; the quotes
        # are not part of self.text.
        m = self.re_str.match(self.input_str)
        if m is not None:
            self.text = m.group(1)
            self.input_str = self.input_str[m.end():]
            return self.tok_str

        if self.input_str[0] == "(":
            self.text = self.input_str[0]
            self.input_str = self.input_str[len(self.text):]
            return self.tok_par_open

        if self.input_str[0] == ")":
            self.text = self.input_str[0]
            self.input_str = self.input_str[len(self.text):]
            return self.tok_par_close

        return self.tok_unknown