# -*- coding: Latin-1 -*-
|
|
|
|
# pycrc -- parametrisable CRC calculation utility and C source code generator
|
|
#
|
|
# Copyright (c) 2006-2012 Thomas Pircher <tehpeh@gmx.net>
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
# of this software and associated documentation files (the "Software"), to
|
|
# deal in the Software without restriction, including without limitation the
|
|
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
# sell copies of the Software, and to permit persons to whom the Software is
|
|
# furnished to do so, subject to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in
|
|
# all copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
# IN THE SOFTWARE.
|
|
|
|
|
|
"""
|
|
Lexical analyzer for pycrc. This module is used internally by pycrc for the
|
|
macro processing and code generation.
|
|
|
|
A basic example of how the lexer is used:
|
|
|
|
from crc_lexer import Lexer
|
|
|
|
input_str = "the input string to parse"
|
|
lex = Lexer()
|
|
lex.set_str(input_str)
|
|
while True:
|
|
tok = lex.peek()
|
|
if tok == lex.tok_EOF:
|
|
break
|
|
else:
|
|
print("%4d: %s\n" % (tok, lex.text))
|
|
lex.advance()
|
|
"""
|
|
|
|
import re
|
|
|
|
|
|
# Class Lexer
|
|
###############################################################################
|
|
class Lexer(object):
    """
    A lexical analyser base class.

    The lexer tokenises a template string.  In the default state
    (state_gibberish) the input is split into plain text ("gibberish"),
    $identifiers, the block delimiters "{:" and ":}" and the escaped
    dollar sign "$$".  After set_state(state_expr) the scanner instead
    returns the tokens of an $if()/$elif() expression: identifiers,
    numbers, strings, parentheses, comparison operators and the
    keywords "and"/"or".
    """

    # Token codes returned by peek().
    tok_unknown = 0
    tok_EOF = 1
    tok_gibberish = 10
    tok_identifier = 11
    tok_block_open = 12
    tok_block_close = 13
    tok_num = 20
    tok_str = 21
    tok_par_open = 22
    tok_par_close = 23
    tok_op = 24
    tok_and = 25
    tok_or = 26

    # Scanner states; see set_state().
    state_gibberish = 0
    state_expr = 1


    # Class constructor
    ###############################################################################
    def __init__(self, input_str = ""):
        """
        The class constructor.

        input_str -- the string to tokenise; it may also be set later
                     with set_str().
        """
        # Raw strings keep the regex escapes readable.
        self.re_id = re.compile(r"^\$[a-zA-Z][a-zA-Z0-9_-]*")
        self.re_num = re.compile(r"^(0[xX][0-9a-fA-F]+|[0-9]+)")
        self.re_op = re.compile(r"<=|<|==|!=|>=|>")
        self.re_str = re.compile(r"\"?([a-zA-Z0-9_-]+)\"?")
        self.set_str(input_str)
        self.state = self.state_gibberish


    # function set_str
    ###############################################################################
    def set_str(self, input_str):
        """
        Set the parse input string and reset the lexer.
        """
        self.input_str = input_str
        # text of the most recently scanned token.
        self.text = ""
        # cached lookahead token; None means "not yet parsed".
        self.next_token = None


    # function peek
    ###############################################################################
    def peek(self):
        """
        Return the next token, without taking it away from the input_str.

        The text of the token is available in self.text.
        """
        if self.next_token is None:
            self.next_token = self._parse_next()
        return self.next_token


    # function advance
    ###############################################################################
    def advance(self, skip_nl = False):
        """
        Discard the current symbol from the input stream and advance to the
        following characters. If skip_nl is True, then skip also a following
        newline character.
        """
        self.next_token = None
        # NOTE(review): a lone trailing "\n" (len == 1) is intentionally not
        # skipped here, matching the original "> 1" test -- confirm with caller.
        if skip_nl and len(self.input_str) > 1 and self.input_str[0] == "\n":
            self.input_str = self.input_str[1:]


    # function delete_spaces
    ###############################################################################
    def delete_spaces(self, skip_unconditional = True):
        """
        Delete spaces in the input string.

        If skip_unconditional is False, then skip the spaces only if followed
        by $if() $else() or $elif().
        """
        new_input = self.input_str.lstrip(" \t")

        # check for an identifier
        m = self.re_id.match(new_input)
        if m is not None:
            text = m.group(0)[1:]
            # if the identifier is a reserved keyword, skip the spaces.
            if text == "if" or text == "elif" or text == "else":
                skip_unconditional = True
        if skip_unconditional:
            # invalidate the lookahead, as the input has changed.
            self.next_token = None
            self.input_str = new_input


    # function prepend
    ###############################################################################
    def prepend(self, in_str):
        """
        Prepend the parameter to the input string.
        """
        self.input_str = in_str + self.input_str


    # function set_state
    ###############################################################################
    def set_state(self, new_state):
        """
        Set the new state for the lexer.

        This changes the behaviour of the lexical scanner from normal operation
        to expression scanning (within $if () expressions) and back.
        """
        self.state = new_state
        # invalidate the lookahead: it was parsed in the old state.
        self.next_token = None


    # function _parse_next
    ###############################################################################
    def _parse_next(self):
        """
        Parse the next token, update the state variables and take the consumed
        text from the input stream.
        """
        if len(self.input_str) == 0:
            return self.tok_EOF

        # dispatch on the scanner state.
        if self.state == self.state_gibberish:
            return self._parse_gibberish()
        if self.state == self.state_expr:
            return self._parse_expr()
        return self.tok_unknown


    # function _parse_gibberish
    ###############################################################################
    def _parse_gibberish(self):
        """
        Parse the next token, update the state variables and take the consumed
        text from the input stream.
        """
        # check for an identifier
        m = self.re_id.match(self.input_str)
        if m is not None:
            # self.text holds the identifier without the leading "$".
            self.text = m.group(0)[1:]
            self.input_str = self.input_str[m.end():]
            return self.tok_identifier

        if len(self.input_str) > 1:
            # check for "{:"
            if self.input_str[0:2] == "{:":
                self.text = self.input_str[0:2]
                self.input_str = self.input_str[2:]
                return self.tok_block_open
            # check for ":}"
            if self.input_str[0:2] == ":}":
                self.text = self.input_str[0:2]
                self.input_str = self.input_str[2:]
                return self.tok_block_close
            # check for "$$": an escaped dollar sign, returned as a single "$".
            if self.input_str[0:2] == "$$":
                self.text = self.input_str[0:1]
                self.input_str = self.input_str[2:]
                return self.tok_gibberish
            # check for malformed "$"
            if self.input_str[0] == "$":
                self.text = self.input_str[0:1]
                # the bad character is deliberately NOT consumed, so the
                # caller can see where scanning stopped.
                return self.tok_unknown

        # the character is gibberish.
        # find the position of the next special character.
        pos = self.input_str.find("$")
        tmp = self.input_str.find("{:")
        if pos < 0 or (tmp >= 0 and tmp < pos):
            pos = tmp
        tmp = self.input_str.find(":}")
        if pos < 0 or (tmp >= 0 and tmp < pos):
            pos = tmp

        if pos < 0 or len(self.input_str) == 1:
            # neither id nor block start nor block end found:
            # the whole text is just gibberish.
            self.text = self.input_str
            self.input_str = ""
        else:
            self.text = self.input_str[:pos]
            self.input_str = self.input_str[pos:]
        return self.tok_gibberish


    # function _parse_expr
    ###############################################################################
    def _parse_expr(self):
        """
        Parse the next token, update the state variables and take the consumed
        text from the input stream.
        """
        # skip whitespaces
        pos = 0
        while pos < len(self.input_str) and self.input_str[pos] == ' ':
            pos = pos + 1
        if pos > 0:
            self.input_str = self.input_str[pos:]

        if len(self.input_str) == 0:
            return self.tok_EOF

        # identifier, e.g. "$crc_width"; self.text drops the "$".
        m = self.re_id.match(self.input_str)
        if m is not None:
            self.text = m.group(0)[1:]
            self.input_str = self.input_str[m.end():]
            return self.tok_identifier

        # decimal or hexadecimal number.
        m = self.re_num.match(self.input_str)
        if m is not None:
            self.text = m.group(0)
            self.input_str = self.input_str[m.end():]
            return self.tok_num

        # comparison operator: <= < == != >= >
        m = self.re_op.match(self.input_str)
        if m is not None:
            # .match anchors at position 0, so group(0) is the matched prefix.
            self.text = m.group(0)
            self.input_str = self.input_str[m.end():]
            return self.tok_op

        # NOTE(review): "and"/"or" are only recognised when followed by a
        # space, as in the original; otherwise they scan as tok_str.
        if self.input_str[:4] == "and ":
            self.text = "and"
            self.input_str = self.input_str[len(self.text) + 1:]
            return self.tok_and

        if self.input_str[:3] == "or ":
            self.text = "or"
            self.input_str = self.input_str[len(self.text) + 1:]
            return self.tok_or

        # a bare word, optionally surrounded by double quotes; the quotes
        # are not part of self.text.
        m = self.re_str.match(self.input_str)
        if m is not None:
            self.text = m.group(1)
            self.input_str = self.input_str[m.end():]
            return self.tok_str

        if self.input_str[0] == "(":
            self.text = self.input_str[0]
            self.input_str = self.input_str[len(self.text):]
            return self.tok_par_open

        if self.input_str[0] == ")":
            self.text = self.input_str[0]
            self.input_str = self.input_str[len(self.text):]
            return self.tok_par_close

        return self.tok_unknown