♻️ Enhance reporting errors and warnings in parsing model token #5241

Draft · wants to merge 3 commits into master

21 changes: 17 additions & 4 deletions src/robot/parsing/__init__.py
@@ -21,8 +21,21 @@
:mod:`robot.api.parsing`.
"""

from .lexer import get_tokens, get_resource_tokens, get_init_tokens, Token
from .lexer import (
ErrorCode,
ErrorKind,
InvalidTokenError,
Token,
get_init_tokens,
get_resource_tokens,
get_tokens,
)
from .model import File, ModelTransformer, ModelVisitor
from .parser import get_model, get_resource_model, get_init_model
from .suitestructure import (SuiteFile, SuiteDirectory, SuiteStructure,
SuiteStructureBuilder, SuiteStructureVisitor)
from .parser import get_init_model, get_model, get_resource_model
from .suitestructure import (
SuiteDirectory,
SuiteFile,
SuiteStructure,
SuiteStructureBuilder,
SuiteStructureVisitor,
)
4 changes: 2 additions & 2 deletions src/robot/parsing/lexer/__init__.py
@@ -13,5 +13,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from .lexer import get_tokens, get_resource_tokens, get_init_tokens
from .tokens import StatementTokens, Token
from .lexer import get_init_tokens, get_resource_tokens, get_tokens
from .tokens import ErrorCode, ErrorKind, InvalidTokenError, StatementTokens, Token
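
With these exports in place, downstream code can react to structured error objects instead of matching message strings. A minimal sketch of the intended usage, assuming the API exactly as added in this PR; the data string is a hypothetical example:

```python
from robot.parsing import get_tokens

# Hypothetical data using a deprecated singular section header.
data = "*** Test Case ***\nExample\n    Log    Hello\n"

for token in get_tokens(data):
    if token.error is not None:
        # `token.error` is now an InvalidTokenError, not a plain string.
        print(token.error.kind.name, token.error.code.name, token.error.message)
```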
54 changes: 33 additions & 21 deletions src/robot/parsing/lexer/context.py
@@ -13,12 +13,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from robot.conf import Languages, LanguageLike, LanguagesLike
from robot.conf import LanguageLike, Languages, LanguagesLike
from robot.utils import normalize_whitespace

from .settings import (InitFileSettings, FileSettings, Settings, SuiteFileSettings,
ResourceFileSettings, TestCaseSettings, KeywordSettings)
from .tokens import StatementTokens, Token
from .settings import (
FileSettings,
InitFileSettings,
KeywordSettings,
ResourceFileSettings,
Settings,
SuiteFileSettings,
TestCaseSettings,
)
from .tokens import ErrorCode, ErrorKind, InvalidTokenError, StatementTokens, Token


class LexingContext:
@@ -71,7 +78,7 @@ def lex_invalid_section(self, statement: StatementTokens):
for token in statement[1:]:
token.type = Token.COMMENT

def _get_invalid_section_error(self, header: str) -> str:
def _get_invalid_section_error(self, header: str) -> InvalidTokenError:
raise NotImplementedError

def _handles_section(self, statement: StatementTokens, header: str) -> bool:
@@ -82,10 +89,9 @@ def _handles_section(self, statement: StatementTokens, header: str) -> bool:
if self.languages.headers.get(normalized) == header:
return True
if normalized == header[:-1]:
statement[0].error = (
f"Singular section headers like '{marker}' are deprecated. "
f"Use plural format like '*** {header} ***' instead."
)
statement[0].error = InvalidTokenError.as_warning(
code=ErrorCode.SINGULAR_HEADER_DEPRECATED,
message=f"Singular section headers like '{marker}' are deprecated. Use plural format like '*** {header} ***' instead.")
return True
return False

@@ -105,32 +111,38 @@ def test_case_section(self, statement: StatementTokens) -> bool:
def task_section(self, statement: StatementTokens) -> bool:
return self._handles_section(statement, 'Tasks')

def _get_invalid_section_error(self, header: str) -> str:
return (f"Unrecognized section header '{header}'. Valid sections: "
f"'Settings', 'Variables', 'Test Cases', 'Tasks', 'Keywords' "
f"and 'Comments'.")
def _get_invalid_section_error(self, header: str) -> InvalidTokenError:
return InvalidTokenError.as_error(
code=ErrorCode.INVALID_SECTION_HEADER,
message=f"Unrecognized section header '{header}'. Valid sections: 'Settings', 'Variables', 'Test Cases', 'Tasks', 'Keywords' and 'Comments'.")


class ResourceFileContext(FileContext):
settings: ResourceFileSettings

def _get_invalid_section_error(self, header: str) -> str:
def _get_invalid_section_error(self, header: str) -> InvalidTokenError:
name = self._normalize(header)
if self.languages.headers.get(name) in ('Test Cases', 'Tasks'):
return f"Resource file with '{name}' section is invalid."
return (f"Unrecognized section header '{header}'. Valid sections: "
f"'Settings', 'Variables', 'Keywords' and 'Comments'.")
return InvalidTokenError.as_fatal(
code=ErrorCode.INVALID_SECTION_IN_RESOURCE_FILE,
message=f"Resource file with '{name}' section is invalid.")
return InvalidTokenError.as_fatal(
code=ErrorCode.INVALID_SECTION_HEADER,
message=f"Unrecognized section header '{header}'. Valid sections: 'Settings', 'Variables', 'Keywords' and 'Comments'.")


class InitFileContext(FileContext):
settings: InitFileSettings

def _get_invalid_section_error(self, header: str) -> str:
def _get_invalid_section_error(self, header: str) -> InvalidTokenError:
name = self._normalize(header)
if self.languages.headers.get(name) in ('Test Cases', 'Tasks'):
return f"'{name}' section is not allowed in suite initialization file."
return (f"Unrecognized section header '{header}'. Valid sections: "
f"'Settings', 'Variables', 'Keywords' and 'Comments'.")
return InvalidTokenError.as_error(
code=ErrorCode.INVALID_SECTION_IN_INIT_FILE,
message=f"'{name}' section is not allowed in suite initialization file.")
return InvalidTokenError.as_error(
code=ErrorCode.INVALID_SECTION_HEADER,
message=f"Unrecognized section header '{header}'. Valid sections: 'Settings', 'Variables', 'Keywords' and 'Comments'.")


class TestCaseContext(LexingContext):
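
The factory methods above make severity explicit: deprecated singular headers become warnings, unrecognized sections in suite and init files become errors, and test case or task sections in resource files become fatal. A small sketch of the resulting objects, assuming the classes as defined in this diff:

```python
from robot.parsing import ErrorCode, ErrorKind, InvalidTokenError

error = InvalidTokenError.as_fatal(
    code=ErrorCode.INVALID_SECTION_IN_RESOURCE_FILE,
    message="Resource file with 'Test Cases' section is invalid.")

assert error.kind is ErrorKind.FATAL
assert error.is_fatal and not error.is_warning
# __str__ returns the message, or the empty string when message is None.
assert str(error) == "Resource file with 'Test Cases' section is invalid."
```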
12 changes: 6 additions & 6 deletions src/robot/parsing/lexer/settings.py
@@ -16,9 +16,9 @@
from abc import ABC, abstractmethod

from robot.conf import Languages
from robot.utils import normalize, normalize_whitespace, RecommendationFinder
from robot.utils import RecommendationFinder, normalize, normalize_whitespace

from .tokens import StatementTokens, Token
from .tokens import ErrorCode, ErrorKind, InvalidTokenError, StatementTokens, Token


class Settings(ABC):
@@ -68,7 +68,7 @@ def lex(self, statement: StatementTokens):
try:
self._validate(orig, name, statement)
except ValueError as err:
self._lex_error(statement, err.args[0])
self._lex_error(statement, InvalidTokenError.as_error(
    code=ErrorCode.SETTINGS_VALIDATION_ERROR, message=err.args[0]))
else:
self._lex_setting(statement, name)

@@ -106,7 +106,7 @@ def _is_valid_somewhere(self, name: str, classes: 'list[type[Settings]]') -> bool:
def _not_valid_here(self, name: str) -> str:
raise NotImplementedError

def _lex_error(self, statement: StatementTokens, error: str):
def _lex_error(self, statement: StatementTokens, error: InvalidTokenError):
statement[0].set_error(error)
for token in statement[1:]:
token.type = Token.COMMENT
@@ -122,8 +122,8 @@ def _lex_setting(self, statement: StatementTokens, name: str):
else:
self._lex_arguments(values)
if name == 'Return':
statement[0].error = ("The '[Return]' setting is deprecated. "
"Use the 'RETURN' statement instead.")
statement[0].error = InvalidTokenError.as_warning(
    code=ErrorCode.RETURN_SETTING_DEPRECATED,
    message="The '[Return]' setting is deprecated. Use the 'RETURN' statement instead.")

def _lex_name_and_arguments(self, tokens: StatementTokens):
if tokens:
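
Deprecation warnings like the '[Return]' one above can now be filtered by kind and code rather than by matching message text. A sketch under the same assumptions; the data string is hypothetical:

```python
from robot.parsing import get_tokens

data = (
    "*** Keywords ***\n"
    "My Keyword\n"
    "    [Return]    ${42}\n"
)

for token in get_tokens(data):
    if token.error is not None and token.error.is_warning:
        print(token.error.code.name)  # RETURN_SETTING_DEPRECATED
```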
15 changes: 8 additions & 7 deletions src/robot/parsing/lexer/statementlexers.py
@@ -19,8 +19,8 @@
from robot.utils import normalize_whitespace
from robot.variables import is_assign

from .context import FileContext, LexingContext, KeywordContext, TestCaseContext
from .tokens import StatementTokens, Token
from .context import FileContext, KeywordContext, LexingContext, TestCaseContext
from .tokens import ErrorCode, ErrorKind, InvalidTokenError, StatementTokens, Token


class Lexer(ABC):
@@ -140,10 +140,9 @@ def input(self, statement: StatementTokens):
try:
self.ctx.add_language(lang)
except DataError:
statement[0].set_error(
f"Invalid language configuration: "
f"Language '{lang}' not found nor importable as a language module."
)
statement[0].set_error(InvalidTokenError.as_error(
code=ErrorCode.INVALID_LANGUAGE_CONFIGURATION,
message=f"Invalid language configuration: Language '{lang}' not found nor importable as a language module."))
else:
statement[0].type = Token.CONFIG

@@ -387,6 +386,8 @@ def handles(self, statement: StatementTokens) -> bool:

def lex(self):
token = self.statement[0]
token.set_error(f'{token.value} is not allowed in this context.')
token.set_error(InvalidTokenError.as_error(
code=ErrorCode.SYNTAX_ERROR,
message=f'{token.value} is not allowed in this context.'))
for t in self.statement[1:]:
t.type = Token.ARGUMENT
96 changes: 92 additions & 4 deletions src/robot/parsing/lexer/tokens.py
@@ -14,15 +14,94 @@
# limitations under the License.

from collections.abc import Iterator
from typing import cast, List
from dataclasses import dataclass
from enum import Enum, auto
from typing import List, cast

from robot.errors import DataError
from robot.output import LOGGER
from robot.variables import VariableMatches


# Type alias to ease typing elsewhere
StatementTokens = List['Token']


class ErrorCode(Enum):
""" Error codes for invalid tokens.

The error codes are used to identify the error that occurred when tokenizing data.
"""
INVALID_LANGUAGE_CONFIGURATION = auto()
INVALID_SECTION_HEADER = auto()
INVALID_SECTION_IN_INIT_FILE = auto()
INVALID_SECTION_IN_RESOURCE_FILE = auto()

RETURN_SETTING_DEPRECATED = auto()
SETTINGS_VALIDATION_ERROR = auto()
SINGULAR_HEADER_DEPRECATED = auto()
SYNTAX_ERROR = auto()


class ErrorKind(Enum):
WARNING = 'WARNING'
ERROR = 'ERROR'
FATAL = 'FATAL'


@dataclass(frozen=True)
class InvalidTokenError:
""" Error information for invalid tokens.

:param kind: The kind of the error, either `ErrorKind.WARNING` or `ErrorKind.ERROR`.
:param code: The error code.
:param message: The error message.

The `kind` attribute is either `ErrorCode.WARNING`, `ErrorCode.ERROR` or `ErrorCode.FATAL`.
The `message` attribute is a string describing the error.

The `message` attribute is optional and defaults to `None`. The `is_warning` and `is_fatal` properties
can be used to check the kind of the error. If `is_fatal` equals to `True` (ErrorKind.FATAL), the error
should be treated as fatal and the parsing should be stopped. The `as_warning`, `as_error` and `as_fatal` class methods
can be used to create new instances of `InvalidTokenError` with the kind set to `ErrorKind.WARNING`,
`ErrorKind.ERROR` or `ErrorKind.FATAL` respectively.
"""

kind: ErrorKind
code: ErrorCode
message: str | None = None

def __str__(self) -> str:
return f"{self.message or ''}"

def __repr__(self) -> str:
return f'{self.__class__.__name__}({self.kind.value}, {self.code.name}, {self.message!r})'

def __eq__(self, value: object) -> bool:
if not isinstance(value, InvalidTokenError):
return False
return (self.kind == value.kind and self.code == value.code and self.message == value.message)

@property
def is_warning(self) -> bool:
return self.kind == ErrorKind.WARNING

@property
def is_fatal(self) -> bool:
return self.kind == ErrorKind.FATAL

@classmethod
def as_warning(cls, code: ErrorCode, message: str | None = None) -> 'InvalidTokenError':
return cls(ErrorKind.WARNING, code, message)

@classmethod
def as_error(cls, code: ErrorCode, message: str | None = None) -> 'InvalidTokenError':
return cls(ErrorKind.ERROR, code, message)

@classmethod
def as_fatal(cls, code: ErrorCode, message: str | None = None) -> 'InvalidTokenError':
return cls(ErrorKind.FATAL, code, message)


class Token:
"""Token representing piece of Robot Framework data.

@@ -162,7 +241,7 @@ class Token:
'_add_eos_before', '_add_eos_after']

def __init__(self, type: 'str|None' = None, value: 'str|None' = None,
lineno: int = -1, col_offset: int = -1, error: 'str|None' = None):
lineno: int = -1, col_offset: int = -1, error: 'InvalidTokenError|None' = None):
self.type = type
if value is None:
value = {
@@ -188,7 +267,7 @@ def end_col_offset(self) -> int:
return -1
return self.col_offset + len(self.value)

def set_error(self, error: str):
def set_error(self, error: InvalidTokenError) -> None:
self.type = Token.ERROR
self.error = error

@@ -240,6 +319,15 @@ def __eq__(self, other) -> bool:
and self.col_offset == other.col_offset
and self.error == other.error)

def dump_error_or_raise(self, source: str) -> None:
    """Report the possible error attached to this token.

    If the token is an `INVALID_HEADER`, the error is raised as a `DataError`.
    Otherwise it is logged as a warning or an error based on the error kind.
    """
    if self.error is None:
        return
    message = f"Error in file '{source}' on line {self.lineno}: {self.error}"
    if self.type == Token.INVALID_HEADER:
        raise DataError(message)
    LOGGER.write(message, level='WARN' if self.error.is_warning else 'ERROR')


class EOS(Token):
"""Token representing end of a statement."""
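
A small sketch of the new `dump_error_or_raise` helper, assuming the behavior in this diff. Note that `set_error` reassigns the token type to `ERROR`, so an invalid header only keeps its type if the error is attached at construction time:

```python
from robot.errors import DataError
from robot.parsing import ErrorCode, InvalidTokenError, Token

header = Token(
    Token.INVALID_HEADER, '*** Bad ***', lineno=1, col_offset=0,
    error=InvalidTokenError.as_fatal(
        code=ErrorCode.INVALID_SECTION_HEADER,
        message="Unrecognized section header '*** Bad ***'."))

try:
    # INVALID_HEADER tokens raise; other token types are logged instead.
    header.dump_error_or_raise('example.robot')
except DataError as error:
    print(error)  # Error in file 'example.robot' on line 1: Unrecognized ...
```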
16 changes: 10 additions & 6 deletions src/robot/parsing/model/statements.py
@@ -18,13 +18,18 @@
import warnings
from abc import ABC, abstractmethod
from collections.abc import Iterator, Sequence
from typing import cast, ClassVar, Literal, overload, TYPE_CHECKING, Type, TypeVar
from typing import TYPE_CHECKING, ClassVar, Literal, Type, TypeVar, cast, overload

from robot.conf import Language
from robot.parsing.lexer.tokens import InvalidTokenError
from robot.running.arguments import UserKeywordArgumentParser
from robot.utils import normalize_whitespace, seq2str, split_from_equals, test_or_task
from robot.variables import (contains_variable, is_scalar_assign, is_dict_variable,
search_variable)
from robot.variables import (
contains_variable,
is_dict_variable,
is_scalar_assign,
search_variable,
)

from ..lexer import Token

@@ -1353,7 +1358,7 @@ class Error(Statement):
_errors: 'tuple[str, ...]' = ()

@classmethod
def from_params(cls, error: str, value: str = '', indent: str = FOUR_SPACES,
def from_params(cls, error: InvalidTokenError, value: str = '', indent: str = FOUR_SPACES,
eol: str = EOL) -> 'Error':
return cls([
Token(Token.SEPARATOR, indent),
@@ -1373,13 +1378,12 @@ def errors(self) -> 'tuple[str, ...]':
along with errors got from tokens.
"""
tokens = self.get_tokens(Token.ERROR)
return tuple(t.error or '' for t in tokens) + self._errors
return tuple(str(t.error) if t.error else '' for t in tokens) + self._errors

@errors.setter
def errors(self, errors: 'Sequence[str]'):
self._errors = tuple(errors)


class EmptyLine(Statement):
type = Token.EOL

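
End to end, `Error.errors` still exposes plain strings, so existing model-level consumers keep working. A sketch using `ModelVisitor`; the data string and the exact validation message are hypothetical:

```python
from robot.parsing import ModelVisitor, get_model


class ErrorPrinter(ModelVisitor):

    def visit_Error(self, node):
        # `node.errors` is a tuple of plain strings extracted from the
        # InvalidTokenError instances attached to ERROR tokens.
        print(node.errors)


model = get_model("*** Settings ***\nBad Setting    value\n")
ErrorPrinter().visit(model)  # e.g. ("Non-existing setting 'Bad Setting'.",)
```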