python-semantic-release · codejedi365 · Nov 10, 2024 · Nov 8, 2024 · Nov 8, 2024 · Nov 8, 2024
diff --git a/src/semantic_release/commit_parser/angular.py b/src/semantic_release/commit_parser/angular.py
@@ -8,19 +8,26 @@
 import logging
 import re
 from functools import reduce
+from itertools import zip_longest
 from re import compile as regexp
 from typing import TYPE_CHECKING, Tuple
 
 from pydantic.dataclasses import dataclass
 
 from semantic_release.commit_parser._base import CommitParser, ParserOptions
-from semantic_release.commit_parser.token import ParsedCommit, ParseError, ParseResult
+from semantic_release.commit_parser.token import (
+    ParsedCommit,
+    ParsedMessageResult,
+    ParseError,
+    ParseResult,
+)
 from semantic_release.commit_parser.util import breaking_re, parse_paragraphs
 from semantic_release.enums import LevelBump
 
 if TYPE_CHECKING:
     from git.objects.commit import Commit
 
+
 logger = logging.getLogger(__name__)
 
 
@@ -65,11 +72,16 @@ class AngularParserOptions(ParserOptions):
     default_bump_level: LevelBump = LevelBump.NO_RELEASE
 
     def __post_init__(self) -> None:
-        self.tag_to_level = {tag: self.default_bump_level for tag in self.allowed_tags}
-        for tag in self.patch_tags:
-            self.tag_to_level[tag] = LevelBump.PATCH
-        for tag in self.minor_tags:
-            self.tag_to_level[tag] = LevelBump.MINOR
+        self.tag_to_level: dict[str, LevelBump] = dict(
+            [
+                # we have to do a type ignore as zip_longest provides a type that is not specific enough
+                # for our expected output. Due to the empty second array, we know the first is always longest
+                # and that means no values in the first entry of the tuples will ever be a LevelBump.
+                *zip_longest(self.allowed_tags, (), fillvalue=self.default_bump_level),  # type: ignore[list-item]
+                *zip_longest(self.patch_tags, (), fillvalue=LevelBump.PATCH),  # type: ignore[list-item]
+                *zip_longest(self.minor_tags, (), fillvalue=LevelBump.MINOR),  # type: ignore[list-item]
+            ]
+        )
 
 
 class AngularCommitParser(CommitParser[ParseResult, AngularParserOptions]):
@@ -100,7 +112,7 @@ def __init__(self, options: AngularParserOptions | None = None) -> None:
         )
         # GitHub & Gitea use (#123), GitLab uses (!123), and BitBucket uses (pull request #123)
         self.mr_selector = regexp(
-            r"[\t ]\((?:pull request )?(?P<mr_number>[#!]\d+)\)[\t ]*$"
+            r"[\t ]+\((?:pull request )?(?P<mr_number>[#!]\d+)\)[\t ]*$"
         )
 
     @staticmethod
@@ -118,21 +130,10 @@ def commit_body_components_separator(
         accumulator["descriptions"].append(text)
         return accumulator
 
-    # Maybe this can be cached as an optimization, similar to how
-    # mypy/pytest use their own caching directories, for very large commit
-    # histories?
-    # The problem is the cache likely won't be present in CI environments
-    def parse(self, commit: Commit) -> ParseResult:
-        """
-        Attempt to parse the commit message with a regular expression into a
-        ParseResult
-        """
-        message = str(commit.message)
-        parsed = self.re_parser.match(message)
-        if not parsed:
-            return _logged_parse_error(
-                commit, f"Unable to parse commit message: {message}"
-            )
+    def parse_message(self, message: str) -> ParsedMessageResult | None:
+        if not (parsed := self.re_parser.match(message)):
+            return None
+
         parsed_break = parsed.group("break")
         parsed_scope = parsed.group("scope")
         parsed_subject = parsed.group("subject")
@@ -168,20 +169,34 @@ def parse(self, commit: Commit) -> ParseResult:
             )
         )
 
-        # TODO: remove in the future
-        if level_bump == LevelBump.MAJOR:
-            parsed_type = "breaking"
-
-        logger.debug(
-            "commit %s introduces a %s level_bump", commit.hexsha[:8], level_bump
-        )
-
-        return ParsedCommit(
+        return ParsedMessageResult(
             bump=level_bump,
-            type=LONG_TYPE_NAMES.get(parsed_type, parsed_type),
+            type=parsed_type,
+            category=LONG_TYPE_NAMES.get(parsed_type, parsed_type),
             scope=parsed_scope,
-            descriptions=body_components["descriptions"],
-            breaking_descriptions=body_components["breaking_descriptions"],
-            commit=commit,
+            descriptions=tuple(body_components["descriptions"]),
+            breaking_descriptions=tuple(body_components["breaking_descriptions"]),
             linked_merge_request=linked_merge_request,
         )
+
+    # Maybe this can be cached as an optimization, similar to how
+    # mypy/pytest use their own caching directories, for very large commit
+    # histories?
+    # The problem is the cache likely won't be present in CI environments
+    def parse(self, commit: Commit) -> ParseResult:
+        """
+        Attempt to parse the commit message with a regular expression into a
+        ParseResult
+        """
+        if not (pmsg_result := self.parse_message(str(commit.message))):
+            return _logged_parse_error(
+                commit, f"Unable to parse commit message: {commit.message!r}"
+            )
+
+        logger.debug(
+            "commit %s introduces a %s level_bump",
+            commit.hexsha[:8],
+            pmsg_result.bump,
+        )
+
+        return ParsedCommit.from_parsed_message_result(commit, pmsg_result)
diff --git a/src/semantic_release/commit_parser/emoji.py b/src/semantic_release/commit_parser/emoji.py
@@ -3,14 +3,19 @@
 from __future__ import annotations
 
 import logging
+from itertools import zip_longest
 from re import compile as regexp
 from typing import Tuple
 
 from git.objects.commit import Commit
 from pydantic.dataclasses import dataclass
 
 from semantic_release.commit_parser._base import CommitParser, ParserOptions
-from semantic_release.commit_parser.token import ParsedCommit, ParseResult
+from semantic_release.commit_parser.token import (
+    ParsedCommit,
+    ParsedMessageResult,
+    ParseResult,
+)
 from semantic_release.commit_parser.util import parse_paragraphs
 from semantic_release.enums import LevelBump
 
@@ -44,8 +49,26 @@ class EmojiParserOptions(ParserOptions):
         ":robot:",
         ":green_apple:",
     )
+    allowed_tags: Tuple[str, ...] = (
+        *major_tags,
+        *minor_tags,
+        *patch_tags,
+    )
     default_bump_level: LevelBump = LevelBump.NO_RELEASE
 
+    def __post_init__(self) -> None:
+        self.tag_to_level: dict[str, LevelBump] = dict(
+            [
+                # we have to do a type ignore as zip_longest provides a type that is not specific enough
+                # for our expected output. Due to the empty second array, we know the first is always longest
+                # and that means no values in the first entry of the tuples will ever be a LevelBump.
+                *zip_longest(self.allowed_tags, (), fillvalue=self.default_bump_level),  # type: ignore[list-item]
+                *zip_longest(self.patch_tags, (), fillvalue=LevelBump.PATCH),  # type: ignore[list-item]
+                *zip_longest(self.minor_tags, (), fillvalue=LevelBump.MINOR),  # type: ignore[list-item]
+                *zip_longest(self.major_tags, (), fillvalue=LevelBump.MAJOR),  # type: ignore[list-item]
+            ]
+        )
+
 
 class EmojiCommitParser(CommitParser[ParseResult, EmojiParserOptions]):
     """
@@ -65,22 +88,27 @@ class EmojiCommitParser(CommitParser[ParseResult, EmojiParserOptions]):
 
     def __init__(self, options: EmojiParserOptions | None = None) -> None:
         super().__init__(options)
+        prcedence_order_regex = str.join(
+            "|",
+            [
+                *self.options.major_tags,
+                *self.options.minor_tags,
+                *self.options.patch_tags,
+            ],
+        )
+        self.emoji_selector = regexp(r"(?P<type>%s)" % prcedence_order_regex)
+
         # GitHub & Gitea use (#123), GitLab uses (!123), and BitBucket uses (pull request #123)
         self.mr_selector = regexp(
-            r"[\t ]\((?:pull request )?(?P<mr_number>[#!]\d+)\)[\t ]*$"
+            r"[\t ]+\((?:pull request )?(?P<mr_number>[#!]\d+)\)[\t ]*$"
         )
 
     @staticmethod
     def get_default_options() -> EmojiParserOptions:
         return EmojiParserOptions()
 
-    def parse(self, commit: Commit) -> ParseResult:
-        all_emojis = (
-            self.options.major_tags + self.options.minor_tags + self.options.patch_tags
-        )
-
-        message = str(commit.message)
-        subject = message.split("\n")[0]
+    def parse_message(self, message: str) -> ParsedMessageResult:
+        subject = message.split("\n", maxsplit=1)[0]
 
         linked_merge_request = ""
         if mr_match := self.mr_selector.search(subject):
@@ -89,45 +117,46 @@ def parse(self, commit: Commit) -> ParseResult:
             # expects changelog template to format the line accordingly
             # subject = self.mr_selector.sub("", subject).strip()
 
-        # Loop over emojis from most important to least important
-        # Therefore, we find the highest level emoji first
-        primary_emoji = "Other"
-        for emoji in all_emojis:
-            if emoji in subject:
-                primary_emoji = emoji
-                break
-        logger.debug("Selected %s as the primary emoji", primary_emoji)
-
-        # Find which level this commit was from
-        level_bump = LevelBump.NO_RELEASE
-        if primary_emoji in self.options.major_tags:
-            level_bump = LevelBump.MAJOR
-        elif primary_emoji in self.options.minor_tags:
-            level_bump = LevelBump.MINOR
-        elif primary_emoji in self.options.patch_tags:
-            level_bump = LevelBump.PATCH
-        else:
-            level_bump = self.options.default_bump_level
-            logger.debug(
-                "commit %s introduces a level bump of %s due to the default_bump_level",
-                commit.hexsha[:8],
-                level_bump,
-            )
+        # Search for emoji of the highest importance in the subject
+        primary_emoji = (
+            match.group("type")
+            if (match := self.emoji_selector.search(subject))
+            else "Other"
+        )
 
-        logger.debug(
-            "commit %s introduces a %s level_bump", commit.hexsha[:8], level_bump
+        level_bump = self.options.tag_to_level.get(
+            primary_emoji, self.options.default_bump_level
         )
 
         # All emojis will remain part of the returned description
-        descriptions = parse_paragraphs(message)
-        return ParsedCommit(
+        descriptions = tuple(parse_paragraphs(message))
+        return ParsedMessageResult(
             bump=level_bump,
             type=primary_emoji,
-            scope="",
+            category=primary_emoji,
+            scope="",  # TODO: add scope support
+            # TODO: breaking change v10, removes breaking change footers from descriptions
+            # descriptions=(
+            #     descriptions[:1] if level_bump is LevelBump.MAJOR else descriptions
+            # )
             descriptions=descriptions,
             breaking_descriptions=(
-                descriptions[1:] if level_bump is LevelBump.MAJOR else []
+                descriptions[1:] if level_bump is LevelBump.MAJOR else ()
             ),
-            commit=commit,
             linked_merge_request=linked_merge_request,
         )
+
+    def parse(self, commit: Commit) -> ParseResult:
+        """
+        Attempt to parse the commit message with a regular expression into a
+        ParseResult
+        """
+        pmsg_result = self.parse_message(str(commit.message))
+
+        logger.debug(
+            "commit %s introduces a %s level_bump",
+            commit.hexsha[:8],
+            pmsg_result.bump,
+        )
+
+        return ParsedCommit.from_parsed_message_result(commit, pmsg_result)