{"sha":"575253ac235ae69c846fc5b8312b3be7e1721fcb","node_id":"C_kwDOBX7wHdoAKDU3NTI1M2FjMjM1YWU2OWM4NDZmYzViODMxMmIzYmU3ZTE3MjFmY2I","commit":{"author":{"name":"Khoi Pham","email":"132031702+dkphm@users.noreply.github.com","date":"2024-11-27T00:18:04Z"},"committer":{"name":"GitHub","email":"noreply@github.com","date":"2024-11-27T00:18:04Z"},"message":"fix: Another approach to parse tag string with spaces to avoid long running time  on regex matching. (#7625)\n\n* fix: Another approach to parse tag string with spaces to avoid long running time on regex matching.\n\n* Format","tree":{"sha":"564a89ea654879ad630ef3fb6dbbafecf531c930","url":"https://api.github.com/repos/aws/aws-sam-cli/git/trees/564a89ea654879ad630ef3fb6dbbafecf531c930"},"url":"https://api.github.com/repos/aws/aws-sam-cli/git/commits/575253ac235ae69c846fc5b8312b3be7e1721fcb","comment_count":0,"verification":{"verified":true,"reason":"valid","signature":"-----BEGIN PGP SIGNATURE-----\n\nwsFcBAABCAAQBQJnRmVMCRC1aQ7uu5UhlAAAQV8QABGwUJR0QwFeZGc7iqtzzoLQ\n7WnZ4JW+z2eKq4vVuUNN/N5tTECdr4rv2rvuUt8mIaj3F8tFBjsOKjqdnmCXuT28\nkhlWGDJnbQs9hrbf62lS6XJP5gGWg/xpBWQO6OUi8a7N3BXjGXWILfnzzs7G9Czs\nT6bHRGKcQKOCl1fbpZQXDq6A2t/ZlkZTXiyLdPwrKqRmHna9dGLY1vYgnsEWYrhX\neC0Te2b90iQgWfkqobKbLsMajIhVwVop0LcF0+n9RUZplEjuoW1V2nkmigzTA2Ko\nA8MBVJLlE/iW8UqVg/FHyY8qLK/RGAR8A8yLROP/VpSrUC3J/gFqAEI7/Vs3EwmZ\nmmtcCCNg/MFRSQBTK86eXRh/V6ZIPniSB1LaJ2j8C24KeAF8ndJhqGVKLcG/TUzu\nd5L6o7zEiaGmXSo+Ssj2VBheWvgRuL9L/i4uZV5zq5lGw9leMf5LaIty3ek8ZoHs\nCn87or1Wo0kEnC4cJPKB3Ebueprq9HsGmoGPDItyZR8bvLRfHRnThJ2oJgl9dTQm\nvL9EKtD0d7yaHTntBjZksIaQG24NYQo18/0huIKawZi0YpUY5nhNBkEWcKy/RvD0\n7xSdvVC+AETSBMsfR7bqWN4pVmORSWxcF4HFqLjNf1GFDIYOcuS27q5FG30Cj7p4\nK0Plms+KVcTdHoOYyARF\n=GJM9\n-----END PGP SIGNATURE-----\n","payload":"tree 564a89ea654879ad630ef3fb6dbbafecf531c930\nparent 6f750a8e127ee3c4e7d3c2e56b47bf63ef0fb9db\nauthor Khoi Pham <132031702+dkphm@users.noreply.github.com> 1732666684 -0800\ncommitter GitHub <noreply@github.com> 1732666684 +0000\n\nfix: Another approach to parse tag string with spaces to avoid long running time  on regex matching. (#7625)\n\n* fix: Another approach to parse tag string with spaces to avoid long running time on regex matching.\n\n* Format","verified_at":"2024-11-27T00:23:22Z"}},"url":"https://api.github.com/repos/aws/aws-sam-cli/commits/575253ac235ae69c846fc5b8312b3be7e1721fcb","html_url":"https://github.com/aws/aws-sam-cli/commit/575253ac235ae69c846fc5b8312b3be7e1721fcb","comments_url":"https://api.github.com/repos/aws/aws-sam-cli/commits/575253ac235ae69c846fc5b8312b3be7e1721fcb/comments","author":{"login":"dkphm","id":132031702,"node_id":"U_kgDOB96k1g","avatar_url":"https://avatars.githubusercontent.com/u/132031702?v=4","gravatar_id":"","url":"https://api.github.com/users/dkphm","html_url":"https://github.com/dkphm","followers_url":"https://api.github.com/users/dkphm/followers","following_url":"https://api.github.com/users/dkphm/following{/other_user}","gists_url":"https://api.github.com/users/dkphm/gists{/gist_id}","starred_url":"https://api.github.com/users/dkphm/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/dkphm/subscriptions","organizations_url":"https://api.github.com/users/dkphm/orgs","repos_url":"https://api.github.com/users/dkphm/repos","events_url":"https://api.github.com/users/dkphm/events{/privacy}","received_events_url":"https://api.github.com/users/dkphm/received_events","type":"User","user_view_type":"public","site_admin":false},"committer":{"login":"web-flow","id":19864447,"node_id":"MDQ6VXNlcjE5ODY0NDQ3","avatar_url":"https://avatars.githubusercontent.com/u/19864447?v=4","gravatar_id":"","url":"https://api.github.com/users/web-flow","html_url":"https://github.com/web-flow","followers_url":"https://api.github.com/users/web-flow/followers","following_url":"https://api.github.com/users/web-flow/following{/other_user}","gists_url":"https://api.github.com/users/web-flow/gists{/gist_id}","starred_url":"https://api.github.com/users/web-flow/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/web-flow/subscriptions","organizations_url":"https://api.github.com/users/web-flow/orgs","repos_url":"https://api.github.com/users/web-flow/repos","events_url":"https://api.github.com/users/web-flow/events{/privacy}","received_events_url":"https://api.github.com/users/web-flow/received_events","type":"User","user_view_type":"public","site_admin":false},"parents":[{"sha":"6f750a8e127ee3c4e7d3c2e56b47bf63ef0fb9db","url":"https://api.github.com/repos/aws/aws-sam-cli/commits/6f750a8e127ee3c4e7d3c2e56b47bf63ef0fb9db","html_url":"https://github.com/aws/aws-sam-cli/commit/6f750a8e127ee3c4e7d3c2e56b47bf63ef0fb9db"}],"stats":{"total":138,"additions":124,"deletions":14},"files":[{"sha":"a3743128ade3407ea046d9c08a8127dbe9266408","filename":"samcli/cli/types.py","status":"modified","additions":116,"deletions":14,"changes":130,"blob_url":"https://github.com/aws/aws-sam-cli/blob/575253ac235ae69c846fc5b8312b3be7e1721fcb/samcli%2Fcli%2Ftypes.py","raw_url":"https://github.com/aws/aws-sam-cli/raw/575253ac235ae69c846fc5b8312b3be7e1721fcb/samcli%2Fcli%2Ftypes.py","contents_url":"https://api.github.com/repos/aws/aws-sam-cli/contents/samcli%2Fcli%2Ftypes.py?ref=575253ac235ae69c846fc5b8312b3be7e1721fcb","patch":"@@ -17,7 +17,7 @@\n LOG = logging.getLogger(__name__)\n \n \n-def _generate_match_regex(match_pattern, delim):\n+def _generate_match_regex(match_pattern, delim=None):\n     \"\"\"\n     Creates a regex string based on a match pattern (also a regex) that is to be\n     run on a string (which may contain escaped quotes) that is separated by delimiters.\n@@ -32,13 +32,13 @@ def _generate_match_regex(match_pattern, delim):\n     str: regex expression\n \n     \"\"\"\n+    result = f\"\"\"(\\\\\"(?:\\\\\\\\{match_pattern}|[^\\\\\"\\\\\\\\]+)*\\\\\"|\"\"\" + f\"\"\"\\'(?:\\\\\\\\{match_pattern}|[^\\'\\\\\\\\]+)*\\'\"\"\"\n \n-    # Non capturing groups reduces duplicates in groups, but does not reduce matches.\n-    return (\n-        f\"\"\"(\\\\\"(?:\\\\\\\\{match_pattern}|[^\\\\\"\\\\\\\\]+)*\\\\\"|\"\"\"\n-        + f\"\"\"\\'(?:\\\\\\\\{match_pattern}|[^\\'\\\\\\\\]+)*\\'|\"\"\"\n-        + f\"\"\"(?:\\\\\\\\{match_pattern}|[^{delim}\\\\\"\\\\\\\\]+)+)\"\"\"\n-    )\n+    if delim is not None:\n+        # Non capturing groups reduces duplicates in groups, but does not reduce matches.\n+        return result + f\"\"\"|(?:\\\\\\\\{match_pattern}|[^{delim}\\\\\"\\\\\\\\]+)+)\"\"\"\n+    else:\n+        return result + \")\"\n \n \n def _unquote_wrapped_quotes(value):\n@@ -194,6 +194,7 @@ def __init__(self, multiple_values_per_key=False):\n     TAG_REGEX = '[A-Za-z0-9\\\\\"_:\\\\.\\\\/\\\\+-\\\\@=]'\n \n     _pattern = r\"{tag}={tag}\".format(tag=_generate_match_regex(match_pattern=TAG_REGEX, delim=\" \"))\n+    _quoted_pattern = _generate_match_regex(match_pattern=TAG_REGEX)\n \n     name = \"string,list\"\n \n@@ -222,13 +223,7 @@ def convert(self, value, param, ctx):\n                 for k in tags:\n                     self._add_value(result, _unquote_wrapped_quotes(k), _unquote_wrapped_quotes(tags[k]))\n             else:\n-                groups = re.findall(self._pattern, val)\n-\n-                if not groups:\n-                    fail = True\n-                for group in groups:\n-                    key, v = group\n-                    self._add_value(result, _unquote_wrapped_quotes(key), _unquote_wrapped_quotes(v))\n+                fail = not self._parse_key_value_pair(result, val)\n \n             if fail:\n                 return self.fail(\n@@ -239,6 +234,66 @@ def convert(self, value, param, ctx):\n \n         return result\n \n+    def _parse_key_value_pair(self, result: dict, key_value_string: str):\n+        \"\"\"\n+        This method processes a string in the format \"'key1'='value1','key2'='value2'\",\n+        where spaces may exist within keys or values.\n+\n+        To optimize performance, the parsing is divided into two stages:\n+\n+        Stage 1: Optimized Parsing\n+        1. Identify quoted strings containing spaces within values.\n+        2. Temporarily replace spaces in these strings with a placeholder (e.g., \"_\").\n+        3. Use a fast, standard parser to extract key-value pairs, as no spaces are expected.\n+        4. Restore original spaces in the extracted key-value pairs.\n+\n+        Stage 2: Fallback Parsing\n+        If Stage 1 fails to parse the string correctly,run against a comprehensive regex pattern\n+        {tag}={tag}) to parse the entire string.\n+\n+        Parameters\n+        ----------\n+        result: result dict\n+        key_value_string: string to parse\n+\n+        Returns\n+        -------\n+        boolean - parse result\n+        \"\"\"\n+        parse_result = True\n+\n+        # Unquote an entire string\n+        modified_val = _unquote_wrapped_quotes(key_value_string)\n+\n+        # Looking for a quote strings that contain spaces and proceed to replace them\n+        quoted_strings_with_spaces = re.findall(self._quoted_pattern, modified_val)\n+        quoted_strings_with_spaces_objects = [\n+            TextWithSpaces(str_with_spaces) for str_with_spaces in quoted_strings_with_spaces\n+        ]\n+        for s, replacement in zip(quoted_strings_with_spaces, quoted_strings_with_spaces_objects):\n+            modified_val = modified_val.replace(s, replacement.replace_spaces())\n+\n+        # Use default parser to parse key=value\n+        tags = self._multiple_space_separated_key_value_parser(modified_val)\n+        if tags is not None:\n+            for key, value in tags.items():\n+                new_value = value\n+                text_objects = [obj for obj in quoted_strings_with_spaces_objects if obj.modified_text == value]\n+                if len(text_objects) > 0:\n+                    new_value = text_objects[0].restore_spaces()\n+                self._add_value(result, _unquote_wrapped_quotes(key), _unquote_wrapped_quotes(new_value))\n+        else:\n+            # Otherwise, fall back to the original mechanism.\n+            groups = re.findall(self._pattern, key_value_string)\n+\n+            if not groups:\n+                parse_result = False\n+            for group in groups:\n+                key, v = group\n+                self._add_value(result, _unquote_wrapped_quotes(key), _unquote_wrapped_quotes(v))\n+\n+        return parse_result\n+\n     def _add_value(self, result: dict, key: str, new_value: str):\n         \"\"\"\n         Add a given value to a given key in the result map.\n@@ -286,6 +341,22 @@ def _space_separated_key_value_parser(tag_value):\n             tags_dict = {**tags_dict, **parsed_tag}\n         return True, tags_dict\n \n+    @staticmethod\n+    def _multiple_space_separated_key_value_parser(tag_value):\n+        \"\"\"\n+        Method to parse space separated `Key1=Value1 Key2=Value2` type tags without using regex.\n+        Parameters\n+        ----------\n+        tag_value\n+        \"\"\"\n+        tags_dict = {}\n+        for value in tag_value.split():\n+            parsed, parsed_tag = CfnTags._standard_key_value_parser(value)\n+            if not parsed:\n+                return None\n+            tags_dict.update(parsed_tag)\n+        return tags_dict\n+\n \n class SigningProfilesOptionType(click.ParamType):\n     \"\"\"\n@@ -560,3 +631,34 @@ def convert(\n             )\n \n         return {resource_id: [excluded_path]}\n+\n+\n+class TextWithSpaces:\n+    def __init__(self, text) -> None:\n+        self.text = text\n+        self.modified_text = text\n+        self.space_positions = []  # type: List[int]\n+\n+    def replace_spaces(self, replacement=\"_\"):\n+        \"\"\"\n+        Replace spaces in a text with a replacement together with its original locations.\n+        Input: \"test 1\"\n+        Output: \"test_1\" [4]\n+        \"\"\"\n+        self.space_positions = [i for i, char in enumerate(self.text) if char == \" \"]\n+        self.modified_text = self.text.replace(\" \", replacement)\n+\n+        return self.modified_text\n+\n+    def restore_spaces(self):\n+        \"\"\"\n+        Restore spaces in a text from a original space locations.\n+        Input: \"test_1\" [4]\n+        Output: \"test 1\"\n+        \"\"\"\n+        text_list = list(self.modified_text)\n+\n+        for pos in self.space_positions:\n+            text_list[pos] = \" \"\n+\n+        return \"\".join(text_list)"},{"sha":"f6943107f6cffafa31644b90fe3aae2e3558f00b","filename":"tests/unit/cli/test_types.py","status":"modified","additions":8,"deletions":0,"changes":8,"blob_url":"https://github.com/aws/aws-sam-cli/blob/575253ac235ae69c846fc5b8312b3be7e1721fcb/tests%2Funit%2Fcli%2Ftest_types.py","raw_url":"https://github.com/aws/aws-sam-cli/raw/575253ac235ae69c846fc5b8312b3be7e1721fcb/tests%2Funit%2Fcli%2Ftest_types.py","contents_url":"https://api.github.com/repos/aws/aws-sam-cli/contents/tests%2Funit%2Fcli%2Ftest_types.py?ref=575253ac235ae69c846fc5b8312b3be7e1721fcb","patch":"@@ -238,6 +238,14 @@ def test_must_fail_on_invalid_format(self, input):\n                 [\"stage=int\", \"company:application=awesome-service\", \"company:department=engineering\"],\n                 {\"stage\": \"int\", \"company:application\": \"awesome-service\", \"company:department\": \"engineering\"},\n             ),\n+            # input as string with multiple key-values including spaces\n+            (('tag1=\"son of anton\" tag2=\"company abc\"',), {\"tag1\": \"son of anton\", \"tag2\": \"company abc\"}),\n+            (('tag1=\"son of anton\"   tag2=\"company abc\"',), {\"tag1\": \"son of anton\", \"tag2\": \"company abc\"}),\n+            (('\\'tag1=\"son of anton\" tag2=\"company abc\"\\'',), {\"tag1\": \"son of anton\", \"tag2\": \"company abc\"}),\n+            (\n+                ('tag1=\"son of anton\" tag2=\"company abc\" tag:3=\"dummy tag\"',),\n+                {\"tag1\": \"son of anton\", \"tag2\": \"company abc\", \"tag:3\": \"dummy tag\"},\n+            ),\n         ]\n     )\n     def test_successful_parsing(self, input, expected):"}]}