Showing 1 changed file with 38 additions and 10 deletions
| @@ -51,9 +51,37 @@ class TemplateSection: | @@ -51,9 +51,37 @@ class TemplateSection: | ||
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | 53 | ||
| 54 | -heading_pattern = re.compile(r"^(#{1,6})\s+(.*)$") | ||
| 55 | -bullet_pattern = re.compile(r"^[-*+]\s+(.*)$") | ||
| 56 | -number_pattern = re.compile(r"^(?P<num>\d+(?:\.\d+)*)(?:[\s、::.-]+(?P<label>.*))?$") | 54 | +# The parsing expressions use explicit character classes instead of `.*` so that a match can never |
| 55 | +# cross a line break; the intent is to reduce regular-expression denial-of-service (ReDoS) risk on untrusted template text. | ||
| 56 | +heading_pattern = re.compile( | ||
| 57 | + r""" | ||
| 58 | + (?P<marker>\#{1,6}) # Markdown heading markers | ||
| 59 | + [ \t]+ # required whitespace | ||
| 60 | + (?P<title>[^\r\n]+) # heading text without newline characters | ||
| 61 | + """, | ||
| 62 | + re.VERBOSE, | ||
| 63 | +) | ||
| 64 | +bullet_pattern = re.compile( | ||
| 65 | + r""" | ||
| 66 | + (?P<marker>[-*+]) # list bullet symbol | ||
| 67 | + [ \t]+ | ||
| 68 | + (?P<title>[^\r\n]+) | ||
| 69 | + """, | ||
| 70 | + re.VERBOSE, | ||
| 71 | +) | ||
| 72 | +number_pattern = re.compile( | ||
| 73 | + r""" | ||
| 74 | + (?P<num> | ||
| 75 | + (?:0|[1-9]\d*) | ||
| 76 | + (?:\.(?:0|[1-9]\d*))* | ||
| 77 | + ) | ||
| 78 | + (?: | ||
| 79 | + (?:[ \t\u00A0\u3000、::-]+|\.(?!\d))+ | ||
| 80 | + (?P<label>[^\r\n]*) | ||
| 81 | + )? | ||
| 82 | + """, | ||
| 83 | + re.VERBOSE, | ||
| 84 | +) | ||
| 57 | 85 | ||
| 58 | 86 | ||
| 59 | def parse_template_sections(template_md: str) -> List[TemplateSection]: | 87 | def parse_template_sections(template_md: str) -> List[TemplateSection]: |
| @@ -128,10 +156,10 @@ def _classify_line(stripped: str, indent: int) -> Optional[dict]: | @@ -128,10 +156,10 @@ def _classify_line(stripped: str, indent: int) -> Optional[dict]: | ||
| 128 | dict | None: 识别后的元数据;无法识别时返回None。 | 156 | dict | None: 识别后的元数据;无法识别时返回None。 |
| 129 | """ | 157 | """ |
| 130 | 158 | ||
| 131 | - heading_match = heading_pattern.match(stripped) | 159 | + heading_match = heading_pattern.fullmatch(stripped) |
| 132 | if heading_match: | 160 | if heading_match: |
| 133 | - level = len(heading_match.group(1)) | ||
| 134 | - payload = _strip_markup(heading_match.group(2).strip()) | 161 | + level = len(heading_match.group("marker")) |
| 162 | + payload = _strip_markup(heading_match.group("title").strip()) | ||
| 135 | title_info = _split_number(payload) | 163 | title_info = _split_number(payload) |
| 136 | slug = _build_slug(title_info["number"], title_info["title"]) | 164 | slug = _build_slug(title_info["number"], title_info["title"]) |
| 137 | return { | 165 | return { |
| @@ -143,9 +171,9 @@ def _classify_line(stripped: str, indent: int) -> Optional[dict]: | @@ -143,9 +171,9 @@ def _classify_line(stripped: str, indent: int) -> Optional[dict]: | ||
| 143 | "slug": slug, | 171 | "slug": slug, |
| 144 | } | 172 | } |
| 145 | 173 | ||
| 146 | - bullet_match = bullet_pattern.match(stripped) | 174 | + bullet_match = bullet_pattern.fullmatch(stripped) |
| 147 | if bullet_match: | 175 | if bullet_match: |
| 148 | - payload = _strip_markup(bullet_match.group(1).strip()) | 176 | + payload = _strip_markup(bullet_match.group("title").strip()) |
| 149 | title_info = _split_number(payload) | 177 | title_info = _split_number(payload) |
| 150 | slug = _build_slug(title_info["number"], title_info["title"]) | 178 | slug = _build_slug(title_info["number"], title_info["title"]) |
| 151 | is_section = indent <= 1 | 179 | is_section = indent <= 1 |
| @@ -160,7 +188,7 @@ def _classify_line(stripped: str, indent: int) -> Optional[dict]: | @@ -160,7 +188,7 @@ def _classify_line(stripped: str, indent: int) -> Optional[dict]: | ||
| 160 | } | 188 | } |
| 161 | 189 | ||
| 162 | # 兼容“1.1 ...”没有前缀符号的行 | 190 | # 兼容“1.1 ...”没有前缀符号的行 |
| 163 | - number_match = number_pattern.match(stripped) | 191 | + number_match = number_pattern.fullmatch(stripped) |
| 164 | if number_match and number_match.group("label"): | 192 | if number_match and number_match.group("label"): |
| 165 | payload = stripped | 193 | payload = stripped |
| 166 | title = number_match.group("label").strip() | 194 | title = number_match.group("label").strip() |
| @@ -201,7 +229,7 @@ def _split_number(payload: str) -> dict: | @@ -201,7 +229,7 @@ def _split_number(payload: str) -> dict: | ||
| 201 | 返回: | 229 | 返回: |
| 202 | dict: 包含 number/title/display。 | 230 | dict: 包含 number/title/display。 |
| 203 | """ | 231 | """ |
| 204 | - match = number_pattern.match(payload) | 232 | + match = number_pattern.fullmatch(payload) |
| 205 | number = match.group("num") if match else "" | 233 | number = match.group("num") if match else "" |
| 206 | label = match.group("label") if match else payload | 234 | label = match.group("label") if match else payload |
| 207 | label = (label or "").strip() | 235 | label = (label or "").strip() |
-
Please register or login to post a comment