马一丁

Fix the issue where words in the word cloud are displayed incorrectly.

@@ -1767,6 +1767,7 @@ class HTMLRenderer: @@ -1767,6 +1767,7 @@ class HTMLRenderer:
1767 def _collect_items(raw: Any) -> list[dict]: 1767 def _collect_items(raw: Any) -> list[dict]:
1768 """将多种词云输入格式(数组/对象/元组/纯文本)规整为统一的词条列表""" 1768 """将多种词云输入格式(数组/对象/元组/纯文本)规整为统一的词条列表"""
1769 collected: list[dict] = [] 1769 collected: list[dict] = []
  1770 + skip_keys = {"items", "data", "words", "labels", "datasets", "sourceData"}
1770 if isinstance(raw, list): 1771 if isinstance(raw, list):
1771 for item in raw: 1772 for item in raw:
1772 if isinstance(item, dict): 1773 if isinstance(item, dict):
@@ -1775,6 +1776,11 @@ class HTMLRenderer: @@ -1775,6 +1776,11 @@ class HTMLRenderer:
1775 category = item.get("category") or "" 1776 category = item.get("category") or ""
1776 if text: 1777 if text:
1777 collected.append({"word": str(text), "weight": weight, "category": str(category)}) 1778 collected.append({"word": str(text), "weight": weight, "category": str(category)})
  1779 + # 若嵌套了 items/words/data 列表,递归提取
  1780 + for nested_key in ("items", "words", "data"):
  1781 + nested = item.get(nested_key)
  1782 + if isinstance(nested, list):
  1783 + collected.extend(_collect_items(nested))
1778 elif isinstance(item, (list, tuple)) and item: 1784 elif isinstance(item, (list, tuple)) and item:
1779 text = item[0] 1785 text = item[0]
1780 weight = item[1] if len(item) > 1 else None 1786 weight = item[1] if len(item) > 1 else None
@@ -1784,8 +1790,21 @@ class HTMLRenderer: @@ -1784,8 +1790,21 @@ class HTMLRenderer:
1784 elif isinstance(item, str): 1790 elif isinstance(item, str):
1785 collected.append({"word": item, "weight": 1.0, "category": ""}) 1791 collected.append({"word": item, "weight": 1.0, "category": ""})
1786 elif isinstance(raw, dict): 1792 elif isinstance(raw, dict):
  1793 + # 若包含 items/words/data 列表,优先递归提取,不把键名当词
  1794 + handled = False
  1795 + for nested_key in ("items", "words", "data"):
  1796 + nested = raw.get(nested_key)
  1797 + if isinstance(nested, list):
  1798 + collected.extend(_collect_items(nested))
  1799 + handled = True
  1800 + if handled:
  1801 + return collected
  1802 +
  1803 + # 非Chart结构且不包含skip_keys时,把key/value当作词云条目
1787 if not {"labels", "datasets"}.intersection(raw.keys()): 1804 if not {"labels", "datasets"}.intersection(raw.keys()):
1788 for text, weight in raw.items(): 1805 for text, weight in raw.items():
  1806 + if text in skip_keys:
  1807 + continue
1789 collected.append({"word": str(text), "weight": weight, "category": ""}) 1808 collected.append({"word": str(text), "weight": weight, "category": ""})
1790 return collected 1809 return collected
1791 1810
@@ -1793,14 +1812,21 @@ class HTMLRenderer: @@ -1793,14 +1812,21 @@ class HTMLRenderer:
1793 seen: set[str] = set() 1812 seen: set[str] = set()
1794 candidates = [] 1813 candidates = []
1795 if isinstance(props, dict): 1814 if isinstance(props, dict):
1796 - for key in ("data", "items", "words"):  
1797 - if key in props:  
1798 - candidates.append(props[key]) 1815 + # 仅接受明确的词条数组字段,避免将嵌套items误当作词条
  1816 + if "data" in props and isinstance(props.get("data"), list):
  1817 + candidates.append(props["data"])
  1818 + if "words" in props and isinstance(props.get("words"), list):
  1819 + candidates.append(props["words"])
  1820 + if "items" in props and isinstance(props.get("items"), list):
  1821 + candidates.append(props["items"])
1799 candidates.append((props or {}).get("sourceData")) 1822 candidates.append((props or {}).get("sourceData"))
1800 1823
1801 # 允许使用block.data兜底,避免缺失props时出现空白 1824 # 允许使用block.data兜底,避免缺失props时出现空白
1802 if block_data is not None: 1825 if block_data is not None:
1803 - candidates.append(block_data) 1826 + if isinstance(block_data, dict) and "items" in block_data and isinstance(block_data.get("items"), list):
  1827 + candidates.append(block_data["items"])
  1828 + else:
  1829 + candidates.append(block_data)
1804 1830
1805 for raw in candidates: 1831 for raw in candidates:
1806 for item in _collect_items(raw): 1832 for item in _collect_items(raw):