1. Code
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from collections import Counter
import jieba  # jieba segmentation is needed if you are processing Chinese text
import re

# Stop-word list; this is only an example, add to or modify it as needed
stopwords = ['的', '是', '在', '了', '有', '和', '人', '我', '他', '她', '它', '们', '...',
             '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '12', '20', '30']

# Read the CSV file; the text is assumed to be in a column named 'text'
df = pd.read_csv('word.csv', encoding='gbk')
texts = df['text'].tolist()

# Clean the data and segment it into words
cleaned_texts = []
for text in texts:
    # Strip punctuation and anything that is neither Chinese nor a word character
    cleaned_text = re.sub(r'[^\u4e00-\u9fa5\w]', '', text)
    # Segment with jieba
    words = jieba.cut(cleaned_text)
    # Drop stop words
    filtered_words = [word for word in words if word not in stopwords]
    cleaned_texts.append(' '.join(filtered_words))

# Build the word-frequency counter
word_freq = Counter()
for text in cleaned_texts:
    word_freq.update(text.split())

# Draw the word cloud
wordcloud = WordCloud(font_path='simhei.ttf',  # a font file that can render Chinese
                      background_color='white',
                      stopwords=None,  # stop words were already removed in the step above
                      min_font_size=10).generate_from_frequencies(word_freq)
plt.figure(figsize=(10, 10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()
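Two optional hardening tweaks, shown as a sketch rather than part of the original script (the output file name wordcloud.png is an assumption). If the 'text' column contains empty cells, re.sub will raise a TypeError on the NaN values, so it is safer to drop them when building the list:

texts = df['text'].dropna().astype(str).tolist()  # skip empty cells so re.sub never sees NaN

And to keep the image on disk rather than only displaying it, the wordcloud package provides a built-in save method that can be called after plt.show():

wordcloud.to_file('wordcloud.png')  # write the rendered cloud to a PNG (assumed file name)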
2. Results
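Running the script opens a matplotlib window showing the generated word cloud: the most frequent words from the CSV appear largest, rendered in the SimHei font on a white background.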