# Temp
from docx import Document
import wordfreq
def classify_word(word, *, high_freq=0.001, mid_freq=0.0001):
    """Grade a word's difficulty from its English frequency in wordfreq.

    Args:
        word: The word to grade.
        high_freq: Frequencies strictly above this are graded "低".
        mid_freq: Frequencies strictly above this, up to and including
            ``high_freq``, are graded "中".

    Returns:
        "低" (low difficulty) for high-frequency words, "中" (medium) for
        mid-frequency words, and "高" (high difficulty) for everything
        else. Blank input is graded "高" without a frequency lookup.
    """
    # Blank text carries no word to look up; grade it as the rarest
    # bucket directly.
    if not word.strip():
        return "高"

    freq = wordfreq.word_frequency(word, "en")
    if freq > high_freq:
        return "低"
    # Reaching here already implies freq <= high_freq, so only the lower
    # bound needs checking.
    if freq > mid_freq:
        return "中"
    return "高"
def process_docx(file_path):
    """Read a .docx file and bucket its paragraphs by word difficulty.

    Each non-blank paragraph is graded by its first whitespace-separated
    token (lower-cased) via ``classify_word``; the full paragraph text is
    appended to the matching bucket in document order.

    Args:
        file_path: Path handed to ``Document``.

    Returns:
        A ``(low_level, medium_level, high_level)`` tuple of lists holding
        the paragraph texts graded "低", "中" and anything else.
    """
    doc = Document(file_path)
    low_level, medium_level, high_level = [], [], []
    # Any level other than "低"/"中" falls through to the high bucket.
    buckets = {"低": low_level, "中": medium_level}

    for para in doc.paragraphs:
        text = para.text
        words = text.split()
        if not words:
            # Empty or whitespace-only paragraph: nothing to grade.
            continue
        # split() already trims surrounding whitespace, so only the case
        # needs normalising before the lookup.
        level = classify_word(words[0].lower())
        buckets.get(level, high_level).append(text)

    return low_level, medium_level, high_level
def main(file_path="vocabulary.docx", output_path="word_levels.txt"):
    """Grade the vocabulary document and write the result to a text file.

    Args:
        file_path: The .docx file passed to ``process_docx``.
        output_path: UTF-8 text file that receives the three sections.

    The output has a "【低难度】", "【中难度】" and "【高难度】" section, each
    header followed by its paragraphs one per line, with a blank line
    between sections.
    """
    low, medium, high = process_docx(file_path)

    sections = (
        ("【低难度】\n", low),
        ("【中难度】\n", medium),
        ("【高难度】\n", high),
    )
    # Each section ends with a newline; joining with "\n" adds the blank
    # separator line between sections.
    rendered = "\n".join(
        header + "\n".join(entries) + "\n" for header, entries in sections
    )

    # write(), not writelines(): the payload is one string, and
    # writelines() would iterate over it character by character.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(rendered)

    print(f"单词分级已保存到 {output_path}")
# Run the grading pipeline only when executed as a script.
if __name__ == "__main__":
    main()
from docx import Document
import wordfreq
# NOTE(review): classify_word is also defined earlier in this file with the
# same logic; this later definition rebinds the name. Confirm whether the
# repeated copies are an accidental paste.
def classify_word(word, *, high_freq=0.001, mid_freq=0.0001):
    """Grade a word's difficulty from its English frequency in wordfreq.

    Args:
        word: The word to grade.
        high_freq: Frequencies strictly above this are graded "低".
        mid_freq: Frequencies strictly above this, up to and including
            ``high_freq``, are graded "中".

    Returns:
        "低" (low difficulty) for high-frequency words, "中" (medium) for
        mid-frequency words, and "高" (high difficulty) for everything
        else. Blank input is graded "高" without a frequency lookup.
    """
    # Blank text carries no word to look up; grade it as the rarest
    # bucket directly.
    if not word.strip():
        return "高"

    freq = wordfreq.word_frequency(word, "en")
    if freq > high_freq:
        return "低"
    # Reaching here already implies freq <= high_freq, so only the lower
    # bound needs checking.
    if freq > mid_freq:
        return "中"
    return "高"
# NOTE(review): process_docx is also defined earlier in this file with the
# same logic; this later definition rebinds the name. Confirm whether the
# repeated copies are an accidental paste.
def process_docx(file_path):
    """Read a .docx file and bucket its paragraphs by word difficulty.

    Each non-blank paragraph is graded by its first whitespace-separated
    token (lower-cased) via ``classify_word``; the full paragraph text is
    appended to the matching bucket in document order.

    Args:
        file_path: Path handed to ``Document``.

    Returns:
        A ``(low_level, medium_level, high_level)`` tuple of lists holding
        the paragraph texts graded "低", "中" and anything else.
    """
    doc = Document(file_path)
    low_level, medium_level, high_level = [], [], []
    # Any level other than "低"/"中" falls through to the high bucket.
    buckets = {"低": low_level, "中": medium_level}

    for para in doc.paragraphs:
        text = para.text
        words = text.split()
        if not words:
            # Empty or whitespace-only paragraph: nothing to grade.
            continue
        # split() already trims surrounding whitespace, so only the case
        # needs normalising before the lookup.
        level = classify_word(words[0].lower())
        buckets.get(level, high_level).append(text)

    return low_level, medium_level, high_level
# NOTE(review): main is also defined earlier in this file with the same
# logic; this later definition rebinds the name. Confirm whether the
# repeated copies are an accidental paste.
def main(file_path="vocabulary.docx", output_path="word_levels.txt"):
    """Grade the vocabulary document and write the result to a text file.

    Args:
        file_path: The .docx file passed to ``process_docx``.
        output_path: UTF-8 text file that receives the three sections.

    The output has a "【低难度】", "【中难度】" and "【高难度】" section, each
    header followed by its paragraphs one per line, with a blank line
    between sections.
    """
    low, medium, high = process_docx(file_path)

    sections = (
        ("【低难度】\n", low),
        ("【中难度】\n", medium),
        ("【高难度】\n", high),
    )
    # Each section ends with a newline; joining with "\n" adds the blank
    # separator line between sections.
    rendered = "\n".join(
        header + "\n".join(entries) + "\n" for header, entries in sections
    )

    # write(), not writelines(): the payload is one string, and
    # writelines() would iterate over it character by character.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(rendered)

    print(f"单词分级已保存到 {output_path}")
# NOTE(review): an identical entry-point guard appears earlier in this file,
# so running the script calls main() again here. Confirm whether the
# repeated copies are an accidental paste.
if __name__ == "__main__":
    main()
from docx import Document
import wordfreq
# NOTE(review): classify_word is also defined earlier in this file with the
# same logic; this later definition rebinds the name. Confirm whether the
# repeated copies are an accidental paste.
def classify_word(word, *, high_freq=0.001, mid_freq=0.0001):
    """Grade a word's difficulty from its English frequency in wordfreq.

    Args:
        word: The word to grade.
        high_freq: Frequencies strictly above this are graded "低".
        mid_freq: Frequencies strictly above this, up to and including
            ``high_freq``, are graded "中".

    Returns:
        "低" (low difficulty) for high-frequency words, "中" (medium) for
        mid-frequency words, and "高" (high difficulty) for everything
        else. Blank input is graded "高" without a frequency lookup.
    """
    # Blank text carries no word to look up; grade it as the rarest
    # bucket directly.
    if not word.strip():
        return "高"

    freq = wordfreq.word_frequency(word, "en")
    if freq > high_freq:
        return "低"
    # Reaching here already implies freq <= high_freq, so only the lower
    # bound needs checking.
    if freq > mid_freq:
        return "中"
    return "高"
# NOTE(review): process_docx is also defined earlier in this file with the
# same logic; this later definition rebinds the name. Confirm whether the
# repeated copies are an accidental paste.
def process_docx(file_path):
    """Read a .docx file and bucket its paragraphs by word difficulty.

    Each non-blank paragraph is graded by its first whitespace-separated
    token (lower-cased) via ``classify_word``; the full paragraph text is
    appended to the matching bucket in document order.

    Args:
        file_path: Path handed to ``Document``.

    Returns:
        A ``(low_level, medium_level, high_level)`` tuple of lists holding
        the paragraph texts graded "低", "中" and anything else.
    """
    doc = Document(file_path)
    low_level, medium_level, high_level = [], [], []
    # Any level other than "低"/"中" falls through to the high bucket.
    buckets = {"低": low_level, "中": medium_level}

    for para in doc.paragraphs:
        text = para.text
        words = text.split()
        if not words:
            # Empty or whitespace-only paragraph: nothing to grade.
            continue
        # split() already trims surrounding whitespace, so only the case
        # needs normalising before the lookup.
        level = classify_word(words[0].lower())
        buckets.get(level, high_level).append(text)

    return low_level, medium_level, high_level
# NOTE(review): main is also defined earlier in this file with the same
# logic; this later definition rebinds the name. Confirm whether the
# repeated copies are an accidental paste.
def main(file_path="vocabulary.docx", output_path="word_levels.txt"):
    """Grade the vocabulary document and write the result to a text file.

    Args:
        file_path: The .docx file passed to ``process_docx``.
        output_path: UTF-8 text file that receives the three sections.

    The output has a "【低难度】", "【中难度】" and "【高难度】" section, each
    header followed by its paragraphs one per line, with a blank line
    between sections.
    """
    low, medium, high = process_docx(file_path)

    sections = (
        ("【低难度】\n", low),
        ("【中难度】\n", medium),
        ("【高难度】\n", high),
    )
    # Each section ends with a newline; joining with "\n" adds the blank
    # separator line between sections.
    rendered = "\n".join(
        header + "\n".join(entries) + "\n" for header, entries in sections
    )

    # write(), not writelines(): the payload is one string, and
    # writelines() would iterate over it character by character.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(rendered)

    print(f"单词分级已保存到 {output_path}")
# NOTE(review): an identical entry-point guard appears earlier in this file,
# so running the script calls main() again here. Confirm whether the
# repeated copies are an accidental paste.
if __name__ == "__main__":
    main()