# Temp

from docx import Document
import wordfreq

def classify_word(word):
    """Grade a word's difficulty from its English word frequency.

    The frequency is looked up with ``wordfreq.word_frequency(word, "en")``
    and sorted into one of three bands using two fixed cut-offs.

    Args:
        word: The word to look up.

    Returns:
        "低" (easy) when the frequency is above 0.001, "中" (medium) when it
        is above 0.0001 but not above 0.001, and "高" (hard) otherwise.
    """
    frequency = wordfreq.word_frequency(word, "en")

    # Guard clauses run from the most common band down to the rarest one.
    if frequency > 0.001:
        return "低"
    if frequency > 0.0001:
        return "中"
    return "高"

def process_docx(file_path):
    """Read a .docx file and group its paragraphs by difficulty level.

    Each non-blank paragraph is graded by its first whitespace-separated
    token only, i.e. the vocabulary word is assumed to come first in the
    paragraph. The full paragraph text is what gets stored.

    Args:
        file_path: Path of the .docx file, passed straight to ``Document``.

    Returns:
        A tuple ``(low_level, medium_level, high_level)`` of lists holding
        the paragraph texts graded "低", "中" and anything else, in document
        order.
    """
    document = Document(file_path)
    easy, moderate, hard = [], [], []
    # Any level other than "低" or "中" falls through to the hard list.
    buckets = {"低": easy, "中": moderate}

    for paragraph in document.paragraphs:
        text = paragraph.text
        tokens = text.split()
        if not tokens:
            continue  # blank or whitespace-only paragraph: nothing to grade

        # Lower-case the leading token before looking up its frequency.
        head = tokens[0].strip().lower()
        buckets.get(classify_word(head), hard).append(text)

    return easy, moderate, hard

def main(file_path="vocabulary.docx", output_path="word_levels.txt"):
    """Grade the entries of a .docx vocabulary list and save them by level.

    Reads the document with ``process_docx`` and writes three sections
    (low, medium, high difficulty) to a UTF-8 text file. Each section is a
    header line followed by the matching paragraphs, one per line.

    Args:
        file_path: Path of the .docx file to read.
        output_path: Path of the text file to create or overwrite.
    """
    low, medium, high = process_docx(file_path)

    # (header, entries, trailer): the last section ends with a single
    # newline, the others with a blank line separating them from the next.
    sections = (
        ("【低难度】\n", low, "\n\n"),
        ("【中难度】\n", medium, "\n\n"),
        ("【高难度】\n", high, "\n"),
    )

    with open(output_path, "w", encoding="utf-8") as f:
        for header, entries, trailer in sections:
            f.write(header)
            # write(), not writelines(): the payload is one string, and
            # writelines() would iterate it and write character by character.
            f.write("\n".join(entries) + trailer)

    print(f"单词分级已保存到 {output_path}")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

 

from docx import Document
import wordfreq

def classify_word(word):
    """Grade a word's difficulty from its English word frequency.

    The frequency is looked up with ``wordfreq.word_frequency(word, "en")``
    and sorted into one of three bands using two fixed cut-offs.

    Args:
        word: The word to look up.

    Returns:
        "低" (easy) when the frequency is above 0.001, "中" (medium) when it
        is above 0.0001 but not above 0.001, and "高" (hard) otherwise.
    """
    frequency = wordfreq.word_frequency(word, "en")

    # Guard clauses run from the most common band down to the rarest one.
    if frequency > 0.001:
        return "低"
    if frequency > 0.0001:
        return "中"
    return "高"

def process_docx(file_path):
    """Read a .docx file and group its paragraphs by difficulty level.

    Each non-blank paragraph is graded by its first whitespace-separated
    token only, i.e. the vocabulary word is assumed to come first in the
    paragraph. The full paragraph text is what gets stored.

    Args:
        file_path: Path of the .docx file, passed straight to ``Document``.

    Returns:
        A tuple ``(low_level, medium_level, high_level)`` of lists holding
        the paragraph texts graded "低", "中" and anything else, in document
        order.
    """
    document = Document(file_path)
    easy, moderate, hard = [], [], []
    # Any level other than "低" or "中" falls through to the hard list.
    buckets = {"低": easy, "中": moderate}

    for paragraph in document.paragraphs:
        text = paragraph.text
        tokens = text.split()
        if not tokens:
            continue  # blank or whitespace-only paragraph: nothing to grade

        # Lower-case the leading token before looking up its frequency.
        head = tokens[0].strip().lower()
        buckets.get(classify_word(head), hard).append(text)

    return easy, moderate, hard

def main(file_path="vocabulary.docx", output_path="word_levels.txt"):
    """Grade the entries of a .docx vocabulary list and save them by level.

    Reads the document with ``process_docx`` and writes three sections
    (low, medium, high difficulty) to a UTF-8 text file. Each section is a
    header line followed by the matching paragraphs, one per line.

    Args:
        file_path: Path of the .docx file to read.
        output_path: Path of the text file to create or overwrite.
    """
    low, medium, high = process_docx(file_path)

    # (header, entries, trailer): the last section ends with a single
    # newline, the others with a blank line separating them from the next.
    sections = (
        ("【低难度】\n", low, "\n\n"),
        ("【中难度】\n", medium, "\n\n"),
        ("【高难度】\n", high, "\n"),
    )

    with open(output_path, "w", encoding="utf-8") as f:
        for header, entries, trailer in sections:
            f.write(header)
            # write(), not writelines(): the payload is one string, and
            # writelines() would iterate it and write character by character.
            f.write("\n".join(entries) + trailer)

    print(f"单词分级已保存到 {output_path}")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

 

from docx import Document
import wordfreq

def classify_word(word):
    """Grade a word's difficulty from its English word frequency.

    The frequency is looked up with ``wordfreq.word_frequency(word, "en")``
    and sorted into one of three bands using two fixed cut-offs.

    Args:
        word: The word to look up.

    Returns:
        "低" (easy) when the frequency is above 0.001, "中" (medium) when it
        is above 0.0001 but not above 0.001, and "高" (hard) otherwise.
    """
    frequency = wordfreq.word_frequency(word, "en")

    # Guard clauses run from the most common band down to the rarest one.
    if frequency > 0.001:
        return "低"
    if frequency > 0.0001:
        return "中"
    return "高"

def process_docx(file_path):
    """Read a .docx file and group its paragraphs by difficulty level.

    Each non-blank paragraph is graded by its first whitespace-separated
    token only, i.e. the vocabulary word is assumed to come first in the
    paragraph. The full paragraph text is what gets stored.

    Args:
        file_path: Path of the .docx file, passed straight to ``Document``.

    Returns:
        A tuple ``(low_level, medium_level, high_level)`` of lists holding
        the paragraph texts graded "低", "中" and anything else, in document
        order.
    """
    document = Document(file_path)
    easy, moderate, hard = [], [], []
    # Any level other than "低" or "中" falls through to the hard list.
    buckets = {"低": easy, "中": moderate}

    for paragraph in document.paragraphs:
        text = paragraph.text
        tokens = text.split()
        if not tokens:
            continue  # blank or whitespace-only paragraph: nothing to grade

        # Lower-case the leading token before looking up its frequency.
        head = tokens[0].strip().lower()
        buckets.get(classify_word(head), hard).append(text)

    return easy, moderate, hard

def main(file_path="vocabulary.docx", output_path="word_levels.txt"):
    """Grade the entries of a .docx vocabulary list and save them by level.

    Reads the document with ``process_docx`` and writes three sections
    (low, medium, high difficulty) to a UTF-8 text file. Each section is a
    header line followed by the matching paragraphs, one per line.

    Args:
        file_path: Path of the .docx file to read.
        output_path: Path of the text file to create or overwrite.
    """
    low, medium, high = process_docx(file_path)

    # (header, entries, trailer): the last section ends with a single
    # newline, the others with a blank line separating them from the next.
    sections = (
        ("【低难度】\n", low, "\n\n"),
        ("【中难度】\n", medium, "\n\n"),
        ("【高难度】\n", high, "\n"),
    )

    with open(output_path, "w", encoding="utf-8") as f:
        for header, entries, trailer in sections:
            f.write(header)
            # write(), not writelines(): the payload is one string, and
            # writelines() would iterate it and write character by character.
            f.write("\n".join(entries) + trailer)

    print(f"单词分级已保存到 {output_path}")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()