字母组合计算

888 查看

确定常用单词的各种字母组合频率

1.将单词切分成不同的字母组合
2.计算每个字母组合出现的次数

编码实现

#coding=utf-8
import fileinput
from collections import Counter
# Sample word; not referenced anywhere in the visible code.
testword="find"
# Shortest letter-combination length collected by get_all_slice_list.
minslice=2
# Longest letter-combination length collected by get_all_slice_list (inclusive).
maxslice=5
# Not referenced anywhere in the visible code.
# NOTE(review): confirm nothing else uses it before removing.
cutnumber=2

def cut_word(inword, slicenumber):
    """Split ``inword`` into consecutive, non-overlapping chunks.

    Chunks are taken from the start of ``inword`` in steps of
    ``slicenumber``. A trailing remainder shorter than ``slicenumber``
    is dropped.

    Args:
        inword: The sequence (normally a word) to cut up.
        slicenumber: Length of every chunk.

    Returns:
        A list of the full-length chunks, in left-to-right order.
    """
    # Last index at which a full-length chunk can still begin.
    last_start = len(inword) - slicenumber
    return [
        inword[start:start + slicenumber]
        for start in range(0, last_start + 1, slicenumber)
    ]

def get_slice_list(inword, slicenumber):
    """Return the distinct substrings of ``inword`` of length ``slicenumber``.

    Every start position that leaves room for a full-length substring
    contributes one slice; repeated slices within the same word are
    reported only once.

    Args:
        inword: The word to scan. An empty word yields an empty list.
        slicenumber: Length of the substrings to collect. Values below 1
            yield an empty list.

    Returns:
        A list of the unique substrings. The list comes from a set, so its
        order is unspecified.
    """
    if slicenumber < 1:
        return []
    # Last index at which a full-length substring can still begin; negative
    # when the word is shorter than slicenumber, which leaves the range empty.
    last_start = len(inword) - slicenumber
    distinct = {
        inword[start:start + slicenumber]
        for start in range(last_start + 1)
    }
    return list(distinct)

def get_all_slice_list(inword):
    """Collect the letter combinations of ``inword`` for every configured length.

    Calls get_slice_list once per length from ``minslice`` through
    ``maxslice`` (inclusive) and concatenates the results in order of
    increasing length.

    Args:
        inword: The word to break into letter combinations.

    Returns:
        One flat list holding the combinations of every length.
    """
    return [
        piece
        for width in range(minslice, maxslice + 1)
        for piece in get_slice_list(inword, width)
    ]

# Gather the letter combinations of every line in 1.txt; each line, with
# trailing whitespace stripped, is treated as one word.
alllist = []
for line in fileinput.input("1.txt"):
    aaa = line.rstrip()
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("read   :%s" % (aaa))
    alllist.extend(get_all_slice_list(aaa))
# (combination, count) pairs, most frequent first.
c = Counter(alllist).most_common()


print("##########begin write file...")
# The with-block closes jieguo.txt even if a write fails part-way through.
with open('jieguo.txt', 'w') as output:
    for i in c:
        # One "combination,count" row per line.
        output.write("%s,%d\n" % i)

print("##########write end")