抽取MTK的待翻译字符串

写了个脚本，根据string_resource_usage.txt，从ref_list.txt抽取实际用到的字符串，提炼出来给人翻译。Python性能还是不错的，十来秒的功夫就处理完了。看到有个工具竟然还调用了Excel来处理，估计是MFC类的程序，处理得很慢。

如果拷贝过去后运行报错，请把print语句都去掉就好了。

#! /usr/bin/env python

#coding=utf-8

import codecs,re,os

# When true, a string ID is struck from the list of used IDs as soon as one
# ref-list row carrying it has been kept, so later rows with the same ID are
# skipped unless the usage file listed that ID more than once.
REMOVE_DUPLICATE_SUPPORT = True

def getUsageSting(usageFile='string_resource_usage.txt',
                  reflist='ref_list.txt',
                  output='to_trans.txt',
                  usagePattern=None,
                  removeDuplicate=None):
    """Copy the ref-list rows whose string ID is actually used into *output*.

    The string IDs are collected from *usageFile* with ``re.findall`` and
    matched against the first TAB-separated field of every row of *reflist*
    (read as UTF-16).  The first three rows and the last row of *reflist* are
    always copied verbatim.  The result is written to *output* as UTF-16.
    Progress messages are printed along the way; if either input file is
    missing a message is printed and nothing is written.

    Runs unchanged on Python 2 and Python 3.

    Args:
        usageFile: path of the resource-usage report to scan for string IDs.
        reflist: path of the UTF-16 reference list to filter.
        output: path of the UTF-16 file to create.
        usagePattern: regular expression (text or compiled) whose single
            capturing group yields one string ID per match.  ``None`` selects
            the built-in default (see the review note below).
        removeDuplicate: when true, every occurrence of an ID in *usageFile*
            lets through at most one row of *reflist*.  ``None`` selects the
            module-level ``REMOVE_DUPLICATE_SUPPORT`` flag.

    Returns:
        None.
    """
    if usagePattern is None:
        # NOTE(review): this default consists solely of lazy wildcards, so it
        # can only ever capture empty strings -- it looks as if the literal
        # delimiters were stripped when the script was pasted.  It is kept
        # byte-for-byte because the intended pattern cannot be recovered from
        # this file; restore it here or pass ``usagePattern`` explicitly.
        usagePattern = '.*?.*?.*?(.*?).*?.*?.*?'
    usageobj = re.compile(usagePattern)

    if removeDuplicate is None:
        removeDuplicate = REMOVE_DUPLICATE_SUPPORT

    # Bail out on the first missing input, usage report first.
    for path in (usageFile, reflist):
        if not os.path.isfile(path):
            print(u'%s不存在' % path)
            return

    print(u'解析%s文件' % usageFile)
    # ``with`` closes the handle even on error and, unlike the former
    # try/finally, does not touch an unbound name when open() itself fails.
    with open(usageFile) as fSrc:
        allText = fSrc.read()
    objs = usageobj.findall(allText)
    print(u'%s的字符串个数:%d' % (usageFile, len(objs)))

    # Occurrence count per used ID.  "count > 0" is the old "strid in objs"
    # and "count -= 1" is the old "objs.remove(strid)", but each lookup is
    # O(1) instead of a scan over the whole list.
    remaining = {}
    for strid in objs:
        remaining[strid] = remaining.get(strid, 0) + 1

    print(u'解析%s' % reflist)
    with codecs.open(reflist, 'r', 'utf-16') as fRef:
        lines = fRef.readlines()
    totalline = len(lines)
    print(u'%s的行数:%d' % (reflist, totalline))

    newlineList = []
    for linenum, line in enumerate(lines, 1):
        # The first three rows and the very last row are kept unconditionally.
        if linenum < 4 or linenum == totalline:
            newlineList.append(line)
            continue
        # The separator literal had been lost (``split('')`` always raises
        # ValueError).  TAB is presumably what was meant, since the list is
        # read as UTF-16 delimited text -- confirm against a real ref list.
        strid = line.split('\t')[0]
        if remaining.get(strid, 0) > 0:
            newlineList.append(line)
            if removeDuplicate:
                remaining[strid] -= 1

    # The reported figure counts every kept row, including the four rows that
    # are always copied.
    print(u'从%s找到的待翻译字串条数:%d' % (reflist, len(newlineList)))

    with codecs.open(output, 'w', 'utf-16') as foutput:
        foutput.write(''.join(newlineList))
    print(u'已成功生成%s文件' % output)

def main():
    """Print the usage hint, run the extraction, then wait for Enter.

    The final prompt keeps the console window open when the script is started
    by double-clicking it.  Runs unchanged on Python 2 and Python 3.
    """
    print(u'用法:把本文件、string_resource_usage.txt、ref_list.txt置于同一目录，双击运行即可')
    getUsageSting()

    # raw_input exists only on Python 2; Python 3 renamed it to input.
    try:
        wait_for_enter = raw_input
    except NameError:
        wait_for_enter = input
    wait_for_enter("\nPress Enter to terminate program.")

# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()