写了个脚本,根据string_resource_usage.txt,从ref_list.txt中抽取实际用到的字符串,提炼出来给人翻译。Python的性能还是不错的,十来秒的功夫就处理完了。看到有个工具竟然还调用了Excel来处理,估计是MFC类的程序,处理得很慢。
如果拷贝过去后编译有问题,把print语句都去掉就好了。
#! /usr/bin/env python
#coding=utf-8
import codecs,re,os
# When True, every ref_list.txt line that matches a used string ID consumes
# one occurrence of that ID from the IDs collected out of
# string_resource_usage.txt, so a later ref_list.txt line carrying the same ID
# is only emitted again if the ID was collected more than once.
# When False, every ref_list.txt line whose ID was collected is emitted.
REMOVE_DUPLICATE_SUPPORT = True
def getUsageSting():
    """Write to_trans.txt with the ref_list.txt lines whose IDs are in use.

    Reads ``string_resource_usage.txt`` and ``ref_list.txt`` (UTF-16) from the
    current working directory and writes ``to_trans.txt`` (UTF-16) holding:

    * the first three lines and the last line of ``ref_list.txt``, copied
      unconditionally (presumably header/footer rows -- verify against a
      real file), and
    * every other line whose first field is one of the IDs captured from
      ``string_resource_usage.txt``.

    Progress messages are printed to stdout.  If either input file is
    missing, a message is printed and the function returns without writing
    anything.  Always returns None.

    ``print`` is called with a single parenthesised argument throughout so
    the statements behave the same on Python 2 and also parse on Python 3.
    """
    # NOTE(review): the pattern is reproduced verbatim.  It contains no
    # literal text around the capture group, which suggests markup was lost
    # when the script was pasted -- confirm against the original script.
    usageobj = re.compile('.*?.*?.*?(.*?).*?.*?.*?')
    newlineList = []
    usageFile = 'string_resource_usage.txt'
    reflist = 'ref_list.txt'
    output = 'to_trans.txt'

    if not os.path.isfile(usageFile):
        print(u'string_resource_usage.txt不存在')
        return
    if not os.path.isfile(reflist):
        print(u'ref_list.txt不存在')
        return

    print(u'解析string_resource_usage.txt文件')
    # "with" closes the file on every path; the previous try/finally raised
    # NameError from the finally clause when open() itself failed.
    with open(usageFile) as fSrc:
        allText = fSrc.read()
    objs = usageobj.findall(allText)
    print(u'string_resource_usage.txt的字符串个数:%d' % len(objs))

    # Occurrence count per captured ID.  This gives constant-time lookups
    # while keeping the list semantics of "remove one occurrence per match".
    pending = {}
    for usedId in objs:
        pending[usedId] = pending.get(usedId, 0) + 1

    print(u'解析ref_list.txt')
    with codecs.open(reflist, 'r', 'utf-16') as fRef:
        lines = fRef.readlines()
    totalline = len(lines)
    print(u'ref_list.txt的行数:%d' % totalline)

    for linenum, line in enumerate(lines, 1):
        # The first three lines and the last line are always copied.
        if linenum < 4 or linenum == totalline:
            newlineList.append(line)
            continue
        # NOTE(review): the separator literal was empty in the pasted script,
        # and str.split('') always raises ValueError.  A tab is assumed here
        # (a literal tab character was probably stripped together with the
        # indentation) -- confirm against the real ref_list.txt layout.
        strid = line.split('\t')[0]
        if strid not in pending:
            continue
        newlineList.append(line)
        if REMOVE_DUPLICATE_SUPPORT:
            # Consume one occurrence so the same ID is not emitted again
            # more often than it was captured from the usage file.
            pending[strid] -= 1
            if not pending[strid]:
                del pending[strid]

    # The reported count includes the unconditionally copied lines above.
    print(u'从ref_list.txt找到的待翻译字串条数:%d' % len(newlineList))
    content = ''.join(newlineList)

    with codecs.open(output, 'w', 'utf-16') as foutput:
        foutput.write(content)
    print(u'已成功生成to_trans.txt文件')
def main():
    """Print the usage hint, run the extraction, then wait for Enter.

    The final prompt blocks until the user presses Enter (presumably so the
    console window stays open when the script is started by double-click).
    Returns None.
    """
    print(u'用法:把本文件、string_resource_usage.txt、ref_list.txt置于同一目录,双击运行即可')
    getUsageSting()
    # raw_input exists only on Python 2; Python 3 renamed it to input.
    try:
        waitForEnter = raw_input
    except NameError:
        waitForEnter = input
    waitForEnter("\nPress Enter to terminate program.")
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
...