当前位置:首页> 正文

word2vec中的bin文件转换为txt 文件-BIN文件

from gensim.models import word2vec

# In gensim 1.0+ the word2vec-format loader lives on KeyedVectors; the
# Word2Vec.load_word2vec_format classmethod is deprecated there.
from gensim.models import KeyedVectors

# Read the vectors from the binary word2vec file ...
model = KeyedVectors.load_word2vec_format('/home/ubuntu/word2vec/PubMed-w2v.bin', binary=True)
# ... and write them back out in the plain-text word2vec format.
model.save_word2vec_format('/home/ubuntu/word2vec/PubMed-w2v.txt', binary=False)


运行代码时会出现如下警告(UserWarning):

word2vec中的bin文件转换为txt 文件


作为一个轻度强迫症患者,我看到这个 UserWarning 极为不爽,于是尝试安装 Pattern,结果怎么装都不成功。上网搜索原因时发现:Pattern 只支持 Python 2.x,而我用的是 Python 3.5.2,总不能因为一个 Warning 就退回 Python 2.x,于是我忍了。果然只是轻度强迫症。

第二种方法(与第一种大同小异,不过也记录一下)


from gensim.models import word2vec

# In gensim 1.0+ the word2vec-format loader lives on KeyedVectors; the
# Word2Vec.load_word2vec_format classmethod is deprecated there.
from gensim.models import KeyedVectors

model = KeyedVectors.load_word2vec_format('Path/to/GoogleNews-vectors-negative300.bin', binary=True)
# save() would write gensim's own serialized format, not readable text, so
# export with save_word2vec_format(binary=False) to actually get a txt file.
model.save_word2vec_format("file.txt", binary=False)
第三种方法(其实都一样啦)
import codecs
from gensim.models import Word2Vec

def main():
    """Export the GoogleNews binary word2vec model to a text file.

    Both paths are relative to the current working directory; the actual
    conversion is delegated to ``export_to_file``.
    """
    path_to_model = 'GoogleNews-vectors-negative300.bin'
    output_file = 'GoogleNews-vectors-negative300_test.txt'
    export_to_file(path_to_model, output_file)


def export_to_file(path_to_model, output_file):
    """Write every word vector of a binary word2vec model to a text file.

    Each output line holds one vocabulary entry: the word, a tab character,
    then the vector components joined by commas.

    Args:
        path_to_model: Path of the binary word2vec file to load.
        output_file: Path of the UTF-8 text file to create or overwrite.
    """
    # In gensim 1.0+ the word2vec-format loader lives on KeyedVectors; the
    # Word2Vec.load_word2vec_format classmethod is deprecated there.
    from gensim.models import KeyedVectors

    # Load first so that a failed load does not leave an empty output file.
    model = KeyedVectors.load_word2vec_format(path_to_model, binary=True)
    print('done loading Word2Vec')

    # gensim 4.x exposes the vocabulary as ``key_to_index``; earlier
    # releases expose it as ``vocab``. Both iterate over the words.
    vocab = getattr(model, 'key_to_index', None)
    if vocab is None:
        vocab = model.vocab

    # The context manager closes the file even if a write raises.
    with codecs.open(output_file, 'w', 'utf-8') as output:
        for mid in vocab:
            vector_str = ",".join(str(dimension) for dimension in model[mid])
            output.write(mid + "\t" + vector_str + "\n")

# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

word2vec中的bin文件转换为txt 文件

展开全文阅读

相关内容