Home
last modified time | relevance | path

Searched refs:id2word (Results 1 – 25 of 49) sorted by relevance

12

/dports/textproc/py-gensim/gensim-4.0.1/gensim/corpora/
H A Dlowcorpus.py77 def __init__(self, fname, id2word=None, line2words=split_on_space): argument
99 if not id2word:
108 self.id2word = dict(zip(range(len(all_terms)), all_terms))
110 logger.info("using provided word mapping (%i ids)", len(id2word))
111 self.id2word = id2word
196 def save_corpus(fname, corpus, id2word=None, metadata=False): argument
223 if id2word is None:
225 id2word = utils.dict_from_corpus(corpus)
281 def id2word(self): member in LowCorpus
285 @id2word.setter
[all …]
H A Dbleicorpus.py78 self.id2word = dict(enumerate(words))
117 def save_corpus(fname, corpus, id2word=None, metadata=False): argument
141 if id2word is None:
143 id2word = utils.dict_from_corpus(corpus)
144 num_terms = len(id2word)
145 elif id2word:
146 num_terms = 1 + max(id2word)
164 fout.write(utils.to_utf8("%s\n" % id2word.get(featureid, '---')))
H A Ducicorpus.py191 self.id2word = dict(enumerate(words))
232 dictionary.id2token = self.id2word
233 dictionary.token2id = utils.revdict(self.id2word)
249 def save_corpus(fname, corpus, id2word=None, progress_cnt=10000, metadata=False): argument
275 if id2word is None:
277 id2word = utils.dict_from_corpus(corpus)
278 num_terms = len(id2word)
279 elif id2word:
280 num_terms = 1 + max(id2word)
289 fout.write(utils.to_utf8("%s\n" % id2word.get(featureid, '---')))
H A Dmalletcorpus.py57 def __init__(self, fname, id2word=None, metadata=False): argument
75 LowCorpus.__init__(self, fname, id2word)
140 def save_corpus(fname, corpus, id2word=None, metadata=False): argument
175 if id2word is None:
177 id2word = utils.dict_from_corpus(corpus)
195 words.extend([utils.to_unicode(id2word[wordid])] * int(value))
H A Dmmcorpus.py76 def save_corpus(fname, corpus, id2word=None, progress_cnt=1000, metadata=False): argument
112 num_terms = len(id2word) if id2word is not None else None
H A Dindexedcorpus.py72 def serialize(serializer, fname, corpus, id2word=None, index_fname=None, argument
122 offsets = serializer.save_corpus(fname, corpus, id2word, **kwargs)
/dports/textproc/py-gensim/gensim-4.0.1/gensim/models/
H A Drpmodel.py51 def __init__(self, corpus, id2word=None, num_topics=300): argument
66 self.id2word = id2word
84 if self.id2word is None:
86 self.id2word = utils.dict_from_corpus(corpus)
87 self.num_terms = len(self.id2word)
88 elif self.id2word:
89 self.num_terms = 1 + max(self.id2word)
H A Dnmf.py132 id2word=None, argument
188 self.id2word = id2word
203 if self.id2word is None:
204 self.id2word = utils.dict_from_corpus(corpus)
206 self.num_tokens = len(self.id2word)
302 topic = [(self.id2word[id], topic[id]) for id in bestn]
335 (self.id2word[id], value)
418 beststr = [(topic[_id], self.id2word[_id]) for _id in bestn] # membership, token
451 word_id = self.id2word.doc2bow([word_id])[0][0]
H A Dldaseqmodel.py124 self.id2word = id2word
125 if corpus is None and self.id2word is None:
130 if self.id2word is None:
132 self.id2word = utils.dict_from_corpus(corpus)
133 self.vocab_len = len(self.id2word)
134 elif self.id2word:
135 self.vocab_len = len(self.id2word)
178 corpus, id2word=self.id2word, num_topics=self.num_topics,
349 …lda = ldamodel.LdaModel(num_topics=num_topics, alpha=self.alphas, id2word=self.id2word, dtype=np.f…
629 vocab = [self.id2word[i] for i in range(len(self.id2word))]
[all …]
H A Dlsimodel.py356 self, corpus=None, num_topics=200, id2word=None, chunksize=20000, argument
388 self.id2word = id2word
400 if corpus is None and self.id2word is None:
405 if self.id2word is None:
407 self.id2word = utils.dict_from_corpus(corpus)
408 self.num_terms = len(self.id2word)
410 self.num_terms = 1 + (max(self.id2word.keys()) if self.id2word else -1)
432 id2word=self.id2word, num_topics=num_topics, chunksize=chunksize, decay=decay,
673 return [(self.id2word[val], 1.0 * c[val] / norm) for val in most]
730 self.id2word, self.projection.u, self.projection.s,
H A Dldamodel.py347 def __init__(self, corpus=None, num_topics=100, id2word=None, argument
428 self.id2word = id2word
429 if corpus is None and self.id2word is None:
434 if self.id2word is None:
436 self.id2word = utils.dict_from_corpus(corpus)
437 self.num_terms = len(self.id2word)
438 elif len(self.id2word) > 0:
439 self.num_terms = 1 + max(self.id2word.keys())
501 id2word=self.id2word, num_topics=self.num_topics, chunksize=chunksize,
1398 word_id = self.id2word.doc2bow([word_id])[0][0]
[all …]
H A Dtfidfmodel.py264 def __init__(self, corpus=None, id2word=None, dictionary=None, wlocal=utils.identity, argument
363 self.id2word = id2word
389 if not id2word:
390 self.id2word = dictionary
H A Dldamulticore.py104 def __init__(self, corpus=None, num_topics=100, id2word=None, workers=None, argument
181 id2word=id2word, chunksize=chunksize, passes=passes, alpha=alpha, eta=eta,
H A Datmodel.py161 def __init__(self, corpus=None, num_topics=100, id2word=None, author2doc=None, doc2author=None, argument
226 self.id2word = id2word
227 if corpus is None and self.id2word is None:
232 if self.id2word is None:
234 self.id2word = utils.dict_from_corpus(corpus)
235 self.num_terms = len(self.id2word)
236 elif len(self.id2word) > 0:
237 self.num_terms = 1 + max(self.id2word.keys())
H A Dhdpmodel.py295 def __init__(self, corpus, id2word, max_chunks=None, max_time=None, argument
341 self.id2word = id2word
352 self.m_W = len(id2word)
784 hdp_formatter = HdpTopicFormatter(self.id2word, betas)
822 hdp_formatter = HdpTopicFormatter(self.id2word, betas)
914 …num_topics=self.m_T, alpha=alpha, id2word=self.id2word, random_state=self.random_state, dtype=np.f…
/dports/textproc/py-gensim/gensim-4.0.1/gensim/test/
H A Dtest_dtm.py23 self.id2word = corpora.Dictionary.load(datapath('dtm_test.dict'))
33 id2word=self.id2word, model='dtm', initialize_lda=True,
47 id2word=self.id2word, model='fixed', initialize_lda=True,
63 id2word=self.id2word, model='dtm', initialize_lda=False,
H A Dtest_atmodel.py126 id2word=dictionary, num_topics=2, random_state=0
129 corpus, doc2author=doc2author, id2word=dictionary,
144 id2word=dictionary, num_topics=2, random_state=0
147 corpus, author2doc=author2doc, id2word=dictionary,
268 corpus, author2doc=author2doc, id2word=dictionary,
272 corpus, author2doc=author2doc, id2word=dictionary,
282 id2word=dictionary,
350 id2word=dictionary,
457 result = model.get_term_topics(str(model.id2word[2]))
543 self.assertTrue(model2.id2word is None)
[all …]
H A Dtest_ldamodel.py43 self.model = self.class_(corpus, id2word=dictionary, num_topics=2, passes=100)
71 model = self.class_(id2word=dictionary, num_topics=2, passes=100)
98 id2word=dictionary,
152 modelauto = self.class_(corpus, id2word=dictionary, eta='auto', passes=10)
159 id2word=dictionary,
339 result = model.get_term_topics(str(model.id2word[2]))
353 model = self.class_(id2word=dictionary, chunksize=1, num_topics=2)
428 id2word_2_7 = dict(model_2_7.id2word.iteritems())
429 id2word_3_5 = dict(model_3_5.id2word.iteritems())
437 self.assertTrue(model2.id2word is None)
[all …]
H A Dtest_nmf.py28 id2word=common_dictionary,
38 id2word=common_dictionary,
47 id2word=common_dictionary,
65 id2word=common_dictionary,
73 id2word=common_dictionary,
152 result = self.model.get_term_topics(str(self.model.id2word[2]))
H A Dsvd_error.py112 id2word = gensim.utils.FakeDict(m) variable
164 corpus, id2word=id2word, num_topics=factors,
180 corpus, id2word=id2word, num_topics=factors, chunksize=2000,
H A Dtest_similarity_metrics.py81 self.model = self.class_(common_corpus, id2word=common_dictionary, num_topics=2, passes=100)
138 model = self.class_(self.corpus, id2word=common_dictionary, num_topics=2, passes=100)
150 self.model = self.class_(common_corpus, id2word=common_dictionary, num_topics=2, passes=100)
205 model = self.class_(self.corpus, id2word=common_dictionary, num_topics=2, passes=100)
H A Dtest_big.py57 model = gensim.models.LsiModel(corpus, num_topics=500, id2word=corpus.dictionary)
65 model = gensim.models.LdaModel(corpus, num_topics=500, id2word=corpus.dictionary)
H A Dtest_corpora.py163 testdoc = set((to_unicode(corpus.id2word[x]), y) for x, y in firstdoc)
167 d = corpus.id2word
169 corpus.id2word = d
172 testdoc2 = set((to_unicode(corpus.id2word[x]), y) for x, y in firstdoc2)
444 id2word = {1: 'mom', 2: 'window'}
446 corpus = self.corpus_class(fname, id2word=id2word)
501 id2word = {1: 'mom', 2: 'window'}
503 corpus = self.corpus_class(fname, id2word=id2word, metadata=True)
H A Dtest_lsimodel.py90 model = lsimodel.LsiModel(corpus=None, id2word=model2.id2word, num_topics=5)
178 vocab_size = len(self.model.id2word)
/dports/textproc/py-orange3-text/orange3-text-1.3.1/orangecontrib/text/topics/
H A Dtopics.py64 self.id2word = Dictionary(corpus.ngrams_iterator(include_postags=True), prune_at=None)
66 id2word=self.id2word, **self.kwargs)

12