/dports/textproc/py-gensim/gensim-4.0.1/gensim/corpora/ |
H A D | lowcorpus.py | 77 def __init__(self, fname, id2word=None, line2words=split_on_space): argument 99 if not id2word: 108 self.id2word = dict(zip(range(len(all_terms)), all_terms)) 110 logger.info("using provided word mapping (%i ids)", len(id2word)) 111 self.id2word = id2word 196 def save_corpus(fname, corpus, id2word=None, metadata=False): argument 223 if id2word is None: 225 id2word = utils.dict_from_corpus(corpus) 281 def id2word(self): member in LowCorpus 285 @id2word.setter [all …]
|
H A D | bleicorpus.py | 78 self.id2word = dict(enumerate(words)) 117 def save_corpus(fname, corpus, id2word=None, metadata=False): argument 141 if id2word is None: 143 id2word = utils.dict_from_corpus(corpus) 144 num_terms = len(id2word) 145 elif id2word: 146 num_terms = 1 + max(id2word) 164 fout.write(utils.to_utf8("%s\n" % id2word.get(featureid, '---')))
|
H A D | ucicorpus.py | 191 self.id2word = dict(enumerate(words)) 232 dictionary.id2token = self.id2word 233 dictionary.token2id = utils.revdict(self.id2word) 249 def save_corpus(fname, corpus, id2word=None, progress_cnt=10000, metadata=False): argument 275 if id2word is None: 277 id2word = utils.dict_from_corpus(corpus) 278 num_terms = len(id2word) 279 elif id2word: 280 num_terms = 1 + max(id2word) 289 fout.write(utils.to_utf8("%s\n" % id2word.get(featureid, '---')))
|
H A D | malletcorpus.py | 57 def __init__(self, fname, id2word=None, metadata=False): argument 75 LowCorpus.__init__(self, fname, id2word) 140 def save_corpus(fname, corpus, id2word=None, metadata=False): argument 175 if id2word is None: 177 id2word = utils.dict_from_corpus(corpus) 195 words.extend([utils.to_unicode(id2word[wordid])] * int(value))
|
H A D | mmcorpus.py | 76 def save_corpus(fname, corpus, id2word=None, progress_cnt=1000, metadata=False): argument 112 num_terms = len(id2word) if id2word is not None else None
|
H A D | indexedcorpus.py | 72 def serialize(serializer, fname, corpus, id2word=None, index_fname=None, argument 122 offsets = serializer.save_corpus(fname, corpus, id2word, **kwargs)
|
/dports/textproc/py-gensim/gensim-4.0.1/gensim/models/ |
H A D | rpmodel.py | 51 def __init__(self, corpus, id2word=None, num_topics=300): argument 66 self.id2word = id2word 84 if self.id2word is None: 86 self.id2word = utils.dict_from_corpus(corpus) 87 self.num_terms = len(self.id2word) 88 elif self.id2word: 89 self.num_terms = 1 + max(self.id2word)
|
H A D | nmf.py | 132 id2word=None, argument 188 self.id2word = id2word 203 if self.id2word is None: 204 self.id2word = utils.dict_from_corpus(corpus) 206 self.num_tokens = len(self.id2word) 302 topic = [(self.id2word[id], topic[id]) for id in bestn] 335 (self.id2word[id], value) 418 beststr = [(topic[_id], self.id2word[_id]) for _id in bestn] # membership, token 451 word_id = self.id2word.doc2bow([word_id])[0][0]
|
H A D | ldaseqmodel.py | 124 self.id2word = id2word 125 if corpus is None and self.id2word is None: 130 if self.id2word is None: 132 self.id2word = utils.dict_from_corpus(corpus) 133 self.vocab_len = len(self.id2word) 134 elif self.id2word: 135 self.vocab_len = len(self.id2word) 178 corpus, id2word=self.id2word, num_topics=self.num_topics, 349 …lda = ldamodel.LdaModel(num_topics=num_topics, alpha=self.alphas, id2word=self.id2word, dtype=np.f… 629 vocab = [self.id2word[i] for i in range(len(self.id2word))] [all …]
|
H A D | lsimodel.py | 356 self, corpus=None, num_topics=200, id2word=None, chunksize=20000, argument 388 self.id2word = id2word 400 if corpus is None and self.id2word is None: 405 if self.id2word is None: 407 self.id2word = utils.dict_from_corpus(corpus) 408 self.num_terms = len(self.id2word) 410 self.num_terms = 1 + (max(self.id2word.keys()) if self.id2word else -1) 432 id2word=self.id2word, num_topics=num_topics, chunksize=chunksize, decay=decay, 673 return [(self.id2word[val], 1.0 * c[val] / norm) for val in most] 730 self.id2word, self.projection.u, self.projection.s,
|
H A D | ldamodel.py | 347 def __init__(self, corpus=None, num_topics=100, id2word=None, argument 428 self.id2word = id2word 429 if corpus is None and self.id2word is None: 434 if self.id2word is None: 436 self.id2word = utils.dict_from_corpus(corpus) 437 self.num_terms = len(self.id2word) 438 elif len(self.id2word) > 0: 439 self.num_terms = 1 + max(self.id2word.keys()) 501 id2word=self.id2word, num_topics=self.num_topics, chunksize=chunksize, 1398 word_id = self.id2word.doc2bow([word_id])[0][0] [all …]
|
H A D | tfidfmodel.py | 264 def __init__(self, corpus=None, id2word=None, dictionary=None, wlocal=utils.identity, argument 363 self.id2word = id2word 389 if not id2word: 390 self.id2word = dictionary
|
H A D | ldamulticore.py | 104 def __init__(self, corpus=None, num_topics=100, id2word=None, workers=None, argument 181 id2word=id2word, chunksize=chunksize, passes=passes, alpha=alpha, eta=eta,
|
H A D | atmodel.py | 161 def __init__(self, corpus=None, num_topics=100, id2word=None, author2doc=None, doc2author=None, argument 226 self.id2word = id2word 227 if corpus is None and self.id2word is None: 232 if self.id2word is None: 234 self.id2word = utils.dict_from_corpus(corpus) 235 self.num_terms = len(self.id2word) 236 elif len(self.id2word) > 0: 237 self.num_terms = 1 + max(self.id2word.keys())
|
H A D | hdpmodel.py | 295 def __init__(self, corpus, id2word, max_chunks=None, max_time=None, argument 341 self.id2word = id2word 352 self.m_W = len(id2word) 784 hdp_formatter = HdpTopicFormatter(self.id2word, betas) 822 hdp_formatter = HdpTopicFormatter(self.id2word, betas) 914 …num_topics=self.m_T, alpha=alpha, id2word=self.id2word, random_state=self.random_state, dtype=np.f…
|
/dports/textproc/py-gensim/gensim-4.0.1/gensim/test/ |
H A D | test_dtm.py | 23 self.id2word = corpora.Dictionary.load(datapath('dtm_test.dict')) 33 id2word=self.id2word, model='dtm', initialize_lda=True, 47 id2word=self.id2word, model='fixed', initialize_lda=True, 63 id2word=self.id2word, model='dtm', initialize_lda=False,
|
H A D | test_atmodel.py | 126 id2word=dictionary, num_topics=2, random_state=0 129 corpus, doc2author=doc2author, id2word=dictionary, 144 id2word=dictionary, num_topics=2, random_state=0 147 corpus, author2doc=author2doc, id2word=dictionary, 268 corpus, author2doc=author2doc, id2word=dictionary, 272 corpus, author2doc=author2doc, id2word=dictionary, 282 id2word=dictionary, 350 id2word=dictionary, 457 result = model.get_term_topics(str(model.id2word[2])) 543 self.assertTrue(model2.id2word is None) [all …]
|
H A D | test_ldamodel.py | 43 self.model = self.class_(corpus, id2word=dictionary, num_topics=2, passes=100) 71 model = self.class_(id2word=dictionary, num_topics=2, passes=100) 98 id2word=dictionary, 152 modelauto = self.class_(corpus, id2word=dictionary, eta='auto', passes=10) 159 id2word=dictionary, 339 result = model.get_term_topics(str(model.id2word[2])) 353 model = self.class_(id2word=dictionary, chunksize=1, num_topics=2) 428 id2word_2_7 = dict(model_2_7.id2word.iteritems()) 429 id2word_3_5 = dict(model_3_5.id2word.iteritems()) 437 self.assertTrue(model2.id2word is None) [all …]
|
H A D | test_nmf.py | 28 id2word=common_dictionary, 38 id2word=common_dictionary, 47 id2word=common_dictionary, 65 id2word=common_dictionary, 73 id2word=common_dictionary, 152 result = self.model.get_term_topics(str(self.model.id2word[2]))
|
H A D | svd_error.py | 112 id2word = gensim.utils.FakeDict(m) variable 164 corpus, id2word=id2word, num_topics=factors, 180 corpus, id2word=id2word, num_topics=factors, chunksize=2000,
|
H A D | test_similarity_metrics.py | 81 self.model = self.class_(common_corpus, id2word=common_dictionary, num_topics=2, passes=100) 138 model = self.class_(self.corpus, id2word=common_dictionary, num_topics=2, passes=100) 150 self.model = self.class_(common_corpus, id2word=common_dictionary, num_topics=2, passes=100) 205 model = self.class_(self.corpus, id2word=common_dictionary, num_topics=2, passes=100)
|
H A D | test_big.py | 57 model = gensim.models.LsiModel(corpus, num_topics=500, id2word=corpus.dictionary) 65 model = gensim.models.LdaModel(corpus, num_topics=500, id2word=corpus.dictionary)
|
H A D | test_corpora.py | 163 testdoc = set((to_unicode(corpus.id2word[x]), y) for x, y in firstdoc) 167 d = corpus.id2word 169 corpus.id2word = d 172 testdoc2 = set((to_unicode(corpus.id2word[x]), y) for x, y in firstdoc2) 444 id2word = {1: 'mom', 2: 'window'} 446 corpus = self.corpus_class(fname, id2word=id2word) 501 id2word = {1: 'mom', 2: 'window'} 503 corpus = self.corpus_class(fname, id2word=id2word, metadata=True)
|
H A D | test_lsimodel.py | 90 model = lsimodel.LsiModel(corpus=None, id2word=model2.id2word, num_topics=5) 178 vocab_size = len(self.model.id2word)
|
/dports/textproc/py-orange3-text/orange3-text-1.3.1/orangecontrib/text/topics/ |
H A D | topics.py | 64 self.id2word = Dictionary(corpus.ngrams_iterator(include_postags=True), prune_at=None) 66 id2word=self.id2word, **self.kwargs)
|