• Home
  • History
  • Annotate
Name Date Size #Lines LOC

..03-May-2022-

p_con.pyH A D24-Aug-202144.7 KiB1,3961,183

readme.mdH A D24-Aug-20211.4 KiB5130

readme.md

1Documentation for p_con (https://github.com/pzc/rdkit/blob/master/Contrib/pzc/p_con.html)
2
3from p_con import p_con
4
5
6pco = p_con("P43088")
7pco.verbous = True
8
9pco.step_0_get_chembl_data() # Download Compounds for P43088 from ChEMBL
10(or pco.load_mols("sdf-file.sdf.gz"))
11
12pco.step_1_keeplargestfrag() # remove small Fragments from compounds
13
14pco.step_2_remove_dupl()     # remove duplicate entries
15
16pco.step_3_merge_IC50()      # merge IC50 values of entries with the same canonical SMILES into one compound
17
18pco.step_4_set_TL(4000,ic50_tag="value") # set TrafficLights: value > 4000 nM -> 0, else 1
19
20pco.step_5_remove_descriptors() # remove Descriptors from compounds
21
22pco.step_6_calc_descriptors() # calculate new Descriptors which are used to create prediction-models
23
24pco.step_7_train_models() # train up to 10 models
25
26pco.save_model_info("model_info.csv",mode="csv")   # create csv with data for each model
27pco.save_model_info("model_info.html",mode="html") # create html with data for each model
28
29for i in range(len(pco.model)):
30    pco.save_model("model_%d.pkl" % i,i)
31
32for i in range(len(pco.model)):
33    act,inact = pco.predict(i)
34    print "Model %d active: %d\tinactive: %d" % (i,act,inact)
35
36
37
38# To check compounds using previously saved models
39
40pco2 = p_con("P43088")
41pco2.verbous = True
42pco2.load_mols("P43088.sdf.gz")
43models = ["model_0.pkl","model_1.pkl"]
44pco2.load_models(models)
45
46print "\n#Model\tActive\tInactive"
47
48for i in range(len(pco2.model)):
49    act,inact = pco2.predict(i)
50    print "%d\t%d\t%d" % (i,act,inact)
51