!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                              !
!   Copyright (C) 2000 - 2020  CP2K developers group                                               !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief Neural Network implementation
!> \author Ole Schuett
! **************************************************************************************************
MODULE pao_ml_neuralnet
   USE kinds,                           ONLY: dp
   USE pao_types,                       ONLY: pao_env_type,&
                                              training_matrix_type
   USE parallel_rng_types,              ONLY: rng_stream_type
#include "./base/base_uses.f90"

   IMPLICIT NONE

   PRIVATE

   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'pao_ml_neuralnet'

   PUBLIC :: pao_ml_nn_train, pao_ml_nn_predict, pao_ml_nn_gradient

   ! TODO turn these into input parameters
   REAL(dp), PARAMETER   :: step_size = 0.001_dp
   INTEGER, PARAMETER    :: nlayers = 3
   REAL(dp), PARAMETER   :: convergence_eps = 1e-7_dp
   INTEGER, PARAMETER    :: max_training_cycles = 50000
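
   ! The network is a plain feed-forward net without bias terms, stored as
   ! nlayers square weight matrices A(ilayer, :, :) of size width x width,
   ! where width = MAX(input width, output width). Each layer evaluates
   !    forward(l, i) = SUM_j A(l, i, j)*TANH(forward(l-1, j))
   ! and the prediction is read from the last layer. Training minimizes the
   ! squared error SUM((prediction - goal)**2) over all training points by
   ! full-batch gradient descent with the fixed step_size above.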

CONTAINS

! **************************************************************************************************
!> \brief Uses neural network to make a prediction
!> \param pao the PAO environment
!> \param ikind the atomic kind for which to predict
!> \param descriptor the descriptor of the atomic environment
!> \param output the predicted output vector
!> \param variance always zero, neural networks do not provide a variance estimate
! **************************************************************************************************
   SUBROUTINE pao_ml_nn_predict(pao, ikind, descriptor, output, variance)
      TYPE(pao_env_type), POINTER                        :: pao
      INTEGER, INTENT(IN)                                :: ikind
      REAL(dp), DIMENSION(:), INTENT(IN)                 :: descriptor
      REAL(dp), DIMENSION(:), INTENT(OUT)                :: output
      REAL(dp), INTENT(OUT)                              :: variance

      TYPE(training_matrix_type), POINTER                :: training_matrix

      training_matrix => pao%ml_training_matrices(ikind)

      CALL nn_eval(training_matrix%NN, input=descriptor, prediction=output)

      variance = 0.0_dp ! Neural Networks don't provide a variance
   END SUBROUTINE pao_ml_nn_predict
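
   ! A minimal calling sketch (hypothetical sizes and values; assumes the
   ! network for ikind=1 was trained beforehand via pao_ml_nn_train):
   !
   !    REAL(dp) :: descriptor(10), output(5), variance
   !    descriptor = 0.0_dp  ! would hold the actual environment descriptor
   !    CALL pao_ml_nn_predict(pao, ikind=1, descriptor=descriptor, &
   !                           output=output, variance=variance)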

! **************************************************************************************************
!> \brief Calculate gradient of neural network
!> \param pao the PAO environment
!> \param ikind the atomic kind for which to calculate the gradient
!> \param descriptor the descriptor of the atomic environment
!> \param outer_deriv the derivative of the outer function wrt the network output
!> \param gradient the resulting derivative wrt the descriptor
! **************************************************************************************************
   SUBROUTINE pao_ml_nn_gradient(pao, ikind, descriptor, outer_deriv, gradient)
      TYPE(pao_env_type), POINTER                        :: pao
      INTEGER, INTENT(IN)                                :: ikind
      REAL(dp), DIMENSION(:), INTENT(IN), TARGET         :: descriptor
      REAL(dp), DIMENSION(:), INTENT(IN)                 :: outer_deriv
      REAL(dp), DIMENSION(:), INTENT(OUT)                :: gradient

      INTEGER                                            :: i, ilayer, j, nlayers, width, width_in, &
                                                            width_out
      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: backward, forward
      REAL(dp), DIMENSION(:, :, :), POINTER              :: A

      A => pao%ml_training_matrices(ikind)%NN

      nlayers = SIZE(A, 1)
      width = SIZE(A, 2); CPASSERT(SIZE(A, 2) == SIZE(A, 3))
      width_in = SIZE(descriptor)
      width_out = SIZE(outer_deriv)

      ALLOCATE (forward(0:nlayers, width), backward(0:nlayers, width))

      forward = 0.0_dp
      forward(0, 1:width_in) = descriptor

      DO ilayer = 1, nlayers
      DO i = 1, width
      DO j = 1, width
         forward(ilayer, i) = forward(ilayer, i) + A(ilayer, i, j)*TANH(forward(ilayer - 1, j))
      ENDDO
      ENDDO
      ENDDO

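      ! Reverse sweep: propagates outer_deriv backwards through the network,
      ! i.e. this computes gradient = J^T * outer_deriv, where J is the
      ! Jacobian d(prediction)/d(descriptor) of the forward pass above.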
      ! Turning Point ------------------------------------------------------------------------------
      backward = 0.0_dp
      backward(nlayers, 1:width_out) = outer_deriv(:)

      DO ilayer = nlayers, 1, -1
      DO i = 1, width
      DO j = 1, width
         backward(ilayer - 1, j) = backward(ilayer - 1, j) + backward(ilayer, i)*A(ilayer, i, j) &
                                   *(1.0_dp - TANH(forward(ilayer - 1, j))**2)
      ENDDO
      ENDDO
      ENDDO

      gradient(:) = backward(0, 1:width_in)

      DEALLOCATE (forward, backward)
   END SUBROUTINE pao_ml_nn_gradient

! **************************************************************************************************
!> \brief Trains the neural network on given training points
!> \param pao the PAO environment
! **************************************************************************************************
   SUBROUTINE pao_ml_nn_train(pao)
      TYPE(pao_env_type), POINTER                        :: pao

      INTEGER                                            :: i, icycle, ikind, ilayer, ipoint, j, &
                                                            npoints, width, width_in, width_out
      REAL(dp)                                           :: bak, eps, error, error1, error2, num_grad
      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: prediction
      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: gradient
      TYPE(rng_stream_type)                              :: rng_stream
      TYPE(training_matrix_type), POINTER                :: training_matrix

      ! TODO this could be parallelized over ranks
      DO ikind = 1, SIZE(pao%ml_training_matrices)
         training_matrix => pao%ml_training_matrices(ikind)

         npoints = SIZE(training_matrix%inputs, 2) ! number of points
         CPASSERT(SIZE(training_matrix%outputs, 2) == npoints)
         IF (npoints == 0) CYCLE

         !TODO proper output
         IF (pao%iw > 0) WRITE (pao%iw, *) "PAO|ML| Training neural network for kind: ", &
            TRIM(training_matrix%kindname), " from ", npoints, "training points."

         ! determine network width and allocate it
         width_in = SIZE(training_matrix%inputs, 1)
         width_out = SIZE(training_matrix%outputs, 1)
         width = MAX(width_in, width_out)
         ALLOCATE (training_matrix%NN(nlayers, width, width))

         ! initialize network with random numbers from -1.0 ... +1.0
         rng_stream = rng_stream_type(name="pao_nn")
         DO ilayer = 1, nlayers
         DO i = 1, width
         DO j = 1, width
            training_matrix%NN(ilayer, i, j) = -1.0_dp + 2.0_dp*rng_stream%next()
         ENDDO
         ENDDO
         ENDDO

         ! train the network using backpropagation
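         ! Plain full-batch gradient descent: each cycle accumulates the error
         ! gradient over all training points and then takes one fixed step,
         !    A <- A - step_size * dE/dA.
         ! The loop stops once the squared gradient norm falls below
         ! convergence_eps, or after max_training_cycles at the latest.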
         ALLOCATE (gradient(nlayers, width, width))
         DO icycle = 1, max_training_cycles
            error = 0.0_dp
            gradient = 0.0_dp
            DO ipoint = 1, npoints
               CALL nn_backpropagate(training_matrix%NN, &
                                     input=training_matrix%inputs(:, ipoint), &
                                     goal=training_matrix%outputs(:, ipoint), &
                                     gradient=gradient, &
                                     error=error)
            ENDDO
            training_matrix%NN(:, :, :) = training_matrix%NN - step_size*gradient

            IF (pao%iw > 0 .AND. MOD(icycle, 100) == 0) WRITE (pao%iw, *) &
               "PAO|ML| ", TRIM(training_matrix%kindname), &
               " training-cycle:", icycle, "SQRT(error):", SQRT(error), "grad:", SUM(gradient**2)

            IF (SUM(gradient**2) < convergence_eps) EXIT
         ENDDO

         ! numeric gradient for debugging ----------------------------------------------------------
         IF (.FALSE.) THEN
            eps = 1e-4_dp
            ilayer = 1
            ipoint = 1
            error = 0.0_dp
            gradient = 0.0_dp
            CALL nn_backpropagate(training_matrix%NN, &
                                  input=training_matrix%inputs(:, ipoint), &
                                  goal=training_matrix%outputs(:, ipoint), &
                                  gradient=gradient, &
                                  error=error)

            ALLOCATE (prediction(width_out))
            DO i = 1, width
            DO j = 1, width
               bak = training_matrix%NN(ilayer, i, j)

               training_matrix%NN(ilayer, i, j) = bak + eps
               CALL nn_eval(training_matrix%NN, &
                            input=training_matrix%inputs(:, ipoint), &
                            prediction=prediction)
               error1 = SUM((training_matrix%outputs(:, ipoint) - prediction)**2)

               training_matrix%NN(ilayer, i, j) = bak - eps
               CALL nn_eval(training_matrix%NN, &
                            input=training_matrix%inputs(:, ipoint), &
                            prediction=prediction)
               error2 = SUM((training_matrix%outputs(:, ipoint) - prediction)**2)

               training_matrix%NN(ilayer, i, j) = bak
               num_grad = (error1 - error2)/(2.0_dp*eps)
               IF (pao%iw > 0) WRITE (pao%iw, *) "PAO|ML| Numeric gradient:", i, j, gradient(ilayer, i, j), num_grad

            ENDDO
            ENDDO
            DEALLOCATE (prediction)
         ENDIF
         !------------------------------------------------------------------------------------------

         DEALLOCATE (gradient)

         ! test training points individually
         ALLOCATE (prediction(width_out))
         DO ipoint = 1, npoints
            CALL nn_eval(training_matrix%NN, &
                         input=training_matrix%inputs(:, ipoint), &
                         prediction=prediction)
            error = MAXVAL(ABS(training_matrix%outputs(:, ipoint) - prediction))
            IF (pao%iw > 0) WRITE (pao%iw, *) "PAO|ML| ", TRIM(training_matrix%kindname), &
               " verify training-point:", ipoint, "max-error:", error
         ENDDO
         DEALLOCATE (prediction)

      ENDDO

   END SUBROUTINE pao_ml_nn_train

! **************************************************************************************************
!> \brief Evaluates the neural network for a given input
!> \param A the network's weight matrices, shape (nlayers, width, width)
!> \param input the input vector
!> \param prediction the resulting output vector
! **************************************************************************************************
   SUBROUTINE nn_eval(A, input, prediction)
      REAL(dp), DIMENSION(:, :, :), INTENT(IN)           :: A
      REAL(dp), DIMENSION(:), INTENT(IN)                 :: input
      REAL(dp), DIMENSION(:), INTENT(OUT)                :: prediction

      INTEGER                                            :: i, ilayer, j, nlayers, width, width_in, &
                                                            width_out
      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: forward

      nlayers = SIZE(A, 1)
      width = SIZE(A, 2); CPASSERT(SIZE(A, 2) == SIZE(A, 3))
      width_in = SIZE(input)
      width_out = SIZE(prediction)

      ALLOCATE (forward(0:nlayers, width))

      forward = 0.0_dp
      forward(0, 1:width_in) = input(:)

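      ! Layer recursion: forward(l, i) = SUM_j A(l, i, j)*TANH(forward(l-1, j)).
      ! Note that TANH acts on the raw input already at the first layer.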
      DO ilayer = 1, nlayers
      DO i = 1, width
      DO j = 1, width
         forward(ilayer, i) = forward(ilayer, i) + A(ilayer, i, j)*TANH(forward(ilayer - 1, j))
      ENDDO
      ENDDO
      ENDDO

      prediction(:) = forward(nlayers, 1:width_out)

   END SUBROUTINE nn_eval

! **************************************************************************************************
!> \brief Uses backpropagation to calculate the gradient for a given training point
!> \param A the network's weight matrices, shape (nlayers, width, width)
!> \param input the training input vector
!> \param goal the desired training output vector
!> \param error accumulates the squared error of this training point
!> \param gradient accumulates the error gradient wrt the weights
! **************************************************************************************************
   SUBROUTINE nn_backpropagate(A, input, goal, error, gradient)
      REAL(dp), DIMENSION(:, :, :), INTENT(IN)           :: A
      REAL(dp), DIMENSION(:), INTENT(IN)                 :: input, goal
      REAL(dp), INTENT(INOUT)                            :: error
      REAL(dp), DIMENSION(:, :, :), INTENT(INOUT)        :: gradient

      INTEGER                                            :: i, ilayer, j, nlayers, width, width_in, &
                                                            width_out
      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: prediction
      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: backward, forward

      nlayers = SIZE(A, 1)
      width = SIZE(A, 2); CPASSERT(SIZE(A, 2) == SIZE(A, 3))
      width_in = SIZE(input)
      width_out = SIZE(goal)

      ALLOCATE (forward(0:nlayers, width), prediction(width_out), backward(0:nlayers, width))

      forward = 0.0_dp
      forward(0, 1:width_in) = input

      DO ilayer = 1, nlayers
      DO i = 1, width
      DO j = 1, width
         forward(ilayer, i) = forward(ilayer, i) + A(ilayer, i, j)*TANH(forward(ilayer - 1, j))
      ENDDO
      ENDDO
      ENDDO

      prediction(:) = forward(nlayers, 1:width_out)

      error = error + SUM((prediction - goal)**2)

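      ! Backward pass. With E = SUM((prediction - goal)**2) the chain rule gives
      !    dE/dA(l, i, j)   = 2*backward(l, i)*TANH(forward(l-1, j))
      !    backward(l-1, j) = SUM_i backward(l, i)*A(l, i, j)*(1 - TANH(forward(l-1, j))**2)
      ! where backward(l, j) carries (1/2)*dE/d(forward(l, j)); the factor 2 of
      ! the squared error is applied when the gradient is accumulated below.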
      ! Turning Point ------------------------------------------------------------------------------
      backward = 0.0_dp
      backward(nlayers, 1:width_out) = prediction - goal

      DO ilayer = nlayers, 1, -1
      DO i = 1, width
      DO j = 1, width
         gradient(ilayer, i, j) = gradient(ilayer, i, j) + 2.0_dp*backward(ilayer, i)*TANH(forward(ilayer - 1, j))
         backward(ilayer - 1, j) = backward(ilayer - 1, j) + backward(ilayer, i)*A(ilayer, i, j) &
                                   *(1.0_dp - TANH(forward(ilayer - 1, j))**2)
      ENDDO
      ENDDO
      ENDDO

      DEALLOCATE (forward, backward, prediction)
   END SUBROUTINE nn_backpropagate

END MODULE pao_ml_neuralnet