Commit d4af277

Added example test for input/output gradient scaling
1 parent 51fa73c commit d4af277
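
The new functions check that data scaling is applied consistently when gradients are
backpropagated from the network output (the LDOS) to the network input (the descriptors).
As a rough orientation, and only as a hypothetical illustration for a plain standardization
scaler (not MALA's actual DataScaler implementation): if the network maps scaled inputs
x_s = (x - mu_in) / sigma_in to scaled outputs y_s, and the physical output is
y = y_s * sigma_out + mu_out, the chain rule gives dy/dx = sigma_out * (dy_s/dx_s) / sigma_in.
The backward seed therefore has to be rescaled on the output side and the resulting gradient
on the input side, which is what the scaler "backpropagation" hooks used in the diff below
appear to be responsible for.

    # Hypothetical stand-ins for the scaler backpropagation hooks, assuming
    # standardization scaling; MALA's actual implementation may differ.
    def transform_backpropagation(grad_wrt_unscaled_output, sigma_out):
        # Seed for backward(): dE/dy_s = dE/dy * sigma_out.
        return grad_wrt_unscaled_output * sigma_out

    def inverse_transform_backpropagation(grad_wrt_scaled_input, sigma_in):
        # Undo the input scaling: dE/dx = dE/dx_s / sigma_in.
        return grad_wrt_scaled_input / sigma_in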

1 file changed: +228 -22 lines changed

1 file changed

+228
-22
lines changed

examples/ex19_atomic_forces.py

Lines changed: 228 additions & 22 deletions
@@ -4,6 +4,7 @@
 import mala
 from mala import printout
 import numpy as np
+import torch

 from mala.datahandling.data_repo import data_repo_path

@@ -166,32 +167,237 @@ def hartree_contribution():
         print(mala_forces[point, :] / np.array(numerical_forces))


-def backpropagation_pieces():
-    """
-    Check the individual parts of the backpropagation.
-    """
-    # Only compute a specific part of the forces.
-    ldos_calculator: mala.LDOS
+def check_input_gradient_scaling():
+    # Load a model, set parameters.
     predictor: mala.Predictor
-    ldos, ldos_calculator, parameters, predictor = run_prediction(
-        backprop=True
+    parameters: mala.Parameters
+    parameters, network, data_handler, predictor = mala.Predictor.load_run(
+        "Be_ACE_model_FULLSCALING"
     )
-    # For easier testing, can also be commented out.
-    ldos_calculator.debug_forces_flag = "band_energy"
-
-    # This next line can later be part of the prediction itself within the
-    # Predictor class.
-    ldos_calculator.setup_for_forces(predictor)
-
-    # Performs the calculation internally.
-    mala_forces = ldos_calculator.atomic_forces
-
-    # Should be 8748, 91
-    print("FORCE TEST: Backpropagation machinery.")
-    print(mala_forces.size())
+    parameters.targets.target_type = "LDOS"
+    parameters.targets.ldos_gridsize = 11
+    parameters.targets.ldos_gridspacing_ev = 2.5
+    parameters.targets.ldos_gridoffset_ev = -5
+    parameters.targets.restrict_targets = None
+    parameters.descriptors.descriptors_contain_xyz = False
+    parameters.descriptors.descriptor_type = "ACE"
+
+    # Compute descriptors, only do further computations for one point.
+    atoms1 = read("/home/fiedlerl/data/mala_data_repo/Be2/Be_snapshot1.out")
+    descriptors, ngrid = (
+        predictor.data.descriptor_calculator.calculate_from_atoms(
+            atoms1, [18, 18, 27]
+        )
+    )
+    snap_descriptors = torch.from_numpy(np.array([descriptors[0, 0, 0]]))
+    snap_descriptors_work = snap_descriptors.clone()
+    snap_descriptors = predictor.data.input_data_scaler.transform(
+        snap_descriptors
+    )
+    snap_descriptors.requires_grad = True
+
+    # Forward pass through the network.
+    ldos = network(snap_descriptors)
+    d_d_d_B = []
+
+    # Compute "gradient". This is ACTUALLY the Jacobian matrix, i.e., the one
+    # we can compare with finite differences.
+    for i in range(0, 11):
+        if snap_descriptors.grad is not None:
+            snap_descriptors.grad.zero_()
+
+        ldos[0, i].backward(retain_graph=True)
+        # d_d_d_B = snap_descriptors.grad.clone()
+        d_d_d_B.append(
+            predictor.data.input_data_scaler.inverse_transform_backpropagation(
+                snap_descriptors.grad.clone()
+            )
+        )

+    # Just for debugging purposes, not part of the actual test.
+    # if False:
+    #     # This would be the direct application of the autograd.
+    #     # Autograd computes the vector Jacobian product, see
+    #     # https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html
+    #     # Here: compare same_as_sum with d_d_d_B_sum.
+    #     ldos = network(snap_descriptors)
+    #     snap_descriptors.grad.zero_()
+    #     # print(ldos / ldos)
+    #     ldos.backward(ldos / ldos, retain_graph=True)
+    #     same_as_sum = (
+    #         predictor.data.input_data_scaler.inverse_transform_backpropagation(
+    #             snap_descriptors.grad.clone()
+    #         )
+    #     )
+    #     d_d_d_B_sum = d_d_d_B[0]
+    #     for i in range(1, 11):
+    #         d_d_d_B_sum += d_d_d_B[i]
+
+    with torch.no_grad():
+        # In case we want to compare different levels of finite differences.
+        for diff in [
+            # 5.0e-1,
+            # 5.0e-2,
+            # 5.0e-3,
+            5.0e-4,
+            # 5.0e-5,
+            # 5.0e-6,
+        ]:
+
+            # Compute finite differences. j theoretically goes up to
+            # 36, but for a simple check this is completely enough.
+            for j in range(0, 5):
+                snap_descriptors_work[0, j] += diff
+                descriptors_scaled1 = snap_descriptors_work.clone()
+                ldos_1 = network(
+                    predictor.data.input_data_scaler.transform(
+                        descriptors_scaled1
+                    )
+                ).clone()
+
+                snap_descriptors_work[0, j] -= 2.0 * diff
+                descriptors_scaled2 = snap_descriptors_work.clone()
+                ldos_2 = network(
+                    predictor.data.input_data_scaler.transform(
+                        descriptors_scaled2
+                    )
+                ).clone()
+
+                # Comparison - we only compare the first component of the
+                # LDOS for brevity, but this works for all components.
+                snap_descriptors_work[0, j] += diff
+                force = -1.0 * ((ldos_2[0, 0] - ldos_1[0, 0]) / (2 * diff))
+                print(
+                    diff,
+                    force.double().numpy(),
+                    d_d_d_B[0][0, j],
+                    force.double().numpy() / d_d_d_B[0][0, j],
+                )
+
+
+def check_output_gradient_scaling():
+    # Load a model, set parameters.
+    predictor: mala.Predictor
+    parameters: mala.Parameters
+    parameters, network, data_handler, predictor = mala.Predictor.load_run(
+        "Be_ACE_model_FULLSCALING"
+    )
+    parameters.targets.target_type = "LDOS"
+    parameters.targets.ldos_gridsize = 11
+    parameters.targets.ldos_gridspacing_ev = 2.5
+    parameters.targets.ldos_gridoffset_ev = -5
+    parameters.targets.restrict_targets = None
+    parameters.descriptors.descriptors_contain_xyz = False
+    parameters.descriptors.descriptor_type = "ACE"
+
+    # Compute descriptors, only do further computations for one point.
+    atoms1 = read("/home/fiedlerl/data/mala_data_repo/Be2/Be_snapshot1.out")
+    descriptors, ngrid = (
+        predictor.data.descriptor_calculator.calculate_from_atoms(
+            atoms1, [18, 18, 27]
+        )
+    )
+    snap_descriptors = torch.from_numpy(np.array([descriptors[0, 0, 0]]))
+    snap_descriptors_work = snap_descriptors.clone()
+    snap_descriptors = predictor.data.input_data_scaler.transform(
+        snap_descriptors
+    )
+    snap_descriptors.requires_grad = True
+
+    # Forward pass through the network.
+    # ldos = network(snap_descriptors)
+    # d_d_d_B = []
+
+    # Compute "gradient". This is ACTUALLY the Jacobian matrix, i.e., the one
+    # we can compare with finite differences.
+    # for i in range(0, 11):
+    #     if snap_descriptors.grad is not None:
+    #         snap_descriptors.grad.zero_()
+    #
+    #     ldos[0, i].backward(retain_graph=True)
+    #     # d_d_d_B = snap_descriptors.grad.clone()
+    #     d_d_d_B.append(
+    #         predictor.data.input_data_scaler.inverse_transform_backpropagation(
+    #             snap_descriptors.grad.clone()
+    #         )
+    #     )
+
+    if True:
+        # This would be the direct application of the autograd.
+        # Autograd computes the vector Jacobian product, see
+        # https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html
+        ldos = network(snap_descriptors)
+        ldos_unscaled = predictor.data.output_data_scaler.inverse_transform(
+            ldos, copy=True
+        )
+        if snap_descriptors.grad is not None:
+            snap_descriptors.grad.zero_()
+        ldos.backward(
+            predictor.data.output_data_scaler.transform_backpropagation(
+                2 * ldos_unscaled
+            ),
+            retain_graph=True,
+        )
+        d_d_d_B = (
+            predictor.data.input_data_scaler.inverse_transform_backpropagation(
+                snap_descriptors.grad.clone()
+            )
+        )

+    with torch.no_grad():
+        # In case we want to compare different levels of finite differences.
+        for diff in [
+            # 5.0e-1,
+            # 5.0e-2,
+            # 5.0e-3,
+            5.0e-4,
+            # 5.0e-5,
+            # 5.0e-6,
+        ]:
+
+            # Compute finite differences. j theoretically goes up to
+            # 36, but for a simple check this is completely enough.
+            for j in range(0, 5):
+                snap_descriptors_work[0, j] += diff
+                descriptors_scaled1 = snap_descriptors_work.clone()
+                ldos_1 = predictor.data.output_data_scaler.inverse_transform(
+                    network(
+                        predictor.data.input_data_scaler.transform(
+                            descriptors_scaled1
+                        )
+                    ).clone()
+                )
+                energy_1 = 0
+                for i in range(0, 11):
+                    energy_1 += ldos_1[0, i] * ldos_1[0, i]
+
+                snap_descriptors_work[0, j] -= 2.0 * diff
+                descriptors_scaled2 = snap_descriptors_work.clone()
+                ldos_2 = predictor.data.output_data_scaler.inverse_transform(
+                    network(
+                        predictor.data.input_data_scaler.transform(
+                            descriptors_scaled2
+                        )
+                    ).clone()
+                )
+                energy_2 = 0
+                for i in range(0, 11):
+                    energy_2 += ldos_2[0, i] * ldos_2[0, i]
+
+                # Comparison - we only compare the first component of the
+                # LDOS for brevity, but this works for all components.
+                snap_descriptors_work[0, j] += diff
+                force = -1.0 * ((energy_2 - energy_1) / (2 * diff))
+                print(
+                    diff,
+                    force.double().numpy(),
+                    d_d_d_B[0, j],
+                    force.double().numpy() / d_d_d_B[0, j],
+                )
+
+
+# check_input_gradient_scaling()
+# check_output_gradient_scaling()
 # band_energy_contribution()
 # entropy_contribution()
 # hartree_contribution()
-backpropagation_pieces()
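
For reference, the same comparison can be reproduced without a trained MALA model. The
sketch below is a minimal, self-contained variant of the pattern used in
check_output_gradient_scaling, assuming plain standardization scalers and a random
stand-in network; the actual numbers are meaningless, only the agreement between the
autograd and finite-difference columns matters.

    # Minimal sketch: scaling-aware gradient check with a stand-in network and
    # assumed standardization scalers (mu/sigma values are arbitrary).
    import torch

    torch.manual_seed(0)
    net = torch.nn.Linear(4, 3).double()        # stand-in for the trained network
    mu_in, sigma_in = 2.0, 3.0                  # assumed input scaling parameters
    mu_out, sigma_out = -1.0, 0.5               # assumed output scaling parameters

    x = torch.randn(1, 4, dtype=torch.float64)  # "unscaled descriptors"


    def energy(x_unscaled):
        # "Energy" analogue: sum of squared unscaled outputs.
        y = net((x_unscaled - mu_in) / sigma_in) * sigma_out + mu_out
        return (y * y).sum()


    # Analytic gradient via autograd, with the scaling corrections applied.
    x_scaled = ((x - mu_in) / sigma_in).detach().requires_grad_(True)
    y_scaled = net(x_scaled)
    y = y_scaled * sigma_out + mu_out
    # Seed dE/dy = 2*y, moved to the scaled output space (* sigma_out) ...
    y_scaled.backward((2.0 * y * sigma_out).detach())
    # ... and undo the input scaling on the resulting gradient (/ sigma_in).
    grad_analytic = x_scaled.grad / sigma_in

    # Central finite differences in fully unscaled space.
    eps = 5.0e-4
    with torch.no_grad():
        for j in range(4):
            x_plus, x_minus = x.clone(), x.clone()
            x_plus[0, j] += eps
            x_minus[0, j] -= eps
            grad_fd = (energy(x_plus) - energy(x_minus)) / (2.0 * eps)
            print(grad_analytic[0, j].item(), grad_fd.item())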
