 import mala
 from mala import printout
 import numpy as np
+import torch
 
 from mala.datahandling.data_repo import data_repo_path
 
@@ -166,32 +167,237 @@ def hartree_contribution():
     print(mala_forces[point, :] / np.array(numerical_forces))
 
 
-def backpropagation_pieces():
-    """
-    Check the individual parts of the backpropagation.
-    """
-    # Only compute a specific part of the forces.
-    ldos_calculator: mala.LDOS
+def check_input_gradient_scaling():
+    """
+    Check backpropagation through the input scaling against finite differences.
+    """
+    # Load a model, set parameters.
     predictor: mala.Predictor
-    ldos, ldos_calculator, parameters, predictor = run_prediction(
-        backprop=True
+    parameters: mala.Parameters
+    parameters, network, data_handler, predictor = mala.Predictor.load_run(
+        "Be_ACE_model_FULLSCALING"
     )
-    # For easier testing, can also be commented out.
-    ldos_calculator.debug_forces_flag = "band_energy"
-
-    # This next line can later be part of the prediction itself within the
-    # Predictor class.
-    ldos_calculator.setup_for_forces(predictor)
-
-    # Performs the calculation internally.
-    mala_forces = ldos_calculator.atomic_forces
-
-    # Should be 8748, 91
-    print("FORCE TEST: Backpropagation machinery.")
-    print(mala_forces.size())
+    parameters.targets.target_type = "LDOS"
+    parameters.targets.ldos_gridsize = 11
+    parameters.targets.ldos_gridspacing_ev = 2.5
+    parameters.targets.ldos_gridoffset_ev = -5
+    parameters.targets.restrict_targets = None
+    parameters.descriptors.descriptors_contain_xyz = False
+    parameters.descriptors.descriptor_type = "ACE"
+
+    # Compute descriptors, only do further computations for one point.
+    atoms1 = read("/home/fiedlerl/data/mala_data_repo/Be2/Be_snapshot1.out")
+    descriptors, ngrid = (
+        predictor.data.descriptor_calculator.calculate_from_atoms(
+            atoms1, [18, 18, 27]
+        )
+    )
+    snap_descriptors = torch.from_numpy(np.array([descriptors[0, 0, 0]]))
+    snap_descriptors_work = snap_descriptors.clone()
+    snap_descriptors = predictor.data.input_data_scaler.transform(
+        snap_descriptors
+    )
+    snap_descriptors.requires_grad = True
+
+    # Forward pass through the network.
+    ldos = network(snap_descriptors)
+    d_d_d_B = []
+
+    # Compute "gradient". This is ACTUALLY the Jacobian matrix, i.e., the one
+    # we can compare with finite differences.
+    for i in range(0, 11):
+        if snap_descriptors.grad is not None:
+            snap_descriptors.grad.zero_()
+
+        ldos[0, i].backward(retain_graph=True)
+        # d_d_d_B = snap_descriptors.grad.clone()
+        d_d_d_B.append(
+            predictor.data.input_data_scaler.inverse_transform_backpropagation(
+                snap_descriptors.grad.clone()
+            )
+        )
 
+    # Just for debugging purposes, not part of the actual test.
+    # if False:
+    #     # This would be the direct application of autograd.
+    #     # Autograd computes the vector-Jacobian product, see
+    #     # https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html
+    #     # Here: compare same_as_sum with d_d_d_B_sum.
+    #     ldos = network(snap_descriptors)
+    #     snap_descriptors.grad.zero_()
+    #     # print(ldos / ldos)
+    #     ldos.backward(ldos / ldos, retain_graph=True)
+    #     same_as_sum = (
+    #         predictor.data.input_data_scaler.inverse_transform_backpropagation(
+    #             snap_descriptors.grad.clone()
+    #         )
+    #     )
+    #     d_d_d_B_sum = d_d_d_B[0]
+    #     for i in range(1, 11):
+    #         d_d_d_B_sum += d_d_d_B[i]
+
+    with torch.no_grad():
+        # In case we want to compare different levels of finite differences.
+        for diff in [
+            # 5.0e-1,
+            # 5.0e-2,
+            # 5.0e-3,
+            5.0e-4,
+            # 5.0e-5,
+            # 5.0e-6,
+        ]:
+
+            # Compute finite differences. j theoretically goes up to
+            # 36, but for a simple check this is sufficient.
+            for j in range(0, 5):
+                snap_descriptors_work[0, j] += diff
+                descriptors_scaled1 = snap_descriptors_work.clone()
+                ldos_1 = network(
+                    predictor.data.input_data_scaler.transform(
+                        descriptors_scaled1
+                    )
+                ).clone()
+
+                snap_descriptors_work[0, j] -= 2.0 * diff
+                descriptors_scaled2 = snap_descriptors_work.clone()
+                ldos_2 = network(
+                    predictor.data.input_data_scaler.transform(
+                        descriptors_scaled2
+                    )
+                ).clone()
+
+                # Comparison - we only compare the first component of the
+                # LDOS for brevity, but this works for all components.
+                snap_descriptors_work[0, j] += diff
+                # ldos_1 was evaluated at +diff and ldos_2 at -diff, so this
+                # is the central-difference derivative of the (scaled) LDOS.
+                force = -1.0 * ((ldos_2[0, 0] - ldos_1[0, 0]) / (2 * diff))
+                print(
+                    diff,
+                    force.double().numpy(),
+                    d_d_d_B[0][0, j],
+                    force.double().numpy() / d_d_d_B[0][0, j],
+                )
+
+
+def check_output_gradient_scaling():
+    """
+    Check backpropagation through the output scaling against finite differences.
+    """
+    # Load a model, set parameters.
+    predictor: mala.Predictor
+    parameters: mala.Parameters
+    parameters, network, data_handler, predictor = mala.Predictor.load_run(
+        "Be_ACE_model_FULLSCALING"
+    )
+    parameters.targets.target_type = "LDOS"
+    parameters.targets.ldos_gridsize = 11
+    parameters.targets.ldos_gridspacing_ev = 2.5
+    parameters.targets.ldos_gridoffset_ev = -5
+    parameters.targets.restrict_targets = None
+    parameters.descriptors.descriptors_contain_xyz = False
+    parameters.descriptors.descriptor_type = "ACE"
+
+    # Compute descriptors, only do further computations for one point.
+    atoms1 = read("/home/fiedlerl/data/mala_data_repo/Be2/Be_snapshot1.out")
+    descriptors, ngrid = (
+        predictor.data.descriptor_calculator.calculate_from_atoms(
+            atoms1, [18, 18, 27]
+        )
+    )
+    snap_descriptors = torch.from_numpy(np.array([descriptors[0, 0, 0]]))
+    snap_descriptors_work = snap_descriptors.clone()
+    snap_descriptors = predictor.data.input_data_scaler.transform(
+        snap_descriptors
+    )
+    snap_descriptors.requires_grad = True
+
+    # The per-component Jacobian computation (see check_input_gradient_scaling)
+    # is kept here for reference; it is not needed for this check.
+    # ldos = network(snap_descriptors)
+    # d_d_d_B = []
+
+    # Compute "gradient". This is ACTUALLY the Jacobian matrix, i.e., the one
+    # we can compare with finite differences.
+    # for i in range(0, 11):
+    #     if snap_descriptors.grad is not None:
+    #         snap_descriptors.grad.zero_()
+    #
+    #     ldos[0, i].backward(retain_graph=True)
+    #     # d_d_d_B = snap_descriptors.grad.clone()
+    #     d_d_d_B.append(
+    #         predictor.data.input_data_scaler.inverse_transform_backpropagation(
+    #             snap_descriptors.grad.clone()
+    #         )
+    #     )
+
+    # Direct application of autograd: a single backward() call computes the
+    # vector-Jacobian product, see
+    # https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html
+    ldos = network(snap_descriptors)
+    ldos_unscaled = predictor.data.output_data_scaler.inverse_transform(
+        ldos, copy=True
+    )
+    if snap_descriptors.grad is not None:
+        snap_descriptors.grad.zero_()
+    ldos.backward(
+        predictor.data.output_data_scaler.transform_backpropagation(
+            2 * ldos_unscaled
+        ),
+        retain_graph=True,
+    )
+    d_d_d_B = (
+        predictor.data.input_data_scaler.inverse_transform_backpropagation(
+            snap_descriptors.grad.clone()
+        )
+    )
 
+    with torch.no_grad():
+        # In case we want to compare different levels of finite differences.
+        for diff in [
+            # 5.0e-1,
+            # 5.0e-2,
+            # 5.0e-3,
+            5.0e-4,
+            # 5.0e-5,
+            # 5.0e-6,
+        ]:
+
+            # Compute finite differences. j theoretically goes up to
+            # 36, but for a simple check this is sufficient.
+            for j in range(0, 5):
+                snap_descriptors_work[0, j] += diff
+                descriptors_scaled1 = snap_descriptors_work.clone()
+                ldos_1 = predictor.data.output_data_scaler.inverse_transform(
+                    network(
+                        predictor.data.input_data_scaler.transform(
+                            descriptors_scaled1
+                        )
+                    ).clone()
+                )
+                energy_1 = 0
+                for i in range(0, 11):
+                    energy_1 += ldos_1[0, i] * ldos_1[0, i]
+
+                snap_descriptors_work[0, j] -= 2.0 * diff
+                descriptors_scaled2 = snap_descriptors_work.clone()
+                ldos_2 = predictor.data.output_data_scaler.inverse_transform(
+                    network(
+                        predictor.data.input_data_scaler.transform(
+                            descriptors_scaled2
+                        )
+                    ).clone()
+                )
+                energy_2 = 0
+                for i in range(0, 11):
+                    energy_2 += ldos_2[0, i] * ldos_2[0, i]
+
+                # Comparison - here we compare the derivative of a pseudo-energy
+                # (the sum of the squared, unscaled LDOS values), so that the
+                # output scaling enters the gradient.
+                snap_descriptors_work[0, j] += diff
+                # energy_1 was evaluated at +diff and energy_2 at -diff, so
+                # this is the central-difference derivative of that quantity.
+                force = -1.0 * ((energy_2 - energy_1) / (2 * diff))
+                print(
+                    diff,
+                    force.double().numpy(),
+                    d_d_d_B[0, j],
+                    force.double().numpy() / d_d_d_B[0, j],
+                )
+
+
+# check_input_gradient_scaling()
+# check_output_gradient_scaling()
 # band_energy_contribution()
 # entropy_contribution()
 # hartree_contribution()
-backpropagation_pieces()
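
The pattern exercised by check_input_gradient_scaling() is independent of MALA: build the Jacobian of the network output with respect to the unscaled input by calling backward() once per output component, undo the input standardization on each gradient row, and compare the result against central finite differences taken in unscaled input space. A minimal sketch of that idea in plain PyTorch (the tiny network, the scaler statistics, and all variable names below are made up for illustration; only standard torch calls are used):

import torch

torch.manual_seed(0)
net = torch.nn.Sequential(
    torch.nn.Linear(4, 8), torch.nn.Tanh(), torch.nn.Linear(8, 3)
)
mean, std = torch.randn(4), torch.rand(4) + 0.5  # stand-ins for the input scaler

x_unscaled = torch.randn(1, 4)
x_scaled = ((x_unscaled - mean) / std).requires_grad_(True)

# One backward() call per output component yields one row of the Jacobian.
y = net(x_scaled)
jacobian_rows = []
for i in range(y.shape[1]):
    if x_scaled.grad is not None:
        x_scaled.grad.zero_()
    y[0, i].backward(retain_graph=True)
    # Chain rule through the standardization: dy_i/dx = (dy_i/dx_scaled) / std.
    jacobian_rows.append(x_scaled.grad.clone() / std)

# Central finite differences in unscaled input space, for the first output.
h = 5.0e-4
with torch.no_grad():
    for j in range(4):
        e = torch.zeros_like(x_unscaled)
        e[0, j] = h
        y_plus = net((x_unscaled + e - mean) / std)
        y_minus = net((x_unscaled - e - mean) / std)
        fd = (y_plus[0, 0] - y_minus[0, 0]) / (2 * h)
        # The ratio should be close to 1 if the chain rule through the
        # scaler is handled correctly.
        print(j, fd.item(), jacobian_rows[0][0, j].item(),
              (fd / jacobian_rows[0][0, j]).item())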
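check_output_gradient_scaling() additionally relies on the fact that autograd computes vector-Jacobian products: a single backward() call with the vector dE/dy yields the gradient of a scalar pseudo-energy E = sum_i y_i^2 built from the unscaled outputs, with the output scaling entering only through the vector that is passed in. A sketch of the same mechanism in plain PyTorch (again, the model and the affine "scaler" below are invented for illustration):

import torch

torch.manual_seed(0)
net = torch.nn.Sequential(
    torch.nn.Linear(4, 8), torch.nn.Tanh(), torch.nn.Linear(8, 3)
)
out_mean, out_std = torch.randn(3), torch.rand(3) + 0.5  # stand-in output scaler

x = torch.randn(1, 4, requires_grad=True)

y_scaled = net(x)
y = y_scaled * out_std + out_mean  # "inverse transform" to unscaled outputs
# E = sum_i y_i^2, so dE/dy = 2*y; chained through the output scaling this
# becomes dE/dy_scaled = 2*y*out_std, which is the vector handed to backward().
y_scaled.backward(2 * y.detach() * out_std)
grad_vjp = x.grad.clone()

# Central finite differences on the pseudo-energy itself.
h = 5.0e-4
with torch.no_grad():
    for j in range(4):
        e = torch.zeros_like(x)
        e[0, j] = h
        e_plus = ((net(x + e) * out_std + out_mean) ** 2).sum()
        e_minus = ((net(x - e) * out_std + out_mean) ** 2).sum()
        fd = (e_plus - e_minus) / (2 * h)
        print(j, fd.item(), grad_vjp[0, j].item(), (fd / grad_vjp[0, j]).item())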