Open research towards the discovery of room-temperature superconductors.
Discover ways to transform this asset
POST /speech/from-post
How this post is connected to other assets
Using the published 3DSC superconductor dataset, we fine-tuned MatterGen to enable critical-temperature (Tc) property-conditioned generation of 'S.U.N.' (stable, unique, novel) crystal structures.
The 3DSC dataset was intentionally deduplicated, but since re-entrant superconductors are a known class of materials, it could be interesting to fine-tune on the full merge of 3DSC and the alex_mp_20 set.
The fine-tuning job was run on 4 A10 GPUs with the trainer's accumulate_grad_batches parameter reduced from 4 to 2. We tried kicking this off on M-series Mac machines, given the recent improvement to the repo that enabled Metal hardware support, but the training times were far too long. Single-GPU training experiments led us to try reducing the floating-point precision to 16 bits, but this introduced too many numerical instability issues.
Below you'll find the trainer config for our fine-tuning run:
| Name | Type | Params
-----------------------------------------------------
0 | diffusion_module | DiffusionModule | 48.8 M
-----------------------------------------------------
48.8 M Trainable params
22 Non-trainable params
48.8 M Total params
195.042 Total estimated model params size (MB)
{
"data_module": {
"_target_": "mattergen.common.data.datamodule.CrystDataModule",
"_recursive_": true,
"properties": [
"tc"
],
"dataset_transforms": [
{
"_target_": "mattergen.common.data.dataset_transform.filter_sparse_properties",
"_partial_": true
}
],
"transforms": [
{
"_target_": "mattergen.common.data.transform.symmetrize_lattice",
"_partial_": true
},
{
"_target_": "mattergen.common.data.transform.set_chemical_system_string",
"_partial_": true
}
],
"average_density": 0.05771451654022283,
"root_dir": "/teamspace/studios/this_studio/mattergen/mattergen/../datasets/cache/alex_mp_20",
"train_dataset": {
"_target_": "mattergen.common.data.dataset.CrystalDataset.from_cache_path",
"cache_path": "/teamspace/studios/this_studio/mattergen/mattergen/../datasets/cache/alex_mp_20/train",
"properties": [
"tc"
],
"transforms": [
{
"_target_": "mattergen.common.data.transform.symmetrize_lattice",
"_partial_": true
},
{
"_target_": "mattergen.common.data.transform.set_chemical_system_string",
"_partial_": true
}
],
"dataset_transforms": [
{
"_target_": "mattergen.common.data.dataset_transform.filter_sparse_properties",
"_partial_": true
}
]
},
"val_dataset": {
"_target_": "mattergen.common.data.dataset.CrystalDataset.from_cache_path",
"cache_path": "/teamspace/studios/this_studio/mattergen/mattergen/../datasets/cache/alex_mp_20/val",
"properties": [
"tc"
],
"transforms": [
{
"_target_": "mattergen.common.data.transform.symmetrize_lattice",
"_partial_": true
},
{
"_target_": "mattergen.common.data.transform.set_chemical_system_string",
"_partial_": true
}
],
"dataset_transforms": [
{
"_target_": "mattergen.common.data.dataset_transform.filter_sparse_properties",
"_partial_": true
}
]
},
"num_workers": {
"train": 0,
"val": 0
},
"batch_size": {
"train": 64,
"val": 64
},
"max_epochs": 2200
},
"trainer": {
"_target_": "pytorch_lightning.Trainer",
"accelerator": "gpu",
"devices": 4,
"num_nodes": 1,
"precision": 32,
"max_epochs": 200,
"accumulate_grad_batches": 2,
"gradient_clip_val": 0.5,
"gradient_clip_algorithm": "value",
"check_val_every_n_epoch": 5,
"strategy": {
"_target_": "pytorch_lightning.strategies.ddp.DDPStrategy",
"find_unused_parameters": true
},
"callbacks": [
{
"_target_": "pytorch_lightning.callbacks.LearningRateMonitor",
"logging_interval": "step",
"log_momentum": false
},
{
"_target_": "pytorch_lightning.callbacks.ModelCheckpoint",
"monitor": "loss_val",
"mode": "min",
"save_top_k": 1,
"save_last": true,
"verbose": false,
"every_n_epochs": 1,
"filename": "{epoch}-{loss_val:.2f}"
},
{
"_target_": "pytorch_lightning.callbacks.TQDMProgressBar",
"refresh_rate": 50
},
{
"_target_": "mattergen.common.data.callback.SetPropertyScalers"
}
]
},
"lightning_module": {
"_target_": "mattergen.diffusion.lightning_module.DiffusionLightningModule",
"optimizer_partial": {
"lr": 5e-06,
"_target_": "torch.optim.Adam",
"_partial_": true
},
"scheduler_partials": [
{
"scheduler": {
"_target_": "torch.optim.lr_scheduler.ReduceLROnPlateau",
"factor": 0.6,
"patience": 100,
"min_lr": 1e-06,
"verbose": true,
"_partial_": true
},
"interval": "epoch",
"frequency": 1,
"monitor": "loss_train",
"strict": true
}
],
"diffusion_module": {
"_target_": "mattergen.diffusion.diffusion_module.DiffusionModule",
"corruption": {
"_target_": "mattergen.diffusion.corruption.multi_corruption.MultiCorruption",
"discrete_corruptions": {
"atomic_numbers": {
"_target_": "mattergen.diffusion.corruption.d3pm_corruption.D3PMCorruption",
"d3pm": {
"_target_": "mattergen.diffusion.d3pm.d3pm.MaskDiffusion",
"dim": 101,
"schedule": {
"_target_": "mattergen.diffusion.d3pm.d3pm.create_discrete_diffusion_schedule",
"kind": "standard",
"num_steps": 1000
}
},
"offset": 1
}
},
"sdes": {
"cell": {
"_target_": "mattergen.common.diffusion.corruption.LatticeVPSDE.from_vpsde_config",
"vpsde_config": {
"beta_max": 20,
"beta_min": 0.1,
"limit_density": 0.05771451654022283,
"limit_var_scaling_constant": 0.25
}
},
"pos": {
"_target_": "mattergen.common.diffusion.corruption.NumAtomsVarianceAdjustedWrappedVESDE",
"limit_info_key": "num_atoms",
"sigma_max": 5.0,
"wrapping_boundary": 1.0
}
}
},
"loss_fn": {
"_target_": "mattergen.common.loss.MaterialsLoss",
"d3pm_hybrid_lambda": 0.01,
"include_atomic_numbers": true,
"include_cell": true,
"include_pos": true,
"reduce": "sum",
"weights": {
"atomic_numbers": 1.0,
"cell": 1.0,
"pos": 0.1
}
},
"model": {
"_target_": "mattergen.adapter.GemNetTAdapter",
"property_embeddings_adapt": {
"tc": {
"_target_": "mattergen.property_embeddings.PropertyEmbedding",
"name": "tc",
"unconditional_embedding_module": {
"_target_": "mattergen.property_embeddings.EmbeddingVector",
"hidden_dim": 512
},
"conditional_embedding_module": {
"_target_": "mattergen.diffusion.model_utils.NoiseLevelEncoding",
"d_model": 512
},
"scaler": {
"_target_": "mattergen.common.utils.data_utils.StandardScalerTorch"
}
}
},
"atom_type_diffusion": "mask",
"denoise_atom_types": true,
"gemnet": {
"_target_": "mattergen.common.gemnet.gemnet_ctrl.GemNetTCtrl",
"atom_embedding": {
"_target_": "mattergen.common.gemnet.layers.embedding_block.AtomEmbedding",
"emb_size": 512,
"with_mask_type": true
},
"cutoff": 7.0,
"emb_size_atom": 512,
"emb_size_edge": 512,
"latent_dim": 512,
"max_cell_images_per_dim": 5,
"max_neighbors": 50,
"num_blocks": 4,
"num_targets": 1,
"otf_graph": true,
"regress_stress": true,
"scale_file": "/scratch/amlt_code/mattergen/common/gemnet/gemnet-dT.json",
"condition_on_adapt": [
"tc"
]
},
"hidden_dim": 512,
"property_embeddings": {}
},
"pre_corruption_fn": {
"_target_": "mattergen.property_embeddings.SetEmbeddingType",
"dropout_fields_iid": false,
"p_unconditional": 0.2
}
}
},
"adapter": {
"model_path": "checkpoints/mattergen_base",
"load_epoch": "last",
"full_finetuning": true,
"adapter": {
"_target_": "mattergen.adapter.GemNetTAdapter",
"property_embeddings_adapt": {
"tc": {
"_target_": "mattergen.property_embeddings.PropertyEmbedding",
"name": "tc",
"unconditional_embedding_module": {
"_target_": "mattergen.property_embeddings.EmbeddingVector",
"hidden_dim": 512
},
"conditional_embedding_module": {
"_target_": "mattergen.diffusion.model_utils.NoiseLevelEncoding",
"d_model": 512
},
"scaler": {
"_target_": "mattergen.common.utils.data_utils.StandardScalerTorch"
}
}
},
"atom_type_diffusion": "mask",
"denoise_atom_types": true,
"gemnet": {
"_target_": "mattergen.common.gemnet.gemnet_ctrl.GemNetTCtrl",
"atom_embedding": {
"_target_": "mattergen.common.gemnet.layers.embedding_block.AtomEmbedding",
"emb_size": 512,
"with_mask_type": true
},
"cutoff": 7.0,
"emb_size_atom": 512,
"emb_size_edge": 512,
"latent_dim": 512,
"max_cell_images_per_dim": 5,
"max_neighbors": 50,
"num_blocks": 4,
"num_targets": 1,
"otf_graph": true,
"regress_stress": true,
"scale_file": "/scratch/amlt_code/mattergen/common/gemnet/gemnet-dT.json",
"condition_on_adapt": [
"tc"
]
},
"hidden_dim": 512,
"property_embeddings": {}
}
}
}
We let the fine-tune run for the maximum 200 epochs defined by the MatterGen-provided script and generated 16 new candidate structures conditioned on a Tc of 298.15 Kelvin (that is, we told the model to generate structures with that Tc; this is not a claim that these structures actually have this Tc... they likely don't).
Here they are:
gen_0.cif
gen_1.cif
gen_2.cif
gen_3.cif
gen_4.cif
gen_5.cif
gen_6.cif
gen_7.cif
gen_8.cif
gen_9.cif
gen_10.cif
gen_11.cif
gen_12.cif
gen_13.cif
gen_14.cif
gen_15.cif
And here are some high level attributes for each:
Structure: gen_0.cif
Formula: AgPb6
Elements: Ag, Pb
Number of atoms: 14
Lattice parameters: a=6.054, b=9.457, c=9.091
Angles: α=119.6°, β=100.6°, γ=101.3°
Volume: 418.995 ų
Average coordination number: 3.29
Structure: gen_1.cif
Formula: Yb3(CuP)4
Elements: Yb, Cu, P
Number of atoms: 11
Lattice parameters: a=4.116, b=6.436, c=7.711
Angles: α=114.7°, β=105.5°, γ=90.0°
Volume: 177.427 ų
Average coordination number: 8.55
Structure: gen_10.cif
Formula: ZrIr3
Elements: Zr, Ir
Number of atoms: 4
Lattice parameters: a=3.989, b=3.989, c=4.927
Angles: α=113.8°, β=113.9°, γ=90.0°
Volume: 64.292 ų
Average coordination number: 6.00
Structure: gen_11.cif
Formula: ScC
Elements: Sc, C
Number of atoms: 8
Lattice parameters: a=3.500, b=4.656, c=6.214
Angles: α=90.0°, β=90.0°, γ=90.1°
Volume: 101.257 ų
Average coordination number: 6.50
Structure: gen_12.cif
Formula: La2C13
Elements: La, C
Number of atoms: 15
Lattice parameters: a=4.662, b=4.480, c=9.435
Angles: α=89.3°, β=90.2°, γ=121.3°
Volume: 168.343 ų
Average coordination number: 5.33
Structure: gen_13.cif
Formula: Bi2Ir
Elements: Bi, Ir
Number of atoms: 12
Lattice parameters: a=6.719, b=6.724, c=6.728
Angles: α=89.9°, β=90.0°, γ=90.1°
Volume: 303.973 ų
Average coordination number: 4.67
Structure: gen_14.cif
Formula: NaTe4Pb3
Elements: Na, Te, Pb
Number of atoms: 8
Lattice parameters: a=6.449, b=6.474, c=6.484
Angles: α=90.0°, β=90.0°, γ=90.0°
Volume: 270.721 ų
Average coordination number: 6.00
Structure: gen_15.cif
Formula: Ta5Ir11
Elements: Ta, Ir
Number of atoms: 16
Lattice parameters: a=5.598, b=5.606, c=9.025
Angles: α=89.8°, β=90.7°, γ=119.8°
Volume: 245.755 ų
Average coordination number: 9.88
Structure: gen_2.cif
Formula: Os3W
Elements: Os, W
Number of atoms: 8
Lattice parameters: a=4.408, b=5.560, c=5.558
Angles: α=120.0°, β=90.0°, γ=90.0°
Volume: 117.996 ų
Average coordination number: 12.00
Structure: gen_3.cif
Formula: ZrSi
Elements: Zr, Si
Number of atoms: 6
Lattice parameters: a=3.843, b=3.828, c=7.898
Angles: α=104.0°, β=104.0°, γ=90.0°
Volume: 109.121 ų
Average coordination number: 6.00
Structure: gen_4.cif
Formula: Y6BS5O4
Elements: Y, B, S, O
Number of atoms: 16
Lattice parameters: a=3.844, b=5.761, c=14.192
Angles: α=86.4°, β=90.0°, γ=90.0°
Volume: 313.648 ų
Average coordination number: 5.00
Structure: gen_5.cif
Formula: GaRu2
Elements: Ga, Ru
Number of atoms: 6
Lattice parameters: a=4.988, b=4.994, c=4.985
Angles: α=92.3°, β=118.7°, γ=118.6°
Volume: 89.635 ų
Average coordination number: 10.67
Structure: gen_6.cif
Formula: Yb(FeC)2
Elements: Yb, Fe, C
Number of atoms: 5
Lattice parameters: a=3.495, b=3.495, c=5.379
Angles: α=109.0°, β=109.1°, γ=90.0°
Volume: 58.292 ų
Average coordination number: 6.40
Structure: gen_7.cif
Formula: Nb3Ir
Elements: Nb, Ir
Number of atoms: 4
Lattice parameters: a=3.299, b=4.575, c=4.571
Angles: α=90.0°, β=90.0°, γ=90.1°
Volume: 69.002 ų
Average coordination number: 7.00
Structure: gen_8.cif
Formula: SrGa3Pt
Elements: Sr, Ga, Pt
Number of atoms: 5
Lattice parameters: a=4.415, b=4.413, c=6.248
Angles: α=110.7°, β=110.7°, γ=90.0°
Volume: 105.507 ų
Average coordination number: 5.20
Structure: gen_9.cif
Formula: V2W
Elements: V, W
Number of atoms: 6
Lattice parameters: a=3.064, b=4.328, c=6.698
Angles: α=90.0°, β=103.2°, γ=90.0°
Volume: 86.453 ų
Average coordination number: 6.00
The model's 'uniqueness' bias helps us out a lot here. We're seeing chemical systems that are unique within their own batch, meaning we can quickly search for promising chemical systems and refine our search with chemical system conditioned generation. We're also seeing coordination numbers for common and relatively simple crystal structures: simple cubic (6), BCC (8), and FCC (12). This is reassuring as, despite tuning on very sparse data, we're still seeing structure output that reflects structures we know to exist in the physical world.
As for the evaluation metrics for these generated structures, there's a lot to be optimistic about:
{
"avg_energy_above_hull_per_atom": {
"value": 0.04149956944731237,
"description": "Average energy above hull per atom (eV/atom) of structures in sampled data."
},
"avg_rmsd_from_relaxation": {
"value": 0.06419174912084613,
"description": "root mean square displacements of atoms (Angstrom) from initial to final DFT relaxation steps in sampled data."
},
"frac_novel_unique_stable_structures": {
"value": 1.0,
"description": "Fraction of novel unique stable structures in sampled data within 0.1 (eV/atom) above convex hull of MP2020correction."
},
"frac_stable_structures": {
"value": 1.0,
"description": "Fraction of stable structures in sampled data within 0.1 (eV/atom) above convex hull of MP2020correction."
},
"frac_successful_jobs": {
"value": 1.0,
"description": "Fraction of structures whose jobs ran successfully."
},
"avg_comp_validity": {
"value": 1.0,
"description": "Average composition validity (according to smact) of structures in sampled data."
},
"avg_structure_comp_validity": {
"value": 1.0,
"description": "Average number of structures in sampled data that are both valid structures and have a valid smact compositions."
},
"avg_structure_validity": {
"value": 1.0,
"description": "Average structural validity of structures in sampled data. Any atom-atom distances less than 0.5 Angstroms or a volume less than 0.1 Angstrom**3 are considered invalid ."
},
"frac_novel_structures": {
"value": 1.0,
"description": "Fraction of novel structures in sampled data."
},
"frac_novel_systems": {
"value": 0.0,
"description": "Fraction of distinct chemical systems in sampled data and not in MP2020correction."
},
"frac_novel_unique_structures": {
"value": 1.0,
"description": "Fraction of novel unique structures in sampled data."
},
"frac_unique_structures": {
"value": 1.0,
"description": "Fraction of unique structures in sampled data."
},
"frac_unique_systems": {
"value": 1.0,
"description": "Fraction of structures in sampled data that have a unique chemical system within this set."
},
"precision": {
"value": 0.0,
"description": "Precision of structures in sampled data compared with MP2020correction. This is the fraction of structures in sampled data that have a matching structure in MP2020correction."
},
"recall": {
"value": 0.0,
"description": "Recall of structures in sampled data compared with structures in MP2020correction. This is the fraction of structures in sampled data that have a matching structure in MP2020correction."
}
}
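What I find to be particularly exciting is that Yttrium Barium Copper Oxide (YBCO) is known to be a high-temperature superconductor. MatterGen is calling out, in gen_4.cif, that a Y-B-S-O compound (Y6BS5O4, "YBSO") could be a candidate as well. Note that the B in gen_4 is boron rather than barium, so the resemblance to YBCO is in the Y-/O-containing chemistry, not an exact elemental analogue.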
Handing it over to you for some classification.
Discover other posts like this one
Superconductivity typically emerges from strong interactions between electrons and vibrations in the crystal lattice (phonons). These interactions can lead to electron pairing, enabling resistance-free…
Literature review of databases with materials and $T_c$. See literature review on ML models which utilize these datasets:
So far a really interesting paper. Published in 2018. Adding some informal notes and interesting findings here. Finding out how much literature is based on this study.
This post will focus on the methods available to predict/derive the $T_c$ of a material. We want to be able to build a pipeline where we can go beyond the available (and experimental) Tc data and train a model