update readme and environment

This commit is contained in:
Hannes Signer 2025-02-27 12:48:58 +01:00
parent d5d836bd98
commit 05bbb87562
3 changed files with 268 additions and 164 deletions

View File

@ -1,5 +1,23 @@
# Training of AI Surrogate Models
- run `git lfs pull` to get the data from the large file storage
- create conda environment with `conda env create -f environment.yml`
This repository contains the current experiments for training AI models that attempt to predict the chemistry component of POET.
The repository is structured as follows:
```
├── dataset
│   ├── Barite_50_Data_training.h5
│   └── barite_50_4_corner.h5
├── doc
├── results
└── src
    ├── POET_Training.ipynb
    ├── convert_data.jl
    ├── optuna_runs.py
    └── preprocessing.py
```
The datasets in `dataset` must first be pulled via `git lfs pull`, which fetches the data from Git Large File Storage.
A conda environment can then be set up with the packages listed in `environment.yaml` by running `conda env create -f environment.yaml`.
The `preprocessing.py` file defines all the necessary preprocessing steps as well as the Keras models used. The actual training, along with additional explanations, takes place in `POET_Training.ipynb`.

248
environment.yaml Normal file
View File

@ -0,0 +1,248 @@
name: training
channels:
- conda-forge
- defaults
dependencies:
- _libgcc_mutex=0.1=main
- _openmp_mutex=5.1=1_gnu
- absl-py=2.1.0=py311h06a4308_0
- alembic=1.13.3=py311h06a4308_0
- anyio=4.6.2=py311h06a4308_0
- argon2-cffi=21.3.0=pyhd3eb1b0_0
- argon2-cffi-bindings=21.2.0=py311h5eee18b_1
- asttokens=2.0.5=pyhd3eb1b0_0
- astunparse=1.6.3=py_0
- async-lru=2.0.4=py311h06a4308_0
- attrs=24.3.0=py311h06a4308_0
- babel=2.11.0=py311h06a4308_0
- beautifulsoup4=4.12.3=py311h06a4308_0
- blas=1.0=mkl
- bleach=6.2.0=py311h06a4308_0
- bottleneck=1.4.2=py311hf4808d0_0
- brotli=1.0.9=h5eee18b_9
- brotli-bin=1.0.9=h5eee18b_9
- brotli-python=1.0.9=py311h6a678d5_9
- bzip2=1.0.8=h5eee18b_6
- c-ares=1.19.1=h5eee18b_0
- ca-certificates=2024.12.31=h06a4308_0
- certifi=2025.1.31=py311h06a4308_0
- cffi=1.17.1=py311h1fdaa30_1
- charset-normalizer=3.3.2=pyhd3eb1b0_0
- colorlog=5.0.1=py311h06a4308_1
- comm=0.2.1=py311h06a4308_0
- contourpy=1.3.1=py311hdb19cb5_0
- cycler=0.11.0=pyhd3eb1b0_0
- cyrus-sasl=2.1.28=h52b45da_1
- dbus=1.13.18=hb2f20db_0
- debugpy=1.8.11=py311h6a678d5_0
- decorator=5.1.1=pyhd3eb1b0_0
- defusedxml=0.7.1=pyhd3eb1b0_0
- executing=0.8.3=pyhd3eb1b0_0
- expat=2.6.4=h6a678d5_0
- flatbuffers=24.3.25=h6a678d5_0
- fontconfig=2.14.1=h55d465d_3
- fonttools=4.51.0=py311h5eee18b_0
- freetype=2.12.1=h4a9f257_0
- gast=0.5.3=pyhd3eb1b0_0
- giflib=5.2.2=h5eee18b_0
- glib=2.78.4=h6a678d5_0
- glib-tools=2.78.4=h6a678d5_0
- google-pasta=0.2.0=pyhd3eb1b0_0
- greenlet=3.1.1=py311h6a678d5_0
- grpcio=1.62.2=py311h6a678d5_0
- gst-plugins-base=1.14.1=h6a678d5_1
- gstreamer=1.14.1=h5eee18b_1
- h11=0.14.0=py311h06a4308_0
- h5py=3.12.1=py311hc0802c4_0
- hdf5=1.12.1=h2b7332f_3
- httpcore=1.0.2=py311h06a4308_0
- httpx=0.27.0=py311h06a4308_0
- icu=73.1=h6a678d5_0
- idna=3.7=py311h06a4308_0
- imbalanced-learn=0.12.3=py311h06a4308_1
- intel-openmp=2023.1.0=hdb19cb5_46306
- ipykernel=6.29.5=py311h06a4308_0
- ipython=8.30.0=py311h06a4308_0
- ipywidgets=8.1.5=py311h06a4308_0
- jedi=0.19.2=py311h06a4308_0
- jinja2=3.1.4=py311h06a4308_1
- joblib=1.4.2=py311h06a4308_0
- jpeg=9e=h5eee18b_3
- json5=0.9.25=py311h06a4308_0
- jsonschema=4.23.0=py311h06a4308_0
- jsonschema-specifications=2023.7.1=py311h06a4308_0
- jupyter=1.0.0=py311h06a4308_9
- jupyter-lsp=2.2.0=py311h06a4308_0
- jupyter_client=8.6.0=py311h06a4308_0
- jupyter_console=6.6.3=py311h06a4308_0
- jupyter_core=5.7.2=py311h06a4308_0
- jupyter_events=0.10.0=py311h06a4308_0
- jupyter_server=2.14.1=py311h06a4308_0
- jupyter_server_terminals=0.4.4=py311h06a4308_1
- jupyterlab=4.2.5=py311h06a4308_0
- jupyterlab_pygments=0.1.2=py_0
- jupyterlab_server=2.27.3=py311h06a4308_0
- jupyterlab_widgets=3.0.13=py311h06a4308_0
- keras=3.6.0=py311h06a4308_0
- kiwisolver=1.4.4=py311h6a678d5_0
- krb5=1.20.1=h143b758_1
- lcms2=2.16=hb9589c4_0
- ld_impl_linux-64=2.40=h12ee557_0
- lerc=4.0.0=h6a678d5_0
- libabseil=20240116.2=cxx17_h6a678d5_0
- libbrotlicommon=1.0.9=h5eee18b_9
- libbrotlidec=1.0.9=h5eee18b_9
- libbrotlienc=1.0.9=h5eee18b_9
- libclang=14.0.6=default_hc6dbbc7_2
- libclang13=14.0.6=default_he11475f_2
- libcups=2.4.2=h2d74bed_1
- libcurl=8.11.1=hc9e6f67_0
- libdeflate=1.22=h5eee18b_0
- libedit=3.1.20230828=h5eee18b_0
- libev=4.33=h7f8727e_1
- libffi=3.4.4=h6a678d5_1
- libgcc-ng=11.2.0=h1234567_1
- libgfortran-ng=11.2.0=h00389a5_1
- libgfortran5=11.2.0=h1234567_1
- libglib=2.78.4=hdc74915_0
- libgomp=11.2.0=h1234567_1
- libgrpc=1.62.2=h2d74bed_0
- libiconv=1.16=h5eee18b_3
- libllvm14=14.0.6=hecde1de_4
- libnghttp2=1.57.0=h2d74bed_0
- libpng=1.6.39=h5eee18b_0
- libpq=17.2=hdbd6064_0
- libprotobuf=4.25.3=he621ea3_0
- libsodium=1.0.18=h7b6447c_0
- libssh2=1.11.1=h251f7ec_0
- libstdcxx-ng=11.2.0=h1234567_1
- libtiff=4.5.1=hffd6297_1
- libuuid=1.41.5=h5eee18b_0
- libwebp-base=1.3.2=h5eee18b_1
- libxcb=1.15=h7f8727e_0
- libxkbcommon=1.0.1=h097e994_2
- libxml2=2.13.5=hfdd30dd_0
- lz4-c=1.9.4=h6a678d5_1
- mako=1.2.3=py311h06a4308_0
- markdown=3.4.1=py311h06a4308_0
- markdown-it-py=2.2.0=py311h06a4308_1
- markupsafe=2.1.3=py311h5eee18b_1
- matplotlib=3.10.0=py311h06a4308_0
- matplotlib-base=3.10.0=py311hbfdbfaf_0
- matplotlib-inline=0.1.6=py311h06a4308_0
- mdurl=0.1.0=py311h06a4308_0
- mistune=2.0.4=py311h06a4308_0
- mkl=2023.1.0=h213fc3f_46344
- mkl-service=2.4.0=py311h5eee18b_2
- mkl_fft=1.3.11=py311h5eee18b_0
- mkl_random=1.2.8=py311ha02d727_0
- ml_dtypes=0.4.0=py311ha02d727_0
- mysql=8.4.0=h29a9f33_1
- namex=0.0.7=py311h06a4308_0
- nbclient=0.8.0=py311h06a4308_0
- nbconvert=7.16.4=py311h06a4308_0
- nbformat=5.10.4=py311h06a4308_0
- ncurses=6.4=h6a678d5_0
- nest-asyncio=1.6.0=py311h06a4308_0
- notebook=7.2.2=py311h06a4308_1
- notebook-shim=0.2.3=py311h06a4308_0
- numexpr=2.10.1=py311h3c60e43_0
- numpy=1.26.4=py311h08b1b3b_0
- numpy-base=1.26.4=py311hf175353_0
- openjpeg=2.5.2=he7f1fd0_0
- openldap=2.6.4=h42fbc30_0
- openssl=3.0.15=h5eee18b_0
- opt_einsum=3.3.0=pyhd3eb1b0_1
- optree=0.12.1=py311hdb19cb5_0
- optuna=4.2.1=pyhd8ed1ab_0
- overrides=7.4.0=py311h06a4308_0
- packaging=24.2=py311h06a4308_0
- pandas=2.2.3=py311h6a678d5_0
- pandocfilters=1.5.0=pyhd3eb1b0_0
- parso=0.8.4=py311h06a4308_0
- pcre2=10.42=hebb0a14_1
- pexpect=4.8.0=pyhd3eb1b0_3
- pillow=11.0.0=py311hcea889d_1
- pip=24.2=py311h06a4308_0
- platformdirs=3.10.0=py311h06a4308_0
- ply=3.11=py311h06a4308_0
- prometheus_client=0.21.0=py311h06a4308_0
- prompt-toolkit=3.0.43=py311h06a4308_0
- prompt_toolkit=3.0.43=hd3eb1b0_0
- protobuf=4.25.3=py311he36ed58_1
- psutil=5.9.0=py311h5eee18b_1
- ptyprocess=0.7.0=pyhd3eb1b0_2
- pure_eval=0.2.2=pyhd3eb1b0_0
- pycparser=2.21=pyhd3eb1b0_0
- pygments=2.15.1=py311h06a4308_1
- pyparsing=3.2.0=py311h06a4308_0
- pyqt=5.15.10=py311h6a678d5_0
- pyqt5-sip=12.13.0=py311h5eee18b_0
- pysocks=1.7.1=py311h06a4308_0
- python=3.11.11=he870216_0
- python-dateutil=2.9.0post0=py311h06a4308_2
- python-fastjsonschema=2.20.0=py311h06a4308_0
- python-flatbuffers=24.3.25=py311h06a4308_0
- python-json-logger=3.2.1=py311h06a4308_0
- python-tzdata=2023.3=pyhd3eb1b0_0
- pytz=2024.1=py311h06a4308_0
- pyyaml=6.0.2=py311h5eee18b_0
- pyzmq=26.2.0=py311h6a678d5_0
- qt-main=5.15.2=hb6262e9_11
- qtconsole=5.6.0=py311h06a4308_0
- qtpy=2.4.1=py311h06a4308_0
- re2=2022.04.01=h295c915_0
- readline=8.2=h5eee18b_0
- referencing=0.30.2=py311h06a4308_0
- requests=2.32.3=py311h06a4308_1
- rfc3339-validator=0.1.4=py311h06a4308_0
- rfc3986-validator=0.1.1=py311h06a4308_0
- rich=13.9.4=py311h06a4308_0
- rpds-py=0.22.3=py311h4aa5aa6_0
- scikit-learn=1.5.2=py311h6a678d5_0
- scipy=1.14.1=py311h08b1b3b_0
- seaborn=0.13.2=py311h06a4308_1
- send2trash=1.8.2=py311h06a4308_1
- setuptools=75.1.0=py311h06a4308_0
- sip=6.7.12=py311h6a678d5_1
- six=1.16.0=pyhd3eb1b0_1
- snappy=1.2.1=h6a678d5_0
- sniffio=1.3.0=py311h06a4308_0
- soupsieve=2.5=py311h06a4308_0
- sqlalchemy=2.0.37=py311h00e1ef3_0
- sqlite=3.45.3=h5eee18b_0
- stack_data=0.2.0=pyhd3eb1b0_0
- tbb=2021.8.0=hdb19cb5_0
- tensorboard=2.17.0=py311h06a4308_0
- tensorboard-data-server=0.7.0=py311h52d8a92_1
- tensorflow=2.17.0=cpu_py311hbca4264_0
- tensorflow-base=2.17.0=cpu_py311hb07566e_0
- tensorflow-estimator=2.17.0=cpu_py311hfedf350_0
- termcolor=2.1.0=py311h06a4308_0
- terminado=0.17.1=py311h06a4308_0
- threadpoolctl=3.5.0=py311h92b7b1e_0
- tinycss2=1.2.1=py311h06a4308_0
- tk=8.6.14=h39e8969_0
- tornado=6.4.2=py311h5eee18b_0
- tqdm=4.67.1=py311h92b7b1e_0
- traitlets=5.14.3=py311h06a4308_0
- typing-extensions=4.12.2=py311h06a4308_0
- typing_extensions=4.12.2=py311h06a4308_0
- tzdata=2024b=h04d1e81_0
- unicodedata2=15.1.0=py311h5eee18b_1
- urllib3=2.2.3=py311h06a4308_0
- wcwidth=0.2.5=pyhd3eb1b0_0
- webencodings=0.5.1=py311h06a4308_1
- websocket-client=1.8.0=py311h06a4308_0
- werkzeug=3.0.6=py311h06a4308_0
- wheel=0.44.0=py311h06a4308_0
- widgetsnbextension=4.0.13=py311h06a4308_0
- wrapt=1.17.0=py311h5eee18b_0
- xz=5.4.6=h5eee18b_1
- yaml=0.2.5=h7b6447c_0
- zeromq=4.3.5=h6a678d5_0
- zlib=1.2.13=h5eee18b_1
- zstd=1.5.6=hc292b87_0
- pip:
- smogn==0.1.2
prefix: /home/signer/bin/miniconda3/envs/training

View File

@ -1,162 +0,0 @@
name: ai
channels:
- conda-forge
- defaults
- https://repo.anaconda.com/pkgs/main
- https://repo.anaconda.com/pkgs/r
dependencies:
- absl-py=2.1.0=py311hca03da5_0
- appnope=0.1.4=pyhd8ed1ab_1
- asttokens=3.0.0=pyhd8ed1ab_1
- astunparse=1.6.3=py_0
- blas=1.0=openblas
- bottleneck=1.4.2=py311hb9f6ed7_0
- brotli=1.0.9=h80987f9_9
- brotli-bin=1.0.9=h80987f9_9
- brotli-python=1.0.9=py311h313beb8_9
- bzip2=1.0.8=h80987f9_6
- c-ares=1.34.4=h5505292_0
- ca-certificates=2024.12.31=hca03da5_0
- cached-property=1.5.2=py_0
- certifi=2024.12.14=py311hca03da5_0
- charset-normalizer=3.3.2=pyhd3eb1b0_0
- comm=0.2.2=pyhd8ed1ab_1
- contourpy=1.3.1=py311h48ca7d4_0
- cycler=0.11.0=pyhd3eb1b0_0
- debugpy=1.8.11=py311h155a34a_0
- decorator=5.1.1=pyhd8ed1ab_1
- exceptiongroup=1.2.2=pyhd8ed1ab_1
- executing=2.1.0=pyhd8ed1ab_1
- flatbuffers=24.3.25=h313beb8_0
- fonttools=4.51.0=py311h80987f9_0
- freetype=2.12.1=hadb7bae_2
- gast=0.5.3=pyhd3eb1b0_0
- giflib=5.2.2=h80987f9_0
- google-pasta=0.2.0=pyhd3eb1b0_0
- grpcio=1.65.5=py311hc367efa_0
- h5py=3.12.1=nompi_py311h5dd25b7_103
- hdf5=1.14.4=nompi_ha698983_105
- icu=75.1=hfee45f7_0
- idna=3.7=py311hca03da5_0
- importlib-metadata=8.5.0=pyha770c72_1
- ipykernel=6.29.5=pyh57ce528_0
- ipython=8.31.0=pyh707e725_0
- jedi=0.19.2=pyhd8ed1ab_1
- joblib=1.4.2=py311hca03da5_0
- jupyter_client=8.6.3=pyhd8ed1ab_1
- jupyter_core=5.7.2=pyh31011fe_1
- keras=3.8.0=pyh753f3f9_0
- kiwisolver=1.4.4=py311h313beb8_0
- krb5=1.21.3=hf3e1bf2_0
- lcms2=2.16=ha0e7c42_0
- lerc=4.0.0=h313beb8_0
- libabseil=20240722.0=cxx17_h07bc746_4
- libaec=1.1.3=h313beb8_0
- libbrotlicommon=1.0.9=h80987f9_9
- libbrotlidec=1.0.9=h80987f9_9
- libbrotlienc=1.0.9=h80987f9_9
- libcurl=8.11.1=h73640d1_0
- libcxx=19.1.6=ha82da77_1
- libdeflate=1.23=hec38601_0
- libedit=3.1.20230828=h80987f9_0
- libev=4.33=h1a28f6b_1
- libexpat=2.6.4=h286801f_0
- libffi=3.4.4=hca03da5_1
- libgfortran=5.0.0=13_2_0_hd922786_3
- libgfortran5=13.2.0=hf226fd6_3
- libgrpc=1.65.5=h3d9cf25_0
- libjpeg-turbo=3.0.3=h80987f9_0
- liblzma=5.6.3=h39f12f2_1
- libnghttp2=1.64.0=h6d7220d_0
- libopenblas=0.3.21=h269037a_0
- libpng=1.6.45=h3783ad8_0
- libprotobuf=5.27.5=h53f8970_2
- libre2-11=2024.07.02=h07bc746_2
- libsodium=1.0.20=h99b78c6_0
- libsqlite=3.47.2=h3f77e49_0
- libssh2=1.11.1=h9cc3647_0
- libtiff=4.7.0=h551f018_3
- libwebp-base=1.5.0=h2471fea_0
- libxcb=1.17.0=hdb1d25a_0
- libzlib=1.3.1=h8359307_2
- llvm-openmp=14.0.6=hc6e5704_0
- lz4-c=1.9.4=h313beb8_1
- markdown=3.4.1=py311hca03da5_0
- markdown-it-py=2.2.0=py311hca03da5_1
- markupsafe=2.1.3=py311h80987f9_1
- matplotlib=3.10.0=py311hca03da5_0
- matplotlib-base=3.10.0=py311h7ef442a_0
- matplotlib-inline=0.1.7=pyhd8ed1ab_1
- mdurl=0.1.0=py311hca03da5_0
- ml_dtypes=0.4.0=py311h7aedaa7_0
- namex=0.0.7=py311hca03da5_0
- ncurses=6.5=h5e97a16_2
- nest-asyncio=1.6.0=pyhd8ed1ab_1
- numexpr=2.10.1=py311h5d9532f_0
- numpy=1.26.4=py311he598dae_0
- numpy-base=1.26.4=py311hfbfe69c_0
- openjpeg=2.5.3=h8a3d83b_0
- openssl=3.4.0=h81ee809_1
- opt_einsum=3.3.0=pyhd3eb1b0_1
- optree=0.12.1=py311h48ca7d4_0
- packaging=24.2=py311hca03da5_0
- pandas=2.2.3=py311hcf29cfe_0
- parso=0.8.4=pyhd8ed1ab_1
- pexpect=4.9.0=pyhd8ed1ab_1
- pickleshare=0.7.5=pyhd8ed1ab_1004
- pillow=11.1.0=py311hb9ba9e9_0
- pip=24.2=py311hca03da5_0
- platformdirs=4.3.6=pyhd8ed1ab_1
- prompt-toolkit=3.0.48=pyha770c72_1
- protobuf=5.27.5=py311h3f08180_0
- psutil=6.1.1=py311h917b07b_0
- pthread-stubs=0.3=h1a28f6b_1
- ptyprocess=0.7.0=pyhd8ed1ab_1
- pure_eval=0.2.3=pyhd8ed1ab_1
- pygments=2.15.1=py311hca03da5_1
- pyparsing=3.2.0=py311hca03da5_0
- pysocks=1.7.1=py311hca03da5_0
- python=3.11.11=hc22306f_1_cpython
- python-dateutil=2.9.0.post0=pyhff2d567_1
- python-flatbuffers=24.3.25=py311hca03da5_0
- python-tzdata=2023.3=pyhd3eb1b0_0
- python_abi=3.11=5_cp311
- pytz=2024.1=py311hca03da5_0
- pyzmq=26.2.0=py311h730b646_3
- re2=2024.07.02=h6589ca4_2
- readline=8.2=h1a28f6b_0
- requests=2.32.3=py311hca03da5_1
- rich=13.9.4=py311hca03da5_0
- scikit-learn=1.5.2=py311h313beb8_0
- scipy=1.14.1=py311hac8794a_0
- setuptools=75.1.0=py311hca03da5_0
- six=1.16.0=pyhd3eb1b0_1
- snappy=1.2.1=h313beb8_0
- sqlite=3.47.2=hd7222ec_0
- stack_data=0.6.3=pyhd8ed1ab_1
- tensorboard=2.17.1=pyhd8ed1ab_0
- tensorboard-data-server=0.7.0=py311ha6e5c4f_1
- tensorflow=2.17.0=cpu_py311h9d3d1e9_3
- tensorflow-base=2.17.0=cpu_py311ha270cad_3
- tensorflow-estimator=2.17.0=cpu_py311h935fadc_3
- termcolor=2.1.0=py311hca03da5_0
- threadpoolctl=3.5.0=py311hb6e6a13_0
- tk=8.6.13=h5083fa2_1
- tornado=6.4.2=py311h917b07b_0
- traitlets=5.14.3=pyhd8ed1ab_1
- typing-extensions=4.12.2=py311hca03da5_0
- typing_extensions=4.12.2=py311hca03da5_0
- tzdata=2024b=h04d1e81_0
- unicodedata2=15.1.0=py311h80987f9_1
- urllib3=2.2.3=py311hca03da5_0
- wcwidth=0.2.13=pyhd8ed1ab_1
- werkzeug=3.0.6=py311hca03da5_0
- wheel=0.44.0=py311hca03da5_0
- wrapt=1.17.0=py311h80987f9_0
- xorg-libxau=1.0.12=h5505292_0
- xorg-libxdmcp=1.1.5=hd74edd7_0
- xz=5.4.6=h80987f9_1
- zeromq=4.3.5=hc1bb282_7
- zipp=3.21.0=pyhd8ed1ab_1
- zlib=1.3.1=h8359307_2
- zstd=1.5.6=hb46c0d2_0