mirror of
https://git.gfz-potsdam.de/naaice/iphreeqc.git
synced 2025-12-16 00:28:23 +01:00
31 lines
812 B
Python
31 lines
812 B
Python
# pip install beautifulsoup4
|
|
#
|
|
# pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org beautifulsoup4
|
|
#
|
|
import os
|
|
from bs4 import BeautifulSoup
|
|
from urllib.request import urlopen
|
|
from pathlib import Path
|
|
|
|
path = Path(os.path.join(os.getcwd(), 'html\IPhreeqc_8h.html'))
|
|
|
|
response = urlopen(path.as_uri())
|
|
soup = BeautifulSoup(response, 'html.parser')
|
|
#print(soup.prettify())
|
|
#print(soup.find_all('a', 'el'))
|
|
#print('\t\t<LI> <OBJECT type="text/sitemap">')
|
|
d = {}
|
|
for a in soup.find_all('a', 'el'):
|
|
#print('{}={}'.format(a.text, a['href']))
|
|
href = a['href']
|
|
#f, h = href.split('#')
|
|
f, h, l = href.partition('#')
|
|
if f == 'IPhreeqc_8h.html' and len(l) == 33:
|
|
d[a.text] = href
|
|
|
|
# remove
|
|
d.pop('IPQ_RESULT')
|
|
|
|
for key in d.keys():
|
|
print('{}={}'.format(key, d[key]))
|