diff --git a/doc/TOC.hhc b/doc/TOC.hhc index 5bc79a9c..02e519dd 100644 --- a/doc/TOC.hhc +++ b/doc/TOC.hhc @@ -29,303 +29,311 @@
  • @@ -335,23 +343,23 @@ diff --git a/doc/parse.py b/doc/parse.py new file mode 100644 index 00000000..5784cdf1 --- /dev/null +++ b/doc/parse.py @@ -0,0 +1,30 @@ +# pip install beautifulsoup4 +# +# pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org beautifulsoup4 +# +import os +from bs4 import BeautifulSoup +from urllib.request import urlopen +from pathlib import Path + +path = Path(os.path.join(os.getcwd(), 'html\IPhreeqc_8h.html')) + +response = urlopen(path.as_uri()) +soup = BeautifulSoup(response, 'html.parser') +#print(soup.prettify()) +#print(soup.find_all('a', 'el')) +#print('\t\t
  • ') +d = {} +for a in soup.find_all('a', 'el'): + #print('{}={}'.format(a.text, a['href'])) + href = a['href'] + #f, h = href.split('#') + f, h, l = href.partition('#') + if f == 'IPhreeqc_8h.html' and len(l) == 33: + d[a.text] = href + +# remove +d.pop('IPQ_RESULT') + +for key in d.keys(): + print('{}={}'.format(key, d[key]))