From e4066456f8b4e4bf5d4a9cb7b7c9f7f2e05745b5 Mon Sep 17 00:00:00 2001
From: Scott R Charlton
Date: Tue, 16 Mar 2021 14:57:12 -0600
Subject: [PATCH] fixed IPhreeqc.chm (two new methods)

---
 doc/TOC.hhc  | 168 +++++++++++++++++++++++++++------------------------
 doc/parse.py |  30 +++++++++
 2 files changed, 118 insertions(+), 80 deletions(-)
 create mode 100644 doc/parse.py

diff --git a/doc/TOC.hhc b/doc/TOC.hhc
index 5bc79a9c..02e519dd 100644
--- a/doc/TOC.hhc
+++ b/doc/TOC.hhc
@@ -29,303 +29,311 @@
  • @@ -335,23 +343,23 @@
    • - +
    • - +
    • - +
    • - +
    • - +
    # Patch metadata: diff --git a/doc/parse.py b/doc/parse.py (new file mode 100644, index 00000000..5784cdf1)
"""Print ``name=href`` pairs for links found in the IPhreeqc.h HTML page.

The script reads ``html/IPhreeqc_8h.html`` relative to the current working
directory, collects the anchor tags with CSS class ``el`` whose target is a
fixed-length fragment of that same page, drops the excluded names, and
prints one ``name=href`` line per remaining link.

Requires Beautiful Soup:

    pip install beautifulsoup4

or, if pip needs the package hosts marked as trusted:

    pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org beautifulsoup4
"""
import os
from urllib.request import urlopen
from pathlib import Path

# File name of the page that is parsed; links must point back into it.
PAGE = 'IPhreeqc_8h.html'

# Required length of the part of the href after '#'.
# NOTE(review): 33 presumably matches Doxygen member anchors
# ('a' followed by 32 hex digits) -- confirm against the generated HTML.
ANCHOR_LENGTH = 33

# Link texts that are removed from the result even if they match.
EXCLUDED = ('IPQ_RESULT',)


def select_function_links(links, page=PAGE, anchor_length=ANCHOR_LENGTH,
                          excluded=EXCLUDED):
    """Return the ``{text: href}`` mapping of links that should be printed.

    Args:
        links: iterable of ``(text, href)`` pairs.
        page: file part an href must have (the text before ``#``).
        anchor_length: exact length the fragment after ``#`` must have.
        excluded: link texts to drop from the result.

    Returns:
        A dict in first-seen order.  If the same text occurs more than once
        the last matching href wins.  Names in ``excluded`` that never
        matched are ignored instead of raising ``KeyError``.
    """
    selected = {}
    for text, href in links:
        target, _, fragment = href.partition('#')
        if target == page and len(fragment) == anchor_length:
            selected[text] = href
    for name in excluded:
        # Tolerate a missing entry: the page may simply not link to it.
        selected.pop(name, None)
    return selected


def main():
    """Parse the HTML page and print the selected links to stdout."""
    # Imported here so select_function_links stays importable (and testable)
    # on machines that do not have Beautiful Soup installed.
    from bs4 import BeautifulSoup

    # Separate path components instead of 'html\IPhreeqc_8h.html': the
    # backslash form is an invalid escape sequence and only works on Windows.
    path = Path(os.getcwd(), 'html', PAGE)

    # Close the response as soon as the document has been parsed.
    with urlopen(path.as_uri()) as response:
        soup = BeautifulSoup(response, 'html.parser')

    anchors = soup.find_all('a', 'el')
    links = select_function_links((a.text, a['href']) for a in anchors)
    for name, href in links.items():
        print('{}={}'.format(name, href))


if __name__ == '__main__':
    main()