I was not happy with the existing solutions, so I wrote my own using Python 3 and BeautifulSoup.
The function takes the HTML source as a string and looks for header tags (e.g. h1). In the next step an id= attribute is created for each header, together with a corresponding TOC entry that links to it.
def generate_toc(html_out, *, toc_title='Inhalt'):
    """Create a table of contents based on the header tags.

    Every header tag (h1 - h6) gets an ``id`` attribute (an existing one is
    kept) and a matching link in the toc. Deeper header levels become nested
    lists. The toc is placed at the top of the html output.

    Args:
        html_out (string): A string containing the html source.
        toc_title (string): Headline shown above the toc.

    Returns:
        (string): The prettified html source including the toc.
    """
    import uuid

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html_out, 'html.parser')

    # 'h1' - 'h6', collected in document order
    headers = soup.find_all([f'h{i}' for i in range(1, 7)])

    # Create the div element containing the toc and place it in front of the
    # first element of the body. Fragments without a <body> and empty bodies
    # are handled instead of raising AttributeError / IndexError.
    toc_container = soup.new_tag('div', id='toc_container')
    root = soup.body if soup.body is not None else soup
    first_child = root.find(True, recursive=False)
    if first_child is None:
        root.append(toc_container)
    else:
        first_child.insert_before(toc_container)

    # toc headline
    title_tag = soup.new_tag('p', attrs={'class': 'toc_title'})
    title_tag.string = toc_title
    toc_container.append(title_tag)

    def _create_toc_entry(h_tag):
        """Create a toc entry (li-tag containing an a-tag) for a header-tag."""
        # Keep an id the header already has so existing links to it survive;
        # otherwise generate a unique one. Stored as str, not as UUID object.
        anchor = h_tag.get('id') or str(uuid.uuid4())
        h_tag['id'] = anchor

        link = soup.new_tag('a', href=f'#{anchor}')
        # h_tag.string is None when the header contains nested markup
        # (e.g. <h2>Foo <em>bar</em></h2>), so use the full text instead and
        # collapse whitespace the way a browser would render it.
        link.string = ' '.join(h_tag.get_text().split())

        entry = soup.new_tag('li')
        entry.append(link)
        return entry

    # main ul-tag for the first level of the toc
    root_ul = soup.new_tag('ul', attrs={'class': 'toc_list'})
    toc_container.append(root_ul)

    # Stack of (header level, ul-tag) for the currently open lists. Tracking
    # the level of every open list keeps the nesting correct even when the
    # document skips levels (h1 -> h3) or jumps back several levels at once
    # (h3 -> h1).
    open_lists = [(1, root_ul)]

    for header in headers:
        level = int(header.name[1:])

        # going upstairs: close every list that is deeper than this header
        while len(open_lists) > 1 and open_lists[-1][0] > level:
            open_lists.pop()

        # going downstairs: open one sub list for the deeper level
        if open_lists[-1][0] < level:
            sub_ul = soup.new_tag('ul', attrs={'class': 'toc_list'})
            # NOTE: the sub list is attached directly to the parent ul-tag
            # (not wrapped in a li-tag) to keep the markup produced so far.
            open_lists[-1][1].append(sub_ul)
            open_lists.append((level, sub_ul))

        open_lists[-1][1].append(_create_toc_entry(header))

    return soup.prettify(formatter='html5')
This may not be elegant for every use case. I use it to put TOCs at the top of HTML reports generated by data science routines (e.g. pandas).