You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

56 lines
1.3 KiB

#!/bin/python3
import os
import re
from urllib.parse import quote
# Collect html filenames recursively
print("Collecting html files...")
allFiles = []


def recursive_scanDirectory(dir, root=None):
    """Append every .html file found below *dir* to the global ``allFiles``.

    Each entry is stored as a path relative to the scan root, always using
    '/' as the separator. Files whose name ends in ``404.html`` are skipped.

    Args:
        dir: Directory to scan. (The name shadows the builtin, but it is
            kept so that existing keyword callers continue to work.)
        root: Directory the stored paths are made relative to. Defaults to
            *dir*; recursive calls pass the original root along.

    Raises:
        OSError: If *dir* cannot be listed (propagated from os.listdir).
    """
    if root is None:
        root = dir

    # os.listdir returns entries in arbitrary order; sort them so the
    # collected list is the same on every run
    for filename in sorted(os.listdir(dir)):
        path = os.path.join(dir, filename)

        # For each file found
        if os.path.isfile(path):
            # Only .html files are of interest
            if not filename.endswith('.html'):
                continue
            # Ignore 404.html
            if filename.endswith('404.html'):
                continue
            # Store the path relative to the scan root. Computing it from
            # the root (rather than searching for a literal "book/") works
            # for any root directory and any OS path separator.
            relative = os.path.relpath(path, root)
            allFiles.append(relative.replace(os.sep, '/'))
        # For each directory found
        elif os.path.isdir(path):
            # Call this function recursively
            recursive_scanDirectory(path, root)
# Scan the site output; this fills the module-level allFiles list
recursive_scanDirectory('book')

# Generate sitemap.xml
print('Generating sitemap content...')

# One <url> entry per collected page. The paths come from the file system,
# so percent-encode anything that is not URL-safe. quote() leaves '/' alone
# and also encodes '&', '<' and '>', which keeps the XML well-formed.
urls = ''.join(
    f'''
<url>
<loc>https://www.leonetienne.de/{quote(path)}</loc>
</url>
'''
    for path in allFiles
)

# The namespace has to be spelled exactly as the sitemap protocol defines it,
# including the "www." prefix.
sitemap = f'''<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://www.leonetienne.de</loc>
</url>
{urls}
</urlset>
'''
# Write sitemap to file
print('Writing sitemap to file...')

# The XML declaration announces UTF-8, so write with that encoding explicitly
# instead of relying on the platform default. The with-block closes the file
# even if the write fails.
with open('book/sitemap.xml', 'w', encoding='utf-8') as sitemapFile:
    sitemapFile.write(sitemap)