You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

56 lines
1.3 KiB

import os
import re
# Collect html filenames recursively
print("Collecting html files...")
allFiles = []


def recursive_scanDirectory(dir: str) -> None:
    """Walk *dir* recursively and record every HTML page in ``allFiles``.

    Every regular file whose path ends in ``.html`` is appended to the
    module-level ``allFiles`` list, except paths ending in ``404.html``.
    The stored value is the part of the path that follows the first
    ``book/`` component, so entries are relative to the book root.

    Args:
        dir: Directory to scan. Its path (or a parent of it) must contain
            ``book/`` for the prefix stripping to succeed.

    Raises:
        IndexError: If a matching file path contains no ``book/`` segment.
        OSError: If *dir* cannot be listed.
    """
    for filename in os.listdir(dir):
        file = os.path.join(dir, filename)
        # For each file found
        if os.path.isfile(file):
            # If it's a .html
            if re.search(r'\.html$', file):
                # Ignore 404.html
                if not re.search(r'404\.html$', file):
                    # Add it to our file list, but remove the book/ part
                    allFiles.append(re.findall(r'(?<=book/)(.+)', file)[0])
        # For each directory found
        if os.path.isdir(file):
            # Call this function recursively
            recursive_scanDirectory(file)
# Generate sitemap.xml
# Appends one f-string fragment per entry of allFiles to `urls`, then builds
# the full XML document in `sitemap`.
# NOTE(review): no call to recursive_scanDirectory() is visible before this
# point, so allFiles would still be empty here -- confirm the scan is started
# (presumably on the `book` directory) before this section runs.
# NOTE(review): both triple-quoted f-string templates below appear truncated
# in this copy (no per-URL body, no closing quotes, empty xmlns value).
# Restore them from the original file; the sitemap protocol expects the
# urlset namespace to be set -- TODO confirm the intended value.
print('Generating sitemap content...')
urls = ''
for file in allFiles:
urls += f'''
sitemap = f'''<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="">
# Write sitemap to file
# NOTE(review): the handle opened below is neither written to nor closed in
# the visible part of the file -- confirm that happens further down, or
# consider a `with open(...)` block so the file is always closed.
print('Writing sitemap to file...')
sitemapFile = open('book/sitemap.xml', 'w')