# fnb-roc-website/builder.py
#
# 66 lines, 2.9 KiB, Python

import lzma
import json
import os
import re
from pytz import timezone
from datetime import datetime
import bisect
from jinja2 import Environment, PackageLoader, select_autoescape
# Input and output directories.
# Both keep a trailing separator because file names may be appended to them
# by plain string concatenation (e.g. output_path + 'index.html'); without
# it the result would be './staticindex.html' instead of './static/index.html'.
scraper_path = 'scraper/'
output_path = './static/'
# Jinja2 environment: templates are located through the "builder" package
# and autoescaping follows select_autoescape()'s defaults.
env = Environment(
    loader=PackageLoader("builder"),
    autoescape=select_autoescape(),
)

# Template for the blog listing page and template for a single post page.
blog_template = env.get_template("blog_template.html")
post_template = env.get_template("post.html")

# Collected post dictionaries; starts out empty.
posts = []
def _picture_index(path: str) -> int:
    """Return the position of a picture within its post, read from its file name.

    The index is the run of digits between the last underscore of the file
    name and its extension, e.g. ``folder/shot_3.jpg`` -> 3.  File names
    without such a numeric suffix (e.g. ``folder/shot_UTC.webp``) yield 0.

    The value is computed from *path* itself, so it is safe to use as a
    ``key=`` function that is applied to every element already in a list.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    _, separator, suffix = stem.rpartition('_')
    if separator and suffix.isdecimal():
        return int(suffix)
    return 0


# Every sub-folder of scraper_path is treated as one post made of:
#   *.xz           - LZMA-compressed JSON metadata (supplies the timestamp)
#   *.webp / *.jpg - the post's pictures
#   *.txt          - the caption
# A folder only becomes a post (and gets an HTML page) when its metadata
# contains data['node']['date'].
posts_dir = os.path.join(output_path, 'posts')
os.makedirs(posts_dir, exist_ok=True)  # open() below does not create directories

for folder in os.listdir(scraper_path):
    folder_path = os.path.join(scraper_path, folder)
    if not os.path.isdir(folder_path):
        continue

    post = {'pictures': []}
    for file in os.listdir(folder_path):
        filepath = os.path.join(folder_path, file)
        if file.endswith('.xz'):
            with lzma.open(filepath, 'rt', encoding='utf-8') as f:
                data = json.load(f)
            # checks that we're looking at a downloaded post, not the profile picture or something
            if 'node' in data and 'date' in data['node']:
                timestamp = data['node']['date']
                post['timestamp'] = timestamp  # useful for sorting
                post['datetime'] = datetime.fromtimestamp(
                    timestamp, timezone("America/New_York")
                ).strftime("%b %e, '%y")
        elif file.endswith(('.webp', '.jpg')):
            # Keep the pictures ordered by their index within the post.
            bisect.insort_left(post['pictures'], filepath, key=_picture_index)
        elif file.endswith('.txt'):
            # The metadata is decoded as UTF-8, so read the caption the same
            # way instead of relying on the locale's default encoding.
            with open(filepath, 'r', encoding='utf-8') as caption_file:
                post['caption'] = caption_file.read()

    if 'timestamp' not in post:
        continue

    # Insert once per folder, after all files were read, so a folder holding
    # several metadata files cannot add the same post more than once.
    # use negative timestamp because that's the easiest way to sort from high to low
    bisect.insort_right(posts, post, key=lambda t: -t['timestamp'])

    post_page = os.path.join(posts_dir, str(post['timestamp']) + '.html')
    with open(post_page, 'w', encoding='utf-8') as output_file:
        output_file.write(post_template.render(post=post))
# Render the two site-level pages into output_path.
# os.path.join is used instead of string concatenation so the result is
# '<output_path>/index.html' whether or not output_path ends with a separator.
homepage_template = env.get_template("index.html")
with open(os.path.join(output_path, 'index.html'), "w", encoding="utf-8") as output_file:
    output_file.write(homepage_template.render())

# The blog page receives the collected posts list.
with open(os.path.join(output_path, 'blog.html'), "w", encoding="utf-8") as output_file:
    output_file.write(blog_template.render(posts=posts))