forked from noraatfedora/fnb-roc-website
72 lines
3.1 KiB
Python
72 lines
3.1 KiB
Python
import lzma
|
|
import json
|
|
import os
|
|
import re
|
|
from pytz import timezone
|
|
from datetime import datetime
|
|
import bisect
|
|
from jinja2 import Environment, PackageLoader, select_autoescape
|
|
|
|
# Input: one sub-folder per scraped post. Output: the generated static site.
scraper_path = 'scraper/'
output_path = './static'

# Jinja environment that loads templates from the "builder" package.
env = Environment(loader=PackageLoader("builder"), autoescape=select_autoescape())

# Templates for a single post page and for the blog listing page.
post_template = env.get_template("post.html")
blog_template = env.get_template("blog_template.html")
|
|
|
|
# Make sure the output directory exists. An existing directory is fine;
# anything else occupying that path is reported as a RuntimeError.
try:
    os.makedirs(output_path, exist_ok=True)
except FileExistsError as err:
    raise RuntimeError(f'output path {output_path} exists and is not a directory') from err
|
|
|
|
def get_index(t: str) -> int:
    """Return the position of picture path *t* within its post.

    The index is the run of digits between the last underscore of the file
    name and its extension (``..._3.jpg`` -> 3). File names without such a
    numeric suffix get index 0.
    """
    stem = os.path.splitext(os.path.basename(t))[0]
    _, underscore, suffix = stem.rpartition('_')
    if underscore and suffix.isdecimal():
        return int(suffix)
    return 0


# Every post that has a timestamp, kept sorted newest first.
posts = []

# Post pages live in a sub-directory of the output directory; create it up
# front so the open() calls below cannot fail on a missing folder.
posts_dir = os.path.join(output_path, 'posts')
os.makedirs(posts_dir, exist_ok=True)

for folder in os.listdir(scraper_path):
    folder_path = os.path.join(scraper_path, folder)
    if not os.path.isdir(folder_path):
        continue

    post = {'pictures': []}
    for file in os.listdir(folder_path):
        filepath = os.path.join(folder_path, file)

        if file.endswith('.xz'):
            with lzma.open(filepath) as f:
                data = json.loads(f.read().decode('utf-8'))
            # checks that we're looking at a downloaded post, not the profile picture or something
            if 'node' in data and 'date' in data['node']:
                timestamp = data['node']['date']
                post['timestamp'] = timestamp  # useful for sorting
                # NOTE(review): %e is a platform-specific strftime directive;
                # confirm it is supported wherever this script runs.
                post['datetime'] = datetime.fromtimestamp(
                    timestamp, timezone("America/New_York")
                ).strftime("%b %e, '%y")

        elif file.endswith(('.webp', '.jpg')):
            # Keep the pictures ordered by their index within the post.
            bisect.insort_left(post['pictures'], filepath, key=get_index)

        elif file.endswith('.txt'):
            with open(filepath, 'r') as caption_file:
                post['caption'] = caption_file.read()

    # Folders without post metadata are skipped entirely.
    if 'timestamp' not in post:
        continue

    # Insert once per folder, after all files were read.
    # use negative timestamp because that's the easiest way to sort from high to low
    bisect.insort_right(posts, post, key=lambda t: -t['timestamp'])

    post_file = os.path.join(posts_dir, f"{post['timestamp']}.html")
    with open(post_file, 'w+') as output_file:
        output_file.write(post_template.render(post=post))
|
|
|
|
# Render the two top-level pages into the output directory. The paths are
# built with os.path.join because output_path has no trailing separator;
# plain string concatenation would write next to the directory, not into it.
homepage_template = env.get_template("index.html")

with open(os.path.join(output_path, 'index.html'), "w+") as output_file:
    output_file.write(homepage_template.render())

# The blog page receives the list of collected posts.
with open(os.path.join(output_path, 'blog.html'), "w+") as output_file:
    output_file.write(blog_template.render(posts=posts))