mirror of
https://github.com/Brandon-Rozek/website.git
synced 2024-11-21 15:56:29 -05:00
Deprecated old mastodon toot sync script
This commit is contained in:
parent
a32e0e61ae
commit
2ed857eccc
2 changed files with 187 additions and 297 deletions
270
refreshtoots.py
270
refreshtoots.py
|
@ -1,110 +1,214 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
"""
|
"""
|
||||||
Script to update a stored
|
Script to create Hugo markdown
|
||||||
copy of Mastodon toots
|
files from Mastodon Toots
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import request
|
from urllib import request
|
||||||
|
from glob import glob
|
||||||
from http.client import HTTPResponse
|
from http.client import HTTPResponse
|
||||||
from typing import Optional
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
import json
|
import json
|
||||||
import os
|
import math
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
TOOT_SAVE_FILE = "static/data/toots.json"
|
TOOT_CONTENT_LOCATION = "content/toots"
|
||||||
BACKUP_LOCATION = "static/data/toots.backup.json"
|
|
||||||
SERVER="https://fosstodon.org"
|
SERVER="https://fosstodon.org"
|
||||||
# Quick way to find user id: https://prouser123.me/mastodon-userid-lookup/
|
# Quick way to find user id: https://prouser123.me/mastodon-userid-lookup/
|
||||||
MUID=108219415927856966
|
MUID=108219415927856966
|
||||||
# Server default (when < 0) is 20
|
# Server default (when < 0) is 20
|
||||||
RETRIEVE_NUM_TOOTS=-1
|
RETRIEVE_NUM_TOOTS=1000
|
||||||
|
MAX_TOOTS_PER_QUERY=40 # Cannot change (server default)
|
||||||
toots_data = []
|
MAX_TOOT_ID=-1
|
||||||
|
|
||||||
# Read in former toot data
|
|
||||||
has_save = False
|
|
||||||
try:
|
|
||||||
with open(TOOT_SAVE_FILE, "r", encoding="UTF-8") as f:
|
|
||||||
toots_data = json.load(f)
|
|
||||||
has_save = True
|
|
||||||
print("Successfully read saved toot data")
|
|
||||||
except OSError:
|
|
||||||
print("Unable to read saved toot data")
|
|
||||||
except Exception:
|
|
||||||
print("Unable to parse saved toot data")
|
|
||||||
|
|
||||||
# Check JSON format...
|
|
||||||
if not isinstance(toots_data, list):
|
|
||||||
print("Unexpected JSON format in saved toot data, should be of type list.")
|
|
||||||
else:
|
|
||||||
has_save = True
|
|
||||||
|
|
||||||
|
|
||||||
# Present the user the ability to continue without save data
|
def retrieve_toots_from_server():
|
||||||
if not has_save:
|
"""
|
||||||
user_input = input("Continue without saved data? (y/n) ")
|
Grabs toots from Mastodon server
|
||||||
if user_input != "y":
|
"""
|
||||||
sys.exit(-1)
|
global MAX_TOOT_ID
|
||||||
|
server_data = []
|
||||||
|
|
||||||
|
for _ in range(math.ceil(RETRIEVE_NUM_TOOTS // MAX_TOOTS_PER_QUERY)):
|
||||||
|
# Grab toots from Mastodon
|
||||||
|
limit_param = "?limit=" + str(RETRIEVE_NUM_TOOTS) \
|
||||||
|
if RETRIEVE_NUM_TOOTS > 0 else "?"
|
||||||
|
max_id = "&max_id=" + str(MAX_TOOT_ID) \
|
||||||
|
if MAX_TOOT_ID > 0 else ""
|
||||||
|
url = SERVER + "/api/v1/accounts/" + str(MUID) + "/statuses" + limit_param + max_id
|
||||||
|
response: Optional[HTTPResponse] = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = request.urlopen(url)
|
||||||
|
except Exception:
|
||||||
|
print("Unable to grab toots from Mastodon.")
|
||||||
|
|
||||||
|
if response is None:
|
||||||
|
sys.exit(-1)
|
||||||
|
|
||||||
|
# Parse server response
|
||||||
|
server_data_part: Optional[list] = None
|
||||||
|
try:
|
||||||
|
server_data_part = json.loads(response.read())
|
||||||
|
except Exception:
|
||||||
|
print("Malformed JSON response from server.")
|
||||||
|
|
||||||
|
if server_data is None:
|
||||||
|
sys.exit(-1)
|
||||||
|
|
||||||
|
if not isinstance(server_data_part, list):
|
||||||
|
print("Unexpected JSON response, should be of form list.")
|
||||||
|
sys.exit(-1)
|
||||||
|
|
||||||
|
# No more to retrieve
|
||||||
|
if len(server_data_part) == 0:
|
||||||
|
break
|
||||||
|
|
||||||
|
print(f"Retrieved {len(server_data_part)} toots from server")
|
||||||
|
server_data.extend(server_data_part)
|
||||||
|
MAX_TOOT_ID = int(min(server_data_part, key=lambda p: int(p['id']))['id'])
|
||||||
|
|
||||||
|
print(f"Successfully grabbed a total of {len(server_data)} toots from server")
|
||||||
|
return server_data
|
||||||
|
|
||||||
|
|
||||||
# Parse former toot ids
|
def findall(p, s):
|
||||||
saved_toot_ids = set()
|
"""
|
||||||
for toot in toots_data:
|
Yields all the positions of
|
||||||
if 'id' in toot:
|
the pattern p in the string s.
|
||||||
saved_toot_ids.add(toot['id'])
|
Source: https://stackoverflow.com/a/34445090
|
||||||
|
"""
|
||||||
|
i = s.find(p)
|
||||||
|
while i != -1:
|
||||||
|
yield i
|
||||||
|
i = s.find(p, i+1)
|
||||||
|
|
||||||
|
|
||||||
# Grab toots from Mastodon
|
def read_json_frontmatter(markdown_contents) -> Optional[Dict[Any, Any]]:
|
||||||
limit_param = "?limit=" + str(RETRIEVE_NUM_TOOTS) \
|
"""
|
||||||
if RETRIEVE_NUM_TOOTS > 0 else ""
|
Take the contents from a Hugo markdown
|
||||||
url = SERVER + "/api/v1/accounts/" + str(MUID) + "/statuses" + limit_param
|
file and read the JSON frontmatter if it
|
||||||
response: Optional[HTTPResponse] = None
|
exists.
|
||||||
|
"""
|
||||||
|
front_matter_indices = list(findall('---', markdown_contents))
|
||||||
|
if len(front_matter_indices) < 2:
|
||||||
|
return None
|
||||||
|
front_matter = markdown_contents[(front_matter_indices[0] + 3):front_matter_indices[1]]
|
||||||
|
front_matter_json = None
|
||||||
|
try:
|
||||||
|
front_matter_json = json.loads(front_matter)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
if not isinstance(front_matter_json, dict):
|
||||||
|
front_matter_json = None
|
||||||
|
front_matter_json['content'] = markdown_contents[front_matter_indices[1] + 19:-17]
|
||||||
|
return front_matter_json
|
||||||
|
|
||||||
try:
|
def reformat_toot(toot_json):
|
||||||
response = request.urlopen(url)
|
"""
|
||||||
except Exception:
|
Takes a toot_json and
|
||||||
print("Unable to grab toots from Mastodon.")
|
slightly modifies it to match
|
||||||
|
some of the fields Hugo expects.
|
||||||
if response is None:
|
"""
|
||||||
sys.exit(-1)
|
# Turn URL -> Syndication
|
||||||
|
toot_url = toot_json['url']
|
||||||
# Parse server response
|
del toot_json['uri']
|
||||||
server_data: Optional[list] = None
|
del toot_json['url']
|
||||||
try:
|
toot_json['syndication'] = toot_url
|
||||||
server_data = json.loads(response.read())
|
# Turn Created At -> Date
|
||||||
except Exception:
|
toot_date = toot_json['created_at']
|
||||||
print("Malformed JSON response from server.")
|
del toot_json['created_at']
|
||||||
|
toot_json['date'] = toot_date
|
||||||
if server_data is None:
|
# Strip out highly dynamic account information
|
||||||
sys.exit(-1)
|
del toot_json['account']['locked']
|
||||||
|
del toot_json['account']['bot']
|
||||||
if not isinstance(server_data, list):
|
del toot_json['account']['discoverable']
|
||||||
print("Unexpected JSON response, should be of form list.")
|
del toot_json['account']['group']
|
||||||
sys.exit(-1)
|
del toot_json['account']['created_at']
|
||||||
|
del toot_json['account']['note']
|
||||||
|
del toot_json['account']['followers_count']
|
||||||
|
del toot_json['account']['following_count']
|
||||||
|
del toot_json['account']['statuses_count']
|
||||||
|
del toot_json['account']['last_status_at']
|
||||||
|
|
||||||
|
|
||||||
print("Successfully grabbed toots from server")
|
def create_toot(toot_json):
|
||||||
|
"""
|
||||||
|
Takes a JSON toot from Mastodon
|
||||||
|
and creates a string representing
|
||||||
|
the contents of a Hugo markdown
|
||||||
|
file.
|
||||||
|
"""
|
||||||
|
toot_content = toot_json['content']
|
||||||
|
del toot_json['content']
|
||||||
|
return "---\n" + \
|
||||||
|
f"{json.dumps(toot_json)}\n" +\
|
||||||
|
"---\n" +\
|
||||||
|
"{{< unsafe >}}\n" +\
|
||||||
|
f"{toot_content}\n" +\
|
||||||
|
"{{< /unsafe >}}\n"
|
||||||
|
|
||||||
|
def toot_file_from_id(tootid):
|
||||||
|
"""Returns toot filename from id"""
|
||||||
|
return f"{TOOT_CONTENT_LOCATION}/{tootid}.md"
|
||||||
|
|
||||||
|
def read_toot(tootid) -> Optional[Dict[Any, Any]]:
|
||||||
|
"""
|
||||||
|
Given a toot id, return
|
||||||
|
the markdown file contents
|
||||||
|
of the toot stored in Hugo
|
||||||
|
if it exists.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with open(toot_file_from_id(tootid), "r", encoding="UTF-8") as toot_file:
|
||||||
|
toot_data = read_json_frontmatter(toot_file.read())
|
||||||
|
return toot_data
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def write_toot(toot):
|
||||||
|
"""
|
||||||
|
Takes a toot json
|
||||||
|
and writes it to a hugo
|
||||||
|
markdown content file.
|
||||||
|
"""
|
||||||
|
toot_id = toot['id']
|
||||||
|
try:
|
||||||
|
with open(toot_file_from_id(toot_id), "w", encoding="UTF-8") as toot_file:
|
||||||
|
toot_file.write(create_toot(toot))
|
||||||
|
except Exception as e:
|
||||||
|
print("Failed to write toot", toot_id)
|
||||||
|
|
||||||
|
# Read in saved toot data
|
||||||
|
toot_filenames = glob(TOOT_CONTENT_LOCATION + "/*.md")
|
||||||
|
toot_ids = { Path(fname).stem for fname in toot_filenames }
|
||||||
|
|
||||||
|
server_toots = retrieve_toots_from_server()
|
||||||
|
|
||||||
|
for stoot in server_toots:
|
||||||
|
# Skip boosts for now
|
||||||
|
if stoot['content'] == '':
|
||||||
|
continue
|
||||||
|
|
||||||
|
reformat_toot(stoot)
|
||||||
|
stoot_id = stoot['id']
|
||||||
|
|
||||||
|
|
||||||
# Add new toots to saved toots
|
# If the toot already exists
|
||||||
for toot in server_data:
|
if stoot_id in toot_ids:
|
||||||
if 'id' in toot and toot['id'] not in saved_toot_ids:
|
saved_tootdata = read_toot(stoot_id)
|
||||||
toots_data.append(toot)
|
if saved_tootdata is None:
|
||||||
|
print("Unable to read saved toot id", stoot_id)
|
||||||
|
|
||||||
# Create a backup of the old toots
|
# Only update if toot has changed
|
||||||
try:
|
elif saved_tootdata != stoot:
|
||||||
os.rename(TOOT_SAVE_FILE, BACKUP_LOCATION)
|
print("Updating toot id", stoot_id)
|
||||||
except:
|
write_toot(stoot)
|
||||||
print("Unable to create backup of last toot file")
|
|
||||||
sys.exit(-1)
|
|
||||||
|
|
||||||
# Write toots_data to the disk
|
# New toot found
|
||||||
try:
|
else:
|
||||||
with open(TOOT_SAVE_FILE, "w", encoding="UTF-8") as f:
|
print("Creating toot id", stoot_id)
|
||||||
json.dump(toots_data, f)
|
write_toot(stoot)
|
||||||
except:
|
|
||||||
print("Unable to write to save location.")
|
|
||||||
print("Grab backup at", BACKUP_LOCATION)
|
|
||||||
|
|
||||||
|
print("Completed")
|
||||||
print("Saved toot data to", TOOT_SAVE_FILE)
|
|
||||||
|
|
|
@ -1,214 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
"""
|
|
||||||
Script to create Hugo markdown
|
|
||||||
files from Mastodon Toots
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import request
|
|
||||||
from glob import glob
|
|
||||||
from http.client import HTTPResponse
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Dict, Optional
|
|
||||||
import json
|
|
||||||
import math
|
|
||||||
import sys
|
|
||||||
|
|
||||||
# Directory that holds one Hugo markdown file per toot
TOOT_CONTENT_LOCATION = "content/toots"
# Base URL of the Mastodon server that is queried
SERVER = "https://fosstodon.org"
# Quick way to find user id: https://prouser123.me/mastodon-userid-lookup/
MUID = 108219415927856966
# Total number of toots to ask for.
# Original note: server default (when < 0) is 20
RETRIEVE_NUM_TOOTS = 1000
# Cannot change (server default)
MAX_TOOTS_PER_QUERY = 40
# Sent as the max_id query parameter once it is positive; -1 means unset
MAX_TOOT_ID = -1
|
|
||||||
|
|
||||||
|
|
||||||
def retrieve_toots_from_server():
    """
    Grabs toots from Mastodon server.

    Queries SERVER for the statuses of account MUID page by page
    until RETRIEVE_NUM_TOOTS toots were collected or the server
    returns an empty page. When RETRIEVE_NUM_TOOTS is not positive
    a single query without a limit parameter is made, leaving the
    page size up to the server.

    Returns the list of toots (as decoded from the JSON responses).
    Exits the process with status -1 if a request fails or a
    response is not a JSON list.

    Side effect: the global MAX_TOOT_ID is lowered to the smallest
    toot id seen so far; it is sent as max_id on the next query.
    NOTE(review): this relies on max_id being exclusive on the
    server side - confirm against the Mastodon API documentation.
    """
    global MAX_TOOT_ID
    server_data = []

    if RETRIEVE_NUM_TOOTS > 0:
        # True division: a partially filled last page still needs a query
        num_queries = math.ceil(RETRIEVE_NUM_TOOTS / MAX_TOOTS_PER_QUERY)
    else:
        num_queries = 1

    for _ in range(num_queries):
        # Build the query string for this page
        query_parts = []
        if RETRIEVE_NUM_TOOTS > 0:
            # Never ask for more than one page can carry, nor for more
            # than what is still missing from the requested total
            remaining = RETRIEVE_NUM_TOOTS - len(server_data)
            query_parts.append("limit=" + str(min(remaining, MAX_TOOTS_PER_QUERY)))
        if MAX_TOOT_ID > 0:
            query_parts.append("max_id=" + str(MAX_TOOT_ID))

        url = SERVER + "/api/v1/accounts/" + str(MUID) + "/statuses"
        if query_parts:
            url += "?" + "&".join(query_parts)

        # Grab toots from Mastodon; the with-block closes the connection
        try:
            with request.urlopen(url) as response:
                raw_body = response.read()
        except Exception:
            print("Unable to grab toots from Mastodon.")
            sys.exit(-1)

        # Parse server response
        try:
            server_data_part = json.loads(raw_body)
        except ValueError:
            print("Malformed JSON response from server.")
            sys.exit(-1)

        if not isinstance(server_data_part, list):
            print("Unexpected JSON response, should be of form list.")
            sys.exit(-1)

        # No more to retrieve
        if len(server_data_part) == 0:
            break

        print(f"Retrieved {len(server_data_part)} toots from server")
        server_data.extend(server_data_part)
        # Ids arrive as strings; compare them numerically
        MAX_TOOT_ID = min(int(toot['id']) for toot in server_data_part)

        if RETRIEVE_NUM_TOOTS > 0 and len(server_data) >= RETRIEVE_NUM_TOOTS:
            break

    print(f"Successfully grabbed a total of {len(server_data)} toots from server")
    return server_data
|
|
||||||
|
|
||||||
|
|
||||||
def findall(p, s):
    """
    Lazily produces, in increasing order,
    every index at which the pattern p
    starts inside the string s. The search
    resumes one character after each hit, so
    overlapping occurrences are reported too.
    Source: https://stackoverflow.com/a/34445090
    """
    position = s.find(p)
    while True:
        if position == -1:
            return
        yield position
        position = s.find(p, position + 1)
|
|
||||||
|
|
||||||
|
|
||||||
def read_json_frontmatter(markdown_contents) -> Optional[Dict[Any, Any]]:
    """
    Take the contents from a Hugo markdown
    file and read the JSON frontmatter if it
    exists.

    The text between the first two '---' markers is parsed as JSON.
    The toot body that follows, wrapped in the unsafe shortcode,
    is stored under the 'content' key of the result.

    Returns None when there are fewer than two '---' markers, when
    the front matter is not valid JSON, or when it is not a JSON
    object.
    """
    delimiter = '---'
    # Text between the end of the front matter and the toot body
    body_prefix = delimiter + "\n" + "{{< unsafe >}}\n"
    # Text that follows the toot body at the end of the file
    body_suffix = "\n" + "{{< /unsafe >}}\n"

    opening = markdown_contents.find(delimiter)
    if opening == -1:
        return None
    closing = markdown_contents.find(delimiter, opening + 1)
    if closing == -1:
        return None

    front_matter = markdown_contents[(opening + len(delimiter)):closing]
    try:
        front_matter_json = json.loads(front_matter)
    except ValueError:
        return None

    # Only a JSON object can take the extra 'content' key below
    if not isinstance(front_matter_json, dict):
        return None

    body_start = closing + len(body_prefix)
    front_matter_json['content'] = markdown_contents[body_start:-len(body_suffix)]
    return front_matter_json
|
|
||||||
|
|
||||||
def reformat_toot(toot_json):
    """
    Takes a toot_json and
    slightly modifies it to match
    some of the fields Hugo expects.

    The dictionary is changed in place and nothing is returned:
    'url' is renamed to 'syndication', 'created_at' is renamed to
    'date', 'uri' is dropped and the frequently changing fields of
    the nested 'account' dictionary are removed.

    Raises KeyError if 'url', 'created_at' or 'account' is missing.
    Fields that are only stripped may be absent without error.
    """
    # Turn URL -> Syndication
    toot_url = toot_json.pop('url')
    toot_json.pop('uri', None)
    toot_json['syndication'] = toot_url
    # Turn Created At -> Date
    toot_json['date'] = toot_json.pop('created_at')
    # Strip out highly dynamic account information
    account = toot_json['account']
    volatile_fields = (
        'locked',
        'bot',
        'discoverable',
        'group',
        'created_at',
        'note',
        'followers_count',
        'following_count',
        'statuses_count',
        'last_status_at',
    )
    for field in volatile_fields:
        # pop with a default: a field the server did not send is not an error
        account.pop(field, None)
|
|
||||||
|
|
||||||
|
|
||||||
def create_toot(toot_json):
    """
    Takes a JSON toot from Mastodon
    and creates a string representing
    the contents of a Hugo markdown
    file.

    Every key except 'content' is serialised as JSON front matter
    between two '---' lines; the value of 'content' follows, wrapped
    in the unsafe shortcode. The passed dictionary is left untouched.

    Raises KeyError if the toot has no 'content' key.
    """
    toot_content = toot_json['content']
    # Filter into a new dict instead of deleting from the caller's one
    front_matter = {
        key: value for key, value in toot_json.items() if key != 'content'
    }
    return (
        "---\n"
        f"{json.dumps(front_matter)}\n"
        "---\n"
        "{{< unsafe >}}\n"
        f"{toot_content}\n"
        "{{< /unsafe >}}\n"
    )
|
|
||||||
|
|
||||||
def toot_file_from_id(tootid):
    """Builds the markdown file path that belongs to a toot id"""
    return "{}/{}.md".format(TOOT_CONTENT_LOCATION, tootid)
|
|
||||||
|
|
||||||
def read_toot(tootid) -> Optional[Dict[Any, Any]]:
    """
    Loads the Hugo markdown file stored
    for the given toot id and returns the
    result of parsing its JSON frontmatter.
    Any failure while opening, reading or
    parsing the file yields None.
    """
    toot_path = toot_file_from_id(tootid)
    try:
        with open(toot_path, "r", encoding="UTF-8") as toot_file:
            return read_json_frontmatter(toot_file.read())
    except Exception:
        return None
|
|
||||||
|
|
||||||
def write_toot(toot):
    """
    Takes a toot json
    and writes it to a hugo
    markdown content file.

    The target path is derived from the toot's 'id'. Failures while
    rendering or writing are reported on standard output and
    otherwise ignored, so one bad toot does not stop the caller.

    Raises KeyError if the toot has no 'id' key.
    """
    toot_id = toot['id']
    try:
        # Render first: opening with "w" truncates the file right away,
        # so a rendering error must happen before the file is touched
        rendered = create_toot(toot)
        with open(toot_file_from_id(toot_id), "w", encoding="UTF-8") as toot_file:
            toot_file.write(rendered)
    except Exception:
        print("Failed to write toot", toot_id)
|
|
||||||
|
|
||||||
# Collect the ids of the toots that already have a markdown file
toot_filenames = glob(TOOT_CONTENT_LOCATION + "/*.md")
toot_ids = set()
for fname in toot_filenames:
    toot_ids.add(Path(fname).stem)

server_toots = retrieve_toots_from_server()

for stoot in server_toots:
    # Skip boosts for now
    if stoot['content'] == '':
        continue

    reformat_toot(stoot)
    stoot_id = stoot['id']

    # New toot found
    if stoot_id not in toot_ids:
        print("Creating toot id", stoot_id)
        write_toot(stoot)
        continue

    # The toot already exists on disk
    saved_tootdata = read_toot(stoot_id)
    if saved_tootdata is None:
        print("Unable to read saved toot id", stoot_id)
        continue

    # Only rewrite the file if the toot has changed
    if saved_tootdata != stoot:
        print("Updating toot id", stoot_id)
        write_toot(stoot)

print("Completed")
|
|
Loading…
Reference in a new issue