tumblr-stats/app.py
2025-12-29 18:08:17 -05:00

105 lines
3.4 KiB
Python

import json
import os
import sys
from typing import Any, Dict
import pytumblr
def init_client() -> pytumblr.TumblrRestClient:
    """Build a Tumblr REST client from credentials in the environment.

    Reads TUMBLR_CONSUMER_KEY, TUMBLR_CONSUMER_SECRET, TUMBLR_OAUTH_TOKEN and
    TUMBLR_OAUTH_SECRET. If any of them is unset or empty, the names of the
    missing variables are printed to stderr and the process exits with
    status 1.

    Returns:
        A ``pytumblr.TumblrRestClient`` constructed with the four credentials.
    """
    # Client keyword argument -> environment variable that supplies it.
    env_names = {
        'consumer_key': 'TUMBLR_CONSUMER_KEY',
        'consumer_secret': 'TUMBLR_CONSUMER_SECRET',
        'oauth_token': 'TUMBLR_OAUTH_TOKEN',
        'oauth_secret': 'TUMBLR_OAUTH_SECRET',
    }
    credentials = {
        param: os.getenv(env_name) for param, env_name in env_names.items()
    }

    # An empty value (e.g. `TUMBLR_OAUTH_TOKEN=`) is as useless as an unset
    # one, so a truthiness test is used rather than `is None`.
    missing_vars = [
        f'${env_names[param]}'
        for param, value in credentials.items()
        if not value
    ]
    if missing_vars:
        print("Missing important environment variables:", missing_vars,
              file=sys.stderr)
        sys.exit(1)

    return pytumblr.TumblrRestClient(**credentials)
def main() -> None:
    """Export the 'inuyasha'-tagged posts of panda-pal.tumblr.com to JSON.

    Pages through the blog's tagged posts ``limit`` at a time. Every post is
    keyed by its 'id_string'. Posts without a 'parent_post_url' key are
    written to ./tumblr_data.json; posts that carry that key (presumably
    reblogs -- confirm against the API) are written to
    ./tumblr_dumpster.json.

    Exits with status 1 when the first response has no usable 'total_posts',
    when any response lacks a 'posts' entry, or when nothing was collected.
    """
    client = init_client()
    blog = 'panda-pal.tumblr.com'
    tag = 'inuyasha'
    limit = 20

    post_map: Dict[str, Any] = {}
    dumpster: Dict[str, Any] = {}
    total = 0
    offset = 0

    while True:
        data = client.posts(blog, tag=tag, offset=offset, limit=limit)

        # The total is only known after the first response. `.get` is used
        # because a failed request yields a payload without 'total_posts',
        # which must reach the friendly exit below instead of a KeyError.
        if total == 0:
            total_posts = data.get('total_posts')
            if not total_posts:
                print("Couldn't get total posts. We're outta here!",
                      file=sys.stderr)
                sys.exit(1)
            print(f"I'm working with {total_posts} total posts...")
            total = total_posts

        curr_posts = data.get('posts')
        if curr_posts is None:
            print(f"Couldn't get posts at offset {offset}. We're outta here!",
                  file=sys.stderr)
            sys.exit(1)

        # De-duplicate within the page; the first occurrence of an id wins.
        page_posts: Dict[str, Any] = {}
        for curr_post in curr_posts:
            page_posts.setdefault(curr_post['id_string'], curr_post)

        originals = {
            key: post for key, post in page_posts.items()
            if 'parent_post_url' not in post
        }
        dumpster.update(
            (key, post) for key, post in page_posts.items()
            if 'parent_post_url' in post
        )

        # Merge the page only when it contributes at least one unseen id;
        # a page made up entirely of already-stored posts is ignored.
        if any(key not in post_map for key in originals):
            post_map.update(originals)

        offset += limit
        # Stop as soon as the next offset reaches the total. The original
        # `offset <= total` test issued one extra, empty request whenever
        # the total was an exact multiple of the page size.
        if offset >= total:
            print(f"Processed final batch of {total} posts")
            break
        total_batches = (total + limit - 1) // limit
        print(f"Processed batch {offset // limit} of {total_batches}...")

    if not post_map and not dumpster:
        print('We found nothing, so we end with nothing. 🤷', file=sys.stderr)
        sys.exit(1)

    with open("./tumblr_data.json", "w", encoding="utf-8") as f:
        json.dump(post_map, f, indent=2, sort_keys=True)
    with open("./tumblr_dumpster.json", "w", encoding="utf-8") as f:
        json.dump(dumpster, f, indent=2, sort_keys=True)
def retrieve_all_posts(tag: str):
    """Placeholder for fetching all posts with a tag; currently does nothing.

    Args:
        tag: Tag to look up. The stub ignores it.

    Returns:
        Always ``None``.
    """
    return None
# Script entry point: run the export, then terminate with a success status.
if __name__ == '__main__':
    main()
    raise SystemExit(0)