tumblr-stats/app.py

136 lines
4.5 KiB
Python

from dataclasses import asdict
import json
from operator import itemgetter
import os
import sys
from typing import Any, Dict, List, Tuple
import pytumblr
from model import StatsModel
def init_client() -> pytumblr.TumblrRestClient:
    """Build a Tumblr REST client from credentials in the environment.

    Reads ``TUMBLR_CONSUMER_KEY``, ``TUMBLR_CONSUMER_SECRET``,
    ``TUMBLR_OAUTH_TOKEN`` and ``TUMBLR_OAUTH_SECRET``.

    Returns:
        A ``pytumblr.TumblrRestClient`` constructed with the four values.

    Raises:
        SystemExit: With status 1 when any of the variables is unset or
            empty; the offending names are printed first.
    """
    # Client keyword argument -> environment variable that supplies it.
    env_names = {
        'consumer_key': 'TUMBLR_CONSUMER_KEY',
        'consumer_secret': 'TUMBLR_CONSUMER_SECRET',
        'oauth_token': 'TUMBLR_OAUTH_TOKEN',
        'oauth_secret': 'TUMBLR_OAUTH_SECRET',
    }
    credentials = {kwarg: os.getenv(env) for kwarg, env in env_names.items()}

    # An exported-but-empty variable is as unusable as an unset one, so the
    # check is on truthiness rather than on ``is None``.
    missing_vars = [
        f'${env}' for kwarg, env in env_names.items() if not credentials[kwarg]
    ]
    if missing_vars:
        print("Missing important environment variables:", missing_vars)
        sys.exit(1)

    return pytumblr.TumblrRestClient(**credentials)  # type: ignore
def calculate_total_notes(post_map: Dict[str, Any]) -> int:
    """Return the sum of ``'note_count'`` over every post in *post_map*.

    Args:
        post_map: Mapping of post key to post dict. Each post dict is
            indexed with ``'note_count'``.

    Returns:
        The summed note counts; 0 when *post_map* is empty.

    Raises:
        KeyError: If a post dict has no ``'note_count'`` entry.
    """
    return sum(post['note_count'] for post in post_map.values())
def determine_top_post_urls(post_map: Dict[str, Any]) -> List[str]:
    """Return every post's ``'post_url'``, ordered by ``'note_count'`` descending.

    Args:
        post_map: Mapping of post key to post dict. Each post dict is
            indexed with ``'note_count'`` and ``'post_url'``.

    Returns:
        One URL per post, highest note count first. Posts with equal note
        counts keep their relative order from *post_map*, because
        ``sorted`` is stable even with ``reverse=True``.

    Raises:
        KeyError: If a post dict lacks ``'note_count'`` or ``'post_url'``.
    """
    ranked_posts = sorted(
        post_map.values(), key=itemgetter('note_count'), reverse=True
    )
    return [post['post_url'] for post in ranked_posts]
def build_post_map_and_dumpster(client: pytumblr.TumblrRestClient, blog_name: str, tag: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Fetch every post of a blog carrying *tag* and split it into two maps.

    Posts are requested from ``{blog_name}.tumblr.com`` in pages of 20. A
    post whose dict contains a ``'parent_post_url'`` key is placed in the
    dumpster; every other post is placed in the post map. Both mappings are
    keyed by the post's ``'id_string'``.

    Args:
        client: Object whose ``posts(blogname, tag=..., offset=..., limit=...)``
            method returns a dict with ``'total_posts'`` and ``'posts'``.
        blog_name: Blog name without the ``.tumblr.com`` suffix.
        tag: Tag passed through to the posts request.

    Returns:
        A ``(post_map, dumpster)`` tuple.

    Raises:
        SystemExit: With status 1 when the first response carries no
            ``'total_posts'`` entry or a zero/empty one.
    """
    post_map: Dict[str, Any] = {}
    dumpster: Dict[str, Any] = {}
    total = 0
    offset = 0
    limit = 20

    while True:
        data = client.posts(f"{blog_name}.tumblr.com", tag=tag,
                            offset=offset,
                            limit=limit)

        # Only the first response can reach this branch with total == 0:
        # it either exits or sets total to a non-zero value.
        if total == 0:
            # .get() so a response without the key reaches the friendly
            # exit below instead of dying with a KeyError.
            total_posts = data.get('total_posts')
            if not total_posts:
                print("Couldn't get total posts. We're outta here!")
                sys.exit(1)
            print(f"I'm working with {total_posts} total posts...")
            total = total_posts

        # De-duplicate within the page; the first occurrence of an id wins.
        batch: Dict[str, Any] = {}
        for post in data.get('posts', []):
            batch.setdefault(post['id_string'], post)

        originals: Dict[str, Any] = {}
        for post_id, post in batch.items():
            if 'parent_post_url' not in post:
                originals[post_id] = post
            else:
                dumpster[post_id] = post

        # Merge the whole page as soon as it contains one unseen id; a page
        # made up solely of already-known ids leaves post_map untouched.
        if any(post_id not in post_map for post_id in originals):
            post_map.update(originals)

        offset += limit
        if offset >= total:
            # Stop before asking for a page that starts at or past the end;
            # this also keeps the completion message to a single print.
            print(f"Processed all {total} posts")
            break

        # Ceiling division: a trailing partial page is still a batch.
        batch_count = (total + limit - 1) // limit
        print(f"Processed batch {offset // limit} of {batch_count}...")

    return (post_map, dumpster)
def build_tag_stats_model(client: pytumblr.TumblrRestClient, blog_name: str, tag: str) -> StatsModel:
    """Gather the tagged posts of a blog and summarise them in a StatsModel.

    Args:
        client: Tumblr client handed to ``build_post_map_and_dumpster``.
        blog_name: Blog to inspect; also stored on the model.
        tag: Tag handed to ``build_post_map_and_dumpster``.

    Returns:
        A ``StatsModel`` holding both post mappings, their counts, the note
        total of the post map, and the post map's URLs ranked by note count.
    """
    originals, set_aside = build_post_map_and_dumpster(client, blog_name, tag)
    original_count = len(originals)

    model = StatsModel()
    model.operation = 'build_tag_stats'
    model.blog_name = blog_name
    model.post_map = originals
    model.dumpster = set_aside
    # The overall count covers both mappings; the "original" figures only
    # look at the post map.
    model.total_posts = original_count + len(set_aside)
    model.total_original_posts = original_count
    model.total_original_post_notes = calculate_total_notes(originals)
    model.ranked_post_urls = determine_top_post_urls(originals)
    return model
def main(blog_name: str = 'panda-pal', tag: str = 'inuyasha', output_path: str = './tumblr_data.json') -> None:
    """Build tag statistics for a blog and write them to a JSON file.

    Calling ``main()`` with no arguments uses the blog, tag and output path
    given as defaults.

    Args:
        blog_name: Blog to inspect.
        tag: Tag whose posts are collected.
        output_path: File that receives the JSON dump; it is overwritten.
    """
    client = init_client()
    stats_model = build_tag_stats_model(client, blog_name, tag)
    # json.dump escapes non-ASCII by default, so the explicit encoding only
    # makes the file's encoding independent of the platform default.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(asdict(stats_model), f, indent=2, sort_keys=True)
if __name__ == '__main__':
    # main() returns None, which sys.exit() turns into exit status 0.
    sys.exit(main())