"""Collect post statistics for a Tumblr blog/tag pair and dump them to JSON."""

from dataclasses import asdict
import json
from operator import itemgetter
import os
import sys
from typing import Any, Dict, List, Tuple

import pytumblr

from model import StatsModel


def init_client() -> pytumblr.TumblrRestClient:
    """Build an authenticated Tumblr REST client from environment variables.

    Reads the four TUMBLR_* credential variables; if any is unset, prints
    the missing names and exits the process with status 1.

    Returns:
        A configured ``pytumblr.TumblrRestClient``.
    """
    consumer_key = os.getenv('TUMBLR_CONSUMER_KEY')
    consumer_secret = os.getenv('TUMBLR_CONSUMER_SECRET')
    oauth_token = os.getenv('TUMBLR_OAUTH_TOKEN')
    oauth_secret = os.getenv('TUMBLR_OAUTH_SECRET')

    missing_vars = [name for name, val in [
        ('$TUMBLR_CONSUMER_KEY', consumer_key),
        ('$TUMBLR_CONSUMER_SECRET', consumer_secret),
        ('$TUMBLR_OAUTH_TOKEN', oauth_token),
        ('$TUMBLR_OAUTH_SECRET', oauth_secret),
    ] if val is None]
    if missing_vars:
        print("Missing important environment variables:", missing_vars)
        sys.exit(1)

    return pytumblr.TumblrRestClient(
        consumer_key=consumer_key,  # type: ignore
        consumer_secret=consumer_secret,  # type: ignore
        oauth_token=oauth_token,  # type: ignore
        oauth_secret=oauth_secret,  # type: ignore
    )


def calculate_total_notes(post_map: Dict[str, Any]) -> int:
    """Return the sum of ``note_count`` across all posts in *post_map*.

    Args:
        post_map: Mapping of post id -> post dict (each with a
            ``note_count`` key).
    """
    return sum(post['note_count'] for post in post_map.values())


def determine_top_post_urls(post_map: Dict[str, Any]) -> List[str]:
    """Return all post URLs ordered from most to least notes."""
    ranked = sorted(post_map.values(), key=itemgetter('note_count'),
                    reverse=True)
    return [post['post_url'] for post in ranked]


def build_post_map_and_dumpster(client: pytumblr.TumblrRestClient,
                                blog_name: str,
                                tag: str) -> Tuple[Dict[str, Any],
                                                   Dict[str, Any]]:
    """Fetch every tagged post from a blog, split originals from reblogs.

    Pages through the blog's posts matching *tag*. Posts that carry a
    ``parent_post_url`` key are reblogs and go into the dumpster; the rest
    are originals and go into the post map. Exits with status 1 if the API
    reports no total post count.

    Args:
        client: Authenticated Tumblr client.
        blog_name: Blog short name (without the ``.tumblr.com`` suffix).
        tag: Tag to filter posts by.

    Returns:
        ``(post_map, dumpster)`` — both keyed by the post's ``id_string``.
    """
    post_map: Dict[str, Any] = {}
    dumpster: Dict[str, Any] = {}
    total = 0
    offset = 0
    limit = 20

    while True:
        # Get me some posts! 😈🍪🍪🍪
        data = client.posts(f"{blog_name}.tumblr.com", tag=tag,
                            offset=offset, limit=limit)

        # Establish the total on the first iteration.
        if total == 0:
            total_posts = data['total_posts']
            if not total_posts:
                print("Couldn't get total posts. We're outta here!")
                sys.exit(1)
            print(f"I'm working with {total_posts} total posts...")
            total = total_posts

        # De-duplicate this batch by id, then split originals from reblogs:
        # a 'parent_post_url' key marks a reblog.
        for curr_post in data['posts']:
            curr_key = curr_post['id_string']
            if 'parent_post_url' in curr_post:
                dumpster[curr_key] = curr_post
            else:
                post_map[curr_key] = curr_post

        offset += limit
        if offset >= total:
            # Done — avoid issuing an extra empty request when total is an
            # exact multiple of limit (the old `offset <= total` loop did).
            print(f"Processed all {total} posts")
            break
        # Ceiling division so the batch count is right when total is not a
        # multiple of limit (old code floor-divided by a hard-coded 20).
        print(f"Processed batch {offset // limit} "
              f"of {-(-total // limit)}...")

    return (post_map, dumpster)


def build_tag_stats_model(client: pytumblr.TumblrRestClient,
                          blog_name: str,
                          tag: str) -> StatsModel:
    """Assemble a populated ``StatsModel`` for *blog_name* / *tag*."""
    post_map, dumpster = build_post_map_and_dumpster(client, blog_name, tag)

    stats_model: StatsModel = StatsModel()
    stats_model.operation = 'build_tag_stats'
    stats_model.blog_name = blog_name
    stats_model.post_map = post_map
    stats_model.dumpster = dumpster
    stats_model.total_posts = len(post_map) + len(dumpster)
    stats_model.total_original_posts = len(post_map)
    stats_model.total_original_post_notes = calculate_total_notes(post_map)
    stats_model.ranked_post_urls = determine_top_post_urls(post_map)
    return stats_model


def main(blog_name: str = 'panda-pal',
         tag: str = 'inuyasha',
         output_path: str = "./tumblr_data.json") -> None:
    """Fetch tag stats and write them as pretty-printed JSON.

    Args:
        blog_name: Blog to analyze (defaults preserve prior behavior).
        tag: Tag to filter by.
        output_path: Where to write the JSON dump.
    """
    client = init_client()
    stats_model = build_tag_stats_model(client, blog_name, tag)
    with open(output_path, "w") as f:
        json.dump(asdict(stats_model), f, indent=2, sort_keys=True)


if __name__ == '__main__':
    main()
    sys.exit(0)