"""Download tagged posts from a Tumblr blog and save them as JSON files."""
import json
import os
import sys
from typing import Any, Dict

import pytumblr

# Blog and tag that main() queries, and the number of posts requested per call.
BLOG_NAME = 'panda-pal.tumblr.com'
TAG = 'inuyasha'
PAGE_SIZE = 20


def init_client() -> pytumblr.TumblrRestClient:
    """Build a Tumblr REST client from credentials in the environment.

    Reads TUMBLR_CONSUMER_KEY, TUMBLR_CONSUMER_SECRET, TUMBLR_OAUTH_TOKEN and
    TUMBLR_OAUTH_SECRET. If any of them is unset, the missing names are
    printed and the process exits with status 1.

    Returns:
        A ``pytumblr.TumblrRestClient`` constructed with the four credentials.
    """
    consumer_key = os.getenv('TUMBLR_CONSUMER_KEY')
    consumer_secret = os.getenv('TUMBLR_CONSUMER_SECRET')
    oauth_token = os.getenv('TUMBLR_OAUTH_TOKEN')
    oauth_secret = os.getenv('TUMBLR_OAUTH_SECRET')

    # Names keep the leading '$' so the printed list matches shell notation.
    labelled = [
        ('$TUMBLR_CONSUMER_KEY', consumer_key),
        ('$TUMBLR_CONSUMER_SECRET', consumer_secret),
        ('$TUMBLR_OAUTH_TOKEN', oauth_token),
        ('$TUMBLR_OAUTH_SECRET', oauth_secret),
    ]
    missing_vars = [name for name, val in labelled if val is None]
    if missing_vars:
        print("Missing important environment variables:", missing_vars)
        sys.exit(1)

    # The check above guarantees none of these is None; the type checker
    # cannot see that, hence the ignores.
    return pytumblr.TumblrRestClient(
        consumer_key=consumer_key,  # type: ignore
        consumer_secret=consumer_secret,  # type: ignore
        oauth_token=oauth_token,  # type: ignore
        oauth_secret=oauth_secret,  # type: ignore
    )


def main() -> None:
    """Page through the tagged posts and write them to two JSON files.

    Posts without a ``parent_post_url`` key are written to
    ``./tumblr_data.json``; posts that have one go to
    ``./tumblr_dumpster.json``. Both files map ``id_string`` to the post.

    Exits with status 1 if the first response carries no usable
    ``total_posts`` value, or if no posts at all were collected.
    """
    client = init_client()
    post_map: Dict[str, Any] = {}
    dumpster: Dict[str, Any] = {}
    total = 0
    offset = 0

    while True:
        data = client.posts(BLOG_NAME, tag=TAG, offset=offset, limit=PAGE_SIZE)

        # Only the first response is used to learn how many posts to expect.
        if total == 0:
            # .get() so a response without the key reaches the message below
            # instead of dying with a KeyError.
            total_posts = data.get('total_posts')
            if not total_posts:
                print("Couldn't get total posts. We're outta here!")
                sys.exit(1)
            print(f"I'm working with {total_posts} total posts...")
            total = total_posts

        # Split this batch in one pass; the first occurrence of an id wins.
        seen_in_batch = set()
        batch_keepers: Dict[str, Any] = {}
        for post in data['posts']:
            post_id = post['id_string']
            if post_id in seen_in_batch:
                continue
            seen_in_batch.add(post_id)
            if 'parent_post_url' in post:
                dumpster[post_id] = post
            else:
                batch_keepers[post_id] = post

        # Merge only when the batch brings at least one id not stored yet;
        # a batch made purely of repeats leaves the stored copies untouched.
        if any(post_id not in post_map for post_id in batch_keepers):
            post_map.update(batch_keepers)

        offset += PAGE_SIZE
        if offset < total:
            # Zero-based batch index, and the index of the last batch.
            finished = (offset // PAGE_SIZE) - 1
            last_batch = (total - 1) // PAGE_SIZE
            print(f"Processed batch {finished} of {last_batch}...")
        else:
            # offset has reached or passed total, so a further request would
            # start beyond the reported number of posts.
            print(f"Processed final batch of {total} posts")
            break

    if not post_map and not dumpster:
        print('We found nothing, so we end with nothing. 🤷')
        sys.exit(1)

    with open("./tumblr_data.json", "w", encoding="utf-8") as f:
        json.dump(post_map, f, indent=2, sort_keys=True)
    with open("./tumblr_dumpster.json", "w", encoding="utf-8") as f:
        json.dump(dumpster, f, indent=2, sort_keys=True)


def retrieve_all_posts(tag: str):
    """Placeholder that currently does nothing and returns ``None``.

    TODO: not implemented; ``tag`` is accepted but unused.
    """
    return


if __name__ == '__main__':
    main()
    sys.exit(0)