Added a more structured model, some basic stats, ranked post list for a tag

This commit is contained in:
Amber McCloughan 2025-12-29 19:05:30 -05:00
parent 620f3d2975
commit 8a38cb510b
2 changed files with 95 additions and 32 deletions

94
app.py
View File

@ -1,11 +1,15 @@
from dataclasses import asdict
import json import json
from operator import itemgetter
import os import os
import sys import sys
from typing import Any, Dict from typing import Any, Dict, List, Tuple
import pytumblr import pytumblr
from model import StatsModel
def init_client() -> pytumblr.TumblrRestClient: def init_client() -> pytumblr.TumblrRestClient:
consumer_key = os.getenv('TUMBLR_CONSUMER_KEY') consumer_key = os.getenv('TUMBLR_CONSUMER_KEY')
@ -18,33 +22,44 @@ def init_client() -> pytumblr.TumblrRestClient:
('$TUMBLR_CONSUMER_SECRET', consumer_secret), ('$TUMBLR_CONSUMER_SECRET', consumer_secret),
('$TUMBLR_OAUTH_TOKEN', oauth_token), ('$TUMBLR_OAUTH_TOKEN', oauth_token),
('$TUMBLR_OAUTH_SECRET', oauth_secret)] if val is None] ('$TUMBLR_OAUTH_SECRET', oauth_secret)] if val is None]
if missing_vars: if missing_vars:
print("Missing important environment variables:", missing_vars) print("Missing important environment variables:", missing_vars)
sys.exit(1) sys.exit(1)
return pytumblr.TumblrRestClient( return pytumblr.TumblrRestClient(
consumer_key=consumer_key, # type: ignore consumer_key=consumer_key, # type: ignore
consumer_secret=consumer_secret, # type: ignore consumer_secret=consumer_secret, # type: ignore
oauth_token=oauth_token, # type: ignore oauth_token=oauth_token, # type: ignore
oauth_secret=oauth_secret, # type: ignore oauth_secret=oauth_secret, # type: ignore
) )
def calculate_total_notes(post_map: Dict[str, Any]) -> int:
    """Return the combined note count of every post in *post_map*.

    Args:
        post_map: Mapping of post key to post data. Each value must be
            subscriptable and carry a numeric ``'note_count'`` entry.

    Returns:
        The sum of all ``'note_count'`` values; ``0`` for an empty map.

    Raises:
        KeyError: If a post has no ``'note_count'`` entry.
    """
    # Only the values are needed, so iterate them directly instead of
    # looking each post up again by key.
    return sum(post['note_count'] for post in post_map.values())
def determine_top_post_urls(post_map: Dict[str, Any]) -> List[str]:
    """Return the post URLs ordered from most to fewest notes.

    Args:
        post_map: Mapping of post key to post data. Each value must carry
            ``'note_count'`` and ``'post_url'`` entries.

    Returns:
        The ``'post_url'`` of every post, sorted by ``'note_count'`` in
        descending order. Posts with equal note counts keep the order in
        which they appear in *post_map* (the sort is stable).

    Raises:
        KeyError: If a post lacks ``'note_count'`` or ``'post_url'``.
    """
    # sorted() accepts any iterable, so no intermediate list copy is needed.
    ranked_posts = sorted(
        post_map.values(), key=itemgetter('note_count'), reverse=True)
    return [post['post_url'] for post in ranked_posts]
def build_post_map_and_dumpster(client: pytumblr.TumblrRestClient, blog_name: str, tag: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
post_map: Dict[str, Any] = {} post_map: Dict[str, Any] = {}
dumpster: Dict[str, Any] = {} dumpster: Dict[str, Any] = {}
total = 0 total = 0
offset = 0 offset = 0
limit = 20 limit = 20
while offset <= total: while offset <= total:
# Begin LOOP ### Begin LOOP
# Get me some posts! 😈🍪🍪🍪 # Get me some posts! 😈🍪🍪🍪
data = client.posts('panda-pal.tumblr.com', **{'tag': 'inuyasha'}, \ data = client.posts(f"{blog_name}.tumblr.com", **{'tag': tag},
offset = offset, \ offset=offset,
limit = limit) limit=limit)
# Total check for the first iteration, but always checked for sanity. # Total check for the first iteration, but always checked for sanity.
if total == 0: if total == 0:
# Let's see what's in there, # Let's see what's in there,
@ -54,11 +69,11 @@ def main() -> None:
if not total_posts: if not total_posts:
print("Couldn't get total posts. We're outta here!") print("Couldn't get total posts. We're outta here!")
sys.exit(1) sys.exit(1)
# Something was there, so we're good. # Something was there, so we're good.
print(f"I'm working with {total_posts} total posts...") print(f"I'm working with {total_posts} total posts...")
total = total_posts total = total_posts
curr_posts = data['posts'] curr_posts = data['posts']
local_post_map: Dict[str, Any] = {} local_post_map: Dict[str, Any] = {}
for curr_post in curr_posts: for curr_post in curr_posts:
@ -78,28 +93,43 @@ def main() -> None:
if any(post not in post_map for post in filtered_local_post_map): if any(post not in post_map for post in filtered_local_post_map):
post_map.update(filtered_local_post_map) post_map.update(filtered_local_post_map)
# The increment. Should always end the loop! # The increment and status printing. Should always end the loop!
offset += limit offset += limit
if offset <= total: if offset < total:
print(f"Processed batch {(offset // limit) - 1} of {total // 20}...") print(f"Processed batch {offset // limit} of {total // 20}...")
else: else:
print(f"Processed final batch of {total} posts") print(f"Processed all {total} posts")
# End LOOP ### End LOOP
if not post_map and not dumpster: return (post_map, dumpster)
print('We found nothing, so we end with nothing. 🤷')
sys.exit(1)
else:
with open("./tumblr_data.json", "w") as f:
json.dump(post_map, f, indent=2, sort_keys=True)
with open("./tumblr_dumpster.json", "w") as f:
json.dump(dumpster, f, indent=2, sort_keys=True)
return
def build_tag_stats_model(client: pytumblr.TumblrRestClient, blog_name: str, tag: str) -> StatsModel:
    """Gather the posts under one tag of a blog and summarize them.

    Args:
        client: Tumblr REST client used to fetch the posts.
        blog_name: Blog name, passed through to the post fetcher.
        tag: Tag whose posts are collected.

    Returns:
        A StatsModel holding the fetched post map and dumpster together
        with the totals and the ranked URL list derived from the post map.
    """
    post_map, dumpster = build_post_map_and_dumpster(client, blog_name, tag)

    return StatsModel(
        operation='build_tag_stats',
        blog_name=blog_name,
        # Record the queried tag so the output says what the stats cover.
        tags=[tag],
        post_map=post_map,
        dumpster=dumpster,
        # Everything handled: original posts plus the ones set aside.
        total_posts=len(post_map) + len(dumpster),
        total_original_posts=len(post_map),
        total_original_post_notes=calculate_total_notes(post_map),
        ranked_post_urls=determine_top_post_urls(post_map),
    )
def main() -> None:
    """Build the tag stats for the hard-coded blog and tag and save them.

    The stats model is converted to a plain dict and written as indented,
    key-sorted JSON to ``./tumblr_data.json``.
    """
    tumblr_client = init_client()
    model = build_tag_stats_model(tumblr_client, 'panda-pal', 'inuyasha')

    with open("./tumblr_data.json", "w") as out_file:
        json.dump(asdict(model), out_file, indent=2, sort_keys=True)
# Script entry point: run the stats job, then exit with a success status.
if __name__ == '__main__':
    main()
    raise SystemExit(0)

33
model.py Normal file
View File

@ -0,0 +1,33 @@
from dataclasses import dataclass, field
from typing import Any, Dict, List
@dataclass
class StatsModel:
    """Class that models the output of the Tumblr stats script.

    Every field has a default, so an empty model can be created with
    ``StatsModel()`` and filled in afterwards, or built in one go with
    keyword arguments. Immutable defaults are plain literals; only the
    list and dict fields need ``default_factory`` so that instances never
    share a container.
    """

    # Operation used to output stats.
    operation: str = ''

    # Blog in question.
    blog_name: str = ''

    # Tags used.
    tags: List[str] = field(default_factory=list)

    # Original posts.
    post_map: Dict[str, Any] = field(default_factory=dict)

    # Posts that are not original.
    dumpster: Dict[str, Any] = field(default_factory=dict)

    # Total posts handled.
    total_posts: int = 0

    # Total original posts (per blog_name) handled.
    total_original_posts: int = 0

    # Total original post (per blog_name) notes handled.
    total_original_post_notes: int = 0

    # Posts ranked from most popular to least popular.
    ranked_post_urls: List[str] = field(default_factory=list)