Added a more structured model, some basic stats, and a ranked post list for a tag

This commit is contained in:
Amber McCloughan 2025-12-29 19:05:30 -05:00
parent 620f3d2975
commit 8a38cb510b
2 changed files with 95 additions and 32 deletions

94
app.py
View File

@ -1,11 +1,15 @@
from dataclasses import asdict
import json
from operator import itemgetter
import os
import sys
from typing import Any, Dict
from typing import Any, Dict, List, Tuple
import pytumblr
from model import StatsModel
def init_client() -> pytumblr.TumblrRestClient:
consumer_key = os.getenv('TUMBLR_CONSUMER_KEY')
@ -18,33 +22,44 @@ def init_client() -> pytumblr.TumblrRestClient:
('$TUMBLR_CONSUMER_SECRET', consumer_secret),
('$TUMBLR_OAUTH_TOKEN', oauth_token),
('$TUMBLR_OAUTH_SECRET', oauth_secret)] if val is None]
if missing_vars:
print("Missing important environment variables:", missing_vars)
sys.exit(1)
return pytumblr.TumblrRestClient(
consumer_key=consumer_key, # type: ignore
consumer_secret=consumer_secret, # type: ignore
oauth_token=oauth_token, # type: ignore
oauth_secret=oauth_secret, # type: ignore
consumer_key=consumer_key, # type: ignore
consumer_secret=consumer_secret, # type: ignore
oauth_token=oauth_token, # type: ignore
oauth_secret=oauth_secret, # type: ignore
)
def main() -> None:
client = init_client()
def calculate_total_notes(post_map: Dict[str, Any]) -> int:
    """Return the sum of the 'note_count' field across every post in *post_map*.

    Args:
        post_map: Mapping of post key -> post dict; each post dict must
            contain a 'note_count' entry. An empty mapping yields 0.

    Returns:
        Total note count over all posts.
    """
    # sum() over .values() avoids the manual accumulator and the
    # key-then-reindex lookup of the original loop.
    return sum(post['note_count'] for post in post_map.values())
def determine_top_post_urls(post_map: Dict[str, Any]) -> List[str]:
    """Return the 'post_url' of every post, ordered by descending note count.

    Args:
        post_map: Mapping of post key -> post dict; each post dict must
            contain 'note_count' and 'post_url' entries.

    Returns:
        Post URLs ranked from most to least notes.
    """
    # sorted() accepts any iterable, so the redundant list() wrapper
    # around post_map.values() is dropped.
    ranked = sorted(post_map.values(), key=itemgetter('note_count'), reverse=True)
    return [post['post_url'] for post in ranked]
def build_post_map_and_dumpster(client: pytumblr.TumblrRestClient, blog_name: str, tag: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
post_map: Dict[str, Any] = {}
dumpster: Dict[str, Any] = {}
total = 0
offset = 0
limit = 20
while offset <= total:
# Begin LOOP
### Begin LOOP
# Get me some posts! 😈🍪🍪🍪
data = client.posts('panda-pal.tumblr.com', **{'tag': 'inuyasha'}, \
offset = offset, \
limit = limit)
data = client.posts(f"{blog_name}.tumblr.com", **{'tag': tag},
offset=offset,
limit=limit)
# Total check for the first iteration, but always checked for sanity.
if total == 0:
# Let's see what's in there,
@ -54,11 +69,11 @@ def main() -> None:
if not total_posts:
print("Couldn't get total posts. We're outta here!")
sys.exit(1)
# Something was there, so we're good.
print(f"I'm working with {total_posts} total posts...")
total = total_posts
curr_posts = data['posts']
local_post_map: Dict[str, Any] = {}
for curr_post in curr_posts:
@ -78,28 +93,43 @@ def main() -> None:
if any(post not in post_map for post in filtered_local_post_map):
post_map.update(filtered_local_post_map)
# The increment. Should always end the loop!
# The increment and status printing. Should always end the loop!
offset += limit
if offset <= total:
print(f"Processed batch {(offset // limit) - 1} of {total // 20}...")
if offset < total:
print(f"Processed batch {offset // limit} of {total // 20}...")
else:
print(f"Processed final batch of {total} posts")
# End LOOP
if not post_map and not dumpster:
print('We found nothing, so we end with nothing. 🤷')
sys.exit(1)
else:
with open("./tumblr_data.json", "w") as f:
json.dump(post_map, f, indent=2, sort_keys=True)
with open("./tumblr_dumpster.json", "w") as f:
json.dump(dumpster, f, indent=2, sort_keys=True)
return
print(f"Processed all {total} posts")
### End LOOP
return (post_map, dumpster)
def retrieve_all_posts(tag: str):
def build_tag_stats_model(client: pytumblr.TumblrRestClient, blog_name: str, tag: str) -> StatsModel:
    """Fetch all posts for *tag* on *blog_name* and summarize them in a StatsModel.

    Args:
        client: Authenticated Tumblr REST client.
        blog_name: Blog short name (without the '.tumblr.com' suffix).
        tag: Tag to filter posts by.

    Returns:
        A fully populated StatsModel for the blog/tag combination.
    """
    post_map, dumpster = build_post_map_and_dumpster(client, blog_name, tag)
    # Construct the model in one shot instead of mutating it field by field.
    return StatsModel(
        operation='build_tag_stats',
        blog_name=blog_name,
        # Fix: the tag used for the query was previously never recorded,
        # leaving StatsModel.tags empty in the JSON output.
        tags=[tag],
        post_map=post_map,
        dumpster=dumpster,
        total_posts=len(post_map) + len(dumpster),
        total_original_posts=len(post_map),
        total_original_post_notes=calculate_total_notes(post_map),
        ranked_post_urls=determine_top_post_urls(post_map),
    )
def main() -> None:
    """Entry point: build tag stats for the hard-coded blog/tag and dump them to JSON."""
    tumblr_client = init_client()
    model = build_tag_stats_model(tumblr_client, 'panda-pal', 'inuyasha')
    with open("./tumblr_data.json", "w") as out_file:
        json.dump(asdict(model), out_file, indent=2, sort_keys=True)
if __name__ == '__main__':
main()
sys.exit(0)
sys.exit(0)

33
model.py Normal file
View File

@ -0,0 +1,33 @@
from dataclasses import dataclass, field
from typing import Any, Dict, List
@dataclass
class StatsModel:
    """Class that models the output of the Tumblr stats script.

    All fields default to an empty/zero value so an instance can be built
    incrementally or via keyword arguments; json-serializable via asdict().
    """

    # Operation used to output stats (e.g. 'build_tag_stats').
    # Plain defaults replace field(default_factory=str/int): immutable
    # scalars are safe as dataclass defaults and read more clearly.
    operation: str = ""
    # Blog in question.
    blog_name: str = ""
    # Tags used.
    tags: List[str] = field(default_factory=list)
    # Original posts.
    post_map: Dict[str, Any] = field(default_factory=dict)
    # Posts that are not original.
    dumpster: Dict[str, Any] = field(default_factory=dict)
    # Total posts handled.
    total_posts: int = 0
    # Total original posts (per blog_name) handled.
    total_original_posts: int = 0
    # Total original post (per blog_name) notes handled.
    total_original_post_notes: int = 0
    # Posts ranked from most popular to least popular.
    ranked_post_urls: List[str] = field(default_factory=list)