Added a more structured model, some basic stats, ranked post list for a tag

This commit is contained in:
Amber McCloughan 2025-12-29 19:05:30 -05:00
parent 620f3d2975
commit 8a38cb510b
2 changed files with 95 additions and 32 deletions

74
app.py
View File

@ -1,11 +1,15 @@
from dataclasses import asdict
import json import json
from operator import itemgetter
import os import os
import sys import sys
from typing import Any, Dict from typing import Any, Dict, List, Tuple
import pytumblr import pytumblr
from model import StatsModel
def init_client() -> pytumblr.TumblrRestClient: def init_client() -> pytumblr.TumblrRestClient:
consumer_key = os.getenv('TUMBLR_CONSUMER_KEY') consumer_key = os.getenv('TUMBLR_CONSUMER_KEY')
@ -31,19 +35,30 @@ def init_client() -> pytumblr.TumblrRestClient:
) )
def calculate_total_notes(post_map: Dict[str, Any]) -> int:
    """Return the sum of ``note_count`` across every post in *post_map*.

    Args:
        post_map: Mapping of post key to a post dict. Each post dict must
            contain a ``note_count`` entry.

    Returns:
        The combined note count, or 0 when *post_map* is empty.

    Raises:
        KeyError: If a post dict has no ``note_count`` entry.
    """
    # Only the post dicts are needed, so walk the values directly instead of
    # iterating keys and indexing back into the mapping.
    return sum(post['note_count'] for post in post_map.values())
def determine_top_post_urls(post_map: Dict[str, Any]) -> List[str]:
    """Return the URL of every post in *post_map*, most-noted post first.

    Args:
        post_map: Mapping of post key to a post dict. Each post dict must
            contain ``note_count`` and ``post_url`` entries.

    Returns:
        The ``post_url`` values ordered by descending ``note_count``. Posts
        with equal note counts keep their relative order from *post_map*.

    Raises:
        KeyError: If a post dict lacks ``note_count`` or ``post_url``.
    """
    # sorted() accepts any iterable and already returns a new list, so no
    # intermediate list() copy of the values is needed.
    ranked_posts = sorted(
        post_map.values(),
        key=itemgetter('note_count'),
        reverse=True,
    )
    return [post['post_url'] for post in ranked_posts]
def build_post_map_and_dumpster(client: pytumblr.TumblrRestClient, blog_name: str, tag: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
post_map: Dict[str, Any] = {} post_map: Dict[str, Any] = {}
dumpster: Dict[str, Any] = {} dumpster: Dict[str, Any] = {}
total = 0 total = 0
offset = 0 offset = 0
limit = 20 limit = 20
while offset <= total: while offset <= total:
# Begin LOOP ### Begin LOOP
# Get me some posts! 😈🍪🍪🍪 # Get me some posts! 😈🍪🍪🍪
data = client.posts('panda-pal.tumblr.com', **{'tag': 'inuyasha'}, \ data = client.posts(f"{blog_name}.tumblr.com", **{'tag': tag},
offset = offset, \ offset=offset,
limit = limit) limit=limit)
# Total check for the first iteration, but always checked for sanity. # Total check for the first iteration, but always checked for sanity.
if total == 0: if total == 0:
@ -78,28 +93,43 @@ def main() -> None:
if any(post not in post_map for post in filtered_local_post_map): if any(post not in post_map for post in filtered_local_post_map):
post_map.update(filtered_local_post_map) post_map.update(filtered_local_post_map)
# The increment. Should always end the loop! # The increment and status printing. Should always end the loop!
offset += limit offset += limit
if offset <= total: if offset < total:
print(f"Processed batch {(offset // limit) - 1} of {total // 20}...") print(f"Processed batch {offset // limit} of {total // 20}...")
else: else:
print(f"Processed final batch of {total} posts") print(f"Processed all {total} posts")
# End LOOP ### End LOOP
if not post_map and not dumpster: return (post_map, dumpster)
print('We found nothing, so we end with nothing. 🤷')
sys.exit(1)
def build_tag_stats_model(client: pytumblr.TumblrRestClient, blog_name: str, tag: str) -> StatsModel:
    """Collect the posts of *blog_name* tagged *tag* and summarize them.

    Args:
        client: Tumblr REST client used to fetch the posts.
        blog_name: Blog whose posts are fetched.
        tag: Tag the posts are filtered by.

    Returns:
        A populated ``StatsModel`` holding the original posts (``post_map``),
        the non-original posts (``dumpster``), the derived totals, and the
        original post URLs ranked by note count.
    """
    post_map, dumpster = build_post_map_and_dumpster(client, blog_name, tag)

    return StatsModel(
        operation='build_tag_stats',
        blog_name=blog_name,
        # Record the tag that was queried; previously this field was never
        # set and was always serialized as an empty list.
        tags=[tag],
        post_map=post_map,
        dumpster=dumpster,
        # Everything handled: original posts plus the dumpster entries.
        total_posts=len(post_map) + len(dumpster),
        total_original_posts=len(post_map),
        total_original_post_notes=calculate_total_notes(post_map),
        ranked_post_urls=determine_top_post_urls(post_map),
    )
def main() -> None:
    """Build the tag stats for the hard-coded blog and tag and dump them as JSON.

    The result is written to ``./tumblr_data.json`` in the current working
    directory, overwriting any existing file.
    """
    tumblr_client = init_client()
    tag_stats = build_tag_stats_model(tumblr_client, 'panda-pal', 'inuyasha')

    with open("./tumblr_data.json", "w") as out_file:
        # asdict() converts the dataclass into plain dicts/lists for json.
        json.dump(asdict(tag_stats), out_file, indent=2, sort_keys=True)
def retrieve_all_posts(tag: str):
    """Placeholder: accepts a tag, does no work, and always returns None."""
    return None
# Script entry point: run the export only when executed directly (not on
# import), then exit explicitly with a success status.
if __name__ == '__main__':
    main()
    sys.exit(0)

33
model.py Normal file
View File

@ -0,0 +1,33 @@
from dataclasses import dataclass, field
from typing import Any, Dict, List
@dataclass
class StatsModel:
    """Class that models the output of the Tumblr stats script.

    Every field has a default, so ``StatsModel()`` yields an empty model that
    can be filled in afterwards or populated through keyword arguments.
    Immutable scalars use plain defaults; the list and dict fields use
    ``default_factory`` so each instance gets its own container.
    """

    # Operation used to output stats.
    operation: str = ''

    # Blog in question.
    blog_name: str = ''

    # Tags used.
    tags: List[str] = field(default_factory=list)

    # Original posts.
    post_map: Dict[str, Any] = field(default_factory=dict)

    # Posts that are not original.
    dumpster: Dict[str, Any] = field(default_factory=dict)

    # Total posts handled.
    total_posts: int = 0

    # Total original posts (per blog_name) handled.
    total_original_posts: int = 0

    # Total original post (per blog_name) notes handled.
    total_original_post_notes: int = 0

    # Posts ranked from most popular to least popular.
    ranked_post_urls: List[str] = field(default_factory=list)