Added a more structured model, some basic stats, and a ranked post list for a tag

This commit is contained in:
Amber McCloughan 2025-12-29 19:05:30 -05:00
parent 620f3d2975
commit 8a38cb510b
2 changed files with 95 additions and 32 deletions

94
app.py
View File

@ -1,11 +1,15 @@
from dataclasses import asdict
import json
from operator import itemgetter
import os
import sys
from typing import Any, Dict
from typing import Any, Dict, List, Tuple
import pytumblr
from model import StatsModel
def init_client() -> pytumblr.TumblrRestClient:
consumer_key = os.getenv('TUMBLR_CONSUMER_KEY')
@ -18,33 +22,44 @@ def init_client() -> pytumblr.TumblrRestClient:
('$TUMBLR_CONSUMER_SECRET', consumer_secret),
('$TUMBLR_OAUTH_TOKEN', oauth_token),
('$TUMBLR_OAUTH_SECRET', oauth_secret)] if val is None]
if missing_vars:
print("Missing important environment variables:", missing_vars)
sys.exit(1)
return pytumblr.TumblrRestClient(
consumer_key=consumer_key, # type: ignore
consumer_secret=consumer_secret, # type: ignore
oauth_token=oauth_token, # type: ignore
oauth_secret=oauth_secret, # type: ignore
consumer_key=consumer_key, # type: ignore
consumer_secret=consumer_secret, # type: ignore
oauth_token=oauth_token, # type: ignore
oauth_secret=oauth_secret, # type: ignore
)
def main() -> None:
client = init_client()
def calculate_total_notes(post_map: Dict[str, Any]) -> int:
    """Return the sum of the 'note_count' field across every post in *post_map*.

    Args:
        post_map: Mapping of post key -> post dict; each post dict must
            contain a 'note_count' entry. An empty mapping yields 0.

    Returns:
        Total note count over all posts.
    """
    # sum() over .values() avoids the manual accumulator and the
    # key-then-reindex lookup of the original loop.
    return sum(post['note_count'] for post in post_map.values())
def determine_top_post_urls(post_map: Dict[str, Any]) -> List[str]:
    """Return the 'post_url' of every post, ordered by descending note count.

    Args:
        post_map: Mapping of post key -> post dict; each post dict must
            contain 'note_count' and 'post_url' entries.

    Returns:
        Post URLs ranked from most to least notes.
    """
    # sorted() accepts any iterable, so the redundant list() wrapper
    # around post_map.values() is dropped.
    ranked = sorted(post_map.values(), key=itemgetter('note_count'), reverse=True)
    return [post['post_url'] for post in ranked]
def build_post_map_and_dumpster(client: pytumblr.TumblrRestClient, blog_name: str, tag: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
post_map: Dict[str, Any] = {}
dumpster: Dict[str, Any] = {}
total = 0
offset = 0
limit = 20
while offset <= total:
# Begin LOOP
### Begin LOOP
# Get me some posts! 😈🍪🍪🍪
data = client.posts('panda-pal.tumblr.com', **{'tag': 'inuyasha'}, \
offset = offset, \
limit = limit)
data = client.posts(f"{blog_name}.tumblr.com", **{'tag': tag},
offset=offset,
limit=limit)
# Total check for the first iteration, but always checked for sanity.
if total == 0:
# Let's see what's in there,
@ -54,11 +69,11 @@ def main() -> None:
if not total_posts:
print("Couldn't get total posts. We're outta here!")
sys.exit(1)
# Something was there, so we're good.
print(f"I'm working with {total_posts} total posts...")
total = total_posts
curr_posts = data['posts']
local_post_map: Dict[str, Any] = {}
for curr_post in curr_posts:
@ -78,28 +93,43 @@ def main() -> None:
if any(post not in post_map for post in filtered_local_post_map):
post_map.update(filtered_local_post_map)
# The increment. Should always end the loop!
# The increment and status printing. Should always end the loop!
offset += limit
if offset <= total:
print(f"Processed batch {(offset // limit) - 1} of {total // 20}...")
if offset < total:
print(f"Processed batch {offset // limit} of {total // 20}...")
else:
print(f"Processed final batch of {total} posts")
# End LOOP
if not post_map and not dumpster:
print('We found nothing, so we end with nothing. 🤷')
sys.exit(1)
else:
with open("./tumblr_data.json", "w") as f:
json.dump(post_map, f, indent=2, sort_keys=True)
with open("./tumblr_dumpster.json", "w") as f:
json.dump(dumpster, f, indent=2, sort_keys=True)
return
print(f"Processed all {total} posts")
### End LOOP
return (post_map, dumpster)
def retrieve_all_posts(tag: str):
def build_tag_stats_model(client: pytumblr.TumblrRestClient, blog_name: str, tag: str) -> StatsModel:
    """Fetch all posts for *tag* on *blog_name* and summarize them in a StatsModel.

    Args:
        client: Authenticated Tumblr REST client.
        blog_name: Blog short name (without the '.tumblr.com' suffix).
        tag: Tag to filter posts by.

    Returns:
        A fully populated StatsModel for the blog/tag combination.
    """
    post_map, dumpster = build_post_map_and_dumpster(client, blog_name, tag)
    # Construct the model in one shot instead of mutating it field by field.
    return StatsModel(
        operation='build_tag_stats',
        blog_name=blog_name,
        # Fix: the tag used for the query was previously never recorded,
        # leaving StatsModel.tags empty in the JSON output.
        tags=[tag],
        post_map=post_map,
        dumpster=dumpster,
        total_posts=len(post_map) + len(dumpster),
        total_original_posts=len(post_map),
        total_original_post_notes=calculate_total_notes(post_map),
        ranked_post_urls=determine_top_post_urls(post_map),
    )
def main() -> None:
    """Entry point: build tag stats for the hard-coded blog/tag and dump them to JSON."""
    tumblr_client = init_client()
    model = build_tag_stats_model(tumblr_client, 'panda-pal', 'inuyasha')
    with open("./tumblr_data.json", "w") as out_file:
        json.dump(asdict(model), out_file, indent=2, sort_keys=True)
if __name__ == '__main__':
main()
sys.exit(0)
sys.exit(0)

33
model.py Normal file
View File

@ -0,0 +1,33 @@
from dataclasses import dataclass, field
from typing import Any, Dict, List
@dataclass
class StatsModel:
    """Class that models the output of the Tumblr stats script.

    All fields default to an empty/zero value so an instance can be built
    incrementally or via keyword arguments; json-serializable via asdict().
    """

    # Operation used to output stats (e.g. 'build_tag_stats').
    # Plain defaults replace field(default_factory=str/int): immutable
    # scalars are safe as dataclass defaults and read more clearly.
    operation: str = ""
    # Blog in question.
    blog_name: str = ""
    # Tags used.
    tags: List[str] = field(default_factory=list)
    # Original posts.
    post_map: Dict[str, Any] = field(default_factory=dict)
    # Posts that are not original.
    dumpster: Dict[str, Any] = field(default_factory=dict)
    # Total posts handled.
    total_posts: int = 0
    # Total original posts (per blog_name) handled.
    total_original_posts: int = 0
    # Total original post (per blog_name) notes handled.
    total_original_post_notes: int = 0
    # Posts ranked from most popular to least popular.
    ranked_post_urls: List[str] = field(default_factory=list)