116 lines
4.3 KiB
Python
116 lines
4.3 KiB
Python
from collections import defaultdict
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime
|
|
from operator import itemgetter
|
|
from typing import Any, Dict, List
|
|
|
|
|
|
@dataclass
class StatsModel:
    """Class that models the output of the Tumblr stats script.

    The caller supplies the operation name, the blog name and two post maps
    (original and unoriginal posts, each indexed by post ID).  Every field
    declared with ``init=False`` is derived from those maps in
    ``__post_init__``.

    Each value of ``original_post_map`` is read as a mapping with the keys
    ``'note_count'`` (added to running totals), ``'date'`` (a string like
    ``'2025-12-28 20:00:34 GMT'``) and ``'tags'`` (an iterable of tag
    names).  Only the number of entries in ``unoriginal_post_map`` is used.
    """

    # The operation that was used to output stats.
    operation: str

    # The blog in question.
    blog_name: str

    # Contains original posts, indexed by post ID.
    original_post_map: Dict[str, Any]

    # Contains posts that are not original, indexed by post ID.
    unoriginal_post_map: Dict[str, Any]

    # Any tags used.
    tags: List[str] = field(default_factory=list)

    # Total count of posts processed.
    total_posts: int = field(init=False)

    # Total original posts (for blog_name) processed.
    total_original_posts: int = field(init=False)

    # Total original post (for blog_name) notes processed.
    total_original_post_notes: int = field(init=False)

    # Stats for original posts within each month and year, keyed by
    # 'YYYY-MM'.  Each value holds 'note_count', 'post_count', 'year_month'
    # and 'notes_to_posts_ratio'.
    total_original_post_notes_by_month_and_year: Dict[str, Dict[str, Any]] = field(
        init=False)

    # Tags ranked from most popular to least popular by notes.
    most_popular_tags: List[Dict[str, Any]] = field(init=False)

    def __post_init__(self):
        """Populate every derived (``init=False``) field from the post maps."""
        self.total_posts = self.calculate_total_posts()
        self.total_original_posts = self.calculate_total_original_posts()
        self.total_original_post_notes = (
            self.calculate_total_original_post_notes())
        self.total_original_post_notes_by_month_and_year = (
            self.calculate_total_original_post_notes_by_month_and_year())
        self.most_popular_tags = self.determine_most_popular_tags('note_count')

    def calculate_total_posts(self) -> int:
        """Return the number of original plus unoriginal posts."""
        return len(self.original_post_map) + len(self.unoriginal_post_map)

    def calculate_total_original_posts(self) -> int:
        """Return the number of original posts."""
        return len(self.original_post_map)

    def calculate_total_original_post_notes(self) -> int:
        """Return the sum of ``'note_count'`` over all original posts.

        Raises:
            KeyError: if an original post has no ``'note_count'`` entry.
        """
        return sum(post['note_count']
                   for post in self.original_post_map.values())

    @staticmethod
    def _add_notes_to_posts_ratio(buckets: Dict[str, Dict[str, Any]]) -> None:
        """Store ``note_count / post_count`` on every bucket, in place."""
        # A bucket only exists once a post has been counted into it, so
        # post_count is always at least 1 here.
        for sts in buckets.values():
            sts['notes_to_posts_ratio'] = sts['note_count'] / sts['post_count']

    def calculate_total_original_post_notes_by_month_and_year(
            self) -> Dict[str, Dict[str, Any]]:
        """Aggregate original posts per calendar month.

        Returns:
            A plain ``dict`` keyed by ``'YYYY-MM'``.  Each value is a dict
            with ``'note_count'``, ``'post_count'``, ``'year_month'`` and
            ``'notes_to_posts_ratio'``.  Months appear in the order they are
            first encountered while iterating ``original_post_map``.

        Raises:
            KeyError: if a post lacks ``'date'`` or ``'note_count'``.
            ValueError: if a ``'date'`` string does not match
                ``'%Y-%m-%d %H:%M:%S %Z'``.
        """
        # https://docs.python.org/3/library/collections.html#defaultdict-objects
        date_map: Dict[str, Dict[str, Any]] = defaultdict(
            lambda: {'note_count': 0, 'post_count': 0})

        # Gathering the results.
        for post in self.original_post_map.values():
            # Format is like '2025-12-28 20:00:34 GMT'
            post_date: datetime = datetime.strptime(
                post['date'], '%Y-%m-%d %H:%M:%S %Z')
            post_date_key = f"{post_date.year}-{post_date.month:02}"
            sts = date_map[post_date_key]
            sts['year_month'] = post_date_key
            sts['post_count'] += 1
            sts['note_count'] += post['note_count']

        # Results postprocessing.
        self._add_notes_to_posts_ratio(date_map)

        # Hand back a plain dict rather than the defaultdict: otherwise a
        # later lookup of an absent month would silently insert a half-filled
        # bucket, and dataclasses.asdict() cannot rebuild a defaultdict field
        # on Python versions before 3.12.
        return dict(date_map)

    def determine_most_popular_tags(self, sort_key: str) -> List[Dict[str, Any]]:
        """Aggregate original posts per tag and rank the tags.

        Args:
            sort_key: Name of the per-tag entry to rank by; one of
                ``'note_count'``, ``'post_count'``, ``'tag'`` or
                ``'notes_to_posts_ratio'``.

        Returns:
            One dict per tag (``'note_count'``, ``'post_count'``, ``'tag'``,
            ``'notes_to_posts_ratio'``), sorted by ``sort_key`` from highest
            to lowest.  Tags with equal values keep first-seen order.

        Raises:
            KeyError: if a post lacks ``'tags'`` or ``'note_count'``, or if
                ``sort_key`` is not an entry of the per-tag dicts.
        """
        # https://docs.python.org/3/library/collections.html#defaultdict-objects
        tag_dict: Dict[str, Dict[str, Any]] = defaultdict(
            lambda: {'note_count': 0, 'post_count': 0})

        # Gathering the results.
        for post in self.original_post_map.values():
            for tag in post['tags']:
                sts = tag_dict[tag]
                sts['tag'] = tag
                sts['post_count'] += 1
                sts['note_count'] += post['note_count']

        # Results postprocessing.
        self._add_notes_to_posts_ratio(tag_dict)

        # https://stackoverflow.com/a/73050
        return sorted(tag_dict.values(), key=itemgetter(sort_key),
                      reverse=True)
|