imgchest/model/
scraped_user.rs1use once_cell::sync::Lazy;
2use scraper::Html;
3use scraper::Selector;
4use time::Date;
5use time::OffsetDateTime;
6use time::Time;
7
8static APP_SELECTOR: Lazy<Selector> = Lazy::new(|| Selector::parse("#app").unwrap());
9
10#[derive(Debug, thiserror::Error)]
12pub enum FromHtmlError {
13 #[error("missing {0}")]
14 MissingElement(&'static str),
15
16 #[error("missing attribute {0}")]
17 MissingAttribute(&'static str),
18
19 #[error("invalid data page")]
20 InvalidDataPage(serde_json::Error),
21}
22
23#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
25pub struct ScrapedUser {
26 pub name: Box<str>,
28
29 pub posts: u64,
31
32 pub comments: u64,
34
35 pub created: OffsetDateTime,
40
41 pub post_views: u64,
46
47 pub experience: u64,
52
53 pub favorites: u64,
58}
59
60impl ScrapedUser {
61 pub(crate) fn from_html(html: &Html) -> Result<Self, FromHtmlError> {
63 let app_element = html
66 .select(&APP_SELECTOR)
67 .next()
68 .ok_or(FromHtmlError::MissingElement("app div"))?;
69 let data_page_attr = app_element
70 .attr("data-page")
71 .ok_or(FromHtmlError::MissingAttribute("data-page"))?;
72 let page_data: PageData =
73 serde_json::from_str(data_page_attr).map_err(FromHtmlError::InvalidDataPage)?;
74
75 Ok(Self {
76 name: page_data.props.target_user.username,
77 posts: page_data.props.target_user.post_count,
78 comments: page_data.props.target_user.comment_count,
79 created: OffsetDateTime::new_utc(
80 page_data.props.target_user.created_at,
81 Time::MIDNIGHT,
82 ),
83
84 post_views: page_data.props.target_user.post_views,
85 experience: page_data.props.target_user.experience,
86 favorites: page_data.props.target_user.favorite_count,
87 })
88 }
89}
90
91#[derive(Debug, serde::Deserialize)]
92struct PageData {
93 props: PageDataProps,
94}
95
96#[derive(Debug, serde::Deserialize)]
97struct PageDataProps {
98 #[serde(rename = "targetUser")]
99 target_user: TargetUser,
100}
101
102#[derive(Debug, serde::Deserialize)]
103struct TargetUser {
104 username: Box<str>,
105 post_count: u64,
106 comment_count: u64,
107 #[serde(with = "mdy_date")]
108 created_at: Date,
109
110 post_views: u64,
111 experience: u64,
112 favorite_count: u64,
113}
114
115time::serde::format_description!(mdy_date, Date, "[month]/[day]/[year]");