imgchest/model/
scraped_user.rs

1use once_cell::sync::Lazy;
2use scraper::Html;
3use scraper::Selector;
4use time::Date;
5use time::OffsetDateTime;
6use time::Time;
7
/// CSS selector matching the element with id `app`.
///
/// Compiled lazily on first use. The `unwrap` can only fire if the hard-coded
/// selector string `"#app"` fails to parse.
static APP_SELECTOR: Lazy<Selector> = Lazy::new(|| Selector::parse("#app").unwrap());
9
10/// An error that may occur while parsing a post
11#[derive(Debug, thiserror::Error)]
12pub enum FromHtmlError {
13    #[error("missing {0}")]
14    MissingElement(&'static str),
15
16    #[error("missing attribute {0}")]
17    MissingAttribute(&'static str),
18
19    #[error("invalid data page")]
20    InvalidDataPage(serde_json::Error),
21}
22
/// A user, as scraped from the data embedded in a user page.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ScrapedUser {
    /// The user's name
    pub name: Box<str>,

    /// The number of posts created by this user
    pub posts: u64,

    /// The number of comments created by this user
    pub comments: u64,

    /// The time this user was created.
    ///
    /// # Warning
    /// This is an estimate.
    /// The scraped page only provides a calendar date, so the time of day is
    /// set to midnight UTC.
    pub created: OffsetDateTime,

    /// The number of views all posts made by this user have gotten.
    ///
    /// # Warning
    /// This is not a part of the real api struct.
    pub post_views: u64,

    /// The experience gained by the user?
    ///
    /// # Warning
    /// This is not a part of the real api struct.
    pub experience: u64,

    /// The number of favorites by the user.
    ///
    /// # Warning
    /// This is not a part of the real api struct.
    pub favorites: u64,
}
59
60impl ScrapedUser {
61    /// Parse this from html
62    pub(crate) fn from_html(html: &Html) -> Result<Self, FromHtmlError> {
63        // Implement:
64        // JSON.parse(document.getElementById('app').getAttribute('data-page'))
65        let app_element = html
66            .select(&APP_SELECTOR)
67            .next()
68            .ok_or(FromHtmlError::MissingElement("app div"))?;
69        let data_page_attr = app_element
70            .attr("data-page")
71            .ok_or(FromHtmlError::MissingAttribute("data-page"))?;
72        let page_data: PageData =
73            serde_json::from_str(data_page_attr).map_err(FromHtmlError::InvalidDataPage)?;
74
75        Ok(Self {
76            name: page_data.props.target_user.username,
77            posts: page_data.props.target_user.post_count,
78            comments: page_data.props.target_user.comment_count,
79            created: OffsetDateTime::new_utc(
80                page_data.props.target_user.created_at,
81                Time::MIDNIGHT,
82            ),
83
84            post_views: page_data.props.target_user.post_views,
85            experience: page_data.props.target_user.experience,
86            favorites: page_data.props.target_user.favorite_count,
87        })
88    }
89}
90
/// Top-level shape of the JSON stored in the `data-page` attribute.
///
/// Only the `props` key is declared here.
#[derive(Debug, serde::Deserialize)]
struct PageData {
    /// The page properties, which carry the target user.
    props: PageDataProps,
}
95
96#[derive(Debug, serde::Deserialize)]
97struct PageDataProps {
98    #[serde(rename = "targetUser")]
99    target_user: TargetUser,
100}
101
/// The raw user object found under `props.targetUser` in the page data.
///
/// [`ScrapedUser::from_html`] converts this into a [`ScrapedUser`].
#[derive(Debug, serde::Deserialize)]
struct TargetUser {
    /// The user's name; becomes `ScrapedUser::name`.
    username: Box<str>,
    /// Number of posts; becomes `ScrapedUser::posts`.
    post_count: u64,
    /// Number of comments; becomes `ScrapedUser::comments`.
    comment_count: u64,
    /// Creation date, parsed with the `mdy_date` format
    /// (`[month]/[day]/[year]`). It carries no time of day.
    #[serde(with = "mdy_date")]
    created_at: Date,

    /// Total post views; becomes `ScrapedUser::post_views`.
    post_views: u64,
    /// Experience value; becomes `ScrapedUser::experience`.
    experience: u64,
    /// Number of favorites; becomes `ScrapedUser::favorites`.
    favorite_count: u64,
}
114
// Generates the `mdy_date` module referenced by `#[serde(with = "mdy_date")]`
// on `TargetUser::created_at`, handling a `Date` written as
// "[month]/[day]/[year]".
time::serde::format_description!(mdy_date, Date, "[month]/[day]/[year]");