imgchest/model/
scraped_post.rs1use once_cell::sync::Lazy;
2use scraper::Html;
3use scraper::Selector;
4
5static APP_SELECTOR: Lazy<Selector> = Lazy::new(|| Selector::parse("#app").unwrap());
6
7#[derive(Debug, thiserror::Error)]
9pub enum FromHtmlError {
10 #[error("missing {0}")]
11 MissingElement(&'static str),
12
13 #[error("missing attribute {0}")]
14 MissingAttribute(&'static str),
15
16 #[error("invalid data page")]
17 InvalidDataPage(serde_json::Error),
18}
19
20#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
22pub struct ScrapedPost {
23 pub id: Box<str>,
25
26 pub title: Box<str>,
28
29 pub username: Box<str>,
31
32 pub views: u64,
39
40 pub nsfw: bool,
42
43 pub image_count: u64,
45
46 pub images: Box<[File]>,
50}
51
52impl ScrapedPost {
53 pub(crate) fn from_html(html: &Html) -> Result<Self, FromHtmlError> {
55 let app_element = html
58 .select(&APP_SELECTOR)
59 .next()
60 .ok_or(FromHtmlError::MissingElement("app div"))?;
61 let data_page_attr = app_element
62 .attr("data-page")
63 .ok_or(FromHtmlError::MissingAttribute("data-page"))?;
64 let page_data: PageData =
65 serde_json::from_str(data_page_attr).map_err(FromHtmlError::InvalidDataPage)?;
66
67 let image_count = u64::try_from(page_data.props.post.files.len()).unwrap();
69 let images: Vec<_> = page_data
70 .props
71 .post
72 .files
73 .into_iter()
74 .map(|file| File {
75 id: file.id,
76 description: file.description,
77 link: file.link,
78 position: file.position,
79 })
80 .collect();
81 Ok(Self {
82 id: page_data.props.post.slug,
83 title: page_data.props.post.title,
84 username: page_data.props.post.user.username,
85 views: page_data.props.post.views,
86 nsfw: page_data.props.post.nsfw != 0,
87 image_count,
88 images: images.into(),
89 })
90 }
91}
92
93#[derive(Debug, serde::Deserialize)]
94struct PageData {
95 props: PageDataProps,
96}
97
98#[derive(Debug, serde::Deserialize)]
99struct PageDataProps {
100 post: PageDataPost,
101}
102
103#[derive(Debug, serde::Deserialize)]
104struct PageDataPost {
105 files: Vec<PageDataFile>,
106 nsfw: u8,
107 slug: Box<str>,
108 title: Box<str>,
109 user: PageDataUser,
110 views: u64,
111}
112
113#[derive(Debug, serde::Deserialize)]
114struct PageDataUser {
115 username: Box<str>,
116}
117
118#[derive(Debug, serde::Deserialize)]
119struct PageDataFile {
120 id: Box<str>,
121 description: Option<Box<str>>,
122 link: Box<str>,
123 position: u32,
124}
125
126#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
128pub struct File {
129 pub id: Box<str>,
131
132 pub description: Option<Box<str>>,
134
135 pub link: Box<str>,
137
138 pub position: u32,
142 }