Skip to content

Commit

Permalink
chore(smart): fix re-rendering wait_for
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Dec 25, 2024
1 parent 5f4d5bd commit ce34344
Show file tree
Hide file tree
Showing 28 changed files with 122 additions and 24 deletions.
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion spider/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider"
version = "2.22.6"
version = "2.22.7"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
5 changes: 1 addition & 4 deletions spider/src/page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1811,10 +1811,7 @@ impl Page {
true,
true,
None,
Some(crate::configuration::WaitForSelector::new(
Some(core::time::Duration::from_millis(250)),
"body".into(),
)),
None,
)),
&configuration.screenshot,
false,
Expand Down
7 changes: 2 additions & 5 deletions spider/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -677,11 +677,8 @@ pub async fn run_openai_request(
if json_res.js.len() <= 400
&& json_res.js.contains("window.location")
{
match page.content_bytes().await {
Ok(b) => {
page_response.content = Some(b.into());
}
_ => (),
if let Ok(b) = page.content_bytes().await {
page_response.content = Some(b.into());
}
} else {
page_response.content = html;
Expand Down
10 changes: 7 additions & 3 deletions spider/src/website.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ impl Website {
/// Clear the disk. This is only available with the [disk] flag enabled.
#[cfg(feature = "disk")]
async fn clear_disk(&self) {
if self.sqlite.ready() {
if self.sqlite.pool_inited() {
let _ = DatabaseHandler::clear_table(self.get_db_pool().await).await;
}
}
Expand Down Expand Up @@ -791,8 +791,12 @@ impl Website {
/// Links visited getter for disk. This is only available with the [disk] flag enabled.
#[cfg(feature = "disk")]
pub async fn get_links_disk(&self) -> HashSet<CaseInsensitiveString> {
if let Ok(links) = DatabaseHandler::get_all_resources(self.get_db_pool().await).await {
links
if self.sqlite.pool_inited() {
if let Ok(links) = DatabaseHandler::get_all_resources(self.get_db_pool().await).await {
links
} else {
Default::default()
}
} else {
Default::default()
}
Expand Down
2 changes: 1 addition & 1 deletion spider_chrome/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_chrome"
version = "2.22.6"
version = "2.22.7"
rust-version = "1.70"
authors = [
"j-mendez <[email protected]>"
Expand Down
14 changes: 14 additions & 0 deletions spider_chrome/src/handler/blockers/scripts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ lazy_static::lazy_static! {
"https://static.parastorage.com/services/tag-manager-client/",
"https://static.parastorage.com/services/form-app/",
"https://www.datadoghq-browser-agent.com/",
"https://image6.pubmatic.com/AdServer/",
"https://featureassets.org",
"https://cdn.rudderlabs.com",
"https://script.hotjar.com/",
Expand All @@ -70,6 +71,10 @@ lazy_static::lazy_static! {
"https://static.addtoany.com/menu/",
"https://www.gstatic.com/cast/sdk/libs/sender/1.0/cast_framework.js",
"https://www.gstatic.com/eureka/clank/131/cast_sender.js",
"https://static.adsafeprotected.com/",
"https://ssum-sec.casalemedia.com/usermatch",
"https://cdn.brandmetrics.com/scripts/",
"https://cdn.confiant-integrations.net/",
"https://www.b2i.us/b2i/",
"https://acsbapp.com/apps/app/dist/js/app.js",
"https://cdn.doofinder.com/livelayer/",
Expand Down Expand Up @@ -102,6 +107,9 @@ lazy_static::lazy_static! {
"https://cl.k5a.io/",
"https://cdn-cookieyes.com/",
"https://pbs.yahoo.com/",
"https://ads.pubmatic.com/AdServer/js/",
"https://widgets.outbrain.com/nanoWidget/externals/obPixelFrame/obPixelFrame.js",
"https://widgets.outbrain.com/external/externals/intentiq.js",
"//d2wy8f7a9ursnm.cloudfront.net/v8/bugsnag.min.js",
".sharethis.com",
".newrelic.com",
Expand All @@ -126,6 +134,7 @@ lazy_static::lazy_static! {
"assets/TrackingPixel",
"https://ads.",
"http://ads.",
".pubmatic.com/AdServer/",
"https://tracking.",
"http://tracking.",
"https://static-tracking.",
Expand Down Expand Up @@ -165,6 +174,11 @@ lazy_static::lazy_static! {
"https://www.youtube.com/iframe_api", // Youtube iframes.
"https://f.vimeocdn.com", // Vimeo EMBEDDINGS
"https://i.vimeocdn.com/",
"https://image2.pubmatic.com/AdServer/",
"https://ads.pubmatic.com/AdServer/js/",
"https://cdn.taboola.com/libtrc/static/topics/",
"https://pm-widget.taboola.com/",
"https://gum.criteo.com/syncframe",
// "https://www.youtube.com/s/player/", // Youtube player not needed usually since iframe_api is used mainly
// vercel live
"https://vercel.live/api/",
Expand Down
15 changes: 15 additions & 0 deletions spider_chrome/src/handler/blockers/xhr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,12 @@ lazy_static::lazy_static! {
"https://s.yimg.com/wi",
"https://disney.my.sentry.io/api/",
"https://www.redditstatic.com/ads",
"https://logx.optimizely.com/v1/events",
"https://api-2-0.spot.im/v1.0.0/",
"https://static.hotjar.com/",
"https://matchadsrvr.yieldmo.com/track/",
"https://sentry.io/api/",
"https://prebid.media.net/",
"https://buy.tinypass.com/",
"https://idx.liadm.com",
"https://geo.privacymanager.io/",
Expand All @@ -89,6 +93,17 @@ lazy_static::lazy_static! {
"https://public-api.wordpress.com/geo/",
"https://events.api.secureserver.net/",
"https://csp.secureserver.net/eventbus",
"https://cdn.optimizely.com/datafiles/",
"https://ad.doubleclick.net/",
"https://metrics.beyondwords.io/events",
"https://rtb.openx.net/openrtbb/prebidjs",
"https://beacon.taboola.com/",
"https://collector.ex.co/main/events",
"https://hb.emxdgt.com/",
"https://token.rubiconproject.com/",
"https://prebid-server.rubiconproject.com",
"https://targeting.unrulymedia.com/unruly_prebid",
"https://prebid.adnxs.com/",
"https://doh.cq0.co/resolve",
"https://cdn.segment.",
".wixapps.net/api/v1/bulklog",
Expand Down
1 change: 1 addition & 0 deletions spider_chrome/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#![recursion_limit = "256"]
//! A high-level API for programmatically interacting with the [Chrome DevTools Protocol](https://chromedevtools.github.io/devtools-protocol/).
//!
//! This crate uses the [Chrome DevTools protocol] to drive/launch a Chromium or
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
https://static.files.bbci.co.uk/core/bundle-component-consent-banner.
https://static.files.bbci.co.uk/core/bundle-consent-banner.
https://static.files.bbci.co.uk/core/website/assets/static/scripts/riddle/
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
https://libs.outbrain.com/video/
https://turnip.cdn.turner.com/top/player-ui/
https://warnermediagroup-com.videoplayerhub.com/galleryloader.js
3 changes: 3 additions & 0 deletions spider_chrome/url_patterns/domains/bbc.com/xhr/pattern1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
https://www.bbc.com/userinfo
https://www.bbc.co.uk/wc-data/container/consent-banner
https://idcta.api.bbc.com/idcta/config
10 changes: 10 additions & 0 deletions spider_chrome/url_patterns/domains/cnn.com/scripts/pattern1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
https://eus.rubiconproject.com/usync.html?p=20986&endpoint=us-east
https://widgets.outbrain.com/keystone/conv/KS_conversions.js
https://simage2.pubmatic.com/AdServer/
https://segment-data-us-east.zqtk.net
https://cdn.ml314.com/taglw.js
https://cdn.optimizely.com/public/
https://s.ntv.io/serve/load.js
https://ads.pubmatic.com
https://widgets.outbrain.com/nanoWidget/externals/obPixelFrame/obPixelFrame.htm
https://js-sec.indexww.com/um/ixmatch.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
https://libs.outbrain.com/video/
https://turnip.cdn.turner.com/top/player-ui/
https://warnermediagroup-com.videoplayerhub.com/galleryloader.js
8 changes: 8 additions & 0 deletions spider_chrome/url_patterns/domains/cnn.com/xhr/pattern1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
https://mcdp-chidc2.outbrain.com/
https://receive.wmcdp.io/v1/reg
https://collector.cdp.cnn.com/com.snowplowanalytics.snowplow/tp2
https://i.clean.gg/1a
https://www.cnn.com/public/api/alerts
https://collector-px611dwqp1.px-cloud.net/api/v2/collector
https://ids.cdnwidget.com/
https://dfp.bouncex.net/pub/v2/segment/2qi31PsFb3FLPiCctqR86TDa7Ws
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
https://amplify.outbrain.com/cp/obtp.js
https://static.criteo.net/js/ld/ld.js
https://fundingchoicesmessages.google.com/
https://static.foxnews.com/static/leap/sites/fnc/metrics.js
https://static.foxnews.com/static/isa/app/lib/VisitorAPI.js
https://static.foxnews.com/static/orion/scripts/core/utils/geo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
https://player.h-cdn.com/loader.js?
https://static.foxnews.com/static/orion/scripts/core/video/
https://foxnewsplayer-a.akamaihd.net/player/
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
https://prod.pyxis.atp.fox/pyxis/submit
https://sdk.iad-05.braze.com/api/v3/data/
https://psb.taboola.com/topics_api
https://direct-events-collector.spot.im/api/v2/events?stream_name=init
https://launcher.spot.im/spot/sp_ANQXRpqH
https://p.flipp.com/beacons
https://api.foxnews.com/v3/video-player/
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
https://gum.criteo.com/sync?
https://pm-widget.taboola.com/usatodaydemo/
https://www.washingtonpost.com/subscribe/privacy-
https://g.3gl.net/jp/3543/v3.3.9/M
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
https://vidstat.taboola.com/vpaid/
https://imprnjmp.taboola.com/
https://cdn.taboola.com/libtrc/usatodaydemo/loader.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
https://hlsmedia.gannett-cdn.com/authoring/videos/
https://hp.taboola.com/usatodaydemo/
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
https://static.files.bbci.co.uk/core/bundle-component-consent-banner.
https://static.files.bbci.co.uk/core/bundle-consent-banner.
https://static.files.bbci.co.uk/core/website/assets/static/scripts/riddle/
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
https://libs.outbrain.com/video/
https://turnip.cdn.turner.com/top/player-ui/
https://warnermediagroup-com.videoplayerhub.com/galleryloader.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
https://ny77jj.washingtonpost.com/
https://api.permutive.com/v2.0/
https://targeting.washingtonpost.com/api/v1/targeting
https://subscribe.washingtonpost.com/offers/service/v2/offers/
https://www.washingtonpost.com/prism/api/alerts
https://mug.criteo.com/sid?
https://subscribe.washingtonpost.com/offers/service/get-adot-offer/
https://subscribe.washingtonpost.com/logging/logHandledError
https://ib.adnxs.com/getuidj
2 changes: 1 addition & 1 deletion spider_cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_cli"
version = "2.22.6"
version = "2.22.7"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_transformations/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_transformations"
version = "2.22.6"
version = "2.22.7"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_utils"
version = "2.22.6"
version = "2.22.7"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_worker/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_worker"
version = "2.22.6"
version = "2.22.7"
authors = [
"j-mendez <[email protected]>"
]
Expand Down

0 comments on commit ce34344

Please sign in to comment.