diff --git a/.rspec b/.rspec new file mode 100644 index 00000000..651ee694 --- /dev/null +++ b/.rspec @@ -0,0 +1,2 @@ +--require spec_helper +--pattern 'spec/**/*.rb' diff --git a/Gemfile b/Gemfile new file mode 100644 index 00000000..5e291ce7 --- /dev/null +++ b/Gemfile @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +source "https://rubygems.org" + +ruby "~> 3.4" + +# gem "rails" + +gem "rspec" +gem "nokogiri", "~> 1.19" diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 00000000..1671e1e4 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,54 @@ +GEM + remote: https://rubygems.org/ + specs: + diff-lcs (1.6.2) + nokogiri (1.19.3-aarch64-linux-gnu) + racc (~> 1.4) + nokogiri (1.19.3-aarch64-linux-musl) + racc (~> 1.4) + nokogiri (1.19.3-arm-linux-gnu) + racc (~> 1.4) + nokogiri (1.19.3-arm-linux-musl) + racc (~> 1.4) + nokogiri (1.19.3-arm64-darwin) + racc (~> 1.4) + nokogiri (1.19.3-x86_64-darwin) + racc (~> 1.4) + nokogiri (1.19.3-x86_64-linux-gnu) + racc (~> 1.4) + nokogiri (1.19.3-x86_64-linux-musl) + racc (~> 1.4) + racc (1.8.1) + rspec (3.13.2) + rspec-core (~> 3.13.0) + rspec-expectations (~> 3.13.0) + rspec-mocks (~> 3.13.0) + rspec-core (3.13.6) + rspec-support (~> 3.13.0) + rspec-expectations (3.13.5) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-mocks (3.13.8) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-support (3.13.7) + +PLATFORMS + aarch64-linux-gnu + aarch64-linux-musl + arm-linux-gnu + arm-linux-musl + arm64-darwin + x86_64-darwin + x86_64-linux-gnu + x86_64-linux-musl + +DEPENDENCIES + nokogiri (~> 1.19) + rspec + +RUBY VERSION + ruby 3.4.9p82 + +BUNDLED WITH + 2.6.9 diff --git a/files/claude-monet-paintings.html b/files/claude-monet-paintings.html new file mode 100644 index 00000000..4d367f66 --- /dev/null +++ b/files/claude-monet-paintings.html @@ -0,0 +1,55 @@ +Claude Monet paintings - Google Search
Skip to main contentAccessibility help
Accessibility feedback

Search Results

Claude Monet
French painter
Google apps
\ No newline at end of file diff --git a/files/michelangelo-sculptures.html b/files/michelangelo-sculptures.html new file mode 100644 index 00000000..bb47a230 --- /dev/null +++ b/files/michelangelo-sculptures.html @@ -0,0 +1,55 @@ +Michelangelo paintings - Google Search
Skip to main contentAccessibility help
Accessibility feedback

Search Results

Michelangelo
Italian sculptor and painter
Google apps
\ No newline at end of file diff --git a/files/pablo-picasso.html b/files/pablo-picasso.html new file mode 100644 index 00000000..768f191d --- /dev/null +++ b/files/pablo-picasso.html @@ -0,0 +1,49 @@ +Pablo Picasso paintings - Google Search
Skip to main contentAccessibility help
Accessibility feedback

Search Results

Pablo Picasso
Spanish painter and sculptor
Google apps
\ No newline at end of file diff --git a/lib/page.rb b/lib/page.rb new file mode 100644 index 00000000..dfee31de --- /dev/null +++ b/lib/page.rb @@ -0,0 +1,87 @@ +require "nokogiri" +require "json" + +class ScraperError < RuntimeError + # left intentionally empty; exists for semantic error handling/catching +end + +class Page + def initialize(html) + @html = html + @doc = Nokogiri::HTML(@html) + @image_map = get_lazy_load_map + end + + # public entrypoint; extend here for additional block types + def scrape + scrape_carousel + end + + # search for images that are lazily loaded + # these images are stored in