Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion components/ecoindex/scraper/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@ def run_page_analysis(
wait_after_scroll=wait_after_scroll,
wait_before_scroll=wait_before_scroll,
page_load_timeout=20,
logger=logger,
)
try:
return (run(scraper.get_page_analysis()), True)
except Exception as e:
logger.error(f"{url} -- {e.msg if hasattr(e, 'msg') else e}")
if logger:
logger.error(f"{url} -- {e.msg if hasattr(e, 'msg') else e}")

return (
Result(
Expand Down
13 changes: 10 additions & 3 deletions components/ecoindex/scraper/scrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def __init__(
basic_auth: str | None = None,
cookies: list[SetCookieParam] = [],
custom_headers: dict[str, str] = {},
logger=None,
):
self.url = url
self.window_size = window_size
Expand All @@ -50,6 +51,7 @@ def __init__(
self.basic_auth = basic_auth
self.cookies = cookies
self.custom_headers = custom_headers
self.logger = logger

@staticmethod
def get_user_agent() -> UserAgent:
Expand Down Expand Up @@ -129,7 +131,13 @@ async def scrap_page(self) -> PageMetrics:
async def generate_screenshot(self) -> None:
if self.screenshot and self.screenshot.folder and self.screenshot.id:
await self.page.screenshot(path=self.screenshot.get_png())
await convert_screenshot_to_webp(self.screenshot)
try:
await convert_screenshot_to_webp(self.screenshot)
except ImportError:
if self.logger:
self.logger.warning(
"WebP conversion skipped: Pillow library is not installed."
)
await set_screenshot_rights(
screenshot=self.screenshot,
uid=self.screenshot_uid,
Expand Down Expand Up @@ -191,8 +199,7 @@ async def check_page_response(self, response) -> None:
{
"mimetype": content_type,
"message": (
"This resource is not "
"a standard page with mimeType 'text/html'"
"This resource is not a standard page with mimeType 'text/html'"
),
}
)
14 changes: 12 additions & 2 deletions components/ecoindex/utils/screenshots.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
import os

from ecoindex.models import ScreenShot
from PIL import Image

try:
from PIL import Image

_pillow_available = True
except ImportError:
_pillow_available = False


async def convert_screenshot_to_webp(screenshot: ScreenShot) -> None:
if not _pillow_available:
raise ImportError("Pillow is required for WebP conversion. Install it with: pip install ecoindex-scraper[webp]")
image = Image.open(rf"{screenshot.get_png()}")
width, height = image.size
ratio = 800 / height if width > height else 600 / width
Expand All @@ -17,7 +25,9 @@ async def convert_screenshot_to_webp(screenshot: ScreenShot) -> None:


async def set_screenshot_rights(
screenshot: ScreenShot, uid: int | None = None, gid: int | None = None
screenshot: ScreenShot,
uid: int | None = None,
gid: int | None = None,
) -> None:
if uid and gid:
os.chown(path=screenshot.get_webp(), uid=uid, gid=gid)
194 changes: 97 additions & 97 deletions poetry.lock

Large diffs are not rendered by default.

191 changes: 96 additions & 95 deletions projects/ecoindex_api/poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions projects/ecoindex_api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ aiofile = "^3.8.8"
alembic = "^1.12.1"
celery = "^5.3.4"
fastapi = "^0.109.1"
pillow = "^12.0.0"
pillow = { version = "^12.2.0", optional = true }
playwright = "^1.39.0"
playwright-stealth = "^1.0.6"
pydantic = { version = ">=2.1.1,<=2.4.2", extras = ["email"] }
Expand All @@ -46,7 +46,7 @@ ua-generator = "^2.0.5"
uvicorn = "^0.23.2"

[tool.poetry.group.worker.dependencies]
pillow = "^12.0.0"
pillow = { version = "^12.2.0", optional = true }
playwright = "^1.39.0"
playwright-stealth = "^1.0.6"

Expand Down
10 changes: 8 additions & 2 deletions projects/ecoindex_scraper/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,13 @@ This module provides a simple interface to get the [Ecoindex](http://www.ecoinde
## Install

```shell
pip install ecoindex-scraper
pip install ecoindex_scraper
```

WebP conversion of screenshots in the `generate_screenshot` method relies on Pillow, which is an optional dependency. To enable it, install the `webp` extra:

```shell
pip install "ecoindex_scraper[webp]" # or install Pillow directly: pip install pillow
```

## Use
Expand Down Expand Up @@ -171,4 +177,4 @@ pprint(requests_by_category.model_dump())
# 'javascript': {'total_count': 1, 'total_size': 9823.0},
# 'other': {'total_count': 1, 'total_size': 892.0},
# 'video': {'total_count': 0, 'total_size': 0.0}}
```
```
194 changes: 99 additions & 95 deletions projects/ecoindex_scraper/poetry.lock

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion projects/ecoindex_scraper/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,13 @@ pydantic = "^2.4.2"
python = "^3.10"
typing-extensions = "^4.8.0"
pyyaml = "^6.0.1"
pillow = ">=10.1,<13.0"
pillow = { version = "^12.2.0", optional = true }
setuptools = ">=69.5.1,<79.0.0"
ua-generator = "^2.0.5"

[tool.poetry.extras]
webp = ["pillow"]

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@ setuptools = "^78.1.1"
cryptography = "^44.0.2"

[tool.poetry.group.scraper.dependencies]
pillow = "^12.0.0"
playwright = "^1.39.0"
playwright-stealth = "^1.0.6"

[tool.poetry.group.scraper-webp.dependencies]
pillow = { version = "^12.2.0", optional = true }

[tool.poetry.group.cli.dependencies]
click-spinner = "^0.1.10"
jinja2 = "^3.1.3"
Expand Down
Loading