Coverage for src\baobab_web_api_caller\pagination\paginator.py: 88%
54 statements
« prev ^ index » next coverage.py v7.10.3, created at 2026-03-21 12:10 +0100
« prev ^ index » next coverage.py v7.10.3, created at 2026-03-21 12:10 +0100
1"""Itération générique sur des pages basées sur une URL suivante."""
3from __future__ import annotations
5from dataclasses import dataclass
6from typing import Generic, Iterator, TypeVar
7from urllib.parse import parse_qsl, urlparse
9from baobab_web_api_caller.core.baobab_request import BaobabRequest
10from baobab_web_api_caller.exceptions.configuration_exception import ConfigurationException
11from baobab_web_api_caller.pagination.next_page_url_extractor import NextPageUrlExtractor
12from baobab_web_api_caller.pagination.page_extractor import PageExtractor
13from baobab_web_api_caller.pagination.page_result import PageResult
14from baobab_web_api_caller.service.baobab_service_caller import BaobabServiceCaller
16TItem = TypeVar("TItem") # pylint: disable=invalid-name
19@dataclass(frozen=True, slots=True)
20class Paginator(Generic[TItem]):
21 """Paginator générique, basé sur une URL de page suivante.
23 Le paginator ne connaît pas le format de la réponse. Il délègue l'extraction des items et de
24 l'URL de la page suivante à des extracteurs injectés.
25 """
27 service_caller: BaobabServiceCaller
28 page_extractor: PageExtractor[TItem]
29 next_page_url_extractor: NextPageUrlExtractor
31 def pages(self, initial_request: BaobabRequest) -> Iterator[PageResult[TItem]]:
32 """Itère sur les pages.
34 :param initial_request: Requête initiale (souvent GET).
35 :type initial_request: BaobabRequest
36 :return: Itérateur de pages.
37 :rtype: Iterator[PageResult[TItem]]
38 """
40 request: BaobabRequest | None = initial_request
41 while request is not None:
42 response = self.service_caller.call(request)
43 items = self.page_extractor.extract_items(response)
44 next_url = self.next_page_url_extractor.extract_next_page_url(response)
45 yield PageResult(items=items, next_page_url=next_url)
47 request = (
48 None
49 if next_url is None
50 else self._request_from_next_url(
51 next_url=next_url,
52 base_request=initial_request,
53 )
54 )
56 def items(self, initial_request: BaobabRequest) -> Iterator[TItem]:
57 """Itère sur tous les items de toutes les pages."""
59 for page in self.pages(initial_request):
60 yield from page.items
62 def _request_from_next_url(
63 self, *, next_url: str, base_request: BaobabRequest
64 ) -> BaobabRequest:
65 parsed = urlparse(next_url)
66 if parsed.scheme in {"http", "https"}:
67 base = urlparse(self.service_caller.service_config.base_url)
68 if parsed.scheme != base.scheme or parsed.netloc != base.netloc: 68 ↛ 72line 68 didn't jump to line 72 because the condition on line 68 was always true
69 raise ConfigurationException(
70 "next_page_url must target the same host as service_config.base_url"
71 )
72 path = parsed.path
73 query = parsed.query
74 else:
75 # URL relative (ex: /v1/items?page=2)
76 path = parsed.path
77 query = parsed.query
79 if not path: 79 ↛ 80line 79 didn't jump to line 80 because the condition on line 79 was never true
80 raise ConfigurationException("next_page_url must include a path")
81 if not path.startswith("/"): 81 ↛ 82line 81 didn't jump to line 82 because the condition on line 81 was never true
82 path = f"/{path}"
84 query_params = self._parse_query_params(query)
85 return BaobabRequest(
86 method=base_request.method,
87 path=path,
88 query_params=query_params,
89 headers=base_request.headers,
90 timeout_seconds=base_request.timeout_seconds,
91 )
93 @staticmethod
94 def _parse_query_params(query: str) -> dict[str, str | list[str]]:
95 params: dict[str, str | list[str]] = {}
96 for k, v in parse_qsl(query, keep_blank_values=True, strict_parsing=False):
97 existing = params.get(k)
98 if existing is None:
99 params[k] = v
100 elif isinstance(existing, list): 100 ↛ 101line 100 didn't jump to line 101 because the condition on line 100 was never true
101 existing.append(v)
102 else:
103 params[k] = [existing, v]
104 return params