Coverage for src\baobab_web_api_caller\pagination\paginator.py: 88%

54 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2026-03-21 12:10 +0100

1"""Itération générique sur des pages basées sur une URL suivante.""" 

2 

3from __future__ import annotations 

4 

5from dataclasses import dataclass 

6from typing import Generic, Iterator, TypeVar 

7from urllib.parse import parse_qsl, urlparse 

8 

9from baobab_web_api_caller.core.baobab_request import BaobabRequest 

10from baobab_web_api_caller.exceptions.configuration_exception import ConfigurationException 

11from baobab_web_api_caller.pagination.next_page_url_extractor import NextPageUrlExtractor 

12from baobab_web_api_caller.pagination.page_extractor import PageExtractor 

13from baobab_web_api_caller.pagination.page_result import PageResult 

14from baobab_web_api_caller.service.baobab_service_caller import BaobabServiceCaller 

15 

16TItem = TypeVar("TItem") # pylint: disable=invalid-name 

17 

18 

19@dataclass(frozen=True, slots=True) 

20class Paginator(Generic[TItem]): 

21 """Paginator générique, basé sur une URL de page suivante. 

22 

23 Le paginator ne connaît pas le format de la réponse. Il délègue l'extraction des items et de 

24 l'URL de la page suivante à des extracteurs injectés. 

25 """ 

26 

27 service_caller: BaobabServiceCaller 

28 page_extractor: PageExtractor[TItem] 

29 next_page_url_extractor: NextPageUrlExtractor 

30 

31 def pages(self, initial_request: BaobabRequest) -> Iterator[PageResult[TItem]]: 

32 """Itère sur les pages. 

33 

34 :param initial_request: Requête initiale (souvent GET). 

35 :type initial_request: BaobabRequest 

36 :return: Itérateur de pages. 

37 :rtype: Iterator[PageResult[TItem]] 

38 """ 

39 

40 request: BaobabRequest | None = initial_request 

41 while request is not None: 

42 response = self.service_caller.call(request) 

43 items = self.page_extractor.extract_items(response) 

44 next_url = self.next_page_url_extractor.extract_next_page_url(response) 

45 yield PageResult(items=items, next_page_url=next_url) 

46 

47 request = ( 

48 None 

49 if next_url is None 

50 else self._request_from_next_url( 

51 next_url=next_url, 

52 base_request=initial_request, 

53 ) 

54 ) 

55 

56 def items(self, initial_request: BaobabRequest) -> Iterator[TItem]: 

57 """Itère sur tous les items de toutes les pages.""" 

58 

59 for page in self.pages(initial_request): 

60 yield from page.items 

61 

62 def _request_from_next_url( 

63 self, *, next_url: str, base_request: BaobabRequest 

64 ) -> BaobabRequest: 

65 parsed = urlparse(next_url) 

66 if parsed.scheme in {"http", "https"}: 

67 base = urlparse(self.service_caller.service_config.base_url) 

68 if parsed.scheme != base.scheme or parsed.netloc != base.netloc: 68 ↛ 72line 68 didn't jump to line 72 because the condition on line 68 was always true

69 raise ConfigurationException( 

70 "next_page_url must target the same host as service_config.base_url" 

71 ) 

72 path = parsed.path 

73 query = parsed.query 

74 else: 

75 # URL relative (ex: /v1/items?page=2) 

76 path = parsed.path 

77 query = parsed.query 

78 

79 if not path: 79 ↛ 80line 79 didn't jump to line 80 because the condition on line 79 was never true

80 raise ConfigurationException("next_page_url must include a path") 

81 if not path.startswith("/"): 81 ↛ 82line 81 didn't jump to line 82 because the condition on line 81 was never true

82 path = f"/{path}" 

83 

84 query_params = self._parse_query_params(query) 

85 return BaobabRequest( 

86 method=base_request.method, 

87 path=path, 

88 query_params=query_params, 

89 headers=base_request.headers, 

90 timeout_seconds=base_request.timeout_seconds, 

91 ) 

92 

93 @staticmethod 

94 def _parse_query_params(query: str) -> dict[str, str | list[str]]: 

95 params: dict[str, str | list[str]] = {} 

96 for k, v in parse_qsl(query, keep_blank_values=True, strict_parsing=False): 

97 existing = params.get(k) 

98 if existing is None: 

99 params[k] = v 

100 elif isinstance(existing, list): 100 ↛ 101line 100 didn't jump to line 101 because the condition on line 100 was never true

101 existing.append(v) 

102 else: 

103 params[k] = [existing, v] 

104 return params