Coverage for src\baobab_web_api_caller\download\bulk_file_downloader.py: 93%

71 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2026-03-21 12:10 +0100

1"""Téléchargement de fichiers distants en streaming.""" 

2 

3# pylint: disable=duplicate-code 

4 

5from __future__ import annotations 

6 

7from dataclasses import dataclass 

8from pathlib import Path 

9 

10import requests 

11 

12from baobab_web_api_caller.config.default_header_provider import DefaultHeaderProvider 

13from baobab_web_api_caller.config.service_config import ServiceConfig 

14from baobab_web_api_caller.core.baobab_request import BaobabRequest 

15from baobab_web_api_caller.core.baobab_response import BaobabResponse 

16from baobab_web_api_caller.core.error_response_mapper import ErrorResponseMapper 

17from baobab_web_api_caller.core.request_url_builder import RequestUrlBuilder 

18from baobab_web_api_caller.exceptions.configuration_exception import ConfigurationException 

19from baobab_web_api_caller.exceptions.timeout_exception import TimeoutException 

20from baobab_web_api_caller.exceptions.transport_exception import TransportException 

21from baobab_web_api_caller.transport.call_context_builder import build_call_context 

22from baobab_web_api_caller.transport.requests_session_factory import RequestsSessionFactory 

23 

24 

@dataclass(frozen=True, slots=True)
class BulkFileDownloader:
    """Download a remote resource to disk using a streamed HTTP response.

    The downloader is kept separate from the regular (JSON) consumption path in order to
    avoid loading large payloads into memory.
    """

    # Service settings; forwarded to ``build_call_context`` on every download.
    service_config: ServiceConfig
    # Forwarded to ``build_call_context`` on every download.
    session_factory: RequestsSessionFactory
    # Forwarded to ``build_call_context``; ``from_service_config`` builds it from
    # ``service_config.base_url``.
    url_builder: RequestUrlBuilder
    # Forwarded to ``build_call_context``; ``from_service_config`` builds it from
    # ``service_config.default_headers``.
    default_header_provider: DefaultHeaderProvider
    # Receives every response whose status code is >= 400.
    error_response_mapper: ErrorResponseMapper

    @classmethod
    def from_service_config(
        cls, service_config: ServiceConfig, session_factory: RequestsSessionFactory
    ) -> "BulkFileDownloader":
        """Build a downloader from a service configuration.

        :param service_config: Supplies ``base_url`` and ``default_headers``.
        :type service_config: ServiceConfig
        :param session_factory: Factory stored on the downloader as-is.
        :type session_factory: RequestsSessionFactory
        :return: A downloader wired with a fresh URL builder, header provider and error mapper.
        :rtype: BulkFileDownloader
        """

        return cls(
            service_config=service_config,
            session_factory=session_factory,
            url_builder=RequestUrlBuilder(base_url=service_config.base_url),
            default_header_provider=DefaultHeaderProvider(
                default_headers=service_config.default_headers
            ),
            error_response_mapper=ErrorResponseMapper(),
        )

    def download(
        self,
        request: BaobabRequest,
        *,
        output_path: Path,
        chunk_size: int = 1024 * 64,
        overwrite: bool = False,
    ) -> Path:
        """Download the resource and write it to disk.

        The payload is written to a temporary ``.part`` file next to the target and then
        renamed over it, so the target never holds a partially written file. The temporary
        file is removed whenever the write does not complete, whatever the reason.

        The streamed response and the session are always closed when the call ends (success,
        HTTP error or exception) so that no resource leaks.

        :param request: Request to execute (usually a GET); it must not carry a body.
        :type request: BaobabRequest
        :param output_path: Target path.
        :type output_path: Path
        :param chunk_size: Chunk size used while streaming.
        :type chunk_size: int
        :param overwrite: Allow replacing an existing target file.
        :type overwrite: bool
        :return: Final path.
        :rtype: Path
        :raises ConfigurationException: If the parameters are invalid.
        :raises TimeoutException: On network timeout.
        :raises HttpException: If the HTTP response reports an error (4xx/5xx), as mapped by
            `ErrorResponseMapper`.
        :raises TransportException: On network or write error, or when the target already
            exists and ``overwrite`` is false.
        """

        if request.json_body is not None or request.form_body is not None:
            raise ConfigurationException("download only supports requests without body")
        if chunk_size <= 0:
            raise ConfigurationException("chunk_size must be positive")

        output_path = Path(output_path)
        if output_path.exists() and not overwrite:
            raise TransportException("output_path already exists")

        try:
            ctx = build_call_context(
                request=request,
                service_config=self.service_config,
                default_header_provider=self.default_header_provider,
                url_builder=self.url_builder,
                session_factory=self.session_factory,
            )
        except requests.Timeout as exc:  # pragma: no cover
            raise TimeoutException(str(exc)) from exc
        except requests.RequestException as exc:  # pragma: no cover
            raise TransportException(str(exc)) from exc

        # NOTE(review): kept as a defensive guard; the return contract of
        # ``build_call_context`` is not visible from this module — confirm it can be dropped.
        if ctx is None:
            raise TransportException("call context was not built")

        try:
            try:
                # The body arguments stay empty: requests with a body were rejected above.
                response = ctx.session.request(
                    method=ctx.prepared_request.method.value,
                    url=ctx.url,
                    params=None,
                    headers=dict(ctx.prepared_request.headers),
                    json=None,
                    data=None,
                    timeout=ctx.timeout,
                    stream=True,
                )
            except requests.Timeout as exc:  # pragma: no cover
                raise TimeoutException(str(exc)) from exc
            except requests.RequestException as exc:  # pragma: no cover
                raise TransportException(str(exc)) from exc

            try:
                self._raise_for_http_error(response)
                return self._stream_to_file(response, output_path, chunk_size)
            finally:
                response.close()
        finally:
            ctx.session.close()

    def _raise_for_http_error(self, response: requests.Response) -> None:
        """Hand a 4xx/5xx response to the error mapper; do nothing for other statuses."""

        status = int(response.status_code)
        if status < 400:
            return

        headers: dict[str, str] = {str(k): str(v) for k, v in response.headers.items()}
        # ``response.text`` reads the whole error body; only done on the error path.
        raw = BaobabResponse(status_code=status, headers=headers, text=response.text, content=None)
        # Presumably always raises for status >= 400 — TODO confirm against
        # ``ErrorResponseMapper``; if it returns, the download proceeds as before.
        self.error_response_mapper.raise_for_error(raw)

    @staticmethod
    def _stream_to_file(response: requests.Response, output_path: Path, chunk_size: int) -> Path:
        """Write the response body to a ``.part`` file, then rename it over ``output_path``."""

        # "report.csv" -> "report.csv.part": same directory, so the final rename stays on
        # one filesystem.
        tmp_path = output_path.with_suffix(output_path.suffix + ".part")
        completed = False
        try:
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with tmp_path.open("wb") as stream:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:
                        stream.write(chunk)
            # ``Path.replace`` overwrites an existing target in a single step. Unlinking the
            # target first would leave a window with no file at all and would lose the
            # previous file if the rename then failed.
            tmp_path.replace(output_path)
            completed = True
        except OSError as exc:
            raise TransportException(str(exc)) from exc
        finally:
            if not completed:
                # Covers every kind of interruption, not only OSError, so no partial file
                # is left behind.
                try:
                    tmp_path.unlink(missing_ok=True)
                except OSError:
                    # Best-effort cleanup: never mask the error that is already propagating.
                    pass
        return output_path

166 ctx.session.close()