Coverage for /var/devmt/py/utils4_1.5.0rc1/utils4/crypto.py: 100%

84 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-08-12 15:38 +0100

1#!/usr/bin/env python3 

2# -*- coding: utf-8 -*- 

3""" 

4:Purpose: This module provides a light wrapper around the ``base64`` 

5 and ``hashlib`` libraries to provide some additional 

6 functionality. 

7 

8:Platform: Linux/Windows | Python 3.6+ 

9:Developer: J Berendt 

10:Email: support@s3dev.uk 

11 

12:Comments: n/a 

13 

14:Example: 

15 Example code use:: 

16 

17 >>> from utils4.crypto import crypto 

18 

19 

20 To obtain a quick MD5 hash:: 

21 

22 >>> s = "The quick brown fox jumps over the lazy dog" 

23 >>> output = crypto.md5(s) 

24 >>> print(output) 

25 

26 9e107d9d372bb6826bd81d3542a419d6 

27 

28 

29 To obtain a Base64 encoded MD5 hash:: 

30 

31 >>> s = "The quick brown fox jumps over the lazy dog" 

32 >>> output = crypto.b64md5(s) 

33 >>> print(output) 

34 

35 OWUxMDdkOWQzNzJiYjY4MjZiZDgxZDM1NDJhNDE5ZDY= 

36 

37 

38 For examples on checksumming a file, please refer to: 

39 

40 - :meth:`Crypto.checksum_crc32` 

41 - :meth:`Crypto.checksum_md5` 

42 - :meth:`Crypto.checksum_sha1` 

43 - :meth:`Crypto.checksum_sha256` 

44 - :meth:`Crypto.checksum_sha512` 

45 

46""" 

47# pylint: disable=invalid-name 

48 

import base64
import hashlib
import zlib
from typing import Iterable, Optional, Union
from utils4 import convert

54 

55 

class Crypto:
    """Main class used for hashing and encoding.

    This class acts as a simple wrapper around the ``base64``,
    ``hashlib`` and ``zlib`` libraries, providing additional
    functionality.

    Note:
        Base64 is an *encoding*, not encryption, and MD5 / SHA1 / CRC32
        are not suitable for security-sensitive use. These helpers are
        intended for identifiers and file integrity checks.

    """

    # Number of bytes read per iteration when checksumming a file.
    # 32768 is a multiple of both the 64-byte block size (MD5, SHA1,
    # SHA256) and the 128-byte block size (SHA512).
    _CHUNK_SIZE = 64*512

    def b64(self,
            data: Union[bytes, str],
            decode: bool=True) -> Union[bytes, str]:
        """Base64 encode the given data.

        Args:
            data (Union[bytes, str]): Value to be encoded. If a ``str``
                data type is received, it is encoded to ``bytes`` before
                Base64 encoding.
            decode (bool, optional): Return the Base64 result as a
                ``str`` rather than ``bytes``. Defaults to True.

        Raises:
            ValueError: If ``data`` is neither a ``str`` nor ``bytes``
                type.

        Returns:
            Union[bytes, str]: The Base64 encoded value; a ``str`` if
            ``decode`` is ``True``, otherwise ``bytes``.

        """
        encoded = base64.b64encode(self._encode(data))
        return encoded.decode() if decode else encoded

    def b64md5(self,
               data: Union[Iterable[str], str],
               trunc: Optional[int]=None) -> str:
        """Create an optionally truncated Base64 encoded MD5 hash from a
        string or array.

        Args:
            data (Union[Iterable[str], str]): A string or an iterable
                object containing strings to be encoded. The strings of
                an iterable are concatenated before hashing.
            trunc (int, optional): Truncate the Base64 string to (n)
                characters. As string slicing is used, values such as
                ``-1`` are also valid, and ``0`` yields an empty string.
                Defaults to None, for no truncation.

        Returns:
            str: An (optionally truncated) Base64 encoded MD5 hash of the
            passed string or iterable.

        """
        joined = ''.join(data).encode()
        digest = self.md5(joined, decode=False)
        encoded = self.b64(digest, decode=True)
        # Test against None explicitly; a truthiness test would treat
        # trunc=0 as "no truncation" rather than slicing to ''.
        return encoded if trunc is None else encoded[:trunc]

    @staticmethod
    def checksum_crc32(path: str,
                       return_integer: bool=False) -> Union[int, str]:
        """Generate a 32-bit CRC32 checksum for the given file.

        Args:
            path (str): Full path to the file.
            return_integer (bool, optional): Return the original unsigned
                32-bit integer, rather than the hex string. Defaults to False.

        Important:
            This algorithm is *not* cryptographically strong and should not be
            used for authentication or digital signatures; nor is it suitable
            for use as a general hash algorithm.

            -- zlib.crc32 `Documentation`_

        .. _Documentation: https://docs.python.org/3/library/zlib.html#zlib.crc32

        :Design:
            This method breaks the file into 32768-byte chunks for more memory
            efficient reading. Meaning this method has a maximum memory use
            overhead of ~32K.

        :Example:

            Example for calculating the crc32 checksum for a file, returning a
            hex string::

                >>> from utils4.crypto import crypto

                >>> crypto.checksum_crc32(path='/tmp/test.txt')
                '2a30e66b'


            Example for calculating the crc32 checksum for a file, returning
            an integer::

                >>> from utils4.crypto import crypto

                >>> crypto.checksum_crc32(path='/tmp/test.txt', return_integer=True)
                707847787

        Returns:
            Union[int, str]: If the ``return_integer`` value is ``False``
            (default action), a CRC32 32-bit hex string (checksum string) of
            the file's contents is returned. Otherwise, an unsigned 32-bit
            integer is returned.

        """
        crcval = 0
        for chunk in Crypto._read_chunks(path):
            # Feed the running value back in so the result covers the
            # whole file, not just the last chunk.
            crcval = zlib.crc32(chunk, crcval)
        # Python 3 already returns an unsigned value; the mask simply
        # guarantees the documented unsigned 32-bit range.
        crcval &= 0xFFFFFFFF
        if return_integer:
            return crcval
        return convert.int2hex(crcval)

    @staticmethod
    def checksum_md5(path: str) -> str:
        """Generate a 128-bit MD5 checksum for the given file.

        Args:
            path (str): Full path to the file.

        :Design:
            This method breaks the file into 32768-byte chunks
            (64-bytes * 512 blocks) for more memory efficient reading;
            taking advantage of the fact that MD5 uses 512-bit (64-byte)
            digest blocks. Meaning this method has a maximum memory use
            overhead of ~32K.

        :Example:

            Example calculating the MD5 checksum for a file::

                >>> from utils4.crypto import crypto

                >>> crypto.checksum_md5(path='/tmp/test.txt')
                '9ec06901e8f25eb9810c5e0db88e7dcd'

        Returns:
            str: A 128-bit MD5 hex digest (checksum string) of the file's
            contents.

        """
        return Crypto._hash_file(path, hashlib.md5())

    @staticmethod
    def checksum_sha1(path: str) -> str:
        """Generate a 160-bit SHA1 checksum for the given file.

        Args:
            path (str): Full path to the file.

        :Design:
            This method breaks the file into 32768-byte chunks
            (64-bytes * 512 blocks) for more memory efficient reading;
            taking advantage of the fact that SHA1 uses 512-bit (64-byte)
            digest blocks. Meaning this method has a maximum memory use
            overhead of ~32K.

        :Example:

            Example calculating the SHA1 checksum for a file::

                >>> from utils4.crypto import crypto

                >>> crypto.checksum_sha1(path='/tmp/test.txt')
                'e49a1493c637a24800119fb53ef7dbc580221e89'

        Returns:
            str: A 160-bit SHA1 hex digest (checksum string) of the file's
            contents.

        """
        return Crypto._hash_file(path, hashlib.sha1())

    @staticmethod
    def checksum_sha256(path: str) -> str:
        """Generate a 256-bit SHA256 checksum for the given file.

        Args:
            path (str): Full path to the file.

        :Design:
            This method breaks the file into 32768-byte chunks
            (64-bytes * 512 blocks) for more memory efficient reading;
            taking advantage of the fact that SHA256 uses 512-bit (64-byte)
            digest blocks. Meaning this method has a maximum memory use
            overhead of ~32K.

        :Example:

            Example calculating the SHA256 checksum for a file::

                >>> from utils4.crypto import crypto

                >>> crypto.checksum_sha256(path='/tmp/test.txt')
                'e899df8e51b60bf8a6ede73fe5c7b4267bf5e48937e848bac3c6efd906833821'

        Returns:
            str: A 256-bit SHA256 hex digest (checksum string) of the file's
            contents.

        """
        return Crypto._hash_file(path, hashlib.sha256())

    @staticmethod
    def checksum_sha512(path: str) -> str:
        """Generate a 512-bit SHA512 checksum for the given file.

        Args:
            path (str): Full path to the file.

        :Design:
            This method breaks the file into 32768-byte chunks
            (128-bytes * 256 blocks) for more memory efficient reading;
            taking advantage of the fact that SHA512 uses 1024-bit (128-byte)
            digest blocks. Meaning this method has a maximum memory use
            overhead of ~32K.

        :Example:

            Example calculating the SHA512 checksum for a file::

                >>> from utils4.crypto import crypto

                >>> crypto.checksum_sha512(path='/tmp/test.txt')
                ('247adcb6f5b284b3e45c9281171ba7a6'
                 '2502692ee9ee8020bd5827602972409f'
                 '9bdfc2ec7e5452223c19b3745d3f04e2'
                 '542ef0d0e075139d1ee3b5f678c9aaec')  # Single string

        Returns:
            str: A 512-bit SHA512 hex digest (checksum string) of the file's
            contents.

        """
        return Crypto._hash_file(path, hashlib.sha512())

    def md5(self,
            data: Union[bytes, str],
            decode: bool=True) -> Union[bytes, str]:
        """Create an MD5 hex digest, returned as ``str`` or ``bytes``.

        Args:
            data (Union[bytes, str]): Value to be hashed. If a ``str``
                data type is passed, it is encoded to ``bytes`` before
                hashing.
            decode (bool, optional): Return the hex digest as a ``str``.
                If ``False``, the hex digest is returned as ``bytes``.
                Defaults to True.

        Raises:
            ValueError: If ``data`` is neither a ``str`` nor ``bytes``
                type.

        Returns:
            Union[bytes, str]: The MD5 hex digest; a ``str`` if
            ``decode`` is ``True``, otherwise ``bytes``.

        """
        digest = hashlib.md5(self._encode(data)).hexdigest()
        return digest if decode else digest.encode()

    @staticmethod
    def _encode(data: Union[bytes, str]) -> bytes:
        """Test if a string is ``str`` or ``bytes`` before processing.

        Args:
            data (Union[bytes, str]): String value to be encoded.

        If the received ``data`` parameter is a ``str`` type, it is converted
        to a ``bytes`` type and returned. If the string is already a ``bytes``
        type, it is returned, unmodified.

        Raises:
            ValueError: If the ``data`` object is neither a ``str`` nor
                ``bytes`` type.

        Returns:
            bytes: A ``bytes`` encoded string.

        """
        if isinstance(data, str):
            return data.encode()
        if isinstance(data, bytes):
            return data
        raise ValueError('Expected a bytes or str type.')

    @staticmethod
    def _hash_file(path: str, hasher) -> str:
        """Stream a file through a ``hashlib`` hash object.

        Args:
            path (str): Full path to the file.
            hasher: A new ``hashlib`` hash object (e.g. ``hashlib.md5()``),
                which is updated with the file's contents.

        Returns:
            str: The hex digest of the file's contents.

        """
        for chunk in Crypto._read_chunks(path):
            hasher.update(chunk)
        return hasher.hexdigest()

    @staticmethod
    def _read_chunks(path: str, size: int=_CHUNK_SIZE) -> Iterable[bytes]:
        """Yield the contents of a file in fixed-size binary chunks.

        Args:
            path (str): Full path to the file.
            size (int, optional): Maximum number of bytes per chunk.
                Defaults to 32768.

        Yields:
            bytes: Consecutive chunks of the file; nothing is yielded for
            an empty file.

        """
        with open(path, 'rb') as f:
            # The two-argument form of iter() calls the reader until it
            # returns the sentinel; an empty bytes read signals EOF.
            for chunk in iter(lambda: f.read(size), b''):
                yield chunk

359 

360 

# Shared module-level instance, allowing callers to simply use
# ``from utils4.crypto import crypto``.
crypto = Crypto()