Coverage for /var/devmt/py/utils4_1.5.0rc1/utils4/crypto.py: 100%
84 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-08-12 15:38 +0100
« prev ^ index » next coverage.py v7.6.1, created at 2024-08-12 15:38 +0100
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3"""
4:Purpose: This module provides a light wrapper around the ``base64``
5 and ``hashlib`` libraries to provide some additional
6 functionality.
8:Platform: Linux/Windows | Python 3.6+
9:Developer: J Berendt
10:Email: support@s3dev.uk
12:Comments: n/a
14:Example:
15 Example code use::
17 >>> from utils4.crypto import crypto
20 To obtain a quick MD5 hash::
22 >>> s = "The quick brown fox jumps over the lazy dog"
23 >>> output = crypto.md5(s)
24 >>> print(output)
26 9e107d9d372bb6826bd81d3542a419d6
29 To obtain a Base64 encoded MD5 hash::
31 >>> s = "The quick brown fox jumps over the lazy dog"
32 >>> output = crypto.b64md5(s)
33 >>> print(output)
35 OWUxMDdkOWQzNzJiYjY4MjZiZDgxZDM1NDJhNDE5ZDY=
38 For examples on checksumming a file, please refer to:
40 - :meth:`Crypto.checksum_crc32`
41 - :meth:`Crypto.checksum_md5`
42 - :meth:`Crypto.checksum_sha1`
43 - :meth:`Crypto.checksum_sha256`
44 - :meth:`Crypto.checksum_sha512`
46"""
47# pylint: disable=invalid-name
import base64
import hashlib
import zlib
from typing import Iterable, Union

from utils4 import convert
class Crypto:
    """Main class used for hashing and encoding.

    This class acts as a simple wrapper around the ``base64``, ``hashlib``
    and ``zlib`` libraries, providing additional functionality.

    """

    # Number of bytes read per iteration when checksumming a file (32K).
    # 32768 is a whole multiple of both the 64-byte block used by MD5, SHA1
    # and SHA256, and the 128-byte block used by SHA512.
    _CHUNK_SIZE = 1024 * 32

    def b64(self, data: Union[bytes, str], decode: bool=True) -> Union[bytes, str]:
        """Base64 encode the given data.

        Note:
            Base64 is an *encoding*, not encryption; it provides no
            confidentiality.

        Args:
            data (Union[bytes, str]): Value to be encoded. If a ``str`` data
                type is received, it is encoded to ``bytes`` before encoding.
            decode (bool, optional): Return the Base64 value as a ``str``
                rather than ``bytes``. Defaults to True.

        Raises:
            ValueError: If ``data`` is neither a ``str`` nor ``bytes`` type.

        Returns:
            Union[bytes, str]: The Base64 encoded value; a ``str`` if
            ``decode`` is ``True``, otherwise ``bytes``.

        """
        b = base64.b64encode(self._encode(data))
        return b.decode() if decode else b

    def b64md5(self, data: Union[Iterable[str], str], trunc: Union[int, None]=None) -> str:
        """Create an optionally truncated Base64 encoded MD5 hash from a
        string or array.

        Args:
            data (Union[Iterable[str], str]): A string or an iterable object
                containing strings to be encoded. The strings of an iterable
                are concatenated (without a separator) before hashing.
            trunc (int, optional): Truncate the Base64 string to (n)
                characters. As string slicing is used, values such as ``-1``
                are also valid. A value of ``None`` or ``0`` returns the
                string untruncated. Defaults to None.

        Returns:
            str: An (optionally truncated) Base64 encoded MD5 hash of the
            passed string or iterable.

        """
        s = ''.join(data).encode()
        # The Base64 encoding is applied to the MD5 *hex digest* (as bytes),
        # not to the raw 16-byte digest.
        h = self.md5(s, decode=False)
        b = self.b64(h, decode=True)
        # Truthiness test (rather than ``is not None``) is kept on purpose,
        # so a ``trunc`` of 0 continues to mean 'do not truncate'.
        return b[:trunc] if trunc else b

    @staticmethod
    def checksum_crc32(path: str, return_integer: bool=False) -> Union[int, str]:
        """Generate a 32-bit CRC32 checksum for the given file.

        Args:
            path (str): Full path to the file.
            return_integer (bool, optional): Return the original unsigned
                32-bit integer, rather than the hex string. Defaults to False.

        Important:
            This algorithm is *not* cryptographically strong and should not be
            used for authentication or digital signatures; nor is it suitable
            for use as a general hash algorithm.

            -- zlib.crc32 `Documentation`_

        .. _Documentation: https://docs.python.org/3/library/zlib.html#zlib.crc32

        :Design:
            This method reads the file in 32768-byte chunks, carrying the
            running CRC value forward, so the whole file is never held in
            memory at once.

        :Example:

            Example for calculating the crc32 checksum for a file, returning a
            hex string::

                >>> from utils4.crypto import crypto

                >>> crypto.checksum_crc32(path='/tmp/test.txt')
                '2a30e66b'


            Example for calculating the crc32 checksum for a file, returning
            an integer::

                >>> from utils4.crypto import crypto

                >>> crypto.checksum_crc32(path='/tmp/test.txt', return_integer=True)
                707847787

        Returns:
            Union[int, str]: If the ``return_integer`` value is ``False``
            (default action), the checksum is passed through
            ``convert.int2hex`` and the resulting hex string is returned.
            Otherwise, an unsigned 32-bit integer is returned.

        """
        crcval = 0
        with open(path, 'rb') as f:
            # iter() with a sentinel stops on the empty read that marks EOF.
            for chunk in iter(lambda: f.read(Crypto._CHUNK_SIZE), b''):
                crcval = zlib.crc32(chunk, crcval)
        if return_integer:
            return crcval
        return convert.int2hex(crcval & 0xFFFFFFFF)

    @staticmethod
    def checksum_md5(path: str) -> str:
        """Generate a 128-bit MD5 checksum for the given file.

        Args:
            path (str): Full path to the file.

        :Design:
            This method reads the file in 32768-byte chunks
            (64-bytes * 512 blocks) for more memory efficient reading;
            taking advantage of the fact that MD5 uses 512-bit (64-byte)
            blocks.

        :Example:

            Example calculating the MD5 checksum for a file::

                >>> from utils4.crypto import crypto

                >>> crypto.checksum_md5(path='/tmp/test.txt')
                '9ec06901e8f25eb9810c5e0db88e7dcd'

        Returns:
            str: A 128-bit MD5 hex digest (checksum string) of the file's
            contents.

        """
        return Crypto._hexdigest_file(path, hashlib.md5())

    @staticmethod
    def checksum_sha1(path: str) -> str:
        """Generate a 160-bit SHA1 checksum for the given file.

        Args:
            path (str): Full path to the file.

        :Design:
            This method reads the file in 32768-byte chunks
            (64-bytes * 512 blocks) for more memory efficient reading;
            taking advantage of the fact that SHA1 uses 512-bit (64-byte)
            blocks.

        :Example:

            Example calculating the SHA1 checksum for a file::

                >>> from utils4.crypto import crypto

                >>> crypto.checksum_sha1(path='/tmp/test.txt')
                'e49a1493c637a24800119fb53ef7dbc580221e89'

        Returns:
            str: A 160-bit SHA1 hex digest (checksum string) of the file's
            contents.

        """
        return Crypto._hexdigest_file(path, hashlib.sha1())

    @staticmethod
    def checksum_sha256(path: str) -> str:
        """Generate a 256-bit SHA256 checksum for the given file.

        Args:
            path (str): Full path to the file.

        :Design:
            This method reads the file in 32768-byte chunks
            (64-bytes * 512 blocks) for more memory efficient reading;
            taking advantage of the fact that SHA256 uses 512-bit (64-byte)
            blocks.

        :Example:

            Example calculating the SHA256 checksum for a file::

                >>> from utils4.crypto import crypto

                >>> crypto.checksum_sha256(path='/tmp/test.txt')
                'e899df8e51b60bf8a6ede73fe5c7b4267bf5e48937e848bac3c6efd906833821'

        Returns:
            str: A 256-bit SHA256 hex digest (checksum string) of the file's
            contents.

        """
        return Crypto._hexdigest_file(path, hashlib.sha256())

    @staticmethod
    def checksum_sha512(path: str) -> str:
        """Generate a 512-bit SHA512 checksum for the given file.

        Args:
            path (str): Full path to the file.

        :Design:
            This method reads the file in 32768-byte chunks
            (128-bytes * 256 blocks) for more memory efficient reading;
            taking advantage of the fact that SHA512 uses 1024-bit (128-byte)
            blocks.

        :Example:

            Example calculating the SHA512 checksum for a file::

                >>> from utils4.crypto import crypto

                >>> crypto.checksum_sha512(path='/tmp/test.txt')
                ('247adcb6f5b284b3e45c9281171ba7a6'
                 '2502692ee9ee8020bd5827602972409f'
                 '9bdfc2ec7e5452223c19b3745d3f04e2'
                 '542ef0d0e075139d1ee3b5f678c9aaec')  # Single string

        Returns:
            str: A 512-bit SHA512 hex digest (checksum string) of the file's
            contents.

        """
        return Crypto._hexdigest_file(path, hashlib.sha512())

    def md5(self, data: Union[bytes, str], decode: bool=True) -> Union[bytes, str]:
        """Create an MD5 hex digest, returned as either ``str`` or ``bytes``.

        Args:
            data (Union[bytes, str]): Value to be hashed. If a ``str`` data
                type is passed, it is encoded to ``bytes`` before hashing.
            decode (bool, optional): Return the hex digest as a ``str``.
                If ``False``, the hex digest is returned encoded as
                ``bytes``. Defaults to True.

        Raises:
            ValueError: If ``data`` is neither a ``str`` nor ``bytes`` type.

        Returns:
            Union[bytes, str]: The MD5 hex digest; a ``str`` if ``decode`` is
            ``True``, otherwise ``bytes``.

        """
        h = hashlib.md5(self._encode(data)).hexdigest()
        return h if decode else h.encode()

    @staticmethod
    def _encode(data: Union[bytes, str]) -> bytes:
        """Test if a string is ``str`` or ``bytes`` before processing.

        Args:
            data (Union[bytes, str]): String value to be encoded.

        If the received ``data`` parameter is a ``str`` type, it is converted
        to a ``bytes`` type and returned. If the string is already a ``bytes``
        type, it is returned, unmodified.

        Raises:
            ValueError: If the ``data`` object is neither a ``str`` or
                ``bytes`` type.

        Returns:
            bytes: A ``bytes`` encoded string.

        """
        if not isinstance(data, (bytes, str)):
            # ValueError (rather than TypeError) is retained for backward
            # compatibility with callers catching the documented exception.
            raise ValueError('Expected a bytes or str type.')
        return data.encode() if isinstance(data, str) else data

    @staticmethod
    def _hexdigest_file(path: str, hasher) -> str:
        """Feed a file's contents into a hash object and return the digest.

        Shared implementation for the ``hashlib`` based ``checksum_*``
        methods, so the chunked-read loop exists in one place only.

        Args:
            path (str): Full path to the file.
            hasher: A freshly created ``hashlib`` hash object
                (e.g. ``hashlib.md5()``), which is updated in place.

        Returns:
            str: The hex digest of the file's contents.

        """
        with open(path, 'rb') as f:
            # iter() with a sentinel stops on the empty read that marks EOF.
            for chunk in iter(lambda: f.read(Crypto._CHUNK_SIZE), b''):
                hasher.update(chunk)
        return hasher.hexdigest()
# Module-level instance of the Crypto class; the examples in this module's
# docstrings import it via ``from utils4.crypto import crypto``.
crypto = Crypto()