# NOTE(review): stray non-Python line "shell bypass 403" commented out —
# it is not part of this module and breaks parsing; looks like an injected
# artifact and should be investigated, not shipped.
"""
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Copyright © 2019 Cloud Linux Software Inc.
This software is also available under ImunifyAV commercial license,
see <https://www.imunify360.com/legal/eula>
"""
import hashlib
import os
import re
from typing import Iterator, Optional

from .config import MAX_FILE_SIZE
def dir_traverse_generator(
    target_dir: str, max_size: int = MAX_FILE_SIZE
) -> Iterator[str]:
    """Yield paths of files under *target_dir* no larger than *max_size*.

    Walks *target_dir* recursively with os.walk.

    :param target_dir: root directory of the traversal
    :param max_size: inclusive size limit in bytes (defaults to MAX_FILE_SIZE)
    :return: generator of absolute/relative paths (same base as target_dir)
    """
    for root, _dirs, filenames in os.walk(target_dir):
        for filename in filenames:
            path = os.path.join(root, filename)
            try:
                size = os.path.getsize(path)
            except OSError:
                # File vanished mid-walk or is a broken symlink — skip it
                # instead of aborting the whole traversal.
                continue
            if size <= max_size:
                yield path
def all_possible_relative_paths(abs_path: str, root_dir: str = "/") -> list:
    """Return every suffix path of *abs_path* relative to *root_dir*.

    e.g. for ('/a/b/c/d.txt', '/a/b/') the result is ['c/d.txt', 'd.txt'].

    :param abs_path: absolute file path (empty string yields [''])
    :param root_dir: base directory; falsy values are treated as '/'
    """
    if abs_path:
        relative = os.path.relpath(abs_path, root_dir or "/")
    else:
        relative = ""
    parts = relative.strip(os.sep).split(os.sep)
    suffixes = []
    for start in range(len(parts)):
        suffixes.append(os.sep.join(parts[start:]))
    return suffixes
def get_base_dir(abs_path: str, rel_path: str) -> str:
    """Return base_dir such that os.path.join(base_dir, rel_path) == abs_path.

    e.g. get_base_dir('/a/b/c/d.txt', 'c/d.txt') returns '/a/b/'.

    :param abs_path: absolute file path
    :param rel_path: expected suffix of abs_path
    :raises ValueError: if rel_path is not a suffix of abs_path
    """
    if not abs_path.endswith(rel_path):
        raise ValueError(
            f"rel_path '{rel_path}' is not a suffix of abs_path '{abs_path}'"
        )
    # Slice by explicit length: the original `abs_path[:-len(rel_path)]`
    # returned '' for an empty rel_path (slice [:-0] == [:0]), instead of
    # the whole abs_path.
    return abs_path[: len(abs_path) - len(rel_path)]
class HashCalculator:
HASHING_ALGORITHMS = {
"md5": hashlib.md5,
"sha256": hashlib.sha256,
}
BUFFER_SIZE = 8192
_consolidate_whitespace = re.compile(b"[\x20\x09-\x0d]+")
_remove_control_characters = re.compile(b"[\x00-\x08\x0e-\x1f\x7f-\xff]+")
def __init__(self, algorithm: str):
self.algorithm = algorithm
self._hasher_factory = self.HASHING_ALGORITHMS[algorithm]
self._normalizer = (
self._normalize_data_for_md5 if algorithm == "md5" else lambda x: x
)
@classmethod
def _normalize_data_for_md5(cls, data: bytes) -> bytes:
"""
This method normalizes binary data by:
- Removing the special control characters: 0x00-0x08, 0x0E-0x1F, 0x7F-0xFF;
- 0x00-0x08, 0x0E-0x1F are ASCII control characters minus TAB, LF, VT, FF and CR;
- 0x7F-0xFF are unicode control characters (DEL from C0 and C1 set).
- Consolidating the clusters of whitespace characters (0x20, 0x09-0x0D) into single space character (0x20);
- Turns all uppercase ASCII characters to lowercase.
"""
if not isinstance(data, bytes):
raise TypeError("Normalization function expects bytes input")
data = cls._remove_control_characters.sub(b"", data)
data = cls._consolidate_whitespace.sub(b" ", data)
return data.lower()
def calc_hash(
self, filepath: str, apply_normalization: bool = False
) -> Optional[str]:
if not os.path.isfile(filepath):
return
with open(filepath, "rb") as file:
normalized_data = (
self._normalizer(file.read())
if apply_normalization
else file.read()
)
hasher = self._hasher_factory()
for chunk in (
normalized_data[i : i + self.BUFFER_SIZE]
for i in range(0, len(normalized_data), self.BUFFER_SIZE)
):
hasher.update(chunk)
return hasher.hexdigest()