2022-09-03 22:20:01 +02:00
# Author: Scito (https://scito.ch)
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
2022-12-28 22:28:54 +01:00
from __future__ import annotations # workaround for PYTHON <= 3.10
2022-09-03 22:20:01 +02:00
import csv
2022-12-24 01:59:35 +01:00
import glob
import io
2022-09-03 22:20:01 +02:00
import json
import os
2022-12-24 01:59:35 +01:00
import re
2022-09-09 13:13:13 +02:00
import shutil
2022-09-03 23:47:43 +02:00
import sys
2022-12-28 22:28:54 +01:00
import pathlib
from typing import BinaryIO , Any , Union , List
# Types
# Alias used by the helpers below for a filesystem path given either as a
# plain string or as a pathlib.Path.
# workaround for PYTHON <= 3.9: Union[...] is used instead of str | pathlib.Path
PathLike = Union[str, pathlib.Path]
2022-09-03 23:47:43 +02:00
# Ref. https://stackoverflow.com/a/16571630
2022-12-28 22:28:54 +01:00
# workaround for PYTHON <= 3.10: class Capturing(list[Any]):
class Capturing(List[Any]):
    """Capture stdout and stderr as a list of lines.

    While the context is active, ``sys.stdout`` and ``sys.stderr`` are each
    replaced by an ``io.StringIO``. On exit the original streams are put
    back and the captured text is appended to this list, split into lines:
    all stdout lines first, followed by all stderr lines.

    Usage:
        with Capturing() as output:
            print("Output")
    """

    # TODO remove type ignore if fixed, see https://github.com/python/mypy/issues/11871, https://stackoverflow.com/questions/72174409/type-hinting-the-return-value-of-a-class-method-that-returns-self
    def __enter__(self):  # type: ignore
        """Swap in the capture buffers and return this list."""
        # Remember the real streams so __exit__ can put them back.
        self._stdout = sys.stdout
        self._stderr = sys.stderr
        self._stringio_std = io.StringIO()
        self._stringio_err = io.StringIO()
        sys.stdout = self._stringio_std
        sys.stderr = self._stringio_err
        return self

    def __exit__(self, *args: Any) -> None:
        """Restore the real streams and store the captured lines."""
        # Restore first, so the process-wide streams come back even if
        # collecting the captured text below fails.
        sys.stdout = self._stdout
        sys.stderr = self._stderr
        try:
            self.extend(self._stringio_std.getvalue().splitlines())
            self.extend(self._stringio_err.getvalue().splitlines())
        finally:
            # free up some memory
            del self._stringio_std
            del self._stringio_err
2022-09-03 22:20:01 +02:00
2022-12-28 22:28:54 +01:00
def file_exits(file: PathLike) -> bool:
    """Return True if *file* is an existing regular file.

    Directories and missing paths give False. The name is spelled
    "exits" (not "exists"); it is kept as-is because other code in this
    module calls it by that name.
    """
    return os.path.isfile(file)
2022-12-28 22:28:54 +01:00
def remove_file(file: PathLike) -> None:
    """Delete *file* if it is an existing regular file; otherwise do nothing.

    Directories and missing paths are left alone.
    """
    if not os.path.isfile(file):
        return
    try:
        os.remove(file)
    except FileNotFoundError:
        # The file vanished between the check and the removal; the goal
        # ("file is gone") is already met.
        pass
2022-12-28 22:28:54 +01:00
def remove_files(glob_pattern: str) -> None:
    """Delete every path that matches *glob_pattern*.

    Each match returned by ``glob.glob`` is passed to ``os.remove``; a
    pattern with no matches is a no-op.
    """
    for match in glob.glob(glob_pattern):
        os.remove(match)
2022-09-09 13:13:13 +02:00
2022-12-28 22:28:54 +01:00
def remove_dir_with_files(dir: PathLike) -> None:
    """Recursively delete *dir* and its contents if the path exists.

    A path that does not exist is silently ignored.
    """
    if os.path.exists(dir):
        shutil.rmtree(dir)
2022-09-03 22:20:01 +02:00
2022-12-28 22:28:54 +01:00
def read_csv(filename: str) -> List[List[str]]:
    """Read the UTF-8 CSV file *filename* and return its rows.

    Each row is a list of field strings; an empty file gives an empty list.
    """
    # newline='' lets the csv module do its own line-ending handling, so
    # line breaks inside quoted fields are preserved.
    with open(filename, "r", encoding="utf-8", newline='') as infile:
        return list(csv.reader(infile))
2022-12-28 22:28:54 +01:00
def read_csv_str(data_str: str) -> List[List[str]]:
    """Parse CSV text *data_str* and return its rows.

    Each row is a list of field strings; an empty string gives an empty list.
    """
    # Feed the csv module a stream with untranslated line endings rather than
    # str.splitlines(): splitlines() drops line breaks that sit inside quoted
    # fields and also splits on characters such as form feed that are not
    # CSV record separators.
    return list(csv.reader(io.StringIO(data_str, newline='')))
2022-12-28 22:28:54 +01:00
def read_json(filename: str) -> Any:
    """Load the UTF-8 JSON file *filename* and return the decoded value.

    For typical documents this is a list or a dictionary.
    """
    with open(filename, "r", encoding="utf-8") as infile:
        return json.load(infile)
2022-09-03 23:47:43 +02:00
2022-12-28 22:28:54 +01:00
def read_json_str(data_str: str) -> Any:
    """Decode the JSON text *data_str* and return the resulting value.

    For typical documents this is a list or a dictionary.
    """
    return json.loads(data_str)
2022-12-18 19:24:07 +01:00
2022-12-28 22:28:54 +01:00
def read_file_to_list(filename: str) -> List[str]:
    """Read the UTF-8 text file *filename* and return its lines.

    Lines keep their trailing newline, as returned by ``readlines()``.
    """
    with open(filename, "r", encoding="utf-8") as infile:
        return infile.readlines()
2022-12-28 22:28:54 +01:00
def read_file_to_str(filename: str) -> str:
    """Read the UTF-8 text file *filename* and return its whole content."""
    # Reading in one go gives the same text as joining the individual lines
    # with an empty separator, without building the intermediate list.
    with open(filename, "r", encoding="utf-8") as infile:
        return infile.read()
2022-12-24 01:59:35 +01:00
2022-12-28 22:28:54 +01:00
def read_binary_file_as_stream(filename: str) -> BinaryIO:
    """Return the content of *filename* as an in-memory binary stream.

    The file is read completely and closed; the returned ``io.BytesIO`` is
    positioned at the start of the data.
    """
    with open(filename, "rb") as infile:
        return io.BytesIO(infile.read())
2022-12-28 22:28:54 +01:00
def replace_escaped_octal_utf8_bytes_with_str(str: str) -> str:
    """Replace backslash-octal byte escapes found on ``name: ...`` lines.

    Every line of *str* containing ``name: `` is searched for runs of
    ``\\NNN`` escapes. Each run is interpreted as a sequence of octal byte
    values, decoded as UTF-8, and every occurrence of that run in the whole
    text is replaced by the decoded characters. A message is printed for each
    run. Returns the modified text.
    """
    # NOTE: the parameter shadows the builtin `str`; the name is kept because
    # it is part of the function's signature.
    for name_line in re.findall(r'name: .*$', str, flags=re.MULTILINE):
        for byte_sequence in re.findall(r'((?:\\[0-9]+)+)', name_line):
            # Splitting on the backslash leaves an empty first piece, which
            # is skipped; every other piece is one octal byte value.
            raw_bytes = b''.join(
                int(octal, 8).to_bytes(1, 'little')
                for octal in byte_sequence.split('\\')
                if octal
            )
            unicode_str = raw_bytes.decode('utf-8')
            print("Replace '{}' by '{}'".format(byte_sequence, unicode_str))
            str = str.replace(byte_sequence, unicode_str)
    return str
2022-12-28 22:28:54 +01:00
def quick_and_dirty_workaround_encoding_problem(str: str) -> str:
    """Blank out the rest of each line from ``name: "encoding: `` onwards.

    The match runs from that marker to the end of its line and is replaced
    by an empty string; the line break itself is kept. Text without the
    marker is returned unchanged.
    """
    # NOTE: the parameter shadows the builtin `str`; the name is kept because
    # it is part of the function's signature.
    return re.sub(r'name: "encoding: .*$', '', str, flags=re.MULTILINE)
2022-12-28 22:28:54 +01:00
def count_files_in_dir(path: PathLike) -> int:
    """Return the number of regular files directly inside directory *path*.

    Subdirectories are not counted and are not descended into.
    """
    return sum(
        1
        for name in os.listdir(path)
        if os.path.isfile(os.path.join(path, name))
    )