Source code for PyFunceble.core.multiprocess

"""
The tool to check the availability or syntax of domain, IP or URL.

::


    ██████╗ ██╗   ██╗███████╗██╗   ██╗███╗   ██╗ ██████╗███████╗██████╗ ██╗     ███████╗
    ██╔══██╗╚██╗ ██╔╝██╔════╝██║   ██║████╗  ██║██╔════╝██╔════╝██╔══██╗██║     ██╔════╝
    ██████╔╝ ╚████╔╝ █████╗  ██║   ██║██╔██╗ ██║██║     █████╗  ██████╔╝██║     █████╗
    ██╔═══╝   ╚██╔╝  ██╔══╝  ██║   ██║██║╚██╗██║██║     ██╔══╝  ██╔══██╗██║     ██╔══╝
    ██║        ██║   ██║     ╚██████╔╝██║ ╚████║╚██████╗███████╗██████╔╝███████╗███████╗
    ╚═╝        ╚═╝   ╚═╝      ╚═════╝ ╚═╝  ╚═══╝ ╚═════╝╚══════╝╚═════╝ ╚══════╝╚══════╝

Provides the multiprocessing core interface.

Author:
    Nissar Chababy, @funilrys, contactTATAfunilrysTODTODcom

Special thanks:
    https://pyfunceble.github.io/special-thanks.html

Contributors:
    https://pyfunceble.github.io/contributors.html

Project link:
    https://github.com/funilrys/PyFunceble

Project documentation:
    https://pyfunceble.readthedocs.io/en/master/

Project homepage:
    https://pyfunceble.github.io/

License:
::


    Copyright 2017, 2018, 2019, 2020 Nissar Chababy

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
"""

import sys
from itertools import chain
from multiprocessing import Manager, Pipe, Process, active_children
from tempfile import NamedTemporaryFile
from traceback import format_exc

import domain2idna
from colorama import Fore, Style
from colorama import init as initiate_colorama

import PyFunceble

from .api import APICore
from .file import FileCore


[docs]class OurProcessWrapper(Process): """ Wrapper of Process. The object of this class is just to overwrite :code:`Process.run()` in order to catch exceptions. .. note:: This class takes the same arguments as :code:`Process`. """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.conn1, self.conn2 = Pipe() self._exception_receiver = None
[docs] def run(self): """ Overwrites :code:`Process.run()`. """ try: # We run a normal process. Process.run(self) # We send None as message as there was no exception. self.conn2.send(None) except Exception as exception: # pylint: disable=broad-except PyFunceble.LOGGER.exception() # We get the traceback. traceback = format_exc() # We send the exception and its traceback to the pipe. self.conn2.send((exception, traceback))
@property def exception(self): """ Provides a way to check if an exception was send. """ try: if self.conn1.poll(): # There is something to read. # We get and save the exception. self._exception_receiver = self.conn1.recv() except EOFError: pass self.conn2.close() return self._exception_receiver
[docs]class MultiprocessCore( FileCore ): # pragma: no cover pylint: disable=too-many-instance-attributes """ Brain of PyFunceble for file testing with multiple processes.s. :param str file: The file we are testing. :param str file_type: The file type. Should be one of the following. - :code:`domain` - :code:`url` """ def __init__(self, file, file_content_type="domain"): super().__init__(file, file_content_type=file_content_type) # pylint: disable=arguments-differ
[docs] def test( self, subject, file_content_type, loader, manager_data, intern, ignore_inactive_db_check=False, custom=None, ): """ Tests the given subject and return the result. """ PyFunceble.LOADER = loader if not PyFunceble.LOADER.was_configuration_loaded(): PyFunceble.LOADER.get_config() PyFunceble.LOADER.set_custom_config(custom) PyFunceble.LOADER.inject_all() initiate_colorama(True) PyFunceble.INTERN.update(intern) if PyFunceble.CONFIGURATION.idna_conversion: subject = domain2idna.domain2idna(subject) if not self.should_be_ignored( subject, self.autocontinue, self.inactive_db, ignore_inactive_db_check=ignore_inactive_db_check, ): if isinstance(PyFunceble.CONFIGURATION.cooldown_time, (float, int)): PyFunceble.sleep(PyFunceble.CONFIGURATION.cooldown_time) if file_content_type in ["domain"]: subject = subject.lower() if PyFunceble.CONFIGURATION.syntax: result = APICore( subject, complete=True, is_parent=False, db_file_name=self.file ).syntax(file_content_type) elif PyFunceble.CONFIGURATION.reputation: result = APICore( subject, complete=True, is_parent=False, db_file_name=self.file ).reputation(file_content_type) else: result = APICore( subject, complete=True, is_parent=False, db_file_name=self.file ).availability(file_content_type) self.generate_complement_status_file(result["tested"], result["status"]) self.save_into_database(result, self.file) if manager_data is not None: manager_data.append(result) else: self.post_test_treatment( result, self.file_type, complements_test_started=self.complements_test_started, auto_continue_db=self.autocontinue, inactive_db=self.inactive_db, mining=self.mining, whois_db=self.whois_db, ) elif self.autosave.authorized or PyFunceble.CONFIGURATION.print_dots: PyFunceble.LOGGER.info(f"Skipped {subject!r}.") print(".", end="")
def __merge_processes_data(self, manager_data, tracker=None): """ Reads all results and put them at the right location. """ if manager_data is not None: if ( not self.autosave.authorized and PyFunceble.CONFIGURATION.multiprocess_merging_mode != "live" and not PyFunceble.CONFIGURATION.quiet ): print( Fore.MAGENTA + Style.BRIGHT + "\nMerging cross processes data... This process may take some time." ) for test_output in manager_data: if self.autosave.authorized: print(Fore.MAGENTA + Style.BRIGHT + "Merging process data ...") self.post_test_treatment( test_output, self.file_type, complements_test_started=self.complements_test_started, auto_continue_db=self.autocontinue, inactive_db=self.inactive_db, mining=self.mining, whois_db=self.whois_db, ) if tracker: tracker.add_position(len(test_output["tested"])) manager_data[:] = [] self.autocontinue.save() self.inactive_db.save() self.mining.save() self.cleanup(self.autocontinue, self.autosave, test_completed=False) def __check_exception(self, processes, manager_data): """ Checks if an exception is present into the given pool of processes. :param list processes. A list of running processes. """ exception_present = False for process in processes: # We loop through the list of processes. try: if process.exception and not exception_present: # There in an exception in the currently # read process. # We get the traceback _, traceback = process.exception # We print the traceback. print(traceback) PyFunceble.LOGGER.error(traceback) exception_present = True if exception_present: # We kill the process. process.terminate() except AttributeError: continue if exception_present: # We finally exit. self.__merge_processes_data(manager_data) sys.exit(1) def __start_process(self, subject, manager_data, ignore_inactive_db_check=False): """ Starts a new process. """ if subject in self.autocontinue: if self.autosave.authorized or PyFunceble.CONFIGURATION.print_dots: PyFunceble.LOGGER.info(f"Skipped {subject!r}: already tested.") print(".", end="") else: original_config = PyFunceble.CONFIGURATION.copy() original_intern = PyFunceble.INTERN.copy() process = OurProcessWrapper( target=self.test, args=( subject, self.file_type, PyFunceble.LOADER, manager_data, original_intern, ignore_inactive_db_check, { "api_file_generation": PyFunceble.CONFIGURATION.db_type == "json", "inactive_database": False, "auto_continue": False, "quiet": PyFunceble.CONFIGURATION.quiet, }, ), ) process.name = f"PyF {subject}" process.start() PyFunceble.LOADER.config.update(original_config) PyFunceble.LOADER.inject_all() PyFunceble.INTERN.update(original_intern) def __process_live_merging(self, finished, manager_data, tracker): """ Processes the live merging. """ if ( PyFunceble.CONFIGURATION.multiprocess_merging_mode == "live" and not finished and not self.autosave.is_time_exceed() ): self.__merge_processes_data(manager_data, tracker=tracker) return True return False def __process_end_merging(self, finished, manager_data, tracker): """ Processes the end merging. """ if finished or self.autosave.is_time_exceed(): while "PyF" in " ".join([x.name for x in reversed(active_children())]): continue self.__merge_processes_data(manager_data, tracker=tracker) return True return False # pylint: disable=too-many-nested-blocks,too-many-branches def __run_multiprocess_test( self, stream, manager, ignore_inactive_db_check=False, tracker=None ): """ Tests the content of the given file. """ self.print_header() finished = False index = "funilrys" if PyFunceble.CONFIGURATION.db_type == "json": manager_data = manager.list() else: manager_data = None minimum_position = tracker.get_position() if tracker else 0 file_position = 0 while True: while ( len(active_children()) <= PyFunceble.CONFIGURATION.maximal_processes and not self.autosave.is_time_exceed() ): try: line = next(stream) if isinstance(line, tuple): index, line = line if ( tracker and tracker.authorized and file_position < minimum_position ): file_position += len(line) if ( self.autosave.authorized or PyFunceble.CONFIGURATION.print_dots ): PyFunceble.LOGGER.info( f"Skipped {line!r}: insufficient position." ) print(".", end="") continue subjects = self.get_subjects(line) if isinstance(subjects, list): for subject in subjects: self.__start_process( subject, manager_data, ignore_inactive_db_check=ignore_inactive_db_check, ) if index != "funilrys": # An index was given, we remove the index and subject from # the mining database. self.mining.remove(index, subject) else: self.__start_process( subjects, manager_data, ignore_inactive_db_check=ignore_inactive_db_check, ) if index != "funilrys": # An index was given, we remove the index and subject from # the mining database. self.mining.remove(index, subjects) if tracker and tracker.authorized: file_position += len(line) continue except StopIteration: finished = True break self.__check_exception(active_children(), manager_data) while len( active_children() ) >= PyFunceble.CONFIGURATION.maximal_processes and "PyF" in " ".join( [x.name for x in reversed(active_children())] ): self.__process_live_merging(finished, manager_data, tracker) continue if self.__process_live_merging(finished, manager_data, tracker): continue if self.__process_end_merging(finished, manager_data, tracker): break
[docs] def work_process(self, *args): """ Work process for :code`construct_and_get_shadow_file`. """ PyFunceble.LOADER = args[-2] if not PyFunceble.LOADER.was_configuration_loaded(): PyFunceble.LOADER.get_config() PyFunceble.LOADER.inject_all() PyFunceble.INTERN.update(args[-1]) self.write_in_shadow_file_if_needed(*args[:-2])
[docs] def construct_and_get_shadow_file( self, file_stream, ignore_inactive_db_check=False ): """ Provides a path to a file which contain the list to file. The idea is to do a comparison between what we already tested and what we still have to test. """ def start_process(*args): original_config = PyFunceble.CONFIGURATION.copy() origin_intern = PyFunceble.INTERN.copy() args += (PyFunceble.LOADER,) args += (origin_intern,) process = OurProcessWrapper(target=self.work_process, args=args) process.name = f"PyF shadow {line}" process.start() PyFunceble.LOADER.config.update(original_config) PyFunceble.LOADER.inject_all() PyFunceble.INTERN.update(origin_intern) if PyFunceble.CONFIGURATION.shadow_file: with NamedTemporaryFile(delete=False) as temp_file: if self.autosave.authorized or PyFunceble.CONFIGURATION.print_dots: print("") finished = False while True: while ( len(active_children()) <= PyFunceble.CONFIGURATION.maximal_processes ): try: line = next(file_stream) start_process( line, temp_file.name, ignore_inactive_db_check, self.autocontinue, self.inactive_db, ) active_children() continue except StopIteration: finished = True break while len( active_children() ) >= PyFunceble.CONFIGURATION.maximal_processes and "shadow" in " ".join( [x.name for x in reversed(active_children())] ): active_children() if finished: break if self.autosave.authorized or PyFunceble.CONFIGURATION.print_dots: print("") return temp_file.name return file_stream.name
[docs] def run_test(self): """ Runs the test of the content of the given file. """ with open(self.file, "r", encoding="utf-8") as file_stream, open( self.construct_and_get_shadow_file(file_stream), "r", encoding="utf-8" ) as shadow_file: tracker = PyFunceble.engine.HashesTracker(shadow_file.name) with Manager() as manager: self.__run_multiprocess_test(shadow_file, manager, tracker=tracker) shadow_file_name = shadow_file.name if PyFunceble.CONFIGURATION.shadow_file: PyFunceble.helpers.File(shadow_file_name).delete() if self.autocontinue.is_empty(): with open(self.file, "r", encoding="utf-8") as file_stream, open( self.construct_and_get_shadow_file( file_stream, ignore_inactive_db_check=True ), "r", encoding="utf-8", ) as shadow_file: with Manager() as manager: self.__run_multiprocess_test( file_stream, manager, ignore_inactive_db_check=True ) shadow_file_name = shadow_file.name if PyFunceble.CONFIGURATION.shadow_file: PyFunceble.helpers.File(shadow_file_name).delete() with Manager() as manager: self.__run_multiprocess_test( chain(self.inactive_db.get_to_retest()), manager ) with Manager() as manager: self.complements_test_started = True self.__run_multiprocess_test( self.get_complements(self.autocontinue), manager ) self.complements_test_started = False with Manager() as manager: self.__run_multiprocess_test(chain(self.mining.list_of_mined()), manager) tracker.reset_position() self.cleanup(self.autocontinue, self.autosave, test_completed=True)