Source code for PyFunceble.cli.processes.workers.miner

The tool to check the availability or syntax of domain, IP or URL.


    ██████╗ ██╗   ██╗███████╗██╗   ██╗███╗   ██╗ ██████╗███████╗██████╗ ██╗     ███████╗
    ██╔══██╗╚██╗ ██╔╝██╔════╝██║   ██║████╗  ██║██╔════╝██╔════╝██╔══██╗██║     ██╔════╝
    ██████╔╝ ╚████╔╝ █████╗  ██║   ██║██╔██╗ ██║██║     █████╗  ██████╔╝██║     █████╗
    ██╔═══╝   ╚██╔╝  ██╔══╝  ██║   ██║██║╚██╗██║██║     ██╔══╝  ██╔══██╗██║     ██╔══╝
    ██║        ██║   ██║     ╚██████╔╝██║ ╚████║╚██████╗███████╗██████╔╝███████╗███████╗
    ╚═╝        ╚═╝   ╚═╝      ╚═════╝ ╚═╝  ╚═══╝ ╚═════╝╚══════╝╚═════╝ ╚══════╝╚══════╝

Provides our miner worker. This is the description of a single miner worker.

Author:
    Nissar Chababy, @funilrys, contactTATAfunilrysTODTODcom

Special thanks:


Project link:

Project documentation:

Project homepage:


    Copyright 2017, 2018, 2019, 2020, 2021 Nissar Chababy

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.

import copy
import socket
from typing import List, Optional, Tuple

from domain2idna import domain2idna

import PyFunceble.facility
import PyFunceble.factory
import PyFunceble.storage
from PyFunceble.checker.status_base import CheckerStatusBase
from PyFunceble.cli.processes.workers.base import WorkerBase
from PyFunceble.cli.utils.stdout import print_single_line
from PyFunceble.converter.url2netloc import Url2Netloc

class MinerWorker(WorkerBase):
    """
    Provides our miner worker. The objective of this worker is to provide
    a single worker (or process if you prefer) which will be used to handle
    the mining of dataset to test.
    """

    STD_NAME: str = "pyfunceble_miner_worker"

    # Statuses for which :meth:`target` does not try to mine anything.
    # NOTE(review): the two members of this tuple were missing from the
    # reviewed copy of this file; they were restored as the "down" and
    # "invalid" statuses - confirm against the upstream project.
    INACTIVE_STATUSES: Tuple[str, ...] = (
        PyFunceble.storage.STATUS.down,
        PyFunceble.storage.STATUS.invalid,
    )

    # Converter used to extract the network location of the mined URLs.
    url2netloc: Optional[Url2Netloc] = None

    def __post_init__(self) -> None:
        """
        Prepares the requester and the URL to netloc converter, then lets the
        parent class finish its own initialization.
        """

        # Be sure that all settings are loaded properly!!
        PyFunceble.factory.Requester.guess_all_settings()
        self.url2netloc = Url2Netloc()

        return super().__post_init__()

    @staticmethod
    def mine_from(subject: str) -> Optional[List[str]]:
        """
        Given the subject to work from, try to get the related subjects.

        The subject is requested with redirections allowed, then every
        :code:`location` header found in the redirection history is collected.

        :param subject:
            The URL to start from.

        :return:
            The list of mined URLs. It is empty when nothing was found or
            when the request failed with one of the handled exceptions.
        """

        PyFunceble.facility.Logger.info("Started mining from %r", subject)

        result = []

        try:
            req = PyFunceble.factory.Requester.get(subject, allow_redirects=True)

            for element in req.history:
                if "location" in element.headers:
                    result.append(element.headers["location"])

            # Keep any history entry which is already a plain string.
            result.extend(x for x in req.history if isinstance(x, str))
        except (
            PyFunceble.factory.Requester.exceptions.ConnectionError,
            PyFunceble.factory.Requester.exceptions.Timeout,
            PyFunceble.factory.Requester.exceptions.InvalidURL,
            PyFunceble.factory.Requester.urllib3_exceptions.InvalidHeader,
            socket.timeout,
        ):
            # Best effort: a failing request only means that whatever was
            # collected so far (possibly nothing) is returned.
            PyFunceble.facility.Logger.error(
                "Could not mine from %r", subject, exc_info=True
            )

        PyFunceble.facility.Logger.debug("Mined from %r:\n%r.", subject, result)
        PyFunceble.facility.Logger.info("Finished mining from %r", subject)

        return result

    def target(self, consumed: Tuple[dict, CheckerStatusBase]) -> None:
        """
        Mines new subjects from the given tested dataset and pushes each of
        them into the output queue.

        Nothing is mined when the consumed data is not a
        :code:`(dataset, status)` pair, when the dataset already comes from
        the miner, or when the status is one of :attr:`INACTIVE_STATUSES`.

        :param consumed:
            A tuple with the tested dataset and its test result.

        :return:
            Always :code:`None`, the mined datasets are added to the output
            queue manually.
        """

        # The length is checked too, so that a malformed tuple is skipped
        # instead of raising while indexing or unpacking it.
        if (
            not isinstance(consumed, tuple)
            or len(consumed) != 2
            or not isinstance(consumed[1], CheckerStatusBase)
        ):
            PyFunceble.facility.Logger.info(
                "Skipping latest dataset because consumed data was not a tuple."
            )
            return None

        # Just for human brain.
        test_dataset, test_result = consumed

        if "from_miner" in test_dataset:
            PyFunceble.facility.Logger.info(
                "Skipping dataset because it comes from the mining mechanism."
            )
            return None

        if test_result.status in self.INACTIVE_STATUSES:
            PyFunceble.facility.Logger.info(
                "Skipping dataset because status is not active."
            )
            return None

        if test_dataset["subject_type"] == "domain":
            subject = f"http://{test_result.idna_subject}:80"
        else:
            # Assuming it's already a URL.
            subject = test_result.idna_subject

        print_single_line("M")

        self.add_to_output_queue("pyfunceble")
        self.share_waiting_message()

        mined = self.mine_from(subject)

        for url in mined:
            # Work on a copy so that the consumed dataset stays untouched.
            to_send = copy.deepcopy(test_dataset)
            to_send["from_miner"] = True

            if test_dataset["subject_type"] == "domain":
                netloc = self.url2netloc.set_data_to_convert(url).get_converted()

                if ":" in netloc:
                    # Only the part before the first colon is kept.
                    netloc = netloc[: netloc.find(":")]

                to_send["subject"] = netloc
                to_send["idna_subject"] = domain2idna(netloc)
            else:
                # Align the trailing slash with the tested subject so that
                # the comparison below is meaningful.
                if not test_result.idna_subject.endswith("/") and url.endswith("/"):
                    url = url[:-1]

                to_send["subject"] = url
                to_send["idna_subject"] = domain2idna(url)

            if to_send["idna_subject"] == test_result.idna_subject:
                PyFunceble.facility.Logger.info(
                    "Skipping %r because found in test result.", url
                )
                continue

            self.add_to_output_queue(to_send)

        # Returning None because we manually add into the queue.
        return None