# pylint:disable=line-too-long
"""
The tool to check the availability or syntax of domains, IPv4 or URL.
::
██████╗ ██╗ ██╗███████╗██╗ ██╗███╗ ██╗ ██████╗███████╗██████╗ ██╗ ███████╗
██╔══██╗╚██╗ ██╔╝██╔════╝██║ ██║████╗ ██║██╔════╝██╔════╝██╔══██╗██║ ██╔════╝
██████╔╝ ╚████╔╝ █████╗ ██║ ██║██╔██╗ ██║██║ █████╗ ██████╔╝██║ █████╗
██╔═══╝ ╚██╔╝ ██╔══╝ ██║ ██║██║╚██╗██║██║ ██╔══╝ ██╔══██╗██║ ██╔══╝
██║ ██║ ██║ ╚██████╔╝██║ ╚████║╚██████╗███████╗██████╔╝███████╗███████╗
╚═╝ ╚═╝ ╚═╝ ╚═════╝ ╚═╝ ╚═══╝ ╚═════╝╚══════╝╚═════╝ ╚══════╝╚══════╝
This submodule provides the adblock decoding interface.
Author:
Nissar Chababy, @funilrys, contactTATAfunilrysTODTODcom
Special thanks:
https://pyfunceble.github.io/special-thanks.html
Contributors:
https://pyfunceble.github.io/contributors.html
Project link:
https://github.com/funilrys/PyFunceble
Project documentation:
https://pyfunceble.readthedocs.io/en/master/
Project homepage:
https://pyfunceble.github.io/
License:
::
MIT License
Copyright (c) 2017, 2018, 2019 Nissar Chababy
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
# pylint: enable=line-too-long
from PyFunceble.check import Check
from PyFunceble.helpers import List, Regex
[docs]class AdBlock: # pylint: disable=too-few-public-methods
"""
Provide the adblock decoding logic.
:param list list_from_file: The file in list format.
:param bool aggressive:
Activate the extraction of all domains present in
the given adblock filter list.
.. warning::
For now, this is for testing purposes only.
It may one day be accessible to the public,
but if you read this, please think twice
before using it.
"""
def __init__(self, list_from_file, aggressive=False):
self.to_format = self._remove_ignored(list_from_file)
self.aggressive = aggressive
# We set the options separator.
self.options_separator = "$"
# We set the separator of options
self.option_separator = ","
[docs] def _remove_ignored(self, list_from_file):
"""
Removed the ignored element from the given list.
:param list list_from_file:
The list which represent the file we are decoding.
:return: The filtered list.
:rtype: list
"""
return [x for x in list_from_file if not self._is_to_ignore(x)]
@classmethod
def _is_to_ignore(cls, line):
    """
    Tell whether the given line has to be ignored.

    :param str line: The line from the file.

    :return: ``True`` if the line matches one of the ignore patterns.
    :rtype: bool
    """

    # A line is ignored when it starts with one of:
    # ``!``, ``@@``, ``/``, ``[``, ``.``, ``-``, ``_``, ``?`` or ``&``.
    #
    # Currently disabled pattern: r"(\$|,)(image)"
    ignore_patterns = [r"(^!|^@@|^\/|^\[|^\.|^-|^_|^\?|^&)"]

    # One matching pattern is enough to ignore the line.
    return any(
        Regex(line, pattern, return_data=False).match()
        for pattern in ignore_patterns
    )
[docs] def _handle_options(self, options):
"""
Handle the data from the options.
:param list options: The list of options from the rule.
:return: The list of domains to return globally.
:rtype: list
"""
# We initiate a variable which will save our result
result = []
# We initiate the regex which will be used to extract the domain listed
# under the option domain=
regex_domain_option = r"domain=(.*)"
for option in options:
# We loop through the list of option.
try:
# We try to extract the list of domains from the currently read
# option.
domains = Regex(
option, regex_domain_option, return_data=True, rematch=True, group=0
).match()[-1]
if domains:
# We could extract something.
if self.aggressive: # pragma: no cover
result.extend(
[
x
for x in domains.split("|")
if x and not x.startswith("~")
]
)
else:
# We return True.
return True
except TypeError:
pass
# We return the result.
return result
def decode(self):
    """
    Extract the domains to test from the adblock formatted lines.

    Each line of ``self.to_format`` is checked against three patterns and
    every successful extraction contributes to the output.

    :return: The list of domains to test.
    :rtype: list
    """

    # Pattern for the rules built around the ``||`` marker.
    anchor_pattern = r"^(?:.*\|\|)([^\/\$\^]{1,}).*$"
    # Pattern for the ``#...[name^="..."]`` / ``#...[name*="..."]`` rules.
    attribute_pattern = (
        r"(?:#+(?:[a-z]+?)?\[[a-z]+(?:\^|\*)\=(?:\'|\"))(.*\..*)(?:(?:\'|\")\])"
    )
    # Pattern for the rules fully enclosed between two ``|``.
    enclosed_pattern = r"^\|(.*\..*)\|$"

    # The options for which the base of an anchored rule is always kept.
    kept_options = ("third-party", "script", "popup", "xmlhttprequest")

    decoded = []

    for line in self.to_format:
        # The three extractions are always attempted, in this order.
        anchored = Regex(
            line, anchor_pattern, return_data=True, rematch=True, group=0
        ).match()
        enclosed = Regex(
            line, enclosed_pattern, return_data=True, rematch=True, group=0
        ).match()
        attribute = Regex(
            line, attribute_pattern, return_data=True, rematch=True, group=0
        ).match()

        if anchored:
            if self.options_separator not in line:
                # No options: the extracted elements are kept directly.
                decoded.extend(self._extract_base(anchored))
            else:
                # Only what follows the last options separator is
                # considered as the options of the rule.
                options = line.split(self.options_separator)[-1].split(
                    self.option_separator
                )

                if not options[-1] or any(
                    option in options for option in kept_options
                ):
                    decoded.extend(self._extract_base(anchored))

                extra = self._handle_options(options)

                if extra:
                    if isinstance(extra, list):  # pragma: no cover
                        # Domains were extracted from the options: they are
                        # processed together with the anchored elements.
                        extra.extend(self._extract_base(anchored))
                        decoded.extend(self._extract_base(extra))
                    else:
                        # The options only reported that domains are
                        # declared: the anchored elements are kept.
                        decoded.extend(self._extract_base(anchored))

        if enclosed:
            # The enclosed extraction was successful.
            decoded.extend(List(self._format_decoded(enclosed)).format())

        if attribute:
            # The attribute extraction was successful.
            decoded.extend(List(self._format_decoded(attribute)).format())

    return List(decoded).format()