PNG  IHDR* pHYs+ IDATx]n#; cdLb Ǚ[at¤_:uP}>!Usă cag޿ ֵNu`ݼTâabO7uL&y^wFٝA"l[|ŲHLN밪4*sG3|Dv}?+y߉{OuOAt4Jj.u]Gz*҉sP'VQKbA1u\`& Af;HWj hsO;ogTu uj7S3/QzUr&wS`M$X_L7r2;aE+ώ%vikDA:dR+%KzƉo>eOth$z%: :{WwaQ:wz%4foɹE[9<]#ERINƻv溂E%P1i01 |Jvҗ&{b?9g=^wζXn/lK::90KwrюO\!ջ3uzuGv^;騢wq<Iatv09:tt~hEG`v;3@MNZD.1]L:{ծI3`L(÷ba")Y.iljCɄae#I"1 `3*Bdz>j<fU40⨬%O$3cGt]j%Fߠ_twJ;ABU8vP3uEԑwQ V:h%))LfraqX-ۿX]v-\9I gl8tzX ]ecm)-cgʒ#Uw=Wlێn(0hPP/ӨtQ“&J35 $=]r1{tLuǮ*i0_;NƝ8;-vݏr8+U-kruȕYr0RnC]*ެ(M:]gE;{]tg(#ZJ9y>utRDRMdr9㪩̞zֹb<ģ&wzJM"iI( .ꮅX)Qw:9,i좜\Ԛi7&N0:asϓc];=ΗOӣ APqz93 y $)A*kVHZwBƺnWNaby>XMN*45~ղM6Nvm;A=jֲ.~1}(9`KJ/V F9[=`~[;sRuk]rєT!)iQO)Y$V ی ۤmzWz5IM Zb )ˆC`6 rRa}qNmUfDsWuˤV{ Pݝ'=Kֳbg,UҘVz2ﴻnjNgBb{? ߮tcsͻQuxVCIY۠:(V뺕 ٥2;t`@Fo{Z9`;]wMzU~%UA蛚dI vGq\r82iu +St`cR.6U/M9IENDB` REDROOM
PHP 5.6.40
Preview: charsetprober.py Size: 4.99 KB
/lib/python3.6/site-packages/chardet/charsetprober.py

######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#   Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

import logging
import re

from .enums import ProbingState


class CharSetProber(object):
    """
    Base class holding the state shared by charset probers.

    An instance carries a probing state, an optional language filter and a
    logger.  ``feed``, ``charset_name`` and ``get_confidence`` are neutral
    placeholders in this class (they do nothing / return ``None`` / return
    ``0.0``).  The static ``filter_*`` helpers strip bytes that are not
    interesting for analysis out of a buffer.
    """

    SHORTCUT_THRESHOLD = 0.95

    def __init__(self, lang_filter=None):
        """Remember ``lang_filter`` and set up the logger."""
        self.logger = logging.getLogger(__name__)
        self.lang_filter = lang_filter
        # The state stays unset until reset() is called.
        self._state = None

    def reset(self):
        """Put the prober back into the ``DETECTING`` state."""
        self._state = ProbingState.DETECTING

    @property
    def charset_name(self):
        """Name of the detected charset; always ``None`` in this class."""
        return None

    def feed(self, buf):
        """Accept a chunk of data; this implementation ignores it."""
        pass

    @property
    def state(self):
        """Current probing state (``None`` before the first ``reset``)."""
        return self._state

    def get_confidence(self):
        """Confidence in the detected charset; always ``0.0`` here."""
        return 0.0

    @staticmethod
    def filter_high_byte_only(buf):
        """
        Return ``buf`` with every run of ASCII bytes (0x00-0x7F) collapsed
        into a single space, so that only the high bytes remain verbatim.
        """
        return re.sub(b'([\x00-\x7F])+', b' ', buf)

    @staticmethod
    def filter_international_words(buf):
        """
        Keep only the words of ``buf`` that contain an international byte.

        Bytes fall into three classes:
        alphabet: English letters, a-z and A-Z
        international: high bytes, 0x80-0xFF
        marker: any other byte

        ``buf`` is treated as a sequence of words separated by markers.  Only
        words holding at least one international byte are copied to the
        result; a marker directly following such a word is emitted as one
        ASCII space.

        This filter is meant for scripts that do not use English characters.
        """
        result = bytearray()

        # A match is a word with one or more international bytes, optionally
        # followed by a single trailing marker byte.
        matches = re.findall(
            b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?', buf)

        for match in matches:
            head, tail = match[:-1], match[-1:]
            result.extend(head)

            # A trailing marker carries no information for the analysis
            # (markers are used alike across languages), so it is normalised
            # to a space; a letter or high byte is kept unchanged.
            tail_is_marker = tail < b'\x80' and not tail.isalpha()
            result.extend(b' ' if tail_is_marker else tail)

        return result

    @staticmethod
    def filter_with_english_letters(buf):
        """
        Return a copy of ``buf`` reduced to runs of English letters and high
        bytes that lie outside ``<...>`` tags, each kept run followed by one
        space.

        Two consequences of the scanning order are worth knowing: the run
        directly in front of a ``>`` is kept even though it sits inside the
        tag, and a run directly in front of a ``<`` is dropped because the
        tag is already considered open when the run is evaluated.

        The filter suits scripts mixing English characters with extended
        ASCII characters; at present only ``Latin1Prober`` uses it.
        """
        kept = bytearray()
        inside_tag = False
        run_start = 0

        for pos in range(len(buf)):
            # A one-byte slice yields bytes on Python 3 (indexing gives int).
            char = buf[pos:pos + 1]

            # Track tag boundaries before deciding what to do with the run.
            if char == b'<':
                inside_tag = True
            elif char == b'>':
                inside_tag = False

            # Letters and extended-ASCII bytes simply extend the current run.
            if char >= b'\x80' or char.isalpha():
                continue

            # Any other byte ends the run; keep it if it is non-empty and we
            # are outside a tag, and delimit it with a space.
            if pos > run_start and not inside_tag:
                kept.extend(buf[run_start:pos])
                kept.extend(b' ')
            run_start = pos + 1

        # The trailing run has no terminator; keep it unless a tag is open.
        if not inside_tag:
            kept.extend(buf[run_start:])

        return kept

Directory Contents

Dirs: 2 × Files: 39

Name Size Perms Modified Actions
cli DIR
- drwxr-xr-x 2025-03-30 04:21:31
Edit Download
- drwxr-xr-x 2025-03-30 04:21:31
Edit Download
30.52 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
1.72 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
9.19 KB lrw-r--r-- 2017-04-11 19:52:09
Edit Download
3.70 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
4.99 KB lrw-r--r-- 2017-06-08 14:21:53
Edit Download
3.51 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
1.11 KB lrw-r--r-- 2017-06-08 14:32:38
Edit Download
1.81 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
1.62 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
3.86 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
10.26 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
3.66 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
13.23 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
1.71 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
30.88 KB lrw-r--r-- 2017-04-11 20:48:55
Edit Download
1.71 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
20.23 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
1.71 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
13.51 KB lrw-r--r-- 2017-06-08 14:21:53
Edit Download
25.17 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
19.18 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
12.54 KB lrw-r--r-- 2017-06-08 14:32:38
Edit Download
17.53 KB lrw-r--r-- 2017-06-08 14:32:38
Edit Download
12.39 KB lrw-r--r-- 2017-06-08 14:32:38
Edit Download
11.08 KB lrw-r--r-- 2017-06-08 14:32:38
Edit Download
12.30 KB lrw-r--r-- 2017-06-08 14:32:38
Edit Download
11.03 KB lrw-r--r-- 2017-06-08 14:32:38
Edit Download
10.84 KB lrw-r--r-- 2017-06-08 14:32:38
Edit Download
5.24 KB lrw-r--r-- 2017-06-08 14:21:53
Edit Download
3.33 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
1.96 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
24.88 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
5.52 KB lrw-r--r-- 2017-06-08 14:32:38
Edit Download
3.46 KB lrw-r--r-- 2017-06-08 14:32:38
Edit Download
3.69 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
12.19 KB lrw-r--r-- 2017-06-08 14:32:38
Edit Download
2.70 KB lrw-r--r-- 2017-04-11 17:51:33
Edit Download
242 B lrw-r--r-- 2017-06-08 14:32:13
Edit Download
1.52 KB lrw-r--r-- 2017-04-12 18:41:25
Edit Download

If ZipArchive is unavailable, a .tar will be created (no compression).
© 2026 REDROOM — Secure File Manager. All rights reserved. Built with ❤️ & Red Dark UI